- setup.py: logging module with file (setup.log) + console output - Line-buffered output (fixes background execution buffering) - API calls with timeout (300s), retry (3x), debug logging - Per-batch progress: [1/29] persona batch 1/20 (20 docs) - --verbose flag for debug-level console - monitor.py: log tail in CLI + web dashboard - CLI: colorized last 15 log lines - Web: scrollable log panel with level-based colors - Smaller embed batches (20 instead of 50) for reliability Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
477 lines
18 KiB
Python
Executable File
477 lines
18 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
"""
|
||
AnythingLLM Persona RAG Monitor
|
||
|
||
Usage:
|
||
python3 monitor.py # CLI one-shot
|
||
python3 monitor.py --watch # CLI auto-refresh (2s)
|
||
python3 monitor.py --web # Web dashboard on :8899
|
||
python3 monitor.py --web 9000 # Custom port
|
||
"""
|
||
|
||
import json
import os
import re
import sys
import time
from collections import Counter, deque
from http.server import HTTPServer, BaseHTTPRequestHandler
from pathlib import Path

import yaml

try:
    import requests
except ImportError:
    requests = None
|
||
|
||
# Files that live in the same directory as this script.
_SCRIPT_DIR = Path(__file__).parent
CONFIG_PATH = _SCRIPT_DIR / "config.yaml"
PROGRESS_PATH = _SCRIPT_DIR / "upload_progress.json"
LOG_PATH = _SCRIPT_DIR / "setup.log"

# AnythingLLM desktop storage directories under the current user's home.
_STORAGE_DIR = Path.home() / ".config/anythingllm-desktop/storage"
LANCEDB_PATH = _STORAGE_DIR / "lancedb"
DOCS_PATH = _STORAGE_DIR / "documents"
VCACHE_PATH = _STORAGE_DIR / "vector-cache"
|
||
|
||
|
||
def load_config():
    """Load and parse the YAML configuration stored at ``CONFIG_PATH``.

    The file is decoded as UTF-8 explicitly: workspace names are expected to
    contain non-ASCII letters (the slug normalisation in ``collect_status``
    handles Turkish characters), and the platform default encoding is not
    UTF-8 everywhere.

    Returns:
        The parsed document. An empty file yields ``{}`` instead of ``None``
        so callers can use ``.get`` on the result.

    Raises:
        OSError: if the file cannot be opened.
        yaml.YAMLError: if the content is not valid YAML.
    """
    with open(CONFIG_PATH, encoding="utf-8") as f:
        return yaml.safe_load(f) or {}
|
||
|
||
|
||
def load_progress():
    """Read the upload progress record stored at ``PROGRESS_PATH``.

    Returns:
        The decoded JSON object, or ``{}`` when the file is missing, cannot be
        decoded, or does not hold a JSON object.
    """
    try:
        # Open directly instead of exists()-then-open: the file may disappear
        # between the two calls.
        with open(PROGRESS_PATH, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Likely a partially written file (e.g. another process is mid-save).
        # A monitor should show "no progress yet" rather than crash; the next
        # refresh picks up the completed file.
        return {}
    # Callers use .get() on the result, so anything but an object is unusable.
    return data if isinstance(data, dict) else {}
|
||
|
||
|
||
def dir_size_mb(path):
    """Return the combined size of all regular files below *path*, in MiB.

    Args:
        path: a ``pathlib.Path`` pointing at a directory.

    Returns:
        ``0`` when *path* does not exist, otherwise the total as a float.
        Files that disappear or cannot be stat'ed while the tree is being
        walked are skipped, so a single failure no longer zeroes the whole
        total.
    """
    if not path.exists():
        return 0

    total_bytes = 0
    try:
        for entry in path.rglob("*"):
            try:
                if entry.is_file():
                    total_bytes += entry.stat().st_size
            except OSError:
                # Entry vanished or became unreadable between listing and stat.
                continue
    except OSError:
        # The directory walk itself failed part-way; report what was counted.
        pass
    return total_bytes / (1024 * 1024)
|
||
|
||
|
||
def get_lance_workspaces():
    """Return the names of the ``*.lance`` directories under ``LANCEDB_PATH``.

    Only the trailing ``.lance`` is removed from each directory name; a name
    that happens to contain ``.lance`` elsewhere keeps it.

    Returns:
        A set of names, empty when ``LANCEDB_PATH`` does not exist.
    """
    if not LANCEDB_PATH.exists():
        return set()
    suffix = ".lance"
    return {
        entry.name[: -len(suffix)]
        for entry in LANCEDB_PATH.iterdir()
        if entry.is_dir() and entry.name.endswith(suffix)
    }
|
||
|
||
|
||
def get_lance_sizes():
    """Map each ``*.lance`` directory under ``LANCEDB_PATH`` to its size in MiB.

    Keys are the directory names with the trailing ``.lance`` removed (suffix
    only, not every occurrence). Sizes come from ``dir_size_mb`` so the byte
    counting and its error handling live in one place.

    Returns:
        A dict of name -> size, empty when ``LANCEDB_PATH`` does not exist.
    """
    sizes = {}
    if not LANCEDB_PATH.exists():
        return sizes
    suffix = ".lance"
    for table_dir in LANCEDB_PATH.iterdir():
        if table_dir.is_dir() and table_dir.name.endswith(suffix):
            sizes[table_dir.name[: -len(suffix)]] = dir_size_mb(table_dir)
    return sizes
|
||
|
||
|
||
def check_api(config):
    """Probe the AnythingLLM ``/auth`` endpoint with the configured API key.

    Args:
        config: parsed configuration; ``config['anythingllm']`` must provide
            ``base_url`` and ``api_key``.

    Returns:
        ``None`` when the ``requests`` package is unavailable (the module-level
        import fallback sets it to ``None``), ``True`` when the endpoint
        answers with HTTP 200, and ``False`` for any other status or any
        error, including missing configuration keys.
    """
    if not requests:
        return None
    try:
        api_cfg = config["anythingllm"]
        endpoint = f"{api_cfg['base_url']}/auth"
        auth_header = {"Authorization": f"Bearer {api_cfg['api_key']}"}
        # Short timeout: this runs on every refresh of the monitor.
        reply = requests.get(endpoint, headers=auth_header, timeout=3)
    except Exception:
        return False
    return reply.status_code == 200
|
||
|
||
|
||
def check_script_running():
    """Report whether ``pgrep -f setup.py`` finds a matching process.

    Returns:
        ``True`` when pgrep exits with status 0, ``False`` for any other exit
        status, and ``None`` when pgrep could not be run at all.
    """
    try:
        from subprocess import run

        finished = run(["pgrep", "-f", "setup.py"], capture_output=True, text=True)
    except Exception:
        return None
    return finished.returncode == 0
|
||
|
||
|
||
# Folds Turkish letters and "&" to ASCII before the slug character filter runs.
_SLUG_FOLD = str.maketrans({
    "ş": "s", "ç": "c", "ğ": "g", "ü": "u", "ö": "o",
    "ı": "i", "İ": "i", "&": "and",
})


def _workspace_slug(name):
    """Derive the slug used to find a workspace's LanceDB table from its name.

    Intended to mirror AnythingLLM's own slug rules: lower-case, fold Turkish
    letters, drop everything except ``a-z0-9``, whitespace and ``-``, then
    turn whitespace runs into single hyphens.
    """
    # str.lower() already maps "İ" to "i" plus a combining dot; the character
    # filter below removes that dot.
    folded = name.lower().translate(_SLUG_FOLD)
    cleaned = re.sub(r"[^a-z0-9\s-]", "", folded)
    return re.sub(r"\s+", "-", cleaned.strip())


def _find_vector_table(slug, table_sizes):
    """Return ``(found, size_mb)`` for the LanceDB table matching *slug*.

    An exact name match wins; otherwise the first table whose name contains
    the slug is used. An empty slug never matches, because the empty string
    is a substring of every name.
    """
    if not slug:
        return False, 0
    if slug in table_sizes:
        return True, table_sizes[slug]
    for table_name, size_mb in table_sizes.items():
        if slug in table_name:
            return True, size_mb
    return False, 0


def _tail_lines(path, count=15):
    """Return the last *count* lines of the text file at *path*, right-stripped.

    Only *count* lines are held in memory at a time. Undecodable bytes are
    replaced instead of discarding the whole tail. A missing or unreadable
    file yields an empty list.
    """
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            return [line.rstrip() for line in deque(f, maxlen=count)]
    except OSError:
        return []


def collect_status():
    """Gather everything the CLI and web views display into one dict.

    Combines the configuration, the upload progress record, the on-disk
    LanceDB tables, an API probe, a process check and the tail of the setup
    log.

    Returns:
        A dict with the keys ``personas`` (list of per-workspace dicts with
        ``codename``, ``name``, ``expected``, ``assigned``, ``has_vectors``,
        ``vector_size_mb``), ``clusters``, ``total_uploaded``,
        ``total_assigned``, ``total_personas``, ``personas_with_vectors``,
        ``lancedb_size_mb``, ``docs_size_mb``, ``vcache_size_mb``,
        ``api_online``, ``script_running``, ``timestamp`` and ``log_tail``.

    Raises:
        Whatever ``load_config`` raises, and ``KeyError`` if a workspace entry
        lacks ``name`` or a folder entry lacks ``path``.
    """
    config = load_config() or {}
    progress = load_progress() or {}

    # "or {}" also covers keys that are present but null in the YAML/JSON.
    workspaces = config.get("workspaces") or {}
    ws_docs = progress.get("workspace_docs") or {}
    uploaded = progress.get("uploaded_files") or {}

    # One scan of the LanceDB directory serves both the "has vectors" flag and
    # the size column, so the two can never disagree.
    lance_sizes = get_lance_sizes()

    # Number of uploaded files per folder label recorded in the progress file.
    folder_counts = Counter(
        folder
        for folder in (info.get("folder", "") for info in uploaded.values())
        if folder
    )

    personas = []
    for codename, ws_cfg in workspaces.items():
        slug = _workspace_slug(ws_cfg["name"])

        # Configured folder paths are compared against the progress labels
        # with "/" replaced by "_".
        expected = sum(
            folder_counts.get(entry["path"].replace("/", "_"), 0)
            for entry in ws_cfg.get("folders") or []
        )
        has_vectors, vector_size = _find_vector_table(slug, lance_sizes)

        personas.append({
            "codename": codename,
            "name": ws_cfg["name"],
            "expected": expected,
            "assigned": len(ws_docs.get(codename) or []),
            "has_vectors": has_vectors,
            "vector_size_mb": vector_size,
        })

    # Display grouping of persona codenames; codenames absent from the
    # configuration are skipped by the renderers.
    clusters = {
        "intel": ["frodo", "echo", "ghost", "oracle", "wraith", "scribe", "polyglot"],
        "cyber": ["neo", "bastion", "sentinel", "specter", "phantom", "cipher", "vortex"],
        "military": ["marshal", "centurion", "corsair", "warden", "medic"],
        "humanities": ["chronos", "tribune", "arbiter", "ledger", "sage", "herald", "scholar", "gambit"],
        "engineering": ["forge", "architect"],
    }

    return {
        "personas": personas,
        "clusters": clusters,
        "total_uploaded": len(uploaded),
        "total_assigned": sum(len(v) for v in ws_docs.values()),
        "total_personas": len(workspaces),
        "personas_with_vectors": sum(1 for p in personas if p["has_vectors"]),
        "lancedb_size_mb": dir_size_mb(LANCEDB_PATH),
        "docs_size_mb": dir_size_mb(DOCS_PATH),
        "vcache_size_mb": dir_size_mb(VCACHE_PATH),
        "api_online": check_api(config),
        "script_running": check_script_running(),
        "timestamp": time.strftime("%H:%M:%S"),
        "log_tail": _tail_lines(LOG_PATH, 15),
    }
|
||
|
||
|
||
# ──────────────────────────────────────────────────
|
||
# CLI OUTPUT
|
||
# ──────────────────────────────────────────────────
|
||
|
||
# ANSI foreground colour used for each cluster heading in the CLI view.
CLUSTER_COLORS = dict(
    intel="\033[34m",        # blue
    cyber="\033[31m",        # red
    military="\033[33m",     # yellow
    humanities="\033[35m",   # magenta
    engineering="\033[36m",  # cyan
)

# ANSI text attributes.
RESET, BOLD, DIM = "\033[0m", "\033[1m", "\033[2m"

# ANSI colours for status markers.
GREEN, RED = "\033[32m", "\033[31m"
|
||
|
||
|
||
def progress_bar(current, total, width=20):
    """Render a text progress bar that is always exactly *width* cells wide.

    Args:
        current: amount completed; values outside ``0..total`` are clamped.
        total: amount that counts as 100%. Zero or negative gives an empty bar.
        width: number of character cells in the bar.

    Returns:
        A string of ``█`` (done) followed by ``░`` (remaining).
    """
    if total <= 0:
        return "░" * width
    # Clamp on both sides: a negative count would otherwise produce a negative
    # fill and a bar longer than *width*.
    done = max(0, min(current, total))
    filled = int(width * done / total)
    return "█" * filled + "░" * (width - filled)
|
||
|
||
|
||
def cli_output(status):
    """Format a status snapshot as ANSI-coloured text for the terminal.

    Args:
        status: the dict produced by ``collect_status``.

    Returns:
        One string with newline-separated lines: a header, the system status
        summary, one table per cluster and, when present, the log tail.
    """

    def tri_state(value, on_text, off_text):
        # The checks return None when they could not run at all (requests not
        # installed, pgrep unavailable); show that as "unknown" rather than
        # reporting the API as down or the script as idle.
        if value is None:
            return f"{DIM}○ unknown{RESET}"
        return on_text if value else off_text

    lines = [
        f"{BOLD}═══ AnythingLLM Persona Monitor ═══{RESET} {DIM}{status['timestamp']}{RESET}",
        "",
    ]

    # System status
    api = tri_state(status["api_online"], f"{GREEN}●{RESET}", f"{RED}●{RESET}")
    script = tri_state(
        status["script_running"],
        f"{GREEN}● running{RESET}",
        f"{DIM}○ idle{RESET}",
    )
    lines.append(
        f" API: {api} Script: {script} "
        f"LanceDB: {status['lancedb_size_mb']:.0f}MB "
        f"Docs: {status['docs_size_mb']:.0f}MB"
    )
    lines.append(
        f" Uploaded: {status['total_uploaded']} "
        f"Assigned: {status['total_assigned']} "
        f"Vectors: {status['personas_with_vectors']}/{status['total_personas']}"
    )
    lines.append("")

    # Per-cluster persona table
    persona_map = {p["codename"]: p for p in status["personas"]}

    for cluster_name, members in status["clusters"].items():
        color = CLUSTER_COLORS.get(cluster_name, "")
        lines.append(f" {color}{BOLD}{cluster_name.upper()}{RESET}")

        for codename in members:
            p = persona_map.get(codename)
            if not p:
                # Cluster member that is not configured as a workspace.
                continue

            vec_icon = f"{GREEN}✓{RESET}" if p["has_vectors"] else f"{DIM}○{RESET}"
            bar = progress_bar(p["assigned"], p["expected"])
            pct = (p["assigned"] / p["expected"] * 100) if p["expected"] > 0 else 0
            size_str = f"{p['vector_size_mb']:.0f}MB" if p["vector_size_mb"] > 0 else ""

            lines.append(
                f" {vec_icon} {codename:<12} {bar} {p['assigned']:>5}/{p['expected']:<5} "
                f"{pct:>5.0f}% {size_str}"
            )

        lines.append("")

    # Log tail, coloured by the first marker found in each line.
    log_tail = status.get("log_tail", [])
    if log_tail:
        level_colors = (
            ("[ERROR]", RED),
            ("[WARNING]", "\033[33m"),  # yellow
            ("✓", GREEN),
        )
        lines.append(f" {BOLD}── Log (setup.log) ──{RESET}")
        for ll in log_tail:
            color = next((c for marker, c in level_colors if marker in ll), DIM)
            lines.append(f" {color}{ll}{RESET}")
        lines.append("")

    return "\n".join(lines)
|
||
|
||
|
||
def cli_mode(watch=False):
    """Print the status report once, or keep refreshing it every 2 seconds.

    Args:
        watch: when true, clear the terminal and reprint the report every two
            seconds until interrupted with Ctrl+C.
    """
    if not watch:
        print(cli_output(collect_status()))
        return

    # "clear" does not exist on Windows; use the native command there.
    clear_command = "cls" if os.name == "nt" else "clear"
    try:
        while True:
            # Collect before clearing so the screen is not blank while the
            # checks run.
            status = collect_status()
            os.system(clear_command)
            print(cli_output(status))
            time.sleep(2)
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to leave watch mode; exit quietly, the
        # same way web_mode does, instead of printing a traceback.
        print("\n Stopped.")
|
||
|
||
|
||
# ──────────────────────────────────────────────────
|
||
# WEB DASHBOARD
|
||
# ──────────────────────────────────────────────────
|
||
|
||
# Single-page dashboard served at "/". The embedded script polls /api/status
# every 2 seconds and re-renders the summary cards, status bar, cluster tables
# and log panel. All text taken from the status payload (persona codenames,
# log lines) is passed through esc() before being placed into innerHTML, so
# log content cannot inject markup.
HTML_TEMPLATE = """<!DOCTYPE html>
<html lang="tr">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>AnythingLLM Monitor</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body { background: #0a0a0f; color: #e0e0e0; font-family: 'JetBrains Mono', 'Fira Code', monospace; font-size: 14px; padding: 24px; }
h1 { color: #7aa2f7; font-size: 18px; margin-bottom: 4px; }
.subtitle { color: #565f89; font-size: 12px; margin-bottom: 20px; }
.status-bar { display: flex; gap: 24px; margin-bottom: 20px; padding: 12px 16px; background: #13131a; border-radius: 8px; border: 1px solid #1a1b26; }
.status-item { display: flex; align-items: center; gap: 6px; }
.dot { width: 8px; height: 8px; border-radius: 50%; }
.dot.green { background: #9ece6a; box-shadow: 0 0 6px #9ece6a44; }
.dot.red { background: #f7768e; box-shadow: 0 0 6px #f7768e44; }
.dot.dim { background: #565f89; }
.stat { color: #7aa2f7; font-weight: bold; }
.cluster { margin-bottom: 16px; }
.cluster-name { font-size: 13px; font-weight: bold; padding: 6px 12px; border-radius: 4px 4px 0 0; display: inline-block; margin-bottom: 2px; }
.cluster-intel .cluster-name { background: #1a1b3a; color: #7aa2f7; }
.cluster-cyber .cluster-name { background: #2a1a1a; color: #f7768e; }
.cluster-military .cluster-name { background: #2a2a1a; color: #e0af68; }
.cluster-humanities .cluster-name { background: #2a1a2a; color: #bb9af7; }
.cluster-engineering .cluster-name { background: #1a2a2a; color: #73daca; }
.persona-table { width: 100%; border-collapse: collapse; }
.persona-table td { padding: 5px 10px; border-bottom: 1px solid #1a1b26; }
.persona-table tr:hover { background: #13131a; }
.persona-name { color: #c0caf5; min-width: 120px; }
.progress-wrap { width: 200px; }
.progress-bg { background: #1a1b26; border-radius: 3px; height: 14px; overflow: hidden; position: relative; }
.progress-fill { height: 100%; border-radius: 3px; transition: width 0.5s ease; }
.fill-intel { background: linear-gradient(90deg, #3d59a1, #7aa2f7); }
.fill-cyber { background: linear-gradient(90deg, #a1304d, #f7768e); }
.fill-military { background: linear-gradient(90deg, #8a6d2e, #e0af68); }
.fill-humanities { background: linear-gradient(90deg, #6e3da1, #bb9af7); }
.fill-engineering { background: linear-gradient(90deg, #2e8a6d, #73daca); }
.progress-pct { position: absolute; right: 6px; top: 0; font-size: 10px; line-height: 14px; color: #fff; text-shadow: 0 0 4px rgba(0,0,0,0.8); }
.counts { color: #565f89; font-size: 12px; min-width: 100px; text-align: right; }
.vec-icon { font-size: 14px; min-width: 20px; text-align: center; }
.vec-ok { color: #9ece6a; }
.vec-no { color: #565f89; }
.size { color: #565f89; font-size: 11px; min-width: 60px; text-align: right; }
.summary { display: flex; gap: 16px; margin-bottom: 20px; flex-wrap: wrap; }
.summary-card { background: #13131a; border: 1px solid #1a1b26; border-radius: 8px; padding: 12px 16px; min-width: 120px; }
.summary-card .label { color: #565f89; font-size: 11px; text-transform: uppercase; }
.summary-card .value { color: #7aa2f7; font-size: 20px; font-weight: bold; margin-top: 2px; }
.summary-card .unit { color: #565f89; font-size: 12px; }
.log-panel { background: #0d0d12; border: 1px solid #1a1b26; border-radius: 8px; padding: 12px 16px; margin-top: 20px; max-height: 300px; overflow-y: auto; }
.log-panel h3 { color: #565f89; font-size: 12px; text-transform: uppercase; margin-bottom: 8px; }
.log-line { font-size: 12px; line-height: 1.6; color: #565f89; white-space: pre-wrap; word-break: break-all; }
.log-line.error { color: #f7768e; }
.log-line.warning { color: #e0af68; }
.log-line.success { color: #9ece6a; }
.log-line.info { color: #7aa2f7; }
</style>
</head>
<body>
<h1>AnythingLLM Persona Monitor</h1>
<div class="subtitle" id="timestamp">Loading...</div>

<div class="summary" id="summary"></div>
<div class="status-bar" id="statusbar"></div>
<div id="clusters"></div>
<div class="log-panel" id="logpanel"><h3>Log (setup.log)</h3><div id="loglines">No log data</div></div>

<script>
const CLUSTER_ORDER = ['intel', 'cyber', 'military', 'humanities', 'engineering'];

// Escape text from the status payload before it is inserted via innerHTML.
function esc(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}

function render(data) {
  document.getElementById('timestamp').textContent = 'Updated: ' + data.timestamp + ' — auto-refresh 2s';

  // Summary cards (guard the percentage against an empty workspace list)
  const pctVec = data.total_personas > 0 ? Math.round(data.personas_with_vectors / data.total_personas * 100) : 0;
  document.getElementById('summary').innerHTML = `
    <div class="summary-card"><div class="label">Uploaded</div><div class="value">${data.total_uploaded}</div><div class="unit">files</div></div>
    <div class="summary-card"><div class="label">Assigned</div><div class="value">${data.total_assigned}</div><div class="unit">docs</div></div>
    <div class="summary-card"><div class="label">Vectors</div><div class="value">${data.personas_with_vectors}<span class="unit">/${data.total_personas}</span></div><div class="unit">${pctVec}%</div></div>
    <div class="summary-card"><div class="label">LanceDB</div><div class="value">${Math.round(data.lancedb_size_mb)}<span class="unit">MB</span></div></div>
    <div class="summary-card"><div class="label">Documents</div><div class="value">${Math.round(data.docs_size_mb)}<span class="unit">MB</span></div></div>
  `;

  // Status bar (api_online is null when the server could not run the check)
  const apiDot = data.api_online === null ? 'dim' : (data.api_online ? 'green' : 'red');
  const scriptDot = data.script_running ? 'green' : 'dim';
  const scriptText = data.script_running ? 'running' : 'idle';
  document.getElementById('statusbar').innerHTML = `
    <div class="status-item"><span class="dot ${apiDot}"></span>API</div>
    <div class="status-item"><span class="dot ${scriptDot}"></span>Script ${scriptText}</div>
  `;

  // Clusters
  const personaMap = {};
  data.personas.forEach(p => personaMap[p.codename] = p);

  let html = '';
  CLUSTER_ORDER.forEach(cl => {
    const members = data.clusters[cl] || [];
    html += `<div class="cluster cluster-${cl}"><span class="cluster-name">${cl.toUpperCase()}</span><table class="persona-table">`;
    members.forEach(code => {
      const p = personaMap[code];
      if (!p) return;
      const pct = p.expected > 0 ? Math.round(p.assigned / p.expected * 100) : 0;
      const vecClass = p.has_vectors ? 'vec-ok' : 'vec-no';
      const vecIcon = p.has_vectors ? '✓' : '○';
      const sizeStr = p.vector_size_mb > 0 ? Math.round(p.vector_size_mb) + 'MB' : '';
      html += `<tr>
        <td class="vec-icon ${vecClass}">${vecIcon}</td>
        <td class="persona-name">${esc(code)}</td>
        <td class="progress-wrap"><div class="progress-bg"><div class="progress-fill fill-${cl}" style="width:${Math.min(pct,100)}%"></div><span class="progress-pct">${pct}%</span></div></td>
        <td class="counts">${p.assigned} / ${p.expected}</td>
        <td class="size">${sizeStr}</td>
      </tr>`;
    });
    html += '</table></div>';
  });
  document.getElementById('clusters').innerHTML = html;

  // Log panel
  const logLines = data.log_tail || [];
  if (logLines.length > 0) {
    let logHtml = '';
    logLines.forEach(line => {
      let cls = '';
      if (line.includes('[ERROR]')) cls = 'error';
      else if (line.includes('[WARNING]')) cls = 'warning';
      else if (line.includes('✓')) cls = 'success';
      else if (line.includes('[INFO]')) cls = 'info';
      logHtml += `<div class="log-line ${cls}">${esc(line)}</div>`;
    });
    document.getElementById('loglines').innerHTML = logHtml;
    const panel = document.getElementById('logpanel');
    panel.scrollTop = panel.scrollHeight;
  }
}

async function poll() {
  try {
    const resp = await fetch('/api/status');
    const data = await resp.json();
    render(data);
  } catch(e) {
    document.getElementById('timestamp').textContent = 'Connection lost — retrying...';
  }
  setTimeout(poll, 2000);
}
poll();
</script>
</body>
</html>"""
|
||
|
||
|
||
class MonitorHandler(BaseHTTPRequestHandler):
    """HTTP handler for the web dashboard.

    GET routes:
        ``/api/status``        -> ``collect_status()`` as JSON (HTTP 500 with
                                  an ``error`` field if collecting fails)
        ``/``, ``/index.html`` -> ``HTML_TEMPLATE``
        anything else          -> 404 with an empty body
    """

    def do_GET(self):
        """Dispatch a GET request to one of the routes above."""
        # Route on the path only, so URLs with a query string still match.
        route = self.path.split("?", 1)[0].split("#", 1)[0]

        if route == "/api/status":
            try:
                payload, code = collect_status(), 200
            except Exception as exc:
                # A broken config or unreadable storage must not kill the
                # connection with a traceback; report it to the client so the
                # page can keep polling.
                payload, code = {"error": f"{type(exc).__name__}: {exc}"}, 500
            # NOTE(review): the wildcard CORS header lets any web page read
            # this status (including log lines) - confirm that is intended.
            self._reply(
                code,
                "application/json",
                json.dumps(payload).encode("utf-8"),
                extra_headers=(("Access-Control-Allow-Origin", "*"),),
            )
        elif route in ("/", "/index.html"):
            self._reply(200, "text/html; charset=utf-8", HTML_TEMPLATE.encode("utf-8"))
        else:
            self.send_response(404)
            self.send_header("Content-Length", "0")
            self.end_headers()

    def _reply(self, code, content_type, body, extra_headers=()):
        """Send a complete response with an explicit Content-Length."""
        self.send_response(code)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        for header_name, header_value in extra_headers:
            self.send_header(header_name, header_value)
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, format, *args):
        """Suppress the default per-request logging to stderr."""
        pass  # quiet
|
||
|
||
|
||
def web_mode(port=8899):
    """Serve the web dashboard until interrupted with Ctrl+C.

    Args:
        port: TCP port to listen on.

    Raises:
        OSError: if the port cannot be bound.
    """
    # NOTE(review): binds to all interfaces although the banner advertises
    # localhost - confirm that remote access is intended.
    server = HTTPServer(("0.0.0.0", port), MonitorHandler)
    print(f" AnythingLLM Monitor → http://localhost:{port}")
    print(f" API endpoint → http://localhost:{port}/api/status")
    print(" Press Ctrl+C to stop\n")
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\n Stopped.")
    finally:
        # Release the listening socket on every exit path, not only Ctrl+C.
        server.server_close()
|
||
|
||
|
||
# Entry point: "--web [PORT]" starts the dashboard, "--watch" refreshes the
# CLI report, and no flag prints the report once.
if __name__ == "__main__":
    cli_args = sys.argv[1:]

    if "--web" not in cli_args:
        cli_mode(watch="--watch" in cli_args)
    else:
        # The token after --web is the port only if it is all digits;
        # otherwise the default port is used.
        port_pos = cli_args.index("--web") + 1
        candidate = cli_args[port_pos] if port_pos < len(cli_args) else ""
        web_mode(int(candidate) if candidate.isdigit() else 8899)
|