Add monitor.py: CLI + web dashboard for embedding progress

Three modes:
  python3 monitor.py          # one-shot CLI
  python3 monitor.py --watch  # auto-refresh 2s
  python3 monitor.py --web    # web dashboard on :8899

Shows per-persona progress bars, vector sizes, API/script status,
cluster grouping with color coding. Web mode auto-polls /api/status.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
salvacybersec
2026-04-07 00:24:07 +03:00
parent 98ed69653d
commit 9105c03b4b

423
monitor.py Executable file
View File

@@ -0,0 +1,423 @@
#!/usr/bin/env python3
"""
AnythingLLM Persona RAG Monitor
Usage:
python3 monitor.py # CLI one-shot
python3 monitor.py --watch # CLI auto-refresh (2s)
python3 monitor.py --web # Web dashboard on :8899
python3 monitor.py --web 9000 # Custom port
"""
import json
import os
import re
import sys
import time
from http.server import HTTPServer, BaseHTTPRequestHandler
from pathlib import Path

import yaml
try:
import requests
except ImportError:
requests = None
# config.yaml and upload_progress.json live beside this script; the
# remaining paths point into the AnythingLLM desktop app's storage dir.
CONFIG_PATH = Path(__file__).parent / "config.yaml"
PROGRESS_PATH = Path(__file__).parent / "upload_progress.json"
LANCEDB_PATH = Path.home() / ".config/anythingllm-desktop/storage/lancedb"      # vector tables, one "<slug>.lance" dir per workspace
DOCS_PATH = Path.home() / ".config/anythingllm-desktop/storage/documents"       # size reported in status
VCACHE_PATH = Path.home() / ".config/anythingllm-desktop/storage/vector-cache"  # size reported in status
def load_config():
    """Parse config.yaml (next to this script) into a dict."""
    with CONFIG_PATH.open() as handle:
        return yaml.safe_load(handle)
def load_progress():
    """Load the uploader's progress file, or {} when it doesn't exist yet.

    Uses EAFP rather than an exists()/open() pair, which raced with the
    uploader deleting/recreating the file between the two calls.
    """
    try:
        with open(PROGRESS_PATH) as handle:
            return json.load(handle)
    except FileNotFoundError:
        return {}
def dir_size_mb(path):
    """Total size in MiB of all regular files under *path* (recursive).

    A missing path, or any error during traversal/stat, yields 0.
    """
    if not path.exists():
        return 0
    total_bytes = 0
    try:
        for entry in path.rglob("*"):
            if entry.is_file():
                total_bytes += entry.stat().st_size
    except Exception:
        return 0
    return total_bytes / (1024 * 1024)
def get_lance_workspaces():
    """Return the set of workspace slugs that have a LanceDB table.

    Each table lives in a "<slug>.lance" directory. Strip only the
    trailing ".lance": the old str.replace(".lance", "") also removed
    interior occurrences of ".lance" inside a slug.
    """
    if not LANCEDB_PATH.exists():
        return set()
    suffix = ".lance"
    return {d.name[:-len(suffix)] for d in LANCEDB_PATH.iterdir()
            if d.is_dir() and d.name.endswith(suffix)}
def get_lance_sizes():
    """Map workspace slug -> on-disk size (MiB) of its ".lance" table dir.

    Strips only the trailing ".lance" (consistent with
    get_lance_workspaces; str.replace also ate interior occurrences).
    """
    sizes = {}
    if not LANCEDB_PATH.exists():
        return sizes
    suffix = ".lance"
    for d in LANCEDB_PATH.iterdir():
        if d.is_dir() and d.name.endswith(suffix):
            slug = d.name[:-len(suffix)]
            sizes[slug] = sum(f.stat().st_size for f in d.rglob("*")
                              if f.is_file()) / (1024 * 1024)
    return sizes
def check_api(config):
    """Probe the AnythingLLM /auth endpoint with the configured API key.

    Returns True on HTTP 200, False on any error or other status, and
    None when the requests library isn't installed.
    """
    if requests is None:
        return None
    try:
        cfg = config["anythingllm"]
        resp = requests.get(
            f"{cfg['base_url']}/auth",
            headers={"Authorization": f"Bearer {cfg['api_key']}"},
            timeout=3,
        )
        return resp.status_code == 200
    except Exception:
        return False
def check_script_running():
    """Best-effort check whether the setup.py uploader is running.

    Returns pgrep's verdict (True/False), or None when pgrep itself
    could not be invoked.
    """
    try:
        import subprocess
        proc = subprocess.run(["pgrep", "-f", "setup.py"],
                              capture_output=True, text=True)
    except Exception:
        return None
    return proc.returncode == 0
def collect_status():
config = load_config()
progress = load_progress()
workspaces = config.get("workspaces", {})
ws_docs = progress.get("workspace_docs", {})
uploaded = progress.get("uploaded_files", {})
lance_ws = get_lance_workspaces()
lance_sizes = get_lance_sizes()
# Build expected doc counts per persona
folder_counts = {}
for fpath, info in uploaded.items():
f = info.get("folder", "")
if f:
folder_counts[f] = folder_counts.get(f, 0) + 1
personas = []
for codename, ws_cfg in workspaces.items():
slug = ws_cfg["name"].lower()
# Normalize slug like AnythingLLM does
import re
slug = re.sub(r'[^a-z0-9\s-]', '', slug.replace('ş', 's').replace('ç', 'c')
.replace('ğ', 'g').replace('ü', 'u').replace('ö', 'o')
.replace('ı', 'i').replace('İ', 'i').replace('&', 'and'))
slug = re.sub(r'\s+', '-', slug.strip())
# Expected docs from mapped folders
expected = 0
for entry in ws_cfg.get("folders", []):
fn = entry["path"].replace("/", "_")
expected += folder_counts.get(fn, 0)
assigned = len(ws_docs.get(codename, []))
has_vectors = any(slug in lw for lw in lance_ws)
vector_size = 0
for lw, sz in lance_sizes.items():
if slug in lw:
vector_size = sz
break
personas.append({
"codename": codename,
"name": ws_cfg["name"],
"expected": expected,
"assigned": assigned,
"has_vectors": has_vectors,
"vector_size_mb": vector_size,
})
# Cluster grouping
clusters = {
"intel": ["frodo", "echo", "ghost", "oracle", "wraith", "scribe", "polyglot"],
"cyber": ["neo", "bastion", "sentinel", "specter", "phantom", "cipher", "vortex"],
"military": ["marshal", "centurion", "corsair", "warden", "medic"],
"humanities": ["chronos", "tribune", "arbiter", "ledger", "sage", "herald", "scholar", "gambit"],
"engineering": ["forge", "architect"],
}
api_ok = check_api(config)
script_running = check_script_running()
return {
"personas": personas,
"clusters": clusters,
"total_uploaded": len(uploaded),
"total_assigned": sum(len(v) for v in ws_docs.values()),
"total_personas": len(workspaces),
"personas_with_vectors": sum(1 for p in personas if p["has_vectors"]),
"lancedb_size_mb": dir_size_mb(LANCEDB_PATH),
"docs_size_mb": dir_size_mb(DOCS_PATH),
"vcache_size_mb": dir_size_mb(VCACHE_PATH),
"api_online": api_ok,
"script_running": script_running,
"timestamp": time.strftime("%H:%M:%S"),
}
# ──────────────────────────────────────────────────
# CLI OUTPUT
# ──────────────────────────────────────────────────
# ANSI SGR foreground colors, one per persona cluster; used by
# cli_output() to colorize the cluster headers.
CLUSTER_COLORS = {
    "intel": "\033[34m",       # blue
    "cyber": "\033[31m",       # red
    "military": "\033[33m",    # yellow
    "humanities": "\033[35m",  # magenta
    "engineering": "\033[36m", # cyan
}
# Generic ANSI attributes for the CLI report.
RESET = "\033[0m"  # clear all attributes
BOLD = "\033[1m"
DIM = "\033[2m"
GREEN = "\033[32m"
RED = "\033[31m"
def progress_bar(current, total, width=20):
    """Return a fixed-width unicode progress bar for current/total.

    NOTE(review): the bar glyphs were lost ('' literals — the bar always
    rendered empty); restored as '█' (filled) / '░' (empty). current is
    clamped to total, so the bar never overflows *width*.
    """
    if total == 0:
        return "░" * width
    filled = int(width * min(current, total) / total)
    return "█" * filled + "░" * (width - filled)
def cli_output(status):
    """Render the collect_status() snapshot as a colorized CLI report."""
    lines = []
    lines.append(f"{BOLD}═══ AnythingLLM Persona Monitor ═══{RESET} {DIM}{status['timestamp']}{RESET}")
    lines.append("")
    # System status. NOTE(review): the ●/○ glyphs were lost ('' literals
    # rendered nothing); restored to match the surviving "● running" /
    # "○ idle" indicators.
    api = f"{GREEN}●{RESET}" if status["api_online"] else f"{RED}●{RESET}"
    script = f"{GREEN}● running{RESET}" if status["script_running"] else f"{DIM}○ idle{RESET}"
    lines.append(f" API: {api} Script: {script} "
                 f"LanceDB: {status['lancedb_size_mb']:.0f}MB "
                 f"Docs: {status['docs_size_mb']:.0f}MB")
    lines.append(f" Uploaded: {status['total_uploaded']} "
                 f"Assigned: {status['total_assigned']} "
                 f"Vectors: {status['personas_with_vectors']}/{status['total_personas']}")
    lines.append("")
    # Per-cluster persona table: one colored header per cluster, then one
    # row per persona (vector icon, progress bar, counts, vector size).
    persona_map = {p["codename"]: p for p in status["personas"]}
    for cluster_name, members in status["clusters"].items():
        color = CLUSTER_COLORS.get(cluster_name, "")
        lines.append(f" {color}{BOLD}{cluster_name.upper()}{RESET}")
        for codename in members:
            p = persona_map.get(codename)
            if not p:
                continue
            vec_icon = f"{GREEN}●{RESET}" if p["has_vectors"] else f"{DIM}○{RESET}"
            bar = progress_bar(p["assigned"], p["expected"])
            pct = (p["assigned"] / p["expected"] * 100) if p["expected"] > 0 else 0
            size_str = f"{p['vector_size_mb']:.0f}MB" if p["vector_size_mb"] > 0 else ""
            lines.append(f" {vec_icon} {codename:<12} {bar} {p['assigned']:>5}/{p['expected']:<5} "
                         f"{pct:>5.0f}% {size_str}")
        lines.append("")
    return "\n".join(lines)
def cli_mode(watch=False):
    """Print the report once, or clear-and-redraw every 2s when *watch*."""
    if not watch:
        print(cli_output(collect_status()))
        return
    while True:
        report = cli_output(collect_status())
        os.system("clear")
        print(report)
        time.sleep(2)
# ──────────────────────────────────────────────────
# WEB DASHBOARD
# ──────────────────────────────────────────────────
# Single-page dashboard served at "/"; its inline JS polls /api/status
# every 2s and re-renders. Served verbatim — no server-side templating.
# NOTE(review): the vector-status ternary below had both branches as ''
# (glyphs lost); restored as ●/○.
HTML_TEMPLATE = """<!DOCTYPE html>
<html lang="tr">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>AnythingLLM Monitor</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body { background: #0a0a0f; color: #e0e0e0; font-family: 'JetBrains Mono', 'Fira Code', monospace; font-size: 14px; padding: 24px; }
h1 { color: #7aa2f7; font-size: 18px; margin-bottom: 4px; }
.subtitle { color: #565f89; font-size: 12px; margin-bottom: 20px; }
.status-bar { display: flex; gap: 24px; margin-bottom: 20px; padding: 12px 16px; background: #13131a; border-radius: 8px; border: 1px solid #1a1b26; }
.status-item { display: flex; align-items: center; gap: 6px; }
.dot { width: 8px; height: 8px; border-radius: 50%; }
.dot.green { background: #9ece6a; box-shadow: 0 0 6px #9ece6a44; }
.dot.red { background: #f7768e; box-shadow: 0 0 6px #f7768e44; }
.dot.dim { background: #565f89; }
.stat { color: #7aa2f7; font-weight: bold; }
.cluster { margin-bottom: 16px; }
.cluster-name { font-size: 13px; font-weight: bold; padding: 6px 12px; border-radius: 4px 4px 0 0; display: inline-block; margin-bottom: 2px; }
.cluster-intel .cluster-name { background: #1a1b3a; color: #7aa2f7; }
.cluster-cyber .cluster-name { background: #2a1a1a; color: #f7768e; }
.cluster-military .cluster-name { background: #2a2a1a; color: #e0af68; }
.cluster-humanities .cluster-name { background: #2a1a2a; color: #bb9af7; }
.cluster-engineering .cluster-name { background: #1a2a2a; color: #73daca; }
.persona-table { width: 100%; border-collapse: collapse; }
.persona-table td { padding: 5px 10px; border-bottom: 1px solid #1a1b26; }
.persona-table tr:hover { background: #13131a; }
.persona-name { color: #c0caf5; min-width: 120px; }
.progress-wrap { width: 200px; }
.progress-bg { background: #1a1b26; border-radius: 3px; height: 14px; overflow: hidden; position: relative; }
.progress-fill { height: 100%; border-radius: 3px; transition: width 0.5s ease; }
.fill-intel { background: linear-gradient(90deg, #3d59a1, #7aa2f7); }
.fill-cyber { background: linear-gradient(90deg, #a1304d, #f7768e); }
.fill-military { background: linear-gradient(90deg, #8a6d2e, #e0af68); }
.fill-humanities { background: linear-gradient(90deg, #6e3da1, #bb9af7); }
.fill-engineering { background: linear-gradient(90deg, #2e8a6d, #73daca); }
.progress-pct { position: absolute; right: 6px; top: 0; font-size: 10px; line-height: 14px; color: #fff; text-shadow: 0 0 4px rgba(0,0,0,0.8); }
.counts { color: #565f89; font-size: 12px; min-width: 100px; text-align: right; }
.vec-icon { font-size: 14px; min-width: 20px; text-align: center; }
.vec-ok { color: #9ece6a; }
.vec-no { color: #565f89; }
.size { color: #565f89; font-size: 11px; min-width: 60px; text-align: right; }
.summary { display: flex; gap: 16px; margin-bottom: 20px; flex-wrap: wrap; }
.summary-card { background: #13131a; border: 1px solid #1a1b26; border-radius: 8px; padding: 12px 16px; min-width: 120px; }
.summary-card .label { color: #565f89; font-size: 11px; text-transform: uppercase; }
.summary-card .value { color: #7aa2f7; font-size: 20px; font-weight: bold; margin-top: 2px; }
.summary-card .unit { color: #565f89; font-size: 12px; }
</style>
</head>
<body>
<h1>AnythingLLM Persona Monitor</h1>
<div class="subtitle" id="timestamp">Loading...</div>
<div class="summary" id="summary"></div>
<div class="status-bar" id="statusbar"></div>
<div id="clusters"></div>
<script>
const CLUSTER_ORDER = ['intel', 'cyber', 'military', 'humanities', 'engineering'];
function render(data) {
document.getElementById('timestamp').textContent = 'Updated: ' + data.timestamp + ' — auto-refresh 2s';
// Summary cards
const pctVec = Math.round(data.personas_with_vectors / data.total_personas * 100);
document.getElementById('summary').innerHTML = `
<div class="summary-card"><div class="label">Uploaded</div><div class="value">${data.total_uploaded}</div><div class="unit">files</div></div>
<div class="summary-card"><div class="label">Assigned</div><div class="value">${data.total_assigned}</div><div class="unit">docs</div></div>
<div class="summary-card"><div class="label">Vectors</div><div class="value">${data.personas_with_vectors}<span class="unit">/${data.total_personas}</span></div><div class="unit">${pctVec}%</div></div>
<div class="summary-card"><div class="label">LanceDB</div><div class="value">${Math.round(data.lancedb_size_mb)}<span class="unit">MB</span></div></div>
<div class="summary-card"><div class="label">Documents</div><div class="value">${Math.round(data.docs_size_mb)}<span class="unit">MB</span></div></div>
`;
// Status bar
const apiDot = data.api_online ? 'green' : 'red';
const scriptDot = data.script_running ? 'green' : 'dim';
const scriptText = data.script_running ? 'running' : 'idle';
document.getElementById('statusbar').innerHTML = `
<div class="status-item"><span class="dot ${apiDot}"></span>API</div>
<div class="status-item"><span class="dot ${scriptDot}"></span>Script ${scriptText}</div>
`;
// Clusters
const personaMap = {};
data.personas.forEach(p => personaMap[p.codename] = p);
let html = '';
CLUSTER_ORDER.forEach(cl => {
const members = data.clusters[cl] || [];
html += `<div class="cluster cluster-${cl}"><span class="cluster-name">${cl.toUpperCase()}</span><table class="persona-table">`;
members.forEach(code => {
const p = personaMap[code];
if (!p) return;
const pct = p.expected > 0 ? Math.round(p.assigned / p.expected * 100) : 0;
const vecClass = p.has_vectors ? 'vec-ok' : 'vec-no';
const vecIcon = p.has_vectors ? '●' : '○';
const sizeStr = p.vector_size_mb > 0 ? Math.round(p.vector_size_mb) + 'MB' : '';
html += `<tr>
<td class="vec-icon ${vecClass}">${vecIcon}</td>
<td class="persona-name">${code}</td>
<td class="progress-wrap"><div class="progress-bg"><div class="progress-fill fill-${cl}" style="width:${Math.min(pct,100)}%"></div><span class="progress-pct">${pct}%</span></div></td>
<td class="counts">${p.assigned} / ${p.expected}</td>
<td class="size">${sizeStr}</td>
</tr>`;
});
html += '</table></div>';
});
document.getElementById('clusters').innerHTML = html;
}
async function poll() {
try {
const resp = await fetch('/api/status');
const data = await resp.json();
render(data);
} catch(e) {
document.getElementById('timestamp').textContent = 'Connection lost — retrying...';
}
setTimeout(poll, 2000);
}
poll();
</script>
</body>
</html>"""
class MonitorHandler(BaseHTTPRequestHandler):
    """Tiny HTTP handler: "/" serves the dashboard, "/api/status" JSON."""

    def do_GET(self):
        if self.path == "/api/status":
            payload = json.dumps(collect_status()).encode()
            self.send_response(200)
            self.send_header("Content-Type", "application/json")
            self.send_header("Access-Control-Allow-Origin", "*")
            self.end_headers()
            self.wfile.write(payload)
            return
        if self.path in ("/", "/index.html"):
            self.send_response(200)
            self.send_header("Content-Type", "text/html; charset=utf-8")
            self.end_headers()
            self.wfile.write(HTML_TEMPLATE.encode())
            return
        self.send_response(404)
        self.end_headers()

    def log_message(self, fmt, *args):
        # Suppress per-request access logging.
        pass
def web_mode(port=8899):
    """Serve the web dashboard on 0.0.0.0:*port* until Ctrl+C."""
    httpd = HTTPServer(("0.0.0.0", port), MonitorHandler)
    print(f" AnythingLLM Monitor → http://localhost:{port}")
    print(f" API endpoint → http://localhost:{port}/api/status")
    print(f" Press Ctrl+C to stop\n")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\n Stopped.")
        httpd.server_close()
if __name__ == "__main__":
    # --web [port] wins over --watch; no flags means a one-shot report.
    argv = sys.argv[1:]
    if "--web" in argv:
        port = 8899
        nxt = argv.index("--web") + 1
        if nxt < len(argv) and argv[nxt].isdigit():
            port = int(argv[nxt])
        web_mode(port)
    else:
        cli_mode(watch="--watch" in argv)