Add monitor.py: CLI + web dashboard for embedding progress

Three modes:
  python3 monitor.py          # one-shot CLI
  python3 monitor.py --watch  # auto-refresh 2s
  python3 monitor.py --web    # web dashboard on :8899

Shows per-persona progress bars, vector sizes, API/script status,
cluster grouping with color coding. Web mode auto-polls /api/status.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
salvacybersec
2026-04-07 00:24:07 +03:00
parent 98ed69653d
commit 9105c03b4b

423
monitor.py Executable file
View File

@@ -0,0 +1,423 @@
#!/usr/bin/env python3
"""
AnythingLLM Persona RAG Monitor
Usage:
python3 monitor.py # CLI one-shot
python3 monitor.py --watch # CLI auto-refresh (2s)
python3 monitor.py --web # Web dashboard on :8899
python3 monitor.py --web 9000 # Custom port
"""
import json
import os
import re
import sys
import time
from http.server import HTTPServer, BaseHTTPRequestHandler
from pathlib import Path

import yaml
try:
import requests
except ImportError:
requests = None
# config.yaml and upload_progress.json live beside this script; the
# remaining paths point into the AnythingLLM desktop app's storage dir.
CONFIG_PATH = Path(__file__).parent / "config.yaml"
PROGRESS_PATH = Path(__file__).parent / "upload_progress.json"
LANCEDB_PATH = Path.home() / ".config/anythingllm-desktop/storage/lancedb"      # vector tables, one "<slug>.lance" dir per workspace
DOCS_PATH = Path.home() / ".config/anythingllm-desktop/storage/documents"       # size reported in status
VCACHE_PATH = Path.home() / ".config/anythingllm-desktop/storage/vector-cache"  # size reported in status
def load_config():
    """Parse config.yaml (next to this script) into a dict."""
    with CONFIG_PATH.open() as handle:
        return yaml.safe_load(handle)
def load_progress():
    """Load the uploader's progress file, or {} when it doesn't exist yet.

    Uses EAFP rather than an exists()/open() pair, which raced with the
    uploader deleting/recreating the file between the two calls.
    """
    try:
        with open(PROGRESS_PATH) as handle:
            return json.load(handle)
    except FileNotFoundError:
        return {}
def dir_size_mb(path):
    """Total size in MiB of all regular files under *path* (recursive).

    A missing path, or any error during traversal/stat, yields 0.
    """
    if not path.exists():
        return 0
    total_bytes = 0
    try:
        for entry in path.rglob("*"):
            if entry.is_file():
                total_bytes += entry.stat().st_size
    except Exception:
        return 0
    return total_bytes / (1024 * 1024)
def get_lance_workspaces():
    """Return the set of workspace slugs that have a LanceDB table.

    Each table lives in a "<slug>.lance" directory. Strip only the
    trailing ".lance": the old str.replace(".lance", "") also removed
    interior occurrences of ".lance" inside a slug.
    """
    if not LANCEDB_PATH.exists():
        return set()
    suffix = ".lance"
    return {d.name[:-len(suffix)] for d in LANCEDB_PATH.iterdir()
            if d.is_dir() and d.name.endswith(suffix)}
def get_lance_sizes():
    """Map workspace slug -> on-disk size (MiB) of its ".lance" table dir.

    Strips only the trailing ".lance" (consistent with
    get_lance_workspaces; str.replace also ate interior occurrences).
    """
    sizes = {}
    if not LANCEDB_PATH.exists():
        return sizes
    suffix = ".lance"
    for d in LANCEDB_PATH.iterdir():
        if d.is_dir() and d.name.endswith(suffix):
            slug = d.name[:-len(suffix)]
            sizes[slug] = sum(f.stat().st_size for f in d.rglob("*")
                              if f.is_file()) / (1024 * 1024)
    return sizes
def check_api(config):
    """Probe the AnythingLLM /auth endpoint with the configured API key.

    Returns True on HTTP 200, False on any error or other status, and
    None when the requests library isn't installed.
    """
    if requests is None:
        return None
    try:
        cfg = config["anythingllm"]
        resp = requests.get(
            f"{cfg['base_url']}/auth",
            headers={"Authorization": f"Bearer {cfg['api_key']}"},
            timeout=3,
        )
        return resp.status_code == 200
    except Exception:
        return False
def check_script_running():
    """Best-effort check whether the setup.py uploader is running.

    Returns pgrep's verdict (True/False), or None when pgrep itself
    could not be invoked.
    """
    try:
        import subprocess
        proc = subprocess.run(["pgrep", "-f", "setup.py"],
                              capture_output=True, text=True)
    except Exception:
        return None
    return proc.returncode == 0
def collect_status():
config = load_config()
progress = load_progress()
workspaces = config.get("workspaces", {})
ws_docs = progress.get("workspace_docs", {})
uploaded = progress.get("uploaded_files", {})
lance_ws = get_lance_workspaces()
lance_sizes = get_lance_sizes()
# Build expected doc counts per persona
folder_counts = {}
for fpath, info in uploaded.items():
f = info.get("folder", "")
if f:
folder_counts[f] = folder_counts.get(f, 0) + 1
personas = []
for codename, ws_cfg in workspaces.items():
slug = ws_cfg["name"].lower()
# Normalize slug like AnythingLLM does
import re
slug = re.sub(r'[^a-z0-9\s-]', '', slug.replace('ş', 's').replace('ç', 'c')
.replace('ğ', 'g').replace('ü', 'u').replace('ö', 'o')
.replace('ı', 'i').replace('İ', 'i').replace('&', 'and'))
slug = re.sub(r'\s+', '-', slug.strip())
# Expected docs from mapped folders
expected = 0
for entry in ws_cfg.get("folders", []):
fn = entry["path"].replace("/", "_")
expected += folder_counts.get(fn, 0)
assigned = len(ws_docs.get(codename, []))
has_vectors = any(slug in lw for lw in lance_ws)
vector_size = 0
for lw, sz in lance_sizes.items():
if slug in lw:
vector_size = sz
break
personas.append({
"codename": codename,
"name": ws_cfg["name"],
"expected": expected,
"assigned": assigned,
"has_vectors": has_vectors,
"vector_size_mb": vector_size,
})
# Cluster grouping
clusters = {
"intel": ["frodo", "echo", "ghost", "oracle", "wraith", "scribe", "polyglot"],
"cyber": ["neo", "bastion", "sentinel", "specter", "phantom", "cipher", "vortex"],
"military": ["marshal", "centurion", "corsair", "warden", "medic"],
"humanities": ["chronos", "tribune", "arbiter", "ledger", "sage", "herald", "scholar", "gambit"],
"engineering": ["forge", "architect"],
}
api_ok = check_api(config)
script_running = check_script_running()
return {
"personas": personas,
"clusters": clusters,
"total_uploaded": len(uploaded),
"total_assigned": sum(len(v) for v in ws_docs.values()),
"total_personas": len(workspaces),
"personas_with_vectors": sum(1 for p in personas if p["has_vectors"]),
"lancedb_size_mb": dir_size_mb(LANCEDB_PATH),
"docs_size_mb": dir_size_mb(DOCS_PATH),
"vcache_size_mb": dir_size_mb(VCACHE_PATH),
"api_online": api_ok,
"script_running": script_running,
"timestamp": time.strftime("%H:%M:%S"),
}
# ──────────────────────────────────────────────────
# CLI OUTPUT
# ──────────────────────────────────────────────────
# ANSI SGR foreground colors, one per persona cluster; used by
# cli_output() to colorize the cluster headers.
CLUSTER_COLORS = {
    "intel": "\033[34m",       # blue
    "cyber": "\033[31m",       # red
    "military": "\033[33m",    # yellow
    "humanities": "\033[35m",  # magenta
    "engineering": "\033[36m", # cyan
}
# Generic ANSI attributes for the CLI report.
RESET = "\033[0m"  # clear all attributes
BOLD = "\033[1m"
DIM = "\033[2m"
GREEN = "\033[32m"
RED = "\033[31m"
def progress_bar(current, total, width=20):
    """Return a fixed-width unicode progress bar for current/total.

    NOTE(review): the bar glyphs were lost ('' literals — the bar always
    rendered empty); restored as '█' (filled) / '░' (empty). current is
    clamped to total, so the bar never overflows *width*.
    """
    if total == 0:
        return "░" * width
    filled = int(width * min(current, total) / total)
    return "█" * filled + "░" * (width - filled)
def cli_output(status):
    """Render the collect_status() snapshot as a colorized CLI report."""
    lines = []
    lines.append(f"{BOLD}═══ AnythingLLM Persona Monitor ═══{RESET} {DIM}{status['timestamp']}{RESET}")
    lines.append("")
    # System status. NOTE(review): the ●/○ glyphs were lost ('' literals
    # rendered nothing); restored to match the surviving "● running" /
    # "○ idle" indicators.
    api = f"{GREEN}●{RESET}" if status["api_online"] else f"{RED}●{RESET}"
    script = f"{GREEN}● running{RESET}" if status["script_running"] else f"{DIM}○ idle{RESET}"
    lines.append(f" API: {api} Script: {script} "
                 f"LanceDB: {status['lancedb_size_mb']:.0f}MB "
                 f"Docs: {status['docs_size_mb']:.0f}MB")
    lines.append(f" Uploaded: {status['total_uploaded']} "
                 f"Assigned: {status['total_assigned']} "
                 f"Vectors: {status['personas_with_vectors']}/{status['total_personas']}")
    lines.append("")
    # Per-cluster persona table: one colored header per cluster, then one
    # row per persona (vector icon, progress bar, counts, vector size).
    persona_map = {p["codename"]: p for p in status["personas"]}
    for cluster_name, members in status["clusters"].items():
        color = CLUSTER_COLORS.get(cluster_name, "")
        lines.append(f" {color}{BOLD}{cluster_name.upper()}{RESET}")
        for codename in members:
            p = persona_map.get(codename)
            if not p:
                continue
            vec_icon = f"{GREEN}●{RESET}" if p["has_vectors"] else f"{DIM}○{RESET}"
            bar = progress_bar(p["assigned"], p["expected"])
            pct = (p["assigned"] / p["expected"] * 100) if p["expected"] > 0 else 0
            size_str = f"{p['vector_size_mb']:.0f}MB" if p["vector_size_mb"] > 0 else ""
            lines.append(f" {vec_icon} {codename:<12} {bar} {p['assigned']:>5}/{p['expected']:<5} "
                         f"{pct:>5.0f}% {size_str}")
        lines.append("")
    return "\n".join(lines)
def cli_mode(watch=False):
    """Print the report once, or clear-and-redraw every 2s when *watch*."""
    if not watch:
        print(cli_output(collect_status()))
        return
    while True:
        report = cli_output(collect_status())
        os.system("clear")
        print(report)
        time.sleep(2)
# ──────────────────────────────────────────────────
# WEB DASHBOARD
# ──────────────────────────────────────────────────
# Single-page dashboard served at "/"; its inline JS polls /api/status
# every 2s and re-renders. Served verbatim — no server-side templating.
# NOTE(review): the vector-status ternary below had both branches as ''
# (glyphs lost); restored as ●/○.
HTML_TEMPLATE = """<!DOCTYPE html>
<html lang="tr">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>AnythingLLM Monitor</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body { background: #0a0a0f; color: #e0e0e0; font-family: 'JetBrains Mono', 'Fira Code', monospace; font-size: 14px; padding: 24px; }
h1 { color: #7aa2f7; font-size: 18px; margin-bottom: 4px; }
.subtitle { color: #565f89; font-size: 12px; margin-bottom: 20px; }
.status-bar { display: flex; gap: 24px; margin-bottom: 20px; padding: 12px 16px; background: #13131a; border-radius: 8px; border: 1px solid #1a1b26; }
.status-item { display: flex; align-items: center; gap: 6px; }
.dot { width: 8px; height: 8px; border-radius: 50%; }
.dot.green { background: #9ece6a; box-shadow: 0 0 6px #9ece6a44; }
.dot.red { background: #f7768e; box-shadow: 0 0 6px #f7768e44; }
.dot.dim { background: #565f89; }
.stat { color: #7aa2f7; font-weight: bold; }
.cluster { margin-bottom: 16px; }
.cluster-name { font-size: 13px; font-weight: bold; padding: 6px 12px; border-radius: 4px 4px 0 0; display: inline-block; margin-bottom: 2px; }
.cluster-intel .cluster-name { background: #1a1b3a; color: #7aa2f7; }
.cluster-cyber .cluster-name { background: #2a1a1a; color: #f7768e; }
.cluster-military .cluster-name { background: #2a2a1a; color: #e0af68; }
.cluster-humanities .cluster-name { background: #2a1a2a; color: #bb9af7; }
.cluster-engineering .cluster-name { background: #1a2a2a; color: #73daca; }
.persona-table { width: 100%; border-collapse: collapse; }
.persona-table td { padding: 5px 10px; border-bottom: 1px solid #1a1b26; }
.persona-table tr:hover { background: #13131a; }
.persona-name { color: #c0caf5; min-width: 120px; }
.progress-wrap { width: 200px; }
.progress-bg { background: #1a1b26; border-radius: 3px; height: 14px; overflow: hidden; position: relative; }
.progress-fill { height: 100%; border-radius: 3px; transition: width 0.5s ease; }
.fill-intel { background: linear-gradient(90deg, #3d59a1, #7aa2f7); }
.fill-cyber { background: linear-gradient(90deg, #a1304d, #f7768e); }
.fill-military { background: linear-gradient(90deg, #8a6d2e, #e0af68); }
.fill-humanities { background: linear-gradient(90deg, #6e3da1, #bb9af7); }
.fill-engineering { background: linear-gradient(90deg, #2e8a6d, #73daca); }
.progress-pct { position: absolute; right: 6px; top: 0; font-size: 10px; line-height: 14px; color: #fff; text-shadow: 0 0 4px rgba(0,0,0,0.8); }
.counts { color: #565f89; font-size: 12px; min-width: 100px; text-align: right; }
.vec-icon { font-size: 14px; min-width: 20px; text-align: center; }
.vec-ok { color: #9ece6a; }
.vec-no { color: #565f89; }
.size { color: #565f89; font-size: 11px; min-width: 60px; text-align: right; }
.summary { display: flex; gap: 16px; margin-bottom: 20px; flex-wrap: wrap; }
.summary-card { background: #13131a; border: 1px solid #1a1b26; border-radius: 8px; padding: 12px 16px; min-width: 120px; }
.summary-card .label { color: #565f89; font-size: 11px; text-transform: uppercase; }
.summary-card .value { color: #7aa2f7; font-size: 20px; font-weight: bold; margin-top: 2px; }
.summary-card .unit { color: #565f89; font-size: 12px; }
</style>
</head>
<body>
<h1>AnythingLLM Persona Monitor</h1>
<div class="subtitle" id="timestamp">Loading...</div>
<div class="summary" id="summary"></div>
<div class="status-bar" id="statusbar"></div>
<div id="clusters"></div>
<script>
const CLUSTER_ORDER = ['intel', 'cyber', 'military', 'humanities', 'engineering'];
function render(data) {
document.getElementById('timestamp').textContent = 'Updated: ' + data.timestamp + ' — auto-refresh 2s';
// Summary cards
const pctVec = Math.round(data.personas_with_vectors / data.total_personas * 100);
document.getElementById('summary').innerHTML = `
<div class="summary-card"><div class="label">Uploaded</div><div class="value">${data.total_uploaded}</div><div class="unit">files</div></div>
<div class="summary-card"><div class="label">Assigned</div><div class="value">${data.total_assigned}</div><div class="unit">docs</div></div>
<div class="summary-card"><div class="label">Vectors</div><div class="value">${data.personas_with_vectors}<span class="unit">/${data.total_personas}</span></div><div class="unit">${pctVec}%</div></div>
<div class="summary-card"><div class="label">LanceDB</div><div class="value">${Math.round(data.lancedb_size_mb)}<span class="unit">MB</span></div></div>
<div class="summary-card"><div class="label">Documents</div><div class="value">${Math.round(data.docs_size_mb)}<span class="unit">MB</span></div></div>
`;
// Status bar
const apiDot = data.api_online ? 'green' : 'red';
const scriptDot = data.script_running ? 'green' : 'dim';
const scriptText = data.script_running ? 'running' : 'idle';
document.getElementById('statusbar').innerHTML = `
<div class="status-item"><span class="dot ${apiDot}"></span>API</div>
<div class="status-item"><span class="dot ${scriptDot}"></span>Script ${scriptText}</div>
`;
// Clusters
const personaMap = {};
data.personas.forEach(p => personaMap[p.codename] = p);
let html = '';
CLUSTER_ORDER.forEach(cl => {
const members = data.clusters[cl] || [];
html += `<div class="cluster cluster-${cl}"><span class="cluster-name">${cl.toUpperCase()}</span><table class="persona-table">`;
members.forEach(code => {
const p = personaMap[code];
if (!p) return;
const pct = p.expected > 0 ? Math.round(p.assigned / p.expected * 100) : 0;
const vecClass = p.has_vectors ? 'vec-ok' : 'vec-no';
const vecIcon = p.has_vectors ? '●' : '○';
const sizeStr = p.vector_size_mb > 0 ? Math.round(p.vector_size_mb) + 'MB' : '';
html += `<tr>
<td class="vec-icon ${vecClass}">${vecIcon}</td>
<td class="persona-name">${code}</td>
<td class="progress-wrap"><div class="progress-bg"><div class="progress-fill fill-${cl}" style="width:${Math.min(pct,100)}%"></div><span class="progress-pct">${pct}%</span></div></td>
<td class="counts">${p.assigned} / ${p.expected}</td>
<td class="size">${sizeStr}</td>
</tr>`;
});
html += '</table></div>';
});
document.getElementById('clusters').innerHTML = html;
}
async function poll() {
try {
const resp = await fetch('/api/status');
const data = await resp.json();
render(data);
} catch(e) {
document.getElementById('timestamp').textContent = 'Connection lost — retrying...';
}
setTimeout(poll, 2000);
}
poll();
</script>
</body>
</html>"""
class MonitorHandler(BaseHTTPRequestHandler):
    """Tiny HTTP handler: "/" serves the dashboard, "/api/status" JSON."""

    def do_GET(self):
        if self.path == "/api/status":
            payload = json.dumps(collect_status()).encode()
            self.send_response(200)
            self.send_header("Content-Type", "application/json")
            self.send_header("Access-Control-Allow-Origin", "*")
            self.end_headers()
            self.wfile.write(payload)
            return
        if self.path in ("/", "/index.html"):
            self.send_response(200)
            self.send_header("Content-Type", "text/html; charset=utf-8")
            self.end_headers()
            self.wfile.write(HTML_TEMPLATE.encode())
            return
        self.send_response(404)
        self.end_headers()

    def log_message(self, fmt, *args):
        # Suppress per-request access logging.
        pass
def web_mode(port=8899):
    """Serve the web dashboard on 0.0.0.0:*port* until Ctrl+C."""
    httpd = HTTPServer(("0.0.0.0", port), MonitorHandler)
    print(f" AnythingLLM Monitor → http://localhost:{port}")
    print(f" API endpoint → http://localhost:{port}/api/status")
    print(f" Press Ctrl+C to stop\n")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\n Stopped.")
        httpd.server_close()
if __name__ == "__main__":
    # --web [port] wins over --watch; no flags means a one-shot report.
    argv = sys.argv[1:]
    if "--web" in argv:
        port = 8899
        nxt = argv.index("--web") + 1
        if nxt < len(argv) and argv[nxt].isdigit():
            port = int(argv[nxt])
        web_mode(port)
    else:
        cli_mode(watch="--watch" in argv)