Files
anything-llm-rag/monitor.py
salvacybersec 9105c03b4b Add monitor.py: CLI + web dashboard for embedding progress
Three modes:
  python3 monitor.py          # one-shot CLI
  python3 monitor.py --watch  # auto-refresh 2s
  python3 monitor.py --web    # web dashboard on :8899

Shows per-persona progress bars, vector sizes, API/script status,
cluster grouping with color coding. Web mode auto-polls /api/status.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 00:24:07 +03:00

424 lines
16 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
AnythingLLM Persona RAG Monitor
Usage:
python3 monitor.py # CLI one-shot
python3 monitor.py --watch # CLI auto-refresh (2s)
python3 monitor.py --web # Web dashboard on :8899
python3 monitor.py --web 9000 # Custom port
"""
import json
import os
import re
import sys
import time
from collections import Counter
from http.server import HTTPServer, BaseHTTPRequestHandler
from pathlib import Path

import yaml

try:
    import requests
except ImportError:
    requests = None
# Files that live next to this script.
_SCRIPT_DIR = Path(__file__).parent
CONFIG_PATH = _SCRIPT_DIR / "config.yaml"
PROGRESS_PATH = _SCRIPT_DIR / "upload_progress.json"

# AnythingLLM desktop storage directories under the user's home.
_STORAGE_DIR = Path.home() / ".config/anythingllm-desktop/storage"
LANCEDB_PATH = _STORAGE_DIR / "lancedb"
DOCS_PATH = _STORAGE_DIR / "documents"
VCACHE_PATH = _STORAGE_DIR / "vector-cache"
def load_config():
    """Load the monitor configuration from ``CONFIG_PATH``.

    Returns:
        The parsed YAML document, or an empty dict when the file is empty
        (``yaml.safe_load`` returns ``None`` for an empty document).

    Raises:
        OSError: if the file cannot be opened.
        yaml.YAMLError: if the file is not valid YAML.
    """
    # Decode as UTF-8 explicitly instead of relying on the locale default;
    # workspace names in the config may contain non-ASCII letters.
    with open(CONFIG_PATH, encoding="utf-8") as f:
        return yaml.safe_load(f) or {}
def load_progress():
    """Load the upload progress record from ``PROGRESS_PATH``.

    Returns:
        The decoded JSON object, or an empty dict when the file is missing,
        cannot be decoded, or does not contain a JSON object.
    """
    try:
        with open(PROGRESS_PATH, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return {}
    except json.JSONDecodeError:
        # Presumably the file is rewritten by another process while the
        # monitor polls; treat a partial read as "no data yet" rather than
        # crashing the refresh loop.
        return {}
    # Callers use ``.get`` on the result, so only a mapping is usable.
    return data if isinstance(data, dict) else {}
def dir_size_mb(path):
    """Return the combined size of all regular files under *path*, in MiB.

    A missing path, or any error raised while walking it, yields 0.
    """
    if not path.exists():
        return 0
    bytes_per_mb = 1024 * 1024
    try:
        total_bytes = 0
        for entry in path.rglob("*"):
            if entry.is_file():
                total_bytes += entry.stat().st_size
        return total_bytes / bytes_per_mb
    except Exception:
        return 0
def get_lance_workspaces():
    """Return the set of slugs that have a ``*.lance`` directory in LanceDB.

    The slug is the directory name with ``.lance`` removed. An empty set is
    returned when ``LANCEDB_PATH`` does not exist.
    """
    slugs = set()
    if LANCEDB_PATH.exists():
        for entry in LANCEDB_PATH.iterdir():
            if entry.is_dir() and entry.name.endswith(".lance"):
                slugs.add(entry.name.replace(".lance", ""))
    return slugs
def get_lance_sizes():
    """Return ``{slug: size_in_MiB}`` for every ``*.lance`` table directory.

    The slug is the directory name with ``.lance`` removed. Filesystem errors
    while walking a table are tolerated: files that cannot be inspected are
    skipped, so a table's size may be a partial figure instead of the whole
    call failing. An empty dict is returned when ``LANCEDB_PATH`` is missing.
    """
    sizes = {}
    if not LANCEDB_PATH.exists():
        return sizes
    try:
        tables = [
            d for d in LANCEDB_PATH.iterdir()
            if d.is_dir() and d.name.endswith(".lance")
        ]
    except OSError:
        return sizes
    for table in tables:
        total_bytes = 0
        try:
            for f in table.rglob("*"):
                try:
                    if f.is_file():
                        total_bytes += f.stat().st_size
                except OSError:
                    # File disappeared between listing and stat; skip it.
                    continue
        except OSError:
            # Directory changed mid-walk; keep what was counted so far.
            pass
        sizes[table.name.replace(".lance", "")] = total_bytes / (1024 * 1024)
    return sizes
def check_api(config):
    """Probe the AnythingLLM ``/auth`` endpoint with the configured API key.

    Returns:
        ``None`` when the ``requests`` package is unavailable, ``True`` when
        the endpoint answers with HTTP 200, and ``False`` for any other
        status or any error (including missing config keys).
    """
    if not requests:
        return None
    try:
        settings = config["anythingllm"]
        endpoint = f"{settings['base_url']}/auth"
        auth_header = {"Authorization": f"Bearer {settings['api_key']}"}
        response = requests.get(endpoint, headers=auth_header, timeout=3)
        return response.status_code == 200
    except Exception:
        return False
def check_script_running():
    """Report whether ``pgrep -f setup.py`` finds a matching process.

    Returns:
        ``True`` when pgrep exits with status 0, ``False`` for any other exit
        status, and ``None`` when pgrep could not be run at all.
    """
    try:
        from subprocess import run
        proc = run(["pgrep", "-f", "setup.py"], capture_output=True, text=True)
    except Exception:
        return None
    return proc.returncode == 0
# Single-character substitutions applied to a lowercased workspace name
# before slugging. The "İ" entry is kept for parity with the original chain
# of replacements; after str.lower() that letter is already "i" plus a
# combining dot, which the strip pattern below removes.
_SLUG_TRANSLATION = str.maketrans({
    "ş": "s", "ç": "c", "ğ": "g", "ü": "u", "ö": "o",
    "ı": "i", "İ": "i", "&": "and",
})
_SLUG_STRIP_RE = re.compile(r"[^a-z0-9\s-]")
_SLUG_SPACE_RE = re.compile(r"\s+")


def _workspace_slug(name):
    """Derive the slug used to match a workspace name to a LanceDB table."""
    lowered = name.lower().translate(_SLUG_TRANSLATION)
    cleaned = _SLUG_STRIP_RE.sub("", lowered)
    return _SLUG_SPACE_RE.sub("-", cleaned.strip())


def collect_status():
    """Gather a snapshot of embedding progress for every configured persona.

    Reads the config and progress files, inspects the LanceDB directory and
    probes the API and the setup script.

    Returns:
        A JSON-serialisable dict with the keys ``personas`` (list of
        per-persona dicts: ``codename``, ``name``, ``expected``,
        ``assigned``, ``has_vectors``, ``vector_size_mb``), ``clusters``,
        ``total_uploaded``, ``total_assigned``, ``total_personas``,
        ``personas_with_vectors``, ``lancedb_size_mb``, ``docs_size_mb``,
        ``vcache_size_mb``, ``api_online``, ``script_running`` and
        ``timestamp`` (local ``HH:MM:SS``).
    """
    config = load_config() or {}
    progress = load_progress() or {}
    # ``or {}`` also covers keys that are present but null in the files.
    workspaces = config.get("workspaces") or {}
    ws_docs = progress.get("workspace_docs") or {}
    uploaded = progress.get("uploaded_files") or {}
    # One directory scan provides both "has vectors" and the table size.
    lance_sizes = get_lance_sizes()

    # Number of uploaded files per source folder.
    folder_counts = Counter()
    for info in uploaded.values():
        folder = info.get("folder", "")
        if folder:
            folder_counts[folder] += 1

    personas = []
    for codename, ws_cfg in workspaces.items():
        slug = _workspace_slug(ws_cfg["name"])

        # Expected docs: uploads from every folder mapped to this workspace.
        # Folder keys in the progress file use "_" where the config uses "/".
        expected = sum(
            folder_counts.get(entry["path"].replace("/", "_"), 0)
            for entry in (ws_cfg.get("folders") or [])
        )
        assigned = len(ws_docs.get(codename, []))

        # Table names are matched by substring. An empty slug is excluded
        # because "" is a substring of every name and would match any table.
        matching_sizes = [
            size for table, size in lance_sizes.items()
            if slug and slug in table
        ]

        personas.append({
            "codename": codename,
            "name": ws_cfg["name"],
            "expected": expected,
            "assigned": assigned,
            "has_vectors": bool(matching_sizes),
            "vector_size_mb": matching_sizes[0] if matching_sizes else 0,
        })

    # Cluster grouping (display order of personas within each cluster).
    clusters = {
        "intel": ["frodo", "echo", "ghost", "oracle", "wraith", "scribe", "polyglot"],
        "cyber": ["neo", "bastion", "sentinel", "specter", "phantom", "cipher", "vortex"],
        "military": ["marshal", "centurion", "corsair", "warden", "medic"],
        "humanities": ["chronos", "tribune", "arbiter", "ledger", "sage", "herald", "scholar", "gambit"],
        "engineering": ["forge", "architect"],
    }

    api_ok = check_api(config)
    script_running = check_script_running()

    return {
        "personas": personas,
        "clusters": clusters,
        "total_uploaded": len(uploaded),
        "total_assigned": sum(len(v) for v in ws_docs.values()),
        "total_personas": len(workspaces),
        "personas_with_vectors": sum(1 for p in personas if p["has_vectors"]),
        "lancedb_size_mb": dir_size_mb(LANCEDB_PATH),
        "docs_size_mb": dir_size_mb(DOCS_PATH),
        "vcache_size_mb": dir_size_mb(VCACHE_PATH),
        "api_online": api_ok,
        "script_running": script_running,
        "timestamp": time.strftime("%H:%M:%S"),
    }
# ──────────────────────────────────────────────────
# CLI OUTPUT
# ──────────────────────────────────────────────────
# ANSI foreground colour used for each cluster heading in CLI output.
CLUSTER_COLORS = dict(
    intel="\033[34m",        # blue
    cyber="\033[31m",        # red
    military="\033[33m",     # yellow
    humanities="\033[35m",   # magenta
    engineering="\033[36m",  # cyan
)

# Text attributes: reset all, bold, dim.
RESET, BOLD, DIM = "\033[0m", "\033[1m", "\033[2m"
# Status colours.
GREEN, RED = "\033[32m", "\033[31m"
def progress_bar(current, total, width=20):
    """Render a fixed-width text progress bar.

    Args:
        current: Amount completed; values outside ``0..total`` are clamped.
        total: Amount expected; a non-positive total renders an empty bar.
        width: Number of character cells in the bar.

    Returns:
        A string of exactly ``width`` characters: filled cells followed by
        empty cells.
    """
    # The fill glyphs were empty strings, which made every bar render as "".
    filled_cell, empty_cell = "█", "░"
    if total <= 0:
        return empty_cell * width
    clamped = min(max(current, 0), total)
    filled = int(width * clamped / total)
    return filled_cell * filled + empty_cell * (width - filled)
def cli_output(status):
    """Format a status snapshot as ANSI-coloured text for the terminal.

    Args:
        status: The dict produced by ``collect_status()``.

    Returns:
        A multi-line string: a header, two summary lines, then one section
        per cluster with one row per persona found in ``status["personas"]``.
    """
    lines = [
        f"{BOLD}═══ AnythingLLM Persona Monitor ═══{RESET} {DIM}{status['timestamp']}{RESET}",
        "",
    ]

    # System status. ``api_online`` is None when the API could not be probed
    # at all, so show that as unknown rather than as offline.
    api_state = status["api_online"]
    if api_state is None:
        api = f"{DIM}?{RESET}"
    elif api_state:
        api = f"{GREEN}✓{RESET}"
    else:
        api = f"{RED}✗{RESET}"
    script = f"{GREEN}● running{RESET}" if status["script_running"] else f"{DIM}○ idle{RESET}"
    lines.append(f" API: {api} Script: {script} "
                 f"LanceDB: {status['lancedb_size_mb']:.0f}MB "
                 f"Docs: {status['docs_size_mb']:.0f}MB")
    lines.append(f" Uploaded: {status['total_uploaded']} "
                 f"Assigned: {status['total_assigned']} "
                 f"Vectors: {status['personas_with_vectors']}/{status['total_personas']}")
    lines.append("")

    # Per-cluster persona table.
    persona_map = {p["codename"]: p for p in status["personas"]}
    for cluster_name, members in status["clusters"].items():
        color = CLUSTER_COLORS.get(cluster_name, "")
        lines.append(f" {color}{BOLD}{cluster_name.upper()}{RESET}")
        for codename in members:
            p = persona_map.get(codename)
            if not p:
                # Cluster member that is not configured as a workspace.
                continue
            vec_icon = f"{GREEN}✓{RESET}" if p["has_vectors"] else f"{DIM}○{RESET}"
            bar = progress_bar(p["assigned"], p["expected"])
            pct = (p["assigned"] / p["expected"] * 100) if p["expected"] > 0 else 0
            size_str = f"{p['vector_size_mb']:.0f}MB" if p["vector_size_mb"] > 0 else ""
            lines.append(f" {vec_icon} {codename:<12} {bar} {p['assigned']:>5}/{p['expected']:<5} "
                         f"{pct:>5.0f}% {size_str}")
        lines.append("")
    return "\n".join(lines)
def cli_mode(watch=False):
    """Print the status report once, or refresh it every 2 seconds.

    Args:
        watch: When true, clear the terminal and reprint the report every
            2 seconds until interrupted with Ctrl+C.
    """
    if not watch:
        print(cli_output(collect_status()))
        return
    try:
        while True:
            # Collect before clearing so the screen is not left blank while
            # the snapshot is being gathered.
            status = collect_status()
            os.system("clear")
            print(cli_output(status))
            time.sleep(2)
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to leave watch mode; exit quietly,
        # the same way web_mode does, instead of with a traceback.
        print("\n Stopped.")
# ──────────────────────────────────────────────────
# WEB DASHBOARD
# ──────────────────────────────────────────────────
# Single-page dashboard served at "/" and "/index.html". The embedded script
# polls /api/status every 2 seconds and re-renders the summary cards, the
# status bar and one table per cluster from the returned JSON.
HTML_TEMPLATE = """<!DOCTYPE html>
<html lang="tr">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>AnythingLLM Monitor</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body { background: #0a0a0f; color: #e0e0e0; font-family: 'JetBrains Mono', 'Fira Code', monospace; font-size: 14px; padding: 24px; }
h1 { color: #7aa2f7; font-size: 18px; margin-bottom: 4px; }
.subtitle { color: #565f89; font-size: 12px; margin-bottom: 20px; }
.status-bar { display: flex; gap: 24px; margin-bottom: 20px; padding: 12px 16px; background: #13131a; border-radius: 8px; border: 1px solid #1a1b26; }
.status-item { display: flex; align-items: center; gap: 6px; }
.dot { width: 8px; height: 8px; border-radius: 50%; }
.dot.green { background: #9ece6a; box-shadow: 0 0 6px #9ece6a44; }
.dot.red { background: #f7768e; box-shadow: 0 0 6px #f7768e44; }
.dot.dim { background: #565f89; }
.stat { color: #7aa2f7; font-weight: bold; }
.cluster { margin-bottom: 16px; }
.cluster-name { font-size: 13px; font-weight: bold; padding: 6px 12px; border-radius: 4px 4px 0 0; display: inline-block; margin-bottom: 2px; }
.cluster-intel .cluster-name { background: #1a1b3a; color: #7aa2f7; }
.cluster-cyber .cluster-name { background: #2a1a1a; color: #f7768e; }
.cluster-military .cluster-name { background: #2a2a1a; color: #e0af68; }
.cluster-humanities .cluster-name { background: #2a1a2a; color: #bb9af7; }
.cluster-engineering .cluster-name { background: #1a2a2a; color: #73daca; }
.persona-table { width: 100%; border-collapse: collapse; }
.persona-table td { padding: 5px 10px; border-bottom: 1px solid #1a1b26; }
.persona-table tr:hover { background: #13131a; }
.persona-name { color: #c0caf5; min-width: 120px; }
.progress-wrap { width: 200px; }
.progress-bg { background: #1a1b26; border-radius: 3px; height: 14px; overflow: hidden; position: relative; }
.progress-fill { height: 100%; border-radius: 3px; transition: width 0.5s ease; }
.fill-intel { background: linear-gradient(90deg, #3d59a1, #7aa2f7); }
.fill-cyber { background: linear-gradient(90deg, #a1304d, #f7768e); }
.fill-military { background: linear-gradient(90deg, #8a6d2e, #e0af68); }
.fill-humanities { background: linear-gradient(90deg, #6e3da1, #bb9af7); }
.fill-engineering { background: linear-gradient(90deg, #2e8a6d, #73daca); }
.progress-pct { position: absolute; right: 6px; top: 0; font-size: 10px; line-height: 14px; color: #fff; text-shadow: 0 0 4px rgba(0,0,0,0.8); }
.counts { color: #565f89; font-size: 12px; min-width: 100px; text-align: right; }
.vec-icon { font-size: 14px; min-width: 20px; text-align: center; }
.vec-ok { color: #9ece6a; }
.vec-no { color: #565f89; }
.size { color: #565f89; font-size: 11px; min-width: 60px; text-align: right; }
.summary { display: flex; gap: 16px; margin-bottom: 20px; flex-wrap: wrap; }
.summary-card { background: #13131a; border: 1px solid #1a1b26; border-radius: 8px; padding: 12px 16px; min-width: 120px; }
.summary-card .label { color: #565f89; font-size: 11px; text-transform: uppercase; }
.summary-card .value { color: #7aa2f7; font-size: 20px; font-weight: bold; margin-top: 2px; }
.summary-card .unit { color: #565f89; font-size: 12px; }
</style>
</head>
<body>
<h1>AnythingLLM Persona Monitor</h1>
<div class="subtitle" id="timestamp">Loading...</div>
<div class="summary" id="summary"></div>
<div class="status-bar" id="statusbar"></div>
<div id="clusters"></div>
<script>
const CLUSTER_ORDER = ['intel', 'cyber', 'military', 'humanities', 'engineering'];
function render(data) {
document.getElementById('timestamp').textContent = 'Updated: ' + data.timestamp + ' — auto-refresh 2s';
// Summary cards (guard against division by zero when no personas are configured)
const pctVec = data.total_personas > 0 ? Math.round(data.personas_with_vectors / data.total_personas * 100) : 0;
document.getElementById('summary').innerHTML = `
<div class="summary-card"><div class="label">Uploaded</div><div class="value">${data.total_uploaded}</div><div class="unit">files</div></div>
<div class="summary-card"><div class="label">Assigned</div><div class="value">${data.total_assigned}</div><div class="unit">docs</div></div>
<div class="summary-card"><div class="label">Vectors</div><div class="value">${data.personas_with_vectors}<span class="unit">/${data.total_personas}</span></div><div class="unit">${pctVec}%</div></div>
<div class="summary-card"><div class="label">LanceDB</div><div class="value">${Math.round(data.lancedb_size_mb)}<span class="unit">MB</span></div></div>
<div class="summary-card"><div class="label">Documents</div><div class="value">${Math.round(data.docs_size_mb)}<span class="unit">MB</span></div></div>
`;
// Status bar (api_online is null when the server could not probe the API)
const apiDot = data.api_online === null ? 'dim' : (data.api_online ? 'green' : 'red');
const scriptDot = data.script_running ? 'green' : 'dim';
const scriptText = data.script_running ? 'running' : 'idle';
document.getElementById('statusbar').innerHTML = `
<div class="status-item"><span class="dot ${apiDot}"></span>API</div>
<div class="status-item"><span class="dot ${scriptDot}"></span>Script ${scriptText}</div>
`;
// Clusters
const personaMap = {};
data.personas.forEach(p => personaMap[p.codename] = p);
let html = '';
CLUSTER_ORDER.forEach(cl => {
const members = data.clusters[cl] || [];
html += `<div class="cluster cluster-${cl}"><span class="cluster-name">${cl.toUpperCase()}</span><table class="persona-table">`;
members.forEach(code => {
const p = personaMap[code];
if (!p) return;
const pct = p.expected > 0 ? Math.round(p.assigned / p.expected * 100) : 0;
const vecClass = p.has_vectors ? 'vec-ok' : 'vec-no';
const vecIcon = p.has_vectors ? '✓' : '○';
const sizeStr = p.vector_size_mb > 0 ? Math.round(p.vector_size_mb) + 'MB' : '';
html += `<tr>
<td class="vec-icon ${vecClass}">${vecIcon}</td>
<td class="persona-name">${code}</td>
<td class="progress-wrap"><div class="progress-bg"><div class="progress-fill fill-${cl}" style="width:${Math.min(pct,100)}%"></div><span class="progress-pct">${pct}%</span></div></td>
<td class="counts">${p.assigned} / ${p.expected}</td>
<td class="size">${sizeStr}</td>
</tr>`;
});
html += '</table></div>';
});
document.getElementById('clusters').innerHTML = html;
}
async function poll() {
try {
const resp = await fetch('/api/status');
if (!resp.ok) throw new Error('HTTP ' + resp.status);
const data = await resp.json();
render(data);
} catch(e) {
document.getElementById('timestamp').textContent = 'Connection lost — retrying...';
}
setTimeout(poll, 2000);
}
poll();
</script>
</body>
</html>"""
class MonitorHandler(BaseHTTPRequestHandler):
    """HTTP handler for the dashboard page and its JSON status feed.

    GET routes (any query string is ignored):
        /api/status       JSON produced by ``collect_status()``
        /, /index.html    the ``HTML_TEMPLATE`` page
        anything else     404
    """

    def _send(self, code, body=b"", content_type=None, extra_headers=()):
        """Write a complete response: status line, headers and *body* bytes."""
        self.send_response(code)
        if content_type:
            self.send_header("Content-Type", content_type)
        for header, value in extra_headers:
            self.send_header(header, value)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def do_GET(self):
        """Dispatch a GET request to the matching route."""
        route = self.path.split("?", 1)[0]
        if route == "/api/status":
            try:
                body = json.dumps(collect_status()).encode()
            except Exception as exc:
                # Without this the request would end with no HTTP response.
                # The traceback still goes to stderr for diagnosis. The body
                # is plain text on purpose: a client that expects JSON fails
                # to parse it and treats the poll as failed.
                import traceback
                traceback.print_exc()
                message = f"status collection failed: {exc}\n".encode("utf-8")
                self._send(500, message, "text/plain; charset=utf-8")
                return
            self._send(200, body, "application/json",
                       [("Access-Control-Allow-Origin", "*")])
        elif route in ("/", "/index.html"):
            self._send(200, HTML_TEMPLATE.encode(), "text/html; charset=utf-8")
        else:
            self._send(404)

    def log_message(self, format, *args):
        """Suppress the default per-request logging to stderr."""
        pass  # quiet
def web_mode(port=8899):
    """Serve the web dashboard until interrupted with Ctrl+C.

    Args:
        port: TCP port to listen on.

    Raises:
        OSError: if the port cannot be bound.
    """
    # NOTE(review): the server listens on all interfaces ("0.0.0.0") and the
    # status endpoint sends a wildcard CORS header, so the dashboard is
    # reachable from other hosts on the network — confirm this is intended.
    server = HTTPServer(("0.0.0.0", port), MonitorHandler)
    print(f" AnythingLLM Monitor → http://localhost:{port}")
    print(f" API endpoint → http://localhost:{port}/api/status")
    print(" Press Ctrl+C to stop\n")
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\n Stopped.")
    finally:
        # Release the listening socket on every exit path, not only Ctrl+C.
        server.server_close()
if __name__ == "__main__":
    # Mode selection: --web [PORT] takes precedence over --watch; with
    # neither flag the report is printed once.
    cli_args = sys.argv[1:]
    if "--web" in cli_args:
        port = 8899
        # An all-digit argument right after --web overrides the default port.
        following = cli_args[cli_args.index("--web") + 1:][:1]
        if following and following[0].isdigit():
            port = int(following[0])
        web_mode(port)
    else:
        cli_mode(watch="--watch" in cli_args)