- Fetch real workspace slugs from AnythingLLM API instead of guessing - Show KB instead of 0MB for small LanceDB/vector sizes - Fixes incorrect vector detection after embedding engine change Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
493 lines
19 KiB
Python
Executable File
493 lines
19 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
AnythingLLM Persona RAG Monitor
|
|
|
|
Usage:
|
|
python3 monitor.py # CLI one-shot
|
|
python3 monitor.py --watch # CLI auto-refresh (2s)
|
|
python3 monitor.py --web # Web dashboard on :8899
|
|
python3 monitor.py --web 9000 # Custom port
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
import time
|
|
from http.server import HTTPServer, BaseHTTPRequestHandler
|
|
from pathlib import Path
|
|
|
|
import yaml
|
|
|
|
try:
|
|
import requests
|
|
except ImportError:
|
|
requests = None
|
|
|
|
# Files that live in the same directory as this script.
_HERE = Path(__file__).parent
CONFIG_PATH = _HERE.joinpath("config.yaml")
PROGRESS_PATH = _HERE.joinpath("upload_progress.json")
LOG_PATH = _HERE.joinpath("setup.log")

# Storage directories, resolved relative to the current user's home directory.
_HOME = Path.home()
LANCEDB_PATH = _HOME.joinpath(".config/anythingllm-desktop/storage/lancedb")
DOCS_PATH = _HOME.joinpath(".config/anythingllm-desktop/storage/documents")
VCACHE_PATH = _HOME.joinpath(".config/anythingllm-desktop/storage/vector-cache")
|
|
|
|
|
|
def load_config():
    """Load the monitor configuration from ``CONFIG_PATH``.

    Returns:
        The parsed YAML document. An empty file (which ``yaml.safe_load``
        parses as ``None``) is returned as ``{}`` so callers can use
        ``.get`` without a ``None`` check.

    Raises:
        OSError: if the file cannot be opened.
        yaml.YAMLError: if the file is not valid YAML.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(CONFIG_PATH, encoding="utf-8") as f:
        return yaml.safe_load(f) or {}
|
|
|
|
|
|
def load_progress():
    """Load the upload progress state from ``PROGRESS_PATH``.

    Returns:
        The decoded JSON object, or ``{}`` when the file is missing,
        unreadable, not valid JSON, or not a JSON object.
    """
    try:
        # Open directly instead of exists()+open(): the file can be removed
        # between the two calls.
        with open(PROGRESS_PATH, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # json.JSONDecodeError and UnicodeDecodeError are ValueError
        # subclasses. NOTE(review): presumably another process rewrites this
        # file while the monitor polls, so a half-written file is treated as
        # "no progress yet" rather than crashing the refresh loop.
        return {}
    return data if isinstance(data, dict) else {}
|
|
|
|
|
|
def dir_size_mb(path):
    """Return the total size of all regular files under *path* in MiB.

    Args:
        path: a ``pathlib.Path`` to walk recursively.

    Returns:
        ``0`` if *path* does not exist, otherwise the summed file sizes
        divided by 1024 * 1024. Files that cannot be stat'ed (for example
        because they were deleted during the walk) are skipped instead of
        discarding the whole total.
    """
    if not path.exists():
        return 0

    total_bytes = 0
    try:
        for entry in path.rglob("*"):
            try:
                if entry.is_file():
                    total_bytes += entry.stat().st_size
            except OSError:
                # The entry vanished or is unreadable; keep what we have.
                continue
    except OSError:
        # Directory traversal itself failed part-way; report the partial sum.
        pass
    return total_bytes / (1024 * 1024)
|
|
|
|
|
|
def get_lance_workspaces():
    """Return the names of the ``*.lance`` directories under ``LANCEDB_PATH``.

    Returns:
        A set of directory names with the trailing ``.lance`` removed; an
        empty set when the LanceDB directory is missing or cannot be listed.
    """
    suffix = ".lance"
    try:
        return {
            # Slice off only the trailing suffix; str.replace would also
            # remove ".lance" occurring in the middle of a name.
            d.name[: -len(suffix)]
            for d in LANCEDB_PATH.iterdir()
            if d.is_dir() and d.name.endswith(suffix)
        }
    except OSError:
        return set()
|
|
|
|
|
|
def get_lance_sizes():
    """Return the on-disk size of each ``*.lance`` directory under ``LANCEDB_PATH``.

    Returns:
        A dict mapping the directory name (trailing ``.lance`` removed) to
        its total file size in MiB. Empty when the LanceDB directory is
        missing or cannot be listed.
    """
    sizes = {}
    suffix = ".lance"
    try:
        tables = [
            d for d in LANCEDB_PATH.iterdir()
            if d.is_dir() and d.name.endswith(suffix)
        ]
    except OSError:
        return sizes

    for table in tables:
        total_bytes = 0
        try:
            for f in table.rglob("*"):
                try:
                    if f.is_file():
                        total_bytes += f.stat().st_size
                except OSError:
                    # File disappeared between listing and stat; skip it so
                    # one missing file does not abort the whole scan.
                    continue
        except OSError:
            # The table directory became unreadable mid-walk; keep the
            # partial sum.
            pass
        # Strip only the trailing suffix (str.replace would hit every match).
        sizes[table.name[: -len(suffix)]] = total_bytes / (1024 * 1024)
    return sizes
|
|
|
|
|
|
def check_api(config):
    """Probe the ``/auth`` endpoint of the configured AnythingLLM API.

    Args:
        config: mapping with ``config["anythingllm"]["base_url"]`` and
            ``config["anythingllm"]["api_key"]``.

    Returns:
        ``None`` when the ``requests`` module is unavailable, ``True`` when
        the endpoint answers HTTP 200, ``False`` for any other status or any
        exception (including missing config keys).
    """
    if not requests:
        return None

    try:
        endpoint = f"{config['anythingllm']['base_url']}/auth"
        auth_header = {"Authorization": f"Bearer {config['anythingllm']['api_key']}"}
        response = requests.get(endpoint, headers=auth_header, timeout=3)
        is_ok = response.status_code == 200
        return is_ok
    except Exception:
        return False
|
|
|
|
|
|
def check_script_running():
    """Report whether a process matching ``setup.py`` is listed by ``pgrep``.

    Returns:
        ``True`` if ``pgrep -af setup.py`` prints at least one non-empty line
        that contains neither "monitor" nor "pgrep", ``False`` if it does
        not, and ``None`` if running ``pgrep`` raised any exception.
    """
    try:
        import subprocess

        proc = subprocess.run(
            ["pgrep", "-af", "setup.py"],
            capture_output=True, text=True
        )
        candidates = proc.stdout.strip().split("\n")
        # Ignore the monitor's own command line and pgrep itself.
        return any(
            bool(entry) and "monitor" not in entry and "pgrep" not in entry
            for entry in candidates
        )
    except Exception:
        return None
|
|
|
|
|
|
def _fetch_api_slugs(config):
    """Return ``{workspace name: slug}`` from the API's ``/workspaces`` endpoint (best effort)."""
    slugs = {}
    if not requests:
        return slugs
    try:
        url = f"{config['anythingllm']['base_url']}/workspaces"
        headers = {"Authorization": f"Bearer {config['anythingllm']['api_key']}"}
        resp = requests.get(url, headers=headers, timeout=5)
        if resp.status_code == 200:
            for ws in resp.json().get("workspaces", []):
                slugs[ws["name"]] = ws["slug"]
    except Exception:
        # Deliberately best effort: when the API is unreachable or answers
        # with an unexpected payload the caller falls back to codenames.
        pass
    return slugs


def _count_uploads_by_folder(uploaded):
    """Count uploaded-file entries per non-empty ``folder`` value."""
    counts = {}
    for info in uploaded.values():
        folder = info.get("folder", "")
        if folder:
            counts[folder] = counts.get(folder, 0) + 1
    return counts


def _find_lance_table(slug, table_names):
    """Return the table name matching *slug*: exact match first, then first substring match, else ``None``."""
    if slug in table_names:
        return slug
    return next((name for name in table_names if slug in name), None)


def _tail_log(path, max_lines=15):
    """Return the last *max_lines* lines of *path* (right-stripped), or ``[]`` if it cannot be read."""
    from collections import deque

    try:
        # errors="replace" keeps the tail visible even if the log contains
        # bytes that are not valid UTF-8.
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            # A bounded deque keeps only the tail in memory while streaming.
            return [line.rstrip() for line in deque(f, maxlen=max_lines)]
    except OSError:
        return []


def collect_status():
    """Gather everything the CLI and web views display into one dict.

    Reads the config and progress files, scans the LanceDB directory, asks
    the API for real workspace slugs, and tails ``setup.log``.

    Returns:
        A JSON-serialisable dict with the keys ``personas`` (list of
        per-workspace dicts: ``codename``, ``name``, ``slug``, ``expected``,
        ``assigned``, ``has_vectors``, ``vector_size_mb``), ``clusters``,
        ``total_uploaded``, ``total_assigned``, ``total_personas``,
        ``personas_with_vectors``, ``lancedb_size_mb``, ``docs_size_mb``,
        ``vcache_size_mb``, ``api_online``, ``script_running``,
        ``timestamp`` and ``log_tail``.
    """
    config = load_config() or {}
    progress = load_progress() or {}

    # "or {}" also covers keys that are present but empty (parsed as None).
    workspaces = config.get("workspaces") or {}
    ws_docs = progress.get("workspace_docs") or {}
    uploaded = progress.get("uploaded_files") or {}
    lance_ws = get_lance_workspaces()
    lance_sizes = get_lance_sizes()

    api_slugs = _fetch_api_slugs(config)
    folder_counts = _count_uploads_by_folder(uploaded)

    personas = []
    for codename, ws_cfg in workspaces.items():
        # Use the real slug from the API, fall back to the codename.
        slug = api_slugs.get(ws_cfg["name"], codename)

        # Expected docs: uploads whose folder matches one of the mapped
        # folders (path separators are stored as underscores).
        expected = sum(
            folder_counts.get(entry["path"].replace("/", "_"), 0)
            for entry in (ws_cfg.get("folders") or [])
        )

        assigned = len(ws_docs.get(codename) or [])

        # Prefer the table named exactly like the slug so that e.g. "sage"
        # does not pick up the size of "sage-2"; substring matching remains
        # as the fallback for guessed slugs.
        has_vectors = _find_lance_table(slug, lance_ws) is not None
        size_table = _find_lance_table(slug, lance_sizes)
        vector_size = lance_sizes[size_table] if size_table is not None else 0

        personas.append({
            "codename": codename,
            "name": ws_cfg["name"],
            "slug": slug,
            "expected": expected,
            "assigned": assigned,
            "has_vectors": has_vectors,
            "vector_size_mb": vector_size,
        })

    # Cluster grouping
    clusters = {
        "intel": ["frodo", "echo", "ghost", "oracle", "wraith", "scribe", "polyglot"],
        "cyber": ["neo", "bastion", "sentinel", "specter", "phantom", "cipher", "vortex"],
        "military": ["marshal", "centurion", "corsair", "warden", "medic"],
        "humanities": ["chronos", "tribune", "arbiter", "ledger", "sage", "herald", "scholar", "gambit"],
        "engineering": ["forge", "architect"],
    }

    api_ok = check_api(config)
    script_running = check_script_running()
    log_lines = _tail_log(LOG_PATH)

    return {
        "personas": personas,
        "clusters": clusters,
        "total_uploaded": len(uploaded),
        "total_assigned": sum(len(v) for v in ws_docs.values()),
        "total_personas": len(workspaces),
        "personas_with_vectors": sum(1 for p in personas if p["has_vectors"]),
        "lancedb_size_mb": dir_size_mb(LANCEDB_PATH),
        "docs_size_mb": dir_size_mb(DOCS_PATH),
        "vcache_size_mb": dir_size_mb(VCACHE_PATH),
        "api_online": api_ok,
        "script_running": script_running,
        "timestamp": time.strftime("%H:%M:%S"),
        "log_tail": log_lines,
    }
|
|
|
|
|
|
# ──────────────────────────────────────────────────
|
|
# CLI OUTPUT
|
|
# ──────────────────────────────────────────────────
|
|
|
|
# ANSI escape sequences used by the CLI renderer.
RESET = "\033[0m"
BOLD = "\033[1m"
DIM = "\033[2m"
GREEN = "\033[32m"
RED = "\033[31m"

# Accent colour for each cluster heading.
CLUSTER_COLORS = dict(
    intel="\033[34m",        # blue
    cyber="\033[31m",        # red
    military="\033[33m",     # yellow
    humanities="\033[35m",   # magenta
    engineering="\033[36m",  # cyan
)
|
|
|
|
|
|
def progress_bar(current, total, width=20):
    """Render a fixed-width text progress bar.

    Args:
        current: amount completed.
        total: amount expected.
        width: number of characters in the bar.

    Returns:
        A string of exactly *width* characters: filled blocks for the
        completed fraction followed by light blocks. The fraction is clamped
        to the range 0..1, and a non-positive *total* yields an empty bar.
    """
    if total <= 0:
        return "░" * width
    # Clamp on both sides so out-of-range values cannot change the bar length.
    done = max(0, min(current, total))
    filled = int(width * done / total)
    return "█" * filled + "░" * (width - filled)
|
|
|
|
|
|
def _format_size_mb(size_mb):
    """Format a size given in MB, switching to KB below 1 MB (same rule as the web dashboard)."""
    if size_mb < 1:
        return f"{size_mb * 1024:.0f}KB"
    return f"{size_mb:.0f}MB"


def cli_output(status):
    """Render a status dict as ANSI-coloured text for the terminal.

    Args:
        status: dict as produced by ``collect_status()``; reads
            ``timestamp``, ``api_online``, ``script_running``,
            ``lancedb_size_mb``, ``docs_size_mb``, ``total_uploaded``,
            ``total_assigned``, ``personas_with_vectors``,
            ``total_personas``, ``personas``, ``clusters`` and the optional
            ``log_tail``.

    Returns:
        The complete report as one newline-joined string.
    """
    lines = []
    lines.append(f"{BOLD}═══ AnythingLLM Persona Monitor ═══{RESET} {DIM}{status['timestamp']}{RESET}")
    lines.append("")

    # System status
    api = f"{GREEN}●{RESET}" if status["api_online"] else f"{RED}●{RESET}"
    script = f"{GREEN}● running{RESET}" if status["script_running"] else f"{DIM}○ idle{RESET}"
    # LanceDB size uses KB below 1 MB so small stores do not show as "0MB".
    lines.append(f" API: {api} Script: {script} "
                 f"LanceDB: {_format_size_mb(status['lancedb_size_mb'])} "
                 f"Docs: {status['docs_size_mb']:.0f}MB")
    lines.append(f" Uploaded: {status['total_uploaded']} "
                 f"Assigned: {status['total_assigned']} "
                 f"Vectors: {status['personas_with_vectors']}/{status['total_personas']}")
    lines.append("")

    # Per-cluster persona table
    persona_map = {p["codename"]: p for p in status["personas"]}

    for cluster_name, members in status["clusters"].items():
        color = CLUSTER_COLORS.get(cluster_name, "")
        lines.append(f" {color}{BOLD}{cluster_name.upper()}{RESET}")

        for codename in members:
            p = persona_map.get(codename)
            if not p:
                # Cluster member that is not configured as a workspace.
                continue

            vec_icon = f"{GREEN}✓{RESET}" if p["has_vectors"] else f"{DIM}○{RESET}"
            bar = progress_bar(p["assigned"], p["expected"])
            pct = (p["assigned"] / p["expected"] * 100) if p["expected"] > 0 else 0
            size_str = _format_size_mb(p["vector_size_mb"]) if p["vector_size_mb"] > 0 else ""

            lines.append(f" {vec_icon} {codename:<12} {bar} {p['assigned']:>5}/{p['expected']:<5} "
                         f"{pct:>5.0f}% {size_str}")

        lines.append("")

    # Log tail
    log_tail = status.get("log_tail", [])
    if log_tail:
        lines.append(f" {BOLD}── Log (setup.log) ──{RESET}")
        for ll in log_tail:
            # Colorize by log level / success marker
            if "[ERROR]" in ll:
                lines.append(f" {RED}{ll}{RESET}")
            elif "[WARNING]" in ll:
                lines.append(f" \033[33m{ll}{RESET}")
            elif "✓" in ll:
                lines.append(f" {GREEN}{ll}{RESET}")
            else:
                lines.append(f" {DIM}{ll}{RESET}")
        lines.append("")

    return "\n".join(lines)
|
|
|
|
|
|
def cli_mode(watch=False):
    """Print the status report to the terminal.

    Args:
        watch: when false, print one report and return; when true, clear
            the screen and reprint every 2 seconds until interrupted.
    """
    try:
        while True:
            status = collect_status()
            if watch:
                os.system("clear")
            print(cli_output(status))
            if not watch:
                break
            time.sleep(2)
    except KeyboardInterrupt:
        # Ctrl+C is the expected way to leave watch mode; exit with a short
        # message instead of a traceback.
        print("\n Stopped.")
|
|
|
|
|
|
# ──────────────────────────────────────────────────
|
|
# WEB DASHBOARD
|
|
# ──────────────────────────────────────────────────
|
|
|
|
# Single-page dashboard served at "/" by the web mode. The embedded script
# polls /api/status every 2 seconds and re-renders the page. Text taken from
# the status payload (log lines, persona codenames) is HTML-escaped with
# esc() before being assigned to innerHTML.
HTML_TEMPLATE = """<!DOCTYPE html>
<html lang="tr">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>AnythingLLM Monitor</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body { background: #0a0a0f; color: #e0e0e0; font-family: 'JetBrains Mono', 'Fira Code', monospace; font-size: 14px; padding: 24px; }
h1 { color: #7aa2f7; font-size: 18px; margin-bottom: 4px; }
.subtitle { color: #565f89; font-size: 12px; margin-bottom: 20px; }
.status-bar { display: flex; gap: 24px; margin-bottom: 20px; padding: 12px 16px; background: #13131a; border-radius: 8px; border: 1px solid #1a1b26; }
.status-item { display: flex; align-items: center; gap: 6px; }
.dot { width: 8px; height: 8px; border-radius: 50%; }
.dot.green { background: #9ece6a; box-shadow: 0 0 6px #9ece6a44; }
.dot.red { background: #f7768e; box-shadow: 0 0 6px #f7768e44; }
.dot.dim { background: #565f89; }
.stat { color: #7aa2f7; font-weight: bold; }
.cluster { margin-bottom: 16px; }
.cluster-name { font-size: 13px; font-weight: bold; padding: 6px 12px; border-radius: 4px 4px 0 0; display: inline-block; margin-bottom: 2px; }
.cluster-intel .cluster-name { background: #1a1b3a; color: #7aa2f7; }
.cluster-cyber .cluster-name { background: #2a1a1a; color: #f7768e; }
.cluster-military .cluster-name { background: #2a2a1a; color: #e0af68; }
.cluster-humanities .cluster-name { background: #2a1a2a; color: #bb9af7; }
.cluster-engineering .cluster-name { background: #1a2a2a; color: #73daca; }
.persona-table { width: 100%; border-collapse: collapse; }
.persona-table td { padding: 5px 10px; border-bottom: 1px solid #1a1b26; }
.persona-table tr:hover { background: #13131a; }
.persona-name { color: #c0caf5; min-width: 120px; }
.progress-wrap { width: 200px; }
.progress-bg { background: #1a1b26; border-radius: 3px; height: 14px; overflow: hidden; position: relative; }
.progress-fill { height: 100%; border-radius: 3px; transition: width 0.5s ease; }
.fill-intel { background: linear-gradient(90deg, #3d59a1, #7aa2f7); }
.fill-cyber { background: linear-gradient(90deg, #a1304d, #f7768e); }
.fill-military { background: linear-gradient(90deg, #8a6d2e, #e0af68); }
.fill-humanities { background: linear-gradient(90deg, #6e3da1, #bb9af7); }
.fill-engineering { background: linear-gradient(90deg, #2e8a6d, #73daca); }
.progress-pct { position: absolute; right: 6px; top: 0; font-size: 10px; line-height: 14px; color: #fff; text-shadow: 0 0 4px rgba(0,0,0,0.8); }
.counts { color: #565f89; font-size: 12px; min-width: 100px; text-align: right; }
.vec-icon { font-size: 14px; min-width: 20px; text-align: center; }
.vec-ok { color: #9ece6a; }
.vec-no { color: #565f89; }
.size { color: #565f89; font-size: 11px; min-width: 60px; text-align: right; }
.summary { display: flex; gap: 16px; margin-bottom: 20px; flex-wrap: wrap; }
.summary-card { background: #13131a; border: 1px solid #1a1b26; border-radius: 8px; padding: 12px 16px; min-width: 120px; }
.summary-card .label { color: #565f89; font-size: 11px; text-transform: uppercase; }
.summary-card .value { color: #7aa2f7; font-size: 20px; font-weight: bold; margin-top: 2px; }
.summary-card .unit { color: #565f89; font-size: 12px; }
.log-panel { background: #0d0d12; border: 1px solid #1a1b26; border-radius: 8px; padding: 12px 16px; margin-top: 20px; max-height: 300px; overflow-y: auto; }
.log-panel h3 { color: #565f89; font-size: 12px; text-transform: uppercase; margin-bottom: 8px; }
.log-line { font-size: 12px; line-height: 1.6; color: #565f89; white-space: pre-wrap; word-break: break-all; }
.log-line.error { color: #f7768e; }
.log-line.warning { color: #e0af68; }
.log-line.success { color: #9ece6a; }
.log-line.info { color: #7aa2f7; }
</style>
</head>
<body>
<h1>AnythingLLM Persona Monitor</h1>
<div class="subtitle" id="timestamp">Loading...</div>

<div class="summary" id="summary"></div>
<div class="status-bar" id="statusbar"></div>
<div id="clusters"></div>
<div class="log-panel" id="logpanel"><h3>Log (setup.log)</h3><div id="loglines">No log data</div></div>

<script>
const CLUSTER_ORDER = ['intel', 'cyber', 'military', 'humanities', 'engineering'];

// Escape text before it is inserted through innerHTML.
function esc(text) {
  return String(text).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}

function render(data) {
  document.getElementById('timestamp').textContent = 'Updated: ' + data.timestamp + ' — auto-refresh 2s';

  // Summary cards
  const pctVec = data.total_personas > 0 ? Math.round(data.personas_with_vectors / data.total_personas * 100) : 0;
  document.getElementById('summary').innerHTML = `
<div class="summary-card"><div class="label">Uploaded</div><div class="value">${data.total_uploaded}</div><div class="unit">files</div></div>
<div class="summary-card"><div class="label">Assigned</div><div class="value">${data.total_assigned}</div><div class="unit">docs</div></div>
<div class="summary-card"><div class="label">Vectors</div><div class="value">${data.personas_with_vectors}<span class="unit">/${data.total_personas}</span></div><div class="unit">${pctVec}%</div></div>
<div class="summary-card"><div class="label">LanceDB</div><div class="value">${data.lancedb_size_mb < 1 ? Math.round(data.lancedb_size_mb * 1024) + '<span class="unit">KB</span>' : Math.round(data.lancedb_size_mb) + '<span class="unit">MB</span>'}</div></div>
<div class="summary-card"><div class="label">Documents</div><div class="value">${Math.round(data.docs_size_mb)}<span class="unit">MB</span></div></div>
`;

  // Status bar
  const apiDot = data.api_online ? 'green' : 'red';
  const scriptDot = data.script_running ? 'green' : 'dim';
  const scriptText = data.script_running ? 'running' : 'idle';
  document.getElementById('statusbar').innerHTML = `
<div class="status-item"><span class="dot ${apiDot}"></span>API</div>
<div class="status-item"><span class="dot ${scriptDot}"></span>Script ${scriptText}</div>
`;

  // Clusters
  const personaMap = {};
  data.personas.forEach(p => personaMap[p.codename] = p);

  let html = '';
  CLUSTER_ORDER.forEach(cl => {
    const members = data.clusters[cl] || [];
    html += `<div class="cluster cluster-${cl}"><span class="cluster-name">${cl.toUpperCase()}</span><table class="persona-table">`;
    members.forEach(code => {
      const p = personaMap[code];
      if (!p) return;
      const pct = p.expected > 0 ? Math.round(p.assigned / p.expected * 100) : 0;
      const vecClass = p.has_vectors ? 'vec-ok' : 'vec-no';
      const vecIcon = p.has_vectors ? '✓' : '○';
      const sizeStr = p.vector_size_mb > 0 ? (p.vector_size_mb < 1 ? Math.round(p.vector_size_mb * 1024) + 'KB' : Math.round(p.vector_size_mb) + 'MB') : '';
      html += `<tr>
<td class="vec-icon ${vecClass}">${vecIcon}</td>
<td class="persona-name">${esc(code)}</td>
<td class="progress-wrap"><div class="progress-bg"><div class="progress-fill fill-${cl}" style="width:${Math.min(pct,100)}%"></div><span class="progress-pct">${pct}%</span></div></td>
<td class="counts">${p.assigned} / ${p.expected}</td>
<td class="size">${sizeStr}</td>
</tr>`;
    });
    html += '</table></div>';
  });
  document.getElementById('clusters').innerHTML = html;

  // Log panel
  const logLines = data.log_tail || [];
  if (logLines.length > 0) {
    let logHtml = '';
    logLines.forEach(line => {
      let cls = '';
      if (line.includes('[ERROR]')) cls = 'error';
      else if (line.includes('[WARNING]')) cls = 'warning';
      else if (line.includes('✓')) cls = 'success';
      else if (line.includes('[INFO]')) cls = 'info';
      logHtml += `<div class="log-line ${cls}">${esc(line)}</div>`;
    });
    document.getElementById('loglines').innerHTML = logHtml;
    const panel = document.getElementById('logpanel');
    panel.scrollTop = panel.scrollHeight;
  }
}

async function poll() {
  try {
    const resp = await fetch('/api/status');
    const data = await resp.json();
    render(data);
  } catch(e) {
    document.getElementById('timestamp').textContent = 'Connection lost — retrying...';
  }
  setTimeout(poll, 2000);
}
poll();
</script>
</body>
</html>"""
|
|
|
|
|
|
class MonitorHandler(BaseHTTPRequestHandler):
    """HTTP handler for the web dashboard.

    Routes (any query string is ignored when matching):
        ``/api/status``       JSON produced by ``collect_status()``
        ``/``, ``/index.html``  the ``HTML_TEMPLATE`` page
        anything else         404
    """

    def do_GET(self):
        """Serve one GET request according to the routes listed on the class."""
        # Match on the path only, so "/api/status?ts=1" is still routed.
        route = self.path.split("?", 1)[0]

        if route == "/api/status":
            try:
                body = json.dumps(collect_status()).encode()
                code = 200
            except Exception as exc:
                # Top-level boundary: report the failure to the client
                # instead of dropping the connection without a response.
                body = json.dumps({"error": f"{type(exc).__name__}: {exc}"}).encode()
                code = 500
            self.send_response(code)
            self.send_header("Content-Type", "application/json")
            # NOTE(review): wildcard CORS lets any web page read this
            # endpoint; confirm that is intended.
            self.send_header("Access-Control-Allow-Origin", "*")
            self.send_header("Cache-Control", "no-store")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)
        elif route in ("/", "/index.html"):
            body = HTML_TEMPLATE.encode()
            self.send_response(200)
            self.send_header("Content-Type", "text/html; charset=utf-8")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)
        else:
            self.send_response(404)
            self.send_header("Content-Length", "0")
            self.end_headers()

    def log_message(self, format, *args):
        """Suppress the base class's per-request logging."""
        pass  # quiet
|
|
|
|
|
|
def web_mode(port=8899):
    """Serve the web dashboard until interrupted with Ctrl+C.

    Args:
        port: TCP port to listen on.
    """
    # NOTE(review): binding to 0.0.0.0 accepts connections on every network
    # interface although the printed URLs say localhost; confirm that remote
    # access to the status and log tail is intended.
    server = HTTPServer(("0.0.0.0", port), MonitorHandler)
    print(f" AnythingLLM Monitor → http://localhost:{port}")
    print(f" API endpoint → http://localhost:{port}/api/status")
    print(" Press Ctrl+C to stop\n")
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\n Stopped.")
    finally:
        # Release the listening socket however serve_forever() ended.
        server.server_close()
|
|
|
|
|
|
if __name__ == "__main__":
    # Entry point: --web [PORT] starts the dashboard (and takes precedence),
    # --watch refreshes the CLI view, no flag prints a single report.
    cli_args = sys.argv[1:]

    if "--web" in cli_args:
        # An all-digit token directly after --web is the port; anything else
        # (or nothing) means the default port.
        after_flag = cli_args[cli_args.index("--web") + 1:][:1]
        if after_flag and after_flag[0].isdigit():
            web_mode(int(after_flag[0]))
        else:
            web_mode(8899)
    else:
        cli_mode(watch="--watch" in cli_args)
|