Add structured logging + log panel to monitor
- setup.py: logging module with file (setup.log) + console output
- Line-buffered output (fixes background execution buffering)
- API calls with timeout (300s), retry (3x), debug logging
- Per-batch progress: [1/29] persona batch 1/20 (20 docs)
- --verbose flag for debug-level console
- monitor.py: log tail in CLI + web dashboard
- CLI: colorized last 15 log lines
- Web: scrollable log panel with level-based colors
- Smaller embed batches (20 instead of 50) for reliability

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,5 +1,6 @@
|
||||
# State files (machine-specific, regenerated by script)
|
||||
upload_progress.json
|
||||
setup.log
|
||||
|
||||
# OCR output (large binary files)
|
||||
ocr_output/
|
||||
|
||||
53
monitor.py
53
monitor.py
@@ -25,6 +25,7 @@ except ImportError:
|
||||
|
||||
CONFIG_PATH = Path(__file__).parent / "config.yaml"
|
||||
PROGRESS_PATH = Path(__file__).parent / "upload_progress.json"
|
||||
LOG_PATH = Path(__file__).parent / "setup.log"
|
||||
LANCEDB_PATH = Path.home() / ".config/anythingllm-desktop/storage/lancedb"
|
||||
DOCS_PATH = Path.home() / ".config/anythingllm-desktop/storage/documents"
|
||||
VCACHE_PATH = Path.home() / ".config/anythingllm-desktop/storage/vector-cache"
|
||||
@@ -152,6 +153,16 @@ def collect_status():
|
||||
api_ok = check_api(config)
|
||||
script_running = check_script_running()
|
||||
|
||||
# Read last N lines from setup.log
|
||||
log_lines = []
|
||||
if LOG_PATH.exists():
|
||||
try:
|
||||
with open(LOG_PATH, "r", encoding="utf-8") as f:
|
||||
all_lines = f.readlines()
|
||||
log_lines = [l.rstrip() for l in all_lines[-15:]]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return {
|
||||
"personas": personas,
|
||||
"clusters": clusters,
|
||||
@@ -165,6 +176,7 @@ def collect_status():
|
||||
"api_online": api_ok,
|
||||
"script_running": script_running,
|
||||
"timestamp": time.strftime("%H:%M:%S"),
|
||||
"log_tail": log_lines,
|
||||
}
|
||||
|
||||
|
||||
@@ -231,6 +243,22 @@ def cli_output(status):
|
||||
|
||||
lines.append("")
|
||||
|
||||
# Log tail
|
||||
log_tail = status.get("log_tail", [])
|
||||
if log_tail:
|
||||
lines.append(f" {BOLD}── Log (setup.log) ──{RESET}")
|
||||
for ll in log_tail:
|
||||
# Colorize log levels
|
||||
if "[ERROR]" in ll:
|
||||
lines.append(f" {RED}{ll}{RESET}")
|
||||
elif "[WARNING]" in ll:
|
||||
lines.append(f" \033[33m{ll}{RESET}")
|
||||
elif "✓" in ll:
|
||||
lines.append(f" {GREEN}{ll}{RESET}")
|
||||
else:
|
||||
lines.append(f" {DIM}{ll}{RESET}")
|
||||
lines.append("")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
@@ -297,6 +325,13 @@ HTML_TEMPLATE = """<!DOCTYPE html>
|
||||
.summary-card .label { color: #565f89; font-size: 11px; text-transform: uppercase; }
|
||||
.summary-card .value { color: #7aa2f7; font-size: 20px; font-weight: bold; margin-top: 2px; }
|
||||
.summary-card .unit { color: #565f89; font-size: 12px; }
|
||||
.log-panel { background: #0d0d12; border: 1px solid #1a1b26; border-radius: 8px; padding: 12px 16px; margin-top: 20px; max-height: 300px; overflow-y: auto; }
|
||||
.log-panel h3 { color: #565f89; font-size: 12px; text-transform: uppercase; margin-bottom: 8px; }
|
||||
.log-line { font-size: 12px; line-height: 1.6; color: #565f89; white-space: pre-wrap; word-break: break-all; }
|
||||
.log-line.error { color: #f7768e; }
|
||||
.log-line.warning { color: #e0af68; }
|
||||
.log-line.success { color: #9ece6a; }
|
||||
.log-line.info { color: #7aa2f7; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
@@ -306,6 +341,7 @@ HTML_TEMPLATE = """<!DOCTYPE html>
|
||||
<div class="summary" id="summary"></div>
|
||||
<div class="status-bar" id="statusbar"></div>
|
||||
<div id="clusters"></div>
|
||||
<div class="log-panel" id="logpanel"><h3>Log (setup.log)</h3><div id="loglines">No log data</div></div>
|
||||
|
||||
<script>
|
||||
const CLUSTER_ORDER = ['intel', 'cyber', 'military', 'humanities', 'engineering'];
|
||||
@@ -358,6 +394,23 @@ function render(data) {
|
||||
html += '</table></div>';
|
||||
});
|
||||
document.getElementById('clusters').innerHTML = html;
|
||||
|
||||
// Log panel
|
||||
const logLines = data.log_tail || [];
|
||||
if (logLines.length > 0) {
|
||||
let logHtml = '';
|
||||
logLines.forEach(line => {
|
||||
let cls = '';
|
||||
if (line.includes('[ERROR]')) cls = 'error';
|
||||
else if (line.includes('[WARNING]')) cls = 'warning';
|
||||
else if (line.includes('✓')) cls = 'success';
|
||||
else if (line.includes('[INFO]')) cls = 'info';
|
||||
logHtml += `<div class="log-line ${cls}">${line.replace(/</g,'&lt;')}</div>`;
|
||||
});
|
||||
document.getElementById('loglines').innerHTML = logHtml;
|
||||
const panel = document.getElementById('logpanel');
|
||||
panel.scrollTop = panel.scrollHeight;
|
||||
}
|
||||
}
|
||||
|
||||
async function poll() {
|
||||
|
||||
149
setup.py
149
setup.py
@@ -26,11 +26,13 @@ Usage:
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
@@ -44,9 +46,43 @@ except ImportError:
|
||||
|
||||
CONFIG_PATH = Path(__file__).parent / "config.yaml"
|
||||
PROGRESS_PATH = Path(__file__).parent / "upload_progress.json"
|
||||
LOG_PATH = Path(__file__).parent / "setup.log"
|
||||
ANYTHINGLLM_STORAGE = Path.home() / ".config/anythingllm-desktop/storage"
|
||||
SKIP_EXT = set()
|
||||
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# LOGGING
|
||||
# ──────────────────────────────────────────────────────────
|
||||
|
||||
log = logging.getLogger("anythingllm")
|
||||
|
||||
|
||||
def setup_logging(verbose=False):
    """Configure the shared "anythingllm" logger with file + console output.

    File handler (setup.log) always records at DEBUG level; the console
    handler records at DEBUG when *verbose* is True, otherwise INFO.

    Args:
        verbose: If True, mirror debug-level messages to the console.
    """
    log.setLevel(logging.DEBUG)
    # Calling setup_logging() twice (e.g. re-entry from tests) must not
    # duplicate every line of output — drop any handlers from a prior call.
    log.handlers.clear()

    fmt = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s", datefmt="%H:%M:%S")

    # File handler — always debug level, explicit UTF-8 so the ✓/✗ markers
    # used elsewhere in this script survive on any locale.
    fh = logging.FileHandler(LOG_PATH, encoding="utf-8")
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(fmt)
    log.addHandler(fh)

    # Console handler — info or debug based on --verbose.
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.DEBUG if verbose else logging.INFO)
    ch.setFormatter(fmt)
    log.addHandler(ch)

    # StreamHandler.emit() (and FileHandler, which inherits it) already calls
    # flush() after every record, so log output is never stuck in handler
    # buffers.  The previous per-handler os.fdopen(os.dup(...)) rewrap was
    # redundant and harmful: it leaked the original stream object and swapped
    # the FileHandler's UTF-8 text stream for a locale-encoded one.  For
    # background/piped execution, make *stdout itself* line-buffered so plain
    # print() calls are not held back either.
    try:
        sys.stdout.reconfigure(line_buffering=True)
    except (AttributeError, ValueError):
        # stdout is not a regular text stream (e.g. replaced by a harness);
        # handler-level flushing still covers all log output.
        pass
|
||||
|
||||
|
||||
def log_print(msg, level="info"):
    """Route *msg* through the module logger at the named *level*.

    Backward-compatibility shim for call sites that still expect a
    print-style helper; ``level`` must name a logger method ("info",
    "debug", "warning", "error", ...).
    """
    emit = getattr(log, level)
    emit(msg)
|
||||
|
||||
CLUSTERS = {
|
||||
"intel": ["frodo", "echo", "ghost", "oracle", "wraith", "scribe", "polyglot"],
|
||||
"cyber": ["neo", "bastion", "sentinel", "specter", "phantom", "cipher", "vortex"],
|
||||
@@ -80,16 +116,36 @@ def save_progress(progress):
|
||||
# API
|
||||
# ──────────────────────────────────────────────────────────
|
||||
|
||||
def api_request(config, method, endpoint, timeout=120, retries=3, **kwargs):
    """Call the AnythingLLM REST API with timeout and retry.

    Args:
        config: Parsed config.yaml; reads anythingllm.base_url / api_key.
        method: requests method name, lowercase ("get", "post", ...).
        endpoint: API path appended to base_url (e.g. "/workspaces").
        timeout: Per-attempt timeout in seconds passed to requests.
        retries: Total attempts for timeouts, connection errors, and
            non-2xx responses.
        **kwargs: Forwarded to requests (a "json" kwarg also sets the
            Content-Type header).

    Returns:
        Decoded JSON body on HTTP 200/201, or None after all retries fail
        or on an unexpected error.
    """
    url = f"{config['anythingllm']['base_url']}{endpoint}"
    headers = {"Authorization": f"Bearer {config['anythingllm']['api_key']}"}
    if "json" in kwargs:
        headers["Content-Type"] = "application/json"

    for attempt in range(retries):
        try:
            log.debug(f"API {method.upper()} {endpoint} (attempt {attempt+1})")
            resp = getattr(requests, method)(url, headers=headers, timeout=timeout, **kwargs)
            if resp.status_code not in (200, 201):
                log.error(f"API {resp.status_code}: {resp.text[:300]}")
                if attempt < retries - 1:
                    time.sleep(3)  # brief pause before retrying a server-side error
                    continue
                return None
            log.debug(f"API {method.upper()} {endpoint} → {resp.status_code}")
            return resp.json()
        except requests.exceptions.Timeout:
            log.warning(f"API timeout ({timeout}s) on {endpoint} (attempt {attempt+1}/{retries})")
            if attempt < retries - 1:
                time.sleep(5)
        except requests.exceptions.ConnectionError as e:
            log.error(f"API connection error: {e}")
            if attempt < retries - 1:
                time.sleep(5)
        except Exception as e:
            # Anything else (bad JSON, programming error) is not retried.
            log.error(f"API unexpected error: {e}")
            return None
    return None
|
||||
|
||||
|
||||
def api_upload(config, file_path, folder_name=None):
|
||||
@@ -436,43 +492,81 @@ def upload_file_batch(config, folder_name, files, progress, batch_size, delay):
|
||||
|
||||
def assign_to_workspaces(config, persona_folders, progress, batch_size, delay):
    """Phase C2: assign uploaded docs to persona workspaces.

    For each persona, collects the document locations recorded in
    *progress* ("uploaded_files"), skips those already assigned
    ("workspace_docs"), and embeds the remainder into the matching
    workspace in small batches via /workspace/<slug>/update-embeddings.
    Progress is saved after every batch so an interrupted run resumes
    cleanly.

    Args:
        config: Parsed config.yaml; reads workspaces.<codename>.name.
        persona_folders: Mapping of persona codename -> list of source
            folder names whose uploads belong to that persona.
        progress: Mutable progress dict (upload_progress.json contents);
            "workspace_docs" is updated in place.
        batch_size: Upper bound on embed batch size (capped at 20 below).
        delay: Seconds to sleep between embed batches.
    """
    log.info("── Assigning to workspaces ──")
    existing_ws = get_existing_workspaces(config)

    if not existing_ws:
        log.error("Could not fetch workspaces from API")
        return

    total_personas = len(persona_folders)
    total_embedded = 0
    total_failed = 0

    for idx, (codename, folders) in enumerate(sorted(persona_folders.items()), 1):
        ws_name = config["workspaces"][codename]["name"]
        ws_info = existing_ws.get(ws_name)
        if not ws_info:
            log.warning(f"[{idx}/{total_personas}] {codename}: workspace '{ws_name}' not found, skipping")
            continue

        slug = ws_info["slug"]
        # Gather every uploaded doc location belonging to this persona.
        doc_locs = []
        for fn in folders:
            folder_docs = 0
            for fpath, info in progress["uploaded_files"].items():
                if info.get("folder") == fn and info.get("location"):
                    doc_locs.append(info["location"])
                    folder_docs += 1
            if folder_docs > 0:
                log.debug(f"  {codename}/{fn}: {folder_docs} docs")

        already = set(progress.get("workspace_docs", {}).get(codename, []))
        new_docs = [loc for loc in doc_locs if loc not in already]
        if not new_docs:
            if doc_locs:
                log.info(f"[{idx}/{total_personas}] ✓ {codename}: {len(doc_locs)} docs already assigned")
            else:
                log.info(f"[{idx}/{total_personas}] ○ {codename}: no uploaded docs found")
            continue

        log.info(f"[{idx}/{total_personas}] → {codename} ({slug}): {len(new_docs)} docs to embed")

        # Use smaller batches for embedding (10-20 is safer than 50).
        embed_batch = min(batch_size, 20)
        persona_ok = 0
        persona_fail = 0

        for bs in range(0, len(new_docs), embed_batch):
            batch = new_docs[bs:bs + embed_batch]
            batch_num = bs // embed_batch + 1
            total_batches = (len(new_docs) + embed_batch - 1) // embed_batch

            log.debug(f"  {codename} batch {batch_num}/{total_batches} ({len(batch)} docs)")

            # Long timeout: embedding a batch can take minutes server-side.
            result = api_request(config, "post", f"/workspace/{slug}/update-embeddings",
                                 json={"adds": batch, "deletes": []},
                                 timeout=300, retries=3)
            if result:
                progress.setdefault("workspace_docs", {}).setdefault(codename, []).extend(batch)
                persona_ok += len(batch)
                log.info(f"  ✓ {codename} batch {batch_num}/{total_batches}: "
                         f"{len(batch)} embedded ({persona_ok}/{len(new_docs)})")
            else:
                persona_fail += len(batch)
                log.error(f"  ✗ {codename} batch {batch_num}/{total_batches}: FAILED")

            # Save after every batch so interrupted runs resume correctly.
            save_progress(progress)

            if bs + embed_batch < len(new_docs):
                time.sleep(delay)

        total_embedded += persona_ok
        total_failed += persona_fail
        log.info(f"  {codename} done: {persona_ok} ok, {persona_fail} failed")

    log.info(f"── Assignment complete: {total_embedded} embedded, {total_failed} failed ──")
|
||||
|
||||
|
||||
def upload_documents(config, persona_list=None, priority_filter=None,
|
||||
@@ -628,10 +722,10 @@ def show_status(config):
|
||||
def reassign_workspaces(config, persona_list=None, reset=False, dry_run=False):
|
||||
"""Re-assign already-uploaded docs to workspaces without scanning/uploading.
|
||||
Skips the slow folder scan — uses upload_progress.json directly."""
|
||||
print("═══ Re-assign Workspaces ═══\n")
|
||||
log.info("═══ Re-assign Workspaces ═══")
|
||||
|
||||
if not check_api(config):
|
||||
print(" ✗ AnythingLLM API not reachable.")
|
||||
log.error("AnythingLLM API not reachable")
|
||||
return
|
||||
|
||||
progress = load_progress()
|
||||
@@ -642,10 +736,10 @@ def reassign_workspaces(config, persona_list=None, reset=False, dry_run=False):
|
||||
if persona_list:
|
||||
for p in persona_list:
|
||||
progress.get("workspace_docs", {}).pop(p, None)
|
||||
print(f" ✓ Reset assignments for: {', '.join(persona_list)}\n")
|
||||
log.info(f"Reset assignments for: {', '.join(persona_list)}")
|
||||
else:
|
||||
progress["workspace_docs"] = {}
|
||||
print(" ✓ Reset all workspace assignments\n")
|
||||
log.info("Reset all workspace assignments")
|
||||
save_progress(progress)
|
||||
|
||||
# Build persona_folders from config (no disk scan needed)
|
||||
@@ -659,8 +753,8 @@ def reassign_workspaces(config, persona_list=None, reset=False, dry_run=False):
|
||||
]
|
||||
|
||||
uploaded = len(progress.get("uploaded_files", {}))
|
||||
print(f" Uploaded files in progress: {uploaded}")
|
||||
print(f" Personas to assign: {len(persona_folders)}\n")
|
||||
log.info(f"Uploaded files in progress: {uploaded}")
|
||||
log.info(f"Personas to assign: {len(persona_folders)}")
|
||||
|
||||
if dry_run:
|
||||
existing_ws = get_existing_workspaces(config)
|
||||
@@ -673,11 +767,11 @@ def reassign_workspaces(config, persona_list=None, reset=False, dry_run=False):
|
||||
if info.get("folder") == fn and info.get("location"):
|
||||
doc_count += 1
|
||||
already = len(progress.get("workspace_docs", {}).get(codename, []))
|
||||
print(f" {codename} ({slug}): {doc_count} docs, {already} already assigned")
|
||||
log.info(f" {codename} ({slug}): {doc_count} docs, {already} already assigned")
|
||||
return
|
||||
|
||||
assign_to_workspaces(config, persona_folders, progress, batch_size, delay)
|
||||
print(" Done.\n")
|
||||
log.info("Re-assign complete.")
|
||||
|
||||
|
||||
def resolve_persona_list(args, config):
|
||||
@@ -709,8 +803,11 @@ def main():
|
||||
parser.add_argument("--max-size", type=int, default=100, help="Max file MB (default: 100)")
|
||||
parser.add_argument("--dry-run", action="store_true")
|
||||
parser.add_argument("--resume", action="store_true")
|
||||
parser.add_argument("--verbose", "-v", action="store_true", help="Debug-level console output")
|
||||
|
||||
args = parser.parse_args()
|
||||
setup_logging(verbose=args.verbose)
|
||||
log.info(f"AnythingLLM Integration started — args: {vars(args)}")
|
||||
config = load_config()
|
||||
|
||||
if not any([args.storage_setup, args.create_workspaces,
|
||||
|
||||
Reference in New Issue
Block a user