From 0a07045e178265debc6dfa3abea3d5ed7096a6af Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Tue, 7 Apr 2026 10:30:50 +0300 Subject: [PATCH] Add --speed fast/medium/slow profiles for embed operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Speed profiles control timeout, retries, batch size, and delays: fast: 30s timeout, 7 retries, batch 10, 1s delay (~5x faster) medium: 60s timeout, 5 retries, batch 5, 2s delay (default) slow: 300s timeout, 3 retries, batch 5, 5s delay (safe) Analysis showed 54% of batches hit 300s timeout on Olla bad routes, wasting 7.7h on 155 batches. Fast mode reduces timeout waste from 300s to 30s per bad route — real embeds take ~18s on average. Also reduced default batch delay from 5s to 2s in config.yaml. Co-Authored-By: Claude Opus 4.6 (1M context) --- config.yaml | 2 +- setup.py | 68 ++++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 61 insertions(+), 9 deletions(-) diff --git a/config.yaml b/config.yaml index 42559cf..7a645a4 100644 --- a/config.yaml +++ b/config.yaml @@ -26,7 +26,7 @@ embedding: # Batch processing — avoid API rate limits processing: batch_size: 50 # files per batch - delay_between_batches: 5 # seconds + delay_between_batches: 2 # seconds max_concurrent: 3 # parallel uploads skip_extensions: # don't process these - ".bin" diff --git a/setup.py b/setup.py index 28af3e3..412e809 100644 --- a/setup.py +++ b/setup.py @@ -13,6 +13,9 @@ Usage: python3 setup.py --upload-documents # Full pipeline: upload → OCR → upload → assign python3 setup.py --reassign # Re-assign existing docs to workspaces (no scan/upload) python3 setup.py --reassign --reset # Reset assignment tracking + re-assign all + python3 setup.py --reassign --speed fast # Fast: 30s timeout, 7 retries, batch 10 (~5x faster) + python3 setup.py --reassign --speed medium # Medium: 60s timeout, 5 retries, batch 5 (default) + python3 setup.py --reassign --speed slow # Slow: 300s timeout, 3 retries, batch 5 (safe) python3 setup.py --persona frodo # Single persona python3 setup.py --cluster intel # Intel cluster (frodo,echo,ghost,oracle,wraith,scribe,polyglot) python3 setup.py --cluster cyber # Cyber cluster @@ -50,6 +53,44 @@ LOG_PATH = Path(__file__).parent / "setup.log" ANYTHINGLLM_STORAGE = Path.home() / ".config/anythingllm-desktop/storage" SKIP_EXT = set() +# ────────────────────────────────────────────────────────── +# SPEED PROFILES +# ────────────────────────────────────────────────────────── +# Olla load balancer routes to random instances. +# Not all instances have the embedding model → timeout on bad routes. +# Fast mode: short timeout = fail fast + retry = less wasted time. +# Slow mode: long timeout = works even on cold model loads. + +SPEED_PROFILES = { + "fast": { + "embed_timeout": 30, # 30s — real embeds take ~18s, fail fast on bad route + "embed_retries": 7, # more retries since they're cheap at 30s + "embed_batch": 10, # larger batches when connection is good + "batch_delay": 1, # minimal delay + "verify_interval": 20, # check LanceDB every 20 batches + "description": "Aggressive — 30s timeout, 7 retries, batch 10, 1s delay", + }, + "medium": { + "embed_timeout": 60, # 60s — tolerates some slow responses + "embed_retries": 5, # standard retries + "embed_batch": 5, # safe batch size + "batch_delay": 2, # reasonable delay + "verify_interval": 10, # check every 10 batches + "description": "Balanced — 60s timeout, 5 retries, batch 5, 2s delay", + }, + "slow": { + "embed_timeout": 300, # 300s — waits for cold model loads + "embed_retries": 3, # fewer retries since each is expensive + "embed_batch": 5, # safe batch size + "batch_delay": 5, # generous delay + "verify_interval": 10, # check every 10 batches + "description": "Safe — 300s timeout, 3 retries, batch 5, 5s delay", + }, +} + +# Active speed profile (set via --speed flag) +ACTIVE_SPEED = SPEED_PROFILES["medium"] + # ────────────────────────────────────────────────────────── # LOGGING # ────────────────────────────────────────────────────────── @@ -653,10 +694,15 @@ def assign_to_workspaces(config, persona_folders, progress, batch_size, delay): log.info(f"[{idx}/{total_personas}] ○ {codename}: no uploaded docs found") continue + speed = ACTIVE_SPEED + embed_batch = speed["embed_batch"] + embed_timeout = speed["embed_timeout"] + embed_retries = speed["embed_retries"] + verify_interval = speed["verify_interval"] + batch_delay = speed["batch_delay"] + log.info(f"[{idx}/{total_personas}] → {codename} ({slug}): {len(new_docs)} docs to embed") - # Use small batches for embedding — AnythingLLM hangs on large batches - embed_batch = min(batch_size, 5) persona_ok = 0 persona_fail = 0 consecutive_fails = 0 @@ -669,10 +715,9 @@ def assign_to_workspaces(config, persona_folders, progress, batch_size, delay): log.debug(f" {codename} batch {batch_num}/{total_batches} ({len(batch)} docs)") - # Each embed call gets 5 retries (Olla may route to instance without model) result = api_request(config, "post", f"/workspace/{slug}/update-embeddings", json={"adds": batch, "deletes": []}, - timeout=300, retries=5) + timeout=embed_timeout, retries=embed_retries) if not result: persona_fail += len(batch) @@ -712,8 +757,8 @@ def assign_to_workspaces(config, persona_folders, progress, batch_size, delay): f"LanceDB {lance_msg}, grew {lance_size_after - lance_size_before} bytes") lance_size_before = lance_size_after - # Periodic LanceDB growth check (every 10 batches) - elif batch_num % 10 == 0: + # Periodic LanceDB growth check + elif batch_num % verify_interval == 0: lance_size_now = get_lancedb_size(slug) if lance_size_now <= lance_size_before: log.warning(f" ⚠ {codename} batch {batch_num}: LanceDB NOT growing " @@ -734,7 +779,7 @@ def assign_to_workspaces(config, persona_folders, progress, batch_size, delay): save_progress(progress) if bs + embed_batch < len(new_docs): - time.sleep(delay) + time.sleep(batch_delay) # Final triple verification for this persona if persona_ok > 0: @@ -1096,13 +1141,20 @@ def main(): parser.add_argument("--cluster", type=str, help="Cluster filter: intel, cyber, military, humanities, engineering") parser.add_argument("--priority", type=int, help="Max priority (1=core)") parser.add_argument("--max-size", type=int, default=100, help="Max file MB (default: 100)") + parser.add_argument("--speed", type=str, choices=["fast", "medium", "slow"], default="medium", + help="Embed speed profile: fast (30s timeout), medium (60s), slow (300s)") parser.add_argument("--dry-run", action="store_true") parser.add_argument("--resume", action="store_true") parser.add_argument("--verbose", "-v", action="store_true", help="Debug-level console output") args = parser.parse_args() setup_logging(verbose=args.verbose) - log.info(f"AnythingLLM Integration started — args: {vars(args)}") + + # Set speed profile + global ACTIVE_SPEED + ACTIVE_SPEED = SPEED_PROFILES[args.speed] + log.info(f"AnythingLLM Integration started — speed={args.speed} ({ACTIVE_SPEED['description']})") + log.info(f"Args: {vars(args)}") config = load_config() if not any([args.storage_setup, args.create_workspaces,