From 0a07045e178265debc6dfa3abea3d5ed7096a6af Mon Sep 17 00:00:00 2001
From: salvacybersec <salva@opsecti.local>
Date: Tue, 7 Apr 2026 10:30:50 +0300
Subject: [PATCH] Add --speed fast/medium/slow profiles for embed operations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

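Speed profiles control embed timeout, retries, embed batch size, the
delay between batches, and the LanceDB verify interval (every 20
batches for fast, every 10 for medium and slow):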
  fast:   30s timeout, 7 retries, batch 10, 1s delay (~5x faster)
  medium: 60s timeout, 5 retries, batch 5, 2s delay (default)
  slow:   300s timeout, 3 retries, batch 5, 5s delay (safe)

Analysis showed that 54% of batches hit the 300s timeout on bad Olla
routes, wasting 7.7h across 155 batches. Fast mode cuts the time lost
per bad route from 300s to 30s; real embeds take ~18s on average.

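Also reduced the default delay_between_batches in config.yaml from 5s
to 2s. Note that the embed loop in assign_to_workspaces now sleeps for
the active profile's batch_delay instead of its `delay` argument, and
takes its batch size from the profile's embed_batch instead of
min(batch_size, 5).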

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 config.yaml |  2 +-
 setup.py    | 68 ++++++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/config.yaml b/config.yaml
index 42559cf..7a645a4 100644
--- a/config.yaml
+++ b/config.yaml
@@ -26,7 +26,7 @@ embedding:
 # Batch processing — avoid API rate limits
 processing:
   batch_size: 50          # files per batch
-  delay_between_batches: 5 # seconds
+  delay_between_batches: 2 # seconds
   max_concurrent: 3       # parallel uploads
   skip_extensions:        # don't process these
     - ".bin"
diff --git a/setup.py b/setup.py
index 28af3e3..412e809 100644
--- a/setup.py
+++ b/setup.py
@@ -13,6 +13,9 @@ Usage:
   python3 setup.py --upload-documents           # Full pipeline: upload → OCR → upload → assign
   python3 setup.py --reassign                   # Re-assign existing docs to workspaces (no scan/upload)
   python3 setup.py --reassign --reset           # Reset assignment tracking + re-assign all
+  python3 setup.py --reassign --speed fast      # Fast: 30s timeout, 7 retries, batch 10 (~5x faster)
+  python3 setup.py --reassign --speed medium    # Medium: 60s timeout, 5 retries, batch 5 (default)
+  python3 setup.py --reassign --speed slow      # Slow: 300s timeout, 3 retries, batch 5 (safe)
   python3 setup.py --persona frodo              # Single persona
   python3 setup.py --cluster intel              # Intel cluster (frodo,echo,ghost,oracle,wraith,scribe,polyglot)
   python3 setup.py --cluster cyber              # Cyber cluster
@@ -50,6 +53,44 @@ LOG_PATH = Path(__file__).parent / "setup.log"
 ANYTHINGLLM_STORAGE = Path.home() / ".config/anythingllm-desktop/storage"
 SKIP_EXT = set()
 
+# ──────────────────────────────────────────────────────────
+# SPEED PROFILES
+# ──────────────────────────────────────────────────────────
+# Olla load balancer routes to random instances.
+# Not all instances have the embedding model → timeout on bad routes.
+# Fast mode: short timeout = fail fast + retry = less wasted time.
+# Slow mode: long timeout = works even on cold model loads.
+
+SPEED_PROFILES = {
+    "fast": {
+        "embed_timeout": 30,     # 30s — real embeds take ~18s, fail fast on bad route
+        "embed_retries": 7,      # more retries since they're cheap at 30s
+        "embed_batch": 10,       # larger batches when connection is good
+        "batch_delay": 1,        # minimal delay
+        "verify_interval": 20,   # check LanceDB every 20 batches
+        "description": "Aggressive — 30s timeout, 7 retries, batch 10, 1s delay",
+    },
+    "medium": {
+        "embed_timeout": 60,     # 60s — tolerates some slow responses
+        "embed_retries": 5,      # standard retries
+        "embed_batch": 5,        # safe batch size
+        "batch_delay": 2,        # reasonable delay
+        "verify_interval": 10,   # check every 10 batches
+        "description": "Balanced — 60s timeout, 5 retries, batch 5, 2s delay",
+    },
+    "slow": {
+        "embed_timeout": 300,    # 300s — waits for cold model loads
+        "embed_retries": 3,      # fewer retries since each is expensive
+        "embed_batch": 5,        # safe batch size
+        "batch_delay": 5,        # generous delay
+        "verify_interval": 10,   # check every 10 batches
+        "description": "Safe — 300s timeout, 3 retries, batch 5, 5s delay",
+    },
+}
+
+# Active speed profile (set via --speed flag)
+ACTIVE_SPEED = SPEED_PROFILES["medium"]
+
 # ──────────────────────────────────────────────────────────
 # LOGGING
 # ──────────────────────────────────────────────────────────
@@ -653,10 +694,15 @@ def assign_to_workspaces(config, persona_folders, progress, batch_size, delay):
                 log.info(f"[{idx}/{total_personas}] ○ {codename}: no uploaded docs found")
             continue
 
+        speed = ACTIVE_SPEED
+        embed_batch = speed["embed_batch"]
+        embed_timeout = speed["embed_timeout"]
+        embed_retries = speed["embed_retries"]
+        verify_interval = speed["verify_interval"]
+        batch_delay = speed["batch_delay"]
+
         log.info(f"[{idx}/{total_personas}] → {codename} ({slug}): {len(new_docs)} docs to embed")
 
-        # Use small batches for embedding — AnythingLLM hangs on large batches
-        embed_batch = min(batch_size, 5)
         persona_ok = 0
         persona_fail = 0
         consecutive_fails = 0
@@ -669,10 +715,9 @@ def assign_to_workspaces(config, persona_folders, progress, batch_size, delay):
 
             log.debug(f"  {codename} batch {batch_num}/{total_batches} ({len(batch)} docs)")
 
-            # Each embed call gets 5 retries (Olla may route to instance without model)
             result = api_request(config, "post", f"/workspace/{slug}/update-embeddings",
                                  json={"adds": batch, "deletes": []},
-                                 timeout=300, retries=5)
+                                 timeout=embed_timeout, retries=embed_retries)
 
             if not result:
                 persona_fail += len(batch)
@@ -712,8 +757,8 @@ def assign_to_workspaces(config, persona_folders, progress, batch_size, delay):
                              f"LanceDB {lance_msg}, grew {lance_size_after - lance_size_before} bytes")
                 lance_size_before = lance_size_after
 
-            # Periodic LanceDB growth check (every 10 batches)
-            elif batch_num % 10 == 0:
+            # Periodic LanceDB growth check
+            elif batch_num % verify_interval == 0:
                 lance_size_now = get_lancedb_size(slug)
                 if lance_size_now <= lance_size_before:
                     log.warning(f"  ⚠ {codename} batch {batch_num}: LanceDB NOT growing "
@@ -734,7 +779,7 @@ def assign_to_workspaces(config, persona_folders, progress, batch_size, delay):
             save_progress(progress)
 
             if bs + embed_batch < len(new_docs):
-                time.sleep(delay)
+                time.sleep(batch_delay)
 
         # Final triple verification for this persona
         if persona_ok > 0:
@@ -1096,13 +1141,20 @@ def main():
     parser.add_argument("--cluster", type=str, help="Cluster filter: intel, cyber, military, humanities, engineering")
     parser.add_argument("--priority", type=int, help="Max priority (1=core)")
     parser.add_argument("--max-size", type=int, default=100, help="Max file MB (default: 100)")
+    parser.add_argument("--speed", type=str, choices=["fast", "medium", "slow"], default="medium",
+                        help="Embed speed profile: fast (30s timeout), medium (60s), slow (300s)")
     parser.add_argument("--dry-run", action="store_true")
     parser.add_argument("--resume", action="store_true")
     parser.add_argument("--verbose", "-v", action="store_true", help="Debug-level console output")
 
     args = parser.parse_args()
     setup_logging(verbose=args.verbose)
-    log.info(f"AnythingLLM Integration started — args: {vars(args)}")
+
+    # Set speed profile
+    global ACTIVE_SPEED
+    ACTIVE_SPEED = SPEED_PROFILES[args.speed]
+    log.info(f"AnythingLLM Integration started — speed={args.speed} ({ACTIVE_SPEED['description']})")
+    log.info(f"Args: {vars(args)}")
     config = load_config()
 
     if not any([args.storage_setup, args.create_workspaces,