Add --speed fast/medium/slow profiles for embed operations
Speed profiles control timeout, retries, batch size, and delays — fast: 30s timeout, 7 retries, batch 10, 1s delay (~5x faster); medium: 60s timeout, 5 retries, batch 5, 2s delay (default); slow: 300s timeout, 3 retries, batch 5, 5s delay (safe). Analysis showed 54% of batches hit the 300s timeout on Olla bad routes, wasting 7.7h across 155 batches. Fast mode reduces timeout waste from 300s to 30s per bad route — real embeds take ~18s on average. Also reduced the default batch delay from 5s to 2s in config.yaml. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -26,7 +26,7 @@ embedding:
|
|||||||
# Batch processing — avoid API rate limits
|
# Batch processing — avoid API rate limits
|
||||||
processing:
|
processing:
|
||||||
batch_size: 50 # files per batch
|
batch_size: 50 # files per batch
|
||||||
delay_between_batches: 5 # seconds
|
delay_between_batches: 2 # seconds
|
||||||
max_concurrent: 3 # parallel uploads
|
max_concurrent: 3 # parallel uploads
|
||||||
skip_extensions: # don't process these
|
skip_extensions: # don't process these
|
||||||
- ".bin"
|
- ".bin"
|
||||||
|
|||||||
68
setup.py
68
setup.py
@@ -13,6 +13,9 @@ Usage:
|
|||||||
python3 setup.py --upload-documents # Full pipeline: upload → OCR → upload → assign
|
python3 setup.py --upload-documents # Full pipeline: upload → OCR → upload → assign
|
||||||
python3 setup.py --reassign # Re-assign existing docs to workspaces (no scan/upload)
|
python3 setup.py --reassign # Re-assign existing docs to workspaces (no scan/upload)
|
||||||
python3 setup.py --reassign --reset # Reset assignment tracking + re-assign all
|
python3 setup.py --reassign --reset # Reset assignment tracking + re-assign all
|
||||||
|
python3 setup.py --reassign --speed fast # Fast: 30s timeout, 7 retries, batch 10 (~5x faster)
|
||||||
|
python3 setup.py --reassign --speed medium # Medium: 60s timeout, 5 retries, batch 5 (default)
|
||||||
|
python3 setup.py --reassign --speed slow # Slow: 300s timeout, 3 retries, batch 5 (safe)
|
||||||
python3 setup.py --persona frodo # Single persona
|
python3 setup.py --persona frodo # Single persona
|
||||||
python3 setup.py --cluster intel # Intel cluster (frodo,echo,ghost,oracle,wraith,scribe,polyglot)
|
python3 setup.py --cluster intel # Intel cluster (frodo,echo,ghost,oracle,wraith,scribe,polyglot)
|
||||||
python3 setup.py --cluster cyber # Cyber cluster
|
python3 setup.py --cluster cyber # Cyber cluster
|
||||||
@@ -50,6 +53,44 @@ LOG_PATH = Path(__file__).parent / "setup.log"
|
|||||||
ANYTHINGLLM_STORAGE = Path.home() / ".config/anythingllm-desktop/storage"
|
ANYTHINGLLM_STORAGE = Path.home() / ".config/anythingllm-desktop/storage"
|
||||||
SKIP_EXT = set()
|
SKIP_EXT = set()
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────
|
||||||
|
# SPEED PROFILES
|
||||||
|
# ──────────────────────────────────────────────────────────
|
||||||
|
# Olla load balancer routes to random instances.
|
||||||
|
# Not all instances have the embedding model → timeout on bad routes.
|
||||||
|
# Fast mode: short timeout = fail fast + retry = less wasted time.
|
||||||
|
# Slow mode: long timeout = works even on cold model loads.
|
||||||
|
|
||||||
|
SPEED_PROFILES = {
|
||||||
|
"fast": {
|
||||||
|
"embed_timeout": 30, # 30s — real embeds take ~18s, fail fast on bad route
|
||||||
|
"embed_retries": 7, # more retries since they're cheap at 30s
|
||||||
|
"embed_batch": 10, # larger batches when connection is good
|
||||||
|
"batch_delay": 1, # minimal delay
|
||||||
|
"verify_interval": 20, # check LanceDB every 20 batches
|
||||||
|
"description": "Aggressive — 30s timeout, 7 retries, batch 10, 1s delay",
|
||||||
|
},
|
||||||
|
"medium": {
|
||||||
|
"embed_timeout": 60, # 60s — tolerates some slow responses
|
||||||
|
"embed_retries": 5, # standard retries
|
||||||
|
"embed_batch": 5, # safe batch size
|
||||||
|
"batch_delay": 2, # reasonable delay
|
||||||
|
"verify_interval": 10, # check every 10 batches
|
||||||
|
"description": "Balanced — 60s timeout, 5 retries, batch 5, 2s delay",
|
||||||
|
},
|
||||||
|
"slow": {
|
||||||
|
"embed_timeout": 300, # 300s — waits for cold model loads
|
||||||
|
"embed_retries": 3, # fewer retries since each is expensive
|
||||||
|
"embed_batch": 5, # safe batch size
|
||||||
|
"batch_delay": 5, # generous delay
|
||||||
|
"verify_interval": 10, # check every 10 batches
|
||||||
|
"description": "Safe — 300s timeout, 3 retries, batch 5, 5s delay",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
# Active speed profile (set via --speed flag)
|
||||||
|
ACTIVE_SPEED = SPEED_PROFILES["medium"]
|
||||||
|
|
||||||
# ──────────────────────────────────────────────────────────
|
# ──────────────────────────────────────────────────────────
|
||||||
# LOGGING
|
# LOGGING
|
||||||
# ──────────────────────────────────────────────────────────
|
# ──────────────────────────────────────────────────────────
|
||||||
@@ -653,10 +694,15 @@ def assign_to_workspaces(config, persona_folders, progress, batch_size, delay):
|
|||||||
log.info(f"[{idx}/{total_personas}] ○ {codename}: no uploaded docs found")
|
log.info(f"[{idx}/{total_personas}] ○ {codename}: no uploaded docs found")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
speed = ACTIVE_SPEED
|
||||||
|
embed_batch = speed["embed_batch"]
|
||||||
|
embed_timeout = speed["embed_timeout"]
|
||||||
|
embed_retries = speed["embed_retries"]
|
||||||
|
verify_interval = speed["verify_interval"]
|
||||||
|
batch_delay = speed["batch_delay"]
|
||||||
|
|
||||||
log.info(f"[{idx}/{total_personas}] → {codename} ({slug}): {len(new_docs)} docs to embed")
|
log.info(f"[{idx}/{total_personas}] → {codename} ({slug}): {len(new_docs)} docs to embed")
|
||||||
|
|
||||||
# Use small batches for embedding — AnythingLLM hangs on large batches
|
|
||||||
embed_batch = min(batch_size, 5)
|
|
||||||
persona_ok = 0
|
persona_ok = 0
|
||||||
persona_fail = 0
|
persona_fail = 0
|
||||||
consecutive_fails = 0
|
consecutive_fails = 0
|
||||||
@@ -669,10 +715,9 @@ def assign_to_workspaces(config, persona_folders, progress, batch_size, delay):
|
|||||||
|
|
||||||
log.debug(f" {codename} batch {batch_num}/{total_batches} ({len(batch)} docs)")
|
log.debug(f" {codename} batch {batch_num}/{total_batches} ({len(batch)} docs)")
|
||||||
|
|
||||||
# Each embed call gets 5 retries (Olla may route to instance without model)
|
|
||||||
result = api_request(config, "post", f"/workspace/{slug}/update-embeddings",
|
result = api_request(config, "post", f"/workspace/{slug}/update-embeddings",
|
||||||
json={"adds": batch, "deletes": []},
|
json={"adds": batch, "deletes": []},
|
||||||
timeout=300, retries=5)
|
timeout=embed_timeout, retries=embed_retries)
|
||||||
|
|
||||||
if not result:
|
if not result:
|
||||||
persona_fail += len(batch)
|
persona_fail += len(batch)
|
||||||
@@ -712,8 +757,8 @@ def assign_to_workspaces(config, persona_folders, progress, batch_size, delay):
|
|||||||
f"LanceDB {lance_msg}, grew {lance_size_after - lance_size_before} bytes")
|
f"LanceDB {lance_msg}, grew {lance_size_after - lance_size_before} bytes")
|
||||||
lance_size_before = lance_size_after
|
lance_size_before = lance_size_after
|
||||||
|
|
||||||
# Periodic LanceDB growth check (every 10 batches)
|
# Periodic LanceDB growth check
|
||||||
elif batch_num % 10 == 0:
|
elif batch_num % verify_interval == 0:
|
||||||
lance_size_now = get_lancedb_size(slug)
|
lance_size_now = get_lancedb_size(slug)
|
||||||
if lance_size_now <= lance_size_before:
|
if lance_size_now <= lance_size_before:
|
||||||
log.warning(f" ⚠ {codename} batch {batch_num}: LanceDB NOT growing "
|
log.warning(f" ⚠ {codename} batch {batch_num}: LanceDB NOT growing "
|
||||||
@@ -734,7 +779,7 @@ def assign_to_workspaces(config, persona_folders, progress, batch_size, delay):
|
|||||||
save_progress(progress)
|
save_progress(progress)
|
||||||
|
|
||||||
if bs + embed_batch < len(new_docs):
|
if bs + embed_batch < len(new_docs):
|
||||||
time.sleep(delay)
|
time.sleep(batch_delay)
|
||||||
|
|
||||||
# Final triple verification for this persona
|
# Final triple verification for this persona
|
||||||
if persona_ok > 0:
|
if persona_ok > 0:
|
||||||
@@ -1096,13 +1141,20 @@ def main():
|
|||||||
parser.add_argument("--cluster", type=str, help="Cluster filter: intel, cyber, military, humanities, engineering")
|
parser.add_argument("--cluster", type=str, help="Cluster filter: intel, cyber, military, humanities, engineering")
|
||||||
parser.add_argument("--priority", type=int, help="Max priority (1=core)")
|
parser.add_argument("--priority", type=int, help="Max priority (1=core)")
|
||||||
parser.add_argument("--max-size", type=int, default=100, help="Max file MB (default: 100)")
|
parser.add_argument("--max-size", type=int, default=100, help="Max file MB (default: 100)")
|
||||||
|
parser.add_argument("--speed", type=str, choices=["fast", "medium", "slow"], default="medium",
|
||||||
|
help="Embed speed profile: fast (30s timeout), medium (60s), slow (300s)")
|
||||||
parser.add_argument("--dry-run", action="store_true")
|
parser.add_argument("--dry-run", action="store_true")
|
||||||
parser.add_argument("--resume", action="store_true")
|
parser.add_argument("--resume", action="store_true")
|
||||||
parser.add_argument("--verbose", "-v", action="store_true", help="Debug-level console output")
|
parser.add_argument("--verbose", "-v", action="store_true", help="Debug-level console output")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
setup_logging(verbose=args.verbose)
|
setup_logging(verbose=args.verbose)
|
||||||
log.info(f"AnythingLLM Integration started — args: {vars(args)}")
|
|
||||||
|
# Set speed profile
|
||||||
|
global ACTIVE_SPEED
|
||||||
|
ACTIVE_SPEED = SPEED_PROFILES[args.speed]
|
||||||
|
log.info(f"AnythingLLM Integration started — speed={args.speed} ({ACTIVE_SPEED['description']})")
|
||||||
|
log.info(f"Args: {vars(args)}")
|
||||||
config = load_config()
|
config = load_config()
|
||||||
|
|
||||||
if not any([args.storage_setup, args.create_workspaces,
|
if not any([args.storage_setup, args.create_workspaces,
|
||||||
|
|||||||
Reference in New Issue
Block a user