Add robust embedding verification — no silent failures
- Pre-flight: test embedding model with 3 retries (120s timeout for cold start)
- First-batch verify: after batch 1, query workspace to confirm vectors searchable
- Abort on model errors: "not found" or "failed to embed" stops immediately
- Consecutive failure guard: 3 fails in a row → skip persona, continue others
- Response error check: API 200 but embed error in body → caught and logged
- Never record progress for failed embeds

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
145
setup.py
145
setup.py
@@ -204,6 +204,77 @@ def get_existing_workspaces(config):
|
||||
return {}
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# EMBEDDING HEALTH CHECKS
|
||||
# ──────────────────────────────────────────────────────────
|
||||
|
||||
def verify_embedding_model(config):
    """Test that the configured embedding model actually works via Olla/Ollama.

    Reads ``EMBEDDING_BASE_PATH`` and ``EMBEDDING_MODEL_PREF`` from the
    AnythingLLM ``.env`` file under ``ANYTHINGLLM_STORAGE`` and POSTs a short
    test input to ``<base path>/api/embed``. Up to three attempts are made;
    every failed attempt is logged, including responses that carry neither
    embeddings nor an error message.

    Args:
        config: Not read by this function; kept so the call signature stays
            unchanged for existing callers.

    Returns:
        True as soon as one response contains a non-empty ``"embeddings"``
        list. False if the embedding settings cannot be determined or all
        three attempts fail.
    """
    embedding_path = ""
    model = ""

    # Read from AnythingLLM .env
    env_file = ANYTHINGLLM_STORAGE / ".env"
    if env_file.exists():
        for raw_line in env_file.read_text().splitlines():
            line = raw_line.strip()
            if line.startswith("EMBEDDING_BASE_PATH="):
                # Trim whitespace before the quotes so `KEY="value" ` parses too.
                embedding_path = line.split("=", 1)[1].strip().strip("'\"")
            elif line.startswith("EMBEDDING_MODEL_PREF="):
                model = line.split("=", 1)[1].strip().strip("'\"")

    if not embedding_path or not model:
        log.warning(f"Cannot determine embedding config (path={embedding_path}, model={model})")
        return False

    # A trailing slash in the configured base path would yield "//api/embed".
    embedding_path = embedding_path.rstrip("/")

    log.info(f"Testing embedding model: {model} via {embedding_path}")

    max_attempts = 3
    for attempt in range(max_attempts):
        retry_delay = 5
        try:
            log.debug(f" Embed test attempt {attempt + 1}/3")
            resp = requests.post(
                f"{embedding_path}/api/embed",
                json={"model": model, "input": "embedding health check test"},
                timeout=120,  # first call may need to load model into GPU
            )
            data = resp.json()
            payload = data if isinstance(data, dict) else {}

            if payload.get("embeddings"):
                dims = len(payload["embeddings"][0])
                log.info(f" ✓ Embedding model OK: {model} ({dims}d)")
                return True

            if "error" in payload:
                log.warning(f" Attempt {attempt + 1}: {payload['error']}")
            else:
                # Neither embeddings nor an error message: report it instead
                # of failing silently.
                log.warning(
                    f" Attempt {attempt + 1}: unexpected response "
                    f"(HTTP {resp.status_code}), no embeddings returned"
                )
        except requests.exceptions.Timeout:
            log.warning(f" Attempt {attempt + 1}: timeout (model loading?)")
            # Give a cold-starting model more time before the next attempt.
            retry_delay = 10
        except Exception as e:
            # Deliberately broad: connection errors, invalid JSON and
            # malformed payloads all count as one failed attempt.
            log.warning(f" Attempt {attempt + 1}: {e}")

        if attempt < max_attempts - 1:
            time.sleep(retry_delay)

    log.error(" ✗ Embedding model FAILED after 3 attempts")
    return False
|
||||
|
||||
|
||||
def verify_workspace_vectors(config, slug, test_query="test"):
    """Verify a workspace actually has working vectors by doing a test query.

    Sends ``test_query`` in ``"query"`` mode to ``/workspace/{slug}/chat``
    through ``api_request`` and treats a non-empty ``"sources"`` list in the
    response as evidence that vectors are searchable.

    Args:
        config: Passed through unchanged to ``api_request``.
        slug: Workspace slug interpolated into the chat endpoint path.
        test_query: Message sent as the test query.

    Returns:
        A ``(ok, message)`` tuple. ``ok`` is True only when the response
        lists at least one source. ``message`` is always a string: the
        source count on success, otherwise the failure reason.
    """
    result = api_request(
        config, "post", f"/workspace/{slug}/chat",
        json={"message": test_query, "mode": "query"},
        timeout=30, retries=1,
    )
    if not result:
        return False, "API request failed"

    error = result.get("error")
    if error:
        # Callers run string methods (e.g. .lower()) on the message, so a
        # non-string error payload must be coerced rather than passed through.
        return False, str(error)

    # `or []` also covers a response that carries "sources": null.
    sources = result.get("sources") or []
    if sources:
        return True, f"{len(sources)} sources found"

    # No sources could mean no relevant docs or embedding failure
    return False, "no sources returned"
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# PDF DETECTION & OCR
|
||||
# ──────────────────────────────────────────────────────────
|
||||
@@ -493,6 +564,12 @@ def upload_file_batch(config, folder_name, files, progress, batch_size, delay):
|
||||
def assign_to_workspaces(config, persona_folders, progress, batch_size, delay):
|
||||
"""Phase C2: assign uploaded docs to persona workspaces."""
|
||||
log.info("── Assigning to workspaces ──")
|
||||
|
||||
# Pre-flight: verify embedding model works
|
||||
if not verify_embedding_model(config):
|
||||
log.error("ABORTING: Embedding model is not working. Fix model config and retry.")
|
||||
return
|
||||
|
||||
existing_ws = get_existing_workspaces(config)
|
||||
|
||||
if not existing_ws:
|
||||
@@ -502,6 +579,8 @@ def assign_to_workspaces(config, persona_folders, progress, batch_size, delay):
|
||||
total_personas = len(persona_folders)
|
||||
total_embedded = 0
|
||||
total_failed = 0
|
||||
verified_workspaces = 0
|
||||
failed_verification = []
|
||||
|
||||
for idx, (codename, folders) in enumerate(sorted(persona_folders.items()), 1):
|
||||
ws_name = config["workspaces"][codename]["name"]
|
||||
@@ -536,6 +615,7 @@ def assign_to_workspaces(config, persona_folders, progress, batch_size, delay):
|
||||
embed_batch = min(batch_size, 5)
|
||||
persona_ok = 0
|
||||
persona_fail = 0
|
||||
consecutive_fails = 0
|
||||
|
||||
for bs in range(0, len(new_docs), embed_batch):
|
||||
batch = new_docs[bs:bs + embed_batch]
|
||||
@@ -547,14 +627,55 @@ def assign_to_workspaces(config, persona_folders, progress, batch_size, delay):
|
||||
result = api_request(config, "post", f"/workspace/{slug}/update-embeddings",
|
||||
json={"adds": batch, "deletes": []},
|
||||
timeout=300, retries=3)
|
||||
if result:
|
||||
|
||||
if not result:
|
||||
persona_fail += len(batch)
|
||||
consecutive_fails += 1
|
||||
log.error(f" ✗ {codename} batch {batch_num}/{total_batches}: API FAILED")
|
||||
if consecutive_fails >= 3:
|
||||
log.error(f" ✗ {codename}: 3 consecutive failures, skipping remaining")
|
||||
break
|
||||
time.sleep(delay)
|
||||
continue
|
||||
|
||||
# Check for embedding errors in the response
|
||||
ws_data = result.get("workspace", {})
|
||||
embed_error = result.get("error")
|
||||
if embed_error:
|
||||
persona_fail += len(batch)
|
||||
consecutive_fails += 1
|
||||
log.error(f" ✗ {codename} batch {batch_num}/{total_batches}: {embed_error}")
|
||||
if "not found" in str(embed_error).lower():
|
||||
log.error(f" ✗ ABORTING: Embedding model error — fix config and retry")
|
||||
save_progress(progress)
|
||||
return
|
||||
if consecutive_fails >= 3:
|
||||
log.error(f" ✗ {codename}: 3 consecutive failures, skipping remaining")
|
||||
break
|
||||
time.sleep(delay)
|
||||
continue
|
||||
|
||||
# Verify first batch actually created vectors (one-time per persona)
|
||||
if batch_num == 1:
|
||||
time.sleep(2)
|
||||
vec_ok, vec_msg = verify_workspace_vectors(config, slug)
|
||||
if not vec_ok:
|
||||
log.error(f" ✗ {codename}: first batch embed returned 200 but "
|
||||
f"vectors NOT searchable: {vec_msg}")
|
||||
if "not found" in vec_msg.lower() or "failed to embed" in vec_msg.lower():
|
||||
log.error(f" ✗ ABORTING: Embedding model broken")
|
||||
save_progress(progress)
|
||||
return
|
||||
# might be "no sources" for unrelated query — continue but warn
|
||||
log.warning(f" ⚠ {codename}: continuing despite verification warning")
|
||||
else:
|
||||
log.info(f" ✓ {codename}: first batch verified — vectors searchable")
|
||||
|
||||
progress.setdefault("workspace_docs", {}).setdefault(codename, []).extend(batch)
|
||||
persona_ok += len(batch)
|
||||
consecutive_fails = 0
|
||||
log.info(f" ✓ {codename} batch {batch_num}/{total_batches}: "
|
||||
f"{len(batch)} embedded ({persona_ok}/{len(new_docs)})")
|
||||
else:
|
||||
persona_fail += len(batch)
|
||||
log.error(f" ✗ {codename} batch {batch_num}/{total_batches}: FAILED")
|
||||
|
||||
# Save after every batch
|
||||
save_progress(progress)
|
||||
@@ -564,9 +685,21 @@ def assign_to_workspaces(config, persona_folders, progress, batch_size, delay):
|
||||
|
||||
total_embedded += persona_ok
|
||||
total_failed += persona_fail
|
||||
log.info(f" {codename} done: {persona_ok} ok, {persona_fail} failed")
|
||||
|
||||
log.info(f"── Assignment complete: {total_embedded} embedded, {total_failed} failed ──")
|
||||
if persona_ok > 0:
|
||||
verified_workspaces += 1
|
||||
log.info(f" {codename} done: {persona_ok} ok, {persona_fail} failed ✓")
|
||||
elif persona_fail > 0:
|
||||
failed_verification.append(codename)
|
||||
log.error(f" {codename} done: {persona_ok} ok, {persona_fail} FAILED ✗")
|
||||
else:
|
||||
log.info(f" {codename} done: no docs")
|
||||
|
||||
log.info(f"── Assignment complete ──")
|
||||
log.info(f" Embedded: {total_embedded}, Failed: {total_failed}")
|
||||
log.info(f" Verified: {verified_workspaces}/{total_personas}")
|
||||
if failed_verification:
|
||||
log.error(f" FAILED verification: {', '.join(failed_verification)}")
|
||||
|
||||
|
||||
def upload_documents(config, persona_list=None, priority_filter=None,
|
||||
|
||||
Reference in New Issue
Block a user