diff --git a/setup.py b/setup.py
index 412e809..9963c3b 100644
--- a/setup.py
+++ b/setup.py
@@ -63,23 +63,23 @@ SKIP_EXT = set()
 SPEED_PROFILES = {
     "fast": {
-        "embed_timeout": 30,  # 30s — real embeds take ~18s, fail fast on bad route
-        "embed_retries": 7,  # more retries since they're cheap at 30s
-        "embed_batch": 10,  # larger batches when connection is good
-        "batch_delay": 1,  # minimal delay
+        "embed_timeout": 90,  # 90s — covers cold model load (~50s) + embed (~20s)
+        "embed_retries": 5,  # enough retries for Olla routing issues
+        "embed_batch": 5,  # safe — 10 can be slow during model load
+        "batch_delay": 1,  # minimal delay — model stays warm between batches
         "verify_interval": 20,  # check LanceDB every 20 batches
-        "description": "Aggressive — 30s timeout, 7 retries, batch 10, 1s delay",
+        "description": "Fast — 90s timeout, 5 retries, batch 5, 1s delay",
     },
     "medium": {
-        "embed_timeout": 60,  # 60s — tolerates some slow responses
-        "embed_retries": 5,  # standard retries
+        "embed_timeout": 120,  # 120s — comfortable for model load + embed
+        "embed_retries": 3,  # fewer retries needed with longer timeout
         "embed_batch": 5,  # safe batch size
         "batch_delay": 2,  # reasonable delay
         "verify_interval": 10,  # check every 10 batches
-        "description": "Balanced — 60s timeout, 5 retries, batch 5, 2s delay",
+        "description": "Balanced — 120s timeout, 3 retries, batch 5, 2s delay",
     },
     "slow": {
-        "embed_timeout": 300,  # 300s — waits for cold model loads
+        "embed_timeout": 300,  # 300s — waits for cold model loads on slow GPU
         "embed_retries": 3,  # fewer retries since each is expensive
         "embed_batch": 5,  # safe batch size
         "batch_delay": 5,  # generous delay