diff --git a/setup.py b/setup.py
index 412e809..9963c3b 100644
--- a/setup.py
+++ b/setup.py
@@ -63,23 +63,23 @@ SKIP_EXT = set()
 SPEED_PROFILES = {
     "fast": {
-        "embed_timeout": 30,  # 30s — real embeds take ~18s, fail fast on bad route
-        "embed_retries": 7,  # more retries since they're cheap at 30s
-        "embed_batch": 10,  # larger batches when connection is good
-        "batch_delay": 1,  # minimal delay
+        "embed_timeout": 90,  # 90s — covers cold model load (~50s) + embed (~20s)
+        "embed_retries": 5,  # enough retries for Olla routing issues
+        "embed_batch": 5,  # safe — 10 can be slow during model load
+        "batch_delay": 1,  # minimal delay — model stays warm between batches
         "verify_interval": 20,  # check LanceDB every 20 batches
-        "description": "Aggressive — 30s timeout, 7 retries, batch 10, 1s delay",
+        "description": "Fast — 90s timeout, 5 retries, batch 5, 1s delay",
     },
     "medium": {
-        "embed_timeout": 60,  # 60s — tolerates some slow responses
-        "embed_retries": 5,  # standard retries
+        "embed_timeout": 120,  # 120s — comfortable for model load + embed
+        "embed_retries": 3,  # fewer retries needed with longer timeout
         "embed_batch": 5,  # safe batch size
         "batch_delay": 2,  # reasonable delay
         "verify_interval": 10,  # check every 10 batches
-        "description": "Balanced — 60s timeout, 5 retries, batch 5, 2s delay",
+        "description": "Balanced — 120s timeout, 3 retries, batch 5, 2s delay",
     },
     "slow": {
-        "embed_timeout": 300,  # 300s — waits for cold model loads
+        "embed_timeout": 300,  # 300s — waits for cold model loads on slow GPU
         "embed_retries": 3,  # fewer retries since each is expensive
         "embed_batch": 5,  # safe batch size
         "batch_delay": 5,  # generous delay