From b456a4ed8c2f8454ed57606dbf517429c279ce8b Mon Sep 17 00:00:00 2001
From: 0xallam
Date: Tue, 20 Jan 2026 20:30:45 -0800
Subject: [PATCH] fix(llm): collect usage stats from final stream chunk

The early break on </function_calls> prevented receiving the final
chunk that contains token usage data (input_tokens, output_tokens).
---
 strix/llm/llm.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/strix/llm/llm.py b/strix/llm/llm.py
index 778e27f..311de35 100644
--- a/strix/llm/llm.py
+++ b/strix/llm/llm.py
@@ -128,12 +128,18 @@ class LLM:
     async def _stream(self, messages: list[dict[str, Any]]) -> AsyncIterator[LLMResponse]:
         accumulated = ""
         chunks: list[Any] = []
+        done_streaming = 0
 
         self._total_stats.requests += 1
         response = await acompletion(**self._build_completion_args(messages), stream=True)
 
         async for chunk in response:
             chunks.append(chunk)
+            if done_streaming:
+                done_streaming += 1
+                if getattr(chunk, "usage", None) or done_streaming > 5:
+                    break
+                continue
             delta = self._get_chunk_content(chunk)
             if delta:
                 accumulated += delta
@@ -142,7 +148,8 @@ class LLM:
                     : accumulated.find("</function_calls>") + len("</function_calls>")
                 ]
                 yield LLMResponse(content=accumulated)
-                break
+                done_streaming = 1
+                continue
 
         yield LLMResponse(content=accumulated)
         if chunks: