From b456a4ed8c2f8454ed57606dbf517429c279ce8b Mon Sep 17 00:00:00 2001
From: 0xallam
Date: Tue, 20 Jan 2026 20:30:45 -0800
Subject: [PATCH] fix(llm): collect usage stats from final stream chunk

The early break on </function_calls> prevented receiving the final
chunk that contains token usage data (input_tokens, output_tokens).
---
 strix/llm/llm.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/strix/llm/llm.py b/strix/llm/llm.py
index 778e27f..311de35 100644
--- a/strix/llm/llm.py
+++ b/strix/llm/llm.py
@@ -128,12 +128,18 @@ class LLM:
     async def _stream(self, messages: list[dict[str, Any]]) -> AsyncIterator[LLMResponse]:
         accumulated = ""
         chunks: list[Any] = []
+        done_streaming = 0
 
         self._total_stats.requests += 1
         response = await acompletion(**self._build_completion_args(messages), stream=True)
 
         async for chunk in response:
             chunks.append(chunk)
+            if done_streaming:
+                done_streaming += 1
+                if getattr(chunk, "usage", None) or done_streaming > 5:
+                    break
+                continue
             delta = self._get_chunk_content(chunk)
             if delta:
                 accumulated += delta
@@ -142,7 +148,8 @@ class LLM:
                     : accumulated.find("</function_calls>") + len("</function_calls>")
                 ]
                 yield LLMResponse(content=accumulated)
-                break
+                done_streaming = 1
+                continue
 
         yield LLMResponse(content=accumulated)
         if chunks: