fix(llm): collect usage stats from final stream chunk

Breaking out of the stream as soon as </function> was seen meant we never
received the final chunk, which is the one carrying token usage data
(input_tokens, output_tokens). Instead of breaking immediately, keep
consuming a few more chunks (bounded by a small cap) until a chunk with
usage arrives, then stop.
This commit is contained in:
0xallam
2026-01-20 20:30:45 -08:00
committed by Ahmed Allam
parent 165887798d
commit b456a4ed8c

View File

@@ -128,12 +128,18 @@ class LLM:
async def _stream(self, messages: list[dict[str, Any]]) -> AsyncIterator[LLMResponse]:
accumulated = ""
chunks: list[Any] = []
done_streaming = 0
self._total_stats.requests += 1
response = await acompletion(**self._build_completion_args(messages), stream=True)
async for chunk in response:
chunks.append(chunk)
if done_streaming:
done_streaming += 1
if getattr(chunk, "usage", None) or done_streaming > 5:
break
continue
delta = self._get_chunk_content(chunk)
if delta:
accumulated += delta
@@ -142,7 +148,8 @@ class LLM:
: accumulated.find("</function>") + len("</function>")
]
yield LLMResponse(content=accumulated)
break
done_streaming = 1
continue
yield LLMResponse(content=accumulated)
if chunks: