fix(llm): collect usage stats from final stream chunk
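Breaking out of the stream loop as soon as </function> was seen meant the final chunk, which contains the token usage data (input_tokens, output_tokens), was never received. The loop now keeps reading after </function> without yielding further content, and stops once a chunk carrying usage arrives or after five extra chunks.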
This commit is contained in:
@@ -128,12 +128,18 @@ class LLM:
|
|||||||
async def _stream(self, messages: list[dict[str, Any]]) -> AsyncIterator[LLMResponse]:
|
async def _stream(self, messages: list[dict[str, Any]]) -> AsyncIterator[LLMResponse]:
|
||||||
accumulated = ""
|
accumulated = ""
|
||||||
chunks: list[Any] = []
|
chunks: list[Any] = []
|
||||||
|
done_streaming = 0
|
||||||
|
|
||||||
self._total_stats.requests += 1
|
self._total_stats.requests += 1
|
||||||
response = await acompletion(**self._build_completion_args(messages), stream=True)
|
response = await acompletion(**self._build_completion_args(messages), stream=True)
|
||||||
|
|
||||||
async for chunk in response:
|
async for chunk in response:
|
||||||
chunks.append(chunk)
|
chunks.append(chunk)
|
||||||
|
if done_streaming:
|
||||||
|
done_streaming += 1
|
||||||
|
if getattr(chunk, "usage", None) or done_streaming > 5:
|
||||||
|
break
|
||||||
|
continue
|
||||||
delta = self._get_chunk_content(chunk)
|
delta = self._get_chunk_content(chunk)
|
||||||
if delta:
|
if delta:
|
||||||
accumulated += delta
|
accumulated += delta
|
||||||
@@ -142,7 +148,8 @@ class LLM:
|
|||||||
: accumulated.find("</function>") + len("</function>")
|
: accumulated.find("</function>") + len("</function>")
|
||||||
]
|
]
|
||||||
yield LLMResponse(content=accumulated)
|
yield LLMResponse(content=accumulated)
|
||||||
break
|
done_streaming = 1
|
||||||
|
continue
|
||||||
yield LLMResponse(content=accumulated)
|
yield LLMResponse(content=accumulated)
|
||||||
|
|
||||||
if chunks:
|
if chunks:
|
||||||
|
|||||||
Reference in New Issue
Block a user