Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
643f6ba54a | ||
|
|
7fb4b63b96 | ||
|
|
027cea2f25 | ||
|
|
b9dcf7f63d | ||
|
|
e09b5b42c1 | ||
|
|
e7970de6d2 | ||
|
|
7614fcc512 | ||
|
|
f4d522164d | ||
|
|
6166be841b | ||
|
|
bf8020fafb | ||
|
|
3b3576b024 | ||
|
|
d2c99ea4df | ||
|
|
06ae3d3860 |
@@ -30,7 +30,7 @@ Thank you for your interest in contributing to Strix! This guide will help you g
|
||||
|
||||
3. **Configure your LLM provider**
|
||||
```bash
|
||||
export STRIX_LLM="anthropic/claude-sonnet-4-6"
|
||||
export STRIX_LLM="openai/gpt-5"
|
||||
export LLM_API_KEY="your-api-key"
|
||||
```
|
||||
|
||||
|
||||
@@ -86,7 +86,7 @@ curl -sSL https://strix.ai/install | bash
|
||||
pipx install strix-agent
|
||||
|
||||
# Configure your AI provider
|
||||
export STRIX_LLM="anthropic/claude-sonnet-4-6" # or "strix/claude-sonnet-4.6" via Strix Router (https://models.strix.ai)
|
||||
export STRIX_LLM="openai/gpt-5" # or "strix/gpt-5" via Strix Router (https://models.strix.ai)
|
||||
export LLM_API_KEY="your-api-key"
|
||||
|
||||
# Run your first security assessment
|
||||
@@ -203,7 +203,7 @@ jobs:
|
||||
### Configuration
|
||||
|
||||
```bash
|
||||
export STRIX_LLM="anthropic/claude-sonnet-4-6"
|
||||
export STRIX_LLM="openai/gpt-5"
|
||||
export LLM_API_KEY="your-api-key"
|
||||
|
||||
# Optional
|
||||
@@ -217,8 +217,8 @@ export STRIX_REASONING_EFFORT="high" # control thinking effort (default: high,
|
||||
|
||||
**Recommended models for best results:**
|
||||
|
||||
- [Anthropic Claude Sonnet 4.6](https://claude.com/platform/api) — `anthropic/claude-sonnet-4-6`
|
||||
- [OpenAI GPT-5](https://openai.com/api/) — `openai/gpt-5`
|
||||
- [Anthropic Claude Sonnet 4.6](https://claude.com/platform/api) — `anthropic/claude-sonnet-4-6`
|
||||
- [Google Gemini 3 Pro Preview](https://cloud.google.com/vertex-ai) — `vertex_ai/gemini-3-pro-preview`
|
||||
|
||||
See the [LLM Providers documentation](https://docs.strix.ai/llm-providers/overview) for all supported providers including Vertex AI, Bedrock, Azure, and local models.
|
||||
|
||||
@@ -8,7 +8,7 @@ Configure Strix using environment variables or a config file.
|
||||
## LLM Configuration
|
||||
|
||||
<ParamField path="STRIX_LLM" type="string" required>
|
||||
Model name in LiteLLM format (e.g., `anthropic/claude-sonnet-4-6`, `openai/gpt-5`).
|
||||
Model name in LiteLLM format (e.g., `openai/gpt-5`, `anthropic/claude-sonnet-4-6`).
|
||||
</ParamField>
|
||||
|
||||
<ParamField path="LLM_API_KEY" type="string">
|
||||
@@ -86,7 +86,7 @@ strix --target ./app --config /path/to/config.json
|
||||
```json
|
||||
{
|
||||
"env": {
|
||||
"STRIX_LLM": "anthropic/claude-sonnet-4-6",
|
||||
"STRIX_LLM": "openai/gpt-5",
|
||||
"LLM_API_KEY": "sk-...",
|
||||
"STRIX_REASONING_EFFORT": "high"
|
||||
}
|
||||
@@ -97,7 +97,7 @@ strix --target ./app --config /path/to/config.json
|
||||
|
||||
```bash
|
||||
# Required
|
||||
export STRIX_LLM="anthropic/claude-sonnet-4-6"
|
||||
export STRIX_LLM="openai/gpt-5"
|
||||
export LLM_API_KEY="sk-..."
|
||||
|
||||
# Optional: Enable web search
|
||||
|
||||
@@ -32,7 +32,7 @@ description: "Contribute to Strix development"
|
||||
</Step>
|
||||
<Step title="Configure LLM">
|
||||
```bash
|
||||
export STRIX_LLM="anthropic/claude-sonnet-4-6"
|
||||
export STRIX_LLM="openai/gpt-5"
|
||||
export LLM_API_KEY="your-api-key"
|
||||
```
|
||||
</Step>
|
||||
|
||||
@@ -78,7 +78,7 @@ Strix uses a graph of specialized agents for comprehensive security testing:
|
||||
curl -sSL https://strix.ai/install | bash
|
||||
|
||||
# Configure
|
||||
export STRIX_LLM="anthropic/claude-sonnet-4-6"
|
||||
export STRIX_LLM="openai/gpt-5"
|
||||
export LLM_API_KEY="your-api-key"
|
||||
|
||||
# Scan
|
||||
|
||||
@@ -35,7 +35,7 @@ Add these secrets to your repository:
|
||||
|
||||
| Secret | Description |
|
||||
|--------|-------------|
|
||||
| `STRIX_LLM` | Model name (e.g., `anthropic/claude-sonnet-4-6`) |
|
||||
| `STRIX_LLM` | Model name (e.g., `openai/gpt-5`) |
|
||||
| `LLM_API_KEY` | API key for your LLM provider |
|
||||
|
||||
## Exit Codes
|
||||
|
||||
@@ -6,7 +6,7 @@ description: "Configure Strix with Claude models"
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
export STRIX_LLM="anthropic/claude-sonnet-4-6"
|
||||
export STRIX_LLM="openai/gpt-5"
|
||||
export LLM_API_KEY="sk-ant-..."
|
||||
```
|
||||
|
||||
@@ -14,7 +14,7 @@ export LLM_API_KEY="sk-ant-..."
|
||||
|
||||
| Model | Description |
|
||||
|-------|-------------|
|
||||
| `anthropic/claude-sonnet-4-6` | Best balance of intelligence and speed (recommended) |
|
||||
| `anthropic/claude-sonnet-4-6` | Best balance of intelligence and speed |
|
||||
| `anthropic/claude-opus-4-6` | Maximum capability for deep analysis |
|
||||
|
||||
## Get API Key
|
||||
|
||||
@@ -25,7 +25,7 @@ Strix Router is currently in **beta**. It's completely optional — Strix works
|
||||
|
||||
```bash
|
||||
export LLM_API_KEY='your-strix-api-key'
|
||||
export STRIX_LLM='strix/claude-sonnet-4.6'
|
||||
export STRIX_LLM='strix/gpt-5'
|
||||
```
|
||||
|
||||
3. Run a scan:
|
||||
|
||||
@@ -10,7 +10,7 @@ Strix uses [LiteLLM](https://docs.litellm.ai/docs/providers) for model compatibi
|
||||
The fastest way to get started. [Strix Router](/llm-providers/models) gives you access to tested models with the highest rate limits and zero data retention.
|
||||
|
||||
```bash
|
||||
export STRIX_LLM="strix/claude-sonnet-4.6"
|
||||
export STRIX_LLM="strix/gpt-5"
|
||||
export LLM_API_KEY="your-strix-api-key"
|
||||
```
|
||||
|
||||
@@ -22,12 +22,12 @@ You can also use any LiteLLM-compatible provider with your own API keys:
|
||||
|
||||
| Model | Provider | Configuration |
|
||||
| ----------------- | ------------- | -------------------------------- |
|
||||
| Claude Sonnet 4.6 | Anthropic | `anthropic/claude-sonnet-4-6` |
|
||||
| GPT-5 | OpenAI | `openai/gpt-5` |
|
||||
| Claude Sonnet 4.6 | Anthropic | `anthropic/claude-sonnet-4-6` |
|
||||
| Gemini 3 Pro | Google Vertex | `vertex_ai/gemini-3-pro-preview` |
|
||||
|
||||
```bash
|
||||
export STRIX_LLM="anthropic/claude-sonnet-4-6"
|
||||
export STRIX_LLM="openai/gpt-5"
|
||||
export LLM_API_KEY="your-api-key"
|
||||
```
|
||||
|
||||
@@ -52,7 +52,7 @@ See the [Local Models guide](/llm-providers/local) for setup instructions and re
|
||||
GPT-5 and Codex models.
|
||||
</Card>
|
||||
<Card title="Anthropic" href="/llm-providers/anthropic">
|
||||
Claude Sonnet 4.6, Opus, and Haiku.
|
||||
Claude Opus, Sonnet, and Haiku.
|
||||
</Card>
|
||||
<Card title="OpenRouter" href="/llm-providers/openrouter">
|
||||
Access 100+ models through a single API.
|
||||
@@ -76,8 +76,8 @@ See the [Local Models guide](/llm-providers/local) for setup instructions and re
|
||||
Use LiteLLM's `provider/model-name` format:
|
||||
|
||||
```
|
||||
anthropic/claude-sonnet-4-6
|
||||
openai/gpt-5
|
||||
anthropic/claude-sonnet-4-6
|
||||
vertex_ai/gemini-3-pro-preview
|
||||
bedrock/anthropic.claude-4-5-sonnet-20251022-v1:0
|
||||
ollama/llama4
|
||||
|
||||
@@ -30,20 +30,20 @@ Set your LLM provider:
|
||||
<Tabs>
|
||||
<Tab title="Strix Router">
|
||||
```bash
|
||||
export STRIX_LLM="strix/claude-sonnet-4.6"
|
||||
export STRIX_LLM="strix/gpt-5"
|
||||
export LLM_API_KEY="your-strix-api-key"
|
||||
```
|
||||
</Tab>
|
||||
<Tab title="Bring Your Own Key">
|
||||
```bash
|
||||
export STRIX_LLM="anthropic/claude-sonnet-4-6"
|
||||
export STRIX_LLM="openai/gpt-5"
|
||||
export LLM_API_KEY="your-api-key"
|
||||
```
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
<Tip>
|
||||
For best results, use `strix/claude-sonnet-4.6`, `strix/claude-opus-4.6`, or `strix/gpt-5.2`.
|
||||
For best results, use `strix/gpt-5`, `strix/claude-opus-4.6`, or `strix/gpt-5.2`.
|
||||
</Tip>
|
||||
|
||||
## Run Your First Scan
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "strix-agent"
|
||||
version = "0.8.0"
|
||||
version = "0.8.1"
|
||||
description = "Open-source AI Hackers for your apps"
|
||||
authors = ["Strix <hi@usestrix.com>"]
|
||||
readme = "README.md"
|
||||
|
||||
@@ -340,7 +340,7 @@ echo -e " ${MUTED}https://models.strix.ai${NC}"
|
||||
echo ""
|
||||
echo -e " ${CYAN}2.${NC} Set your environment:"
|
||||
echo -e " ${MUTED}export LLM_API_KEY='your-api-key'${NC}"
|
||||
echo -e " ${MUTED}export STRIX_LLM='strix/claude-sonnet-4.6'${NC}"
|
||||
echo -e " ${MUTED}export STRIX_LLM='strix/gpt-5'${NC}"
|
||||
echo ""
|
||||
echo -e " ${CYAN}3.${NC} Run a penetration test:"
|
||||
echo -e " ${MUTED}strix --target https://example.com${NC}"
|
||||
|
||||
@@ -314,13 +314,37 @@ CRITICAL RULES:
|
||||
4. Use ONLY the exact format shown above. NEVER use JSON/YAML/INI or any other syntax for tools or parameters.
|
||||
5. When sending ANY multi-line content in tool parameters, use real newlines (actual line breaks). Do NOT emit literal "\n" sequences. Literal "\n" instead of real line breaks will cause tools to fail.
|
||||
6. Tool names must match exactly the tool "name" defined (no module prefixes, dots, or variants).
|
||||
- Correct: <function=think> ... </function>
|
||||
- Incorrect: <thinking_tools.think> ... </function>
|
||||
- Incorrect: <think> ... </think>
|
||||
- Incorrect: {"think": {...}}
|
||||
7. Parameters must use <parameter=param_name>value</parameter> exactly. Do NOT pass parameters as JSON or key:value lines. Do NOT add quotes/braces around values.
|
||||
8. Do NOT wrap tool calls in markdown/code fences or add any text before or after the tool block.
|
||||
|
||||
CORRECT format — use this EXACTLY:
|
||||
<function=tool_name>
|
||||
<parameter=param_name>value</parameter>
|
||||
</function>
|
||||
|
||||
WRONG formats — NEVER use these:
|
||||
- <invoke name="tool_name"><parameter name="param_name">value</parameter></invoke>
|
||||
- <function_calls><invoke name="tool_name">...</invoke></function_calls>
|
||||
- <tool_call><tool_name>...</tool_name></tool_call>
|
||||
- {"tool_name": {"param_name": "value"}}
|
||||
- ```<function=tool_name>...</function>```
|
||||
- <function=tool_name>value_without_parameter_tags</function>
|
||||
|
||||
EVERY argument MUST be wrapped in <parameter=name>...</parameter> tags. NEVER put values directly in the function body without parameter tags. This WILL cause the tool call to fail.
|
||||
|
||||
Do NOT emit any extra XML tags in your output. In particular:
|
||||
- NO <thinking>...</thinking> or <thought>...</thought> blocks
|
||||
- NO <scratchpad>...</scratchpad> or <reasoning>...</reasoning> blocks
|
||||
- NO <answer>...</answer> or <response>...</response> wrappers
|
||||
If you need to reason, use the think tool. Your raw output must contain ONLY the tool call — no surrounding XML tags.
|
||||
|
||||
Notice: use <function=X> NOT <invoke name="X">, use <parameter=X> NOT <parameter name="X">, use </function> NOT </invoke>.
|
||||
|
||||
Example (terminal tool):
|
||||
<function=terminal_execute>
|
||||
<parameter=command>nmap -sV -p 1-1000 target.com</parameter>
|
||||
</function>
|
||||
|
||||
Example (agent creation tool):
|
||||
<function=create_agent>
|
||||
<parameter=task>Perform targeted XSS testing on the search endpoint</parameter>
|
||||
|
||||
@@ -187,6 +187,9 @@ def resolve_llm_config() -> tuple[str | None, str | None, str | None]:
|
||||
|
||||
Returns:
|
||||
tuple: (model_name, api_key, api_base)
|
||||
- model_name: Original model name (strix/ prefix preserved for display)
|
||||
- api_key: LLM API key
|
||||
- api_base: API base URL (auto-set to STRIX_API_BASE for strix/ models)
|
||||
"""
|
||||
model = Config.get("strix_llm")
|
||||
if not model:
|
||||
@@ -195,10 +198,8 @@ def resolve_llm_config() -> tuple[str | None, str | None, str | None]:
|
||||
api_key = Config.get("llm_api_key")
|
||||
|
||||
if model.startswith("strix/"):
|
||||
model_name = "openai/" + model[6:]
|
||||
api_base: str | None = STRIX_API_BASE
|
||||
else:
|
||||
model_name = model
|
||||
api_base = (
|
||||
Config.get("llm_api_base")
|
||||
or Config.get("openai_api_base")
|
||||
@@ -206,4 +207,4 @@ def resolve_llm_config() -> tuple[str | None, str | None, str | None]:
|
||||
or Config.get("ollama_api_base")
|
||||
)
|
||||
|
||||
return model_name, api_key, api_base
|
||||
return model, api_key, api_base
|
||||
|
||||
@@ -18,6 +18,8 @@ from rich.panel import Panel
|
||||
from rich.text import Text
|
||||
|
||||
from strix.config import Config, apply_saved_config, save_current_config
|
||||
from strix.config.config import resolve_llm_config
|
||||
from strix.llm.utils import resolve_strix_model
|
||||
|
||||
|
||||
apply_saved_config()
|
||||
@@ -99,7 +101,7 @@ def validate_environment() -> None: # noqa: PLR0912, PLR0915
|
||||
error_text.append("• ", style="white")
|
||||
error_text.append("STRIX_LLM", style="bold cyan")
|
||||
error_text.append(
|
||||
" - Model name to use with litellm (e.g., 'anthropic/claude-sonnet-4-6')\n",
|
||||
" - Model name to use with litellm (e.g., 'openai/gpt-5')\n",
|
||||
style="white",
|
||||
)
|
||||
|
||||
@@ -139,9 +141,9 @@ def validate_environment() -> None: # noqa: PLR0912, PLR0915
|
||||
|
||||
error_text.append("\nExample setup:\n", style="white")
|
||||
if uses_strix_models:
|
||||
error_text.append("export STRIX_LLM='strix/claude-sonnet-4.6'\n", style="dim white")
|
||||
error_text.append("export STRIX_LLM='strix/gpt-5'\n", style="dim white")
|
||||
else:
|
||||
error_text.append("export STRIX_LLM='anthropic/claude-sonnet-4-6'\n", style="dim white")
|
||||
error_text.append("export STRIX_LLM='openai/gpt-5'\n", style="dim white")
|
||||
|
||||
if missing_optional_vars:
|
||||
for var in missing_optional_vars:
|
||||
@@ -204,12 +206,12 @@ def check_docker_installed() -> None:
|
||||
|
||||
|
||||
async def warm_up_llm() -> None:
|
||||
from strix.config.config import resolve_llm_config
|
||||
|
||||
console = Console()
|
||||
|
||||
try:
|
||||
model_name, api_key, api_base = resolve_llm_config()
|
||||
litellm_model, _ = resolve_strix_model(model_name)
|
||||
litellm_model = litellm_model or model_name
|
||||
|
||||
test_messages = [
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
@@ -219,7 +221,7 @@ async def warm_up_llm() -> None:
|
||||
llm_timeout = int(Config.get("llm_timeout") or "300")
|
||||
|
||||
completion_kwargs: dict[str, Any] = {
|
||||
"model": model_name,
|
||||
"model": litellm_model,
|
||||
"messages": test_messages,
|
||||
"timeout": llm_timeout,
|
||||
}
|
||||
|
||||
@@ -3,8 +3,11 @@ import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Literal
|
||||
|
||||
from strix.llm.utils import normalize_tool_format
|
||||
|
||||
|
||||
_FUNCTION_TAG_PREFIX = "<function="
|
||||
_INVOKE_TAG_PREFIX = "<invoke "
|
||||
|
||||
_FUNC_PATTERN = re.compile(r"<function=([^>]+)>")
|
||||
_FUNC_END_PATTERN = re.compile(r"</function>")
|
||||
@@ -21,9 +24,8 @@ def _get_safe_content(content: str) -> tuple[str, str]:
|
||||
return content, ""
|
||||
|
||||
suffix = content[last_lt:]
|
||||
target = _FUNCTION_TAG_PREFIX # "<function="
|
||||
|
||||
if target.startswith(suffix):
|
||||
if _FUNCTION_TAG_PREFIX.startswith(suffix) or _INVOKE_TAG_PREFIX.startswith(suffix):
|
||||
return content[:last_lt], suffix
|
||||
|
||||
return content, ""
|
||||
@@ -42,6 +44,8 @@ def parse_streaming_content(content: str) -> list[StreamSegment]:
|
||||
if not content:
|
||||
return []
|
||||
|
||||
content = normalize_tool_format(content)
|
||||
|
||||
segments: list[StreamSegment] = []
|
||||
|
||||
func_matches = list(_FUNC_PATTERN.finditer(content))
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from strix.config import Config
|
||||
from strix.config.config import resolve_llm_config
|
||||
from strix.llm.utils import resolve_strix_model
|
||||
|
||||
|
||||
class LLMConfig:
|
||||
@@ -17,6 +18,10 @@ class LLMConfig:
|
||||
if not self.model_name:
|
||||
raise ValueError("STRIX_LLM environment variable must be set and not empty")
|
||||
|
||||
api_model, canonical = resolve_strix_model(self.model_name)
|
||||
self.litellm_model: str = api_model or self.model_name
|
||||
self.canonical_model: str = canonical or self.model_name
|
||||
|
||||
self.enable_prompt_caching = enable_prompt_caching
|
||||
self.skills = skills or []
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ from typing import Any
|
||||
import litellm
|
||||
|
||||
from strix.config.config import resolve_llm_config
|
||||
from strix.llm.utils import resolve_strix_model
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -156,6 +157,8 @@ def check_duplicate(
|
||||
comparison_data = {"candidate": candidate_cleaned, "existing_reports": existing_cleaned}
|
||||
|
||||
model_name, api_key, api_base = resolve_llm_config()
|
||||
litellm_model, _ = resolve_strix_model(model_name)
|
||||
litellm_model = litellm_model or model_name
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": DEDUPE_SYSTEM_PROMPT},
|
||||
@@ -170,7 +173,7 @@ def check_duplicate(
|
||||
]
|
||||
|
||||
completion_kwargs: dict[str, Any] = {
|
||||
"model": model_name,
|
||||
"model": litellm_model,
|
||||
"messages": messages,
|
||||
"timeout": 120,
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ from strix.llm.memory_compressor import MemoryCompressor
|
||||
from strix.llm.utils import (
|
||||
_truncate_to_first_function,
|
||||
fix_incomplete_tool_call,
|
||||
normalize_tool_format,
|
||||
parse_tool_invocations,
|
||||
)
|
||||
from strix.skills import load_skills
|
||||
@@ -63,7 +64,7 @@ class LLM:
|
||||
self.agent_name = agent_name
|
||||
self.agent_id: str | None = None
|
||||
self._total_stats = RequestStats()
|
||||
self.memory_compressor = MemoryCompressor(model_name=config.model_name)
|
||||
self.memory_compressor = MemoryCompressor(model_name=config.litellm_model)
|
||||
self.system_prompt = self._load_system_prompt(agent_name)
|
||||
|
||||
reasoning = Config.get("strix_reasoning_effort")
|
||||
@@ -143,10 +144,10 @@ class LLM:
|
||||
delta = self._get_chunk_content(chunk)
|
||||
if delta:
|
||||
accumulated += delta
|
||||
if "</function>" in accumulated:
|
||||
accumulated = accumulated[
|
||||
: accumulated.find("</function>") + len("</function>")
|
||||
]
|
||||
if "</function>" in accumulated or "</invoke>" in accumulated:
|
||||
end_tag = "</function>" if "</function>" in accumulated else "</invoke>"
|
||||
pos = accumulated.find(end_tag)
|
||||
accumulated = accumulated[: pos + len(end_tag)]
|
||||
yield LLMResponse(content=accumulated)
|
||||
done_streaming = 1
|
||||
continue
|
||||
@@ -155,6 +156,7 @@ class LLM:
|
||||
if chunks:
|
||||
self._update_usage_stats(stream_chunk_builder(chunks))
|
||||
|
||||
accumulated = normalize_tool_format(accumulated)
|
||||
accumulated = fix_incomplete_tool_call(_truncate_to_first_function(accumulated))
|
||||
yield LLMResponse(
|
||||
content=accumulated,
|
||||
@@ -184,6 +186,9 @@ class LLM:
|
||||
conversation_history.extend(compressed)
|
||||
messages.extend(compressed)
|
||||
|
||||
if messages[-1].get("role") == "assistant":
|
||||
messages.append({"role": "user", "content": "<meta>Continue the task.</meta>"})
|
||||
|
||||
if self._is_anthropic() and self.config.enable_prompt_caching:
|
||||
messages = self._add_cache_control(messages)
|
||||
|
||||
@@ -194,7 +199,7 @@ class LLM:
|
||||
messages = self._strip_images(messages)
|
||||
|
||||
args: dict[str, Any] = {
|
||||
"model": self.config.model_name,
|
||||
"model": self.config.litellm_model,
|
||||
"messages": messages,
|
||||
"timeout": self.config.timeout,
|
||||
"stream_options": {"include_usage": True},
|
||||
@@ -229,8 +234,8 @@ class LLM:
|
||||
def _update_usage_stats(self, response: Any) -> None:
|
||||
try:
|
||||
if hasattr(response, "usage") and response.usage:
|
||||
input_tokens = getattr(response.usage, "prompt_tokens", 0)
|
||||
output_tokens = getattr(response.usage, "completion_tokens", 0)
|
||||
input_tokens = getattr(response.usage, "prompt_tokens", 0) or 0
|
||||
output_tokens = getattr(response.usage, "completion_tokens", 0) or 0
|
||||
|
||||
cached_tokens = 0
|
||||
if hasattr(response.usage, "prompt_tokens_details"):
|
||||
@@ -238,14 +243,11 @@ class LLM:
|
||||
if hasattr(prompt_details, "cached_tokens"):
|
||||
cached_tokens = prompt_details.cached_tokens or 0
|
||||
|
||||
cost = self._extract_cost(response)
|
||||
else:
|
||||
input_tokens = 0
|
||||
output_tokens = 0
|
||||
cached_tokens = 0
|
||||
|
||||
try:
|
||||
cost = completion_cost(response) or 0.0
|
||||
except Exception: # noqa: BLE001
|
||||
cost = 0.0
|
||||
|
||||
self._total_stats.input_tokens += input_tokens
|
||||
@@ -256,6 +258,18 @@ class LLM:
|
||||
except Exception: # noqa: BLE001, S110 # nosec B110
|
||||
pass
|
||||
|
||||
def _extract_cost(self, response: Any) -> float:
|
||||
if hasattr(response, "usage") and response.usage:
|
||||
direct_cost = getattr(response.usage, "cost", None)
|
||||
if direct_cost is not None:
|
||||
return float(direct_cost)
|
||||
try:
|
||||
if hasattr(response, "_hidden_params"):
|
||||
response._hidden_params.pop("custom_llm_provider", None)
|
||||
return completion_cost(response, model=self.config.canonical_model) or 0.0
|
||||
except Exception: # noqa: BLE001
|
||||
return 0.0
|
||||
|
||||
def _should_retry(self, e: Exception) -> bool:
|
||||
code = getattr(e, "status_code", None) or getattr(
|
||||
getattr(e, "response", None), "status_code", None
|
||||
@@ -275,13 +289,13 @@ class LLM:
|
||||
|
||||
def _supports_vision(self) -> bool:
|
||||
try:
|
||||
return bool(supports_vision(model=self.config.model_name))
|
||||
return bool(supports_vision(model=self.config.canonical_model))
|
||||
except Exception: # noqa: BLE001
|
||||
return False
|
||||
|
||||
def _supports_reasoning(self) -> bool:
|
||||
try:
|
||||
return bool(supports_reasoning(model=self.config.model_name))
|
||||
return bool(supports_reasoning(model=self.config.canonical_model))
|
||||
except Exception: # noqa: BLE001
|
||||
return False
|
||||
|
||||
@@ -302,7 +316,7 @@ class LLM:
|
||||
return result
|
||||
|
||||
def _add_cache_control(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
if not messages or not supports_prompt_caching(self.config.model_name):
|
||||
if not messages or not supports_prompt_caching(self.config.canonical_model):
|
||||
return messages
|
||||
|
||||
result = list(messages)
|
||||
|
||||
@@ -91,7 +91,7 @@ def _summarize_messages(
|
||||
if not messages:
|
||||
empty_summary = "<context_summary message_count='0'>{text}</context_summary>"
|
||||
return {
|
||||
"role": "assistant",
|
||||
"role": "user",
|
||||
"content": empty_summary.format(text="No messages to summarize"),
|
||||
}
|
||||
|
||||
@@ -123,7 +123,7 @@ def _summarize_messages(
|
||||
return messages[0]
|
||||
summary_msg = "<context_summary message_count='{count}'>{text}</context_summary>"
|
||||
return {
|
||||
"role": "assistant",
|
||||
"role": "user",
|
||||
"content": summary_msg.format(count=len(messages), text=summary),
|
||||
}
|
||||
except Exception:
|
||||
@@ -158,7 +158,7 @@ class MemoryCompressor:
|
||||
):
|
||||
self.max_images = max_images
|
||||
self.model_name = model_name or Config.get("strix_llm")
|
||||
self.timeout = timeout or int(Config.get("strix_memory_compressor_timeout") or "30")
|
||||
self.timeout = timeout or int(Config.get("strix_memory_compressor_timeout") or "120")
|
||||
|
||||
if not self.model_name:
|
||||
raise ValueError("STRIX_LLM environment variable must be set and not empty")
|
||||
|
||||
@@ -3,11 +3,75 @@ import re
|
||||
from typing import Any
|
||||
|
||||
|
||||
_INVOKE_OPEN = re.compile(r'<invoke\s+name=["\']([^"\']+)["\']>')
_PARAM_NAME_ATTR = re.compile(r'<parameter\s+name=["\']([^"\']+)["\']>')
_FUNCTION_CALLS_TAG = re.compile(r"</?function_calls>")
_STRIP_TAG_QUOTES = re.compile(r"<(function|parameter)\s*=\s*([^>]*?)>")


def normalize_tool_format(content: str) -> str:
    """Rewrite alternative tool-call XML dialects into the canonical form.

    Canonicalizations performed:
        <function_calls>...</function_calls> wrapper  -> removed
        <invoke name="X">                             -> <function=X>
        <parameter name="X">                          -> <parameter=X>
        </invoke>                                     -> </function>
        <function="X"> / <parameter="X">              -> <function=X> / <parameter=X>
    """
    has_invoke_dialect = "<invoke" in content or "<function_calls" in content
    if has_invoke_dialect:
        # Convert the attribute-style markup into the canonical tag shape.
        content = _FUNCTION_CALLS_TAG.sub("", content)
        content = _INVOKE_OPEN.sub(r"<function=\1>", content)
        content = _PARAM_NAME_ATTR.sub(r"<parameter=\1>", content)
        content = content.replace("</invoke>", "</function>")

    def _dequote(match: re.Match[str]) -> str:
        # Drop surrounding whitespace and any quote characters around the value.
        tag = match.group(1)
        value = match.group(2).strip().strip("\"'")
        return f"<{tag}={value}>"

    return _STRIP_TAG_QUOTES.sub(_dequote, content)
|
||||
|
||||
|
||||
STRIX_MODEL_MAP: dict[str, str] = {
|
||||
"claude-sonnet-4.6": "anthropic/claude-sonnet-4-6",
|
||||
"claude-opus-4.6": "anthropic/claude-opus-4-6",
|
||||
"gpt-5.2": "openai/gpt-5.2",
|
||||
"gpt-5.1": "openai/gpt-5.1",
|
||||
"gpt-5": "openai/gpt-5",
|
||||
"gpt-5.2-codex": "openai/gpt-5.2-codex",
|
||||
"gpt-5.1-codex-max": "openai/gpt-5.1-codex-max",
|
||||
"gpt-5.1-codex": "openai/gpt-5.1-codex",
|
||||
"gpt-5-codex": "openai/gpt-5-codex",
|
||||
"gemini-3-pro-preview": "gemini/gemini-3-pro-preview",
|
||||
"gemini-3-flash-preview": "gemini/gemini-3-flash-preview",
|
||||
"glm-5": "openrouter/z-ai/glm-5",
|
||||
"glm-4.7": "openrouter/z-ai/glm-4.7",
|
||||
}
|
||||
|
||||
|
||||
def resolve_strix_model(model_name: str | None) -> tuple[str | None, str | None]:
|
||||
"""Resolve a strix/ model into names for API calls and capability lookups.
|
||||
|
||||
Returns (api_model, canonical_model):
|
||||
- api_model: openai/<base> for API calls (Strix API is OpenAI-compatible)
|
||||
- canonical_model: actual provider model name for litellm capability lookups
|
||||
Non-strix models return the same name for both.
|
||||
"""
|
||||
if not model_name or not model_name.startswith("strix/"):
|
||||
return model_name, model_name
|
||||
|
||||
base_model = model_name[6:]
|
||||
api_model = f"openai/{base_model}"
|
||||
canonical_model = STRIX_MODEL_MAP.get(base_model, api_model)
|
||||
return api_model, canonical_model
|
||||
|
||||
|
||||
def _truncate_to_first_function(content: str) -> str:
|
||||
if not content:
|
||||
return content
|
||||
|
||||
function_starts = [match.start() for match in re.finditer(r"<function=", content)]
|
||||
function_starts = [
|
||||
match.start() for match in re.finditer(r"<function=|<invoke\s+name=", content)
|
||||
]
|
||||
|
||||
if len(function_starts) >= 2:
|
||||
second_function_start = function_starts[1]
|
||||
@@ -18,6 +82,7 @@ def _truncate_to_first_function(content: str) -> str:
|
||||
|
||||
|
||||
def parse_tool_invocations(content: str) -> list[dict[str, Any]] | None:
|
||||
content = normalize_tool_format(content)
|
||||
content = fix_incomplete_tool_call(content)
|
||||
|
||||
tool_invocations: list[dict[str, Any]] = []
|
||||
@@ -47,12 +112,14 @@ def parse_tool_invocations(content: str) -> list[dict[str, Any]] | None:
|
||||
|
||||
|
||||
def fix_incomplete_tool_call(content: str) -> str:
|
||||
"""Fix incomplete tool calls by adding missing </function> tag."""
|
||||
if (
|
||||
"<function=" in content
|
||||
and content.count("<function=") == 1
|
||||
and "</function>" not in content
|
||||
):
|
||||
"""Fix incomplete tool calls by adding missing closing tag.
|
||||
|
||||
Handles both ``<function=…>`` and ``<invoke name="…">`` formats.
|
||||
"""
|
||||
has_open = "<function=" in content or "<invoke " in content
|
||||
count_open = content.count("<function=") + content.count("<invoke ")
|
||||
has_close = "</function>" in content or "</invoke>" in content
|
||||
if has_open and count_open == 1 and not has_close:
|
||||
content = content.rstrip()
|
||||
content = content + "function>" if content.endswith("</") else content + "\n</function>"
|
||||
return content
|
||||
@@ -73,6 +140,7 @@ def clean_content(content: str) -> str:
|
||||
if not content:
|
||||
return ""
|
||||
|
||||
content = normalize_tool_format(content)
|
||||
content = fix_incomplete_tool_call(content)
|
||||
|
||||
tool_pattern = r"<function=[^>]+>.*?</function>"
|
||||
|
||||
Reference in New Issue
Block a user