Add OpenTelemetry observability with local JSONL traces (#347)

Co-authored-by: 0xallam <ahmed39652003@gmail.com>
This commit is contained in:
alex s
2026-03-09 05:11:24 -03:00
committed by GitHub
parent 048be1fe59
commit a60cb4b66c
15 changed files with 2880 additions and 48 deletions

View File

@@ -46,9 +46,37 @@ Configure Strix using environment variables or a config file.
</ParamField>
<ParamField path="STRIX_TELEMETRY" default="1" type="string">
Enable/disable anonymous telemetry. Set to `0`, `false`, `no`, or `off` to disable.
Global telemetry default toggle. Set to `0`, `false`, `no`, or `off` to disable both PostHog and OTEL unless overridden by per-channel flags below.
</ParamField>
<ParamField path="STRIX_OTEL_TELEMETRY" type="string">
Enable/disable OpenTelemetry run observability independently. When unset, falls back to `STRIX_TELEMETRY`.
</ParamField>
<ParamField path="STRIX_POSTHOG_TELEMETRY" type="string">
Enable/disable PostHog product telemetry independently. When unset, falls back to `STRIX_TELEMETRY`.
</ParamField>
<ParamField path="TRACELOOP_BASE_URL" type="string">
OTLP/Traceloop base URL for remote OpenTelemetry export. If unset, Strix keeps traces local only.
</ParamField>
<ParamField path="TRACELOOP_API_KEY" type="string">
API key used for remote trace export. Remote export is enabled only when both `TRACELOOP_BASE_URL` and `TRACELOOP_API_KEY` are set.
</ParamField>
<ParamField path="TRACELOOP_HEADERS" type="string">
Optional custom OTEL headers (JSON object or `key=value,key2=value2`). Useful for Langfuse or custom/self-hosted OTLP gateways.
</ParamField>
When remote OTEL vars are not set, Strix still writes complete run telemetry locally to:
```bash
strix_runs/<run_name>/events.jsonl
```
When remote vars are set, Strix dual-writes telemetry to both local JSONL and the remote OTEL endpoint.
## Docker Configuration
<ParamField path="STRIX_IMAGE" default="ghcr.io/usestrix/strix-sandbox:0.1.12" type="string">
@@ -106,4 +134,5 @@ export PERPLEXITY_API_KEY="pplx-..."
# Optional: Custom timeouts
export LLM_TIMEOUT="600"
export STRIX_SANDBOX_EXECUTION_TIMEOUT="300"
```

1512
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -56,6 +56,9 @@ textual = "^4.0.0"
xmltodict = "^0.13.0"
requests = "^2.32.0"
cvss = "^3.2"
traceloop-sdk = "^0.53.0"
opentelemetry-exporter-otlp-proto-http = "^1.40.0"
scrubadub = "^2.0.1"
# Optional LLM provider dependencies
google-cloud-aiplatform = { version = ">=1.38", optional = true }
@@ -148,6 +151,9 @@ module = [
"libtmux.*",
"pytest.*",
"cvss.*",
"opentelemetry.*",
"scrubadub.*",
"traceloop.*",
]
ignore_missing_imports = true
@@ -155,6 +161,7 @@ ignore_missing_imports = true
[[tool.mypy.overrides]]
module = ["tests.*"]
disallow_untyped_decorators = false
disallow_untyped_defs = false
# ============================================================================
# Ruff Configuration (Fast Python Linter & Formatter)

View File

@@ -47,6 +47,11 @@ class Config:
# Telemetry
strix_telemetry = "1"
strix_otel_telemetry = None
strix_posthog_telemetry = None
traceloop_base_url = None
traceloop_api_key = None
traceloop_headers = None
# Config file override (set via --config CLI arg)
_config_file_override: Path | None = None

View File

@@ -413,8 +413,6 @@ def display_completion_message(args: argparse.Namespace, results_path: Path) ->
if tracer and tracer.scan_results:
scan_completed = tracer.scan_results.get("scan_completed", False)
has_vulnerabilities = tracer and len(tracer.vulnerability_reports) > 0
completion_text = Text()
if scan_completed:
completion_text.append("Penetration test completed", style="bold #22c55e")
@@ -439,7 +437,6 @@ def display_completion_message(args: argparse.Namespace, results_path: Path) ->
if stats_text.plain:
panel_parts.extend(["\n", stats_text])
if scan_completed or has_vulnerabilities:
results_text = Text()
results_text.append("\n")
results_text.append("Output", style="dim")

23
strix/telemetry/flags.py Normal file
View File

@@ -0,0 +1,23 @@
from strix.config import Config
_DISABLED_VALUES = {"0", "false", "no", "off"}
def _is_enabled(raw_value: str | None, default: str = "1") -> bool:
value = (raw_value if raw_value is not None else default).strip().lower()
return value not in _DISABLED_VALUES
def is_otel_enabled() -> bool:
explicit = Config.get("strix_otel_telemetry")
if explicit is not None:
return _is_enabled(explicit)
return _is_enabled(Config.get("strix_telemetry"), default="1")
def is_posthog_enabled() -> bool:
explicit = Config.get("strix_posthog_telemetry")
if explicit is not None:
return _is_enabled(explicit)
return _is_enabled(Config.get("strix_telemetry"), default="1")

View File

@@ -6,7 +6,7 @@ from pathlib import Path
from typing import TYPE_CHECKING, Any
from uuid import uuid4
from strix.config import Config
from strix.telemetry.flags import is_posthog_enabled
if TYPE_CHECKING:
@@ -19,7 +19,7 @@ _SESSION_ID = uuid4().hex[:16]
def _is_enabled() -> bool:
return (Config.get("strix_telemetry") or "1").lower() not in ("0", "false", "no", "off")
return is_posthog_enabled()
def _is_first_run() -> bool:

View File

@@ -1,20 +1,40 @@
import json
import logging
import threading
from datetime import UTC, datetime
from pathlib import Path
from typing import TYPE_CHECKING, Any, Optional
from typing import Any, Callable, Optional
from uuid import uuid4
from opentelemetry import trace
from opentelemetry.trace import SpanContext, SpanKind
from strix.config import Config
from strix.telemetry import posthog
from strix.telemetry.flags import is_otel_enabled
from strix.telemetry.utils import (
TelemetrySanitizer,
append_jsonl_record,
bootstrap_otel,
format_span_id,
format_trace_id,
get_events_write_lock,
)
if TYPE_CHECKING:
from collections.abc import Callable
try:
from traceloop.sdk import Traceloop
except ImportError: # pragma: no cover - exercised when dependency is absent
Traceloop = None # type: ignore[assignment,unused-ignore]
logger = logging.getLogger(__name__)
_global_tracer: Optional["Tracer"] = None
_OTEL_BOOTSTRAP_LOCK = threading.Lock()
_OTEL_BOOTSTRAPPED = False
_OTEL_REMOTE_ENABLED = False
def get_global_tracer() -> Optional["Tracer"]:
return _global_tracer
@@ -52,16 +72,225 @@ class Tracer:
"status": "running",
}
self._run_dir: Path | None = None
self._events_file_path: Path | None = None
self._next_execution_id = 1
self._next_message_id = 1
self._saved_vuln_ids: set[str] = set()
self._run_completed_emitted = False
self._telemetry_enabled = is_otel_enabled()
self._sanitizer = TelemetrySanitizer()
self._otel_tracer: Any = None
self._remote_export_enabled = False
self.caido_url: str | None = None
self.vulnerability_found_callback: Callable[[dict[str, Any]], None] | None = None
self._setup_telemetry()
self._emit_run_started_event()
@property
def events_file_path(self) -> Path:
if self._events_file_path is None:
self._events_file_path = self.get_run_dir() / "events.jsonl"
return self._events_file_path
def _active_events_file_path(self) -> Path:
active = get_global_tracer()
if active and active._events_file_path is not None:
return active._events_file_path
return self.events_file_path
def _get_events_write_lock(self, output_path: Path | None = None) -> threading.Lock:
path = output_path or self.events_file_path
return get_events_write_lock(path)
def _active_run_metadata(self) -> dict[str, Any]:
active = get_global_tracer()
if active:
return active.run_metadata
return self.run_metadata
def _setup_telemetry(self) -> None:
global _OTEL_BOOTSTRAPPED, _OTEL_REMOTE_ENABLED
if not self._telemetry_enabled:
self._otel_tracer = None
self._remote_export_enabled = False
return
run_dir = self.get_run_dir()
self._events_file_path = run_dir / "events.jsonl"
base_url = (Config.get("traceloop_base_url") or "").strip()
api_key = (Config.get("traceloop_api_key") or "").strip()
headers_raw = Config.get("traceloop_headers") or ""
(
self._otel_tracer,
self._remote_export_enabled,
_OTEL_BOOTSTRAPPED,
_OTEL_REMOTE_ENABLED,
) = bootstrap_otel(
bootstrapped=_OTEL_BOOTSTRAPPED,
remote_enabled_state=_OTEL_REMOTE_ENABLED,
bootstrap_lock=_OTEL_BOOTSTRAP_LOCK,
traceloop=Traceloop,
base_url=base_url,
api_key=api_key,
headers_raw=headers_raw,
output_path_getter=self._active_events_file_path,
run_metadata_getter=self._active_run_metadata,
sanitizer=self._sanitize_data,
write_lock_getter=self._get_events_write_lock,
tracer_name="strix.telemetry.tracer",
)
def _set_association_properties(self, properties: dict[str, Any]) -> None:
if Traceloop is None:
return
sanitized = self._sanitize_data(properties)
try:
Traceloop.set_association_properties(sanitized)
except Exception: # noqa: BLE001
logger.debug("Failed to set Traceloop association properties")
def _sanitize_data(self, data: Any, key_hint: str | None = None) -> Any:
return self._sanitizer.sanitize(data, key_hint=key_hint)
def _append_event_record(self, record: dict[str, Any]) -> None:
try:
append_jsonl_record(self.events_file_path, record)
except OSError:
logger.exception("Failed to append JSONL event record")
def _enrich_actor(self, actor: dict[str, Any] | None) -> dict[str, Any] | None:
if not actor:
return None
enriched = dict(actor)
if "agent_name" in enriched:
return enriched
agent_id = enriched.get("agent_id")
if not isinstance(agent_id, str):
return enriched
agent_data = self.agents.get(agent_id, {})
agent_name = agent_data.get("name")
if isinstance(agent_name, str) and agent_name:
enriched["agent_name"] = agent_name
return enriched
def _emit_event(
self,
event_type: str,
actor: dict[str, Any] | None = None,
payload: Any | None = None,
status: str | None = None,
error: Any | None = None,
source: str = "strix.tracer",
include_run_metadata: bool = False,
) -> None:
if not self._telemetry_enabled:
return
enriched_actor = self._enrich_actor(actor)
sanitized_actor = self._sanitize_data(enriched_actor) if enriched_actor else None
sanitized_payload = self._sanitize_data(payload) if payload is not None else None
sanitized_error = self._sanitize_data(error) if error is not None else None
trace_id: str | None = None
span_id: str | None = None
parent_span_id: str | None = None
current_context = trace.get_current_span().get_span_context()
if isinstance(current_context, SpanContext) and current_context.is_valid:
parent_span_id = format_span_id(current_context.span_id)
if self._otel_tracer is not None:
try:
with self._otel_tracer.start_as_current_span(
f"strix.{event_type}",
kind=SpanKind.INTERNAL,
) as span:
span_context = span.get_span_context()
trace_id = format_trace_id(span_context.trace_id)
span_id = format_span_id(span_context.span_id)
span.set_attribute("strix.event_type", event_type)
span.set_attribute("strix.source", source)
span.set_attribute("strix.run_id", self.run_id)
span.set_attribute("strix.run_name", self.run_name or "")
if status:
span.set_attribute("strix.status", status)
if sanitized_actor is not None:
span.set_attribute(
"strix.actor",
json.dumps(sanitized_actor, ensure_ascii=False),
)
if sanitized_payload is not None:
span.set_attribute(
"strix.payload",
json.dumps(sanitized_payload, ensure_ascii=False),
)
if sanitized_error is not None:
span.set_attribute(
"strix.error",
json.dumps(sanitized_error, ensure_ascii=False),
)
except Exception: # noqa: BLE001
logger.debug("Failed to create OTEL span for event type '%s'", event_type)
if trace_id is None:
trace_id = format_trace_id(uuid4().int & ((1 << 128) - 1)) or uuid4().hex
if span_id is None:
span_id = format_span_id(uuid4().int & ((1 << 64) - 1)) or uuid4().hex[:16]
record = {
"timestamp": datetime.now(UTC).isoformat(),
"event_type": event_type,
"run_id": self.run_id,
"trace_id": trace_id,
"span_id": span_id,
"parent_span_id": parent_span_id,
"actor": sanitized_actor,
"payload": sanitized_payload,
"status": status,
"error": sanitized_error,
"source": source,
}
if include_run_metadata:
record["run_metadata"] = self._sanitize_data(self.run_metadata)
self._append_event_record(record)
def set_run_name(self, run_name: str) -> None:
self.run_name = run_name
self.run_id = run_name
self.run_metadata["run_name"] = run_name
self.run_metadata["run_id"] = run_name
self._run_dir = None
self._events_file_path = None
self._run_completed_emitted = False
self._set_association_properties({"run_id": self.run_id, "run_name": self.run_name or ""})
self._emit_run_started_event()
def _emit_run_started_event(self) -> None:
if not self._telemetry_enabled:
return
self._emit_event(
"run.started",
payload={
"run_name": self.run_name,
"start_time": self.start_time,
"local_jsonl_path": str(self.events_file_path),
"remote_export_enabled": self._remote_export_enabled,
},
status="running",
include_run_metadata=True,
)
def get_run_dir(self) -> Path:
if self._run_dir is None:
@@ -134,6 +363,12 @@ class Tracer:
self.vulnerability_reports.append(report)
logger.info(f"Added vulnerability report: {report_id} - {title}")
posthog.finding(severity)
self._emit_event(
"finding.created",
payload={"report": report},
status=report["severity"],
source="strix.findings",
)
if self.vulnerability_found_callback:
self.vulnerability_found_callback(report)
@@ -178,11 +413,24 @@ class Tracer:
"""
logger.info("Updated scan final fields")
self._emit_event(
"finding.reviewed",
payload={
"scan_completed": True,
"vulnerability_count": len(self.vulnerability_reports),
},
status="completed",
source="strix.findings",
)
self.save_run_data(mark_complete=True)
posthog.end(self, exit_reason="finished_by_tool")
def log_agent_creation(
self, agent_id: str, name: str, task: str, parent_id: str | None = None
self,
agent_id: str,
name: str,
task: str,
parent_id: str | None = None,
) -> None:
agent_data: dict[str, Any] = {
"id": agent_id,
@@ -196,6 +444,13 @@ class Tracer:
}
self.agents[agent_id] = agent_data
self._emit_event(
"agent.created",
actor={"agent_id": agent_id, "agent_name": name},
payload={"task": task, "parent_id": parent_id},
status="running",
source="strix.agents",
)
def log_chat_message(
self,
@@ -217,9 +472,21 @@ class Tracer:
}
self.chat_messages.append(message_data)
self._emit_event(
"chat.message",
actor={"agent_id": agent_id, "role": role},
payload={"message_id": message_id, "content": content, "metadata": metadata or {}},
status="logged",
source="strix.chat",
)
return message_id
def log_tool_execution_start(self, agent_id: str, tool_name: str, args: dict[str, Any]) -> int:
def log_tool_execution_start(
self,
agent_id: str,
tool_name: str,
args: dict[str, Any],
) -> int:
execution_id = self._next_execution_id
self._next_execution_id += 1
@@ -241,18 +508,67 @@ class Tracer:
if agent_id in self.agents:
self.agents[agent_id]["tool_executions"].append(execution_id)
self._emit_event(
"tool.execution.started",
actor={
"agent_id": agent_id,
"tool_name": tool_name,
"execution_id": execution_id,
},
payload={"args": args},
status="running",
source="strix.tools",
)
return execution_id
def update_tool_execution(
self, execution_id: int, status: str, result: Any | None = None
self,
execution_id: int,
status: str,
result: Any | None = None,
) -> None:
if execution_id in self.tool_executions:
self.tool_executions[execution_id]["status"] = status
self.tool_executions[execution_id]["result"] = result
self.tool_executions[execution_id]["completed_at"] = datetime.now(UTC).isoformat()
if execution_id not in self.tool_executions:
return
tool_data = self.tool_executions[execution_id]
tool_data["status"] = status
tool_data["result"] = result
tool_data["completed_at"] = datetime.now(UTC).isoformat()
tool_name = str(tool_data.get("tool_name", "unknown"))
agent_id = str(tool_data.get("agent_id", "unknown"))
error_payload = result if status in {"error", "failed"} else None
self._emit_event(
"tool.execution.updated",
actor={
"agent_id": agent_id,
"tool_name": tool_name,
"execution_id": execution_id,
},
payload={"result": result},
status=status,
error=error_payload,
source="strix.tools",
)
if tool_name == "create_vulnerability_report":
finding_status = "reviewed" if status == "completed" else "rejected"
self._emit_event(
"finding.reviewed",
actor={"agent_id": agent_id, "tool_name": tool_name},
payload={"execution_id": execution_id, "result": result},
status=finding_status,
error=error_payload,
source="strix.findings",
)
def update_agent_status(
self, agent_id: str, status: str, error_message: str | None = None
self,
agent_id: str,
status: str,
error_message: str | None = None,
) -> None:
if agent_id in self.agents:
self.agents[agent_id]["status"] = status
@@ -260,6 +576,15 @@ class Tracer:
if error_message:
self.agents[agent_id]["error_message"] = error_message
self._emit_event(
"agent.status.updated",
actor={"agent_id": agent_id},
payload={"error_message": error_message},
status=status,
error=error_message,
source="strix.agents",
)
def set_scan_config(self, config: dict[str, Any]) -> None:
self.scan_config = config
self.run_metadata.update(
@@ -269,13 +594,29 @@ class Tracer:
"max_iterations": config.get("max_iterations", 200),
}
)
self.get_run_dir()
self._set_association_properties(
{
"run_id": self.run_id,
"run_name": self.run_name or "",
"targets": config.get("targets", []),
"max_iterations": config.get("max_iterations", 200),
}
)
self._emit_event(
"run.configured",
payload={"scan_config": config},
status="configured",
source="strix.run",
)
def save_run_data(self, mark_complete: bool = False) -> None: # noqa: PLR0912, PLR0915
def save_run_data(self, mark_complete: bool = False) -> None:
try:
run_dir = self.get_run_dir()
if mark_complete:
if self.end_time is None:
self.end_time = datetime.now(UTC).isoformat()
self.run_metadata["end_time"] = self.end_time
self.run_metadata["status"] = "completed"
if self.final_scan_result:
penetration_test_report_file = run_dir / "penetration_test_report.md"
@@ -286,7 +627,8 @@ class Tracer:
)
f.write(f"{self.final_scan_result}\n")
logger.info(
f"Saved final penetration test report to: {penetration_test_report_file}"
"Saved final penetration test report to: %s",
penetration_test_report_file,
)
if self.vulnerability_reports:
@@ -302,7 +644,10 @@ class Tracer:
severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
sorted_reports = sorted(
self.vulnerability_reports,
key=lambda x: (severity_order.get(x["severity"], 5), x["timestamp"]),
key=lambda report: (
severity_order.get(report["severity"], 5),
report["timestamp"],
),
)
for report in new_reports:
@@ -329,8 +674,8 @@ class Tracer:
f.write(f"**{label}:** {value}\n")
f.write("\n## Description\n\n")
desc = report.get("description") or "No description provided."
f.write(f"{desc}\n\n")
description = report.get("description") or "No description provided."
f.write(f"{description}\n\n")
if report.get("impact"):
f.write("## Impact\n\n")
@@ -404,11 +749,25 @@ class Tracer:
if new_reports:
logger.info(
f"Saved {len(new_reports)} new vulnerability report(s) to: {vuln_dir}"
"Saved %d new vulnerability report(s) to: %s",
len(new_reports),
vuln_dir,
)
logger.info(f"Updated vulnerability index: {vuln_csv_file}")
logger.info("Updated vulnerability index: %s", vuln_csv_file)
logger.info(f"📊 Essential scan data saved to: {run_dir}")
logger.info("📊 Essential scan data saved to: %s", run_dir)
if mark_complete and not self._run_completed_emitted:
self._emit_event(
"run.completed",
payload={
"duration_seconds": self._calculate_duration(),
"vulnerability_count": len(self.vulnerability_reports),
},
status="completed",
source="strix.run",
include_run_metadata=True,
)
self._run_completed_emitted = True
except (OSError, RuntimeError):
logger.exception("Failed to save scan data")

413
strix/telemetry/utils.py Normal file
View File

@@ -0,0 +1,413 @@
import json
import logging
import re
import threading
from collections.abc import Callable, Sequence
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from opentelemetry import trace
from opentelemetry.sdk.trace import ReadableSpan, TracerProvider
from opentelemetry.sdk.trace.export import (
BatchSpanProcessor,
SimpleSpanProcessor,
SpanExporter,
SpanExportResult,
)
from scrubadub import Scrubber
from scrubadub.detectors import RegexDetector
from scrubadub.filth import Filth
logger = logging.getLogger(__name__)
_REDACTED = "[REDACTED]"
_SCREENSHOT_OMITTED = "[SCREENSHOT_OMITTED]"
_SCREENSHOT_KEY_PATTERN = re.compile(r"screenshot", re.IGNORECASE)
_SENSITIVE_KEY_PATTERN = re.compile(
r"(api[_-]?key|token|secret|password|authorization|cookie|session|credential|private[_-]?key)",
re.IGNORECASE,
)
_SENSITIVE_TOKEN_PATTERN = re.compile(
r"(?i)\b("
r"bearer\s+[a-z0-9._-]+|"
r"sk-[a-z0-9_-]{8,}|"
r"gh[pousr]_[a-z0-9_-]{12,}|"
r"xox[baprs]-[a-z0-9-]{12,}"
r")\b"
)
_SCRUBADUB_PLACEHOLDER_PATTERN = re.compile(r"\{\{[^}]+\}\}")
_EVENTS_FILE_LOCKS_LOCK = threading.Lock()
_EVENTS_FILE_LOCKS: dict[str, threading.Lock] = {}
_NOISY_OTEL_CONTENT_PREFIXES = (
"gen_ai.prompt.",
"gen_ai.completion.",
"llm.input_messages.",
"llm.output_messages.",
)
_NOISY_OTEL_EXACT_KEYS = {
"llm.input",
"llm.output",
"llm.prompt",
"llm.completion",
}
class _SecretFilth(Filth): # type: ignore[misc]
type = "secret"
class _SecretTokenDetector(RegexDetector): # type: ignore[misc]
name = "strix_secret_token_detector"
filth_cls = _SecretFilth
regex = _SENSITIVE_TOKEN_PATTERN
class TelemetrySanitizer:
def __init__(self) -> None:
self._scrubber = Scrubber(detector_list=[_SecretTokenDetector])
def sanitize(self, data: Any, key_hint: str | None = None) -> Any: # noqa: PLR0911
if data is None:
return None
if isinstance(data, dict):
sanitized: dict[str, Any] = {}
for key, value in data.items():
key_str = str(key)
if _SCREENSHOT_KEY_PATTERN.search(key_str):
sanitized[key_str] = _SCREENSHOT_OMITTED
elif _SENSITIVE_KEY_PATTERN.search(key_str):
sanitized[key_str] = _REDACTED
else:
sanitized[key_str] = self.sanitize(value, key_hint=key_str)
return sanitized
if isinstance(data, list):
return [self.sanitize(item, key_hint=key_hint) for item in data]
if isinstance(data, tuple):
return [self.sanitize(item, key_hint=key_hint) for item in data]
if isinstance(data, str):
if key_hint and _SENSITIVE_KEY_PATTERN.search(key_hint):
return _REDACTED
cleaned = self._scrubber.clean(data)
return _SCRUBADUB_PLACEHOLDER_PATTERN.sub(_REDACTED, cleaned)
if isinstance(data, int | float | bool):
return data
return str(data)
def format_trace_id(trace_id: int | None) -> str | None:
if trace_id is None or trace_id == 0:
return None
return f"{trace_id:032x}"
def format_span_id(span_id: int | None) -> str | None:
if span_id is None or span_id == 0:
return None
return f"{span_id:016x}"
def iso_from_unix_ns(unix_ns: int | None) -> str | None:
if unix_ns is None:
return None
try:
return datetime.fromtimestamp(unix_ns / 1_000_000_000, tz=UTC).isoformat()
except (OSError, OverflowError, ValueError):
return None
def get_events_write_lock(output_path: Path) -> threading.Lock:
path_key = str(output_path.resolve(strict=False))
with _EVENTS_FILE_LOCKS_LOCK:
lock = _EVENTS_FILE_LOCKS.get(path_key)
if lock is None:
lock = threading.Lock()
_EVENTS_FILE_LOCKS[path_key] = lock
return lock
def reset_events_write_locks() -> None:
with _EVENTS_FILE_LOCKS_LOCK:
_EVENTS_FILE_LOCKS.clear()
def append_jsonl_record(output_path: Path, record: dict[str, Any]) -> None:
output_path.parent.mkdir(parents=True, exist_ok=True)
with get_events_write_lock(output_path), output_path.open("a", encoding="utf-8") as f:
f.write(json.dumps(record, ensure_ascii=False) + "\n")
def default_resource_attributes() -> dict[str, str]:
return {
"service.name": "strix-agent",
"service.namespace": "strix",
}
def parse_traceloop_headers(raw_headers: str) -> dict[str, str]:
headers = raw_headers.strip()
if not headers:
return {}
if headers.startswith("{"):
try:
parsed = json.loads(headers)
except json.JSONDecodeError:
logger.warning("Invalid TRACELOOP_HEADERS JSON, ignoring custom headers")
return {}
if isinstance(parsed, dict):
return {str(key): str(value) for key, value in parsed.items() if value is not None}
logger.warning("TRACELOOP_HEADERS JSON must be an object, ignoring custom headers")
return {}
result: dict[str, str] = {}
for part in headers.split(","):
key, sep, value = part.partition("=")
if not sep:
continue
key = key.strip()
value = value.strip()
if key and value:
result[key] = value
return result
def prune_otel_span_attributes(attributes: dict[str, Any]) -> dict[str, Any]:
"""Drop high-volume LLM payload attributes to keep JSONL event files compact."""
filtered: dict[str, Any] = {}
filtered_count = 0
for key, value in attributes.items():
key_str = str(key)
if key_str in _NOISY_OTEL_EXACT_KEYS:
filtered_count += 1
continue
if key_str.endswith(".content") and key_str.startswith(_NOISY_OTEL_CONTENT_PREFIXES):
filtered_count += 1
continue
filtered[key_str] = value
if filtered_count:
filtered["strix.filtered_attributes_count"] = filtered_count
return filtered
class JsonlSpanExporter(SpanExporter): # type: ignore[misc]
"""Append OTEL spans to JSONL for local run artifacts."""
def __init__(
self,
output_path_getter: Callable[[], Path],
run_metadata_getter: Callable[[], dict[str, Any]],
sanitizer: Callable[[Any], Any],
write_lock_getter: Callable[[Path], threading.Lock],
):
self._output_path_getter = output_path_getter
self._run_metadata_getter = run_metadata_getter
self._sanitize = sanitizer
self._write_lock_getter = write_lock_getter
def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
records: list[dict[str, Any]] = []
for span in spans:
attributes = prune_otel_span_attributes(dict(span.attributes or {}))
if "strix.event_type" in attributes:
# Tracer events are written directly in Tracer._emit_event.
continue
records.append(self._span_to_record(span, attributes))
if not records:
return SpanExportResult.SUCCESS
try:
output_path = self._output_path_getter()
output_path.parent.mkdir(parents=True, exist_ok=True)
with self._write_lock_getter(output_path), output_path.open("a", encoding="utf-8") as f:
for record in records:
f.write(json.dumps(record, ensure_ascii=False) + "\n")
except OSError:
logger.exception("Failed to write OTEL span records to JSONL")
return SpanExportResult.FAILURE
return SpanExportResult.SUCCESS
def shutdown(self) -> None:
return None
def force_flush(self, timeout_millis: int = 30_000) -> bool: # noqa: ARG002
return True
def _span_to_record(
self,
span: ReadableSpan,
attributes: dict[str, Any],
) -> dict[str, Any]:
span_context = span.get_span_context()
parent_context = span.parent
status = None
if span.status and span.status.status_code:
status = span.status.status_code.name.lower()
event_type = str(attributes.get("gen_ai.operation.name", span.name))
run_metadata = self._run_metadata_getter()
run_id_attr = (
attributes.get("strix.run_id")
or attributes.get("strix_run_id")
or run_metadata.get("run_id")
or span.resource.attributes.get("strix.run_id")
)
record: dict[str, Any] = {
"timestamp": iso_from_unix_ns(span.end_time) or datetime.now(UTC).isoformat(),
"event_type": event_type,
"run_id": str(run_id_attr or run_metadata.get("run_id") or ""),
"trace_id": format_trace_id(span_context.trace_id),
"span_id": format_span_id(span_context.span_id),
"parent_span_id": format_span_id(parent_context.span_id if parent_context else None),
"actor": None,
"payload": None,
"status": status,
"error": None,
"source": "otel.span",
"span_name": span.name,
"span_kind": span.kind.name.lower(),
"attributes": self._sanitize(attributes),
}
if span.events:
record["otel_events"] = self._sanitize(
[
{
"name": event.name,
"timestamp": iso_from_unix_ns(event.timestamp),
"attributes": dict(event.attributes or {}),
}
for event in span.events
]
)
return record
def bootstrap_otel(
*,
bootstrapped: bool,
remote_enabled_state: bool,
bootstrap_lock: threading.Lock,
traceloop: Any,
base_url: str,
api_key: str,
headers_raw: str,
output_path_getter: Callable[[], Path],
run_metadata_getter: Callable[[], dict[str, Any]],
sanitizer: Callable[[Any], Any],
write_lock_getter: Callable[[Path], threading.Lock],
tracer_name: str = "strix.telemetry.tracer",
) -> tuple[Any, bool, bool, bool]:
with bootstrap_lock:
if bootstrapped:
return (
trace.get_tracer(tracer_name),
remote_enabled_state,
bootstrapped,
remote_enabled_state,
)
local_exporter = JsonlSpanExporter(
output_path_getter=output_path_getter,
run_metadata_getter=run_metadata_getter,
sanitizer=sanitizer,
write_lock_getter=write_lock_getter,
)
local_processor = SimpleSpanProcessor(local_exporter)
headers = parse_traceloop_headers(headers_raw)
remote_enabled = bool(base_url and api_key)
otlp_headers = headers
if remote_enabled:
otlp_headers = {"Authorization": f"Bearer {api_key}"}
otlp_headers.update(headers)
otel_init_ok = False
if traceloop:
try:
from traceloop.sdk.instruments import Instruments
init_kwargs: dict[str, Any] = {
"app_name": "strix-agent",
"processor": local_processor,
"telemetry_enabled": False,
"resource_attributes": default_resource_attributes(),
"block_instruments": {
Instruments.URLLIB3,
Instruments.REQUESTS,
},
}
if remote_enabled:
init_kwargs.update(
{
"api_endpoint": base_url,
"api_key": api_key,
"headers": headers,
}
)
import io
import sys
_stdout = sys.stdout
sys.stdout = io.StringIO()
try:
traceloop.init(**init_kwargs)
finally:
sys.stdout = _stdout
otel_init_ok = True
except Exception:
logger.exception("Failed to initialize Traceloop/OpenLLMetry")
remote_enabled = False
if not otel_init_ok:
from opentelemetry.sdk.resources import Resource
provider = TracerProvider(resource=Resource.create(default_resource_attributes()))
provider.add_span_processor(local_processor)
if remote_enabled:
try:
from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
OTLPSpanExporter,
)
endpoint = base_url.rstrip("/") + "/v1/traces"
provider.add_span_processor(
BatchSpanProcessor(
OTLPSpanExporter(endpoint=endpoint, headers=otlp_headers)
)
)
except Exception:
logger.exception("Failed to configure OTLP HTTP exporter")
remote_enabled = False
try:
trace.set_tracer_provider(provider)
otel_init_ok = True
except Exception:
logger.exception("Failed to set OpenTelemetry tracer provider")
remote_enabled = False
otel_tracer = trace.get_tracer(tracer_name)
if otel_init_ok:
return otel_tracer, remote_enabled, True, remote_enabled
return otel_tracer, remote_enabled, bootstrapped, remote_enabled_state

1
tests/config/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Tests for strix.config module."""

View File

@@ -0,0 +1,55 @@
import json
from strix.config.config import Config
def test_traceloop_vars_are_tracked() -> None:
tracked = Config.tracked_vars()
assert "STRIX_OTEL_TELEMETRY" in tracked
assert "STRIX_POSTHOG_TELEMETRY" in tracked
assert "TRACELOOP_BASE_URL" in tracked
assert "TRACELOOP_API_KEY" in tracked
assert "TRACELOOP_HEADERS" in tracked
def test_apply_saved_uses_saved_traceloop_vars(monkeypatch, tmp_path) -> None:
config_path = tmp_path / "cli-config.json"
config_path.write_text(
json.dumps(
{
"env": {
"TRACELOOP_BASE_URL": "https://otel.example.com",
"TRACELOOP_API_KEY": "api-key",
"TRACELOOP_HEADERS": "x-test=value",
}
}
),
encoding="utf-8",
)
monkeypatch.setattr(Config, "_config_file_override", config_path)
monkeypatch.delenv("TRACELOOP_BASE_URL", raising=False)
monkeypatch.delenv("TRACELOOP_API_KEY", raising=False)
monkeypatch.delenv("TRACELOOP_HEADERS", raising=False)
applied = Config.apply_saved()
assert applied["TRACELOOP_BASE_URL"] == "https://otel.example.com"
assert applied["TRACELOOP_API_KEY"] == "api-key"
assert applied["TRACELOOP_HEADERS"] == "x-test=value"
def test_apply_saved_respects_existing_env_traceloop_vars(monkeypatch, tmp_path) -> None:
config_path = tmp_path / "cli-config.json"
config_path.write_text(
json.dumps({"env": {"TRACELOOP_BASE_URL": "https://otel.example.com"}}),
encoding="utf-8",
)
monkeypatch.setattr(Config, "_config_file_override", config_path)
monkeypatch.setenv("TRACELOOP_BASE_URL", "https://env.example.com")
applied = Config.apply_saved(force=False)
assert "TRACELOOP_BASE_URL" not in applied

View File

@@ -0,0 +1,15 @@
import litellm
from strix.llm.config import LLMConfig
from strix.llm.llm import LLM
def test_llm_does_not_modify_litellm_callbacks(monkeypatch) -> None:
monkeypatch.setenv("STRIX_TELEMETRY", "1")
monkeypatch.setenv("STRIX_OTEL_TELEMETRY", "1")
monkeypatch.setattr(litellm, "callbacks", ["custom-callback"])
llm = LLM(LLMConfig(model_name="openai/gpt-5"), agent_name=None)
assert llm is not None
assert litellm.callbacks == ["custom-callback"]

View File

@@ -0,0 +1,28 @@
from strix.telemetry.flags import is_otel_enabled, is_posthog_enabled
def test_flags_fallback_to_strix_telemetry(monkeypatch) -> None:
monkeypatch.delenv("STRIX_OTEL_TELEMETRY", raising=False)
monkeypatch.delenv("STRIX_POSTHOG_TELEMETRY", raising=False)
monkeypatch.setenv("STRIX_TELEMETRY", "0")
assert is_otel_enabled() is False
assert is_posthog_enabled() is False
def test_otel_flag_overrides_global_telemetry(monkeypatch) -> None:
monkeypatch.setenv("STRIX_TELEMETRY", "0")
monkeypatch.setenv("STRIX_OTEL_TELEMETRY", "1")
monkeypatch.delenv("STRIX_POSTHOG_TELEMETRY", raising=False)
assert is_otel_enabled() is True
assert is_posthog_enabled() is False
def test_posthog_flag_overrides_global_telemetry(monkeypatch) -> None:
monkeypatch.setenv("STRIX_TELEMETRY", "0")
monkeypatch.setenv("STRIX_POSTHOG_TELEMETRY", "1")
monkeypatch.delenv("STRIX_OTEL_TELEMETRY", raising=False)
assert is_otel_enabled() is False
assert is_posthog_enabled() is True

View File

@@ -0,0 +1,379 @@
import json
import sys
import types
from pathlib import Path
from typing import Any, ClassVar
import pytest
from opentelemetry.sdk.trace.export import SimpleSpanProcessor, SpanExportResult
from strix.telemetry import tracer as tracer_module
from strix.telemetry import utils as telemetry_utils
from strix.telemetry.tracer import Tracer, set_global_tracer
def _load_events(events_path: Path) -> list[dict[str, Any]]:
lines = events_path.read_text(encoding="utf-8").splitlines()
return [json.loads(line) for line in lines if line]
@pytest.fixture(autouse=True)
def _reset_tracer_globals(monkeypatch) -> None:
monkeypatch.setattr(tracer_module, "_global_tracer", None)
monkeypatch.setattr(tracer_module, "_OTEL_BOOTSTRAPPED", False)
monkeypatch.setattr(tracer_module, "_OTEL_REMOTE_ENABLED", False)
telemetry_utils.reset_events_write_locks()
monkeypatch.delenv("STRIX_TELEMETRY", raising=False)
monkeypatch.delenv("STRIX_OTEL_TELEMETRY", raising=False)
monkeypatch.delenv("STRIX_POSTHOG_TELEMETRY", raising=False)
monkeypatch.delenv("TRACELOOP_BASE_URL", raising=False)
monkeypatch.delenv("TRACELOOP_API_KEY", raising=False)
monkeypatch.delenv("TRACELOOP_HEADERS", raising=False)
def test_tracer_local_mode_writes_jsonl_with_correlation(monkeypatch, tmp_path) -> None:
monkeypatch.chdir(tmp_path)
tracer = Tracer("local-observability")
set_global_tracer(tracer)
tracer.set_scan_config({"targets": ["https://example.com"], "user_instructions": "focus auth"})
tracer.log_agent_creation("agent-1", "Root Agent", "scan auth")
tracer.log_chat_message("starting scan", "user", "agent-1")
execution_id = tracer.log_tool_execution_start(
"agent-1",
"send_request",
{"url": "https://example.com/login"},
)
tracer.update_tool_execution(execution_id, "completed", {"status_code": 200, "body": "ok"})
events_path = tmp_path / "strix_runs" / "local-observability" / "events.jsonl"
assert events_path.exists()
events = _load_events(events_path)
assert any(event["event_type"] == "tool.execution.updated" for event in events)
assert not any(event["event_type"] == "traffic.intercepted" for event in events)
for event in events:
assert event["run_id"] == "local-observability"
assert event["trace_id"]
assert event["span_id"]
def test_tracer_redacts_sensitive_payloads(monkeypatch, tmp_path) -> None:
monkeypatch.chdir(tmp_path)
tracer = Tracer("redaction-run")
set_global_tracer(tracer)
execution_id = tracer.log_tool_execution_start(
"agent-1",
"send_request",
{
"url": "https://example.com",
"api_key": "sk-secret-token-value",
"authorization": "Bearer super-secret-token",
},
)
tracer.update_tool_execution(
execution_id,
"error",
{"error": "request failed with token sk-secret-token-value"},
)
events_path = tmp_path / "strix_runs" / "redaction-run" / "events.jsonl"
events = _load_events(events_path)
serialized = json.dumps(events)
assert "sk-secret-token-value" not in serialized
assert "super-secret-token" not in serialized
assert "[REDACTED]" in serialized
def test_tracer_remote_mode_configures_traceloop_export(monkeypatch, tmp_path) -> None:
monkeypatch.chdir(tmp_path)
class FakeTraceloop:
init_calls: ClassVar[list[dict[str, Any]]] = []
@staticmethod
def init(**kwargs: Any) -> None:
FakeTraceloop.init_calls.append(kwargs)
@staticmethod
def set_association_properties(properties: dict[str, Any]) -> None: # noqa: ARG004
return None
monkeypatch.setattr(tracer_module, "Traceloop", FakeTraceloop)
monkeypatch.setenv("TRACELOOP_BASE_URL", "https://otel.example.com")
monkeypatch.setenv("TRACELOOP_API_KEY", "test-api-key")
monkeypatch.setenv("TRACELOOP_HEADERS", '{"x-custom":"header"}')
tracer = Tracer("remote-observability")
set_global_tracer(tracer)
tracer.log_chat_message("hello", "user", "agent-1")
assert tracer._remote_export_enabled is True
assert FakeTraceloop.init_calls
init_kwargs = FakeTraceloop.init_calls[-1]
assert init_kwargs["api_endpoint"] == "https://otel.example.com"
assert init_kwargs["api_key"] == "test-api-key"
assert init_kwargs["headers"] == {"x-custom": "header"}
assert isinstance(init_kwargs["processor"], SimpleSpanProcessor)
assert "strix.run_id" not in init_kwargs["resource_attributes"]
assert "strix.run_name" not in init_kwargs["resource_attributes"]
events_path = tmp_path / "strix_runs" / "remote-observability" / "events.jsonl"
events = _load_events(events_path)
run_started = next(event for event in events if event["event_type"] == "run.started")
assert run_started["payload"]["remote_export_enabled"] is True
def test_tracer_local_mode_avoids_traceloop_remote_endpoint(monkeypatch, tmp_path) -> None:
monkeypatch.chdir(tmp_path)
class FakeTraceloop:
init_calls: ClassVar[list[dict[str, Any]]] = []
@staticmethod
def init(**kwargs: Any) -> None:
FakeTraceloop.init_calls.append(kwargs)
@staticmethod
def set_association_properties(properties: dict[str, Any]) -> None: # noqa: ARG004
return None
monkeypatch.setattr(tracer_module, "Traceloop", FakeTraceloop)
tracer = Tracer("local-traceloop")
set_global_tracer(tracer)
tracer.log_chat_message("hello", "user", "agent-1")
assert FakeTraceloop.init_calls
init_kwargs = FakeTraceloop.init_calls[-1]
assert "api_endpoint" not in init_kwargs
assert "api_key" not in init_kwargs
assert "headers" not in init_kwargs
assert isinstance(init_kwargs["processor"], SimpleSpanProcessor)
assert tracer._remote_export_enabled is False
def test_otlp_fallback_includes_auth_and_custom_headers(monkeypatch, tmp_path) -> None:
monkeypatch.chdir(tmp_path)
monkeypatch.setattr(tracer_module, "Traceloop", None)
monkeypatch.setenv("TRACELOOP_BASE_URL", "https://otel.example.com")
monkeypatch.setenv("TRACELOOP_API_KEY", "test-api-key")
monkeypatch.setenv("TRACELOOP_HEADERS", '{"x-custom":"header"}')
captured: dict[str, Any] = {}
class FakeOTLPSpanExporter:
def __init__(self, endpoint: str, headers: dict[str, str] | None = None, **kwargs: Any):
captured["endpoint"] = endpoint
captured["headers"] = headers or {}
captured["kwargs"] = kwargs
def export(self, spans: Any) -> SpanExportResult: # noqa: ARG002
return SpanExportResult.SUCCESS
def shutdown(self) -> None:
return None
def force_flush(self, timeout_millis: int = 30_000) -> bool: # noqa: ARG002
return True
fake_module = types.ModuleType("opentelemetry.exporter.otlp.proto.http.trace_exporter")
fake_module.OTLPSpanExporter = FakeOTLPSpanExporter
monkeypatch.setitem(
sys.modules,
"opentelemetry.exporter.otlp.proto.http.trace_exporter",
fake_module,
)
tracer = Tracer("otlp-fallback")
set_global_tracer(tracer)
assert tracer._remote_export_enabled is True
assert captured["endpoint"] == "https://otel.example.com/v1/traces"
assert captured["headers"]["Authorization"] == "Bearer test-api-key"
assert captured["headers"]["x-custom"] == "header"
def test_traceloop_init_failure_does_not_mark_bootstrapped_on_provider_failure(
monkeypatch, tmp_path
) -> None:
monkeypatch.chdir(tmp_path)
class FakeTraceloop:
@staticmethod
def init(**kwargs: Any) -> None: # noqa: ARG004
raise RuntimeError("traceloop init failed")
@staticmethod
def set_association_properties(properties: dict[str, Any]) -> None: # noqa: ARG004
return None
monkeypatch.setattr(tracer_module, "Traceloop", FakeTraceloop)
def _raise_provider_error(provider: Any) -> None:
raise RuntimeError("provider setup failed")
monkeypatch.setattr(tracer_module.trace, "set_tracer_provider", _raise_provider_error)
tracer = Tracer("bootstrap-failure")
set_global_tracer(tracer)
assert tracer_module._OTEL_BOOTSTRAPPED is False
assert tracer._remote_export_enabled is False
def test_run_completed_event_emitted_once(monkeypatch, tmp_path) -> None:
monkeypatch.chdir(tmp_path)
tracer = Tracer("single-complete")
set_global_tracer(tracer)
tracer.save_run_data(mark_complete=True)
tracer.save_run_data(mark_complete=True)
events_path = tmp_path / "strix_runs" / "single-complete" / "events.jsonl"
events = _load_events(events_path)
run_completed = [event for event in events if event["event_type"] == "run.completed"]
assert len(run_completed) == 1
def test_events_with_agent_id_include_agent_name(monkeypatch, tmp_path) -> None:
monkeypatch.chdir(tmp_path)
tracer = Tracer("agent-name-enrichment")
set_global_tracer(tracer)
tracer.log_agent_creation("agent-1", "Root Agent", "scan auth")
tracer.log_chat_message("hello", "assistant", "agent-1")
events_path = tmp_path / "strix_runs" / "agent-name-enrichment" / "events.jsonl"
events = _load_events(events_path)
chat_event = next(event for event in events if event["event_type"] == "chat.message")
assert chat_event["actor"]["agent_id"] == "agent-1"
assert chat_event["actor"]["agent_name"] == "Root Agent"
def test_run_metadata_is_only_on_run_lifecycle_events(monkeypatch, tmp_path) -> None:
monkeypatch.chdir(tmp_path)
tracer = Tracer("metadata-scope")
set_global_tracer(tracer)
tracer.log_chat_message("hello", "assistant", "agent-1")
tracer.save_run_data(mark_complete=True)
events_path = tmp_path / "strix_runs" / "metadata-scope" / "events.jsonl"
events = _load_events(events_path)
run_started = next(event for event in events if event["event_type"] == "run.started")
run_completed = next(event for event in events if event["event_type"] == "run.completed")
chat_event = next(event for event in events if event["event_type"] == "chat.message")
assert "run_metadata" in run_started
assert "run_metadata" in run_completed
assert "run_metadata" not in chat_event
def test_set_run_name_resets_cached_paths(monkeypatch, tmp_path) -> None:
monkeypatch.chdir(tmp_path)
tracer = Tracer()
set_global_tracer(tracer)
old_events_path = tracer.events_file_path
tracer.set_run_name("renamed-run")
tracer.log_chat_message("hello", "assistant", "agent-1")
new_events_path = tracer.events_file_path
assert new_events_path != old_events_path
assert new_events_path == tmp_path / "strix_runs" / "renamed-run" / "events.jsonl"
events = _load_events(new_events_path)
assert any(event["event_type"] == "run.started" for event in events)
assert any(event["event_type"] == "chat.message" for event in events)
def test_set_run_name_resets_run_completed_flag(monkeypatch, tmp_path) -> None:
monkeypatch.chdir(tmp_path)
tracer = Tracer()
set_global_tracer(tracer)
tracer.save_run_data(mark_complete=True)
tracer.set_run_name("renamed-complete")
tracer.save_run_data(mark_complete=True)
events_path = tmp_path / "strix_runs" / "renamed-complete" / "events.jsonl"
events = _load_events(events_path)
run_completed = [event for event in events if event["event_type"] == "run.completed"]
assert any(event["event_type"] == "run.started" for event in events)
assert len(run_completed) == 1
def test_set_run_name_updates_traceloop_association_properties(monkeypatch, tmp_path) -> None:
monkeypatch.chdir(tmp_path)
class FakeTraceloop:
associations: ClassVar[list[dict[str, Any]]] = []
@staticmethod
def init(**kwargs: Any) -> None: # noqa: ARG004
return None
@staticmethod
def set_association_properties(properties: dict[str, Any]) -> None:
FakeTraceloop.associations.append(properties)
monkeypatch.setattr(tracer_module, "Traceloop", FakeTraceloop)
tracer = Tracer()
set_global_tracer(tracer)
tracer.set_run_name("renamed-run")
assert FakeTraceloop.associations
assert FakeTraceloop.associations[-1]["run_id"] == "renamed-run"
assert FakeTraceloop.associations[-1]["run_name"] == "renamed-run"
def test_events_write_locks_are_scoped_by_events_file(monkeypatch, tmp_path) -> None:
monkeypatch.chdir(tmp_path)
monkeypatch.setenv("STRIX_TELEMETRY", "0")
tracer_one = Tracer("lock-run-a")
tracer_two = Tracer("lock-run-b")
lock_a_from_one = tracer_one._get_events_write_lock(tracer_one.events_file_path)
lock_a_from_two = tracer_two._get_events_write_lock(tracer_one.events_file_path)
lock_b = tracer_two._get_events_write_lock(tracer_two.events_file_path)
assert lock_a_from_one is lock_a_from_two
assert lock_a_from_one is not lock_b
def test_tracer_skips_jsonl_when_telemetry_disabled(monkeypatch, tmp_path) -> None:
monkeypatch.chdir(tmp_path)
monkeypatch.setenv("STRIX_TELEMETRY", "0")
tracer = Tracer("telemetry-disabled")
set_global_tracer(tracer)
tracer.log_chat_message("hello", "assistant", "agent-1")
tracer.save_run_data(mark_complete=True)
events_path = tmp_path / "strix_runs" / "telemetry-disabled" / "events.jsonl"
assert not events_path.exists()
def test_tracer_otel_flag_overrides_global_telemetry(monkeypatch, tmp_path) -> None:
monkeypatch.chdir(tmp_path)
monkeypatch.setenv("STRIX_TELEMETRY", "0")
monkeypatch.setenv("STRIX_OTEL_TELEMETRY", "1")
tracer = Tracer("otel-enabled")
set_global_tracer(tracer)
tracer.log_chat_message("hello", "assistant", "agent-1")
tracer.save_run_data(mark_complete=True)
events_path = tmp_path / "strix_runs" / "otel-enabled" / "events.jsonl"
assert events_path.exists()

View File

@@ -0,0 +1,39 @@
from strix.telemetry.utils import prune_otel_span_attributes
def test_prune_otel_span_attributes_drops_high_volume_prompt_content() -> None:
attributes = {
"gen_ai.operation.name": "openai.chat",
"gen_ai.request.model": "gpt-5.2",
"gen_ai.prompt.0.role": "system",
"gen_ai.prompt.0.content": "a" * 20_000,
"gen_ai.completion.0.content": "b" * 10_000,
"llm.input_messages.0.content": "c" * 5_000,
"llm.output_messages.0.content": "d" * 5_000,
"llm.input": "x" * 3_000,
"llm.output": "y" * 3_000,
}
pruned = prune_otel_span_attributes(attributes)
assert "gen_ai.prompt.0.content" not in pruned
assert "gen_ai.completion.0.content" not in pruned
assert "llm.input_messages.0.content" not in pruned
assert "llm.output_messages.0.content" not in pruned
assert "llm.input" not in pruned
assert "llm.output" not in pruned
assert pruned["gen_ai.operation.name"] == "openai.chat"
assert pruned["gen_ai.prompt.0.role"] == "system"
assert pruned["strix.filtered_attributes_count"] == 6
def test_prune_otel_span_attributes_keeps_metadata_when_nothing_is_dropped() -> None:
attributes = {
"gen_ai.operation.name": "openai.chat",
"gen_ai.request.model": "gpt-5.2",
"gen_ai.prompt.0.role": "user",
}
pruned = prune_otel_span_attributes(attributes)
assert pruned == attributes