Add OpenTelemetry observability with local JSONL traces (#347)
Co-authored-by: 0xallam <ahmed39652003@gmail.com>
This commit is contained in:
@@ -46,9 +46,37 @@ Configure Strix using environment variables or a config file.
|
|||||||
</ParamField>
|
</ParamField>
|
||||||
|
|
||||||
<ParamField path="STRIX_TELEMETRY" default="1" type="string">
|
<ParamField path="STRIX_TELEMETRY" default="1" type="string">
|
||||||
Enable/disable anonymous telemetry. Set to `0`, `false`, `no`, or `off` to disable.
|
Global telemetry default toggle. Set to `0`, `false`, `no`, or `off` to disable both PostHog and OTEL unless overridden by per-channel flags below.
|
||||||
</ParamField>
|
</ParamField>
|
||||||
|
|
||||||
|
<ParamField path="STRIX_OTEL_TELEMETRY" type="string">
|
||||||
|
Enable/disable OpenTelemetry run observability independently. When unset, falls back to `STRIX_TELEMETRY`.
|
||||||
|
</ParamField>
|
||||||
|
|
||||||
|
<ParamField path="STRIX_POSTHOG_TELEMETRY" type="string">
|
||||||
|
Enable/disable PostHog product telemetry independently. When unset, falls back to `STRIX_TELEMETRY`.
|
||||||
|
</ParamField>
|
||||||
|
|
||||||
|
<ParamField path="TRACELOOP_BASE_URL" type="string">
|
||||||
|
OTLP/Traceloop base URL for remote OpenTelemetry export. If unset, Strix keeps traces local only.
|
||||||
|
</ParamField>
|
||||||
|
|
||||||
|
<ParamField path="TRACELOOP_API_KEY" type="string">
|
||||||
|
API key used for remote trace export. Remote export is enabled only when both `TRACELOOP_BASE_URL` and `TRACELOOP_API_KEY` are set.
|
||||||
|
</ParamField>
|
||||||
|
|
||||||
|
<ParamField path="TRACELOOP_HEADERS" type="string">
|
||||||
|
Optional custom OTEL headers (JSON object or `key=value,key2=value2`). Useful for Langfuse or custom/self-hosted OTLP gateways.
|
||||||
|
</ParamField>
|
||||||
|
|
||||||
|
When remote OTEL vars are not set, Strix still writes complete run telemetry locally to:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
strix_runs/<run_name>/events.jsonl
|
||||||
|
```
|
||||||
|
|
||||||
|
When remote vars are set, Strix dual-writes telemetry to both local JSONL and the remote OTEL endpoint.
|
||||||
|
|
||||||
## Docker Configuration
|
## Docker Configuration
|
||||||
|
|
||||||
<ParamField path="STRIX_IMAGE" default="ghcr.io/usestrix/strix-sandbox:0.1.12" type="string">
|
<ParamField path="STRIX_IMAGE" default="ghcr.io/usestrix/strix-sandbox:0.1.12" type="string">
|
||||||
@@ -106,4 +134,5 @@ export PERPLEXITY_API_KEY="pplx-..."
|
|||||||
# Optional: Custom timeouts
|
# Optional: Custom timeouts
|
||||||
export LLM_TIMEOUT="600"
|
export LLM_TIMEOUT="600"
|
||||||
export STRIX_SANDBOX_EXECUTION_TIMEOUT="300"
|
export STRIX_SANDBOX_EXECUTION_TIMEOUT="300"
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|||||||
1512
poetry.lock
generated
1512
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -56,6 +56,9 @@ textual = "^4.0.0"
|
|||||||
xmltodict = "^0.13.0"
|
xmltodict = "^0.13.0"
|
||||||
requests = "^2.32.0"
|
requests = "^2.32.0"
|
||||||
cvss = "^3.2"
|
cvss = "^3.2"
|
||||||
|
traceloop-sdk = "^0.53.0"
|
||||||
|
opentelemetry-exporter-otlp-proto-http = "^1.40.0"
|
||||||
|
scrubadub = "^2.0.1"
|
||||||
|
|
||||||
# Optional LLM provider dependencies
|
# Optional LLM provider dependencies
|
||||||
google-cloud-aiplatform = { version = ">=1.38", optional = true }
|
google-cloud-aiplatform = { version = ">=1.38", optional = true }
|
||||||
@@ -148,6 +151,9 @@ module = [
|
|||||||
"libtmux.*",
|
"libtmux.*",
|
||||||
"pytest.*",
|
"pytest.*",
|
||||||
"cvss.*",
|
"cvss.*",
|
||||||
|
"opentelemetry.*",
|
||||||
|
"scrubadub.*",
|
||||||
|
"traceloop.*",
|
||||||
]
|
]
|
||||||
ignore_missing_imports = true
|
ignore_missing_imports = true
|
||||||
|
|
||||||
@@ -155,6 +161,7 @@ ignore_missing_imports = true
|
|||||||
[[tool.mypy.overrides]]
|
[[tool.mypy.overrides]]
|
||||||
module = ["tests.*"]
|
module = ["tests.*"]
|
||||||
disallow_untyped_decorators = false
|
disallow_untyped_decorators = false
|
||||||
|
disallow_untyped_defs = false
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# Ruff Configuration (Fast Python Linter & Formatter)
|
# Ruff Configuration (Fast Python Linter & Formatter)
|
||||||
|
|||||||
@@ -47,6 +47,11 @@ class Config:
|
|||||||
|
|
||||||
# Telemetry
|
# Telemetry
|
||||||
strix_telemetry = "1"
|
strix_telemetry = "1"
|
||||||
|
strix_otel_telemetry = None
|
||||||
|
strix_posthog_telemetry = None
|
||||||
|
traceloop_base_url = None
|
||||||
|
traceloop_api_key = None
|
||||||
|
traceloop_headers = None
|
||||||
|
|
||||||
# Config file override (set via --config CLI arg)
|
# Config file override (set via --config CLI arg)
|
||||||
_config_file_override: Path | None = None
|
_config_file_override: Path | None = None
|
||||||
|
|||||||
@@ -413,8 +413,6 @@ def display_completion_message(args: argparse.Namespace, results_path: Path) ->
|
|||||||
if tracer and tracer.scan_results:
|
if tracer and tracer.scan_results:
|
||||||
scan_completed = tracer.scan_results.get("scan_completed", False)
|
scan_completed = tracer.scan_results.get("scan_completed", False)
|
||||||
|
|
||||||
has_vulnerabilities = tracer and len(tracer.vulnerability_reports) > 0
|
|
||||||
|
|
||||||
completion_text = Text()
|
completion_text = Text()
|
||||||
if scan_completed:
|
if scan_completed:
|
||||||
completion_text.append("Penetration test completed", style="bold #22c55e")
|
completion_text.append("Penetration test completed", style="bold #22c55e")
|
||||||
@@ -439,13 +437,12 @@ def display_completion_message(args: argparse.Namespace, results_path: Path) ->
|
|||||||
if stats_text.plain:
|
if stats_text.plain:
|
||||||
panel_parts.extend(["\n", stats_text])
|
panel_parts.extend(["\n", stats_text])
|
||||||
|
|
||||||
if scan_completed or has_vulnerabilities:
|
results_text = Text()
|
||||||
results_text = Text()
|
results_text.append("\n")
|
||||||
results_text.append("\n")
|
results_text.append("Output", style="dim")
|
||||||
results_text.append("Output", style="dim")
|
results_text.append(" ")
|
||||||
results_text.append(" ")
|
results_text.append(str(results_path), style="#60a5fa")
|
||||||
results_text.append(str(results_path), style="#60a5fa")
|
panel_parts.extend(["\n", results_text])
|
||||||
panel_parts.extend(["\n", results_text])
|
|
||||||
|
|
||||||
panel_content = Text.assemble(*panel_parts)
|
panel_content = Text.assemble(*panel_parts)
|
||||||
|
|
||||||
|
|||||||
23
strix/telemetry/flags.py
Normal file
23
strix/telemetry/flags.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
from strix.config import Config
|
||||||
|
|
||||||
|
|
||||||
|
_DISABLED_VALUES = {"0", "false", "no", "off"}
|
||||||
|
|
||||||
|
|
||||||
|
def _is_enabled(raw_value: str | None, default: str = "1") -> bool:
|
||||||
|
value = (raw_value if raw_value is not None else default).strip().lower()
|
||||||
|
return value not in _DISABLED_VALUES
|
||||||
|
|
||||||
|
|
||||||
|
def is_otel_enabled() -> bool:
|
||||||
|
explicit = Config.get("strix_otel_telemetry")
|
||||||
|
if explicit is not None:
|
||||||
|
return _is_enabled(explicit)
|
||||||
|
return _is_enabled(Config.get("strix_telemetry"), default="1")
|
||||||
|
|
||||||
|
|
||||||
|
def is_posthog_enabled() -> bool:
|
||||||
|
explicit = Config.get("strix_posthog_telemetry")
|
||||||
|
if explicit is not None:
|
||||||
|
return _is_enabled(explicit)
|
||||||
|
return _is_enabled(Config.get("strix_telemetry"), default="1")
|
||||||
@@ -6,7 +6,7 @@ from pathlib import Path
|
|||||||
from typing import TYPE_CHECKING, Any
|
from typing import TYPE_CHECKING, Any
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
from strix.config import Config
|
from strix.telemetry.flags import is_posthog_enabled
|
||||||
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
@@ -19,7 +19,7 @@ _SESSION_ID = uuid4().hex[:16]
|
|||||||
|
|
||||||
|
|
||||||
def _is_enabled() -> bool:
|
def _is_enabled() -> bool:
|
||||||
return (Config.get("strix_telemetry") or "1").lower() not in ("0", "false", "no", "off")
|
return is_posthog_enabled()
|
||||||
|
|
||||||
|
|
||||||
def _is_first_run() -> bool:
|
def _is_first_run() -> bool:
|
||||||
|
|||||||
@@ -1,20 +1,40 @@
|
|||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import threading
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING, Any, Optional
|
from typing import Any, Callable, Optional
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
|
from opentelemetry import trace
|
||||||
|
from opentelemetry.trace import SpanContext, SpanKind
|
||||||
|
|
||||||
|
from strix.config import Config
|
||||||
from strix.telemetry import posthog
|
from strix.telemetry import posthog
|
||||||
|
from strix.telemetry.flags import is_otel_enabled
|
||||||
|
from strix.telemetry.utils import (
|
||||||
|
TelemetrySanitizer,
|
||||||
|
append_jsonl_record,
|
||||||
|
bootstrap_otel,
|
||||||
|
format_span_id,
|
||||||
|
format_trace_id,
|
||||||
|
get_events_write_lock,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
try:
|
||||||
from collections.abc import Callable
|
from traceloop.sdk import Traceloop
|
||||||
|
except ImportError: # pragma: no cover - exercised when dependency is absent
|
||||||
|
Traceloop = None # type: ignore[assignment,unused-ignore]
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
_global_tracer: Optional["Tracer"] = None
|
_global_tracer: Optional["Tracer"] = None
|
||||||
|
|
||||||
|
_OTEL_BOOTSTRAP_LOCK = threading.Lock()
|
||||||
|
_OTEL_BOOTSTRAPPED = False
|
||||||
|
_OTEL_REMOTE_ENABLED = False
|
||||||
|
|
||||||
def get_global_tracer() -> Optional["Tracer"]:
|
def get_global_tracer() -> Optional["Tracer"]:
|
||||||
return _global_tracer
|
return _global_tracer
|
||||||
@@ -52,16 +72,225 @@ class Tracer:
|
|||||||
"status": "running",
|
"status": "running",
|
||||||
}
|
}
|
||||||
self._run_dir: Path | None = None
|
self._run_dir: Path | None = None
|
||||||
|
self._events_file_path: Path | None = None
|
||||||
self._next_execution_id = 1
|
self._next_execution_id = 1
|
||||||
self._next_message_id = 1
|
self._next_message_id = 1
|
||||||
self._saved_vuln_ids: set[str] = set()
|
self._saved_vuln_ids: set[str] = set()
|
||||||
|
self._run_completed_emitted = False
|
||||||
|
self._telemetry_enabled = is_otel_enabled()
|
||||||
|
self._sanitizer = TelemetrySanitizer()
|
||||||
|
|
||||||
|
self._otel_tracer: Any = None
|
||||||
|
self._remote_export_enabled = False
|
||||||
|
|
||||||
self.caido_url: str | None = None
|
self.caido_url: str | None = None
|
||||||
self.vulnerability_found_callback: Callable[[dict[str, Any]], None] | None = None
|
self.vulnerability_found_callback: Callable[[dict[str, Any]], None] | None = None
|
||||||
|
|
||||||
|
self._setup_telemetry()
|
||||||
|
self._emit_run_started_event()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def events_file_path(self) -> Path:
|
||||||
|
if self._events_file_path is None:
|
||||||
|
self._events_file_path = self.get_run_dir() / "events.jsonl"
|
||||||
|
return self._events_file_path
|
||||||
|
|
||||||
|
def _active_events_file_path(self) -> Path:
|
||||||
|
active = get_global_tracer()
|
||||||
|
if active and active._events_file_path is not None:
|
||||||
|
return active._events_file_path
|
||||||
|
return self.events_file_path
|
||||||
|
|
||||||
|
def _get_events_write_lock(self, output_path: Path | None = None) -> threading.Lock:
|
||||||
|
path = output_path or self.events_file_path
|
||||||
|
return get_events_write_lock(path)
|
||||||
|
|
||||||
|
def _active_run_metadata(self) -> dict[str, Any]:
|
||||||
|
active = get_global_tracer()
|
||||||
|
if active:
|
||||||
|
return active.run_metadata
|
||||||
|
return self.run_metadata
|
||||||
|
|
||||||
|
def _setup_telemetry(self) -> None:
|
||||||
|
global _OTEL_BOOTSTRAPPED, _OTEL_REMOTE_ENABLED
|
||||||
|
|
||||||
|
if not self._telemetry_enabled:
|
||||||
|
self._otel_tracer = None
|
||||||
|
self._remote_export_enabled = False
|
||||||
|
return
|
||||||
|
|
||||||
|
run_dir = self.get_run_dir()
|
||||||
|
self._events_file_path = run_dir / "events.jsonl"
|
||||||
|
base_url = (Config.get("traceloop_base_url") or "").strip()
|
||||||
|
api_key = (Config.get("traceloop_api_key") or "").strip()
|
||||||
|
headers_raw = Config.get("traceloop_headers") or ""
|
||||||
|
|
||||||
|
(
|
||||||
|
self._otel_tracer,
|
||||||
|
self._remote_export_enabled,
|
||||||
|
_OTEL_BOOTSTRAPPED,
|
||||||
|
_OTEL_REMOTE_ENABLED,
|
||||||
|
) = bootstrap_otel(
|
||||||
|
bootstrapped=_OTEL_BOOTSTRAPPED,
|
||||||
|
remote_enabled_state=_OTEL_REMOTE_ENABLED,
|
||||||
|
bootstrap_lock=_OTEL_BOOTSTRAP_LOCK,
|
||||||
|
traceloop=Traceloop,
|
||||||
|
base_url=base_url,
|
||||||
|
api_key=api_key,
|
||||||
|
headers_raw=headers_raw,
|
||||||
|
output_path_getter=self._active_events_file_path,
|
||||||
|
run_metadata_getter=self._active_run_metadata,
|
||||||
|
sanitizer=self._sanitize_data,
|
||||||
|
write_lock_getter=self._get_events_write_lock,
|
||||||
|
tracer_name="strix.telemetry.tracer",
|
||||||
|
)
|
||||||
|
|
||||||
|
def _set_association_properties(self, properties: dict[str, Any]) -> None:
|
||||||
|
if Traceloop is None:
|
||||||
|
return
|
||||||
|
sanitized = self._sanitize_data(properties)
|
||||||
|
try:
|
||||||
|
Traceloop.set_association_properties(sanitized)
|
||||||
|
except Exception: # noqa: BLE001
|
||||||
|
logger.debug("Failed to set Traceloop association properties")
|
||||||
|
|
||||||
|
def _sanitize_data(self, data: Any, key_hint: str | None = None) -> Any:
|
||||||
|
return self._sanitizer.sanitize(data, key_hint=key_hint)
|
||||||
|
|
||||||
|
def _append_event_record(self, record: dict[str, Any]) -> None:
|
||||||
|
try:
|
||||||
|
append_jsonl_record(self.events_file_path, record)
|
||||||
|
except OSError:
|
||||||
|
logger.exception("Failed to append JSONL event record")
|
||||||
|
|
||||||
|
def _enrich_actor(self, actor: dict[str, Any] | None) -> dict[str, Any] | None:
|
||||||
|
if not actor:
|
||||||
|
return None
|
||||||
|
|
||||||
|
enriched = dict(actor)
|
||||||
|
if "agent_name" in enriched:
|
||||||
|
return enriched
|
||||||
|
|
||||||
|
agent_id = enriched.get("agent_id")
|
||||||
|
if not isinstance(agent_id, str):
|
||||||
|
return enriched
|
||||||
|
|
||||||
|
agent_data = self.agents.get(agent_id, {})
|
||||||
|
agent_name = agent_data.get("name")
|
||||||
|
if isinstance(agent_name, str) and agent_name:
|
||||||
|
enriched["agent_name"] = agent_name
|
||||||
|
|
||||||
|
return enriched
|
||||||
|
|
||||||
|
def _emit_event(
|
||||||
|
self,
|
||||||
|
event_type: str,
|
||||||
|
actor: dict[str, Any] | None = None,
|
||||||
|
payload: Any | None = None,
|
||||||
|
status: str | None = None,
|
||||||
|
error: Any | None = None,
|
||||||
|
source: str = "strix.tracer",
|
||||||
|
include_run_metadata: bool = False,
|
||||||
|
) -> None:
|
||||||
|
if not self._telemetry_enabled:
|
||||||
|
return
|
||||||
|
|
||||||
|
enriched_actor = self._enrich_actor(actor)
|
||||||
|
sanitized_actor = self._sanitize_data(enriched_actor) if enriched_actor else None
|
||||||
|
sanitized_payload = self._sanitize_data(payload) if payload is not None else None
|
||||||
|
sanitized_error = self._sanitize_data(error) if error is not None else None
|
||||||
|
|
||||||
|
trace_id: str | None = None
|
||||||
|
span_id: str | None = None
|
||||||
|
parent_span_id: str | None = None
|
||||||
|
|
||||||
|
current_context = trace.get_current_span().get_span_context()
|
||||||
|
if isinstance(current_context, SpanContext) and current_context.is_valid:
|
||||||
|
parent_span_id = format_span_id(current_context.span_id)
|
||||||
|
|
||||||
|
if self._otel_tracer is not None:
|
||||||
|
try:
|
||||||
|
with self._otel_tracer.start_as_current_span(
|
||||||
|
f"strix.{event_type}",
|
||||||
|
kind=SpanKind.INTERNAL,
|
||||||
|
) as span:
|
||||||
|
span_context = span.get_span_context()
|
||||||
|
trace_id = format_trace_id(span_context.trace_id)
|
||||||
|
span_id = format_span_id(span_context.span_id)
|
||||||
|
|
||||||
|
span.set_attribute("strix.event_type", event_type)
|
||||||
|
span.set_attribute("strix.source", source)
|
||||||
|
span.set_attribute("strix.run_id", self.run_id)
|
||||||
|
span.set_attribute("strix.run_name", self.run_name or "")
|
||||||
|
|
||||||
|
if status:
|
||||||
|
span.set_attribute("strix.status", status)
|
||||||
|
if sanitized_actor is not None:
|
||||||
|
span.set_attribute(
|
||||||
|
"strix.actor",
|
||||||
|
json.dumps(sanitized_actor, ensure_ascii=False),
|
||||||
|
)
|
||||||
|
if sanitized_payload is not None:
|
||||||
|
span.set_attribute(
|
||||||
|
"strix.payload",
|
||||||
|
json.dumps(sanitized_payload, ensure_ascii=False),
|
||||||
|
)
|
||||||
|
if sanitized_error is not None:
|
||||||
|
span.set_attribute(
|
||||||
|
"strix.error",
|
||||||
|
json.dumps(sanitized_error, ensure_ascii=False),
|
||||||
|
)
|
||||||
|
except Exception: # noqa: BLE001
|
||||||
|
logger.debug("Failed to create OTEL span for event type '%s'", event_type)
|
||||||
|
|
||||||
|
if trace_id is None:
|
||||||
|
trace_id = format_trace_id(uuid4().int & ((1 << 128) - 1)) or uuid4().hex
|
||||||
|
if span_id is None:
|
||||||
|
span_id = format_span_id(uuid4().int & ((1 << 64) - 1)) or uuid4().hex[:16]
|
||||||
|
|
||||||
|
record = {
|
||||||
|
"timestamp": datetime.now(UTC).isoformat(),
|
||||||
|
"event_type": event_type,
|
||||||
|
"run_id": self.run_id,
|
||||||
|
"trace_id": trace_id,
|
||||||
|
"span_id": span_id,
|
||||||
|
"parent_span_id": parent_span_id,
|
||||||
|
"actor": sanitized_actor,
|
||||||
|
"payload": sanitized_payload,
|
||||||
|
"status": status,
|
||||||
|
"error": sanitized_error,
|
||||||
|
"source": source,
|
||||||
|
}
|
||||||
|
if include_run_metadata:
|
||||||
|
record["run_metadata"] = self._sanitize_data(self.run_metadata)
|
||||||
|
self._append_event_record(record)
|
||||||
|
|
||||||
def set_run_name(self, run_name: str) -> None:
|
def set_run_name(self, run_name: str) -> None:
|
||||||
self.run_name = run_name
|
self.run_name = run_name
|
||||||
self.run_id = run_name
|
self.run_id = run_name
|
||||||
|
self.run_metadata["run_name"] = run_name
|
||||||
|
self.run_metadata["run_id"] = run_name
|
||||||
|
self._run_dir = None
|
||||||
|
self._events_file_path = None
|
||||||
|
self._run_completed_emitted = False
|
||||||
|
self._set_association_properties({"run_id": self.run_id, "run_name": self.run_name or ""})
|
||||||
|
self._emit_run_started_event()
|
||||||
|
|
||||||
|
def _emit_run_started_event(self) -> None:
|
||||||
|
if not self._telemetry_enabled:
|
||||||
|
return
|
||||||
|
|
||||||
|
self._emit_event(
|
||||||
|
"run.started",
|
||||||
|
payload={
|
||||||
|
"run_name": self.run_name,
|
||||||
|
"start_time": self.start_time,
|
||||||
|
"local_jsonl_path": str(self.events_file_path),
|
||||||
|
"remote_export_enabled": self._remote_export_enabled,
|
||||||
|
},
|
||||||
|
status="running",
|
||||||
|
include_run_metadata=True,
|
||||||
|
)
|
||||||
|
|
||||||
def get_run_dir(self) -> Path:
|
def get_run_dir(self) -> Path:
|
||||||
if self._run_dir is None:
|
if self._run_dir is None:
|
||||||
@@ -134,6 +363,12 @@ class Tracer:
|
|||||||
self.vulnerability_reports.append(report)
|
self.vulnerability_reports.append(report)
|
||||||
logger.info(f"Added vulnerability report: {report_id} - {title}")
|
logger.info(f"Added vulnerability report: {report_id} - {title}")
|
||||||
posthog.finding(severity)
|
posthog.finding(severity)
|
||||||
|
self._emit_event(
|
||||||
|
"finding.created",
|
||||||
|
payload={"report": report},
|
||||||
|
status=report["severity"],
|
||||||
|
source="strix.findings",
|
||||||
|
)
|
||||||
|
|
||||||
if self.vulnerability_found_callback:
|
if self.vulnerability_found_callback:
|
||||||
self.vulnerability_found_callback(report)
|
self.vulnerability_found_callback(report)
|
||||||
@@ -178,11 +413,24 @@ class Tracer:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
logger.info("Updated scan final fields")
|
logger.info("Updated scan final fields")
|
||||||
|
self._emit_event(
|
||||||
|
"finding.reviewed",
|
||||||
|
payload={
|
||||||
|
"scan_completed": True,
|
||||||
|
"vulnerability_count": len(self.vulnerability_reports),
|
||||||
|
},
|
||||||
|
status="completed",
|
||||||
|
source="strix.findings",
|
||||||
|
)
|
||||||
self.save_run_data(mark_complete=True)
|
self.save_run_data(mark_complete=True)
|
||||||
posthog.end(self, exit_reason="finished_by_tool")
|
posthog.end(self, exit_reason="finished_by_tool")
|
||||||
|
|
||||||
def log_agent_creation(
|
def log_agent_creation(
|
||||||
self, agent_id: str, name: str, task: str, parent_id: str | None = None
|
self,
|
||||||
|
agent_id: str,
|
||||||
|
name: str,
|
||||||
|
task: str,
|
||||||
|
parent_id: str | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
agent_data: dict[str, Any] = {
|
agent_data: dict[str, Any] = {
|
||||||
"id": agent_id,
|
"id": agent_id,
|
||||||
@@ -196,6 +444,13 @@ class Tracer:
|
|||||||
}
|
}
|
||||||
|
|
||||||
self.agents[agent_id] = agent_data
|
self.agents[agent_id] = agent_data
|
||||||
|
self._emit_event(
|
||||||
|
"agent.created",
|
||||||
|
actor={"agent_id": agent_id, "agent_name": name},
|
||||||
|
payload={"task": task, "parent_id": parent_id},
|
||||||
|
status="running",
|
||||||
|
source="strix.agents",
|
||||||
|
)
|
||||||
|
|
||||||
def log_chat_message(
|
def log_chat_message(
|
||||||
self,
|
self,
|
||||||
@@ -217,9 +472,21 @@ class Tracer:
|
|||||||
}
|
}
|
||||||
|
|
||||||
self.chat_messages.append(message_data)
|
self.chat_messages.append(message_data)
|
||||||
|
self._emit_event(
|
||||||
|
"chat.message",
|
||||||
|
actor={"agent_id": agent_id, "role": role},
|
||||||
|
payload={"message_id": message_id, "content": content, "metadata": metadata or {}},
|
||||||
|
status="logged",
|
||||||
|
source="strix.chat",
|
||||||
|
)
|
||||||
return message_id
|
return message_id
|
||||||
|
|
||||||
def log_tool_execution_start(self, agent_id: str, tool_name: str, args: dict[str, Any]) -> int:
|
def log_tool_execution_start(
|
||||||
|
self,
|
||||||
|
agent_id: str,
|
||||||
|
tool_name: str,
|
||||||
|
args: dict[str, Any],
|
||||||
|
) -> int:
|
||||||
execution_id = self._next_execution_id
|
execution_id = self._next_execution_id
|
||||||
self._next_execution_id += 1
|
self._next_execution_id += 1
|
||||||
|
|
||||||
@@ -241,18 +508,67 @@ class Tracer:
|
|||||||
if agent_id in self.agents:
|
if agent_id in self.agents:
|
||||||
self.agents[agent_id]["tool_executions"].append(execution_id)
|
self.agents[agent_id]["tool_executions"].append(execution_id)
|
||||||
|
|
||||||
|
self._emit_event(
|
||||||
|
"tool.execution.started",
|
||||||
|
actor={
|
||||||
|
"agent_id": agent_id,
|
||||||
|
"tool_name": tool_name,
|
||||||
|
"execution_id": execution_id,
|
||||||
|
},
|
||||||
|
payload={"args": args},
|
||||||
|
status="running",
|
||||||
|
source="strix.tools",
|
||||||
|
)
|
||||||
|
|
||||||
return execution_id
|
return execution_id
|
||||||
|
|
||||||
def update_tool_execution(
|
def update_tool_execution(
|
||||||
self, execution_id: int, status: str, result: Any | None = None
|
self,
|
||||||
|
execution_id: int,
|
||||||
|
status: str,
|
||||||
|
result: Any | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
if execution_id in self.tool_executions:
|
if execution_id not in self.tool_executions:
|
||||||
self.tool_executions[execution_id]["status"] = status
|
return
|
||||||
self.tool_executions[execution_id]["result"] = result
|
|
||||||
self.tool_executions[execution_id]["completed_at"] = datetime.now(UTC).isoformat()
|
tool_data = self.tool_executions[execution_id]
|
||||||
|
tool_data["status"] = status
|
||||||
|
tool_data["result"] = result
|
||||||
|
tool_data["completed_at"] = datetime.now(UTC).isoformat()
|
||||||
|
|
||||||
|
tool_name = str(tool_data.get("tool_name", "unknown"))
|
||||||
|
agent_id = str(tool_data.get("agent_id", "unknown"))
|
||||||
|
error_payload = result if status in {"error", "failed"} else None
|
||||||
|
|
||||||
|
self._emit_event(
|
||||||
|
"tool.execution.updated",
|
||||||
|
actor={
|
||||||
|
"agent_id": agent_id,
|
||||||
|
"tool_name": tool_name,
|
||||||
|
"execution_id": execution_id,
|
||||||
|
},
|
||||||
|
payload={"result": result},
|
||||||
|
status=status,
|
||||||
|
error=error_payload,
|
||||||
|
source="strix.tools",
|
||||||
|
)
|
||||||
|
|
||||||
|
if tool_name == "create_vulnerability_report":
|
||||||
|
finding_status = "reviewed" if status == "completed" else "rejected"
|
||||||
|
self._emit_event(
|
||||||
|
"finding.reviewed",
|
||||||
|
actor={"agent_id": agent_id, "tool_name": tool_name},
|
||||||
|
payload={"execution_id": execution_id, "result": result},
|
||||||
|
status=finding_status,
|
||||||
|
error=error_payload,
|
||||||
|
source="strix.findings",
|
||||||
|
)
|
||||||
|
|
||||||
def update_agent_status(
|
def update_agent_status(
|
||||||
self, agent_id: str, status: str, error_message: str | None = None
|
self,
|
||||||
|
agent_id: str,
|
||||||
|
status: str,
|
||||||
|
error_message: str | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
if agent_id in self.agents:
|
if agent_id in self.agents:
|
||||||
self.agents[agent_id]["status"] = status
|
self.agents[agent_id]["status"] = status
|
||||||
@@ -260,6 +576,15 @@ class Tracer:
|
|||||||
if error_message:
|
if error_message:
|
||||||
self.agents[agent_id]["error_message"] = error_message
|
self.agents[agent_id]["error_message"] = error_message
|
||||||
|
|
||||||
|
self._emit_event(
|
||||||
|
"agent.status.updated",
|
||||||
|
actor={"agent_id": agent_id},
|
||||||
|
payload={"error_message": error_message},
|
||||||
|
status=status,
|
||||||
|
error=error_message,
|
||||||
|
source="strix.agents",
|
||||||
|
)
|
||||||
|
|
||||||
def set_scan_config(self, config: dict[str, Any]) -> None:
|
def set_scan_config(self, config: dict[str, Any]) -> None:
|
||||||
self.scan_config = config
|
self.scan_config = config
|
||||||
self.run_metadata.update(
|
self.run_metadata.update(
|
||||||
@@ -269,13 +594,29 @@ class Tracer:
|
|||||||
"max_iterations": config.get("max_iterations", 200),
|
"max_iterations": config.get("max_iterations", 200),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
self.get_run_dir()
|
self._set_association_properties(
|
||||||
|
{
|
||||||
|
"run_id": self.run_id,
|
||||||
|
"run_name": self.run_name or "",
|
||||||
|
"targets": config.get("targets", []),
|
||||||
|
"max_iterations": config.get("max_iterations", 200),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
self._emit_event(
|
||||||
|
"run.configured",
|
||||||
|
payload={"scan_config": config},
|
||||||
|
status="configured",
|
||||||
|
source="strix.run",
|
||||||
|
)
|
||||||
|
|
||||||
def save_run_data(self, mark_complete: bool = False) -> None: # noqa: PLR0912, PLR0915
|
def save_run_data(self, mark_complete: bool = False) -> None:
|
||||||
try:
|
try:
|
||||||
run_dir = self.get_run_dir()
|
run_dir = self.get_run_dir()
|
||||||
if mark_complete:
|
if mark_complete:
|
||||||
self.end_time = datetime.now(UTC).isoformat()
|
if self.end_time is None:
|
||||||
|
self.end_time = datetime.now(UTC).isoformat()
|
||||||
|
self.run_metadata["end_time"] = self.end_time
|
||||||
|
self.run_metadata["status"] = "completed"
|
||||||
|
|
||||||
if self.final_scan_result:
|
if self.final_scan_result:
|
||||||
penetration_test_report_file = run_dir / "penetration_test_report.md"
|
penetration_test_report_file = run_dir / "penetration_test_report.md"
|
||||||
@@ -286,7 +627,8 @@ class Tracer:
|
|||||||
)
|
)
|
||||||
f.write(f"{self.final_scan_result}\n")
|
f.write(f"{self.final_scan_result}\n")
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Saved final penetration test report to: {penetration_test_report_file}"
|
"Saved final penetration test report to: %s",
|
||||||
|
penetration_test_report_file,
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.vulnerability_reports:
|
if self.vulnerability_reports:
|
||||||
@@ -302,7 +644,10 @@ class Tracer:
|
|||||||
severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
|
severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
|
||||||
sorted_reports = sorted(
|
sorted_reports = sorted(
|
||||||
self.vulnerability_reports,
|
self.vulnerability_reports,
|
||||||
key=lambda x: (severity_order.get(x["severity"], 5), x["timestamp"]),
|
key=lambda report: (
|
||||||
|
severity_order.get(report["severity"], 5),
|
||||||
|
report["timestamp"],
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
for report in new_reports:
|
for report in new_reports:
|
||||||
@@ -329,8 +674,8 @@ class Tracer:
|
|||||||
f.write(f"**{label}:** {value}\n")
|
f.write(f"**{label}:** {value}\n")
|
||||||
|
|
||||||
f.write("\n## Description\n\n")
|
f.write("\n## Description\n\n")
|
||||||
desc = report.get("description") or "No description provided."
|
description = report.get("description") or "No description provided."
|
||||||
f.write(f"{desc}\n\n")
|
f.write(f"{description}\n\n")
|
||||||
|
|
||||||
if report.get("impact"):
|
if report.get("impact"):
|
||||||
f.write("## Impact\n\n")
|
f.write("## Impact\n\n")
|
||||||
@@ -404,11 +749,25 @@ class Tracer:
|
|||||||
|
|
||||||
if new_reports:
|
if new_reports:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Saved {len(new_reports)} new vulnerability report(s) to: {vuln_dir}"
|
"Saved %d new vulnerability report(s) to: %s",
|
||||||
|
len(new_reports),
|
||||||
|
vuln_dir,
|
||||||
)
|
)
|
||||||
logger.info(f"Updated vulnerability index: {vuln_csv_file}")
|
logger.info("Updated vulnerability index: %s", vuln_csv_file)
|
||||||
|
|
||||||
logger.info(f"📊 Essential scan data saved to: {run_dir}")
|
logger.info("📊 Essential scan data saved to: %s", run_dir)
|
||||||
|
if mark_complete and not self._run_completed_emitted:
|
||||||
|
self._emit_event(
|
||||||
|
"run.completed",
|
||||||
|
payload={
|
||||||
|
"duration_seconds": self._calculate_duration(),
|
||||||
|
"vulnerability_count": len(self.vulnerability_reports),
|
||||||
|
},
|
||||||
|
status="completed",
|
||||||
|
source="strix.run",
|
||||||
|
include_run_metadata=True,
|
||||||
|
)
|
||||||
|
self._run_completed_emitted = True
|
||||||
|
|
||||||
except (OSError, RuntimeError):
|
except (OSError, RuntimeError):
|
||||||
logger.exception("Failed to save scan data")
|
logger.exception("Failed to save scan data")
|
||||||
|
|||||||
413
strix/telemetry/utils.py
Normal file
413
strix/telemetry/utils.py
Normal file
@@ -0,0 +1,413 @@
|
|||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import threading
|
||||||
|
from collections.abc import Callable, Sequence
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from opentelemetry import trace
|
||||||
|
from opentelemetry.sdk.trace import ReadableSpan, TracerProvider
|
||||||
|
from opentelemetry.sdk.trace.export import (
|
||||||
|
BatchSpanProcessor,
|
||||||
|
SimpleSpanProcessor,
|
||||||
|
SpanExporter,
|
||||||
|
SpanExportResult,
|
||||||
|
)
|
||||||
|
from scrubadub import Scrubber
|
||||||
|
from scrubadub.detectors import RegexDetector
|
||||||
|
from scrubadub.filth import Filth
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_REDACTED = "[REDACTED]"
|
||||||
|
_SCREENSHOT_OMITTED = "[SCREENSHOT_OMITTED]"
|
||||||
|
_SCREENSHOT_KEY_PATTERN = re.compile(r"screenshot", re.IGNORECASE)
|
||||||
|
_SENSITIVE_KEY_PATTERN = re.compile(
|
||||||
|
r"(api[_-]?key|token|secret|password|authorization|cookie|session|credential|private[_-]?key)",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
_SENSITIVE_TOKEN_PATTERN = re.compile(
|
||||||
|
r"(?i)\b("
|
||||||
|
r"bearer\s+[a-z0-9._-]+|"
|
||||||
|
r"sk-[a-z0-9_-]{8,}|"
|
||||||
|
r"gh[pousr]_[a-z0-9_-]{12,}|"
|
||||||
|
r"xox[baprs]-[a-z0-9-]{12,}"
|
||||||
|
r")\b"
|
||||||
|
)
|
||||||
|
_SCRUBADUB_PLACEHOLDER_PATTERN = re.compile(r"\{\{[^}]+\}\}")
|
||||||
|
_EVENTS_FILE_LOCKS_LOCK = threading.Lock()
|
||||||
|
_EVENTS_FILE_LOCKS: dict[str, threading.Lock] = {}
|
||||||
|
_NOISY_OTEL_CONTENT_PREFIXES = (
|
||||||
|
"gen_ai.prompt.",
|
||||||
|
"gen_ai.completion.",
|
||||||
|
"llm.input_messages.",
|
||||||
|
"llm.output_messages.",
|
||||||
|
)
|
||||||
|
_NOISY_OTEL_EXACT_KEYS = {
|
||||||
|
"llm.input",
|
||||||
|
"llm.output",
|
||||||
|
"llm.prompt",
|
||||||
|
"llm.completion",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class _SecretFilth(Filth): # type: ignore[misc]
|
||||||
|
type = "secret"
|
||||||
|
|
||||||
|
|
||||||
|
class _SecretTokenDetector(RegexDetector): # type: ignore[misc]
|
||||||
|
name = "strix_secret_token_detector"
|
||||||
|
filth_cls = _SecretFilth
|
||||||
|
regex = _SENSITIVE_TOKEN_PATTERN
|
||||||
|
|
||||||
|
|
||||||
|
class TelemetrySanitizer:
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self._scrubber = Scrubber(detector_list=[_SecretTokenDetector])
|
||||||
|
|
||||||
|
def sanitize(self, data: Any, key_hint: str | None = None) -> Any: # noqa: PLR0911
|
||||||
|
if data is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if isinstance(data, dict):
|
||||||
|
sanitized: dict[str, Any] = {}
|
||||||
|
for key, value in data.items():
|
||||||
|
key_str = str(key)
|
||||||
|
if _SCREENSHOT_KEY_PATTERN.search(key_str):
|
||||||
|
sanitized[key_str] = _SCREENSHOT_OMITTED
|
||||||
|
elif _SENSITIVE_KEY_PATTERN.search(key_str):
|
||||||
|
sanitized[key_str] = _REDACTED
|
||||||
|
else:
|
||||||
|
sanitized[key_str] = self.sanitize(value, key_hint=key_str)
|
||||||
|
return sanitized
|
||||||
|
|
||||||
|
if isinstance(data, list):
|
||||||
|
return [self.sanitize(item, key_hint=key_hint) for item in data]
|
||||||
|
|
||||||
|
if isinstance(data, tuple):
|
||||||
|
return [self.sanitize(item, key_hint=key_hint) for item in data]
|
||||||
|
|
||||||
|
if isinstance(data, str):
|
||||||
|
if key_hint and _SENSITIVE_KEY_PATTERN.search(key_hint):
|
||||||
|
return _REDACTED
|
||||||
|
|
||||||
|
cleaned = self._scrubber.clean(data)
|
||||||
|
return _SCRUBADUB_PLACEHOLDER_PATTERN.sub(_REDACTED, cleaned)
|
||||||
|
|
||||||
|
if isinstance(data, int | float | bool):
|
||||||
|
return data
|
||||||
|
|
||||||
|
return str(data)
|
||||||
|
|
||||||
|
|
||||||
|
def format_trace_id(trace_id: int | None) -> str | None:
|
||||||
|
if trace_id is None or trace_id == 0:
|
||||||
|
return None
|
||||||
|
return f"{trace_id:032x}"
|
||||||
|
|
||||||
|
|
||||||
|
def format_span_id(span_id: int | None) -> str | None:
|
||||||
|
if span_id is None or span_id == 0:
|
||||||
|
return None
|
||||||
|
return f"{span_id:016x}"
|
||||||
|
|
||||||
|
|
||||||
|
def iso_from_unix_ns(unix_ns: int | None) -> str | None:
|
||||||
|
if unix_ns is None:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return datetime.fromtimestamp(unix_ns / 1_000_000_000, tz=UTC).isoformat()
|
||||||
|
except (OSError, OverflowError, ValueError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def get_events_write_lock(output_path: Path) -> threading.Lock:
|
||||||
|
path_key = str(output_path.resolve(strict=False))
|
||||||
|
with _EVENTS_FILE_LOCKS_LOCK:
|
||||||
|
lock = _EVENTS_FILE_LOCKS.get(path_key)
|
||||||
|
if lock is None:
|
||||||
|
lock = threading.Lock()
|
||||||
|
_EVENTS_FILE_LOCKS[path_key] = lock
|
||||||
|
return lock
|
||||||
|
|
||||||
|
|
||||||
|
def reset_events_write_locks() -> None:
|
||||||
|
with _EVENTS_FILE_LOCKS_LOCK:
|
||||||
|
_EVENTS_FILE_LOCKS.clear()
|
||||||
|
|
||||||
|
|
||||||
|
def append_jsonl_record(output_path: Path, record: dict[str, Any]) -> None:
|
||||||
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
with get_events_write_lock(output_path), output_path.open("a", encoding="utf-8") as f:
|
||||||
|
f.write(json.dumps(record, ensure_ascii=False) + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
def default_resource_attributes() -> dict[str, str]:
|
||||||
|
return {
|
||||||
|
"service.name": "strix-agent",
|
||||||
|
"service.namespace": "strix",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def parse_traceloop_headers(raw_headers: str) -> dict[str, str]:
|
||||||
|
headers = raw_headers.strip()
|
||||||
|
if not headers:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
if headers.startswith("{"):
|
||||||
|
try:
|
||||||
|
parsed = json.loads(headers)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.warning("Invalid TRACELOOP_HEADERS JSON, ignoring custom headers")
|
||||||
|
return {}
|
||||||
|
if isinstance(parsed, dict):
|
||||||
|
return {str(key): str(value) for key, value in parsed.items() if value is not None}
|
||||||
|
logger.warning("TRACELOOP_HEADERS JSON must be an object, ignoring custom headers")
|
||||||
|
return {}
|
||||||
|
|
||||||
|
result: dict[str, str] = {}
|
||||||
|
for part in headers.split(","):
|
||||||
|
key, sep, value = part.partition("=")
|
||||||
|
if not sep:
|
||||||
|
continue
|
||||||
|
key = key.strip()
|
||||||
|
value = value.strip()
|
||||||
|
if key and value:
|
||||||
|
result[key] = value
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def prune_otel_span_attributes(attributes: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
"""Drop high-volume LLM payload attributes to keep JSONL event files compact."""
|
||||||
|
filtered: dict[str, Any] = {}
|
||||||
|
filtered_count = 0
|
||||||
|
|
||||||
|
for key, value in attributes.items():
|
||||||
|
key_str = str(key)
|
||||||
|
if key_str in _NOISY_OTEL_EXACT_KEYS:
|
||||||
|
filtered_count += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
if key_str.endswith(".content") and key_str.startswith(_NOISY_OTEL_CONTENT_PREFIXES):
|
||||||
|
filtered_count += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
filtered[key_str] = value
|
||||||
|
|
||||||
|
if filtered_count:
|
||||||
|
filtered["strix.filtered_attributes_count"] = filtered_count
|
||||||
|
|
||||||
|
return filtered
|
||||||
|
|
||||||
|
|
||||||
|
class JsonlSpanExporter(SpanExporter): # type: ignore[misc]
|
||||||
|
"""Append OTEL spans to JSONL for local run artifacts."""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
output_path_getter: Callable[[], Path],
|
||||||
|
run_metadata_getter: Callable[[], dict[str, Any]],
|
||||||
|
sanitizer: Callable[[Any], Any],
|
||||||
|
write_lock_getter: Callable[[Path], threading.Lock],
|
||||||
|
):
|
||||||
|
self._output_path_getter = output_path_getter
|
||||||
|
self._run_metadata_getter = run_metadata_getter
|
||||||
|
self._sanitize = sanitizer
|
||||||
|
self._write_lock_getter = write_lock_getter
|
||||||
|
|
||||||
|
def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
|
||||||
|
records: list[dict[str, Any]] = []
|
||||||
|
for span in spans:
|
||||||
|
attributes = prune_otel_span_attributes(dict(span.attributes or {}))
|
||||||
|
if "strix.event_type" in attributes:
|
||||||
|
# Tracer events are written directly in Tracer._emit_event.
|
||||||
|
continue
|
||||||
|
records.append(self._span_to_record(span, attributes))
|
||||||
|
|
||||||
|
if not records:
|
||||||
|
return SpanExportResult.SUCCESS
|
||||||
|
|
||||||
|
try:
|
||||||
|
output_path = self._output_path_getter()
|
||||||
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
with self._write_lock_getter(output_path), output_path.open("a", encoding="utf-8") as f:
|
||||||
|
for record in records:
|
||||||
|
f.write(json.dumps(record, ensure_ascii=False) + "\n")
|
||||||
|
except OSError:
|
||||||
|
logger.exception("Failed to write OTEL span records to JSONL")
|
||||||
|
return SpanExportResult.FAILURE
|
||||||
|
|
||||||
|
return SpanExportResult.SUCCESS
|
||||||
|
|
||||||
|
def shutdown(self) -> None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def force_flush(self, timeout_millis: int = 30_000) -> bool: # noqa: ARG002
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _span_to_record(
|
||||||
|
self,
|
||||||
|
span: ReadableSpan,
|
||||||
|
attributes: dict[str, Any],
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
span_context = span.get_span_context()
|
||||||
|
parent_context = span.parent
|
||||||
|
|
||||||
|
status = None
|
||||||
|
if span.status and span.status.status_code:
|
||||||
|
status = span.status.status_code.name.lower()
|
||||||
|
|
||||||
|
event_type = str(attributes.get("gen_ai.operation.name", span.name))
|
||||||
|
run_metadata = self._run_metadata_getter()
|
||||||
|
run_id_attr = (
|
||||||
|
attributes.get("strix.run_id")
|
||||||
|
or attributes.get("strix_run_id")
|
||||||
|
or run_metadata.get("run_id")
|
||||||
|
or span.resource.attributes.get("strix.run_id")
|
||||||
|
)
|
||||||
|
|
||||||
|
record: dict[str, Any] = {
|
||||||
|
"timestamp": iso_from_unix_ns(span.end_time) or datetime.now(UTC).isoformat(),
|
||||||
|
"event_type": event_type,
|
||||||
|
"run_id": str(run_id_attr or run_metadata.get("run_id") or ""),
|
||||||
|
"trace_id": format_trace_id(span_context.trace_id),
|
||||||
|
"span_id": format_span_id(span_context.span_id),
|
||||||
|
"parent_span_id": format_span_id(parent_context.span_id if parent_context else None),
|
||||||
|
"actor": None,
|
||||||
|
"payload": None,
|
||||||
|
"status": status,
|
||||||
|
"error": None,
|
||||||
|
"source": "otel.span",
|
||||||
|
"span_name": span.name,
|
||||||
|
"span_kind": span.kind.name.lower(),
|
||||||
|
"attributes": self._sanitize(attributes),
|
||||||
|
}
|
||||||
|
|
||||||
|
if span.events:
|
||||||
|
record["otel_events"] = self._sanitize(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"name": event.name,
|
||||||
|
"timestamp": iso_from_unix_ns(event.timestamp),
|
||||||
|
"attributes": dict(event.attributes or {}),
|
||||||
|
}
|
||||||
|
for event in span.events
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
def bootstrap_otel(
|
||||||
|
*,
|
||||||
|
bootstrapped: bool,
|
||||||
|
remote_enabled_state: bool,
|
||||||
|
bootstrap_lock: threading.Lock,
|
||||||
|
traceloop: Any,
|
||||||
|
base_url: str,
|
||||||
|
api_key: str,
|
||||||
|
headers_raw: str,
|
||||||
|
output_path_getter: Callable[[], Path],
|
||||||
|
run_metadata_getter: Callable[[], dict[str, Any]],
|
||||||
|
sanitizer: Callable[[Any], Any],
|
||||||
|
write_lock_getter: Callable[[Path], threading.Lock],
|
||||||
|
tracer_name: str = "strix.telemetry.tracer",
|
||||||
|
) -> tuple[Any, bool, bool, bool]:
|
||||||
|
with bootstrap_lock:
|
||||||
|
if bootstrapped:
|
||||||
|
return (
|
||||||
|
trace.get_tracer(tracer_name),
|
||||||
|
remote_enabled_state,
|
||||||
|
bootstrapped,
|
||||||
|
remote_enabled_state,
|
||||||
|
)
|
||||||
|
|
||||||
|
local_exporter = JsonlSpanExporter(
|
||||||
|
output_path_getter=output_path_getter,
|
||||||
|
run_metadata_getter=run_metadata_getter,
|
||||||
|
sanitizer=sanitizer,
|
||||||
|
write_lock_getter=write_lock_getter,
|
||||||
|
)
|
||||||
|
local_processor = SimpleSpanProcessor(local_exporter)
|
||||||
|
|
||||||
|
headers = parse_traceloop_headers(headers_raw)
|
||||||
|
remote_enabled = bool(base_url and api_key)
|
||||||
|
otlp_headers = headers
|
||||||
|
if remote_enabled:
|
||||||
|
otlp_headers = {"Authorization": f"Bearer {api_key}"}
|
||||||
|
otlp_headers.update(headers)
|
||||||
|
|
||||||
|
otel_init_ok = False
|
||||||
|
if traceloop:
|
||||||
|
try:
|
||||||
|
from traceloop.sdk.instruments import Instruments
|
||||||
|
|
||||||
|
init_kwargs: dict[str, Any] = {
|
||||||
|
"app_name": "strix-agent",
|
||||||
|
"processor": local_processor,
|
||||||
|
"telemetry_enabled": False,
|
||||||
|
"resource_attributes": default_resource_attributes(),
|
||||||
|
"block_instruments": {
|
||||||
|
Instruments.URLLIB3,
|
||||||
|
Instruments.REQUESTS,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if remote_enabled:
|
||||||
|
init_kwargs.update(
|
||||||
|
{
|
||||||
|
"api_endpoint": base_url,
|
||||||
|
"api_key": api_key,
|
||||||
|
"headers": headers,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
import io
|
||||||
|
import sys
|
||||||
|
|
||||||
|
_stdout = sys.stdout
|
||||||
|
sys.stdout = io.StringIO()
|
||||||
|
try:
|
||||||
|
traceloop.init(**init_kwargs)
|
||||||
|
finally:
|
||||||
|
sys.stdout = _stdout
|
||||||
|
otel_init_ok = True
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Failed to initialize Traceloop/OpenLLMetry")
|
||||||
|
remote_enabled = False
|
||||||
|
|
||||||
|
if not otel_init_ok:
|
||||||
|
from opentelemetry.sdk.resources import Resource
|
||||||
|
|
||||||
|
provider = TracerProvider(resource=Resource.create(default_resource_attributes()))
|
||||||
|
provider.add_span_processor(local_processor)
|
||||||
|
if remote_enabled:
|
||||||
|
try:
|
||||||
|
from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
|
||||||
|
OTLPSpanExporter,
|
||||||
|
)
|
||||||
|
|
||||||
|
endpoint = base_url.rstrip("/") + "/v1/traces"
|
||||||
|
provider.add_span_processor(
|
||||||
|
BatchSpanProcessor(
|
||||||
|
OTLPSpanExporter(endpoint=endpoint, headers=otlp_headers)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Failed to configure OTLP HTTP exporter")
|
||||||
|
remote_enabled = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
trace.set_tracer_provider(provider)
|
||||||
|
otel_init_ok = True
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Failed to set OpenTelemetry tracer provider")
|
||||||
|
remote_enabled = False
|
||||||
|
|
||||||
|
otel_tracer = trace.get_tracer(tracer_name)
|
||||||
|
if otel_init_ok:
|
||||||
|
return otel_tracer, remote_enabled, True, remote_enabled
|
||||||
|
|
||||||
|
return otel_tracer, remote_enabled, bootstrapped, remote_enabled_state
|
||||||
1
tests/config/__init__.py
Normal file
1
tests/config/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Tests for strix.config module."""
|
||||||
55
tests/config/test_config_telemetry.py
Normal file
55
tests/config/test_config_telemetry.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
from strix.config.config import Config
|
||||||
|
|
||||||
|
|
||||||
|
def test_traceloop_vars_are_tracked() -> None:
|
||||||
|
tracked = Config.tracked_vars()
|
||||||
|
|
||||||
|
assert "STRIX_OTEL_TELEMETRY" in tracked
|
||||||
|
assert "STRIX_POSTHOG_TELEMETRY" in tracked
|
||||||
|
assert "TRACELOOP_BASE_URL" in tracked
|
||||||
|
assert "TRACELOOP_API_KEY" in tracked
|
||||||
|
assert "TRACELOOP_HEADERS" in tracked
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_saved_uses_saved_traceloop_vars(monkeypatch, tmp_path) -> None:
|
||||||
|
config_path = tmp_path / "cli-config.json"
|
||||||
|
config_path.write_text(
|
||||||
|
json.dumps(
|
||||||
|
{
|
||||||
|
"env": {
|
||||||
|
"TRACELOOP_BASE_URL": "https://otel.example.com",
|
||||||
|
"TRACELOOP_API_KEY": "api-key",
|
||||||
|
"TRACELOOP_HEADERS": "x-test=value",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
),
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
monkeypatch.setattr(Config, "_config_file_override", config_path)
|
||||||
|
monkeypatch.delenv("TRACELOOP_BASE_URL", raising=False)
|
||||||
|
monkeypatch.delenv("TRACELOOP_API_KEY", raising=False)
|
||||||
|
monkeypatch.delenv("TRACELOOP_HEADERS", raising=False)
|
||||||
|
|
||||||
|
applied = Config.apply_saved()
|
||||||
|
|
||||||
|
assert applied["TRACELOOP_BASE_URL"] == "https://otel.example.com"
|
||||||
|
assert applied["TRACELOOP_API_KEY"] == "api-key"
|
||||||
|
assert applied["TRACELOOP_HEADERS"] == "x-test=value"
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_saved_respects_existing_env_traceloop_vars(monkeypatch, tmp_path) -> None:
|
||||||
|
config_path = tmp_path / "cli-config.json"
|
||||||
|
config_path.write_text(
|
||||||
|
json.dumps({"env": {"TRACELOOP_BASE_URL": "https://otel.example.com"}}),
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
monkeypatch.setattr(Config, "_config_file_override", config_path)
|
||||||
|
monkeypatch.setenv("TRACELOOP_BASE_URL", "https://env.example.com")
|
||||||
|
|
||||||
|
applied = Config.apply_saved(force=False)
|
||||||
|
|
||||||
|
assert "TRACELOOP_BASE_URL" not in applied
|
||||||
15
tests/llm/test_llm_otel.py
Normal file
15
tests/llm/test_llm_otel.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
import litellm
|
||||||
|
|
||||||
|
from strix.llm.config import LLMConfig
|
||||||
|
from strix.llm.llm import LLM
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_does_not_modify_litellm_callbacks(monkeypatch) -> None:
|
||||||
|
monkeypatch.setenv("STRIX_TELEMETRY", "1")
|
||||||
|
monkeypatch.setenv("STRIX_OTEL_TELEMETRY", "1")
|
||||||
|
monkeypatch.setattr(litellm, "callbacks", ["custom-callback"])
|
||||||
|
|
||||||
|
llm = LLM(LLMConfig(model_name="openai/gpt-5"), agent_name=None)
|
||||||
|
|
||||||
|
assert llm is not None
|
||||||
|
assert litellm.callbacks == ["custom-callback"]
|
||||||
28
tests/telemetry/test_flags.py
Normal file
28
tests/telemetry/test_flags.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
from strix.telemetry.flags import is_otel_enabled, is_posthog_enabled
|
||||||
|
|
||||||
|
|
||||||
|
def test_flags_fallback_to_strix_telemetry(monkeypatch) -> None:
|
||||||
|
monkeypatch.delenv("STRIX_OTEL_TELEMETRY", raising=False)
|
||||||
|
monkeypatch.delenv("STRIX_POSTHOG_TELEMETRY", raising=False)
|
||||||
|
monkeypatch.setenv("STRIX_TELEMETRY", "0")
|
||||||
|
|
||||||
|
assert is_otel_enabled() is False
|
||||||
|
assert is_posthog_enabled() is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_otel_flag_overrides_global_telemetry(monkeypatch) -> None:
|
||||||
|
monkeypatch.setenv("STRIX_TELEMETRY", "0")
|
||||||
|
monkeypatch.setenv("STRIX_OTEL_TELEMETRY", "1")
|
||||||
|
monkeypatch.delenv("STRIX_POSTHOG_TELEMETRY", raising=False)
|
||||||
|
|
||||||
|
assert is_otel_enabled() is True
|
||||||
|
assert is_posthog_enabled() is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_posthog_flag_overrides_global_telemetry(monkeypatch) -> None:
|
||||||
|
monkeypatch.setenv("STRIX_TELEMETRY", "0")
|
||||||
|
monkeypatch.setenv("STRIX_POSTHOG_TELEMETRY", "1")
|
||||||
|
monkeypatch.delenv("STRIX_OTEL_TELEMETRY", raising=False)
|
||||||
|
|
||||||
|
assert is_otel_enabled() is False
|
||||||
|
assert is_posthog_enabled() is True
|
||||||
379
tests/telemetry/test_tracer.py
Normal file
379
tests/telemetry/test_tracer.py
Normal file
@@ -0,0 +1,379 @@
|
|||||||
|
import json
|
||||||
|
import sys
|
||||||
|
import types
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, ClassVar
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from opentelemetry.sdk.trace.export import SimpleSpanProcessor, SpanExportResult
|
||||||
|
|
||||||
|
from strix.telemetry import tracer as tracer_module
|
||||||
|
from strix.telemetry import utils as telemetry_utils
|
||||||
|
from strix.telemetry.tracer import Tracer, set_global_tracer
|
||||||
|
|
||||||
|
|
||||||
|
def _load_events(events_path: Path) -> list[dict[str, Any]]:
|
||||||
|
lines = events_path.read_text(encoding="utf-8").splitlines()
|
||||||
|
return [json.loads(line) for line in lines if line]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _reset_tracer_globals(monkeypatch) -> None:
|
||||||
|
monkeypatch.setattr(tracer_module, "_global_tracer", None)
|
||||||
|
monkeypatch.setattr(tracer_module, "_OTEL_BOOTSTRAPPED", False)
|
||||||
|
monkeypatch.setattr(tracer_module, "_OTEL_REMOTE_ENABLED", False)
|
||||||
|
telemetry_utils.reset_events_write_locks()
|
||||||
|
monkeypatch.delenv("STRIX_TELEMETRY", raising=False)
|
||||||
|
monkeypatch.delenv("STRIX_OTEL_TELEMETRY", raising=False)
|
||||||
|
monkeypatch.delenv("STRIX_POSTHOG_TELEMETRY", raising=False)
|
||||||
|
monkeypatch.delenv("TRACELOOP_BASE_URL", raising=False)
|
||||||
|
monkeypatch.delenv("TRACELOOP_API_KEY", raising=False)
|
||||||
|
monkeypatch.delenv("TRACELOOP_HEADERS", raising=False)
|
||||||
|
|
||||||
|
|
||||||
|
def test_tracer_local_mode_writes_jsonl_with_correlation(monkeypatch, tmp_path) -> None:
|
||||||
|
monkeypatch.chdir(tmp_path)
|
||||||
|
|
||||||
|
tracer = Tracer("local-observability")
|
||||||
|
set_global_tracer(tracer)
|
||||||
|
tracer.set_scan_config({"targets": ["https://example.com"], "user_instructions": "focus auth"})
|
||||||
|
tracer.log_agent_creation("agent-1", "Root Agent", "scan auth")
|
||||||
|
tracer.log_chat_message("starting scan", "user", "agent-1")
|
||||||
|
execution_id = tracer.log_tool_execution_start(
|
||||||
|
"agent-1",
|
||||||
|
"send_request",
|
||||||
|
{"url": "https://example.com/login"},
|
||||||
|
)
|
||||||
|
tracer.update_tool_execution(execution_id, "completed", {"status_code": 200, "body": "ok"})
|
||||||
|
|
||||||
|
events_path = tmp_path / "strix_runs" / "local-observability" / "events.jsonl"
|
||||||
|
assert events_path.exists()
|
||||||
|
|
||||||
|
events = _load_events(events_path)
|
||||||
|
assert any(event["event_type"] == "tool.execution.updated" for event in events)
|
||||||
|
assert not any(event["event_type"] == "traffic.intercepted" for event in events)
|
||||||
|
|
||||||
|
for event in events:
|
||||||
|
assert event["run_id"] == "local-observability"
|
||||||
|
assert event["trace_id"]
|
||||||
|
assert event["span_id"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_tracer_redacts_sensitive_payloads(monkeypatch, tmp_path) -> None:
|
||||||
|
monkeypatch.chdir(tmp_path)
|
||||||
|
|
||||||
|
tracer = Tracer("redaction-run")
|
||||||
|
set_global_tracer(tracer)
|
||||||
|
execution_id = tracer.log_tool_execution_start(
|
||||||
|
"agent-1",
|
||||||
|
"send_request",
|
||||||
|
{
|
||||||
|
"url": "https://example.com",
|
||||||
|
"api_key": "sk-secret-token-value",
|
||||||
|
"authorization": "Bearer super-secret-token",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
tracer.update_tool_execution(
|
||||||
|
execution_id,
|
||||||
|
"error",
|
||||||
|
{"error": "request failed with token sk-secret-token-value"},
|
||||||
|
)
|
||||||
|
|
||||||
|
events_path = tmp_path / "strix_runs" / "redaction-run" / "events.jsonl"
|
||||||
|
events = _load_events(events_path)
|
||||||
|
serialized = json.dumps(events)
|
||||||
|
|
||||||
|
assert "sk-secret-token-value" not in serialized
|
||||||
|
assert "super-secret-token" not in serialized
|
||||||
|
assert "[REDACTED]" in serialized
|
||||||
|
|
||||||
|
|
||||||
|
def test_tracer_remote_mode_configures_traceloop_export(monkeypatch, tmp_path) -> None:
|
||||||
|
monkeypatch.chdir(tmp_path)
|
||||||
|
|
||||||
|
class FakeTraceloop:
|
||||||
|
init_calls: ClassVar[list[dict[str, Any]]] = []
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def init(**kwargs: Any) -> None:
|
||||||
|
FakeTraceloop.init_calls.append(kwargs)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def set_association_properties(properties: dict[str, Any]) -> None: # noqa: ARG004
|
||||||
|
return None
|
||||||
|
|
||||||
|
monkeypatch.setattr(tracer_module, "Traceloop", FakeTraceloop)
|
||||||
|
monkeypatch.setenv("TRACELOOP_BASE_URL", "https://otel.example.com")
|
||||||
|
monkeypatch.setenv("TRACELOOP_API_KEY", "test-api-key")
|
||||||
|
monkeypatch.setenv("TRACELOOP_HEADERS", '{"x-custom":"header"}')
|
||||||
|
|
||||||
|
tracer = Tracer("remote-observability")
|
||||||
|
set_global_tracer(tracer)
|
||||||
|
tracer.log_chat_message("hello", "user", "agent-1")
|
||||||
|
|
||||||
|
assert tracer._remote_export_enabled is True
|
||||||
|
assert FakeTraceloop.init_calls
|
||||||
|
init_kwargs = FakeTraceloop.init_calls[-1]
|
||||||
|
assert init_kwargs["api_endpoint"] == "https://otel.example.com"
|
||||||
|
assert init_kwargs["api_key"] == "test-api-key"
|
||||||
|
assert init_kwargs["headers"] == {"x-custom": "header"}
|
||||||
|
assert isinstance(init_kwargs["processor"], SimpleSpanProcessor)
|
||||||
|
assert "strix.run_id" not in init_kwargs["resource_attributes"]
|
||||||
|
assert "strix.run_name" not in init_kwargs["resource_attributes"]
|
||||||
|
|
||||||
|
events_path = tmp_path / "strix_runs" / "remote-observability" / "events.jsonl"
|
||||||
|
events = _load_events(events_path)
|
||||||
|
run_started = next(event for event in events if event["event_type"] == "run.started")
|
||||||
|
assert run_started["payload"]["remote_export_enabled"] is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_tracer_local_mode_avoids_traceloop_remote_endpoint(monkeypatch, tmp_path) -> None:
|
||||||
|
monkeypatch.chdir(tmp_path)
|
||||||
|
|
||||||
|
class FakeTraceloop:
|
||||||
|
init_calls: ClassVar[list[dict[str, Any]]] = []
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def init(**kwargs: Any) -> None:
|
||||||
|
FakeTraceloop.init_calls.append(kwargs)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def set_association_properties(properties: dict[str, Any]) -> None: # noqa: ARG004
|
||||||
|
return None
|
||||||
|
|
||||||
|
monkeypatch.setattr(tracer_module, "Traceloop", FakeTraceloop)
|
||||||
|
|
||||||
|
tracer = Tracer("local-traceloop")
|
||||||
|
set_global_tracer(tracer)
|
||||||
|
tracer.log_chat_message("hello", "user", "agent-1")
|
||||||
|
|
||||||
|
assert FakeTraceloop.init_calls
|
||||||
|
init_kwargs = FakeTraceloop.init_calls[-1]
|
||||||
|
assert "api_endpoint" not in init_kwargs
|
||||||
|
assert "api_key" not in init_kwargs
|
||||||
|
assert "headers" not in init_kwargs
|
||||||
|
assert isinstance(init_kwargs["processor"], SimpleSpanProcessor)
|
||||||
|
assert tracer._remote_export_enabled is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_otlp_fallback_includes_auth_and_custom_headers(monkeypatch, tmp_path) -> None:
|
||||||
|
monkeypatch.chdir(tmp_path)
|
||||||
|
monkeypatch.setattr(tracer_module, "Traceloop", None)
|
||||||
|
monkeypatch.setenv("TRACELOOP_BASE_URL", "https://otel.example.com")
|
||||||
|
monkeypatch.setenv("TRACELOOP_API_KEY", "test-api-key")
|
||||||
|
monkeypatch.setenv("TRACELOOP_HEADERS", '{"x-custom":"header"}')
|
||||||
|
|
||||||
|
captured: dict[str, Any] = {}
|
||||||
|
|
||||||
|
class FakeOTLPSpanExporter:
|
||||||
|
def __init__(self, endpoint: str, headers: dict[str, str] | None = None, **kwargs: Any):
|
||||||
|
captured["endpoint"] = endpoint
|
||||||
|
captured["headers"] = headers or {}
|
||||||
|
captured["kwargs"] = kwargs
|
||||||
|
|
||||||
|
def export(self, spans: Any) -> SpanExportResult: # noqa: ARG002
|
||||||
|
return SpanExportResult.SUCCESS
|
||||||
|
|
||||||
|
def shutdown(self) -> None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def force_flush(self, timeout_millis: int = 30_000) -> bool: # noqa: ARG002
|
||||||
|
return True
|
||||||
|
|
||||||
|
fake_module = types.ModuleType("opentelemetry.exporter.otlp.proto.http.trace_exporter")
|
||||||
|
fake_module.OTLPSpanExporter = FakeOTLPSpanExporter
|
||||||
|
monkeypatch.setitem(
|
||||||
|
sys.modules,
|
||||||
|
"opentelemetry.exporter.otlp.proto.http.trace_exporter",
|
||||||
|
fake_module,
|
||||||
|
)
|
||||||
|
|
||||||
|
tracer = Tracer("otlp-fallback")
|
||||||
|
set_global_tracer(tracer)
|
||||||
|
|
||||||
|
assert tracer._remote_export_enabled is True
|
||||||
|
assert captured["endpoint"] == "https://otel.example.com/v1/traces"
|
||||||
|
assert captured["headers"]["Authorization"] == "Bearer test-api-key"
|
||||||
|
assert captured["headers"]["x-custom"] == "header"
|
||||||
|
|
||||||
|
|
||||||
|
def test_traceloop_init_failure_does_not_mark_bootstrapped_on_provider_failure(
|
||||||
|
monkeypatch, tmp_path
|
||||||
|
) -> None:
|
||||||
|
monkeypatch.chdir(tmp_path)
|
||||||
|
|
||||||
|
class FakeTraceloop:
|
||||||
|
@staticmethod
|
||||||
|
def init(**kwargs: Any) -> None: # noqa: ARG004
|
||||||
|
raise RuntimeError("traceloop init failed")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def set_association_properties(properties: dict[str, Any]) -> None: # noqa: ARG004
|
||||||
|
return None
|
||||||
|
|
||||||
|
monkeypatch.setattr(tracer_module, "Traceloop", FakeTraceloop)
|
||||||
|
|
||||||
|
def _raise_provider_error(provider: Any) -> None:
|
||||||
|
raise RuntimeError("provider setup failed")
|
||||||
|
|
||||||
|
monkeypatch.setattr(tracer_module.trace, "set_tracer_provider", _raise_provider_error)
|
||||||
|
|
||||||
|
tracer = Tracer("bootstrap-failure")
|
||||||
|
set_global_tracer(tracer)
|
||||||
|
|
||||||
|
assert tracer_module._OTEL_BOOTSTRAPPED is False
|
||||||
|
assert tracer._remote_export_enabled is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_run_completed_event_emitted_once(monkeypatch, tmp_path) -> None:
|
||||||
|
monkeypatch.chdir(tmp_path)
|
||||||
|
|
||||||
|
tracer = Tracer("single-complete")
|
||||||
|
set_global_tracer(tracer)
|
||||||
|
tracer.save_run_data(mark_complete=True)
|
||||||
|
tracer.save_run_data(mark_complete=True)
|
||||||
|
|
||||||
|
events_path = tmp_path / "strix_runs" / "single-complete" / "events.jsonl"
|
||||||
|
events = _load_events(events_path)
|
||||||
|
run_completed = [event for event in events if event["event_type"] == "run.completed"]
|
||||||
|
assert len(run_completed) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_events_with_agent_id_include_agent_name(monkeypatch, tmp_path) -> None:
|
||||||
|
monkeypatch.chdir(tmp_path)
|
||||||
|
|
||||||
|
tracer = Tracer("agent-name-enrichment")
|
||||||
|
set_global_tracer(tracer)
|
||||||
|
tracer.log_agent_creation("agent-1", "Root Agent", "scan auth")
|
||||||
|
tracer.log_chat_message("hello", "assistant", "agent-1")
|
||||||
|
|
||||||
|
events_path = tmp_path / "strix_runs" / "agent-name-enrichment" / "events.jsonl"
|
||||||
|
events = _load_events(events_path)
|
||||||
|
chat_event = next(event for event in events if event["event_type"] == "chat.message")
|
||||||
|
|
||||||
|
assert chat_event["actor"]["agent_id"] == "agent-1"
|
||||||
|
assert chat_event["actor"]["agent_name"] == "Root Agent"
|
||||||
|
|
||||||
|
|
||||||
|
def test_run_metadata_is_only_on_run_lifecycle_events(monkeypatch, tmp_path) -> None:
|
||||||
|
monkeypatch.chdir(tmp_path)
|
||||||
|
|
||||||
|
tracer = Tracer("metadata-scope")
|
||||||
|
set_global_tracer(tracer)
|
||||||
|
tracer.log_chat_message("hello", "assistant", "agent-1")
|
||||||
|
tracer.save_run_data(mark_complete=True)
|
||||||
|
|
||||||
|
events_path = tmp_path / "strix_runs" / "metadata-scope" / "events.jsonl"
|
||||||
|
events = _load_events(events_path)
|
||||||
|
|
||||||
|
run_started = next(event for event in events if event["event_type"] == "run.started")
|
||||||
|
run_completed = next(event for event in events if event["event_type"] == "run.completed")
|
||||||
|
chat_event = next(event for event in events if event["event_type"] == "chat.message")
|
||||||
|
|
||||||
|
assert "run_metadata" in run_started
|
||||||
|
assert "run_metadata" in run_completed
|
||||||
|
assert "run_metadata" not in chat_event
|
||||||
|
|
||||||
|
|
||||||
|
def test_set_run_name_resets_cached_paths(monkeypatch, tmp_path) -> None:
|
||||||
|
monkeypatch.chdir(tmp_path)
|
||||||
|
|
||||||
|
tracer = Tracer()
|
||||||
|
set_global_tracer(tracer)
|
||||||
|
old_events_path = tracer.events_file_path
|
||||||
|
|
||||||
|
tracer.set_run_name("renamed-run")
|
||||||
|
tracer.log_chat_message("hello", "assistant", "agent-1")
|
||||||
|
|
||||||
|
new_events_path = tracer.events_file_path
|
||||||
|
assert new_events_path != old_events_path
|
||||||
|
assert new_events_path == tmp_path / "strix_runs" / "renamed-run" / "events.jsonl"
|
||||||
|
|
||||||
|
events = _load_events(new_events_path)
|
||||||
|
assert any(event["event_type"] == "run.started" for event in events)
|
||||||
|
assert any(event["event_type"] == "chat.message" for event in events)
|
||||||
|
|
||||||
|
|
||||||
|
def test_set_run_name_resets_run_completed_flag(monkeypatch, tmp_path) -> None:
    """After a rename, a previously completed tracer may emit run.completed again — once."""
    monkeypatch.chdir(tmp_path)

    flagged_tracer = Tracer()
    set_global_tracer(flagged_tracer)

    # Complete the original run, rename, then complete the renamed run.
    flagged_tracer.save_run_data(mark_complete=True)
    flagged_tracer.set_run_name("renamed-complete")
    flagged_tracer.save_run_data(mark_complete=True)

    renamed_events = _load_events(
        tmp_path / "strix_runs" / "renamed-complete" / "events.jsonl"
    )
    completion_events = [
        entry for entry in renamed_events if entry["event_type"] == "run.completed"
    ]

    assert any(entry["event_type"] == "run.started" for entry in renamed_events)
    # Exactly one completion in the renamed run's file — the flag was reset, not duplicated.
    assert len(completion_events) == 1
def test_set_run_name_updates_traceloop_association_properties(monkeypatch, tmp_path) -> None:
    """set_run_name must push the new run id/name into Traceloop association properties."""
    monkeypatch.chdir(tmp_path)

    class TraceloopStub:
        # Records every association-properties dict the tracer pushes.
        associations: ClassVar[list[dict[str, Any]]] = []

        @staticmethod
        def init(**kwargs: Any) -> None:  # noqa: ARG004
            return None

        @staticmethod
        def set_association_properties(properties: dict[str, Any]) -> None:
            TraceloopStub.associations.append(properties)

    monkeypatch.setattr(tracer_module, "Traceloop", TraceloopStub)

    observed_tracer = Tracer()
    set_global_tracer(observed_tracer)
    observed_tracer.set_run_name("renamed-run")

    assert TraceloopStub.associations
    latest = TraceloopStub.associations[-1]
    assert latest["run_id"] == "renamed-run"
    assert latest["run_name"] == "renamed-run"
def test_events_write_locks_are_scoped_by_events_file(monkeypatch, tmp_path) -> None:
    """Write locks are keyed by events file path, shared across tracer instances."""
    monkeypatch.chdir(tmp_path)
    monkeypatch.setenv("STRIX_TELEMETRY", "0")

    first_tracer = Tracer("lock-run-a")
    second_tracer = Tracer("lock-run-b")

    shared_path = first_tracer.events_file_path
    lock_for_a = first_tracer._get_events_write_lock(shared_path)
    lock_for_a_again = second_tracer._get_events_write_lock(shared_path)
    lock_for_b = second_tracer._get_events_write_lock(second_tracer.events_file_path)

    # Same path → same lock object, regardless of which tracer asks.
    assert lock_for_a is lock_for_a_again
    # Different path → distinct lock object.
    assert lock_for_a is not lock_for_b
def test_tracer_skips_jsonl_when_telemetry_disabled(monkeypatch, tmp_path) -> None:
    """With STRIX_TELEMETRY=0 the tracer must not create the local events.jsonl file."""
    monkeypatch.chdir(tmp_path)
    monkeypatch.setenv("STRIX_TELEMETRY", "0")

    muted_tracer = Tracer("telemetry-disabled")
    set_global_tracer(muted_tracer)
    muted_tracer.log_chat_message("hello", "assistant", "agent-1")
    muted_tracer.save_run_data(mark_complete=True)

    # No JSONL on disk when telemetry is globally off.
    assert not (tmp_path / "strix_runs" / "telemetry-disabled" / "events.jsonl").exists()
def test_tracer_otel_flag_overrides_global_telemetry(monkeypatch, tmp_path) -> None:
    """STRIX_OTEL_TELEMETRY=1 re-enables JSONL output even when STRIX_TELEMETRY=0."""
    monkeypatch.chdir(tmp_path)
    monkeypatch.setenv("STRIX_TELEMETRY", "0")
    monkeypatch.setenv("STRIX_OTEL_TELEMETRY", "1")

    otel_tracer = Tracer("otel-enabled")
    set_global_tracer(otel_tracer)
    otel_tracer.log_chat_message("hello", "assistant", "agent-1")
    otel_tracer.save_run_data(mark_complete=True)

    # Per-channel OTEL flag wins over the global default: the file must exist.
    assert (tmp_path / "strix_runs" / "otel-enabled" / "events.jsonl").exists()
39
tests/telemetry/test_utils.py
Normal file
39
tests/telemetry/test_utils.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
from strix.telemetry.utils import prune_otel_span_attributes
|
||||||
|
|
||||||
|
|
||||||
|
def test_prune_otel_span_attributes_drops_high_volume_prompt_content() -> None:
    """Bulky prompt/completion payload keys are stripped; metadata keys survive intact."""
    span_attributes = {
        "gen_ai.operation.name": "openai.chat",
        "gen_ai.request.model": "gpt-5.2",
        "gen_ai.prompt.0.role": "system",
        "gen_ai.prompt.0.content": "a" * 20_000,
        "gen_ai.completion.0.content": "b" * 10_000,
        "llm.input_messages.0.content": "c" * 5_000,
        "llm.output_messages.0.content": "d" * 5_000,
        "llm.input": "x" * 3_000,
        "llm.output": "y" * 3_000,
    }

    pruned = prune_otel_span_attributes(span_attributes)

    # Every high-volume content key must be gone from the pruned span.
    dropped_keys = (
        "gen_ai.prompt.0.content",
        "gen_ai.completion.0.content",
        "llm.input_messages.0.content",
        "llm.output_messages.0.content",
        "llm.input",
        "llm.output",
    )
    for key in dropped_keys:
        assert key not in pruned

    # Lightweight metadata passes through unchanged, and the drop count is recorded.
    assert pruned["gen_ai.operation.name"] == "openai.chat"
    assert pruned["gen_ai.prompt.0.role"] == "system"
    assert pruned["strix.filtered_attributes_count"] == 6
def test_prune_otel_span_attributes_keeps_metadata_when_nothing_is_dropped() -> None:
    """A span holding only lightweight metadata passes through the pruner unchanged."""
    metadata_only = {
        "gen_ai.operation.name": "openai.chat",
        "gen_ai.request.model": "gpt-5.2",
        "gen_ai.prompt.0.role": "user",
    }

    # Nothing qualifies for dropping, so the result equals the input exactly.
    assert prune_otel_span_attributes(metadata_only) == metadata_only
||||||
Reference in New Issue
Block a user