From f65a97f6b234bb6eac93f7435f9a7cec76531c8d Mon Sep 17 00:00:00 2001 From: bearsyankees Date: Thu, 19 Mar 2026 19:33:16 -0600 Subject: [PATCH] STR-39: expand source-aware whitebox workflows and wiki memory --- README.md | 3 + containers/Dockerfile | 5 +- docs/advanced/configuration.mdx | 2 +- docs/tools/sandbox.mdx | 14 +- docs/tools/terminal.mdx | 16 +- docs/usage/scan-modes.mdx | 4 + strix/agents/StrixAgent/system_prompt.jinja | 15 +- strix/config/config.py | 2 +- strix/interface/cli.py | 5 +- strix/interface/tui.py | 4 +- strix/llm/config.py | 2 + strix/llm/llm.py | 2 + strix/skills/README.md | 4 + .../coordination/source_aware_whitebox.md | 58 +++ strix/skills/custom/source_aware_sast.md | 100 +++++ strix/skills/scan_modes/deep.md | 5 +- strix/skills/scan_modes/quick.md | 3 + strix/skills/scan_modes/standard.md | 5 +- .../agents_graph/agents_graph_actions.py | 18 +- strix/tools/notes/notes_actions.py | 360 ++++++++++++++---- strix/tools/notes/notes_actions_schema.xml | 10 +- tests/llm/test_source_aware_whitebox.py | 28 ++ tests/tools/test_agents_graph_whitebox.py | 107 ++++++ tests/tools/test_notes_wiki.py | 100 +++++ 24 files changed, 768 insertions(+), 104 deletions(-) create mode 100644 strix/skills/coordination/source_aware_whitebox.md create mode 100644 strix/skills/custom/source_aware_sast.md create mode 100644 tests/llm/test_source_aware_whitebox.py create mode 100644 tests/tools/test_agents_graph_whitebox.py create mode 100644 tests/tools/test_notes_wiki.py diff --git a/README.md b/README.md index b2b06fa..f164532 100644 --- a/README.md +++ b/README.md @@ -156,6 +156,9 @@ strix --target https://your-app.com --instruction "Perform authenticated testing # Multi-target testing (source code + deployed app) strix -t https://github.com/org/app -t https://your-app.com +# White-box source-aware scan (local repository) +strix --target ./app-directory --scan-mode standard + # Focused testing with custom instructions strix --target api.your-app.com --instruction 
"Focus on business logic flaws and IDOR vulnerabilities" diff --git a/containers/Dockerfile b/containers/Dockerfile index 6734973..0ece11e 100644 --- a/containers/Dockerfile +++ b/containers/Dockerfile @@ -97,7 +97,9 @@ RUN mkdir -p /home/pentester/.npm-global RUN npm install -g retire@latest && \ npm install -g eslint@latest && \ - npm install -g js-beautify@latest + npm install -g js-beautify@latest && \ + npm install -g @ast-grep/cli@latest && \ + npm install -g tree-sitter-cli@latest WORKDIR /home/pentester/tools RUN git clone https://github.com/aravind0x7/JS-Snooper.git && \ @@ -110,6 +112,7 @@ RUN git clone https://github.com/aravind0x7/JS-Snooper.git && \ USER root RUN curl -sSfL https://raw.githubusercontent.com/trufflesecurity/trufflehog/main/scripts/install.sh | sh -s -- -b /usr/local/bin +RUN curl -sSfL https://raw.githubusercontent.com/gitleaks/gitleaks/master/install.sh | sh -s -- -b /usr/local/bin RUN apt-get update && apt-get install -y zaproxy diff --git a/docs/advanced/configuration.mdx b/docs/advanced/configuration.mdx index 9a6d9e4..91f19bb 100644 --- a/docs/advanced/configuration.mdx +++ b/docs/advanced/configuration.mdx @@ -51,7 +51,7 @@ Configure Strix using environment variables or a config file. ## Docker Configuration - + Docker image to use for the sandbox container. 
diff --git a/docs/tools/sandbox.mdx b/docs/tools/sandbox.mdx index c9043b9..3c815ec 100644 --- a/docs/tools/sandbox.mdx +++ b/docs/tools/sandbox.mdx @@ -45,13 +45,21 @@ Strix runs inside a Kali Linux-based Docker container with a comprehensive set o | [js-beautify](https://github.com/beautifier/js-beautify) | JavaScript deobfuscation | | [JSHint](https://jshint.com) | JavaScript code quality tool | +## Source-Aware Analysis + +| Tool | Description | +| ------------------------------------------------------- | --------------------------------------------- | +| [Semgrep](https://github.com/semgrep/semgrep) | Fast SAST and custom rule matching | +| [ast-grep](https://ast-grep.github.io) | Structural AST/CST-aware code search (`sg`) | +| [Tree-sitter](https://tree-sitter.github.io/tree-sitter/) | Syntax tree parsing and symbol extraction | +| [Bandit](https://bandit.readthedocs.io) | Python security linter | + ## Secret Detection | Tool | Description | | ----------------------------------------------------------- | ------------------------------------- | | [TruffleHog](https://github.com/trufflesecurity/trufflehog) | Find secrets in code and history | -| [Semgrep](https://github.com/semgrep/semgrep) | Static analysis for security patterns | -| [Bandit](https://bandit.readthedocs.io) | Python security linter | +| [Gitleaks](https://github.com/gitleaks/gitleaks) | Detect hardcoded secrets in repositories | ## Authentication Testing @@ -64,7 +72,7 @@ Strix runs inside a Kali Linux-based Docker container with a comprehensive set o | Tool | Description | | -------------------------- | ---------------------------------------------- | -| [Trivy](https://trivy.dev) | Container and dependency vulnerability scanner | +| [Trivy](https://trivy.dev) | Filesystem/container scanning for vulns, misconfigurations, secrets, and licenses | ## HTTP Proxy diff --git a/docs/tools/terminal.mdx b/docs/tools/terminal.mdx index 4b28bc7..5f7cb84 100644 --- a/docs/tools/terminal.mdx +++ 
b/docs/tools/terminal.mdx @@ -32,14 +32,18 @@ sqlmap -u "https://example.com/page?id=1" ### Code Analysis ```bash -# Search for secrets -trufflehog filesystem ./ - -# Static analysis +# Fast SAST triage semgrep --config auto ./src -# Grep for patterns -grep -r "password" ./ +# Structural AST search +sg scan ./src + +# Secret detection +gitleaks detect --source ./ +trufflehog filesystem ./ + +# Supply-chain and misconfiguration checks +trivy fs ./ ``` ### Custom Scripts diff --git a/docs/usage/scan-modes.mdx b/docs/usage/scan-modes.mdx index 73ed84d..9f95891 100644 --- a/docs/usage/scan-modes.mdx +++ b/docs/usage/scan-modes.mdx @@ -31,6 +31,8 @@ Balanced testing for routine security reviews. Best for: **Duration**: 30 minutes to 1 hour +**White-box behavior**: Uses source-aware mapping and static triage to prioritize dynamic exploit validation paths. + ## Deep ```bash @@ -44,6 +46,8 @@ Thorough penetration testing. Best for: **Duration**: 1-4 hours depending on target complexity +**White-box behavior**: Runs broad source-aware triage (`semgrep`, AST structural search, secrets, supply-chain checks) and then systematically validates top candidates dynamically. + Deep mode is the default. It explores edge cases, chained vulnerabilities, and complex attack paths. diff --git a/strix/agents/StrixAgent/system_prompt.jinja b/strix/agents/StrixAgent/system_prompt.jinja index bde3157..5f8f35c 100644 --- a/strix/agents/StrixAgent/system_prompt.jinja +++ b/strix/agents/StrixAgent/system_prompt.jinja @@ -77,11 +77,12 @@ BLACK-BOX TESTING (domain/subdomain only): WHITE-BOX TESTING (code provided): - MUST perform BOTH static AND dynamic analysis -- Static: Review code for vulnerabilities -- Dynamic: Run the application and test live -- NEVER rely solely on static code analysis - always test dynamically -- You MUST begin at the very first step by running the code and testing live. 
-- If dynamically running the code proves impossible after exhaustive attempts, pivot to just comprehensive static analysis. +- Static: Use source-aware triage first to map risk quickly (`semgrep`, `ast-grep`, Tree-sitter tooling, `gitleaks`, `trufflehog`, `trivy fs`). Then review code for vulnerabilities +- Shared memory: Use notes as shared working memory; check existing `wiki` notes first (`list_notes`), then update one repo wiki note instead of creating duplicates +- Dynamic: Run the application and test live to validate exploitability +- NEVER rely solely on static code analysis when dynamic validation is possible +- Begin with fast source triage and dynamic run preparation in parallel; use static findings to prioritize live testing. +- If dynamically running the code proves impossible after exhaustive attempts, pivot to comprehensive static analysis. - Try to infer how to run the code based on its structure and content. - FIX discovered vulnerabilities in code in same file. - Test patches to confirm vulnerability removal. 
@@ -369,8 +370,12 @@ JAVASCRIPT ANALYSIS: CODE ANALYSIS: - semgrep - Static analysis/SAST +- ast-grep (sg) - Structural AST/CST-aware code search +- tree-sitter - Syntax-aware parsing and symbol extraction support - bandit - Python security linter - trufflehog - Secret detection in code +- gitleaks - Secret detection in repository content/history +- trivy fs - Filesystem vulnerability/misconfiguration/license/secret scanning SPECIALIZED TOOLS: - jwt_tool - JWT token manipulation diff --git a/strix/config/config.py b/strix/config/config.py index 387834b..aba5343 100644 --- a/strix/config/config.py +++ b/strix/config/config.py @@ -37,7 +37,7 @@ class Config: strix_disable_browser = "false" # Runtime Configuration - strix_image = "ghcr.io/usestrix/strix-sandbox:0.1.11" + strix_image = "ghcr.io/usestrix/strix-sandbox:0.1.12" strix_runtime_backend = "docker" strix_sandbox_execution_timeout = "120" strix_sandbox_connect_timeout = "10" diff --git a/strix/interface/cli.py b/strix/interface/cli.py index f99aefb..fe0992b 100644 --- a/strix/interface/cli.py +++ b/strix/interface/cli.py @@ -75,7 +75,10 @@ async def run_cli(args: Any) -> None: # noqa: PLR0915 "diff_scope": getattr(args, "diff_scope", {"active": False}), } - llm_config = LLMConfig(scan_mode=scan_mode) + llm_config = LLMConfig( + scan_mode=scan_mode, + is_whitebox=bool(getattr(args, "local_sources", [])), + ) agent_config = { "llm_config": llm_config, "max_iterations": 300, diff --git a/strix/interface/tui.py b/strix/interface/tui.py index 54a4adb..4cd0eec 100644 --- a/strix/interface/tui.py +++ b/strix/interface/tui.py @@ -748,7 +748,9 @@ class StrixTUIApp(App): # type: ignore[misc] def _build_agent_config(self, args: argparse.Namespace) -> dict[str, Any]: scan_mode = getattr(args, "scan_mode", "deep") - llm_config = LLMConfig(scan_mode=scan_mode) + llm_config = LLMConfig( + scan_mode=scan_mode, is_whitebox=bool(getattr(args, "local_sources", [])) + ) config = { "llm_config": llm_config, diff --git 
a/strix/llm/config.py b/strix/llm/config.py index 3426327..f3a2ac9 100644 --- a/strix/llm/config.py +++ b/strix/llm/config.py @@ -9,6 +9,7 @@ class LLMConfig: skills: list[str] | None = None, timeout: int | None = None, scan_mode: str = "deep", + is_whitebox: bool = False, ): self.model_name = model_name or Config.get("strix_llm") @@ -21,3 +22,4 @@ class LLMConfig: self.timeout = timeout or int(Config.get("llm_timeout") or "300") self.scan_mode = scan_mode if scan_mode in ["quick", "standard", "deep"] else "deep" + self.is_whitebox = is_whitebox diff --git a/strix/llm/llm.py b/strix/llm/llm.py index 311de35..f19461b 100644 --- a/strix/llm/llm.py +++ b/strix/llm/llm.py @@ -90,6 +90,8 @@ class LLM: *list(self.config.skills or []), f"scan_modes/{self.config.scan_mode}", ] + if self.config.is_whitebox: + skills_to_load.append("coordination/source_aware_whitebox") skill_content = load_skills(skills_to_load) env.globals["get_skill"] = lambda name: skill_content.get(name, "") diff --git a/strix/skills/README.md b/strix/skills/README.md index 4543cd5..5509192 100644 --- a/strix/skills/README.md +++ b/strix/skills/README.md @@ -37,6 +37,10 @@ The skills are dynamically injected into the agent's system prompt, allowing it | **`/reconnaissance`** | Advanced information gathering and enumeration techniques for comprehensive attack surface mapping | | **`/custom`** | Community-contributed skills for specialized or industry-specific testing scenarios | +Notable source-aware skills: +- `source_aware_whitebox` (coordination): white-box orchestration playbook +- `source_aware_sast` (custom): semgrep/AST/secrets/supply-chain static triage workflow + --- ## 🎨 Creating New Skills diff --git a/strix/skills/coordination/source_aware_whitebox.md b/strix/skills/coordination/source_aware_whitebox.md new file mode 100644 index 0000000..c4943cc --- /dev/null +++ b/strix/skills/coordination/source_aware_whitebox.md @@ -0,0 +1,58 @@ +--- +name: source-aware-whitebox +description: Coordination 
playbook for source-aware white-box testing with static triage and dynamic validation +--- + +# Source-Aware White-Box Coordination + +Use this coordination playbook when repository source code is available. + +## Objective + +Increase white-box coverage by combining source-aware triage with dynamic validation. Source-aware tooling is recommended by default, but not mandatory when context suggests a better path. + +## Recommended Workflow + +1. Build a quick source map before deep exploitation. +2. Run first-pass static triage to rank high-risk paths. +3. Use triage outputs to prioritize dynamic PoC validation. +4. Keep findings evidence-driven: no report without validation. +5. Keep shared wiki memory current so all agents can reuse context. + +## Source-Aware Triage Stack + +- `semgrep`: fast security-first triage and custom pattern scans +- `ast-grep` (`sg`): structural pattern hunting and targeted repo mapping +- `tree-sitter`: syntax-aware parsing support for symbol and route extraction +- `gitleaks` + `trufflehog`: complementary secret detection (working tree and history coverage) +- `trivy fs`: dependency, misconfiguration, license, and secret checks + +## Agent Delegation Guidance + +- Keep child agents specialized by vulnerability/component as usual. +- For source-heavy subtasks, prefer creating child agents with `source_aware_sast` skill. +- Use source findings to shape payloads and endpoint selection for dynamic testing. + +## Wiki Note Requirement (Source Map) + +When source is present, maintain one wiki note per repository and keep it current. + +Operational rules: +- At task start, call `list_notes` with `category=wiki` and reuse existing repo wiki if present. +- If no repo wiki exists, create one with `create_note` and `category=wiki`. +- Update the same wiki via `update_note`; avoid creating duplicate wiki notes for the same repo. +- Child agents should read wiki notes first, then extend with new evidence from their scope. 
+ +Recommended sections: +- Architecture overview +- Entrypoints and routing +- AuthN/AuthZ model +- High-risk sinks and trust boundaries +- Static scanner summary +- Dynamic validation follow-ups + +## Validation Guardrails + +- Static findings are hypotheses until validated. +- Dynamic exploitation evidence is still required before vulnerability reporting. +- Keep scanner output concise, deduplicated, and mapped to concrete code locations. diff --git a/strix/skills/custom/source_aware_sast.md b/strix/skills/custom/source_aware_sast.md new file mode 100644 index 0000000..dc9d058 --- /dev/null +++ b/strix/skills/custom/source_aware_sast.md @@ -0,0 +1,100 @@ +--- +name: source-aware-sast +description: Practical source-aware SAST and AST playbook for semgrep, ast-grep, gitleaks, and trivy fs +--- + +# Source-Aware SAST Playbook + +Use this skill for source-heavy analysis where static and structural signals should guide dynamic testing. + +## Fast Start + +Run tools from repo root and store outputs in a dedicated artifact directory: + +```bash +mkdir -p /workspace/.strix-source-aware +``` + +Before scanning, check shared wiki memory: + +```text +1) list_notes(category="wiki") +2) Reuse matching repo wiki note if present +3) create_note(category="wiki") only if missing +``` + +## Semgrep First Pass + +Use Semgrep as the default static triage pass: + +```bash +semgrep --config auto --json --output /workspace/.strix-source-aware/semgrep.json . +``` + +If diff scope is active, restrict to changed files first, then expand only when needed. + +## AST-Grep Structural Mapping + +Use `sg` for structure-aware code hunting: + +```bash +sg scan --json . 
> /workspace/.strix-source-aware/ast-grep.json +``` + +Target high-value patterns such as: +- missing auth checks near route handlers +- dynamic command/query construction +- unsafe deserialization or template execution paths +- file and path operations influenced by user input + +## Tree-Sitter Assisted Repo Mapping + +Use tree-sitter CLI for syntax-aware parsing when grep-level mapping is noisy: + +```bash +tree-sitter parse -q +``` + +Use outputs to improve route/symbol/sink maps for subsequent targeted scans. + +## Secret and Supply Chain Coverage + +Detect hardcoded credentials: + +```bash +gitleaks detect --source . --report-format json --report-path /workspace/.strix-source-aware/gitleaks.json +trufflehog filesystem --json . > /workspace/.strix-source-aware/trufflehog.json +``` + +Run repository-wide dependency and config checks: + +```bash +trivy fs --format json --output /workspace/.strix-source-aware/trivy-fs.json . +``` + +## Converting Static Signals Into Exploits + +1. Rank candidates by impact and exploitability. +2. Trace source-to-sink flow for top candidates. +3. Build dynamic PoCs that reproduce the suspected issue. +4. Report only after dynamic validation succeeds. + +## Wiki Update Template + +Keep one wiki note per repository and update these sections: + +```text +## Architecture +## Entrypoints +## AuthN/AuthZ +## High-Risk Sinks +## Static Findings Summary +## Dynamic Validation Follow-Ups +``` + +## Anti-Patterns + +- Do not treat scanner output as final truth. +- Do not spend full cycles on low-signal pattern matches. +- Do not report source-only findings without validation evidence. +- Do not create multiple wiki notes for the same repository when one already exists. diff --git a/strix/skills/scan_modes/deep.md b/strix/skills/scan_modes/deep.md index 4235f11..f6fe11b 100644 --- a/strix/skills/scan_modes/deep.md +++ b/strix/skills/scan_modes/deep.md @@ -15,6 +15,9 @@ Thorough understanding before exploitation. 
Test every parameter, every endpoint **Whitebox (source available)** - Map every file, module, and code path in the repository +- Load and maintain shared `wiki` notes from the start (`list_notes(category="wiki")`), then continuously update one repo note +- Start with broad source-aware triage (`semgrep`, `ast-grep`, `gitleaks`, `trufflehog`, `trivy fs`) and use outputs to drive deep review +- Use syntax-aware parsing (Tree-sitter tooling) to improve symbol, route, and sink extraction quality - Trace all entry points from HTTP handlers to database queries - Document all authentication mechanisms and implementations - Map authorization checks and access control model @@ -25,7 +28,7 @@ Thorough understanding before exploitation. Test every parameter, every endpoint - Identify all serialization/deserialization points - Review file handling: upload, download, processing - Understand the deployment model and infrastructure assumptions -- Check all dependency versions against CVE databases +- Check all dependency versions and repository risks against CVE/misconfiguration data **Blackbox (no source)** - Exhaustive subdomain enumeration with multiple sources and tools diff --git a/strix/skills/scan_modes/quick.md b/strix/skills/scan_modes/quick.md index 506ffc4..14cf860 100644 --- a/strix/skills/scan_modes/quick.md +++ b/strix/skills/scan_modes/quick.md @@ -15,6 +15,9 @@ Optimize for fast feedback on critical security issues. 
Skip exhaustive enumerat **Whitebox (source available)** - Focus on recent changes: git diffs, new commits, modified files—these are most likely to contain fresh bugs +- Read existing `wiki` notes first (`list_notes(category="wiki")`) to avoid remapping from scratch +- Run a fast static triage on changed files first (`semgrep`, then targeted `sg` queries) +- Run quick secret and dependency checks (`gitleaks`, `trufflehog`, `trivy fs`) scoped to changed areas when possible - Identify security-sensitive patterns in changed code: auth checks, input handling, database queries, file operations - Trace user input through modified code paths - Check if security controls were modified or bypassed diff --git a/strix/skills/scan_modes/standard.md b/strix/skills/scan_modes/standard.md index a13b786..773dab1 100644 --- a/strix/skills/scan_modes/standard.md +++ b/strix/skills/scan_modes/standard.md @@ -15,11 +15,14 @@ Systematic testing across the full attack surface. Understand the application be **Whitebox (source available)** - Map codebase structure: modules, entry points, routing +- Start by loading existing `wiki` notes (`list_notes(category="wiki")`) and update one shared repo note as mapping evolves +- Run `semgrep` first-pass triage to prioritize risky flows before deep manual review +- Use `ast-grep`/Tree-sitter-assisted structural search for route, sink, and trust-boundary mapping - Identify architecture pattern (MVC, microservices, monolith) - Trace input vectors: forms, APIs, file uploads, headers, cookies - Review authentication and authorization flows - Analyze database interactions and ORM usage -- Check dependencies for known CVEs +- Check dependencies and repo risks with `trivy fs`, `gitleaks`, and `trufflehog` - Understand the data model and sensitive data locations **Blackbox (no source)** diff --git a/strix/tools/agents_graph/agents_graph_actions.py b/strix/tools/agents_graph/agents_graph_actions.py index dd0e569..90ef171 100644 --- 
a/strix/tools/agents_graph/agents_graph_actions.py +++ b/strix/tools/agents_graph/agents_graph_actions.py @@ -39,6 +39,13 @@ def _run_agent_in_thread( if inherited_messages else "started with a fresh context" ) + wiki_memory_instruction = "" + if getattr(getattr(agent, "llm_config", None), "is_whitebox", False): + wiki_memory_instruction = ( + '\n - White-box memory: call list_notes(category="wiki") early, ' + "reuse existing repo wiki notes, and update the same note instead of " + "creating duplicates" + ) task_xml = f""" @@ -64,6 +71,7 @@ def _run_agent_in_thread( - All agents share /workspace directory and proxy history for better collaboration - You can see files created by other agents and proxy traffic from previous work - Build upon previous work but focus on your specific delegated task +{wiki_memory_instruction} """ @@ -233,13 +241,21 @@ def create_agent( timeout = None scan_mode = "deep" + is_whitebox = False if parent_agent and hasattr(parent_agent, "llm_config"): if hasattr(parent_agent.llm_config, "timeout"): timeout = parent_agent.llm_config.timeout if hasattr(parent_agent.llm_config, "scan_mode"): scan_mode = parent_agent.llm_config.scan_mode + if hasattr(parent_agent.llm_config, "is_whitebox"): + is_whitebox = parent_agent.llm_config.is_whitebox - llm_config = LLMConfig(skills=skill_list, timeout=timeout, scan_mode=scan_mode) + llm_config = LLMConfig( + skills=skill_list, + timeout=timeout, + scan_mode=scan_mode, + is_whitebox=is_whitebox, + ) agent_config = { "llm_config": llm_config, diff --git a/strix/tools/notes/notes_actions.py b/strix/tools/notes/notes_actions.py index daab233..6364dd7 100644 --- a/strix/tools/notes/notes_actions.py +++ b/strix/tools/notes/notes_actions.py @@ -1,11 +1,175 @@ +import json +import threading import uuid from datetime import UTC, datetime +from pathlib import Path from typing import Any from strix.tools.registry import register_tool _notes_storage: dict[str, dict[str, Any]] = {} +_VALID_NOTE_CATEGORIES = 
["general", "findings", "methodology", "questions", "plan", "wiki"] +_notes_lock = threading.RLock() +_loaded_notes_run_dir: str | None = None + + +def _get_run_dir() -> Path | None: + try: + from strix.telemetry.tracer import get_global_tracer + + tracer = get_global_tracer() + if not tracer: + return None + return tracer.get_run_dir() + except (ImportError, OSError, RuntimeError): + return None + + +def _get_notes_jsonl_path() -> Path | None: + run_dir = _get_run_dir() + if not run_dir: + return None + + notes_dir = run_dir / "notes" + notes_dir.mkdir(parents=True, exist_ok=True) + return notes_dir / "notes.jsonl" + + +def _append_note_event(op: str, note_id: str, note: dict[str, Any] | None = None) -> None: + notes_path = _get_notes_jsonl_path() + if not notes_path: + return + + event: dict[str, Any] = { + "timestamp": datetime.now(UTC).isoformat(), + "op": op, + "note_id": note_id, + } + if note is not None: + event["note"] = note + + with notes_path.open("a", encoding="utf-8") as f: + f.write(f"{json.dumps(event, ensure_ascii=True)}\n") + + +def _load_notes_from_jsonl(notes_path: Path) -> dict[str, dict[str, Any]]: + hydrated: dict[str, dict[str, Any]] = {} + if not notes_path.exists(): + return hydrated + + with notes_path.open(encoding="utf-8") as f: + for raw_line in f: + line = raw_line.strip() + if not line: + continue + + try: + event = json.loads(line) + except json.JSONDecodeError: + continue + + op = str(event.get("op", "")).strip().lower() + note_id = str(event.get("note_id", "")).strip() + if not note_id or op not in {"create", "update", "delete"}: + continue + + if op == "delete": + hydrated.pop(note_id, None) + continue + + note = event.get("note") + if not isinstance(note, dict): + continue + + existing = hydrated.get(note_id, {}) + existing.update(note) + hydrated[note_id] = existing + + return hydrated + + +def _ensure_notes_loaded() -> None: + global _loaded_notes_run_dir # noqa: PLW0603 + + run_dir = _get_run_dir() + run_dir_key = 
str(run_dir.resolve()) if run_dir else "__no_run_dir__" + if _loaded_notes_run_dir == run_dir_key: + return + + _notes_storage.clear() + + notes_path = _get_notes_jsonl_path() + if notes_path: + _notes_storage.update(_load_notes_from_jsonl(notes_path)) + for note_id, note in _notes_storage.items(): + if note.get("category") == "wiki": + _persist_wiki_note(note_id, note) + + _loaded_notes_run_dir = run_dir_key + + +def _sanitize_wiki_title(title: str) -> str: + cleaned = "".join(ch.lower() if ch.isalnum() else "-" for ch in title.strip()) + slug = "-".join(part for part in cleaned.split("-") if part) + return slug or "wiki-note" + + +def _get_wiki_directory() -> Path | None: + try: + run_dir = _get_run_dir() + if not run_dir: + return None + + wiki_dir = run_dir / "wiki" + wiki_dir.mkdir(parents=True, exist_ok=True) + except OSError: + return None + else: + return wiki_dir + + +def _get_wiki_note_path(note_id: str, note: dict[str, Any]) -> Path | None: + wiki_dir = _get_wiki_directory() + if not wiki_dir: + return None + + wiki_filename = note.get("wiki_filename") + if not isinstance(wiki_filename, str) or not wiki_filename.strip(): + title = note.get("title", "wiki-note") + wiki_filename = f"{note_id}-{_sanitize_wiki_title(str(title))}.md" + note["wiki_filename"] = wiki_filename + + return wiki_dir / wiki_filename + + +def _persist_wiki_note(note_id: str, note: dict[str, Any]) -> None: + wiki_path = _get_wiki_note_path(note_id, note) + if not wiki_path: + return + + tags = note.get("tags", []) + tags_line = ", ".join(str(tag) for tag in tags) if isinstance(tags, list) and tags else "none" + + content = ( + f"# {note.get('title', 'Wiki Note')}\n\n" + f"**Note ID:** {note_id}\n" + f"**Created:** {note.get('created_at', '')}\n" + f"**Updated:** {note.get('updated_at', '')}\n" + f"**Tags:** {tags_line}\n\n" + "## Content\n\n" + f"{note.get('content', '')}\n" + ) + wiki_path.write_text(content, encoding="utf-8") + + +def _remove_wiki_note(note_id: str, note: dict[str, 
Any]) -> None: + wiki_path = _get_wiki_note_path(note_id, note) + if not wiki_path: + return + + if wiki_path.exists(): + wiki_path.unlink() def _filter_notes( @@ -13,6 +177,7 @@ def _filter_notes( tags: list[str] | None = None, search_query: str | None = None, ) -> list[dict[str, Any]]: + _ensure_notes_loaded() filtered_notes = [] for note_id, note in _notes_storage.items(): @@ -40,49 +205,66 @@ def _filter_notes( @register_tool(sandbox_execution=False) -def create_note( +def create_note( # noqa: PLR0911 title: str, content: str, category: str = "general", tags: list[str] | None = None, ) -> dict[str, Any]: - try: - if not title or not title.strip(): - return {"success": False, "error": "Title cannot be empty", "note_id": None} + with _notes_lock: + try: + _ensure_notes_loaded() - if not content or not content.strip(): - return {"success": False, "error": "Content cannot be empty", "note_id": None} + if not title or not title.strip(): + return {"success": False, "error": "Title cannot be empty", "note_id": None} - valid_categories = ["general", "findings", "methodology", "questions", "plan"] - if category not in valid_categories: - return { - "success": False, - "error": f"Invalid category. Must be one of: {', '.join(valid_categories)}", - "note_id": None, + if not content or not content.strip(): + return {"success": False, "error": "Content cannot be empty", "note_id": None} + + if category not in _VALID_NOTE_CATEGORIES: + return { + "success": False, + "error": ( + f"Invalid category. 
Must be one of: {', '.join(_VALID_NOTE_CATEGORIES)}" + ), + "note_id": None, + } + + note_id = "" + for _ in range(20): + candidate = str(uuid.uuid4())[:5] + if candidate not in _notes_storage: + note_id = candidate + break + if not note_id: + return {"success": False, "error": "Failed to allocate note ID", "note_id": None} + + timestamp = datetime.now(UTC).isoformat() + + note = { + "title": title.strip(), + "content": content.strip(), + "category": category, + "tags": tags or [], + "created_at": timestamp, + "updated_at": timestamp, } - note_id = str(uuid.uuid4())[:5] - timestamp = datetime.now(UTC).isoformat() + _notes_storage[note_id] = note + _append_note_event("create", note_id, note) + if category == "wiki": + _persist_wiki_note(note_id, note) - note = { - "title": title.strip(), - "content": content.strip(), - "category": category, - "tags": tags or [], - "created_at": timestamp, - "updated_at": timestamp, - } - - _notes_storage[note_id] = note - - except (ValueError, TypeError) as e: - return {"success": False, "error": f"Failed to create note: {e}", "note_id": None} - else: - return { - "success": True, - "note_id": note_id, - "message": f"Note '{title}' created successfully", - } + except (ValueError, TypeError) as e: + return {"success": False, "error": f"Failed to create note: {e}", "note_id": None} + except OSError as e: + return {"success": False, "error": f"Failed to persist wiki note: {e}", "note_id": None} + else: + return { + "success": True, + "note_id": note_id, + "message": f"Note '{title}' created successfully", + } @register_tool(sandbox_execution=False) @@ -91,22 +273,23 @@ def list_notes( tags: list[str] | None = None, search: str | None = None, ) -> dict[str, Any]: - try: - filtered_notes = _filter_notes(category=category, tags=tags, search_query=search) + with _notes_lock: + try: + filtered_notes = _filter_notes(category=category, tags=tags, search_query=search) - return { - "success": True, - "notes": filtered_notes, - "total_count": 
len(filtered_notes), - } + return { + "success": True, + "notes": filtered_notes, + "total_count": len(filtered_notes), + } - except (ValueError, TypeError) as e: - return { - "success": False, - "error": f"Failed to list notes: {e}", - "notes": [], - "total_count": 0, - } + except (ValueError, TypeError) as e: + return { + "success": False, + "error": f"Failed to list notes: {e}", + "notes": [], + "total_count": 0, + } @register_tool(sandbox_execution=False) @@ -116,49 +299,66 @@ def update_note( content: str | None = None, tags: list[str] | None = None, ) -> dict[str, Any]: - try: - if note_id not in _notes_storage: - return {"success": False, "error": f"Note with ID '{note_id}' not found"} + with _notes_lock: + try: + _ensure_notes_loaded() - note = _notes_storage[note_id] + if note_id not in _notes_storage: + return {"success": False, "error": f"Note with ID '{note_id}' not found"} - if title is not None: - if not title.strip(): - return {"success": False, "error": "Title cannot be empty"} - note["title"] = title.strip() + note = _notes_storage[note_id] - if content is not None: - if not content.strip(): - return {"success": False, "error": "Content cannot be empty"} - note["content"] = content.strip() + if title is not None: + if not title.strip(): + return {"success": False, "error": "Title cannot be empty"} + note["title"] = title.strip() - if tags is not None: - note["tags"] = tags + if content is not None: + if not content.strip(): + return {"success": False, "error": "Content cannot be empty"} + note["content"] = content.strip() - note["updated_at"] = datetime.now(UTC).isoformat() + if tags is not None: + note["tags"] = tags - return { - "success": True, - "message": f"Note '{note['title']}' updated successfully", - } + note["updated_at"] = datetime.now(UTC).isoformat() + _append_note_event("update", note_id, note) + if note.get("category") == "wiki": + _persist_wiki_note(note_id, note) - except (ValueError, TypeError) as e: - return {"success": False, 
"error": f"Failed to update note: {e}"} + return { + "success": True, + "message": f"Note '{note['title']}' updated successfully", + } + + except (ValueError, TypeError) as e: + return {"success": False, "error": f"Failed to update note: {e}"} + except OSError as e: + return {"success": False, "error": f"Failed to persist wiki note: {e}"} @register_tool(sandbox_execution=False) def delete_note(note_id: str) -> dict[str, Any]: - try: - if note_id not in _notes_storage: - return {"success": False, "error": f"Note with ID '{note_id}' not found"} + with _notes_lock: + try: + _ensure_notes_loaded() - note_title = _notes_storage[note_id]["title"] - del _notes_storage[note_id] + if note_id not in _notes_storage: + return {"success": False, "error": f"Note with ID '{note_id}' not found"} - except (ValueError, TypeError) as e: - return {"success": False, "error": f"Failed to delete note: {e}"} - else: - return { - "success": True, - "message": f"Note '{note_title}' deleted successfully", - } + note = _notes_storage[note_id] + note_title = note["title"] + if note.get("category") == "wiki": + _remove_wiki_note(note_id, note) + del _notes_storage[note_id] + _append_note_event("delete", note_id) + + except (ValueError, TypeError) as e: + return {"success": False, "error": f"Failed to delete note: {e}"} + except OSError as e: + return {"success": False, "error": f"Failed to delete wiki note: {e}"} + else: + return { + "success": True, + "message": f"Note '{note_title}' deleted successfully", + } diff --git a/strix/tools/notes/notes_actions_schema.xml b/strix/tools/notes/notes_actions_schema.xml index f47c167..0329187 100644 --- a/strix/tools/notes/notes_actions_schema.xml +++ b/strix/tools/notes/notes_actions_schema.xml @@ -2,7 +2,8 @@ Create a personal note for observations, findings, and research during the scan.
Use this tool for documenting discoveries, observations, methodology notes, and questions. - This is your personal notepad for recording information you want to remember or reference later. + This is your personal and shared run memory for recording information you want to remember or reference later. + Use category "wiki" for repository source maps shared across agents in the same run. For tracking actionable tasks, use the todo tool instead.
@@ -12,7 +13,7 @@ Content of the note - Category to organize the note (default: "general", "findings", "methodology", "questions", "plan") + Category to organize the note (default: "general", "findings", "methodology", "questions", "plan", "wiki") Tags for categorization @@ -122,6 +123,11 @@ The /api/internal/* endpoints are high priority as they appear to lack authentic admin findings + + + # Load shared repository wiki notes + + wiki
diff --git a/tests/llm/test_source_aware_whitebox.py b/tests/llm/test_source_aware_whitebox.py
new file mode 100644
index 0000000..2b22ae6
--- /dev/null
+++ b/tests/llm/test_source_aware_whitebox.py
@@ -0,0 +1,28 @@
+from strix.llm.config import LLMConfig
+from strix.llm.llm import LLM
+
+
+def test_llm_config_whitebox_defaults_to_false(monkeypatch) -> None:
+    monkeypatch.setenv("STRIX_LLM", "openai/gpt-5")
+    config = LLMConfig()
+    assert config.is_whitebox is False
+
+
+def test_llm_config_whitebox_can_be_enabled(monkeypatch) -> None:
+    monkeypatch.setenv("STRIX_LLM", "openai/gpt-5")
+    config = LLMConfig(is_whitebox=True)
+    assert config.is_whitebox is True
+
+
+def test_whitebox_prompt_loads_source_aware_coordination_skill(monkeypatch) -> None:
+    monkeypatch.setenv("STRIX_LLM", "openai/gpt-5")
+
+    whitebox_llm = LLM(LLMConfig(scan_mode="quick", is_whitebox=True), agent_name="StrixAgent")
+    assert "<source_aware_whitebox>" in whitebox_llm.system_prompt
+    assert "Begin with fast source triage" in whitebox_llm.system_prompt
+    assert "You MUST begin at the very first step by running the code and testing live." 
not in (
+        whitebox_llm.system_prompt
+    )
+
+    non_whitebox_llm = LLM(LLMConfig(scan_mode="quick", is_whitebox=False), agent_name="StrixAgent")
+    assert "<source_aware_whitebox>" not in non_whitebox_llm.system_prompt
diff --git a/tests/tools/test_agents_graph_whitebox.py b/tests/tools/test_agents_graph_whitebox.py
new file mode 100644
index 0000000..ac98163
--- /dev/null
+++ b/tests/tools/test_agents_graph_whitebox.py
@@ -0,0 +1,107 @@
+from types import SimpleNamespace
+
+import strix.agents as agents_module
+from strix.llm.config import LLMConfig
+from strix.tools.agents_graph import agents_graph_actions
+
+
+def test_create_agent_inherits_parent_whitebox_flag(monkeypatch) -> None:
+    monkeypatch.setenv("STRIX_LLM", "openai/gpt-5")
+
+    agents_graph_actions._agent_graph["nodes"].clear()
+    agents_graph_actions._agent_graph["edges"].clear()
+    agents_graph_actions._agent_messages.clear()
+    agents_graph_actions._running_agents.clear()
+    agents_graph_actions._agent_instances.clear()
+    agents_graph_actions._agent_states.clear()
+
+    parent_id = "parent-agent"
+    parent_llm = LLMConfig(timeout=123, scan_mode="standard", is_whitebox=True)
+    agents_graph_actions._agent_instances[parent_id] = SimpleNamespace(
+        llm_config=parent_llm,
+        non_interactive=True,
+    )
+
+    captured_config: dict[str, object] = {}
+
+    class FakeStrixAgent:
+        def __init__(self, config: dict[str, object]):
+            captured_config["agent_config"] = config
+
+    class FakeThread:
+        def __init__(self, target, args, daemon, name):
+            self.target = target
+            self.args = args
+            self.daemon = daemon
+            self.name = name
+
+        def start(self) -> None:
+            return None
+
+    monkeypatch.setattr(agents_module, "StrixAgent", FakeStrixAgent)
+    monkeypatch.setattr(agents_graph_actions.threading, "Thread", FakeThread)
+
+    agent_state = SimpleNamespace(
+        agent_id=parent_id,
+        get_conversation_history=list,
+    )
+    result = agents_graph_actions.create_agent(
+        agent_state=agent_state,
+        task="source-aware child task",
+        name="SourceAwareChild",
+        inherit_context=False,
+    )
+
+    assert result["success"] is True
+    llm_config = captured_config["agent_config"]["llm_config"]
+    assert isinstance(llm_config, LLMConfig)
+    assert llm_config.timeout == 123
+    assert llm_config.scan_mode == "standard"
+    assert llm_config.is_whitebox is True
+
+
+def test_delegation_prompt_includes_wiki_memory_instruction_in_whitebox(monkeypatch) -> None:
+    monkeypatch.setenv("STRIX_LLM", "openai/gpt-5")
+
+    agents_graph_actions._agent_graph["nodes"].clear()
+    agents_graph_actions._agent_graph["edges"].clear()
+    agents_graph_actions._agent_messages.clear()
+    agents_graph_actions._running_agents.clear()
+    agents_graph_actions._agent_instances.clear()
+    agents_graph_actions._agent_states.clear()
+
+    parent_id = "parent-1"
+    child_id = "child-1"
+    agents_graph_actions._agent_graph["nodes"][parent_id] = {"name": "Parent", "status": "running"}
+    agents_graph_actions._agent_graph["nodes"][child_id] = {"name": "Child", "status": "running"}
+
+    class FakeState:
+        def __init__(self) -> None:
+            self.agent_id = child_id
+            self.agent_name = "Child"
+            self.parent_id = parent_id
+            self.task = "analyze source risks"
+            self.stop_requested = False
+            self.messages: list[tuple[str, str]] = []
+
+        def add_message(self, role: str, content: str) -> None:
+            self.messages.append((role, content))
+
+        def model_dump(self) -> dict[str, str]:
+            return {"agent_id": self.agent_id}
+
+    class FakeAgent:
+        def __init__(self) -> None:
+            self.llm_config = LLMConfig(is_whitebox=True)
+
+        async def agent_loop(self, _task: str) -> dict[str, bool]:
+            return {"ok": True}
+
+    state = FakeState()
+    agent = FakeAgent()
+    result = agents_graph_actions._run_agent_in_thread(agent, state, inherited_messages=[])
+
+    assert result["result"] == {"ok": True}
+    task_messages = [msg for role, msg in state.messages if role == "user"]
+    assert task_messages
+    assert 'list_notes(category="wiki")' in task_messages[-1]
diff --git a/tests/tools/test_notes_wiki.py b/tests/tools/test_notes_wiki.py
new file mode 100644 index 0000000..06725c1 --- /dev/null +++ b/tests/tools/test_notes_wiki.py @@ -0,0 +1,100 @@ +from pathlib import Path + +from strix.telemetry.tracer import Tracer, get_global_tracer, set_global_tracer +from strix.tools.notes import notes_actions + + +def _reset_notes_state() -> None: + notes_actions._notes_storage.clear() + notes_actions._loaded_notes_run_dir = None + + +def test_wiki_notes_are_persisted_and_removed(tmp_path: Path, monkeypatch) -> None: + monkeypatch.chdir(tmp_path) + _reset_notes_state() + + previous_tracer = get_global_tracer() + tracer = Tracer("wiki-test-run") + set_global_tracer(tracer) + + try: + created = notes_actions.create_note( + title="Repo Map", + content="## Architecture\n- monolith", + category="wiki", + tags=["source-map"], + ) + assert created["success"] is True + note_id = created["note_id"] + assert isinstance(note_id, str) + + note = notes_actions._notes_storage[note_id] + wiki_filename = note.get("wiki_filename") + assert isinstance(wiki_filename, str) + + wiki_path = tmp_path / "strix_runs" / "wiki-test-run" / "wiki" / wiki_filename + assert wiki_path.exists() + assert "## Architecture" in wiki_path.read_text(encoding="utf-8") + + updated = notes_actions.update_note( + note_id=note_id, + content="## Architecture\n- service-oriented", + ) + assert updated["success"] is True + assert "service-oriented" in wiki_path.read_text(encoding="utf-8") + + deleted = notes_actions.delete_note(note_id=note_id) + assert deleted["success"] is True + assert wiki_path.exists() is False + finally: + _reset_notes_state() + set_global_tracer(previous_tracer) # type: ignore[arg-type] + + +def test_notes_jsonl_replay_survives_memory_reset(tmp_path: Path, monkeypatch) -> None: + monkeypatch.chdir(tmp_path) + _reset_notes_state() + + previous_tracer = get_global_tracer() + tracer = Tracer("notes-replay-run") + set_global_tracer(tracer) + + try: + created = notes_actions.create_note( + title="Auth findings", + content="initial 
finding", + category="findings", + tags=["auth"], + ) + assert created["success"] is True + note_id = created["note_id"] + assert isinstance(note_id, str) + + notes_path = tmp_path / "strix_runs" / "notes-replay-run" / "notes" / "notes.jsonl" + assert notes_path.exists() is True + + _reset_notes_state() + listed = notes_actions.list_notes(category="findings") + assert listed["success"] is True + assert listed["total_count"] == 1 + assert listed["notes"][0]["note_id"] == note_id + + updated = notes_actions.update_note(note_id=note_id, content="updated finding") + assert updated["success"] is True + + _reset_notes_state() + listed_after_update = notes_actions.list_notes(search="updated finding") + assert listed_after_update["success"] is True + assert listed_after_update["total_count"] == 1 + assert listed_after_update["notes"][0]["note_id"] == note_id + + deleted = notes_actions.delete_note(note_id=note_id) + assert deleted["success"] is True + + _reset_notes_state() + listed_after_delete = notes_actions.list_notes(category="findings") + assert listed_after_delete["success"] is True + assert listed_after_delete["total_count"] == 0 + finally: + _reset_notes_state() + set_global_tracer(previous_tracer) # type: ignore[arg-type]