feat: test suite + skill search + config-based mapping + custom skills

Test suite (personas/_tests/):
- 8 persona test files: neo, frodo, oracle, ghost, forge, sentinel, architect, scholar, gambit
- 43 test cases validating tone, keywords, escalation, confidence, language
- Run: python3 build.py --test (all) or --test neo (specific)

Skill search:
- BM25-like scoring across 795 skills with header boost
- Run: python3 build.py --search "pentest active directory"

Config-based skill mapping:
- SKILL_PERSONA_MAP moved to DEFAULT_SKILL_PERSONA_MAP
- Users can override in config.yaml via skill_persona_map: key
- load_skill_persona_map() merges defaults + user config

New Claude skills (custom for Salva's workflow):
- pentest-reporter: Turkish/English pentest report generator, Kill Chain Scanner format
- intel-briefing: IC-format intelligence products (EXEC_SUMMARY, FULL_INTEL_REPORT, JSON)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-06 22:28:17 +03:00
parent 430ffcf8dd
commit 88ef52a82d
11 changed files with 519 additions and 6 deletions
--- a/build.py
+++ b/build.py
@@ -189,6 +189,11 @@ def build_persona(persona_dir: Path, output_dir: Path, flat_config: dict, config
            for skill_name, skill_info in skills_index.get("skills", {}).items():
                if persona_name in skill_info.get("personas", []):
                    mapped_skills.append(skill_name)
+            # Also check config-based custom mapping
+            skill_map = skills_index.get("_skill_persona_map", {})
+            for skill_name, persona_list in skill_map.items():
+                if persona_name in persona_list and skill_name not in mapped_skills:
+                    mapped_skills.append(skill_name)
            if mapped_skills:
                output["skills"] = sorted(mapped_skills)

@@ -220,7 +225,7 @@ def build_persona(persona_dir: Path, output_dir: Path, flat_config: dict, config
    return count


-SKILL_PERSONA_MAP = {
+DEFAULT_SKILL_PERSONA_MAP = {
    # Cybersecurity skills → personas
    "pentest": ["neo"], "nmap-recon": ["neo", "vortex"], "security-scanner": ["neo", "phantom"],
    "sql-injection-testing": ["neo", "phantom"], "stealth-browser": ["neo", "oracle"],
@@ -248,9 +253,140 @@ SKILL_PERSONA_MAP = {
 }


-def build_skills_index(shared_dir: Path) -> dict:
+def load_skill_persona_map(config: dict) -> dict:
+    """Load skill→persona mapping from config.yaml or use defaults."""
+    custom = config.get("skill_persona_map", {})
+    merged = dict(DEFAULT_SKILL_PERSONA_MAP)
+    merged.update(custom)
+    return merged
+
+
+def search_skills(shared_dir: Path, query: str):
+    """Search across all shared skills using simple BM25-like scoring."""
+    query_terms = query.lower().split()
+    results = []
+
+    for skills_subdir in ["skills", "paperclip-skills", "community-skills"]:
+        skills_path = shared_dir / skills_subdir
+        if not skills_path.exists():
+            continue
+        for skill_dir in sorted(skills_path.iterdir()):
+            if not skill_dir.is_dir():
+                continue
+            skill_md = skill_dir / "SKILL.md"
+            if not skill_md.exists():
+                continue
+            content = skill_md.read_text(encoding="utf-8").lower()
+            # Simple scoring: count query term occurrences weighted by position
+            score = 0
+            first_50 = content[:500]  # boost matches in header/description
+            for term in query_terms:
+                score += first_50.count(term) * 3  # header boost
+                score += content.count(term)
+            if score > 0:
+                # Extract name and first description line
+                name = skill_dir.name
+                desc = ""
+                for line in content.split("\n"):
+                    line = line.strip()
+                    if line and not line.startswith(("---", "#", "name:", "description:")):
+                        desc = line[:100]
+                        break
+                results.append((score, name, skills_subdir, desc))
+
+    results.sort(key=lambda x: -x[0])
+    print(f"\n  Search: '{query}' — {len(results)} results\n")
+    for i, (score, name, source, desc) in enumerate(results[:20]):
+        print(f"  {i+1:2}. [{score:3}] {name} ({source})")
+        if desc:
+            print(f"       {desc}")
+    if len(results) > 20:
+        print(f"\n  ... and {len(results) - 20} more. Refine your query.")
+    elif len(results) == 0:
+        print("  No matches found. Try different keywords.")
+
+
+def run_tests(personas_dir: Path, target: str = None):
+    """Run persona test suite from _tests/*.yaml files."""
+    tests_dir = personas_dir / "_tests"
+    if not tests_dir.exists():
+        print("  No _tests/ directory found.")
+        return
+
+    test_files = sorted(tests_dir.glob("*.yaml"))
+    if not test_files:
+        print("  No test files found in _tests/")
+        return
+
+    total = 0
+    passed = 0
+    failed = 0
+    warnings = []
+
+    for test_file in test_files:
+        if test_file.name == "README.md":
+            continue
+        suite = yaml.safe_load(test_file.read_text(encoding="utf-8"))
+        if not suite:
+            continue
+        persona_name = suite.get("persona", test_file.stem)
+        if target and persona_name != target:
+            continue
+
+        print(f"\n  Testing: {persona_name} ({len(suite.get('tests', []))} cases)")
+
+        # Load persona prompt for validation
+        persona_prompt_path = personas_dir / persona_name / "general.md"
+        if not persona_prompt_path.exists():
+            print(f"    SKIP: {persona_name}/general.md not found")
+            continue
+        prompt_content = persona_prompt_path.read_text(encoding="utf-8").lower()
+
+        for test in suite.get("tests", []):
+            total += 1
+            test_name = test.get("name", f"test_{total}")
+            expect = test.get("expect", {})
+            test_passed = True
+
+            # Check must_include keywords exist in persona definition
+            for keyword in expect.get("must_include", []):
+                if keyword.lower() not in prompt_content:
+                    warnings.append(f"    {persona_name}/{test_name}: '{keyword}' not in persona prompt")
+                    test_passed = False
+
+            # Check escalation targets are defined
+            if expect.get("escalation"):
+                target_persona = expect["escalation"].lower()
+                if target_persona not in prompt_content:
+                    warnings.append(f"    {persona_name}/{test_name}: escalation to '{target_persona}' not defined in boundaries")
+                    test_passed = False
+
+            # Check confidence language for intel personas
+            if expect.get("confidence"):
+                if "confidence" not in prompt_content and "high" not in prompt_content:
+                    warnings.append(f"    {persona_name}/{test_name}: confidence levels not defined in persona")
+                    test_passed = False
+
+            if test_passed:
+                passed += 1
+                print(f"    PASS: {test_name}")
+            else:
+                failed += 1
+                print(f"    WARN: {test_name}")
+
+    print(f"\n  {'=' * 40}")
+    print(f"  Tests: {total} total, {passed} passed, {failed} warnings")
+    if warnings:
+        print(f"\n  Warnings:")
+        for w in warnings:
+            print(w)
+    print(f"  {'=' * 40}")
+
+
+def build_skills_index(shared_dir: Path, config: dict = None) -> dict:
    """Index all shared skills from _shared/skills/ and _shared/paperclip-skills/."""
-    index = {"skills": {}, "paperclip_skills": {}, "design_brands": [], "ui_ux_styles": 0}
+    skill_map = load_skill_persona_map(config or {})
+    index = {"skills": {}, "paperclip_skills": {}, "design_brands": [], "ui_ux_styles": 0, "_skill_persona_map": skill_map}

    # Index shared-skills
    skills_dir = shared_dir / "skills"
@@ -268,7 +404,7 @@ def build_skills_index(shared_dir: Path) -> dict:
                        first_line = line[:120]
                        break
                index["skills"][skill_dir.name] = {
-                    "personas": SKILL_PERSONA_MAP.get(skill_dir.name, []),
+                    "personas": skill_map.get(skill_dir.name, []),
                    "summary": first_line,
                    "has_references": (skill_dir / "references").is_dir(),
                }
@@ -774,6 +910,10 @@ def main():
    parser = argparse.ArgumentParser(description="Build persona library and optionally install to platforms.")
    parser.add_argument("--install", choices=["claude", "antigravity", "gemini", "openclaw", "paperclip", "all"],
                        help="Install generated personas to a target platform")
+    parser.add_argument("--search", type=str, metavar="QUERY",
+                        help="Search across all shared skills (e.g. --search 'pentest AD')")
+    parser.add_argument("--test", nargs="?", const="__all__", metavar="PERSONA",
+                        help="Run persona test suite (optionally specify persona name)")
    args = parser.parse_args()

    root = Path(__file__).parent
@@ -798,13 +938,25 @@ def main():
        print("No persona directories found.")
        sys.exit(1)

+    shared_dir = personas_dir / "_shared"
+
+    # Handle search-only mode
+    if args.search:
+        search_skills(shared_dir, args.search)
+        return
+
+    # Handle test-only mode
+    if args.test:
+        target = None if args.test == "__all__" else args.test
+        run_tests(personas_dir, target)
+        return
+
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"Building {len(persona_dirs)} personas -> {output_dir}\n")

    # Pre-build escalation graph and skills index
    escalation_graph = build_escalation_graph(personas_dir, flat_config)
-    shared_dir = personas_dir / "_shared"
-    skills_index = build_skills_index(shared_dir) if shared_dir.exists() else {}
+    skills_index = build_skills_index(shared_dir, config) if shared_dir.exists() else {}

    total_variants = 0
    for pdir in persona_dirs: