1446 lines
52 KiB
Python
Executable File
1446 lines
52 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
"""Build script: Generate .yaml, .json, .prompt.md from persona .md files.
|
||
|
||
Supports config.yaml for dynamic variable injection and user-specific customization.
|
||
New users: copy config.example.yaml → config.yaml and customize.
|
||
"""
|
||
|
||
import json
|
||
import re
|
||
import sys
|
||
import unicodedata
|
||
from pathlib import Path
|
||
|
||
try:
|
||
import yaml
|
||
except ImportError:
|
||
print("PyYAML required: pip install pyyaml")
|
||
sys.exit(1)
|
||
|
||
|
||
def load_config(root: Path) -> dict:
    """Return the parsed config.yaml under *root*, or {} when absent.

    When only config.example.yaml exists, prints a hint telling the user
    how to enable customization.
    """
    cfg_file = root / "config.yaml"
    if not cfg_file.exists():
        if (root / "config.example.yaml").exists():
            print(
                "WARN: No config.yaml found. Using defaults. Copy config.example.yaml → config.yaml to customize."
            )
        return {}

    loaded = yaml.safe_load(cfg_file.read_text(encoding="utf-8")) or {}
    print(f"Config loaded: {cfg_file}")
    return loaded
|
||
|
||
|
||
def resolve_shared_dir(root: Path, personas_dir: Path) -> Path | None:
|
||
"""Resolve canonical shared library path.
|
||
|
||
Primary location is personas/_shared. If that is missing, fall back to
|
||
known source mirrors under sources/.
|
||
"""
|
||
primary = personas_dir / "_shared"
|
||
if primary.exists():
|
||
return primary
|
||
|
||
sources_dir = root / "sources"
|
||
fallbacks = [
|
||
sources_dir / "temp-cyber-skills" / "personas" / "_shared",
|
||
sources_dir / "paperclip-docs-main" / "_shared",
|
||
]
|
||
for candidate in fallbacks:
|
||
if candidate.exists():
|
||
return candidate
|
||
|
||
return None
|
||
|
||
|
||
def discover_sources(root: Path) -> list[str]:
    """Return the known source mirror names present under root/sources."""
    base = root / "sources"
    if not base.exists():
        return []

    # Only these mirrors are recognized; order here fixes the result order.
    known_mirrors = (
        "Anthropic-Cybersecurity-Skills",
        "paperclip-docs-main",
        "temp-cyber-skills",
    )
    found = []
    for mirror in known_mirrors:
        if (base / mirror).exists():
            found.append(mirror)
    return found
|
||
|
||
|
||
def flatten_config(config: dict, prefix: str = "") -> dict:
    """Flatten a nested config dict into dotted keys for templating.

    {"user": {"name": "Salva"}} becomes {"user.name": "Salva"}.  Lists
    are kept verbatim and additionally exposed as "<key>.count" and
    "<key>.csv" (scalar items only, comma-joined).
    """
    out: dict = {}
    for key, value in config.items():
        dotted = f"{prefix}.{key}" if prefix else f"{prefix}{key}"
        if isinstance(value, dict):
            out.update(flatten_config(value, dotted))
            continue
        if isinstance(value, list):
            out[dotted] = value
            out[f"{dotted}.count"] = len(value)
            scalars = [str(item) for item in value if not isinstance(item, dict)]
            out[f"{dotted}.csv"] = ", ".join(scalars)
            continue
        out[dotted] = value
    return out
|
||
|
||
|
||
def inject_config(content: str, flat_config: dict) -> str:
    """Substitute {{key}} placeholders in *content* with config values.

    Unknown keys are left untouched; lists render as comma-joined
    scalars and booleans render as "enabled"/"disabled".
    """

    def substitute(m):
        lookup = m.group(1).strip()
        if lookup not in flat_config:
            return m.group(0)  # unknown key: keep the placeholder verbatim
        val = flat_config[lookup]
        if isinstance(val, list):
            return ", ".join(str(item) for item in val if not isinstance(item, dict))
        if isinstance(val, bool):
            return "enabled" if val else "disabled"
        return str(val)

    return re.sub(r"\{\{(.+?)\}\}", substitute, content)
|
||
|
||
|
||
def check_conditionals(content: str, flat_config: dict) -> str:
    """Expand {{#if key}}...{{/if}} and {{#unless key}}...{{/unless}} blocks.

    A key counts as "on" when its config value is truthy and not one of
    the sentinel off-values ("false", "none", "disabled", empty, zero).
    {{#if}} keeps its body only when the key is on; {{#unless}} keeps its
    body only when it is off.
    """
    off_values = (False, 0, "", "false", "none", "disabled", None, [])

    def is_on(key: str) -> bool:
        val = flat_config.get(key)
        return bool(val) and val not in off_values

    def expand_if(m):
        return m.group(2) if is_on(m.group(1).strip()) else ""

    def expand_unless(m):
        return "" if is_on(m.group(1).strip()) else m.group(2)

    content = re.sub(
        r"\{\{#if (.+?)\}\}(.*?)\{\{/if\}\}", expand_if, content, flags=re.DOTALL
    )
    return re.sub(
        r"\{\{#unless (.+?)\}\}(.*?)\{\{/unless\}\}",
        expand_unless,
        content,
        flags=re.DOTALL,
    )
|
||
|
||
|
||
def parse_persona_md(filepath: Path, flat_config: dict) -> dict:
    """Parse one persona .md file into {metadata, sections, raw_body}.

    Conditional blocks and {{...}} substitutions are applied first (when
    a config is present), then the body is split on "## " headings into
    normalized section keys.  Returns {} and prints a warning when the
    file has no YAML frontmatter.
    """
    text = filepath.read_text(encoding="utf-8")

    if flat_config:
        text = inject_config(check_conditionals(text, flat_config), flat_config)

    fm = re.match(r"^---\n(.*?)\n---\n(.*)$", text, re.DOTALL)
    if fm is None:
        print(f" WARN: No frontmatter in {filepath}")
        return {}

    meta = yaml.safe_load(fm.group(1))
    body = fm.group(2).strip()

    # Split the body into sections keyed by normalized "## " headings.
    sections: dict = {}
    heading = None
    buffered: list = []
    for raw_line in body.split("\n"):
        if raw_line.startswith("## "):
            if heading:
                sections[heading] = "\n".join(buffered).strip()
            heading = (
                raw_line[3:].strip().lower().replace(" ", "_").replace("&", "and")
            )
            buffered = []
        else:
            buffered.append(raw_line)
    if heading:
        sections[heading] = "\n".join(buffered).strip()

    return {
        "metadata": meta,
        "sections": sections,
        "raw_body": body,
    }
|
||
|
||
|
||
def build_persona(
    persona_dir: Path,
    output_dir: Path,
    flat_config: dict,
    config: dict,
    escalation_graph: dict | None = None,
    skills_index: dict | None = None,
) -> int:
    """Build all output variants (.yaml/.json/.prompt.md) for one persona dir.

    Args:
        persona_dir: Directory containing the persona's *.md variant files
            and optional _meta.yaml.
        output_dir: Root of the generated output tree.
        flat_config: Flattened config for {{...}} template injection.
        config: Full (nested) config, used for overrides and metadata.
        escalation_graph: Optional {persona: [targets]} mapping to embed.
        skills_index: Optional skills index used to attach mapped skills.

    Returns:
        Number of variants successfully built.
    """
    md_files = sorted(persona_dir.glob("*.md"))
    if not md_files:
        return 0

    persona_name = persona_dir.name
    out_path = output_dir / persona_name
    out_path.mkdir(parents=True, exist_ok=True)

    # Load _meta.yaml if it exists (with template injection applied).
    meta_file = persona_dir / "_meta.yaml"
    meta = {}
    if meta_file.exists():
        meta_content = meta_file.read_text(encoding="utf-8")
        if flat_config:
            meta_content = inject_config(meta_content, flat_config)
        meta = yaml.safe_load(meta_content) or {}

    # Config-level custom address overrides win over _meta.yaml.
    addresses = config.get("persona_defaults", {}).get("custom_addresses", {})
    if persona_name in addresses:
        meta["address_to"] = addresses[persona_name]

    count = 0
    for md_file in md_files:
        if md_file.name.startswith("_"):
            continue  # _meta.yaml siblings and other private files

        variant = md_file.stem
        parsed = parse_persona_md(md_file, flat_config)
        if not parsed:
            continue

        # Frontmatter overrides _meta.yaml on key collisions.
        output = {
            **meta,
            **parsed["metadata"],
            "variant": variant,
            "sections": parsed["sections"],
        }

        # Attach a snapshot of the active user config (enabled items only).
        if config:
            output["_config"] = {
                "user": config.get("user", {}).get("name", "unknown"),
                "tools": {
                    k: v
                    for k, v in config.get("infrastructure", {})
                    .get("tools", {})
                    .items()
                    if v is True
                },
                "frameworks": {
                    k: v for k, v in config.get("frameworks", {}).items() if v is True
                },
                "regional_focus": config.get("regional_focus", {}),
            }

        # Inject escalation targets for this persona, when known.
        if escalation_graph and persona_name in escalation_graph:
            output["escalates_to"] = escalation_graph[persona_name]

        # Collect skills mapped to this persona from both the per-skill
        # "personas" lists and the explicit skill→persona map; a set
        # dedupes the two sources.
        if skills_index:
            matched = {
                skill
                for skill, info in skills_index.get("skills", {}).items()
                if persona_name in info.get("personas", [])
            }
            for skill, persona_list in skills_index.get(
                "_skill_persona_map", {}
            ).items():
                if persona_name in persona_list:
                    matched.add(skill)
            if matched:
                output["skills"] = sorted(matched)

        # Section word counts for quality tracking.
        output["_stats"] = {
            "total_words": sum(len(s.split()) for s in parsed["sections"].values()),
            "sections": list(parsed["sections"].keys()),
            "section_count": len(parsed["sections"]),
        }

        # Write YAML
        yaml_out = out_path / f"{variant}.yaml"
        yaml_out.write_text(
            yaml.dump(
                output, allow_unicode=True, default_flow_style=False, sort_keys=False
            ),
            encoding="utf-8",
        )

        # Write JSON
        json_out = out_path / f"{variant}.json"
        json_out.write_text(
            json.dumps(output, ensure_ascii=False, indent=2), encoding="utf-8"
        )

        # Write plain system prompt (just the body, no config metadata)
        prompt_out = out_path / f"{variant}.prompt.md"
        prompt_out.write_text(parsed["raw_body"], encoding="utf-8")

        count += 1
        print(f" Built: {persona_name}/{variant} -> .yaml .json .prompt.md")

    return count
|
||
|
||
|
||
# Curated fallback mapping of skill directory names → persona codenames.
# Used when config.yaml provides no "skill_persona_map" override (see
# load_skill_persona_map, which filters entries against VALID_PERSONAS).
DEFAULT_SKILL_PERSONA_MAP = {
    # Cybersecurity skills → personas
    "pentest": ["neo"],
    "nmap-recon": ["neo", "vortex"],
    "security-scanner": ["neo", "phantom"],
    "sql-injection-testing": ["neo", "phantom"],
    "stealth-browser": ["neo", "oracle"],
    "security-audit-toolkit": ["neo", "forge"],
    "pwnclaw-security-scan": ["neo"],
    "senior-secops": ["bastion"],
    "clawsec": ["neo", "vortex"],
    "pcap-analyzer": ["vortex", "bastion"],
    "sys-guard-linux-remediator": ["bastion"],
    "ctf-writeup-generator": ["neo"],
    "dns-networking": ["vortex", "architect"],
    "network-scanner": ["neo", "vortex"],
    "security-skill-scanner": ["neo"],
    "pentest-active-directory": ["neo"],
    "pentest-api-attacker": ["neo", "phantom"],
    "pentest-auth-bypass": ["neo", "phantom"],
    "pentest-c2-operator": ["neo", "sentinel"],
    "gov-cybersecurity": ["sentinel", "bastion"],
    # Intelligence skills → personas
    "osint-investigator": ["oracle"],
    "seithar-intel": ["sentinel", "frodo"],
    "freshrss": ["frodo", "oracle"],
    "freshrss-reader": ["frodo", "oracle"],
    "war-intel-monitor": ["frodo", "marshal"],
    "news-crawler": ["frodo", "herald"],
    "dellight-intelligence-ops": ["frodo", "echo"],
    "dellight-strategic-intelligence": ["frodo"],
    "agent-intelligence-network-scan": ["oracle"],
    "social-trust-manipulation-detector": ["ghost"],
    # Infrastructure skills → personas
    "docker-essentials": ["architect"],
    "session-logs": ["architect"],
    # Document processing → personas
    "image-ocr": ["oracle", "scribe"],
    "mistral-ocr": ["oracle", "scribe"],
    "pdf-text-extractor": ["scribe", "scholar"],
    "youtube-transcript": ["herald", "scholar"],
    # Web scraping → personas
    "deep-scraper": ["oracle"],
    "crawl-for-ai": ["oracle", "herald"],
}
|
||
|
||
|
||
# Canonical set of persona codenames.  Skill→persona mapping entries that
# reference a codename outside this set are silently dropped (see
# load_skill_persona_map and infer_personas_from_skill_metadata).
VALID_PERSONAS = {
    "arbiter",
    "architect",
    "bastion",
    "centurion",
    "chronos",
    "cipher",
    "corsair",
    "echo",
    "forge",
    "frodo",
    "gambit",
    "ghost",
    "herald",
    "ledger",
    "marshal",
    "medic",
    "neo",
    "oracle",
    "phantom",
    "polyglot",
    "sage",
    "scholar",
    "scribe",
    "sentinel",
    "specter",
    "tribune",
    "vortex",
    "warden",
    "wraith",
}
|
||
|
||
|
||
def parse_skill_frontmatter(skill_md: Path) -> dict:
    """Return the YAML frontmatter of a SKILL.md as a dict.

    Yields {} when the frontmatter block is absent or does not parse to
    a mapping.
    """
    text = skill_md.read_text(encoding="utf-8")
    header = re.match(r"^---\n(.*?)\n---\n", text, re.DOTALL)
    if header is None:
        return {}
    data = yaml.safe_load(header.group(1))
    return data if isinstance(data, dict) else {}
|
||
|
||
|
||
def infer_personas_from_skill_metadata(skill_name: str, metadata: dict) -> list[str]:
    """Infer likely persona mappings using skill frontmatter metadata and naming.

    Matches the skill's subdomain against a fixed affinity table, then
    scans a combined text blob (name + domain + subdomain + description +
    tags) for keyword affinities.  Falls back to ["bastion"] for
    otherwise-unmapped cybersecurity skills.  Only codenames present in
    VALID_PERSONAS are returned, sorted for deterministic output.
    """
    name = (skill_name or "").lower()
    domain = str(metadata.get("domain", "")).lower()
    subdomain = str(metadata.get("subdomain", "")).lower()
    description = str(metadata.get("description", "")).lower()
    # Tags may contain None entries; drop them before lowercasing.
    tags = [str(t).lower() for t in metadata.get("tags", []) if t is not None]
    # Single searchable blob for the keyword pass below.
    blob = " ".join([name, domain, subdomain, description] + tags)

    personas = set()

    # Subdomain affinity: exact-match lookup on the frontmatter subdomain.
    subdomain_map = {
        "penetration-testing": ["neo", "phantom", "vortex"],
        "application-security": ["phantom", "neo"],
        "api-security": ["phantom", "neo"],
        "web-security": ["phantom", "neo"],
        "malware-analysis": ["specter", "bastion", "sentinel"],
        "memory-forensics": ["specter", "bastion"],
        "forensics": ["specter", "bastion"],
        "threat-intelligence": ["sentinel", "frodo", "oracle"],
        "incident-response": ["bastion", "sentinel", "medic"],
        "soc-operations": ["bastion", "sentinel"],
        "threat-hunting": ["sentinel", "bastion", "vortex"],
        "network-security": ["vortex", "bastion"],
        "network-forensics": ["vortex", "specter", "bastion"],
        "cloud-security": ["architect", "bastion", "sentinel"],
        "identity-security": ["cipher", "neo", "bastion"],
        "active-directory": ["cipher", "neo", "bastion"],
        "vulnerability-management": ["bastion", "forge"],
        "compliance": ["ledger", "arbiter", "bastion"],
        "ot-security": ["centurion", "bastion", "sentinel"],
    }
    personas.update(subdomain_map.get(subdomain, []))

    # Keyword affinity fallback: substring search over the whole blob, so
    # short keys like "ot" or "api" can also match inside longer words.
    keyword_map = {
        "apt": ["sentinel", "frodo"],
        "threat intel": ["sentinel", "oracle", "frodo"],
        "ioc": ["sentinel", "bastion"],
        "misp": ["sentinel", "oracle"],
        "siem": ["bastion", "sentinel"],
        "splunk": ["bastion", "sentinel"],
        "soc": ["bastion", "sentinel"],
        "incident response": ["bastion", "medic", "sentinel"],
        "phishing": ["bastion", "oracle", "sentinel"],
        "malware": ["specter", "bastion", "sentinel"],
        "ransomware": ["specter", "bastion", "sentinel"],
        "forensic": ["specter", "bastion"],
        "volatility": ["specter", "bastion"],
        "yara": ["specter", "bastion"],
        "memory": ["specter", "bastion"],
        "network": ["vortex", "bastion"],
        "zeek": ["vortex", "bastion", "sentinel"],
        "wireshark": ["vortex", "bastion"],
        "nmap": ["neo", "vortex"],
        "pentest": ["neo", "phantom", "vortex"],
        "red team": ["neo", "phantom", "specter"],
        "web": ["phantom", "neo"],
        "xss": ["phantom", "neo"],
        "sql injection": ["phantom", "neo"],
        "api": ["phantom", "neo"],
        "kubernetes": ["architect", "bastion", "sentinel"],
        "docker": ["architect", "bastion"],
        "aws": ["architect", "bastion", "sentinel"],
        "azure": ["architect", "bastion", "sentinel"],
        "gcp": ["architect", "bastion", "sentinel"],
        "iam": ["cipher", "architect", "bastion"],
        "active directory": ["cipher", "neo", "bastion"],
        "kerberos": ["cipher", "neo", "bastion"],
        "compliance": ["ledger", "arbiter", "bastion"],
        "nist": ["ledger", "bastion", "sentinel"],
        "ot": ["centurion", "bastion", "sentinel"],
        "scada": ["centurion", "bastion", "sentinel"],
        "ics": ["centurion", "bastion", "sentinel"],
    }
    for keyword, mapped_personas in keyword_map.items():
        if keyword in blob:
            personas.update(mapped_personas)

    # Conservative fallback for unmapped cybersecurity skills
    if not personas and "cyber" in domain:
        personas.update(["bastion"])

    # Keep only valid personas and deterministic order
    return sorted([p for p in personas if p in VALID_PERSONAS])
|
||
|
||
|
||
def load_skill_persona_map(config: dict) -> dict:
    """Merge the default skill→persona map with config.yaml overrides.

    Config entries replace defaults wholesale for their skill; unknown
    personas are filtered out of both sources, and non-list override
    values are ignored.
    """
    result = {}
    for skill, personas in DEFAULT_SKILL_PERSONA_MAP.items():
        result[skill] = [p for p in personas if p in VALID_PERSONAS]
    for skill, personas in config.get("skill_persona_map", {}).items():
        if isinstance(personas, list):
            result[skill] = [p for p in personas if p in VALID_PERSONAS]
    return result
|
||
|
||
|
||
def search_skills(shared_dir: Path, query: str):
    """Search shared skills by keyword and print the top 20 scored matches.

    Scoring is a plain term-frequency count; hits inside the first 500
    characters (the title/description zone) earn a 3x bonus.
    """
    terms = query.lower().split()
    scored = []

    for subdir in ("skills", "paperclip-skills", "community-skills"):
        base = shared_dir / subdir
        if not base.exists():
            continue
        for entry in sorted(base.iterdir()):
            if not entry.is_dir():
                continue
            skill_md = entry / "SKILL.md"
            if not skill_md.exists():
                continue
            text = skill_md.read_text(encoding="utf-8").lower()
            header = text[:500]  # matches here count extra (header/description)
            score = sum(header.count(t) * 3 + text.count(t) for t in terms)
            if score <= 0:
                continue
            # First non-frontmatter, non-heading line becomes the summary.
            summary = ""
            for raw in text.split("\n"):
                raw = raw.strip()
                if raw and not raw.startswith(
                    ("---", "#", "name:", "description:")
                ):
                    summary = raw[:100]
                    break
            scored.append((score, entry.name, subdir, summary))

    scored.sort(key=lambda item: -item[0])
    print(f"\n Search: '{query}' — {len(scored)} results\n")
    for rank, (score, skill, source, summary) in enumerate(scored[:20], start=1):
        print(f" {rank:2}. [{score:3}] {skill} ({source})")
        if summary:
            print(f" {summary}")
    if len(scored) > 20:
        print(f"\n ... and {len(scored) - 20} more. Refine your query.")
    elif len(scored) == 0:
        print(" No matches found. Try different keywords.")
|
||
|
||
|
||
def run_tests(personas_dir: Path, target: str | None = None):
    """Run the persona test suite from personas/_tests/*.yaml.

    Each suite file names a persona plus a list of static expectation
    checks (required keywords, escalation targets, confidence language)
    validated against the persona's general.md text.  Results are
    printed; nothing is returned.

    Args:
        personas_dir: Directory holding persona folders and _tests/.
        target: Optional persona codename; when given, only that
            persona's suite runs.
    """
    tests_dir = personas_dir / "_tests"
    if not tests_dir.exists():
        print(" No _tests/ directory found.")
        return

    # NOTE: the glob only matches *.yaml, so no README.md filter is needed.
    test_files = sorted(tests_dir.glob("*.yaml"))
    if not test_files:
        print(" No test files found in _tests/")
        return

    total = 0
    passed = 0
    failed = 0
    warnings = []

    for test_file in test_files:
        suite = yaml.safe_load(test_file.read_text(encoding="utf-8"))
        if not suite:
            continue
        persona_name = suite.get("persona", test_file.stem)
        if target and persona_name != target:
            continue

        print(f"\n Testing: {persona_name} ({len(suite.get('tests', []))} cases)")

        # Checks are static: they validate the persona *definition* text,
        # not live model output, so only general.md is needed.
        persona_prompt_path = personas_dir / persona_name / "general.md"
        if not persona_prompt_path.exists():
            print(f" SKIP: {persona_name}/general.md not found")
            continue
        prompt_content = persona_prompt_path.read_text(encoding="utf-8").lower()

        for test in suite.get("tests", []):
            total += 1
            test_name = test.get("name", f"test_{total}")
            expect = test.get("expect", {})
            test_passed = True

            # Required keywords must appear somewhere in the persona prompt.
            for keyword in expect.get("must_include", []):
                if keyword.lower() not in prompt_content:
                    warnings.append(
                        f" {persona_name}/{test_name}: '{keyword}' not in persona prompt"
                    )
                    test_passed = False

            # Escalation targets must be mentioned (usually in Boundaries).
            if expect.get("escalation"):
                target_persona = expect["escalation"].lower()
                if target_persona not in prompt_content:
                    warnings.append(
                        f" {persona_name}/{test_name}: escalation to '{target_persona}' not defined in boundaries"
                    )
                    test_passed = False

            # Intel personas should define confidence-level language.
            if expect.get("confidence"):
                if "confidence" not in prompt_content and "high" not in prompt_content:
                    warnings.append(
                        f" {persona_name}/{test_name}: confidence levels not defined in persona"
                    )
                    test_passed = False

            if test_passed:
                passed += 1
                print(f" PASS: {test_name}")
            else:
                failed += 1
                print(f" WARN: {test_name}")

    print(f"\n {'=' * 40}")
    print(f" Tests: {total} total, {passed} passed, {failed} warnings")
    if warnings:
        print("\n Warnings:")
        for w in warnings:
            print(w)
    print(f" {'=' * 40}")
|
||
|
||
|
||
def build_skills_index(shared_dir: Path, config: dict | None = None) -> dict:
    """Index all shared skills from _shared/{skills,paperclip-skills,community-skills}/.

    Returns a dict with per-skill persona mappings and summaries for the
    main skills/ tree, presence flags for the other two trees, design
    brand names, a UI/UX data-file count, and the merged skill→persona map.
    """
    skill_map = load_skill_persona_map(config or {})
    index = {
        "skills": {},
        "paperclip_skills": {},
        "community_skills": {},
        "design_brands": [],
        "ui_ux_styles": 0,
        "_skill_persona_map": skill_map,
    }

    def _skill_dirs(base: Path):
        """Yield subdirectories of *base* that contain a SKILL.md."""
        if not base.exists():
            return
        for entry in sorted(base.iterdir()):
            if entry.is_dir() and (entry / "SKILL.md").exists():
                yield entry

    # Index shared-skills with full metadata + persona mapping.
    for skill_dir in _skill_dirs(shared_dir / "skills"):
        skill_md = skill_dir / "SKILL.md"
        skill_meta = parse_skill_frontmatter(skill_md)
        inferred_personas = infer_personas_from_skill_metadata(
            skill_dir.name, skill_meta
        )
        configured_personas = skill_map.get(skill_dir.name, [])
        merged_personas = sorted(set(configured_personas).union(inferred_personas))

        # First non-frontmatter, non-heading line serves as the summary.
        content = skill_md.read_text(encoding="utf-8")
        first_line = ""
        for line in content.split("\n"):
            line = line.strip()
            if line and not line.startswith(("---", "#", "name:", "description:")):
                first_line = line[:120]
                break

        index["skills"][skill_dir.name] = {
            "personas": merged_personas,
            "summary": first_line,
            "domain": str(skill_meta.get("domain", "")),
            "subdomain": str(skill_meta.get("subdomain", "")),
            "tags": skill_meta.get("tags", []),
            "mapped_by": {
                "explicit": configured_personas,
                "inferred": inferred_personas,
            },
            "has_references": (skill_dir / "references").is_dir(),
        }

    # The other skill trees only need presence flags.
    for skill_dir in _skill_dirs(shared_dir / "paperclip-skills"):
        index["paperclip_skills"][skill_dir.name] = True
    for skill_dir in _skill_dirs(shared_dir / "community-skills"):
        index["community_skills"][skill_dir.name] = True

    # Index design brands (one directory per brand).
    design_dir = shared_dir / "design-md"
    if design_dir.exists():
        index["design_brands"] = sorted(
            [d.name for d in design_dir.iterdir() if d.is_dir()]
        )

    # Count UI/UX data files.
    uiux_dir = shared_dir / "ui-ux-pro-max" / "data"
    if uiux_dir.exists():
        index["ui_ux_styles"] = sum(1 for f in uiux_dir.glob("*.csv"))

    return index
|
||
|
||
|
||
def build_escalation_graph(personas_dir: Path, flat_config: dict) -> dict:
    """Map each persona name to the personas it escalates to.

    Targets are read from "Escalate to **<name>**" phrases inside the
    Boundaries section of each persona's general.md.
    """
    graph: dict = {}
    for entry in sorted(personas_dir.iterdir()):
        if not entry.is_dir() or entry.name.startswith((".", "_")):
            continue
        general_md = entry / "general.md"
        if not general_md.exists():
            continue
        parsed = parse_persona_md(general_md, flat_config)
        if not parsed:
            continue
        boundaries_text = parsed["sections"].get("boundaries", "")
        graph[entry.name] = [
            name.lower()
            for name in re.findall(r"Escalate to \*\*(\w+)\*\*", boundaries_text)
        ]
    return graph
|
||
|
||
|
||
def build_trigger_index(personas_dir: Path) -> dict:
    """Build a reverse index: activation trigger keyword → persona names.

    Reads activation_triggers from each persona's _meta.yaml; the result
    is used for multi-agent routing.
    """
    index: dict = {}
    for persona_dir in sorted(personas_dir.iterdir()):
        if not persona_dir.is_dir() or persona_dir.name.startswith((".", "_")):
            continue
        meta_file = persona_dir / "_meta.yaml"
        if not meta_file.exists():
            continue
        meta = yaml.safe_load(meta_file.read_text(encoding="utf-8")) or {}
        for trigger in meta.get("activation_triggers", []):
            # setdefault replaces the manual "if key not in dict" dance.
            index.setdefault(trigger.lower(), []).append(persona_dir.name)
    return index
|
||
|
||
|
||
def validate_persona(persona_name: str, parsed: dict) -> list:
    """Return a list of structural warnings for a parsed persona.

    Flags required sections that are missing or thin (under 30 words)
    and missing frontmatter fields.  *persona_name* is part of the call
    interface but does not appear in the messages.
    """
    issues = []

    sections = parsed.get("sections", {})
    for required in ("soul", "expertise", "methodology", "boundaries"):
        if required not in sections:
            issues.append(f"Missing section: {required}")
            continue
        words = len(sections[required].split())
        if words < 30:
            issues.append(f"Thin section ({words} words): {required}")

    metadata = parsed.get("metadata", {})
    for field in ("codename", "name", "domain", "address_to", "tone"):
        if field not in metadata:
            issues.append(f"Missing frontmatter: {field}")

    return issues
|
||
|
||
|
||
def build_catalog(
    personas_dir: Path,
    output_dir: Path,
    config: dict,
    flat_config: dict,
    shared_dir: Path | None,
):
    """Generate CATALOG.md with stats, escalation paths, and trigger index.

    Also writes machine-readable JSON indexes (escalation graph, trigger
    index, and — when a shared dir exists — the skills index) under
    output_dir/_index, and prints any persona validation warnings.

    Returns:
        Total word count across all personas' general.md sections.
    """
    addresses = config.get("persona_defaults", {}).get("custom_addresses", {})

    # Build escalation graph and trigger index
    escalation_graph = build_escalation_graph(personas_dir, flat_config)
    trigger_index = build_trigger_index(personas_dir)

    catalog_lines = [
        "# Persona Catalog\n",
        f"_Auto-generated by build.py | User: {config.get('user', {}).get('name', 'default')}_\n",
    ]

    total_words = 0
    total_sections = 0
    all_warnings = []

    # One catalog entry per persona directory that carries a _meta.yaml.
    for persona_dir in sorted(personas_dir.iterdir()):
        if not persona_dir.is_dir() or persona_dir.name.startswith((".", "_")):
            continue

        meta_file = persona_dir / "_meta.yaml"
        if not meta_file.exists():
            continue

        meta = yaml.safe_load(meta_file.read_text(encoding="utf-8")) or {}
        codename = meta.get("codename", persona_dir.name)
        # Config custom address overrides _meta.yaml's address_to.
        address = addresses.get(persona_dir.name, meta.get("address_to", "N/A"))
        variants = [
            f.stem
            for f in sorted(persona_dir.glob("*.md"))
            if not f.name.startswith("_")
        ]

        # Parse general.md for stats
        general = persona_dir / "general.md"
        word_count = 0
        section_count = 0
        if general.exists():
            parsed = parse_persona_md(general, flat_config)
            if parsed:
                for s in parsed["sections"].values():
                    word_count += len(s.split())
                section_count = len(parsed["sections"])
                # Validate
                warns = validate_persona(codename, parsed)
                for w in warns:
                    all_warnings.append(f" {codename}: {w}")

        total_words += word_count
        total_sections += section_count
        escalates_to = escalation_graph.get(persona_dir.name, [])

        catalog_lines.append(f"## {codename} — {meta.get('role', 'Unknown')}")
        catalog_lines.append(f"- **Domain:** {meta.get('domain', 'N/A')}")
        catalog_lines.append(f"- **Hitap:** {address}")
        catalog_lines.append(f"- **Variants:** {', '.join(variants)}")
        catalog_lines.append(
            f"- **Depth:** {word_count:,} words, {section_count} sections"
        )
        if escalates_to:
            catalog_lines.append(f"- **Escalates to:** {', '.join(escalates_to)}")
        catalog_lines.append("")

    # Add trigger index section
    catalog_lines.append("---\n")
    catalog_lines.append("## Activation Trigger Index\n")
    catalog_lines.append("_Keyword → persona routing for multi-agent systems_\n")
    for trigger in sorted(trigger_index.keys()):
        personas = ", ".join(trigger_index[trigger])
        catalog_lines.append(f"- **{trigger}** → {personas}")
    catalog_lines.append("")

    # Add stats
    catalog_lines.append("---\n")
    catalog_lines.append("## Build Statistics\n")
    catalog_lines.append(f"- Total prompt content: {total_words:,} words")
    catalog_lines.append(f"- Total sections: {total_sections}")
    catalog_lines.append(
        f"- Escalation connections: {sum(len(v) for v in escalation_graph.values())}"
    )
    catalog_lines.append(f"- Unique triggers: {len(trigger_index)}")
    catalog_lines.append("")

    catalog_path = personas_dir / "CATALOG.md"
    catalog_path.write_text("\n".join(catalog_lines), encoding="utf-8")
    print(f" Catalog: {catalog_path}")

    # Write escalation graph and trigger index as JSON for API consumers
    index_path = output_dir / "_index"
    index_path.mkdir(parents=True, exist_ok=True)

    (index_path / "escalation_graph.json").write_text(
        json.dumps(escalation_graph, indent=2, ensure_ascii=False), encoding="utf-8"
    )
    (index_path / "trigger_index.json").write_text(
        json.dumps(trigger_index, indent=2, ensure_ascii=False), encoding="utf-8"
    )
    print(f" Index: {index_path}/escalation_graph.json, trigger_index.json")

    # Write skills index if shared dir exists
    if shared_dir and shared_dir.exists():
        si = build_skills_index(shared_dir, config)
        (index_path / "skills_index.json").write_text(
            json.dumps(si, indent=2, ensure_ascii=False), encoding="utf-8"
        )
        print(
            f" Skills: {len(si.get('skills', {}))} shared + "
            f"{len(si.get('paperclip_skills', {}))} paperclip + "
            f"{len(si.get('community_skills', {}))} community + "
            f"{len(si.get('design_brands', []))} design brands + "
            f"{si.get('ui_ux_styles', 0)} UI/UX data files"
        )

    # Print validation warnings
    if all_warnings:
        print(f"\n WARNINGS ({len(all_warnings)}):")
        for w in all_warnings:
            print(f" {w}")

    return total_words
|
||
|
||
|
||
def print_summary(
    config: dict, total_personas: int, total_variants: int, total_words: int = 0
):
    """Print the final build summary banner, including config status.

    Args:
        config: Full user config ({} means defaults are in use).
        total_personas: Number of persona directories built.
        total_variants: Number of variant files generated.
        total_words: Aggregate word count across all persona sections.
    """
    print("\n" + "=" * 50)
    # Placeholder-free lines are plain strings, not f-strings.
    print("BUILD COMPLETE")
    print(f" Personas: {total_personas}")
    print(f" Variants: {total_variants}")
    print(f" Words: {total_words:,}")
    print(" Output: generated/")
    print(" Index: generated/_index/")

    if config:
        user = config.get("user", {}).get("name", "?")
        tools_on = sum(
            1
            for v in config.get("infrastructure", {}).get("tools", {}).values()
            if v is True
        )
        frameworks_on = sum(
            1 for v in config.get("frameworks", {}).values() if v is True
        )
        regions = config.get("regional_focus", {}).get("primary", [])
        print(f"\n Config: {user}")
        print(f" Tools: {tools_on} enabled")
        print(f" Frameworks: {frameworks_on} enabled")
        if regions:
            print(f" Regions: {', '.join(regions)}")
    else:
        print("\n Config: none (using defaults)")
        print(" Tip: Copy config.example.yaml → config.yaml to customize")
    print("=" * 50)
|
||
|
||
|
||
def install_claude(output_dir: Path):
    """Install personas to Claude Code as slash commands + agents.

    Writes one slash command per persona variant to ~/.claude/commands
    (named ``persona-<codename>`` for the general variant, otherwise
    ``persona-<codename>-<variant>``), and one agent .yml per persona to
    ~/.claude/agents built from the general.json variant (appears in the
    /agents menu).

    Args:
        output_dir: Root of the generated persona output tree.

    Returns:
        Number of slash commands installed.
    """
    commands_dir = Path.home() / ".claude" / "commands"
    agents_dir = Path.home() / ".claude" / "agents"
    commands_dir.mkdir(parents=True, exist_ok=True)
    agents_dir.mkdir(parents=True, exist_ok=True)

    cmd_count = 0
    agent_count = 0

    for persona_dir in sorted(output_dir.iterdir()):
        if not persona_dir.is_dir() or persona_dir.name.startswith("_"):
            continue

        # Install slash commands for all variants
        for prompt_file in persona_dir.glob("*.prompt.md"):
            # BUG FIX: Path.stem strips only the final suffix, so
            # "general.prompt.md" yielded variant "general.prompt" — the
            # `variant == "general"` check below never matched and commands
            # were misnamed (e.g. "persona-x-general.prompt.md"). Strip the
            # full compound suffix instead.
            variant = prompt_file.name.removesuffix(".prompt.md")
            codename = persona_dir.name
            cmd_name = (
                f"persona-{codename}"
                if variant == "general"
                else f"persona-{codename}-{variant}"
            )
            dest = commands_dir / f"{cmd_name}.md"
            content = prompt_file.read_text(encoding="utf-8")
            # $ARGUMENTS is replaced by Claude Code with the user's query text.
            command_content = f"{content}\n\n---\nUser query: $ARGUMENTS\n"
            dest.write_text(command_content, encoding="utf-8")
            cmd_count += 1

        # Install agent .yml for general variant (appears in /agents menu)
        general_json = persona_dir / "general.json"
        if not general_json.exists():
            continue

        data = json.loads(general_json.read_text(encoding="utf-8"))
        codename = data.get("codename", persona_dir.name)
        name = data.get("name", codename.title())
        role = data.get("role", "Specialist")
        domain = data.get("domain", "")
        tone = data.get("tone", "")
        address_to = data.get("address_to", "")
        skills = data.get("skills", [])
        quote = data.get("quote", "")

        sections = data.get("sections", {})
        soul = sections.get("soul", "")
        methodology = sections.get("methodology", "")
        behavior = sections.get("behavior_rules", "")

        # Compact instruction block; sections are truncated so the agent
        # definition stays small enough for the /agents menu.
        instructions = f"You are **{name}** ({address_to}) — {role}.\n\n"
        instructions += f"Domain: {domain} | Tone: {tone}\n\n"
        if quote:
            instructions += f'> "{quote}"\n\n'
        instructions += "## Soul\n" + soul[:1500] + "\n\n"
        if methodology:
            instructions += "## Methodology\n" + methodology[:1500] + "\n\n"
        if behavior:
            instructions += "## Behavior\n" + behavior[:800] + "\n"
        if skills:
            instructions += "\n## Mapped Skills\n" + ", ".join(skills) + "\n"

        agent = {
            "name": codename,
            "description": f"{name} ({address_to}) — {role}. {domain}.",
            "instructions": instructions,
            "allowedTools": [
                "Read(*)",
                "Edit(*)",
                "Write(*)",
                "Bash(*)",
                "Glob(*)",
                "Grep(*)",
                "WebFetch(*)",
                "WebSearch(*)",
            ],
        }
        agent_file = agents_dir / f"{codename}.yml"
        agent_file.write_text(
            yaml.dump(
                agent, allow_unicode=True, default_flow_style=False, sort_keys=False
            ),
            encoding="utf-8",
        )
        agent_count += 1

    print(f" Claude: {cmd_count} commands + {agent_count} agents installed")
    return cmd_count
|
||
|
||
|
||
def install_antigravity(output_dir: Path):
    """Install personas to Antigravity IDE system prompts.

    Copies every *.prompt.md variant to ~/.config/antigravity/personas/
    <codename>/<variant>.md.

    Args:
        output_dir: Root of the generated persona output tree.

    Returns:
        Number of prompt files installed.
    """
    # Antigravity stores system prompts in ~/.config/antigravity/prompts/ or project .antigravity/
    ag_dir = Path.home() / ".config" / "antigravity" / "personas"
    ag_dir.mkdir(parents=True, exist_ok=True)
    count = 0
    for persona_dir in sorted(output_dir.iterdir()):
        if not persona_dir.is_dir() or persona_dir.name.startswith("_"):
            continue
        for prompt_file in persona_dir.glob("*.prompt.md"):
            # BUG FIX: Path.stem strips only the final suffix, so
            # "general.prompt.md" produced variant "general.prompt" and the
            # installed file was named "general.prompt.md" instead of
            # "general.md". Strip the full compound suffix.
            variant = prompt_file.name.removesuffix(".prompt.md")
            codename = persona_dir.name
            dest = ag_dir / codename / f"{variant}.md"
            dest.parent.mkdir(parents=True, exist_ok=True)
            dest.write_text(prompt_file.read_text(encoding="utf-8"), encoding="utf-8")
            count += 1
    print(f" Antigravity: {count} personas installed to {ag_dir}")
    return count
|
||
|
||
|
||
def install_gemini(output_dir: Path):
    """Install personas as Gemini Gems (JSON format for Google AI Studio)."""
    gems_dir = output_dir / "_gems"
    gems_dir.mkdir(parents=True, exist_ok=True)
    generated = 0

    # Only real persona directories; underscore-prefixed dirs are build outputs.
    persona_dirs = [
        entry
        for entry in sorted(output_dir.iterdir())
        if entry.is_dir() and not entry.name.startswith("_")
    ]

    for pdir in persona_dirs:
        for spec_path in pdir.glob("*.json"):
            spec = json.loads(spec_path.read_text(encoding="utf-8"))
            variant = spec.get("variant", spec_path.stem)
            codename = spec.get("codename", pdir.name)
            display = spec.get("name", codename.title())
            sections = spec.get("sections", {})

            # Gem title: the bare persona name for the general variant,
            # otherwise "Name — variant".
            title = display if variant == "general" else f"{display} — {variant}"

            # System instruction is the four persona sections joined by blank lines.
            system_instruction = "\n\n".join(
                sections.get(key, "")
                for key in ("soul", "expertise", "methodology", "behavior_rules")
            )

            gem = {
                "name": title,
                "description": f"{spec.get('role', '')} | {spec.get('domain', '')}",
                "system_instruction": system_instruction,
                "metadata": {
                    "codename": codename,
                    "variant": variant,
                    "domain": spec.get("domain", ""),
                    "address_to": spec.get("address_to", ""),
                    "tone": spec.get("tone", ""),
                    "activation_triggers": spec.get("activation_triggers", []),
                },
            }

            target = gems_dir / f"{codename}-{variant}.json"
            target.write_text(
                json.dumps(gem, ensure_ascii=False, indent=2), encoding="utf-8"
            )
            generated += 1

    print(f" Gemini: {generated} gems generated to {gems_dir}")
    return generated
|
||
|
||
|
||
def install_paperclip(output_dir: Path, personas_dir: Path, shared_dir: Path | None):
    """Install personas as Paperclip agents (SOUL.md + hermes-config.yaml + AGENTS.md per agent).

    Builds output_dir/_paperclip from three sources:
      1. Generated personas (general.json / general.prompt.md) -> agents/<codename>/.
      2. Shared + paperclip-specific skill libraries -> skills/<skill>/.
      3. Original company agents mirrored under shared_dir/paperclip-agents -> agents/.

    Args:
        output_dir: Root of the generated persona output; _paperclip is
            recreated from scratch beneath it.
        personas_dir: Persona source tree, used to rebuild the escalation graph.
        shared_dir: Shared library root (skills/, paperclip-skills/,
            paperclip-agents/), or None when no shared library is available.

    Returns:
        Total number of agents deployed (persona agents + company agents).
    """
    import shutil  # hoisted: was re-imported at four separate copy sites

    pc_dir = output_dir / "_paperclip"
    agents_dir = pc_dir / "agents"
    skills_dir = pc_dir / "skills"

    # Recreate output for deterministic full migration.
    if pc_dir.exists():
        shutil.rmtree(pc_dir)

    agents_dir.mkdir(parents=True, exist_ok=True)
    skills_dir.mkdir(parents=True, exist_ok=True)

    # Build escalation graph for AGENTS.md org chart (no config substitutions).
    flat_config = {}
    escalation_graph = build_escalation_graph(personas_dir, flat_config)

    # Domain → toolset mapping for hermes-config
    domain_toolsets = {
        "cybersecurity": ["terminal", "file", "web", "code_execution"],
        "intelligence": ["terminal", "file", "web"],
        "military": ["terminal", "file", "web"],
        "engineering": ["terminal", "file", "web", "code_execution"],
        "law-economics": ["file", "web"],
        "history": ["file", "web"],
        "linguistics": ["file", "web"],
        "academia": ["file", "web"],
    }

    def _copy_skill(skill_src: Path, *, include_scripts: bool) -> bool:
        """Copy one SKILL.md-based skill dir into skills_dir; True if copied.

        Copies SKILL.md plus an optional references/ tree; scripts/ is copied
        only when include_scripts is set (paperclip-specific skills only, to
        match the original per-source behavior).
        """
        skill_md = skill_src / "SKILL.md"
        if not skill_md.exists():
            return False
        dest = skills_dir / skill_src.name
        dest.mkdir(parents=True, exist_ok=True)
        (dest / "SKILL.md").write_text(
            skill_md.read_text(encoding="utf-8"), encoding="utf-8"
        )
        refs = skill_src / "references"
        if refs.is_dir():
            shutil.copytree(refs, dest / "references", dirs_exist_ok=True)
        if include_scripts:
            scripts = skill_src / "scripts"
            if scripts.is_dir():
                shutil.copytree(scripts, dest / "scripts", dirs_exist_ok=True)
        return True

    agent_count = 0
    skill_count = 0

    for persona_dir in sorted(output_dir.iterdir()):
        if not persona_dir.is_dir() or persona_dir.name.startswith("_"):
            continue

        general_json = persona_dir / "general.json"
        general_prompt = persona_dir / "general.prompt.md"
        if not general_json.exists():
            continue

        data = json.loads(general_json.read_text(encoding="utf-8"))
        codename = data.get("codename", persona_dir.name)
        name = data.get("name", codename.title())
        role = data.get("role", "Specialist")
        domain = data.get("domain", "general")
        address_to = data.get("address_to", "")
        tone = data.get("tone", "")
        escalates_to = escalation_graph.get(persona_dir.name, [])
        skills = data.get("skills", [])

        agent_dir = agents_dir / codename
        agent_dir.mkdir(parents=True, exist_ok=True)

        # 1. SOUL.md — persona prompt adapted to Paperclip format
        soul_lines = [
            f"# {name} — {role}\n",
            "## Kimlik",
            f"- **Ad:** {name}",
            f"- **Kod Adı:** {codename}",
            f"- **Hitap:** {address_to}",
            f"- **Domain:** {domain}",
            f"- **Ton:** {tone}",
            "",
        ]
        if escalates_to:
            soul_lines.append("## İlişkiler")
            soul_lines.append(f"- **Escalation:** {', '.join(escalates_to)}")
            soul_lines.append("")
        if skills:
            soul_lines.append("## Skills")
            for s in skills:
                soul_lines.append(f"- {s}")
            soul_lines.append("")

        # Append the full prompt body
        if general_prompt.exists():
            soul_lines.append("## Detaylı Tanım\n")
            soul_lines.append(general_prompt.read_text(encoding="utf-8"))

        (agent_dir / "SOUL.md").write_text("\n".join(soul_lines), encoding="utf-8")

        # 2. hermes-config.yaml — per-agent runtime config; toolsets chosen by domain
        toolsets = domain_toolsets.get(domain, ["terminal", "file", "web"])
        hermes_config = {
            "model": "qwen/qwen3.6-plus:free",
            "provider": "openrouter",
            "defaults": {"quiet": True, "reasoning_effort": "medium"},
            "mcp_servers": {
                "web-search": {
                    "command": "npx",
                    "args": ["-y", "ddg-mcp-search"],
                },
            },
            "skills": {"external_dirs": ["~/.hermes/skills"]},
            "toolsets": toolsets,
        }
        (agent_dir / "hermes-config.yaml").write_text(
            yaml.dump(hermes_config, allow_unicode=True, default_flow_style=False),
            encoding="utf-8",
        )

        # 3. AGENTS.md — workspace overview with org connections
        agents_md_lines = [
            f"# {name} — Workspace\n",
            f"- **Agent:** {name} ({role})",
            f"- **Domain:** {domain}",
            "",
        ]
        if escalates_to:
            agents_md_lines.append("## Bağlantılar\n")
            for target in escalates_to:
                agents_md_lines.append(f"- → {target}")
            agents_md_lines.append("")

        (agent_dir / "AGENTS.md").write_text(
            "\n".join(agents_md_lines), encoding="utf-8"
        )
        agent_count += 1

    # Copy shared skills as Paperclip skills (SKILL.md format already compatible)
    shared_skills = shared_dir / "skills" if shared_dir else None
    if shared_skills is not None and shared_skills.exists():
        for skill_dir in sorted(shared_skills.iterdir()):
            if skill_dir.is_dir() and _copy_skill(skill_dir, include_scripts=False):
                skill_count += 1

    # Copy paperclip-specific skills; shared skills win on name collisions
    pc_skills = shared_dir / "paperclip-skills" if shared_dir else None
    if pc_skills is not None and pc_skills.exists():
        for skill_dir in sorted(pc_skills.iterdir()):
            if not skill_dir.is_dir():
                continue
            if (skills_dir / skill_dir.name).exists():
                continue
            if _copy_skill(skill_dir, include_scripts=True):
                skill_count += 1

    # Deploy original Paperclip company agents from _shared/paperclip-agents/
    pc_agents_src = shared_dir / "paperclip-agents" if shared_dir else None
    pc_agent_count = 0

    def normalize_agent_name(name: str) -> str:
        """Normalize escaped/unicode-heavy names to stable ASCII directory names."""
        # Decode "#Uxxxx" escape sequences back into their unicode characters.
        decoded = re.sub(
            r"#U([0-9A-Fa-f]{4})",
            lambda m: chr(int(m.group(1), 16)),
            name,
        )
        ascii_name = (
            unicodedata.normalize("NFKD", decoded)
            .encode("ascii", "ignore")
            .decode("ascii")
        )
        # Keep names filesystem-safe and deterministic.
        slug = re.sub(r"[^a-zA-Z0-9]+", "-", ascii_name).strip("-").lower()
        return slug or decoded

    if pc_agents_src is not None and pc_agents_src.exists():
        seen_company_agents = set()
        collision_count = 0
        for agent_src in sorted(pc_agents_src.iterdir()):
            if not agent_src.is_dir():
                continue
            agent_name = normalize_agent_name(agent_src.name)
            if agent_name in seen_company_agents:
                collision_count += 1
                continue
            seen_company_agents.add(agent_name)
            # Skip if persona-based agent already exists with same name
            if (agents_dir / agent_name).exists():
                continue
            dest = agents_dir / agent_name
            dest.mkdir(parents=True, exist_ok=True)
            for f in agent_src.iterdir():
                if f.is_file():
                    (dest / f.name).write_text(
                        f.read_text(encoding="utf-8"), encoding="utf-8"
                    )
            pc_agent_count += 1
        if collision_count:
            print(
                f" Note: skipped {collision_count} duplicate company agent source dirs after name normalization"
            )

    total_agents = agent_count + pc_agent_count
    print(
        f" Paperclip: {agent_count} persona agents + {pc_agent_count} company agents + {skill_count} skills to {pc_dir}"
    )
    return total_agents
|
||
|
||
|
||
def install_openclaw(output_dir: Path):
    """Install personas to OpenClaw format (IDENTITY.md + individual persona files)."""
    oc_root = output_dir / "_openclaw"
    oc_root.mkdir(parents=True, exist_ok=True)
    oc_personas = oc_root / "personas"
    oc_personas.mkdir(parents=True, exist_ok=True)

    installed = 0
    toc_entries = []

    for entry in sorted(output_dir.iterdir()):
        # Skip files and build-output dirs (underscore prefix).
        if not entry.is_dir() or entry.name.startswith("_"):
            continue
        source = entry / "general.prompt.md"
        if not source.exists():
            continue

        body = source.read_text(encoding="utf-8")
        codename = entry.name

        # Write individual persona file, mirrored one-to-one from the prompt.
        (oc_personas / f"{codename}.md").write_text(body, encoding="utf-8")

        # The first line (minus markdown heading markers) becomes the TOC title.
        heading = body.split("\n")[0].strip("# ").strip()
        toc_entries.append(f"### {heading}\nSee: personas/{codename}.md\n")
        installed += 1

    # Assemble IDENTITY.md from the collected table-of-contents entries.
    identity_doc = "# IDENTITY — Persona Definitions\n\n" + "\n".join(toc_entries)
    (oc_root / "IDENTITY.md").write_text(identity_doc, encoding="utf-8")

    print(f" OpenClaw: {installed} personas + IDENTITY.md to {oc_root}")
    return installed
|
||
|
||
|
||
def main():
    """CLI entry point: build the persona library, then optionally search,
    test, or install to target platforms.

    Modes (mutually exclusive in effect):
      --search QUERY   search shared skills and exit (no build)
      --test [NAME]    run the persona test suite and exit (no build)
      --install TARGET build, then install to one platform or "all"
      (no flags)       build only
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Build persona library and optionally install to platforms."
    )
    parser.add_argument(
        "--install",
        choices=["claude", "antigravity", "gemini", "openclaw", "paperclip", "all"],
        help="Install generated personas to a target platform",
    )
    parser.add_argument(
        "--search",
        type=str,
        metavar="QUERY",
        help="Search across all shared skills (e.g. --search 'pentest AD')",
    )
    parser.add_argument(
        "--test",
        # nargs="?" + const lets `--test` run everything while `--test NAME`
        # targets a single persona; "__all__" is the bare-flag sentinel.
        nargs="?",
        const="__all__",
        metavar="PERSONA",
        help="Run persona test suite (optionally specify persona name)",
    )
    args = parser.parse_args()

    # All paths are resolved relative to this script's directory.
    root = Path(__file__).parent
    personas_dir = root / "personas"

    if not personas_dir.exists():
        print("No personas/ directory found.")
        sys.exit(1)

    output_dir = root / "generated"

    # Load config
    config = load_config(root)
    flat_config = flatten_config(config) if config else {}

    # Find all persona directories (hidden and underscore dirs are excluded)
    persona_dirs = [
        d
        for d in sorted(personas_dir.iterdir())
        if d.is_dir() and not d.name.startswith((".", "_"))
    ]

    if not persona_dirs:
        print("No persona directories found.")
        sys.exit(1)

    # shared_dir may be None when no shared skill library exists anywhere.
    shared_dir = resolve_shared_dir(root, personas_dir)
    source_mirrors = discover_sources(root)

    if source_mirrors:
        print(f"Detected source mirrors: {', '.join(source_mirrors)}")
    else:
        print("Detected source mirrors: none")

    # Handle search-only mode (returns before any build output is written)
    if args.search:
        if not shared_dir:
            print("No shared skill library found.")
            return
        search_skills(shared_dir, args.search)
        return

    # Handle test-only mode (returns before any build output is written)
    if args.test:
        # None means "run the whole suite" downstream.
        target = None if args.test == "__all__" else args.test
        run_tests(personas_dir, target)
        return

    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"Building {len(persona_dirs)} personas -> {output_dir}\n")

    # Pre-build escalation graph and skills index (shared across all personas)
    escalation_graph = build_escalation_graph(personas_dir, flat_config)
    skills_index = build_skills_index(shared_dir, config) if shared_dir else {}

    total_variants = 0
    for pdir in persona_dirs:
        total_variants += build_persona(
            pdir, output_dir, flat_config, config, escalation_graph, skills_index
        )

    total_words = build_catalog(
        personas_dir, output_dir, config, flat_config, shared_dir
    )

    # Platform installation (runs only after a successful build)
    if args.install:
        print(f"\n--- Installing to: {args.install} ---\n")
        targets = (
            ["claude", "antigravity", "gemini", "openclaw", "paperclip"]
            if args.install == "all"
            else [args.install]
        )
        for target in targets:
            if target == "claude":
                install_claude(output_dir)
            elif target == "antigravity":
                install_antigravity(output_dir)
            elif target == "gemini":
                install_gemini(output_dir)
            elif target == "openclaw":
                install_openclaw(output_dir)
            elif target == "paperclip":
                install_paperclip(output_dir, personas_dir, shared_dir)

    print_summary(config, len(persona_dirs), total_variants, total_words)
|
||
|
||
|
||
# Script entry point: run the build CLI only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
|