reorganize
This commit is contained in:
580
build.py
580
build.py
@@ -8,6 +8,7 @@ New users: copy config.example.yaml → config.yaml and customize.
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import unicodedata
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
@@ -27,10 +28,48 @@ def load_config(root: Path) -> dict:
|
||||
|
||||
example_path = root / "config.example.yaml"
|
||||
if example_path.exists():
|
||||
print("WARN: No config.yaml found. Using defaults. Copy config.example.yaml → config.yaml to customize.")
|
||||
print(
|
||||
"WARN: No config.yaml found. Using defaults. Copy config.example.yaml → config.yaml to customize."
|
||||
)
|
||||
return {}
|
||||
|
||||
|
||||
def resolve_shared_dir(root: Path, personas_dir: Path) -> Path | None:
|
||||
"""Resolve canonical shared library path.
|
||||
|
||||
Primary location is personas/_shared. If that is missing, fall back to
|
||||
known source mirrors under sources/.
|
||||
"""
|
||||
primary = personas_dir / "_shared"
|
||||
if primary.exists():
|
||||
return primary
|
||||
|
||||
sources_dir = root / "sources"
|
||||
fallbacks = [
|
||||
sources_dir / "temp-cyber-skills" / "personas" / "_shared",
|
||||
sources_dir / "paperclip-docs-main" / "_shared",
|
||||
]
|
||||
for candidate in fallbacks:
|
||||
if candidate.exists():
|
||||
return candidate
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def discover_sources(root: Path) -> list[str]:
    """Return the known source mirrors that exist under ``root/sources``."""
    sources_dir = root / "sources"
    if not sources_dir.exists():
        return []

    found = []
    # Only these mirrors are recognized; order is preserved in the result.
    for mirror in (
        "Anthropic-Cybersecurity-Skills",
        "paperclip-docs-main",
        "temp-cyber-skills",
    ):
        if (sources_dir / mirror).exists():
            found.append(mirror)
    return found
|
||||
|
||||
|
||||
def flatten_config(config: dict, prefix: str = "") -> dict:
|
||||
"""Flatten nested config dict for template substitution.
|
||||
|
||||
@@ -44,7 +83,9 @@ def flatten_config(config: dict, prefix: str = "") -> dict:
|
||||
elif isinstance(value, list):
|
||||
flat[full_key] = value
|
||||
flat[f"{full_key}.count"] = len(value)
|
||||
flat[f"{full_key}.csv"] = ", ".join(str(v) for v in value if not isinstance(v, dict))
|
||||
flat[f"{full_key}.csv"] = ", ".join(
|
||||
str(v) for v in value if not isinstance(v, dict)
|
||||
)
|
||||
else:
|
||||
flat[full_key] = value
|
||||
return flat
|
||||
@@ -52,6 +93,7 @@ def flatten_config(config: dict, prefix: str = "") -> dict:
|
||||
|
||||
def inject_config(content: str, flat_config: dict) -> str:
|
||||
"""Replace {{config.key}} placeholders with config values."""
|
||||
|
||||
def replacer(match):
|
||||
key = match.group(1).strip()
|
||||
value = flat_config.get(key, match.group(0)) # keep original if not found
|
||||
@@ -66,6 +108,7 @@ def inject_config(content: str, flat_config: dict) -> str:
|
||||
|
||||
def check_conditionals(content: str, flat_config: dict) -> str:
|
||||
"""Process {{#if key}}...{{/if}} and {{#unless key}}...{{/unless}} blocks."""
|
||||
|
||||
# Handle {{#if key}}content{{/if}}
|
||||
def if_replacer(match):
|
||||
key = match.group(1).strip()
|
||||
@@ -75,7 +118,9 @@ def check_conditionals(content: str, flat_config: dict) -> str:
|
||||
return body
|
||||
return ""
|
||||
|
||||
content = re.sub(r"\{\{#if (.+?)\}\}(.*?)\{\{/if\}\}", if_replacer, content, flags=re.DOTALL)
|
||||
content = re.sub(
|
||||
r"\{\{#if (.+?)\}\}(.*?)\{\{/if\}\}", if_replacer, content, flags=re.DOTALL
|
||||
)
|
||||
|
||||
# Handle {{#unless key}}content{{/unless}}
|
||||
def unless_replacer(match):
|
||||
@@ -86,7 +131,12 @@ def check_conditionals(content: str, flat_config: dict) -> str:
|
||||
return body
|
||||
return ""
|
||||
|
||||
content = re.sub(r"\{\{#unless (.+?)\}\}(.*?)\{\{/unless\}\}", unless_replacer, content, flags=re.DOTALL)
|
||||
content = re.sub(
|
||||
r"\{\{#unless (.+?)\}\}(.*?)\{\{/unless\}\}",
|
||||
unless_replacer,
|
||||
content,
|
||||
flags=re.DOTALL,
|
||||
)
|
||||
|
||||
return content
|
||||
|
||||
@@ -118,7 +168,9 @@ def parse_persona_md(filepath: Path, flat_config: dict) -> dict:
|
||||
if line.startswith("## "):
|
||||
if current_section:
|
||||
sections[current_section] = "\n".join(current_content).strip()
|
||||
current_section = line[3:].strip().lower().replace(" ", "_").replace("&", "and")
|
||||
current_section = (
|
||||
line[3:].strip().lower().replace(" ", "_").replace("&", "and")
|
||||
)
|
||||
current_content = []
|
||||
else:
|
||||
current_content.append(line)
|
||||
@@ -133,7 +185,14 @@ def parse_persona_md(filepath: Path, flat_config: dict) -> dict:
|
||||
}
|
||||
|
||||
|
||||
def build_persona(persona_dir: Path, output_dir: Path, flat_config: dict, config: dict, escalation_graph: dict = None, skills_index: dict = None):
|
||||
def build_persona(
|
||||
persona_dir: Path,
|
||||
output_dir: Path,
|
||||
flat_config: dict,
|
||||
config: dict,
|
||||
escalation_graph: dict = None,
|
||||
skills_index: dict = None,
|
||||
):
|
||||
"""Build all variants for a persona directory."""
|
||||
md_files = sorted(persona_dir.glob("*.md"))
|
||||
if not md_files:
|
||||
@@ -168,14 +227,27 @@ def build_persona(persona_dir: Path, output_dir: Path, flat_config: dict, config
|
||||
continue
|
||||
|
||||
# Build output object
|
||||
output = {**meta, **parsed["metadata"], "variant": variant, "sections": parsed["sections"]}
|
||||
output = {
|
||||
**meta,
|
||||
**parsed["metadata"],
|
||||
"variant": variant,
|
||||
"sections": parsed["sections"],
|
||||
}
|
||||
|
||||
# Inject config metadata
|
||||
if config:
|
||||
output["_config"] = {
|
||||
"user": config.get("user", {}).get("name", "unknown"),
|
||||
"tools": {k: v for k, v in config.get("infrastructure", {}).get("tools", {}).items() if v is True},
|
||||
"frameworks": {k: v for k, v in config.get("frameworks", {}).items() if v is True},
|
||||
"tools": {
|
||||
k: v
|
||||
for k, v in config.get("infrastructure", {})
|
||||
.get("tools", {})
|
||||
.items()
|
||||
if v is True
|
||||
},
|
||||
"frameworks": {
|
||||
k: v for k, v in config.get("frameworks", {}).items() if v is True
|
||||
},
|
||||
"regional_focus": config.get("regional_focus", {}),
|
||||
}
|
||||
|
||||
@@ -207,13 +279,17 @@ def build_persona(persona_dir: Path, output_dir: Path, flat_config: dict, config
|
||||
# Write YAML
|
||||
yaml_out = out_path / f"{variant}.yaml"
|
||||
yaml_out.write_text(
|
||||
yaml.dump(output, allow_unicode=True, default_flow_style=False, sort_keys=False),
|
||||
yaml.dump(
|
||||
output, allow_unicode=True, default_flow_style=False, sort_keys=False
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
# Write JSON
|
||||
json_out = out_path / f"{variant}.json"
|
||||
json_out.write_text(json.dumps(output, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
json_out.write_text(
|
||||
json.dumps(output, ensure_ascii=False, indent=2), encoding="utf-8"
|
||||
)
|
||||
|
||||
# Write plain system prompt (just the body, no config metadata)
|
||||
prompt_out = out_path / f"{variant}.prompt.md"
|
||||
@@ -227,37 +303,192 @@ def build_persona(persona_dir: Path, output_dir: Path, flat_config: dict, config
|
||||
|
||||
# Default skill → persona routing table. Keys are skill directory names;
# values are the personas that should advertise the skill. Overridable per
# skill via config (see load_skill_persona_map).
# NOTE: the previous version carried each entry twice (compact and expanded
# forms left over from a reformat); duplicates are removed here — values
# were identical, so behavior is unchanged.
DEFAULT_SKILL_PERSONA_MAP = {
    # Cybersecurity skills → personas
    "pentest": ["neo"],
    "nmap-recon": ["neo", "vortex"],
    "security-scanner": ["neo", "phantom"],
    "sql-injection-testing": ["neo", "phantom"],
    "stealth-browser": ["neo", "oracle"],
    "security-audit-toolkit": ["neo", "forge"],
    "pwnclaw-security-scan": ["neo"],
    "senior-secops": ["bastion"],
    "clawsec": ["neo", "vortex"],
    "pcap-analyzer": ["vortex", "bastion"],
    "sys-guard-linux-remediator": ["bastion"],
    "ctf-writeup-generator": ["neo"],
    "dns-networking": ["vortex", "architect"],
    "network-scanner": ["neo", "vortex"],
    "security-skill-scanner": ["neo"],
    "pentest-active-directory": ["neo"],
    "pentest-api-attacker": ["neo", "phantom"],
    "pentest-auth-bypass": ["neo", "phantom"],
    "pentest-c2-operator": ["neo", "sentinel"],
    "gov-cybersecurity": ["sentinel", "bastion"],
    # Intelligence skills → personas
    "osint-investigator": ["oracle"],
    "seithar-intel": ["sentinel", "frodo"],
    "freshrss": ["frodo", "oracle"],
    "freshrss-reader": ["frodo", "oracle"],
    "war-intel-monitor": ["frodo", "marshal"],
    "news-crawler": ["frodo", "herald"],
    "dellight-intelligence-ops": ["frodo", "echo"],
    "dellight-strategic-intelligence": ["frodo"],
    "agent-intelligence-network-scan": ["oracle"],
    "social-trust-manipulation-detector": ["ghost"],
    # Infrastructure skills → personas
    "docker-essentials": ["architect"],
    "session-logs": ["architect"],
    # Document processing → personas
    "image-ocr": ["oracle", "scribe"],
    "mistral-ocr": ["oracle", "scribe"],
    "pdf-text-extractor": ["scribe", "scholar"],
    "youtube-transcript": ["herald", "scholar"],
    # Web scraping → personas
    "deep-scraper": ["oracle"],
    "crawl-for-ai": ["oracle", "herald"],
}
|
||||
|
||||
|
||||
# Canonical roster of persona codenames. Skill→persona mappings are
# filtered against this set so an unknown name never reaches the output.
VALID_PERSONAS = set(
    "arbiter architect bastion centurion chronos cipher corsair echo "
    "forge frodo gambit ghost herald ledger marshal medic neo oracle "
    "phantom polyglot sage scholar scribe sentinel specter tribune "
    "vortex warden wraith".split()
)
|
||||
|
||||
|
||||
def parse_skill_frontmatter(skill_md: Path) -> dict:
    """Parse YAML frontmatter from SKILL.md; return empty dict if absent/invalid.

    "Invalid" covers three cases: no leading ``--- ... ---`` block, YAML that
    fails to parse, and YAML that parses to a non-mapping value. The previous
    implementation let yaml.YAMLError propagate on malformed frontmatter,
    contradicting this contract and aborting the whole build.
    """
    content = skill_md.read_text(encoding="utf-8")
    fm_match = re.match(r"^---\n(.*?)\n---\n", content, re.DOTALL)
    if not fm_match:
        return {}
    try:
        parsed = yaml.safe_load(fm_match.group(1))
    except yaml.YAMLError:
        # Malformed frontmatter in one skill should not break indexing.
        return {}
    return parsed if isinstance(parsed, dict) else {}
|
||||
|
||||
|
||||
def infer_personas_from_skill_metadata(skill_name: str, metadata: dict) -> list:
    """Infer likely persona mappings using skill frontmatter metadata and naming."""
    name = (skill_name or "").lower()
    domain = str(metadata.get("domain", "")).lower()
    subdomain = str(metadata.get("subdomain", "")).lower()
    description = str(metadata.get("description", "")).lower()
    tags = [str(t).lower() for t in metadata.get("tags", []) if t is not None]
    # All searchable text, lowercased and space-joined for keyword matching.
    blob = " ".join([name, domain, subdomain, description] + tags)

    matched = set()

    # Direct subdomain → persona affinity (exact match on the subdomain field).
    by_subdomain = {
        "penetration-testing": ["neo", "phantom", "vortex"],
        "application-security": ["phantom", "neo"],
        "api-security": ["phantom", "neo"],
        "web-security": ["phantom", "neo"],
        "malware-analysis": ["specter", "bastion", "sentinel"],
        "memory-forensics": ["specter", "bastion"],
        "forensics": ["specter", "bastion"],
        "threat-intelligence": ["sentinel", "frodo", "oracle"],
        "incident-response": ["bastion", "sentinel", "medic"],
        "soc-operations": ["bastion", "sentinel"],
        "threat-hunting": ["sentinel", "bastion", "vortex"],
        "network-security": ["vortex", "bastion"],
        "network-forensics": ["vortex", "specter", "bastion"],
        "cloud-security": ["architect", "bastion", "sentinel"],
        "identity-security": ["cipher", "neo", "bastion"],
        "active-directory": ["cipher", "neo", "bastion"],
        "vulnerability-management": ["bastion", "forge"],
        "compliance": ["ledger", "arbiter", "bastion"],
        "ot-security": ["centurion", "bastion", "sentinel"],
    }
    matched.update(by_subdomain.get(subdomain, []))

    # Substring keyword affinity over the combined metadata text.
    by_keyword = {
        "apt": ["sentinel", "frodo"],
        "threat intel": ["sentinel", "oracle", "frodo"],
        "ioc": ["sentinel", "bastion"],
        "misp": ["sentinel", "oracle"],
        "siem": ["bastion", "sentinel"],
        "splunk": ["bastion", "sentinel"],
        "soc": ["bastion", "sentinel"],
        "incident response": ["bastion", "medic", "sentinel"],
        "phishing": ["bastion", "oracle", "sentinel"],
        "malware": ["specter", "bastion", "sentinel"],
        "ransomware": ["specter", "bastion", "sentinel"],
        "forensic": ["specter", "bastion"],
        "volatility": ["specter", "bastion"],
        "yara": ["specter", "bastion"],
        "memory": ["specter", "bastion"],
        "network": ["vortex", "bastion"],
        "zeek": ["vortex", "bastion", "sentinel"],
        "wireshark": ["vortex", "bastion"],
        "nmap": ["neo", "vortex"],
        "pentest": ["neo", "phantom", "vortex"],
        "red team": ["neo", "phantom", "specter"],
        "web": ["phantom", "neo"],
        "xss": ["phantom", "neo"],
        "sql injection": ["phantom", "neo"],
        "api": ["phantom", "neo"],
        "kubernetes": ["architect", "bastion", "sentinel"],
        "docker": ["architect", "bastion"],
        "aws": ["architect", "bastion", "sentinel"],
        "azure": ["architect", "bastion", "sentinel"],
        "gcp": ["architect", "bastion", "sentinel"],
        "iam": ["cipher", "architect", "bastion"],
        "active directory": ["cipher", "neo", "bastion"],
        "kerberos": ["cipher", "neo", "bastion"],
        "compliance": ["ledger", "arbiter", "bastion"],
        "nist": ["ledger", "bastion", "sentinel"],
        "ot": ["centurion", "bastion", "sentinel"],
        "scada": ["centurion", "bastion", "sentinel"],
        "ics": ["centurion", "bastion", "sentinel"],
    }
    for needle, hits in by_keyword.items():
        if needle in blob:
            matched.update(hits)

    # Conservative fallback for cyber-domain skills nothing else matched.
    if not matched and "cyber" in domain:
        matched.add("bastion")

    # Filter to the known roster and return a deterministic ordering.
    return sorted(p for p in matched if p in VALID_PERSONAS)
|
||||
|
||||
|
||||
def load_skill_persona_map(config: dict) -> dict:
    """Load skill→persona mapping from config.yaml or use defaults.

    Entries under ``config["skill_persona_map"]`` replace the default mapping
    for the same skill. Every persona list — default or custom — is filtered
    against VALID_PERSONAS so unknown names cannot leak into the output.
    The previous version also built ``merged`` via dict()/update() and then
    immediately rebuilt it (dead refactor residue); that dead code is removed
    with no behavior change.
    """
    custom = config.get("skill_persona_map", {})
    merged = {
        skill: [p for p in personas if p in VALID_PERSONAS]
        for skill, personas in DEFAULT_SKILL_PERSONA_MAP.items()
    }
    for skill, personas in custom.items():
        # Non-list values in config are ignored rather than trusted.
        if isinstance(personas, list):
            merged[skill] = [p for p in personas if p in VALID_PERSONAS]
    return merged
|
||||
|
||||
|
||||
@@ -289,7 +520,9 @@ def search_skills(shared_dir: Path, query: str):
|
||||
desc = ""
|
||||
for line in content.split("\n"):
|
||||
line = line.strip()
|
||||
if line and not line.startswith(("---", "#", "name:", "description:")):
|
||||
if line and not line.startswith(
|
||||
("---", "#", "name:", "description:")
|
||||
):
|
||||
desc = line[:100]
|
||||
break
|
||||
results.append((score, name, skills_subdir, desc))
|
||||
@@ -297,7 +530,7 @@ def search_skills(shared_dir: Path, query: str):
|
||||
results.sort(key=lambda x: -x[0])
|
||||
print(f"\n Search: '{query}' — {len(results)} results\n")
|
||||
for i, (score, name, source, desc) in enumerate(results[:20]):
|
||||
print(f" {i+1:2}. [{score:3}] {name} ({source})")
|
||||
print(f" {i + 1:2}. [{score:3}] {name} ({source})")
|
||||
if desc:
|
||||
print(f" {desc}")
|
||||
if len(results) > 20:
|
||||
@@ -351,20 +584,26 @@ def run_tests(personas_dir: Path, target: str = None):
|
||||
# Check must_include keywords exist in persona definition
|
||||
for keyword in expect.get("must_include", []):
|
||||
if keyword.lower() not in prompt_content:
|
||||
warnings.append(f" {persona_name}/{test_name}: '{keyword}' not in persona prompt")
|
||||
warnings.append(
|
||||
f" {persona_name}/{test_name}: '{keyword}' not in persona prompt"
|
||||
)
|
||||
test_passed = False
|
||||
|
||||
# Check escalation targets are defined
|
||||
if expect.get("escalation"):
|
||||
target_persona = expect["escalation"].lower()
|
||||
if target_persona not in prompt_content:
|
||||
warnings.append(f" {persona_name}/{test_name}: escalation to '{target_persona}' not defined in boundaries")
|
||||
warnings.append(
|
||||
f" {persona_name}/{test_name}: escalation to '{target_persona}' not defined in boundaries"
|
||||
)
|
||||
test_passed = False
|
||||
|
||||
# Check confidence language for intel personas
|
||||
if expect.get("confidence"):
|
||||
if "confidence" not in prompt_content and "high" not in prompt_content:
|
||||
warnings.append(f" {persona_name}/{test_name}: confidence levels not defined in persona")
|
||||
warnings.append(
|
||||
f" {persona_name}/{test_name}: confidence levels not defined in persona"
|
||||
)
|
||||
test_passed = False
|
||||
|
||||
if test_passed:
|
||||
@@ -384,9 +623,16 @@ def run_tests(personas_dir: Path, target: str = None):
|
||||
|
||||
|
||||
def build_skills_index(shared_dir: Path, config: dict = None) -> dict:
|
||||
"""Index all shared skills from _shared/skills/ and _shared/paperclip-skills/."""
|
||||
"""Index all shared skills from _shared/{skills,paperclip-skills,community-skills}/."""
|
||||
skill_map = load_skill_persona_map(config or {})
|
||||
index = {"skills": {}, "paperclip_skills": {}, "design_brands": [], "ui_ux_styles": 0, "_skill_persona_map": skill_map}
|
||||
index = {
|
||||
"skills": {},
|
||||
"paperclip_skills": {},
|
||||
"community_skills": {},
|
||||
"design_brands": [],
|
||||
"ui_ux_styles": 0,
|
||||
"_skill_persona_map": skill_map,
|
||||
}
|
||||
|
||||
# Index shared-skills
|
||||
skills_dir = shared_dir / "skills"
|
||||
@@ -396,16 +642,33 @@ def build_skills_index(shared_dir: Path, config: dict = None) -> dict:
|
||||
continue
|
||||
skill_md = skill_dir / "SKILL.md"
|
||||
if skill_md.exists():
|
||||
skill_meta = parse_skill_frontmatter(skill_md)
|
||||
inferred_personas = infer_personas_from_skill_metadata(
|
||||
skill_dir.name, skill_meta
|
||||
)
|
||||
configured_personas = skill_map.get(skill_dir.name, [])
|
||||
merged_personas = sorted(
|
||||
set(configured_personas).union(inferred_personas)
|
||||
)
|
||||
content = skill_md.read_text(encoding="utf-8")
|
||||
first_line = ""
|
||||
for line in content.split("\n"):
|
||||
line = line.strip()
|
||||
if line and not line.startswith(("---", "#", "name:", "description:")):
|
||||
if line and not line.startswith(
|
||||
("---", "#", "name:", "description:")
|
||||
):
|
||||
first_line = line[:120]
|
||||
break
|
||||
index["skills"][skill_dir.name] = {
|
||||
"personas": skill_map.get(skill_dir.name, []),
|
||||
"personas": merged_personas,
|
||||
"summary": first_line,
|
||||
"domain": str(skill_meta.get("domain", "")),
|
||||
"subdomain": str(skill_meta.get("subdomain", "")),
|
||||
"tags": skill_meta.get("tags", []),
|
||||
"mapped_by": {
|
||||
"explicit": configured_personas,
|
||||
"inferred": inferred_personas,
|
||||
},
|
||||
"has_references": (skill_dir / "references").is_dir(),
|
||||
}
|
||||
|
||||
@@ -419,10 +682,22 @@ def build_skills_index(shared_dir: Path, config: dict = None) -> dict:
|
||||
if skill_md.exists():
|
||||
index["paperclip_skills"][skill_dir.name] = True
|
||||
|
||||
# Index community-skills
|
||||
cskills_dir = shared_dir / "community-skills"
|
||||
if cskills_dir.exists():
|
||||
for skill_dir in sorted(cskills_dir.iterdir()):
|
||||
if not skill_dir.is_dir():
|
||||
continue
|
||||
skill_md = skill_dir / "SKILL.md"
|
||||
if skill_md.exists():
|
||||
index["community_skills"][skill_dir.name] = True
|
||||
|
||||
# Index design brands
|
||||
design_dir = shared_dir / "design-md"
|
||||
if design_dir.exists():
|
||||
index["design_brands"] = sorted([d.name for d in design_dir.iterdir() if d.is_dir()])
|
||||
index["design_brands"] = sorted(
|
||||
[d.name for d in design_dir.iterdir() if d.is_dir()]
|
||||
)
|
||||
|
||||
# Count UI/UX data
|
||||
uiux_dir = shared_dir / "ui-ux-pro-max" / "data"
|
||||
@@ -477,7 +752,9 @@ def validate_persona(persona_name: str, parsed: dict) -> list:
|
||||
if section not in parsed.get("sections", {}):
|
||||
warnings.append(f"Missing section: {section}")
|
||||
elif len(parsed["sections"][section].split()) < 30:
|
||||
warnings.append(f"Thin section ({len(parsed['sections'][section].split())} words): {section}")
|
||||
warnings.append(
|
||||
f"Thin section ({len(parsed['sections'][section].split())} words): {section}"
|
||||
)
|
||||
|
||||
fm = parsed.get("metadata", {})
|
||||
for field in ["codename", "name", "domain", "address_to", "tone"]:
|
||||
@@ -487,7 +764,13 @@ def validate_persona(persona_name: str, parsed: dict) -> list:
|
||||
return warnings
|
||||
|
||||
|
||||
def build_catalog(personas_dir: Path, output_dir: Path, config: dict, flat_config: dict):
|
||||
def build_catalog(
|
||||
personas_dir: Path,
|
||||
output_dir: Path,
|
||||
config: dict,
|
||||
flat_config: dict,
|
||||
shared_dir: Path | None,
|
||||
):
|
||||
"""Generate CATALOG.md with stats, escalation paths, and trigger index."""
|
||||
addresses = config.get("persona_defaults", {}).get("custom_addresses", {})
|
||||
|
||||
@@ -515,7 +798,11 @@ def build_catalog(personas_dir: Path, output_dir: Path, config: dict, flat_confi
|
||||
meta = yaml.safe_load(meta_file.read_text(encoding="utf-8")) or {}
|
||||
codename = meta.get("codename", persona_dir.name)
|
||||
address = addresses.get(persona_dir.name, meta.get("address_to", "N/A"))
|
||||
variants = [f.stem for f in sorted(persona_dir.glob("*.md")) if not f.name.startswith("_")]
|
||||
variants = [
|
||||
f.stem
|
||||
for f in sorted(persona_dir.glob("*.md"))
|
||||
if not f.name.startswith("_")
|
||||
]
|
||||
|
||||
# Parse general.md for stats
|
||||
general = persona_dir / "general.md"
|
||||
@@ -540,7 +827,9 @@ def build_catalog(personas_dir: Path, output_dir: Path, config: dict, flat_confi
|
||||
catalog_lines.append(f"- **Domain:** {meta.get('domain', 'N/A')}")
|
||||
catalog_lines.append(f"- **Hitap:** {address}")
|
||||
catalog_lines.append(f"- **Variants:** {', '.join(variants)}")
|
||||
catalog_lines.append(f"- **Depth:** {word_count:,} words, {section_count} sections")
|
||||
catalog_lines.append(
|
||||
f"- **Depth:** {word_count:,} words, {section_count} sections"
|
||||
)
|
||||
if escalates_to:
|
||||
catalog_lines.append(f"- **Escalates to:** {', '.join(escalates_to)}")
|
||||
catalog_lines.append("")
|
||||
@@ -559,7 +848,9 @@ def build_catalog(personas_dir: Path, output_dir: Path, config: dict, flat_confi
|
||||
catalog_lines.append("## Build Statistics\n")
|
||||
catalog_lines.append(f"- Total prompt content: {total_words:,} words")
|
||||
catalog_lines.append(f"- Total sections: {total_sections}")
|
||||
catalog_lines.append(f"- Escalation connections: {sum(len(v) for v in escalation_graph.values())}")
|
||||
catalog_lines.append(
|
||||
f"- Escalation connections: {sum(len(v) for v in escalation_graph.values())}"
|
||||
)
|
||||
catalog_lines.append(f"- Unique triggers: {len(trigger_index)}")
|
||||
catalog_lines.append("")
|
||||
|
||||
@@ -580,13 +871,18 @@ def build_catalog(personas_dir: Path, output_dir: Path, config: dict, flat_confi
|
||||
print(f" Index: {index_path}/escalation_graph.json, trigger_index.json")
|
||||
|
||||
# Write skills index if shared dir exists
|
||||
shared_dir = personas_dir / "_shared"
|
||||
if shared_dir.exists():
|
||||
si = build_skills_index(shared_dir)
|
||||
if shared_dir and shared_dir.exists():
|
||||
si = build_skills_index(shared_dir, config)
|
||||
(index_path / "skills_index.json").write_text(
|
||||
json.dumps(si, indent=2, ensure_ascii=False), encoding="utf-8"
|
||||
)
|
||||
print(f" Skills: {len(si.get('skills', {}))} shared + {len(si.get('paperclip_skills', {}))} paperclip + {len(si.get('design_brands', []))} design brands + {si.get('ui_ux_styles', 0)} UI/UX data files")
|
||||
print(
|
||||
f" Skills: {len(si.get('skills', {}))} shared + "
|
||||
f"{len(si.get('paperclip_skills', {}))} paperclip + "
|
||||
f"{len(si.get('community_skills', {}))} community + "
|
||||
f"{len(si.get('design_brands', []))} design brands + "
|
||||
f"{si.get('ui_ux_styles', 0)} UI/UX data files"
|
||||
)
|
||||
|
||||
# Print validation warnings
|
||||
if all_warnings:
|
||||
@@ -597,7 +893,9 @@ def build_catalog(personas_dir: Path, output_dir: Path, config: dict, flat_confi
|
||||
return total_words
|
||||
|
||||
|
||||
def print_summary(config: dict, total_personas: int, total_variants: int, total_words: int = 0):
|
||||
def print_summary(
|
||||
config: dict, total_personas: int, total_variants: int, total_words: int = 0
|
||||
):
|
||||
"""Print build summary with config status."""
|
||||
print("\n" + "=" * 50)
|
||||
print(f"BUILD COMPLETE")
|
||||
@@ -609,8 +907,14 @@ def print_summary(config: dict, total_personas: int, total_variants: int, total_
|
||||
|
||||
if config:
|
||||
user = config.get("user", {}).get("name", "?")
|
||||
tools_on = sum(1 for v in config.get("infrastructure", {}).get("tools", {}).values() if v is True)
|
||||
frameworks_on = sum(1 for v in config.get("frameworks", {}).values() if v is True)
|
||||
tools_on = sum(
|
||||
1
|
||||
for v in config.get("infrastructure", {}).get("tools", {}).values()
|
||||
if v is True
|
||||
)
|
||||
frameworks_on = sum(
|
||||
1 for v in config.get("frameworks", {}).values() if v is True
|
||||
)
|
||||
regions = config.get("regional_focus", {}).get("primary", [])
|
||||
print(f"\n Config: {user}")
|
||||
print(f" Tools: {tools_on} enabled")
|
||||
@@ -641,7 +945,11 @@ def install_claude(output_dir: Path):
|
||||
for prompt_file in persona_dir.glob("*.prompt.md"):
|
||||
variant = prompt_file.stem
|
||||
codename = persona_dir.name
|
||||
cmd_name = f"persona-{codename}" if variant == "general" else f"persona-{codename}-{variant}"
|
||||
cmd_name = (
|
||||
f"persona-{codename}"
|
||||
if variant == "general"
|
||||
else f"persona-{codename}-{variant}"
|
||||
)
|
||||
dest = commands_dir / f"{cmd_name}.md"
|
||||
content = prompt_file.read_text(encoding="utf-8")
|
||||
command_content = f"{content}\n\n---\nUser query: $ARGUMENTS\n"
|
||||
@@ -683,10 +991,24 @@ def install_claude(output_dir: Path):
|
||||
"name": codename,
|
||||
"description": f"{name} ({address_to}) — {role}. {domain}.",
|
||||
"instructions": instructions,
|
||||
"allowedTools": ["Read(*)", "Edit(*)", "Write(*)", "Bash(*)", "Glob(*)", "Grep(*)", "WebFetch(*)", "WebSearch(*)"],
|
||||
"allowedTools": [
|
||||
"Read(*)",
|
||||
"Edit(*)",
|
||||
"Write(*)",
|
||||
"Bash(*)",
|
||||
"Glob(*)",
|
||||
"Grep(*)",
|
||||
"WebFetch(*)",
|
||||
"WebSearch(*)",
|
||||
],
|
||||
}
|
||||
agent_file = agents_dir / f"{codename}.yml"
|
||||
agent_file.write_text(yaml.dump(agent, allow_unicode=True, default_flow_style=False, sort_keys=False), encoding="utf-8")
|
||||
agent_file.write_text(
|
||||
yaml.dump(
|
||||
agent, allow_unicode=True, default_flow_style=False, sort_keys=False
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
agent_count += 1
|
||||
|
||||
print(f" Claude: {cmd_count} commands + {agent_count} agents installed")
|
||||
@@ -730,10 +1052,13 @@ def install_gemini(output_dir: Path):
|
||||
gem = {
|
||||
"name": f"{name} — {variant}" if variant != "general" else name,
|
||||
"description": f"{data.get('role', '')} | {data.get('domain', '')}",
|
||||
"system_instruction": data.get("sections", {}).get("soul", "") + "\n\n" +
|
||||
data.get("sections", {}).get("expertise", "") + "\n\n" +
|
||||
data.get("sections", {}).get("methodology", "") + "\n\n" +
|
||||
data.get("sections", {}).get("behavior_rules", ""),
|
||||
"system_instruction": data.get("sections", {}).get("soul", "")
|
||||
+ "\n\n"
|
||||
+ data.get("sections", {}).get("expertise", "")
|
||||
+ "\n\n"
|
||||
+ data.get("sections", {}).get("methodology", "")
|
||||
+ "\n\n"
|
||||
+ data.get("sections", {}).get("behavior_rules", ""),
|
||||
"metadata": {
|
||||
"codename": codename,
|
||||
"variant": variant,
|
||||
@@ -744,17 +1069,26 @@ def install_gemini(output_dir: Path):
|
||||
},
|
||||
}
|
||||
dest = gems_dir / f"{codename}-{variant}.json"
|
||||
dest.write_text(json.dumps(gem, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
dest.write_text(
|
||||
json.dumps(gem, ensure_ascii=False, indent=2), encoding="utf-8"
|
||||
)
|
||||
count += 1
|
||||
print(f" Gemini: {count} gems generated to {gems_dir}")
|
||||
return count
|
||||
|
||||
|
||||
def install_paperclip(output_dir: Path, personas_dir: Path):
|
||||
def install_paperclip(output_dir: Path, personas_dir: Path, shared_dir: Path | None):
|
||||
"""Install personas as Paperclip agents (SOUL.md + hermes-config.yaml + AGENTS.md per agent)."""
|
||||
pc_dir = output_dir / "_paperclip"
|
||||
agents_dir = pc_dir / "agents"
|
||||
skills_dir = pc_dir / "skills"
|
||||
|
||||
# Recreate output for deterministic full migration.
|
||||
if pc_dir.exists():
|
||||
import shutil
|
||||
|
||||
shutil.rmtree(pc_dir)
|
||||
|
||||
agents_dir.mkdir(parents=True, exist_ok=True)
|
||||
skills_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@@ -860,11 +1194,13 @@ def install_paperclip(output_dir: Path, personas_dir: Path):
|
||||
agents_md_lines.append(f"- → {target}")
|
||||
agents_md_lines.append("")
|
||||
|
||||
(agent_dir / "AGENTS.md").write_text("\n".join(agents_md_lines), encoding="utf-8")
|
||||
(agent_dir / "AGENTS.md").write_text(
|
||||
"\n".join(agents_md_lines), encoding="utf-8"
|
||||
)
|
||||
agent_count += 1
|
||||
|
||||
# Copy shared skills as Paperclip skills (SKILL.md format already compatible)
|
||||
shared_skills = personas_dir / "_shared" / "skills"
|
||||
shared_skills = shared_dir / "skills" if shared_dir else Path("__missing__")
|
||||
if shared_skills.exists():
|
||||
for skill_dir in sorted(shared_skills.iterdir()):
|
||||
if not skill_dir.is_dir():
|
||||
@@ -873,15 +1209,18 @@ def install_paperclip(output_dir: Path, personas_dir: Path):
|
||||
if skill_md.exists():
|
||||
dest = skills_dir / skill_dir.name
|
||||
dest.mkdir(parents=True, exist_ok=True)
|
||||
(dest / "SKILL.md").write_text(skill_md.read_text(encoding="utf-8"), encoding="utf-8")
|
||||
(dest / "SKILL.md").write_text(
|
||||
skill_md.read_text(encoding="utf-8"), encoding="utf-8"
|
||||
)
|
||||
refs = skill_dir / "references"
|
||||
if refs.is_dir():
|
||||
import shutil
|
||||
|
||||
shutil.copytree(refs, dest / "references", dirs_exist_ok=True)
|
||||
skill_count += 1
|
||||
|
||||
# Copy paperclip-specific skills
|
||||
pc_skills = personas_dir / "_shared" / "paperclip-skills"
|
||||
pc_skills = shared_dir / "paperclip-skills" if shared_dir else Path("__missing__")
|
||||
if pc_skills.exists():
|
||||
for skill_dir in sorted(pc_skills.iterdir()):
|
||||
if not skill_dir.is_dir():
|
||||
@@ -890,25 +1229,54 @@ def install_paperclip(output_dir: Path, personas_dir: Path):
|
||||
if skill_md.exists() and not (skills_dir / skill_dir.name).exists():
|
||||
dest = skills_dir / skill_dir.name
|
||||
dest.mkdir(parents=True, exist_ok=True)
|
||||
(dest / "SKILL.md").write_text(skill_md.read_text(encoding="utf-8"), encoding="utf-8")
|
||||
(dest / "SKILL.md").write_text(
|
||||
skill_md.read_text(encoding="utf-8"), encoding="utf-8"
|
||||
)
|
||||
refs = skill_dir / "references"
|
||||
if refs.is_dir():
|
||||
import shutil
|
||||
|
||||
shutil.copytree(refs, dest / "references", dirs_exist_ok=True)
|
||||
scripts = skill_dir / "scripts"
|
||||
if scripts.is_dir():
|
||||
import shutil
|
||||
|
||||
shutil.copytree(scripts, dest / "scripts", dirs_exist_ok=True)
|
||||
skill_count += 1
|
||||
|
||||
# Deploy original Paperclip company agents from _shared/paperclip-agents/
|
||||
pc_agents_src = personas_dir / "_shared" / "paperclip-agents"
|
||||
pc_agents_src = (
|
||||
shared_dir / "paperclip-agents" if shared_dir else Path("__missing__")
|
||||
)
|
||||
pc_agent_count = 0
|
||||
|
||||
def normalize_agent_name(name: str) -> str:
|
||||
"""Normalize escaped/unicode-heavy names to stable ASCII directory names."""
|
||||
decoded = re.sub(
|
||||
r"#U([0-9A-Fa-f]{4})",
|
||||
lambda m: chr(int(m.group(1), 16)),
|
||||
name,
|
||||
)
|
||||
ascii_name = (
|
||||
unicodedata.normalize("NFKD", decoded)
|
||||
.encode("ascii", "ignore")
|
||||
.decode("ascii")
|
||||
)
|
||||
# Keep names filesystem-safe and deterministic.
|
||||
slug = re.sub(r"[^a-zA-Z0-9]+", "-", ascii_name).strip("-").lower()
|
||||
return slug or decoded
|
||||
|
||||
if pc_agents_src.exists():
|
||||
seen_company_agents = set()
|
||||
collision_count = 0
|
||||
for agent_src in sorted(pc_agents_src.iterdir()):
|
||||
if not agent_src.is_dir():
|
||||
continue
|
||||
agent_name = agent_src.name
|
||||
agent_name = normalize_agent_name(agent_src.name)
|
||||
if agent_name in seen_company_agents:
|
||||
collision_count += 1
|
||||
continue
|
||||
seen_company_agents.add(agent_name)
|
||||
# Skip if persona-based agent already exists with same name
|
||||
if (agents_dir / agent_name).exists():
|
||||
continue
|
||||
@@ -916,11 +1284,19 @@ def install_paperclip(output_dir: Path, personas_dir: Path):
|
||||
dest.mkdir(parents=True, exist_ok=True)
|
||||
for f in agent_src.iterdir():
|
||||
if f.is_file():
|
||||
(dest / f.name).write_text(f.read_text(encoding="utf-8"), encoding="utf-8")
|
||||
(dest / f.name).write_text(
|
||||
f.read_text(encoding="utf-8"), encoding="utf-8"
|
||||
)
|
||||
pc_agent_count += 1
|
||||
if collision_count:
|
||||
print(
|
||||
f" Note: skipped {collision_count} duplicate company agent source dirs after name normalization"
|
||||
)
|
||||
|
||||
total_agents = agent_count + pc_agent_count
|
||||
print(f" Paperclip: {agent_count} persona agents + {pc_agent_count} company agents + {skill_count} skills to {pc_dir}")
|
||||
print(
|
||||
f" Paperclip: {agent_count} persona agents + {pc_agent_count} company agents + {skill_count} skills to {pc_dir}"
|
||||
)
|
||||
return total_agents
|
||||
|
||||
|
||||
@@ -955,13 +1331,28 @@ def install_openclaw(output_dir: Path):
|
||||
|
||||
def main():
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser(description="Build persona library and optionally install to platforms.")
|
||||
parser.add_argument("--install", choices=["claude", "antigravity", "gemini", "openclaw", "paperclip", "all"],
|
||||
help="Install generated personas to a target platform")
|
||||
parser.add_argument("--search", type=str, metavar="QUERY",
|
||||
help="Search across all shared skills (e.g. --search 'pentest AD')")
|
||||
parser.add_argument("--test", nargs="?", const="__all__", metavar="PERSONA",
|
||||
help="Run persona test suite (optionally specify persona name)")
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Build persona library and optionally install to platforms."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--install",
|
||||
choices=["claude", "antigravity", "gemini", "openclaw", "paperclip", "all"],
|
||||
help="Install generated personas to a target platform",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--search",
|
||||
type=str,
|
||||
metavar="QUERY",
|
||||
help="Search across all shared skills (e.g. --search 'pentest AD')",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--test",
|
||||
nargs="?",
|
||||
const="__all__",
|
||||
metavar="PERSONA",
|
||||
help="Run persona test suite (optionally specify persona name)",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
root = Path(__file__).parent
|
||||
@@ -979,17 +1370,28 @@ def main():
|
||||
|
||||
# Find all persona directories
|
||||
persona_dirs = [
|
||||
d for d in sorted(personas_dir.iterdir()) if d.is_dir() and not d.name.startswith((".", "_"))
|
||||
d
|
||||
for d in sorted(personas_dir.iterdir())
|
||||
if d.is_dir() and not d.name.startswith((".", "_"))
|
||||
]
|
||||
|
||||
if not persona_dirs:
|
||||
print("No persona directories found.")
|
||||
sys.exit(1)
|
||||
|
||||
shared_dir = personas_dir / "_shared"
|
||||
shared_dir = resolve_shared_dir(root, personas_dir)
|
||||
source_mirrors = discover_sources(root)
|
||||
|
||||
if source_mirrors:
|
||||
print(f"Detected source mirrors: {', '.join(source_mirrors)}")
|
||||
else:
|
||||
print("Detected source mirrors: none")
|
||||
|
||||
# Handle search-only mode
|
||||
if args.search:
|
||||
if not shared_dir:
|
||||
print("No shared skill library found.")
|
||||
return
|
||||
search_skills(shared_dir, args.search)
|
||||
return
|
||||
|
||||
@@ -1004,18 +1406,26 @@ def main():
|
||||
|
||||
# Pre-build escalation graph and skills index
|
||||
escalation_graph = build_escalation_graph(personas_dir, flat_config)
|
||||
skills_index = build_skills_index(shared_dir, config) if shared_dir.exists() else {}
|
||||
skills_index = build_skills_index(shared_dir, config) if shared_dir else {}
|
||||
|
||||
total_variants = 0
|
||||
for pdir in persona_dirs:
|
||||
total_variants += build_persona(pdir, output_dir, flat_config, config, escalation_graph, skills_index)
|
||||
total_variants += build_persona(
|
||||
pdir, output_dir, flat_config, config, escalation_graph, skills_index
|
||||
)
|
||||
|
||||
total_words = build_catalog(personas_dir, output_dir, config, flat_config)
|
||||
total_words = build_catalog(
|
||||
personas_dir, output_dir, config, flat_config, shared_dir
|
||||
)
|
||||
|
||||
# Platform installation
|
||||
if args.install:
|
||||
print(f"\n--- Installing to: {args.install} ---\n")
|
||||
targets = ["claude", "antigravity", "gemini", "openclaw", "paperclip"] if args.install == "all" else [args.install]
|
||||
targets = (
|
||||
["claude", "antigravity", "gemini", "openclaw", "paperclip"]
|
||||
if args.install == "all"
|
||||
else [args.install]
|
||||
)
|
||||
for target in targets:
|
||||
if target == "claude":
|
||||
install_claude(output_dir)
|
||||
@@ -1026,7 +1436,7 @@ def main():
|
||||
elif target == "openclaw":
|
||||
install_openclaw(output_dir)
|
||||
elif target == "paperclip":
|
||||
install_paperclip(output_dir, personas_dir)
|
||||
install_paperclip(output_dir, personas_dir, shared_dir)
|
||||
|
||||
print_summary(config, len(persona_dirs), total_variants, total_words)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user