#!/usr/bin/env python3 """Build script: Generate .yaml, .json, .prompt.md from persona .md files. Supports config.yaml for dynamic variable injection and user-specific customization. New users: copy config.example.yaml → config.yaml and customize. """ import json import re import sys import unicodedata from pathlib import Path try: import yaml except ImportError: print("PyYAML required: pip install pyyaml") sys.exit(1) def load_config(root: Path) -> dict: """Load config.yaml if it exists, otherwise return empty config.""" config_path = root / "config.yaml" if config_path.exists(): config = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} print(f"Config loaded: {config_path}") return config example_path = root / "config.example.yaml" if example_path.exists(): print( "WARN: No config.yaml found. Using defaults. Copy config.example.yaml → config.yaml to customize." ) return {} def resolve_shared_dir(root: Path, personas_dir: Path) -> Path | None: """Resolve canonical shared library path. Primary location is personas/_shared. If that is missing, fall back to known source mirrors under sources/. """ primary = personas_dir / "_shared" if primary.exists(): return primary sources_dir = root / "sources" fallbacks = [ sources_dir / "temp-cyber-skills" / "personas" / "_shared", sources_dir / "paperclip-docs-main" / "_shared", ] for candidate in fallbacks: if candidate.exists(): return candidate return None def discover_sources(root: Path) -> list[str]: """List known source mirrors under root/sources.""" sources_dir = root / "sources" if not sources_dir.exists(): return [] known = [ "Anthropic-Cybersecurity-Skills", "paperclip-docs-main", "temp-cyber-skills", ] return [name for name in known if (sources_dir / name).exists()] def flatten_config(config: dict, prefix: str = "") -> dict: """Flatten nested config dict for template substitution. Example: {"user": {"name": "Salva"}} → {"user.name": "Salva"} """ flat = {} for key, value in config.items(): full_key = f"{prefix}{key}" if not prefix else f"{prefix}.{key}" if isinstance(value, dict): flat.update(flatten_config(value, full_key)) elif isinstance(value, list): flat[full_key] = value flat[f"{full_key}.count"] = len(value) flat[f"{full_key}.csv"] = ", ".join( str(v) for v in value if not isinstance(v, dict) ) else: flat[full_key] = value return flat def inject_config(content: str, flat_config: dict) -> str: """Replace {{config.key}} placeholders with config values.""" def replacer(match): key = match.group(1).strip() value = flat_config.get(key, match.group(0)) # keep original if not found if isinstance(value, list): return ", ".join(str(v) for v in value if not isinstance(v, dict)) if isinstance(value, bool): return "enabled" if value else "disabled" return str(value) return re.sub(r"\{\{(.+?)\}\}", replacer, content) def check_conditionals(content: str, flat_config: dict) -> str: """Process {{#if key}}...{{/if}} and {{#unless key}}...{{/unless}} blocks.""" # Handle {{#if key}}content{{/if}} def if_replacer(match): key = match.group(1).strip() body = match.group(2) value = flat_config.get(key) if value and value not in (False, 0, "", "false", "none", "disabled", None, []): return body return "" content = re.sub( r"\{\{#if (.+?)\}\}(.*?)\{\{/if\}\}", if_replacer, content, flags=re.DOTALL ) # Handle {{#unless key}}content{{/unless}} def unless_replacer(match): key = match.group(1).strip() body = match.group(2) value = flat_config.get(key) if not value or value in (False, 0, "", "false", "none", "disabled", None, []): return body return "" content = re.sub( r"\{\{#unless (.+?)\}\}(.*?)\{\{/unless\}\}", unless_replacer, content, flags=re.DOTALL, ) return content def parse_persona_md(filepath: Path, flat_config: dict) -> dict: """Parse a persona markdown file into structured data.""" content = filepath.read_text(encoding="utf-8") # Apply config injection if flat_config: content = check_conditionals(content, flat_config) content = inject_config(content, flat_config) # Extract YAML frontmatter fm_match = re.match(r"^---\n(.*?)\n---\n(.*)$", content, re.DOTALL) if not fm_match: print(f" WARN: No frontmatter in {filepath}") return {} frontmatter = yaml.safe_load(fm_match.group(1)) body = fm_match.group(2).strip() # Extract sections from body sections = {} current_section = None current_content = [] for line in body.split("\n"): if line.startswith("## "): if current_section: sections[current_section] = "\n".join(current_content).strip() current_section = ( line[3:].strip().lower().replace(" ", "_").replace("&", "and") ) current_content = [] else: current_content.append(line) if current_section: sections[current_section] = "\n".join(current_content).strip() return { "metadata": frontmatter, "sections": sections, "raw_body": body, } def build_persona( persona_dir: Path, output_dir: Path, flat_config: dict, config: dict, escalation_graph: dict = None, skills_index: dict = None, ): """Build all variants for a persona directory.""" md_files = sorted(persona_dir.glob("*.md")) if not md_files: return 0 persona_name = persona_dir.name out_path = output_dir / persona_name out_path.mkdir(parents=True, exist_ok=True) # Load _meta.yaml if exists meta_file = persona_dir / "_meta.yaml" meta = {} if meta_file.exists(): meta_content = meta_file.read_text(encoding="utf-8") if flat_config: meta_content = inject_config(meta_content, flat_config) meta = yaml.safe_load(meta_content) or {} # Apply config overrides for address addresses = config.get("persona_defaults", {}).get("custom_addresses", {}) if persona_name in addresses: meta["address_to"] = addresses[persona_name] count = 0 for md_file in md_files: if md_file.name.startswith("_"): continue variant = md_file.stem parsed = parse_persona_md(md_file, flat_config) if not parsed: continue # Build output object output = { **meta, **parsed["metadata"], "variant": variant, "sections": parsed["sections"], } # Inject config metadata if config: output["_config"] = { "user": config.get("user", {}).get("name", "unknown"), "tools": { k: v for k, v in config.get("infrastructure", {}) .get("tools", {}) .items() if v is True }, "frameworks": { k: v for k, v in config.get("frameworks", {}).items() if v is True }, "regional_focus": config.get("regional_focus", {}), } # Inject escalation graph for this persona if escalation_graph and persona_name in escalation_graph: output["escalates_to"] = escalation_graph[persona_name] # Inject mapped skills for this persona if skills_index: mapped_skills = [] for skill_name, skill_info in skills_index.get("skills", {}).items(): if persona_name in skill_info.get("personas", []): mapped_skills.append(skill_name) # Also check config-based custom mapping skill_map = skills_index.get("_skill_persona_map", {}) for skill_name, persona_list in skill_map.items(): if persona_name in persona_list and skill_name not in mapped_skills: mapped_skills.append(skill_name) if mapped_skills: output["skills"] = sorted(mapped_skills) # Inject section word counts for quality tracking output["_stats"] = { "total_words": sum(len(s.split()) for s in parsed["sections"].values()), "sections": list(parsed["sections"].keys()), "section_count": len(parsed["sections"]), } # Write YAML yaml_out = out_path / f"{variant}.yaml" yaml_out.write_text( yaml.dump( output, allow_unicode=True, default_flow_style=False, sort_keys=False ), encoding="utf-8", ) # Write JSON json_out = out_path / f"{variant}.json" json_out.write_text( json.dumps(output, ensure_ascii=False, indent=2), encoding="utf-8" ) # Write plain system prompt (just the body, no config metadata) prompt_out = out_path / f"{variant}.prompt.md" prompt_out.write_text(parsed["raw_body"], encoding="utf-8") count += 1 print(f" Built: {persona_name}/{variant} -> .yaml .json .prompt.md") return count DEFAULT_SKILL_PERSONA_MAP = { # Cybersecurity skills → personas "pentest": ["neo"], "nmap-recon": ["neo", "vortex"], "security-scanner": ["neo", "phantom"], "sql-injection-testing": ["neo", "phantom"], "stealth-browser": ["neo", "oracle"], "security-audit-toolkit": ["neo", "forge"], "pwnclaw-security-scan": ["neo"], "senior-secops": ["bastion"], "clawsec": ["neo", "vortex"], "pcap-analyzer": ["vortex", "bastion"], "sys-guard-linux-remediator": ["bastion"], "ctf-writeup-generator": ["neo"], "dns-networking": ["vortex", "architect"], "network-scanner": ["neo", "vortex"], "security-skill-scanner": ["neo"], "pentest-active-directory": ["neo"], "pentest-api-attacker": ["neo", "phantom"], "pentest-auth-bypass": ["neo", "phantom"], "pentest-c2-operator": ["neo", "sentinel"], "gov-cybersecurity": ["sentinel", "bastion"], # Intelligence skills → personas "osint-investigator": ["oracle"], "seithar-intel": ["sentinel", "frodo"], "freshrss": ["frodo", "oracle"], "freshrss-reader": ["frodo", "oracle"], "war-intel-monitor": ["frodo", "marshal"], "news-crawler": ["frodo", "herald"], "dellight-intelligence-ops": ["frodo", "echo"], "dellight-strategic-intelligence": ["frodo"], "agent-intelligence-network-scan": ["oracle"], "social-trust-manipulation-detector": ["ghost"], # Infrastructure skills → personas "docker-essentials": ["architect"], "session-logs": ["architect"], # Document processing → personas "image-ocr": ["oracle", "scribe"], "mistral-ocr": ["oracle", "scribe"], "pdf-text-extractor": ["scribe", "scholar"], "youtube-transcript": ["herald", "scholar"], # Web scraping → personas "deep-scraper": ["oracle"], "crawl-for-ai": ["oracle", "herald"], } VALID_PERSONAS = { "arbiter", "architect", "bastion", "centurion", "chronos", "cipher", "corsair", "echo", "forge", "frodo", "gambit", "ghost", "herald", "ledger", "marshal", "medic", "neo", "oracle", "phantom", "polyglot", "sage", "scholar", "scribe", "sentinel", "specter", "tribune", "vortex", "warden", "wraith", } def parse_skill_frontmatter(skill_md: Path) -> dict: """Parse YAML frontmatter from SKILL.md; return empty dict if absent/invalid.""" content = skill_md.read_text(encoding="utf-8") fm_match = re.match(r"^---\n(.*?)\n---\n", content, re.DOTALL) if not fm_match: return {} parsed = yaml.safe_load(fm_match.group(1)) return parsed if isinstance(parsed, dict) else {} def infer_personas_from_skill_metadata(skill_name: str, metadata: dict) -> list: """Infer likely persona mappings using skill frontmatter metadata and naming.""" name = (skill_name or "").lower() domain = str(metadata.get("domain", "")).lower() subdomain = str(metadata.get("subdomain", "")).lower() description = str(metadata.get("description", "")).lower() tags = [str(t).lower() for t in metadata.get("tags", []) if t is not None] blob = " ".join([name, domain, subdomain, description] + tags) personas = set() # Subdomain affinity subdomain_map = { "penetration-testing": ["neo", "phantom", "vortex"], "application-security": ["phantom", "neo"], "api-security": ["phantom", "neo"], "web-security": ["phantom", "neo"], "malware-analysis": ["specter", "bastion", "sentinel"], "memory-forensics": ["specter", "bastion"], "forensics": ["specter", "bastion"], "threat-intelligence": ["sentinel", "frodo", "oracle"], "incident-response": ["bastion", "sentinel", "medic"], "soc-operations": ["bastion", "sentinel"], "threat-hunting": ["sentinel", "bastion", "vortex"], "network-security": ["vortex", "bastion"], "network-forensics": ["vortex", "specter", "bastion"], "cloud-security": ["architect", "bastion", "sentinel"], "identity-security": ["cipher", "neo", "bastion"], "active-directory": ["cipher", "neo", "bastion"], "vulnerability-management": ["bastion", "forge"], "compliance": ["ledger", "arbiter", "bastion"], "ot-security": ["centurion", "bastion", "sentinel"], } personas.update(subdomain_map.get(subdomain, [])) # Keyword affinity fallback keyword_map = { "apt": ["sentinel", "frodo"], "threat intel": ["sentinel", "oracle", "frodo"], "ioc": ["sentinel", "bastion"], "misp": ["sentinel", "oracle"], "siem": ["bastion", "sentinel"], "splunk": ["bastion", "sentinel"], "soc": ["bastion", "sentinel"], "incident response": ["bastion", "medic", "sentinel"], "phishing": ["bastion", "oracle", "sentinel"], "malware": ["specter", "bastion", "sentinel"], "ransomware": ["specter", "bastion", "sentinel"], "forensic": ["specter", "bastion"], "volatility": ["specter", "bastion"], "yara": ["specter", "bastion"], "memory": ["specter", "bastion"], "network": ["vortex", "bastion"], "zeek": ["vortex", "bastion", "sentinel"], "wireshark": ["vortex", "bastion"], "nmap": ["neo", "vortex"], "pentest": ["neo", "phantom", "vortex"], "red team": ["neo", "phantom", "specter"], "web": ["phantom", "neo"], "xss": ["phantom", "neo"], "sql injection": ["phantom", "neo"], "api": ["phantom", "neo"], "kubernetes": ["architect", "bastion", "sentinel"], "docker": ["architect", "bastion"], "aws": ["architect", "bastion", "sentinel"], "azure": ["architect", "bastion", "sentinel"], "gcp": ["architect", "bastion", "sentinel"], "iam": ["cipher", "architect", "bastion"], "active directory": ["cipher", "neo", "bastion"], "kerberos": ["cipher", "neo", "bastion"], "compliance": ["ledger", "arbiter", "bastion"], "nist": ["ledger", "bastion", "sentinel"], "ot": ["centurion", "bastion", "sentinel"], "scada": ["centurion", "bastion", "sentinel"], "ics": ["centurion", "bastion", "sentinel"], } for keyword, mapped_personas in keyword_map.items(): if keyword in blob: personas.update(mapped_personas) # Conservative fallback for unmapped cybersecurity skills if not personas and "cyber" in domain: personas.update(["bastion"]) # Keep only valid personas and deterministic order return sorted([p for p in personas if p in VALID_PERSONAS]) def load_skill_persona_map(config: dict) -> dict: """Load skill→persona mapping from config.yaml or use defaults.""" custom = config.get("skill_persona_map", {}) merged = { k: [p for p in v if p in VALID_PERSONAS] for k, v in DEFAULT_SKILL_PERSONA_MAP.items() } for skill, personas in custom.items(): if isinstance(personas, list): merged[skill] = [p for p in personas if p in VALID_PERSONAS] return merged def search_skills(shared_dir: Path, query: str): """Search across all shared skills using simple BM25-like scoring.""" query_terms = query.lower().split() results = [] for skills_subdir in ["skills", "paperclip-skills", "community-skills"]: skills_path = shared_dir / skills_subdir if not skills_path.exists(): continue for skill_dir in sorted(skills_path.iterdir()): if not skill_dir.is_dir(): continue skill_md = skill_dir / "SKILL.md" if not skill_md.exists(): continue content = skill_md.read_text(encoding="utf-8").lower() # Simple scoring: count query term occurrences weighted by position score = 0 first_50 = content[:500] # boost matches in header/description for term in query_terms: score += first_50.count(term) * 3 # header boost score += content.count(term) if score > 0: # Extract name and first description line name = skill_dir.name desc = "" for line in content.split("\n"): line = line.strip() if line and not line.startswith( ("---", "#", "name:", "description:") ): desc = line[:100] break results.append((score, name, skills_subdir, desc)) results.sort(key=lambda x: -x[0]) print(f"\n Search: '{query}' — {len(results)} results\n") for i, (score, name, source, desc) in enumerate(results[:20]): print(f" {i + 1:2}. [{score:3}] {name} ({source})") if desc: print(f" {desc}") if len(results) > 20: print(f"\n ... and {len(results) - 20} more. Refine your query.") elif len(results) == 0: print(" No matches found. Try different keywords.") def run_tests(personas_dir: Path, target: str = None): """Run persona test suite from _tests/*.yaml files.""" tests_dir = personas_dir / "_tests" if not tests_dir.exists(): print(" No _tests/ directory found.") return test_files = sorted(tests_dir.glob("*.yaml")) if not test_files: print(" No test files found in _tests/") return total = 0 passed = 0 failed = 0 warnings = [] for test_file in test_files: if test_file.name == "README.md": continue suite = yaml.safe_load(test_file.read_text(encoding="utf-8")) if not suite: continue persona_name = suite.get("persona", test_file.stem) if target and persona_name != target: continue print(f"\n Testing: {persona_name} ({len(suite.get('tests', []))} cases)") # Load persona prompt for validation persona_prompt_path = personas_dir / persona_name / "general.md" if not persona_prompt_path.exists(): print(f" SKIP: {persona_name}/general.md not found") continue prompt_content = persona_prompt_path.read_text(encoding="utf-8").lower() for test in suite.get("tests", []): total += 1 test_name = test.get("name", f"test_{total}") expect = test.get("expect", {}) test_passed = True # Check must_include keywords exist in persona definition for keyword in expect.get("must_include", []): if keyword.lower() not in prompt_content: warnings.append( f" {persona_name}/{test_name}: '{keyword}' not in persona prompt" ) test_passed = False # Check escalation targets are defined if expect.get("escalation"): target_persona = expect["escalation"].lower() if target_persona not in prompt_content: warnings.append( f" {persona_name}/{test_name}: escalation to '{target_persona}' not defined in boundaries" ) test_passed = False # Check confidence language for intel personas if expect.get("confidence"): if "confidence" not in prompt_content and "high" not in prompt_content: warnings.append( f" {persona_name}/{test_name}: confidence levels not defined in persona" ) test_passed = False if test_passed: passed += 1 print(f" PASS: {test_name}") else: failed += 1 print(f" WARN: {test_name}") print(f"\n {'=' * 40}") print(f" Tests: {total} total, {passed} passed, {failed} warnings") if warnings: print(f"\n Warnings:") for w in warnings: print(w) print(f" {'=' * 40}") def build_skills_index(shared_dir: Path, config: dict = None) -> dict: """Index all shared skills from _shared/{skills,paperclip-skills,community-skills}/.""" skill_map = load_skill_persona_map(config or {}) index = { "skills": {}, "paperclip_skills": {}, "community_skills": {}, "design_brands": [], "ui_ux_styles": 0, "_skill_persona_map": skill_map, } # Index shared-skills skills_dir = shared_dir / "skills" if skills_dir.exists(): for skill_dir in sorted(skills_dir.iterdir()): if not skill_dir.is_dir(): continue skill_md = skill_dir / "SKILL.md" if skill_md.exists(): skill_meta = parse_skill_frontmatter(skill_md) inferred_personas = infer_personas_from_skill_metadata( skill_dir.name, skill_meta ) configured_personas = skill_map.get(skill_dir.name, []) merged_personas = sorted( set(configured_personas).union(inferred_personas) ) content = skill_md.read_text(encoding="utf-8") first_line = "" for line in content.split("\n"): line = line.strip() if line and not line.startswith( ("---", "#", "name:", "description:") ): first_line = line[:120] break index["skills"][skill_dir.name] = { "personas": merged_personas, "summary": first_line, "domain": str(skill_meta.get("domain", "")), "subdomain": str(skill_meta.get("subdomain", "")), "tags": skill_meta.get("tags", []), "mapped_by": { "explicit": configured_personas, "inferred": inferred_personas, }, "has_references": (skill_dir / "references").is_dir(), } # Index paperclip-skills pskills_dir = shared_dir / "paperclip-skills" if pskills_dir.exists(): for skill_dir in sorted(pskills_dir.iterdir()): if not skill_dir.is_dir(): continue skill_md = skill_dir / "SKILL.md" if skill_md.exists(): index["paperclip_skills"][skill_dir.name] = True # Index community-skills cskills_dir = shared_dir / "community-skills" if cskills_dir.exists(): for skill_dir in sorted(cskills_dir.iterdir()): if not skill_dir.is_dir(): continue skill_md = skill_dir / "SKILL.md" if skill_md.exists(): index["community_skills"][skill_dir.name] = True # Index design brands design_dir = shared_dir / "design-md" if design_dir.exists(): index["design_brands"] = sorted( [d.name for d in design_dir.iterdir() if d.is_dir()] ) # Count UI/UX data uiux_dir = shared_dir / "ui-ux-pro-max" / "data" if uiux_dir.exists(): index["ui_ux_styles"] = sum(1 for f in uiux_dir.glob("*.csv")) return index def build_escalation_graph(personas_dir: Path, flat_config: dict) -> dict: """Extract cross-persona escalation paths from Boundaries sections.""" graph = {} # {persona: [escalation_targets]} for persona_dir in sorted(personas_dir.iterdir()): if not persona_dir.is_dir() or persona_dir.name.startswith((".", "_")): continue general = persona_dir / "general.md" if not general.exists(): continue parsed = parse_persona_md(general, flat_config) if not parsed: continue boundaries = parsed["sections"].get("boundaries", "") targets = re.findall(r"Escalate to \*\*(\w+)\*\*", boundaries) graph[persona_dir.name] = [t.lower() for t in targets] return graph def build_trigger_index(personas_dir: Path) -> dict: """Build reverse index: trigger keyword → persona codenames for multi-agent routing.""" index = {} # {trigger: [persona_names]} for persona_dir in sorted(personas_dir.iterdir()): if not persona_dir.is_dir() or persona_dir.name.startswith((".", "_")): continue meta_file = persona_dir / "_meta.yaml" if not meta_file.exists(): continue meta = yaml.safe_load(meta_file.read_text(encoding="utf-8")) or {} triggers = meta.get("activation_triggers", []) for trigger in triggers: t = trigger.lower() if t not in index: index[t] = [] index[t].append(persona_dir.name) return index def validate_persona(persona_name: str, parsed: dict) -> list: """Validate persona structure and return warnings.""" warnings = [] required_sections = ["soul", "expertise", "methodology", "boundaries"] for section in required_sections: if section not in parsed.get("sections", {}): warnings.append(f"Missing section: {section}") elif len(parsed["sections"][section].split()) < 30: warnings.append( f"Thin section ({len(parsed['sections'][section].split())} words): {section}" ) fm = parsed.get("metadata", {}) for field in ["codename", "name", "domain", "address_to", "tone"]: if field not in fm: warnings.append(f"Missing frontmatter: {field}") return warnings def build_catalog( personas_dir: Path, output_dir: Path, config: dict, flat_config: dict, shared_dir: Path | None, ): """Generate CATALOG.md with stats, escalation paths, and trigger index.""" addresses = config.get("persona_defaults", {}).get("custom_addresses", {}) # Build escalation graph and trigger index escalation_graph = build_escalation_graph(personas_dir, flat_config) trigger_index = build_trigger_index(personas_dir) catalog_lines = [ "# Persona Catalog\n", f"_Auto-generated by build.py | User: {config.get('user', {}).get('name', 'default')}_\n", ] total_words = 0 total_sections = 0 all_warnings = [] for persona_dir in sorted(personas_dir.iterdir()): if not persona_dir.is_dir() or persona_dir.name.startswith((".", "_")): continue meta_file = persona_dir / "_meta.yaml" if not meta_file.exists(): continue meta = yaml.safe_load(meta_file.read_text(encoding="utf-8")) or {} codename = meta.get("codename", persona_dir.name) address = addresses.get(persona_dir.name, meta.get("address_to", "N/A")) variants = [ f.stem for f in sorted(persona_dir.glob("*.md")) if not f.name.startswith("_") ] # Parse general.md for stats general = persona_dir / "general.md" word_count = 0 section_count = 0 if general.exists(): parsed = parse_persona_md(general, flat_config) if parsed: for s in parsed["sections"].values(): word_count += len(s.split()) section_count = len(parsed["sections"]) # Validate warns = validate_persona(codename, parsed) for w in warns: all_warnings.append(f" {codename}: {w}") total_words += word_count total_sections += section_count escalates_to = escalation_graph.get(persona_dir.name, []) catalog_lines.append(f"## {codename} — {meta.get('role', 'Unknown')}") catalog_lines.append(f"- **Domain:** {meta.get('domain', 'N/A')}") catalog_lines.append(f"- **Hitap:** {address}") catalog_lines.append(f"- **Variants:** {', '.join(variants)}") catalog_lines.append( f"- **Depth:** {word_count:,} words, {section_count} sections" ) if escalates_to: catalog_lines.append(f"- **Escalates to:** {', '.join(escalates_to)}") catalog_lines.append("") # Add trigger index section catalog_lines.append("---\n") catalog_lines.append("## Activation Trigger Index\n") catalog_lines.append("_Keyword → persona routing for multi-agent systems_\n") for trigger in sorted(trigger_index.keys()): personas = ", ".join(trigger_index[trigger]) catalog_lines.append(f"- **{trigger}** → {personas}") catalog_lines.append("") # Add stats catalog_lines.append("---\n") catalog_lines.append("## Build Statistics\n") catalog_lines.append(f"- Total prompt content: {total_words:,} words") catalog_lines.append(f"- Total sections: {total_sections}") catalog_lines.append( f"- Escalation connections: {sum(len(v) for v in escalation_graph.values())}" ) catalog_lines.append(f"- Unique triggers: {len(trigger_index)}") catalog_lines.append("") catalog_path = personas_dir / "CATALOG.md" catalog_path.write_text("\n".join(catalog_lines), encoding="utf-8") print(f" Catalog: {catalog_path}") # Write escalation graph and trigger index as JSON for API consumers index_path = output_dir / "_index" index_path.mkdir(parents=True, exist_ok=True) (index_path / "escalation_graph.json").write_text( json.dumps(escalation_graph, indent=2, ensure_ascii=False), encoding="utf-8" ) (index_path / "trigger_index.json").write_text( json.dumps(trigger_index, indent=2, ensure_ascii=False), encoding="utf-8" ) print(f" Index: {index_path}/escalation_graph.json, trigger_index.json") # Write skills index if shared dir exists if shared_dir and shared_dir.exists(): si = build_skills_index(shared_dir, config) (index_path / "skills_index.json").write_text( json.dumps(si, indent=2, ensure_ascii=False), encoding="utf-8" ) print( f" Skills: {len(si.get('skills', {}))} shared + " f"{len(si.get('paperclip_skills', {}))} paperclip + " f"{len(si.get('community_skills', {}))} community + " f"{len(si.get('design_brands', []))} design brands + " f"{si.get('ui_ux_styles', 0)} UI/UX data files" ) # Print validation warnings if all_warnings: print(f"\n WARNINGS ({len(all_warnings)}):") for w in all_warnings: print(f" {w}") return total_words def print_summary( config: dict, total_personas: int, total_variants: int, total_words: int = 0 ): """Print build summary with config status.""" print("\n" + "=" * 50) print(f"BUILD COMPLETE") print(f" Personas: {total_personas}") print(f" Variants: {total_variants}") print(f" Words: {total_words:,}") print(f" Output: generated/") print(f" Index: generated/_index/") if config: user = config.get("user", {}).get("name", "?") tools_on = sum( 1 for v in config.get("infrastructure", {}).get("tools", {}).values() if v is True ) frameworks_on = sum( 1 for v in config.get("frameworks", {}).values() if v is True ) regions = config.get("regional_focus", {}).get("primary", []) print(f"\n Config: {user}") print(f" Tools: {tools_on} enabled") print(f" Frameworks: {frameworks_on} enabled") if regions: print(f" Regions: {', '.join(regions)}") else: print("\n Config: none (using defaults)") print(" Tip: Copy config.example.yaml → config.yaml to customize") print("=" * 50) def install_claude(output_dir: Path): """Install personas to Claude Code as slash commands + agents.""" commands_dir = Path.home() / ".claude" / "commands" agents_dir = Path.home() / ".claude" / "agents" commands_dir.mkdir(parents=True, exist_ok=True) agents_dir.mkdir(parents=True, exist_ok=True) cmd_count = 0 agent_count = 0 for persona_dir in sorted(output_dir.iterdir()): if not persona_dir.is_dir() or persona_dir.name.startswith("_"): continue # Install slash commands for all variants for prompt_file in persona_dir.glob("*.prompt.md"): variant = prompt_file.stem codename = persona_dir.name cmd_name = ( f"persona-{codename}" if variant == "general" else f"persona-{codename}-{variant}" ) dest = commands_dir / f"{cmd_name}.md" content = prompt_file.read_text(encoding="utf-8") command_content = f"{content}\n\n---\nUser query: $ARGUMENTS\n" dest.write_text(command_content, encoding="utf-8") cmd_count += 1 # Install agent .yml for general variant (appears in /agents menu) general_json = persona_dir / "general.json" if not general_json.exists(): continue data = json.loads(general_json.read_text(encoding="utf-8")) codename = data.get("codename", persona_dir.name) name = data.get("name", codename.title()) role = data.get("role", "Specialist") domain = data.get("domain", "") tone = data.get("tone", "") address_to = data.get("address_to", "") skills = data.get("skills", []) quote = data.get("quote", "") soul = data.get("sections", {}).get("soul", "") methodology = data.get("sections", {}).get("methodology", "") behavior = data.get("sections", {}).get("behavior_rules", "") instructions = f"You are **{name}** ({address_to}) — {role}.\n\n" instructions += f"Domain: {domain} | Tone: {tone}\n\n" if quote: instructions += f'> "{quote}"\n\n' instructions += "## Soul\n" + soul[:1500] + "\n\n" if methodology: instructions += "## Methodology\n" + methodology[:1500] + "\n\n" if behavior: instructions += "## Behavior\n" + behavior[:800] + "\n" if skills: instructions += "\n## Mapped Skills\n" + ", ".join(skills) + "\n" agent = { "name": codename, "description": f"{name} ({address_to}) — {role}. {domain}.", "instructions": instructions, "allowedTools": [ "Read(*)", "Edit(*)", "Write(*)", "Bash(*)", "Glob(*)", "Grep(*)", "WebFetch(*)", "WebSearch(*)", ], } agent_file = agents_dir / f"{codename}.yml" agent_file.write_text( yaml.dump( agent, allow_unicode=True, default_flow_style=False, sort_keys=False ), encoding="utf-8", ) agent_count += 1 print(f" Claude: {cmd_count} commands + {agent_count} agents installed") return cmd_count def install_antigravity(output_dir: Path): """Install personas to Antigravity IDE system prompts.""" # Antigravity stores system prompts in ~/.config/antigravity/prompts/ or project .antigravity/ ag_dir = Path.home() / ".config" / "antigravity" / "personas" ag_dir.mkdir(parents=True, exist_ok=True) count = 0 for persona_dir in sorted(output_dir.iterdir()): if not persona_dir.is_dir() or persona_dir.name.startswith("_"): continue for prompt_file in persona_dir.glob("*.prompt.md"): variant = prompt_file.stem codename = persona_dir.name dest = ag_dir / codename / f"{variant}.md" dest.parent.mkdir(parents=True, exist_ok=True) dest.write_text(prompt_file.read_text(encoding="utf-8"), encoding="utf-8") count += 1 print(f" Antigravity: {count} personas installed to {ag_dir}") return count def install_gemini(output_dir: Path): """Install personas as Gemini Gems (JSON format for Google AI Studio).""" gems_dir = output_dir / "_gems" gems_dir.mkdir(parents=True, exist_ok=True) count = 0 for persona_dir in sorted(output_dir.iterdir()): if not persona_dir.is_dir() or persona_dir.name.startswith("_"): continue for json_file in persona_dir.glob("*.json"): data = json.loads(json_file.read_text(encoding="utf-8")) variant = data.get("variant", json_file.stem) codename = data.get("codename", persona_dir.name) name = data.get("name", codename.title()) # Build Gemini Gem format gem = { "name": f"{name} — {variant}" if variant != "general" else name, "description": f"{data.get('role', '')} | {data.get('domain', '')}", "system_instruction": data.get("sections", {}).get("soul", "") + "\n\n" + data.get("sections", {}).get("expertise", "") + "\n\n" + data.get("sections", {}).get("methodology", "") + "\n\n" + data.get("sections", {}).get("behavior_rules", ""), "metadata": { "codename": codename, "variant": variant, "domain": data.get("domain", ""), "address_to": data.get("address_to", ""), "tone": data.get("tone", ""), "activation_triggers": data.get("activation_triggers", []), }, } dest = gems_dir / f"{codename}-{variant}.json" dest.write_text( json.dumps(gem, ensure_ascii=False, indent=2), encoding="utf-8" ) count += 1 print(f" Gemini: {count} gems generated to {gems_dir}") return count def install_paperclip(output_dir: Path, personas_dir: Path, shared_dir: Path | None): """Install personas as Paperclip agents (SOUL.md + hermes-config.yaml + AGENTS.md per agent).""" pc_dir = output_dir / "_paperclip" agents_dir = pc_dir / "agents" skills_dir = pc_dir / "skills" # Recreate output for deterministic full migration. if pc_dir.exists(): import shutil shutil.rmtree(pc_dir) agents_dir.mkdir(parents=True, exist_ok=True) skills_dir.mkdir(parents=True, exist_ok=True) # Build escalation graph for AGENTS.md org chart flat_config = {} escalation_graph = build_escalation_graph(personas_dir, flat_config) # Domain → toolset mapping for hermes-config domain_toolsets = { "cybersecurity": ["terminal", "file", "web", "code_execution"], "intelligence": ["terminal", "file", "web"], "military": ["terminal", "file", "web"], "engineering": ["terminal", "file", "web", "code_execution"], "law-economics": ["file", "web"], "history": ["file", "web"], "linguistics": ["file", "web"], "academia": ["file", "web"], } agent_count = 0 skill_count = 0 for persona_dir in sorted(output_dir.iterdir()): if not persona_dir.is_dir() or persona_dir.name.startswith("_"): continue general_json = persona_dir / "general.json" general_prompt = persona_dir / "general.prompt.md" if not general_json.exists(): continue data = json.loads(general_json.read_text(encoding="utf-8")) codename = data.get("codename", persona_dir.name) name = data.get("name", codename.title()) role = data.get("role", "Specialist") domain = data.get("domain", "general") address_to = data.get("address_to", "") tone = data.get("tone", "") escalates_to = escalation_graph.get(persona_dir.name, []) skills = data.get("skills", []) agent_dir = agents_dir / codename agent_dir.mkdir(parents=True, exist_ok=True) # 1. SOUL.md — persona prompt adapted to Paperclip format soul_lines = [ f"# {name} — {role}\n", f"## Kimlik", f"- **Ad:** {name}", f"- **Kod Adı:** {codename}", f"- **Hitap:** {address_to}", f"- **Domain:** {domain}", f"- **Ton:** {tone}", "", ] if escalates_to: soul_lines.append(f"## İlişkiler") soul_lines.append(f"- **Escalation:** {', '.join(escalates_to)}") soul_lines.append("") if skills: soul_lines.append(f"## Skills") for s in skills: soul_lines.append(f"- {s}") soul_lines.append("") # Append the full prompt body if general_prompt.exists(): soul_lines.append("## Detaylı Tanım\n") soul_lines.append(general_prompt.read_text(encoding="utf-8")) (agent_dir / "SOUL.md").write_text("\n".join(soul_lines), encoding="utf-8") # 2. hermes-config.yaml toolsets = domain_toolsets.get(domain, ["terminal", "file", "web"]) hermes_config = { "model": "qwen/qwen3.6-plus:free", "provider": "openrouter", "defaults": {"quiet": True, "reasoning_effort": "medium"}, "mcp_servers": { "web-search": { "command": "npx", "args": ["-y", "ddg-mcp-search"], }, }, "skills": {"external_dirs": ["~/.hermes/skills"]}, "toolsets": toolsets, } (agent_dir / "hermes-config.yaml").write_text( yaml.dump(hermes_config, allow_unicode=True, default_flow_style=False), encoding="utf-8", ) # 3. AGENTS.md — workspace overview with org connections agents_md_lines = [ f"# {name} — Workspace\n", f"- **Agent:** {name} ({role})", f"- **Domain:** {domain}", "", ] if escalates_to: agents_md_lines.append("## Bağlantılar\n") for target in escalates_to: agents_md_lines.append(f"- → {target}") agents_md_lines.append("") (agent_dir / "AGENTS.md").write_text( "\n".join(agents_md_lines), encoding="utf-8" ) agent_count += 1 # Copy shared skills as Paperclip skills (SKILL.md format already compatible) shared_skills = shared_dir / "skills" if shared_dir else Path("__missing__") if shared_skills.exists(): for skill_dir in sorted(shared_skills.iterdir()): if not skill_dir.is_dir(): continue skill_md = skill_dir / "SKILL.md" if skill_md.exists(): dest = skills_dir / skill_dir.name dest.mkdir(parents=True, exist_ok=True) (dest / "SKILL.md").write_text( skill_md.read_text(encoding="utf-8"), encoding="utf-8" ) refs = skill_dir / "references" if refs.is_dir(): import shutil shutil.copytree(refs, dest / "references", dirs_exist_ok=True) skill_count += 1 # Copy paperclip-specific skills pc_skills = shared_dir / "paperclip-skills" if shared_dir else Path("__missing__") if pc_skills.exists(): for skill_dir in sorted(pc_skills.iterdir()): if not skill_dir.is_dir(): continue skill_md = skill_dir / "SKILL.md" if skill_md.exists() and not (skills_dir / skill_dir.name).exists(): dest = skills_dir / skill_dir.name dest.mkdir(parents=True, exist_ok=True) (dest / "SKILL.md").write_text( skill_md.read_text(encoding="utf-8"), encoding="utf-8" ) refs = skill_dir / "references" if refs.is_dir(): import shutil shutil.copytree(refs, dest / "references", dirs_exist_ok=True) scripts = skill_dir / "scripts" if scripts.is_dir(): import shutil shutil.copytree(scripts, dest / "scripts", dirs_exist_ok=True) skill_count += 1 # Deploy original Paperclip company agents from _shared/paperclip-agents/ pc_agents_src = ( shared_dir / "paperclip-agents" if shared_dir else Path("__missing__") ) pc_agent_count = 0 def normalize_agent_name(name: str) -> str: """Normalize escaped/unicode-heavy names to stable ASCII directory names.""" decoded = re.sub( r"#U([0-9A-Fa-f]{4})", lambda m: chr(int(m.group(1), 16)), name, ) ascii_name = ( unicodedata.normalize("NFKD", decoded) .encode("ascii", "ignore") .decode("ascii") ) # Keep names filesystem-safe and deterministic. slug = re.sub(r"[^a-zA-Z0-9]+", "-", ascii_name).strip("-").lower() return slug or decoded if pc_agents_src.exists(): seen_company_agents = set() collision_count = 0 for agent_src in sorted(pc_agents_src.iterdir()): if not agent_src.is_dir(): continue agent_name = normalize_agent_name(agent_src.name) if agent_name in seen_company_agents: collision_count += 1 continue seen_company_agents.add(agent_name) # Skip if persona-based agent already exists with same name if (agents_dir / agent_name).exists(): continue dest = agents_dir / agent_name dest.mkdir(parents=True, exist_ok=True) for f in agent_src.iterdir(): if f.is_file(): (dest / f.name).write_text( f.read_text(encoding="utf-8"), encoding="utf-8" ) pc_agent_count += 1 if collision_count: print( f" Note: skipped {collision_count} duplicate company agent source dirs after name normalization" ) total_agents = agent_count + pc_agent_count print( f" Paperclip: {agent_count} persona agents + {pc_agent_count} company agents + {skill_count} skills to {pc_dir}" ) return total_agents def install_openclaw(output_dir: Path): """Install personas to OpenClaw format (IDENTITY.md + individual persona files).""" oc_dir = output_dir / "_openclaw" oc_dir.mkdir(parents=True, exist_ok=True) personas_dir = oc_dir / "personas" personas_dir.mkdir(parents=True, exist_ok=True) count = 0 identity_sections = [] for persona_dir in sorted(output_dir.iterdir()): if not persona_dir.is_dir() or persona_dir.name.startswith("_"): continue general_prompt = persona_dir / "general.prompt.md" if not general_prompt.exists(): continue content = general_prompt.read_text(encoding="utf-8") codename = persona_dir.name # Write individual persona file (personas_dir / f"{codename}.md").write_text(content, encoding="utf-8") # Extract first line as title for IDENTITY.md first_line = content.split("\n")[0].strip("# ").strip() identity_sections.append(f"### {first_line}\nSee: personas/{codename}.md\n") count += 1 # Write IDENTITY.md identity = "# IDENTITY — Persona Definitions\n\n" + "\n".join(identity_sections) (oc_dir / "IDENTITY.md").write_text(identity, encoding="utf-8") print(f" OpenClaw: {count} personas + IDENTITY.md to {oc_dir}") return count def main(): import argparse parser = argparse.ArgumentParser( description="Build persona library and optionally install to platforms." ) parser.add_argument( "--install", choices=["claude", "antigravity", "gemini", "openclaw", "paperclip", "all"], help="Install generated personas to a target platform", ) parser.add_argument( "--search", type=str, metavar="QUERY", help="Search across all shared skills (e.g. --search 'pentest AD')", ) parser.add_argument( "--test", nargs="?", const="__all__", metavar="PERSONA", help="Run persona test suite (optionally specify persona name)", ) args = parser.parse_args() root = Path(__file__).parent personas_dir = root / "personas" if not personas_dir.exists(): print("No personas/ directory found.") sys.exit(1) output_dir = root / "generated" # Load config config = load_config(root) flat_config = flatten_config(config) if config else {} # Find all persona directories persona_dirs = [ d for d in sorted(personas_dir.iterdir()) if d.is_dir() and not d.name.startswith((".", "_")) ] if not persona_dirs: print("No persona directories found.") sys.exit(1) shared_dir = resolve_shared_dir(root, personas_dir) source_mirrors = discover_sources(root) if source_mirrors: print(f"Detected source mirrors: {', '.join(source_mirrors)}") else: print("Detected source mirrors: none") # Handle search-only mode if args.search: if not shared_dir: print("No shared skill library found.") return search_skills(shared_dir, args.search) return # Handle test-only mode if args.test: target = None if args.test == "__all__" else args.test run_tests(personas_dir, target) return output_dir.mkdir(parents=True, exist_ok=True) print(f"Building {len(persona_dirs)} personas -> {output_dir}\n") # Pre-build escalation graph and skills index escalation_graph = build_escalation_graph(personas_dir, flat_config) skills_index = build_skills_index(shared_dir, config) if shared_dir else {} total_variants = 0 for pdir in persona_dirs: total_variants += build_persona( pdir, output_dir, flat_config, config, escalation_graph, skills_index ) total_words = build_catalog( personas_dir, output_dir, config, flat_config, shared_dir ) # Platform installation if args.install: print(f"\n--- Installing to: {args.install} ---\n") targets = ( ["claude", "antigravity", "gemini", "openclaw", "paperclip"] if args.install == "all" else [args.install] ) for target in targets: if target == "claude": install_claude(output_dir) elif target == "antigravity": install_antigravity(output_dir) elif target == "gemini": install_gemini(output_dir) elif target == "openclaw": install_openclaw(output_dir) elif target == "paperclip": install_paperclip(output_dir, personas_dir, shared_dir) print_summary(config, len(persona_dirs), total_variants, total_words) if __name__ == "__main__": main()