feat(reporting): add LLM-based vulnerability deduplication

- Add dedupe.py with XML-based LLM deduplication using direct litellm calls
- Integrate deduplication check in create_vulnerability_report tool
- Add get_existing_vulnerabilities() method to tracer for fetching reports
- Update schema and system prompt with deduplication guidelines
This commit is contained in:
0xallam
2026-01-07 19:23:17 -08:00
committed by Ahmed Allam
parent 0e9cd9b2a4
commit 01ae348da8
5 changed files with 268 additions and 1 deletion

View File

@@ -157,6 +157,45 @@ def create_vulnerability_report(
tracer = get_global_tracer()
if tracer:
from strix.llm.dedupe import check_duplicate
existing_reports = tracer.get_existing_vulnerabilities()
candidate = {
"title": title,
"description": description,
"impact": impact,
"target": target,
"technical_analysis": technical_analysis,
"poc_description": poc_description,
"poc_script_code": poc_script_code,
"endpoint": endpoint,
"method": method,
}
dedupe_result = check_duplicate(candidate, existing_reports)
if dedupe_result.get("is_duplicate"):
duplicate_id = dedupe_result.get("duplicate_id", "")
duplicate_title = ""
for report in existing_reports:
if report.get("id") == duplicate_id:
duplicate_title = report.get("title", "Unknown")
break
return {
"success": False,
"message": (
f"Potential duplicate of '{duplicate_title}' "
f"(id={duplicate_id[:8]}...). Do not re-report the same vulnerability."
),
"duplicate_of": duplicate_id,
"duplicate_title": duplicate_title,
"confidence": dedupe_result.get("confidence", 0.0),
"reason": dedupe_result.get("reason", ""),
}
cvss_breakdown = {
"attack_vector": attack_vector,
"attack_complexity": attack_complexity,