From 7ad95882129147621aeed823330a0ffe62c98adc Mon Sep 17 00:00:00 2001
From: salvacybersec
Date: Sun, 5 Apr 2026 14:41:33 +0300
Subject: [PATCH] feat(03-02): add search and embeddings Tier 3 providers

- Perplexity (pplx- prefix, high confidence)
- You.com (keyword-only)
- Voyage AI (pa- prefix, medium confidence)
- Jina AI (jina_ prefix, high confidence)
- Unstructured.io (keyword-only)
- AssemblyAI (hex32, low confidence)
---
 pkg/providers/definitions/assemblyai.yaml   | 21 +++++++++++++++++++++
 pkg/providers/definitions/jina.yaml         | 21 +++++++++++++++++++++
 pkg/providers/definitions/perplexity.yaml   | 21 +++++++++++++++++++++
 pkg/providers/definitions/unstructured.yaml | 17 +++++++++++++++++
 pkg/providers/definitions/voyage.yaml       | 21 +++++++++++++++++++++
 pkg/providers/definitions/you.yaml          | 17 +++++++++++++++++
 providers/assemblyai.yaml                   | 21 +++++++++++++++++++++
 providers/jina.yaml                         | 21 +++++++++++++++++++++
 providers/perplexity.yaml                   | 21 +++++++++++++++++++++
 providers/unstructured.yaml                 | 17 +++++++++++++++++
 providers/voyage.yaml                       | 21 +++++++++++++++++++++
 providers/you.yaml                          | 17 +++++++++++++++++
 12 files changed, 236 insertions(+)
 create mode 100644 pkg/providers/definitions/assemblyai.yaml
 create mode 100644 pkg/providers/definitions/jina.yaml
 create mode 100644 pkg/providers/definitions/perplexity.yaml
 create mode 100644 pkg/providers/definitions/unstructured.yaml
 create mode 100644 pkg/providers/definitions/voyage.yaml
 create mode 100644 pkg/providers/definitions/you.yaml
 create mode 100644 providers/assemblyai.yaml
 create mode 100644 providers/jina.yaml
 create mode 100644 providers/perplexity.yaml
 create mode 100644 providers/unstructured.yaml
 create mode 100644 providers/voyage.yaml
 create mode 100644 providers/you.yaml

diff --git a/pkg/providers/definitions/assemblyai.yaml b/pkg/providers/definitions/assemblyai.yaml
new file mode 100644
index 0000000..588d67b
--- /dev/null
+++ b/pkg/providers/definitions/assemblyai.yaml
@@ -0,0 +1,21 @@
+format_version: 1
+name: assemblyai
+display_name: AssemblyAI
+tier: 3
+last_verified: "2026-04-05"
+keywords:
+  - "assemblyai"
+  - "ASSEMBLYAI_API_KEY"
+  - "api.assemblyai.com"
+  - "assembly.ai"
+patterns:
+  - regex: '[a-f0-9]{32}'
+    entropy_min: 4.0
+    confidence: low
+verify:
+  method: GET
+  url: https://api.assemblyai.com/v2/transcript
+  headers:
+    authorization: "{KEY}"
+  valid_status: [200]
+  invalid_status: [401, 403]
diff --git a/pkg/providers/definitions/jina.yaml b/pkg/providers/definitions/jina.yaml
new file mode 100644
index 0000000..87e5d20
--- /dev/null
+++ b/pkg/providers/definitions/jina.yaml
@@ -0,0 +1,21 @@
+format_version: 1
+name: jina
+display_name: Jina AI
+tier: 3
+last_verified: "2026-04-05"
+keywords:
+  - "jina"
+  - "JINA_API_KEY"
+  - "api.jina.ai"
+  - "jinaai"
+patterns:
+  - regex: 'jina_[A-Za-z0-9]{40,}'
+    entropy_min: 4.0
+    confidence: high
+verify:
+  method: GET
+  url: https://api.jina.ai/v1/models
+  headers:
+    Authorization: "Bearer {KEY}"
+  valid_status: [200]
+  invalid_status: [401, 403]
diff --git a/pkg/providers/definitions/perplexity.yaml b/pkg/providers/definitions/perplexity.yaml
new file mode 100644
index 0000000..413daae
--- /dev/null
+++ b/pkg/providers/definitions/perplexity.yaml
@@ -0,0 +1,21 @@
+format_version: 1
+name: perplexity
+display_name: Perplexity AI
+tier: 3
+last_verified: "2026-04-05"
+keywords:
+  - "perplexity"
+  - "PERPLEXITY_API_KEY"
+  - "pplx-"
+  - "api.perplexity.ai"
+patterns:
+  - regex: 'pplx-[A-Za-z0-9]{48,}'
+    entropy_min: 4.0
+    confidence: high
+verify:
+  method: POST
+  url: https://api.perplexity.ai/chat/completions
+  headers:
+    Authorization: "Bearer {KEY}"
+  valid_status: [200, 400]
+  invalid_status: [401, 403]
diff --git a/pkg/providers/definitions/unstructured.yaml b/pkg/providers/definitions/unstructured.yaml
new file mode 100644
index 0000000..29bc5fd
--- /dev/null
+++ b/pkg/providers/definitions/unstructured.yaml
@@ -0,0 +1,17 @@
+format_version: 1
+name: unstructured
+display_name: Unstructured.io
+tier: 3
+last_verified: "2026-04-05"
+keywords:
+  - "unstructured"
+  - "UNSTRUCTURED_API_KEY"
+  - "api.unstructured.io"
+  - "unstructuredio"
+verify:
+  method: GET
+  url: https://api.unstructured.io/general/v0/general
+  headers:
+    unstructured-api-key: "{KEY}"
+  valid_status: [200, 400]
+  invalid_status: [401, 403]
diff --git a/pkg/providers/definitions/voyage.yaml b/pkg/providers/definitions/voyage.yaml
new file mode 100644
index 0000000..ba97d97
--- /dev/null
+++ b/pkg/providers/definitions/voyage.yaml
@@ -0,0 +1,21 @@
+format_version: 1
+name: voyage
+display_name: Voyage AI
+tier: 3
+last_verified: "2026-04-05"
+keywords:
+  - "voyage"
+  - "VOYAGE_API_KEY"
+  - "voyageai"
+  - "api.voyageai.com"
+patterns:
+  - regex: 'pa-[A-Za-z0-9_\-]{40,}'
+    entropy_min: 4.0
+    confidence: medium
+verify:
+  method: POST
+  url: https://api.voyageai.com/v1/embeddings
+  headers:
+    Authorization: "Bearer {KEY}"
+  valid_status: [200, 400]
+  invalid_status: [401, 403]
diff --git a/pkg/providers/definitions/you.yaml b/pkg/providers/definitions/you.yaml
new file mode 100644
index 0000000..59e6f53
--- /dev/null
+++ b/pkg/providers/definitions/you.yaml
@@ -0,0 +1,17 @@
+format_version: 1
+name: you
+display_name: You.com
+tier: 3
+last_verified: "2026-04-05"
+keywords:
+  - "you.com"
+  - "YDC_API_KEY"
+  - "YOU_API_KEY"
+  - "api.ydc-index.io"
+verify:
+  method: GET
+  url: https://api.ydc-index.io/search
+  headers:
+    X-API-Key: "{KEY}"
+  valid_status: [200, 400]
+  invalid_status: [401, 403]
diff --git a/providers/assemblyai.yaml b/providers/assemblyai.yaml
new file mode 100644
index 0000000..588d67b
--- /dev/null
+++ b/providers/assemblyai.yaml
@@ -0,0 +1,21 @@
+format_version: 1
+name: assemblyai
+display_name: AssemblyAI
+tier: 3
+last_verified: "2026-04-05"
+keywords:
+  - "assemblyai"
+  - "ASSEMBLYAI_API_KEY"
+  - "api.assemblyai.com"
+  - "assembly.ai"
+patterns:
+  - regex: '[a-f0-9]{32}'
+    entropy_min: 4.0
+    confidence: low
+verify:
+  method: GET
+  url: https://api.assemblyai.com/v2/transcript
+  headers:
+    authorization: "{KEY}"
+  valid_status: [200]
+  invalid_status: [401, 403]
diff --git a/providers/jina.yaml b/providers/jina.yaml
new file mode 100644
index 0000000..87e5d20
--- /dev/null
+++ b/providers/jina.yaml
@@ -0,0 +1,21 @@
+format_version: 1
+name: jina
+display_name: Jina AI
+tier: 3
+last_verified: "2026-04-05"
+keywords:
+  - "jina"
+  - "JINA_API_KEY"
+  - "api.jina.ai"
+  - "jinaai"
+patterns:
+  - regex: 'jina_[A-Za-z0-9]{40,}'
+    entropy_min: 4.0
+    confidence: high
+verify:
+  method: GET
+  url: https://api.jina.ai/v1/models
+  headers:
+    Authorization: "Bearer {KEY}"
+  valid_status: [200]
+  invalid_status: [401, 403]
diff --git a/providers/perplexity.yaml b/providers/perplexity.yaml
new file mode 100644
index 0000000..413daae
--- /dev/null
+++ b/providers/perplexity.yaml
@@ -0,0 +1,21 @@
+format_version: 1
+name: perplexity
+display_name: Perplexity AI
+tier: 3
+last_verified: "2026-04-05"
+keywords:
+  - "perplexity"
+  - "PERPLEXITY_API_KEY"
+  - "pplx-"
+  - "api.perplexity.ai"
+patterns:
+  - regex: 'pplx-[A-Za-z0-9]{48,}'
+    entropy_min: 4.0
+    confidence: high
+verify:
+  method: POST
+  url: https://api.perplexity.ai/chat/completions
+  headers:
+    Authorization: "Bearer {KEY}"
+  valid_status: [200, 400]
+  invalid_status: [401, 403]
diff --git a/providers/unstructured.yaml b/providers/unstructured.yaml
new file mode 100644
index 0000000..29bc5fd
--- /dev/null
+++ b/providers/unstructured.yaml
@@ -0,0 +1,17 @@
+format_version: 1
+name: unstructured
+display_name: Unstructured.io
+tier: 3
+last_verified: "2026-04-05"
+keywords:
+  - "unstructured"
+  - "UNSTRUCTURED_API_KEY"
+  - "api.unstructured.io"
+  - "unstructuredio"
+verify:
+  method: GET
+  url: https://api.unstructured.io/general/v0/general
+  headers:
+    unstructured-api-key: "{KEY}"
+  valid_status: [200, 400]
+  invalid_status: [401, 403]
diff --git a/providers/voyage.yaml b/providers/voyage.yaml
new file mode 100644
index 0000000..ba97d97
--- /dev/null
+++ b/providers/voyage.yaml
@@ -0,0 +1,21 @@
+format_version: 1
+name: voyage
+display_name: Voyage AI
+tier: 3
+last_verified: "2026-04-05"
+keywords:
+  - "voyage"
+  - "VOYAGE_API_KEY"
+  - "voyageai"
+  - "api.voyageai.com"
+patterns:
+  - regex: 'pa-[A-Za-z0-9_\-]{40,}'
+    entropy_min: 4.0
+    confidence: medium
+verify:
+  method: POST
+  url: https://api.voyageai.com/v1/embeddings
+  headers:
+    Authorization: "Bearer {KEY}"
+  valid_status: [200, 400]
+  invalid_status: [401, 403]
diff --git a/providers/you.yaml b/providers/you.yaml
new file mode 100644
index 0000000..59e6f53
--- /dev/null
+++ b/providers/you.yaml
@@ -0,0 +1,17 @@
+format_version: 1
+name: you
+display_name: You.com
+tier: 3
+last_verified: "2026-04-05"
+keywords:
+  - "you.com"
+  - "YDC_API_KEY"
+  - "YOU_API_KEY"
+  - "api.ydc-index.io"
+verify:
+  method: GET
+  url: https://api.ydc-index.io/search
+  headers:
+    X-API-Key: "{KEY}"
+  valid_status: [200, 400]
+  invalid_status: [401, 403]