remove stale web access override

This commit is contained in:
Advait Paliwal
2026-04-10 10:20:31 -07:00
parent 5b9362918e
commit 4137a29507
13 changed files with 115 additions and 3627 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,325 +0,0 @@
import http, { type IncomingMessage, type ServerResponse } from "node:http";
import { generateCuratorPage } from "./curator-page.js";
const STALE_THRESHOLD_MS = 30000;
const WATCHDOG_INTERVAL_MS = 5000;
const MAX_BODY_SIZE = 64 * 1024;
type ServerState = "SEARCHING" | "RESULT_SELECTION" | "COMPLETED";
export interface CuratorServerOptions {
queries: string[];
sessionToken: string;
timeout: number;
availableProviders: { perplexity: boolean; exa: boolean; gemini: boolean };
defaultProvider: string;
}
export interface CuratorServerCallbacks {
onSubmit: (selectedQueryIndices: number[]) => void;
onCancel: (reason: "user" | "timeout" | "stale") => void;
onProviderChange: (provider: string) => void;
onAddSearch: (query: string, queryIndex: number) => Promise<{ answer: string; results: Array<{ title: string; url: string; domain: string }> }>;
}
export interface CuratorServerHandle {
server: http.Server;
url: string;
close: () => void;
pushResult: (queryIndex: number, data: { answer: string; results: Array<{ title: string; url: string; domain: string }> }) => void;
pushError: (queryIndex: number, error: string) => void;
searchesDone: () => void;
}
function sendJson(res: ServerResponse, status: number, payload: unknown): void {
res.writeHead(status, {
"Content-Type": "application/json",
"Cache-Control": "no-store",
});
res.end(JSON.stringify(payload));
}
function parseJSONBody(req: IncomingMessage): Promise<unknown> {
return new Promise((resolve, reject) => {
let body = "";
let size = 0;
req.on("data", (chunk: Buffer) => {
size += chunk.length;
if (size > MAX_BODY_SIZE) {
req.destroy();
reject(new Error("Request body too large"));
return;
}
body += chunk.toString();
});
req.on("end", () => {
try { resolve(JSON.parse(body)); }
catch { reject(new Error("Invalid JSON")); }
});
req.on("error", reject);
});
}
export function startCuratorServer(
options: CuratorServerOptions,
callbacks: CuratorServerCallbacks,
): Promise<CuratorServerHandle> {
const { queries, sessionToken, timeout, availableProviders, defaultProvider } = options;
let browserConnected = false;
let lastHeartbeatAt = Date.now();
let completed = false;
let watchdog: NodeJS.Timeout | null = null;
let state: ServerState = "SEARCHING";
let sseResponse: ServerResponse | null = null;
const sseBuffer: string[] = [];
let nextQueryIndex = queries.length;
let sseKeepalive: NodeJS.Timeout | null = null;
const markCompleted = (): boolean => {
if (completed) return false;
completed = true;
state = "COMPLETED";
if (watchdog) { clearInterval(watchdog); watchdog = null; }
if (sseKeepalive) { clearInterval(sseKeepalive); sseKeepalive = null; }
if (sseResponse) {
try { sseResponse.end(); } catch {}
sseResponse = null;
}
return true;
};
const touchHeartbeat = (): void => {
lastHeartbeatAt = Date.now();
browserConnected = true;
};
function validateToken(body: unknown, res: ServerResponse): boolean {
if (!body || typeof body !== "object") {
sendJson(res, 400, { ok: false, error: "Invalid body" });
return false;
}
if ((body as { token?: string }).token !== sessionToken) {
sendJson(res, 403, { ok: false, error: "Invalid session" });
return false;
}
return true;
}
function sendSSE(event: string, data: unknown): void {
const payload = `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
const res = sseResponse;
if (res && !res.writableEnded && res.socket && !res.socket.destroyed) {
try {
const ok = res.write(payload);
if (!ok) res.once("drain", () => {});
} catch {
sseBuffer.push(payload);
}
} else {
sseBuffer.push(payload);
}
}
const pageHtml = generateCuratorPage(queries, sessionToken, timeout, availableProviders, defaultProvider);
const server = http.createServer(async (req, res) => {
try {
const method = req.method || "GET";
const url = new URL(req.url || "/", `http://${req.headers.host || "127.0.0.1"}`);
if (method === "GET" && url.pathname === "/") {
const token = url.searchParams.get("session");
if (token !== sessionToken) {
res.writeHead(403, { "Content-Type": "text/plain" });
res.end("Invalid session");
return;
}
touchHeartbeat();
res.writeHead(200, {
"Content-Type": "text/html; charset=utf-8",
"Cache-Control": "no-store",
});
res.end(pageHtml);
return;
}
if (method === "GET" && url.pathname === "/events") {
const token = url.searchParams.get("session");
if (token !== sessionToken) {
res.writeHead(403, { "Content-Type": "text/plain" });
res.end("Invalid session");
return;
}
if (state === "COMPLETED") {
sendJson(res, 409, { ok: false, error: "No events available" });
return;
}
if (sseResponse) {
try { sseResponse.end(); } catch {}
}
res.writeHead(200, {
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no",
});
res.flushHeaders();
if (res.socket) res.socket.setNoDelay(true);
sseResponse = res;
for (const msg of sseBuffer) {
try { res.write(msg); } catch {}
}
sseBuffer.length = 0;
if (sseKeepalive) clearInterval(sseKeepalive);
sseKeepalive = setInterval(() => {
if (sseResponse) {
try { sseResponse.write(":keepalive\n\n"); } catch {}
}
}, 15000);
req.on("close", () => {
if (sseResponse === res) sseResponse = null;
});
return;
}
if (method === "POST" && url.pathname === "/heartbeat") {
const body = await parseJSONBody(req).catch(() => null);
if (!body) { sendJson(res, 400, { ok: false, error: "Invalid body" }); return; }
if (!validateToken(body, res)) return;
touchHeartbeat();
sendJson(res, 200, { ok: true });
return;
}
if (method === "POST" && url.pathname === "/provider") {
const body = await parseJSONBody(req).catch(() => null);
if (!body) { sendJson(res, 400, { ok: false, error: "Invalid body" }); return; }
if (!validateToken(body, res)) return;
const { provider } = body as { provider?: string };
if (typeof provider === "string" && provider.length > 0) {
setImmediate(() => callbacks.onProviderChange(provider));
}
sendJson(res, 200, { ok: true });
return;
}
if (method === "POST" && url.pathname === "/search") {
const body = await parseJSONBody(req).catch(() => null);
if (!body) { sendJson(res, 400, { ok: false, error: "Invalid body" }); return; }
if (!validateToken(body, res)) return;
if (state === "COMPLETED") {
sendJson(res, 409, { ok: false, error: "Session closed" });
return;
}
const { query } = body as { query?: string };
if (typeof query !== "string" || query.trim().length === 0) {
sendJson(res, 400, { ok: false, error: "Invalid query" });
return;
}
const qi = nextQueryIndex++;
touchHeartbeat();
try {
const result = await callbacks.onAddSearch(query.trim(), qi);
sendJson(res, 200, { ok: true, queryIndex: qi, answer: result.answer, results: result.results });
} catch (err) {
const message = err instanceof Error ? err.message : "Search failed";
sendJson(res, 200, { ok: true, queryIndex: qi, error: message });
}
return;
}
if (method === "POST" && url.pathname === "/submit") {
const body = await parseJSONBody(req).catch(() => null);
if (!body) { sendJson(res, 400, { ok: false, error: "Invalid body" }); return; }
if (!validateToken(body, res)) return;
const { selected } = body as { selected?: number[] };
if (!Array.isArray(selected) || !selected.every(n => typeof n === "number")) {
sendJson(res, 400, { ok: false, error: "Invalid selection" });
return;
}
if (state !== "SEARCHING" && state !== "RESULT_SELECTION") {
sendJson(res, 409, { ok: false, error: "Cannot submit in current state" });
return;
}
if (!markCompleted()) {
sendJson(res, 409, { ok: false, error: "Session closed" });
return;
}
sendJson(res, 200, { ok: true });
setImmediate(() => callbacks.onSubmit(selected));
return;
}
if (method === "POST" && url.pathname === "/cancel") {
const body = await parseJSONBody(req).catch(() => null);
if (!body) { sendJson(res, 400, { ok: false, error: "Invalid body" }); return; }
if (!validateToken(body, res)) return;
if (!markCompleted()) {
sendJson(res, 200, { ok: true });
return;
}
const { reason } = body as { reason?: string };
sendJson(res, 200, { ok: true });
const cancelReason = reason === "timeout" ? "timeout" : "user";
setImmediate(() => callbacks.onCancel(cancelReason));
return;
}
res.writeHead(404, { "Content-Type": "text/plain" });
res.end("Not found");
} catch (err) {
const message = err instanceof Error ? err.message : "Server error";
sendJson(res, 500, { ok: false, error: message });
}
});
return new Promise((resolve, reject) => {
const onError = (err: Error) => {
reject(new Error(`Curator server failed to start: ${err.message}`));
};
server.once("error", onError);
server.listen(0, "127.0.0.1", () => {
server.off("error", onError);
const addr = server.address();
if (!addr || typeof addr === "string") {
reject(new Error("Curator server: invalid address"));
return;
}
const url = `http://localhost:${addr.port}/?session=${sessionToken}`;
watchdog = setInterval(() => {
if (completed || !browserConnected) return;
if (Date.now() - lastHeartbeatAt <= STALE_THRESHOLD_MS) return;
if (!markCompleted()) return;
setImmediate(() => callbacks.onCancel("stale"));
}, WATCHDOG_INTERVAL_MS);
resolve({
server,
url,
close: () => {
const wasOpen = markCompleted();
try { server.close(); } catch {}
if (wasOpen) {
setImmediate(() => callbacks.onCancel("stale"));
}
},
pushResult: (queryIndex, data) => {
if (completed) return;
sendSSE("result", { queryIndex, query: queries[queryIndex] ?? "", ...data });
},
pushError: (queryIndex, error) => {
if (completed) return;
sendSSE("search-error", { queryIndex, query: queries[queryIndex] ?? "", error });
},
searchesDone: () => {
if (completed) return;
sendSSE("done", {});
state = "RESULT_SELECTION";
},
});
});
});
}

View File

@@ -1,147 +0,0 @@
import { existsSync, readFileSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import { activityMonitor } from "./activity.js";
import type { SearchOptions, SearchResponse, SearchResult } from "./perplexity.js";
const EXA_API_URL = "https://api.exa.ai/search";
const CONFIG_PATH = join(homedir(), ".pi", "web-search.json");
interface WebSearchConfig {
exaApiKey?: string;
}
interface ExaSearchResult {
title?: string;
url?: string;
text?: string;
highlights?: string[];
summary?: string;
}
let cachedConfig: WebSearchConfig | null = null;
function loadConfig(): WebSearchConfig {
if (cachedConfig) return cachedConfig;
if (existsSync(CONFIG_PATH)) {
try {
cachedConfig = JSON.parse(readFileSync(CONFIG_PATH, "utf-8")) as WebSearchConfig;
return cachedConfig;
} catch {
cachedConfig = {};
}
} else {
cachedConfig = {};
}
return cachedConfig;
}
function getApiKey(): string {
const config = loadConfig();
const key = process.env.EXA_API_KEY || config.exaApiKey;
if (!key) {
throw new Error(
"Exa API key not found. Either:\n" +
` 1. Create ${CONFIG_PATH} with { "exaApiKey": "your-key" }\n` +
" 2. Set EXA_API_KEY environment variable\n" +
"Get a key from the Exa dashboard."
);
}
return key;
}
function toSnippet(result: ExaSearchResult): string {
if (Array.isArray(result.highlights) && result.highlights.length > 0) {
return result.highlights.join(" ");
}
if (typeof result.summary === "string" && result.summary.trim()) {
return result.summary.trim();
}
if (typeof result.text === "string" && result.text.trim()) {
return result.text.trim().slice(0, 400);
}
return "";
}
function formatAnswer(results: SearchResult[]): string {
return results
.map((result, index) => {
const snippet = result.snippet ? `\n${result.snippet}` : "";
return `${index + 1}. ${result.title}\n${result.url}${snippet}`;
})
.join("\n\n");
}
export function isExaAvailable(): boolean {
const config = loadConfig();
return Boolean(process.env.EXA_API_KEY || config.exaApiKey);
}
export async function searchWithExa(query: string, options: SearchOptions = {}): Promise<SearchResponse> {
const activityId = activityMonitor.logStart({ type: "api", query });
const apiKey = getApiKey();
const numResults = Math.min(options.numResults ?? 5, 20);
const includeDomains = options.domainFilter?.filter((entry) => !entry.startsWith("-")) ?? [];
const excludeDomains = options.domainFilter?.filter((entry) => entry.startsWith("-")).map((entry) => entry.slice(1)) ?? [];
const requestBody: Record<string, unknown> = {
query,
type: "auto",
numResults,
contents: {
highlights: {
numSentences: 3,
},
},
};
if (includeDomains.length > 0) {
requestBody.includeDomains = includeDomains;
}
if (excludeDomains.length > 0) {
requestBody.excludeDomains = excludeDomains;
}
try {
const response = await fetch(EXA_API_URL, {
method: "POST",
headers: {
"Content-Type": "application/json",
"x-api-key": apiKey,
},
body: JSON.stringify(requestBody),
signal: options.signal,
});
if (!response.ok) {
activityMonitor.logComplete(activityId, response.status);
throw new Error(`Exa API error ${response.status}: ${(await response.text()).slice(0, 300)}`);
}
const data = await response.json() as { results?: ExaSearchResult[] };
const results = (Array.isArray(data.results) ? data.results : [])
.slice(0, numResults)
.map((result, index) => ({
title: result.title?.trim() || `Source ${index + 1}`,
url: result.url?.trim() || "",
snippet: toSnippet(result),
}))
.filter((result) => result.url.length > 0);
activityMonitor.logComplete(activityId, response.status);
return {
answer: formatAnswer(results),
results,
};
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
if (message.toLowerCase().includes("abort")) {
activityMonitor.logComplete(activityId, 0);
} else {
activityMonitor.logError(activityId, message);
}
throw error;
}
}

View File

@@ -1,256 +0,0 @@
import { existsSync, readFileSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import { activityMonitor } from "./activity.js";
import { isExaAvailable, searchWithExa } from "./exa.js";
import { getApiKey, API_BASE, DEFAULT_MODEL } from "./gemini-api.js";
import { isGeminiWebAvailable, queryWithCookies } from "./gemini-web.js";
import { isPerplexityAvailable, searchWithPerplexity, type SearchResult, type SearchResponse, type SearchOptions } from "./perplexity.js";
export type SearchProvider = "auto" | "perplexity" | "exa" | "gemini";
const CONFIG_PATH = join(homedir(), ".pi", "web-search.json");
let cachedSearchConfig: { searchProvider: SearchProvider; searchModel?: string } | null = null;
function getSearchConfig(): { searchProvider: SearchProvider; searchModel?: string } {
if (cachedSearchConfig) return cachedSearchConfig;
try {
if (existsSync(CONFIG_PATH)) {
const raw = JSON.parse(readFileSync(CONFIG_PATH, "utf-8")) as {
searchProvider?: SearchProvider;
searchModel?: unknown;
};
cachedSearchConfig = {
searchProvider: raw.searchProvider ?? "auto",
searchModel: normalizeSearchModel(raw.searchModel),
};
return cachedSearchConfig;
}
} catch {}
cachedSearchConfig = { searchProvider: "auto", searchModel: undefined };
return cachedSearchConfig;
}
function normalizeSearchModel(value: unknown): string | undefined {
return typeof value === "string" && value.length > 0 ? value : undefined;
}
export interface FullSearchOptions extends SearchOptions {
provider?: SearchProvider;
}
export async function search(query: string, options: FullSearchOptions = {}): Promise<SearchResponse> {
const config = getSearchConfig();
const provider = options.provider ?? config.searchProvider;
if (provider === "perplexity") {
return searchWithPerplexity(query, options);
}
if (provider === "exa") {
return searchWithExa(query, options);
}
if (provider === "gemini") {
const result = await searchWithGeminiApi(query, options)
?? await searchWithGeminiWeb(query, options);
if (result) return result;
throw new Error(
"Gemini search unavailable. Either:\n" +
" 1. Set GEMINI_API_KEY in ~/.pi/web-search.json\n" +
" 2. Sign into gemini.google.com in a supported Chromium-based browser"
);
}
if (isPerplexityAvailable()) {
return searchWithPerplexity(query, options);
}
if (isExaAvailable()) {
return searchWithExa(query, options);
}
const geminiResult = await searchWithGeminiApi(query, options)
?? await searchWithGeminiWeb(query, options);
if (geminiResult) return geminiResult;
throw new Error(
"No search provider available. Either:\n" +
" 1. Set perplexityApiKey in ~/.pi/web-search.json\n" +
" 2. Set exaApiKey or EXA_API_KEY\n" +
" 3. Set GEMINI_API_KEY in ~/.pi/web-search.json\n" +
" 4. Sign into gemini.google.com in a supported Chromium-based browser"
);
}
async function searchWithGeminiApi(query: string, options: SearchOptions = {}): Promise<SearchResponse | null> {
const apiKey = getApiKey();
if (!apiKey) return null;
const activityId = activityMonitor.logStart({ type: "api", query });
try {
const model = getSearchConfig().searchModel ?? DEFAULT_MODEL;
const body = {
contents: [{ parts: [{ text: query }] }],
tools: [{ google_search: {} }],
};
const res = await fetch(`${API_BASE}/models/${model}:generateContent?key=${apiKey}`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(body),
signal: AbortSignal.any([
AbortSignal.timeout(60000),
...(options.signal ? [options.signal] : []),
]),
});
if (!res.ok) {
const errorText = await res.text();
throw new Error(`Gemini API error ${res.status}: ${errorText.slice(0, 300)}`);
}
const data = await res.json() as GeminiSearchResponse;
activityMonitor.logComplete(activityId, res.status);
const answer = data.candidates?.[0]?.content?.parts
?.map(p => p.text).filter(Boolean).join("\n") ?? "";
const metadata = data.candidates?.[0]?.groundingMetadata;
const results = await resolveGroundingChunks(metadata?.groundingChunks, options.signal);
if (!answer && results.length === 0) return null;
return { answer, results };
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
if (message.toLowerCase().includes("abort")) {
activityMonitor.logComplete(activityId, 0);
} else {
activityMonitor.logError(activityId, message);
}
return null;
}
}
async function searchWithGeminiWeb(query: string, options: SearchOptions = {}): Promise<SearchResponse | null> {
const cookies = await isGeminiWebAvailable();
if (!cookies) return null;
const prompt = buildSearchPrompt(query, options);
const activityId = activityMonitor.logStart({ type: "api", query });
try {
const text = await queryWithCookies(prompt, cookies, {
model: "gemini-3-flash-preview",
signal: options.signal,
timeoutMs: 60000,
});
activityMonitor.logComplete(activityId, 200);
const results = extractSourceUrls(text);
return { answer: text, results };
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
if (message.toLowerCase().includes("abort")) {
activityMonitor.logComplete(activityId, 0);
} else {
activityMonitor.logError(activityId, message);
}
return null;
}
}
function buildSearchPrompt(query: string, options: SearchOptions): string {
let prompt = `Search the web and answer the following question. Include source URLs for your claims.\nFormat your response as:\n1. A direct answer to the question\n2. Cited sources as markdown links\n\nQuestion: ${query}`;
if (options.recencyFilter) {
const labels: Record<string, string> = {
day: "past 24 hours",
week: "past week",
month: "past month",
year: "past year",
};
prompt += `\n\nOnly include results from the ${labels[options.recencyFilter]}.`;
}
if (options.domainFilter?.length) {
const includes = options.domainFilter.filter(d => !d.startsWith("-"));
const excludes = options.domainFilter.filter(d => d.startsWith("-")).map(d => d.slice(1));
if (includes.length) prompt += `\n\nOnly cite sources from: ${includes.join(", ")}`;
if (excludes.length) prompt += `\n\nDo not cite sources from: ${excludes.join(", ")}`;
}
return prompt;
}
function extractSourceUrls(markdown: string): SearchResult[] {
const results: SearchResult[] = [];
const seen = new Set<string>();
const linkRegex = /\[([^\]]+)\]\((https?:\/\/[^)]+)\)/g;
for (const match of markdown.matchAll(linkRegex)) {
const url = match[2];
if (seen.has(url)) continue;
seen.add(url);
results.push({ title: match[1], url, snippet: "" });
}
return results;
}
async function resolveGroundingChunks(
chunks: GroundingChunk[] | undefined,
signal?: AbortSignal,
): Promise<SearchResult[]> {
if (!chunks?.length) return [];
const results: SearchResult[] = [];
for (const chunk of chunks) {
if (!chunk.web) continue;
const title = chunk.web.title || "";
let url = chunk.web.uri || "";
if (url.includes("vertexaisearch.cloud.google.com/grounding-api-redirect")) {
const resolved = await resolveRedirect(url, signal);
if (resolved) url = resolved;
}
if (url) results.push({ title, url, snippet: "" });
}
return results;
}
async function resolveRedirect(proxyUrl: string, signal?: AbortSignal): Promise<string | null> {
try {
const res = await fetch(proxyUrl, {
method: "HEAD",
redirect: "manual",
signal: AbortSignal.any([
AbortSignal.timeout(5000),
...(signal ? [signal] : []),
]),
});
return res.headers.get("location") || null;
} catch {
return null;
}
}
interface GeminiSearchResponse {
candidates?: Array<{
content?: { parts?: Array<{ text?: string }> };
groundingMetadata?: {
webSearchQueries?: string[];
groundingChunks?: GroundingChunk[];
groundingSupports?: Array<{
segment?: { startIndex?: number; endIndex?: number; text?: string };
groundingChunkIndices?: number[];
}>;
};
}>;
}
interface GroundingChunk {
web?: { uri?: string; title?: string };
}

File diff suppressed because it is too large Load Diff