7 Commits

Author SHA1 Message Date
Advait Paliwal
66a7978582 Require multiple search terms in deepresearch 2026-04-17 21:44:31 -07:00
Advait Paliwal
3d46b581e0 Make deepresearch execute reliably over RPC 2026-04-17 18:52:57 -07:00
Advait Paliwal
40939859b9 Fix subagent output paths and deepresearch robustness 2026-04-17 18:00:24 -07:00
Advait Paliwal
6f3eeea75b Fix Feynman runtime auth env 2026-04-17 15:42:30 -07:00
Advait Paliwal
1b53e3b7f1 Fix Pi subagent task outputs 2026-04-17 14:16:57 -07:00
Advait Paliwal
ec4cbfb57e Update Pi runtime packages 2026-04-17 13:45:16 -07:00
Advait Paliwal
1cd1a147f2 Remove runtime hygiene extension bloat 2026-04-17 11:47:18 -07:00
30 changed files with 1093 additions and 1571 deletions

View File

@@ -25,7 +25,7 @@ curl -fsSL https://feynman.is/install | bash
irm https://feynman.is/install.ps1 | iex
```
The one-line installer fetches the latest tagged release. To pin a version, pass it explicitly, for example `curl -fsSL https://feynman.is/install | bash -s -- 0.2.27`.
The one-line installer fetches the latest tagged release. To pin a version, pass it explicitly, for example `curl -fsSL https://feynman.is/install | bash -s -- 0.2.31`.
The installer downloads a standalone native bundle with its own Node.js runtime.
@@ -35,8 +35,6 @@ To uninstall the standalone app, remove the launcher and runtime bundle, then op
Local models are supported through the setup flow. For LM Studio, run `feynman setup`, choose `LM Studio`, and keep the default `http://localhost:1234/v1` unless you changed the server port. For LiteLLM, choose `LiteLLM Proxy` and keep the default `http://localhost:4000/v1`. For Ollama or vLLM, choose `Custom provider (baseUrl + API key)`, use `openai-completions`, and point it at the local `/v1` endpoint.
Feynman uses Pi's own runtime hooks for context hygiene: Pi compaction/retry settings are seeded by default, `context_report` exposes the current Pi context usage to the model, oversized alphaXiv tool returns spill to `outputs/.cache/`, oversized custom/subagent returns spill to `outputs/.runs/`, and a bounded resume packet is injected from `outputs/.plans/`, `outputs/.state/`, and `CHANGELOG.md` when those files exist. Automatic session logging writes JSONL snippets to `notes/feynman-autolog/`; set `FEYNMAN_AUTO_LOG=off` to disable it or `FEYNMAN_AUTO_LOG=full` for full text. Feynman also locks new plan slugs under `outputs/.state/` to prevent concurrent workflow collisions and garbage-collects stale managed caches on startup.
### Skills Only
If you want just the research skills without the full terminal app:

View File

@@ -1,16 +1,12 @@
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { registerAlphaTools } from "./research-tools/alpha.js";
import { registerAutoLog } from "./research-tools/autolog.js";
import { registerContextReportTool } from "./research-tools/context.js";
import { registerDiscoveryCommands } from "./research-tools/discovery.js";
import { registerFeynmanModelCommand } from "./research-tools/feynman-model.js";
import { installFeynmanHeader } from "./research-tools/header.js";
import { registerHelpCommand } from "./research-tools/help.js";
import { registerInitCommand, registerOutputsCommand } from "./research-tools/project.js";
import { registerResumePacket } from "./research-tools/resume.js";
import { registerServiceTierControls } from "./research-tools/service-tier.js";
import { registerStateManagement } from "./research-tools/state.js";
export default function researchTools(pi: ExtensionAPI): void {
const cache: { agentSummaryPromise?: Promise<{ agents: string[]; chains: string[] }> } = {};
@@ -21,14 +17,10 @@ export default function researchTools(pi: ExtensionAPI): void {
});
registerAlphaTools(pi);
registerAutoLog(pi);
registerContextReportTool(pi);
registerDiscoveryCommands(pi);
registerFeynmanModelCommand(pi);
registerHelpCommand(pi);
registerInitCommand(pi);
registerOutputsCommand(pi);
registerResumePacket(pi);
registerServiceTierControls(pi);
registerStateManagement(pi);
}

View File

@@ -7,11 +7,7 @@ import {
readPaperCode,
searchPapers,
} from "@companion-ai/alpha-hub/lib";
import { createHash } from "node:crypto";
import { mkdirSync, writeFileSync } from "node:fs";
import { dirname, resolve } from "node:path";
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { Type } from "@sinclair/typebox";
function formatText(value: unknown): string {
@@ -19,44 +15,6 @@ function formatText(value: unknown): string {
return JSON.stringify(value, null, 2);
}
/** Character cap for inline tool output before spilling to disk (env-overridable, default 32 000). */
function toolOutputCapChars(): number {
  const configured = Number(process.env.FEYNMAN_TOOL_OUTPUT_CAP_CHARS);
  if (!Number.isFinite(configured) || configured <= 0) return 32_000;
  return Math.floor(configured);
}
/** Deterministic spill location for oversized output of `toolName`: content-addressed (12-hex sha256 prefix) under outputs/.cache/. */
function spillPath(ctx: ExtensionContext, toolName: string, text: string): string {
  const digest = createHash("sha256").update(text).digest("hex");
  const fileName = `${toolName}-${digest.slice(0, 12)}.md`;
  return resolve(ctx.cwd, "outputs", ".cache", fileName);
}
/**
 * Render a tool result for the model, spilling oversized payloads to disk.
 *
 * Results within the configured cap are returned verbatim; larger results are
 * written under outputs/.cache/ and replaced by a small JSON pointer (path,
 * byte count, sha256, head preview) so the context window stays bounded.
 */
export function formatToolResultWithSpillover(
  ctx: ExtensionContext,
  toolName: string,
  result: unknown,
): { text: string; details: unknown } {
  const rendered = formatText(result);
  const cap = toolOutputCapChars();
  if (rendered.length <= cap) {
    return { text: rendered, details: result };
  }
  const spillFile = spillPath(ctx, toolName, rendered);
  mkdirSync(dirname(spillFile), { recursive: true });
  writeFileSync(spillFile, rendered, "utf8");
  const pointer = {
    feynman_spillover: true,
    tool: toolName,
    path: spillFile,
    bytes: Buffer.byteLength(rendered, "utf8"),
    sha256: createHash("sha256").update(rendered).digest("hex"),
    note: "Full tool output was written to disk. Read the path in bounded chunks instead of asking the tool to return everything again.",
    head: rendered.slice(0, Math.min(cap, 4_000)),
  };
  return { text: JSON.stringify(pointer, null, 2), details: pointer };
}
export function registerAlphaTools(pi: ExtensionAPI): void {
pi.registerTool({
name: "alpha_search",
@@ -69,10 +27,9 @@ export function registerAlphaTools(pi: ExtensionAPI): void {
Type.String({ description: "Search mode: semantic, keyword, both, agentic, or all." }),
),
}),
async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
async execute(_toolCallId, params) {
const result = await searchPapers(params.query, params.mode?.trim() || "semantic");
const formatted = formatToolResultWithSpillover(ctx, "alpha_search", result);
return { content: [{ type: "text", text: formatted.text }], details: formatted.details };
return { content: [{ type: "text", text: formatText(result) }], details: result };
},
});
@@ -84,10 +41,9 @@ export function registerAlphaTools(pi: ExtensionAPI): void {
paper: Type.String({ description: "arXiv ID, arXiv URL, or alphaXiv URL." }),
fullText: Type.Optional(Type.Boolean({ description: "Return raw full text instead of AI report." })),
}),
async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
async execute(_toolCallId, params) {
const result = await getPaper(params.paper, { fullText: params.fullText });
const formatted = formatToolResultWithSpillover(ctx, "alpha_get_paper", result);
return { content: [{ type: "text", text: formatted.text }], details: formatted.details };
return { content: [{ type: "text", text: formatText(result) }], details: result };
},
});
@@ -99,10 +55,9 @@ export function registerAlphaTools(pi: ExtensionAPI): void {
paper: Type.String({ description: "arXiv ID, arXiv URL, or alphaXiv URL." }),
question: Type.String({ description: "Question about the paper." }),
}),
async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
async execute(_toolCallId, params) {
const result = await askPaper(params.paper, params.question);
const formatted = formatToolResultWithSpillover(ctx, "alpha_ask_paper", result);
return { content: [{ type: "text", text: formatted.text }], details: formatted.details };
return { content: [{ type: "text", text: formatText(result) }], details: result };
},
});
@@ -115,14 +70,13 @@ export function registerAlphaTools(pi: ExtensionAPI): void {
note: Type.Optional(Type.String({ description: "Annotation text. Omit when clear=true." })),
clear: Type.Optional(Type.Boolean({ description: "Clear the existing annotation." })),
}),
async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
async execute(_toolCallId, params) {
const result = params.clear
? await clearPaperAnnotation(params.paper)
: params.note
? await annotatePaper(params.paper, params.note)
: (() => { throw new Error("Provide either note or clear=true."); })();
const formatted = formatToolResultWithSpillover(ctx, "alpha_annotate_paper", result);
return { content: [{ type: "text", text: formatted.text }], details: formatted.details };
return { content: [{ type: "text", text: formatText(result) }], details: result };
},
});
@@ -131,10 +85,9 @@ export function registerAlphaTools(pi: ExtensionAPI): void {
label: "Alpha List Annotations",
description: "List all persistent local paper annotations.",
parameters: Type.Object({}),
async execute(_toolCallId, _params, _signal, _onUpdate, ctx) {
async execute() {
const result = await listPaperAnnotations();
const formatted = formatToolResultWithSpillover(ctx, "alpha_list_annotations", result);
return { content: [{ type: "text", text: formatted.text }], details: formatted.details };
return { content: [{ type: "text", text: formatText(result) }], details: result };
},
});
@@ -146,10 +99,9 @@ export function registerAlphaTools(pi: ExtensionAPI): void {
githubUrl: Type.String({ description: "GitHub repository URL." }),
path: Type.Optional(Type.String({ description: "File or directory path. Default: '/'" })),
}),
async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
async execute(_toolCallId, params) {
const result = await readPaperCode(params.githubUrl, params.path?.trim() || "/");
const formatted = formatToolResultWithSpillover(ctx, "alpha_read_code", result);
return { content: [{ type: "text", text: formatted.text }], details: formatted.details };
return { content: [{ type: "text", text: formatText(result) }], details: result };
},
});
}

View File

@@ -1,84 +0,0 @@
import { appendFileSync, mkdirSync, readFileSync } from "node:fs";
import { dirname, resolve } from "node:path";
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
type AutoLogMode = "off" | "events" | "full";
/** Best-effort load of the Pi agent's settings.json; {} when the dir env var is unset, the file is missing, or it fails to parse. */
function readAgentSettings(): Record<string, unknown> {
  const dir = process.env.PI_CODING_AGENT_DIR;
  if (!dir) return {};
  try {
    const raw = readFileSync(resolve(dir, "settings.json"), "utf8");
    return JSON.parse(raw) as Record<string, unknown>;
  } catch {
    return {};
  }
}
/** Coerce an arbitrary value to a known AutoLogMode; undefined for non-strings or unrecognized values. */
function normalizeMode(value: unknown): AutoLogMode | undefined {
  if (typeof value !== "string") return undefined;
  switch (value.trim().toLowerCase()) {
    case "off":
      return "off";
    case "events":
      return "events";
    case "full":
      return "full";
    default:
      return undefined;
  }
}
/** Resolve the active auto-log mode: FEYNMAN_AUTO_LOG wins, then settings.json `autoLog`, default "events". */
export function getAutoLogMode(): AutoLogMode {
  const fromEnv = normalizeMode(process.env.FEYNMAN_AUTO_LOG);
  if (fromEnv !== undefined) return fromEnv;
  return normalizeMode(readAgentSettings().autoLog) ?? "events";
}
/**
 * Flatten a chat message's content into plain text.
 * Thinking blocks are elided to a marker, tool calls reduce to `[tool:name]`,
 * and non-object or unknown items contribute nothing.
 */
function extractMessageText(message: unknown): string {
  if (!message || typeof message !== "object") return "";
  const content = (message as { content?: unknown }).content;
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";
  const parts: string[] = [];
  for (const item of content) {
    if (!item || typeof item !== "object") continue;
    const record = item as { type?: string; text?: unknown; thinking?: unknown; name?: unknown };
    if (record.type === "text" && typeof record.text === "string") {
      parts.push(record.text);
    } else if (record.type === "thinking" && typeof record.thinking === "string") {
      parts.push("[thinking omitted]");
    } else if (record.type === "toolCall") {
      parts.push(`[tool:${typeof record.name === "string" ? record.name : "unknown"}]`);
    }
  }
  return parts.filter(Boolean).join("\n");
}
/** Truncate `text` to `maxChars`, appending a marker noting how many characters were dropped. */
function clip(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text;
  const dropped = text.length - maxChars;
  return `${text.slice(0, maxChars)}\n...[truncated ${dropped} chars]`;
}
/** JSONL auto-log file for the given day (UTC date portion of `date`), under notes/feynman-autolog/. */
export function autoLogPath(cwd: string, date = new Date()): string {
  const isoDay = date.toISOString().slice(0, 10);
  return resolve(cwd, "notes", "feynman-autolog", `${isoDay}.jsonl`);
}
/** Append one JSON line for `entry` to today's auto-log file, creating parent directories as needed. */
export function writeAutoLogEntry(cwd: string, entry: Record<string, unknown>): void {
  const target = autoLogPath(cwd);
  mkdirSync(dirname(target), { recursive: true });
  const line = `${JSON.stringify(entry)}\n`;
  appendFileSync(target, line, "utf8");
}
/**
 * Mirror user/assistant messages into the daily JSONL auto-log.
 *
 * Honors the configured mode: "off" disables logging, "events" stores a
 * 500-char clip of each message, "full" stores the whole flattened text.
 * Whitespace is collapsed so each entry serializes as a single JSON line.
 */
export function registerAutoLog(pi: ExtensionAPI): void {
  pi.on("message_end", async (event, ctx: ExtensionContext) => {
    const mode = getAutoLogMode();
    if (mode === "off") return;
    // NOTE(review): event.message's shape comes from Pi; `as any` sidesteps typing here — confirm against pi-coding-agent types.
    const message = event.message as any;
    if (message.role !== "user" && message.role !== "assistant") return;
    // Collapse all whitespace runs so the logged text stays one-line-friendly.
    const text = extractMessageText(message).replace(/\s+/g, " ").trim();
    if (!text) return;
    writeAutoLogEntry(ctx.cwd, {
      timestamp: new Date(message.timestamp ?? Date.now()).toISOString(),
      session: ctx.sessionManager.getSessionId(),
      role: message.role,
      // Provider/model identifiers are only recorded for assistant messages.
      model: message.role === "assistant" ? `${message.provider}/${message.model}` : undefined,
      mode,
      text: mode === "full" ? text : clip(text, 500),
    });
  });
}

View File

@@ -1,53 +0,0 @@
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
import { Type } from "@sinclair/typebox";
/** Snapshot of Pi context-window health used to advise the model. */
type ContextPosture = {
  model: string;
  contextWindow: number | null;
  estimatedInputTokens: number | null;
  utilizationPct: number | null;
  compactionThresholdHit: boolean;
  recommendedMaxWorkers: number;
};

/**
 * Derive the current context posture from the extension context.
 *
 * Utilization falls back to tokens/window (rounded to one decimal) when Pi
 * does not report a percentage. Worker guidance reserves 16 384 tokens of
 * headroom, budgets 24 000 tokens per worker, and clamps the count to 1..4;
 * an unknown window always yields 1.
 */
export function computeContextPosture(ctx: ExtensionContext): ContextPosture {
  const usage = ctx.getContextUsage();
  const fallbackWindow = typeof ctx.model?.contextWindow === "number" ? ctx.model.contextWindow : null;
  const contextWindow = usage?.contextWindow ?? fallbackWindow;
  const estimatedInputTokens = usage?.tokens ?? null;
  let utilizationPct = usage?.percent ?? null;
  if (utilizationPct === null && contextWindow && estimatedInputTokens) {
    utilizationPct = Math.round((estimatedInputTokens / contextWindow) * 1000) / 10;
  }
  const compactionThresholdHit = utilizationPct !== null && utilizationPct >= 70;
  let recommendedMaxWorkers = 1;
  if (contextWindow !== null) {
    const headroom = Math.max(0, contextWindow - 16_384 - (estimatedInputTokens ?? 0));
    recommendedMaxWorkers = Math.max(1, Math.min(4, Math.floor(headroom / 24_000) || 1));
  }
  return {
    model: ctx.model ? `${ctx.model.provider}/${ctx.model.id}` : "not set",
    contextWindow,
    estimatedInputTokens,
    utilizationPct,
    compactionThresholdHit,
    recommendedMaxWorkers,
  };
}
/**
 * Register the `context_report` tool, which surfaces the posture computed by
 * computeContextPosture (window size, utilization, compaction flag, worker
 * guidance) to the model as pretty-printed JSON.
 */
export function registerContextReportTool(pi: ExtensionAPI): void {
  pi.registerTool({
    name: "context_report",
    label: "Context Report",
    description: "Report current Pi context usage, compaction threshold posture, and safe worker-count guidance.",
    parameters: Type.Object({}),
    async execute(_toolCallId, _params, _signal, _onUpdate, ctx) {
      const report = computeContextPosture(ctx);
      return {
        content: [{ type: "text", text: JSON.stringify(report, null, 2) }],
        details: report,
      };
    },
  });
}

View File

@@ -5,7 +5,6 @@ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { getExtensionCommandSpec } from "../../metadata/commands.mjs";
import { buildProjectAgentsTemplate, buildSessionLogsReadme } from "./project-scaffold.js";
import { collectManagedGc } from "./state.js";
async function pathExists(path: string): Promise<boolean> {
try {
@@ -105,15 +104,7 @@ export function registerInitCommand(pi: ExtensionAPI): void {
export function registerOutputsCommand(pi: ExtensionAPI): void {
pi.registerCommand("outputs", {
description: "Browse all research artifacts (papers, outputs, experiments, notes).",
handler: async (args, ctx) => {
const trimmedArgs = args.trim();
if (trimmedArgs === "gc" || trimmedArgs === "gc --dry-run") {
const dryRun = trimmedArgs.includes("--dry-run");
const result = collectManagedGc(ctx.cwd, Date.now(), undefined, { dryRun });
ctx.ui.notify(`${dryRun ? "Would remove" : "Removed"} ${result.deleted.length} managed cache file(s).`, "info");
return;
}
handler: async (_args, ctx) => {
const items = await collectArtifacts(ctx.cwd);
if (items.length === 0) {
ctx.ui.notify("No artifacts found. Use /lit, /draft, /review, or /deepresearch to create some.", "info");

View File

@@ -1,92 +0,0 @@
import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
import { join, resolve } from "node:path";
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
/** A file on disk with its modification time, used to rank resume artifacts. */
type ResumeArtifact = {
  path: string;
  mtimeMs: number;
};

/**
 * Recursively gather files under `root` that satisfy `predicate`.
 * A missing root yields []; files whose stat fails are skipped silently.
 */
function collectFiles(root: string, predicate: (path: string) => boolean): ResumeArtifact[] {
  if (!existsSync(root)) return [];
  const found: ResumeArtifact[] = [];
  for (const entry of readdirSync(root, { withFileTypes: true })) {
    const entryPath = join(root, entry.name);
    if (entry.isDirectory()) {
      for (const nested of collectFiles(entryPath, predicate)) found.push(nested);
    } else if (entry.isFile() && predicate(entryPath)) {
      try {
        found.push({ path: entryPath, mtimeMs: statSync(entryPath).mtimeMs });
      } catch {}
    }
  }
  return found;
}
/** Last `maxChars` characters of `text` (the whole string when already short enough). */
function tail(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text;
  return text.slice(text.length - maxChars);
}
/**
 * Build a bounded (default 4 000 chars) plain-text summary of on-disk workflow
 * state: up to 3 newest plans under outputs/.plans, up to 5 newest state files
 * under outputs/.state, plus the tail of CHANGELOG.md when present.
 * Returns undefined when none of those sources exist.
 */
export function buildResumePacket(cwd: string, maxChars = 4_000): string | undefined {
  // Newest-first by mtime; only markdown plans count.
  const plans = collectFiles(resolve(cwd, "outputs", ".plans"), (path) => path.endsWith(".md"))
    .sort((a, b) => b.mtimeMs - a.mtimeMs)
    .slice(0, 3);
  const stateFiles = collectFiles(resolve(cwd, "outputs", ".state"), (path) => /\.(json|jsonl|md)$/i.test(path))
    .sort((a, b) => b.mtimeMs - a.mtimeMs)
    .slice(0, 5);
  const changelogPath = resolve(cwd, "CHANGELOG.md");
  if (plans.length === 0 && stateFiles.length === 0 && !existsSync(changelogPath)) {
    return undefined;
  }
  const lines: string[] = [
    "[feynman resume packet]",
    "This is a bounded project-state summary from disk. Prefer these paths over guessing prior workflow state.",
  ];
  if (plans.length > 0) {
    lines.push("", "Recent plans:");
    for (const plan of plans) {
      lines.push(`- ${plan.path}`);
    }
    const newestPlan = plans[0]!;
    // Only the newest plan's tail is inlined; read errors are non-fatal.
    try {
      lines.push("", `Newest plan tail (${newestPlan.path}):`, tail(readFileSync(newestPlan.path, "utf8"), 1_500));
    } catch {}
  }
  if (stateFiles.length > 0) {
    lines.push("", "Recent state files:");
    for (const file of stateFiles) {
      lines.push(`- ${file.path}`);
    }
  }
  if (existsSync(changelogPath)) {
    try {
      lines.push("", "CHANGELOG tail:", tail(readFileSync(changelogPath, "utf8"), 1_200));
    } catch {}
  }
  // Final clamp keeps the packet within maxChars even if sections overflow.
  return tail(lines.join("\n"), maxChars);
}
/**
 * On session start, inject the resume packet (if any) as a hidden message
 * delivered on the next turn. Set FEYNMAN_RESUME_PACKET=off to disable.
 */
export function registerResumePacket(pi: ExtensionAPI): void {
  pi.on("session_start", async (_event, ctx: ExtensionContext) => {
    if (process.env.FEYNMAN_RESUME_PACKET === "off") return;
    const packet = buildResumePacket(ctx.cwd);
    if (!packet) return;
    pi.sendMessage(
      {
        customType: "feynman_resume_packet",
        content: packet,
        display: false, // hidden from the user; context for the model only
        details: { source: "outputs/.plans outputs/.state CHANGELOG.md" },
      },
      { triggerTurn: false, deliverAs: "nextTurn" },
    );
  });
}

View File

@@ -1,276 +0,0 @@
import { createHash } from "node:crypto";
import { appendFileSync, existsSync, mkdirSync, readdirSync, readFileSync, rmSync, statSync, writeFileSync } from "node:fs";
import { basename, dirname, isAbsolute, relative, resolve } from "node:path";
import { isToolCallEventType, type ExtensionAPI, type ExtensionContext, type ToolCallEvent } from "@mariozechner/pi-coding-agent";
// Contents of a slug lock file under outputs/.state/<slug>.lock.
type SlugLock = {
  pid: number;
  sessionId: string;
  startedAt: string; // ISO timestamp of when the lock was taken
  planPath: string;
};
// Outcome of a managed-cache garbage-collection pass.
type GcResult = {
  deleted: string[];
  kept: string[];
};
// Replacement tool result emitted when a large payload is spilled to disk
// (undefined means "leave the original result untouched").
type SpillResult = {
  content: { type: "text"; text: string }[];
  details: unknown;
} | undefined;
// Partial patch applied to a tool result from the tool_result hook.
type ToolResultPatch = {
  content?: { type: "text"; text: string }[];
  details?: unknown;
  isError?: boolean;
};
// Pi built-in tools whose (already bounded) output is never spilled.
const BUILT_IN_TOOL_NAMES = new Set(["bash", "read", "write", "edit", "grep", "find", "ls"]);
/**
 * True when `child` resolves to `parent` itself or a path beneath it.
 *
 * Relies on path.relative: escaping the parent shows up as a ".."-prefixed or
 * absolute result. Using isAbsolute (rather than checking for a leading "/")
 * also covers Windows, where cross-drive relatives stay absolute ("C:\\...")
 * and separators are backslashes.
 */
function isPathInside(parent: string, child: string): boolean {
  const rel = relative(parent, child);
  return rel === "" || (!rel.startsWith("..") && !isAbsolute(rel));
}
/** Whether `pid` names a live process we can signal (probed with kill(pid, 0), which sends no signal). */
function pidIsLive(pid: number): boolean {
  if (!Number.isInteger(pid) || pid <= 0) return false;
  try {
    process.kill(pid, 0);
  } catch {
    return false;
  }
  return true;
}
/** Parse a slug-lock file; undefined when the file is missing or malformed. */
function readLock(path: string): SlugLock | undefined {
  try {
    const raw = readFileSync(path, "utf8");
    return JSON.parse(raw) as SlugLock;
  } catch {
    return undefined;
  }
}
/**
 * A lock is live only if it parses, its startedAt stamp has not aged past
 * `timeoutMs` relative to `now`, and its recorded pid is still running.
 */
function lockIsLive(lock: SlugLock | undefined, timeoutMs: number, now = Date.now()): boolean {
  if (!lock) return false;
  const startedMs = Date.parse(lock.startedAt);
  if (!Number.isFinite(startedMs)) return false;
  if (now - startedMs > timeoutMs) return false;
  return pidIsLive(lock.pid);
}
/**
 * Classify a written path: when it is a markdown plan under outputs/.plans/,
 * return its absolute path, slug, and matching lock path under outputs/.state/;
 * otherwise undefined (the path is not lock-managed).
 */
function planPathInfo(cwd: string, inputPath: string): { absPath: string; slug: string; lockPath: string } | undefined {
  const absPath = resolve(cwd, inputPath);
  const plansRoot = resolve(cwd, "outputs", ".plans");
  const isManagedPlan = absPath.endsWith(".md") && isPathInside(plansRoot, absPath);
  if (!isManagedPlan) return undefined;
  const slug = basename(absPath, ".md");
  return { absPath, slug, lockPath: resolve(cwd, "outputs", ".state", `${slug}.lock`) };
}
/**
 * Claim exclusive ownership of a plan slug before writing/editing its file.
 *
 * Non-plan paths always succeed. Strategy "overwrite" skips locking entirely;
 * otherwise the claim fails when a live lock from a different session exists,
 * or when the plan file already exists and is not owned by this session.
 * On success a lock file (pid, session, timestamp, plan path) is written.
 *
 * NOTE(review): strategy "suffix" is accepted but currently behaves exactly
 * like "error" (no automatic -2 suffixing happens here) — confirm intended.
 */
export function claimPlanSlug(
  cwd: string,
  sessionId: string,
  inputPath: string,
  options?: { timeoutMinutes?: number; strategy?: "suffix" | "error" | "overwrite"; now?: number },
): { ok: true; lockPath?: string } | { ok: false; reason: string } {
  const info = planPathInfo(cwd, inputPath);
  if (!info) return { ok: true };
  // Explicit option wins, then the env var, then the safe default "error".
  const strategy = options?.strategy ?? (process.env.FEYNMAN_SLUG_COLLISION_STRATEGY as "suffix" | "error" | "overwrite" | undefined) ?? "error";
  if (strategy === "overwrite") return { ok: true };
  const timeoutMinutes = options?.timeoutMinutes ?? (Number(process.env.FEYNMAN_SLUG_LOCK_TIMEOUT_MINUTES) || 30);
  const timeoutMs = timeoutMinutes * 60_000;
  const existingLock = readLock(info.lockPath);
  const live = lockIsLive(existingLock, timeoutMs, options?.now);
  // Another session's live lock blocks the claim outright.
  if (live && existingLock?.sessionId !== sessionId) {
    return {
      ok: false,
      reason: `Slug "${info.slug}" is locked by another Feynman session. Use a unique slug such as ${info.slug}-2, or wait for ${info.lockPath} to expire.`,
    };
  }
  // A pre-existing plan file not owned by this session must not be clobbered.
  if (existsSync(info.absPath) && existingLock?.sessionId !== sessionId) {
    return {
      ok: false,
      reason: `Plan already exists at ${relative(cwd, info.absPath)}. Use a unique slug such as ${info.slug}-2 to avoid overwriting another run.`,
    };
  }
  mkdirSync(dirname(info.lockPath), { recursive: true });
  writeFileSync(
    info.lockPath,
    JSON.stringify({
      pid: process.pid,
      sessionId,
      startedAt: new Date(options?.now ?? Date.now()).toISOString(),
      planPath: info.absPath,
    }, null, 2) + "\n",
    "utf8",
  );
  return { ok: true, lockPath: info.lockPath };
}
/** Retention window in days for managed caches (FEYNMAN_CACHE_RETENTION_DAYS, default 14; 0 is allowed and means "delete everything eligible"). */
function managedRetentionDays(): number {
  const configured = Number(process.env.FEYNMAN_CACHE_RETENTION_DAYS);
  if (Number.isFinite(configured) && configured >= 0) return configured;
  return 14;
}
/**
 * Whether a managed file is exempt from garbage collection: either a
 * `.gcignore` marker file, or its first 500 chars carry YAML-style
 * front matter containing `retain: true`. Unreadable files are not exempt.
 */
function gcIgnored(path: string): boolean {
  if (path.endsWith(".gcignore")) return true;
  let head: string;
  try {
    head = readFileSync(path, "utf8").slice(0, 500);
  } catch {
    return false;
  }
  return /^---[\s\S]*?retain:\s*true/im.test(head);
}
/**
 * Garbage-collect the managed cache roots (outputs/.cache, .runs, .notes).
 *
 * Files older than the retention cutoff are deleted — or only reported when
 * options.dryRun is set (they still appear in `deleted`). Newer files and
 * gc-ignored files are kept. Directories emptied by the pass are removed.
 */
export function collectManagedGc(
  cwd: string,
  now = Date.now(),
  retentionDays = managedRetentionDays(),
  options?: { dryRun?: boolean },
): GcResult {
  const roots = [
    resolve(cwd, "outputs", ".cache"),
    resolve(cwd, "outputs", ".runs"),
    resolve(cwd, "outputs", ".notes"),
  ];
  const cutoff = now - retentionDays * 24 * 60 * 60 * 1000;
  const result: GcResult = { deleted: [], kept: [] };
  const visit = (path: string) => {
    if (!existsSync(path)) return;
    for (const entry of readdirSync(path, { withFileTypes: true })) {
      const child = resolve(path, entry.name);
      if (entry.isDirectory()) {
        visit(child);
        // Drop the directory if the recursive pass emptied it.
        try {
          if (readdirSync(child).length === 0) rmSync(child, { recursive: true, force: true });
        } catch {}
        continue;
      }
      if (!entry.isFile()) continue;
      const stat = statSync(child);
      // Keep files that are retained or still within the retention window.
      if (gcIgnored(child) || stat.mtimeMs >= cutoff) {
        result.kept.push(child);
        continue;
      }
      if (!options?.dryRun) {
        rmSync(child, { force: true });
      }
      result.deleted.push(child);
    }
  };
  for (const root of roots) visit(root);
  return result;
}
/** Concatenate the non-empty text items of a tool result, ignoring images. */
function textFromToolContent(content: ToolResultContent): string {
  const texts: string[] = [];
  for (const item of content) {
    if (item.type === "text" && item.text) texts.push(item.text);
  }
  return texts.join("\n");
}

/** Mixed text/image payload shape produced by Pi tool results. */
type ToolResultContent = Array<{ type: "text"; text: string } | { type: "image"; data: string; mimeType: string }>;
/**
 * Character cap for custom/subagent tool output before spilling to disk
 * (FEYNMAN_CUSTOM_TOOL_CAP_CHARS, default 24 000).
 *
 * Fractional values are floored for consistency with the alpha-tool cap
 * (toolOutputCapChars in alpha.ts applies Math.floor the same way), so the
 * cap is always an integer character count.
 */
function customToolOutputCapChars(): number {
  const raw = Number(process.env.FEYNMAN_CUSTOM_TOOL_CAP_CHARS);
  return Number.isFinite(raw) && raw > 0 ? Math.floor(raw) : 24_000;
}
/**
 * Spill an oversized custom/subagent tool result to outputs/.runs/.
 *
 * Built-in tools and results within the cap return undefined (no patch).
 * Otherwise the full text is written to a content-addressed file and the
 * result is replaced by a JSON pointer carrying the path, byte count, sha256,
 * a head preview, and the original details.
 */
export function spillLargeCustomToolResult(
  cwd: string,
  toolName: string,
  toolCallId: string,
  content: ToolResultContent,
  details: unknown,
): SpillResult {
  if (BUILT_IN_TOOL_NAMES.has(toolName)) return undefined;
  const text = textFromToolContent(content);
  const cap = customToolOutputCapChars();
  if (text.length <= cap) return undefined;
  const hash = createHash("sha256").update(text).digest("hex");
  // Sanitize the tool name so it is safe to embed in a filename.
  const safeToolName = toolName.replace(/[^a-zA-Z0-9._-]+/g, "-").slice(0, 60) || "tool";
  // NOTE(review): toolCallId is embedded unsanitized — confirm Pi ids are filename-safe.
  const path = resolve(cwd, "outputs", ".runs", `${safeToolName}-${toolCallId}-${hash.slice(0, 12)}.md`);
  mkdirSync(dirname(path), { recursive: true });
  writeFileSync(path, text, "utf8");
  const pointer = {
    feynman_spillover: true,
    tool: toolName,
    toolCallId,
    path,
    bytes: Buffer.byteLength(text, "utf8"),
    sha256: hash,
    head: text.slice(0, Math.min(cap, 2_000)),
    note: "Full custom/subagent tool result was written to disk. Read the path in bounded chunks when needed.",
    originalDetails: details,
  };
  return {
    content: [{ type: "text", text: JSON.stringify(pointer, null, 2) }],
    details: pointer,
  };
}
/** Append `value` as one JSON line to `path`, creating parent directories first. */
function appendJsonl(path: string, value: unknown): void {
  mkdirSync(dirname(path), { recursive: true });
  const line = `${JSON.stringify(value)}\n`;
  appendFileSync(path, line, "utf8");
}
/** Append a per-tool checkpoint (timestamp, session, tool, error flag, context usage) to outputs/.state/feynman.checkpoint.jsonl. */
function recordCheckpoint(ctx: ExtensionContext, toolName: string, isError: boolean): void {
  appendJsonl(resolve(ctx.cwd, "outputs", ".state", "feynman.checkpoint.jsonl"), {
    timestamp: new Date().toISOString(),
    sessionId: ctx.sessionManager.getSessionId(),
    toolName,
    isError,
    // Optional-call: getContextUsage may be absent on older Pi runtimes.
    context: ctx.getContextUsage?.(),
  });
}
/** Append a subagent job lifecycle event (running/done/failed plus payload) to outputs/.state/subagent.jobs.jsonl. */
function recordJobEvent(ctx: ExtensionContext, toolName: string, status: "running" | "done" | "failed", data: unknown): void {
  appendJsonl(resolve(ctx.cwd, "outputs", ".state", "subagent.jobs.jsonl"), {
    timestamp: new Date().toISOString(),
    sessionId: ctx.sessionManager.getSessionId(),
    toolName,
    status,
    data,
  });
}
/** Heuristic: tool names mentioning subagent/parallel/chain/run (case-insensitive substring) are treated as subagent launches. */
function looksLikeSubagentTool(toolName: string): boolean {
  const lowered = toolName.toLowerCase();
  return ["subagent", "parallel", "chain", "run"].some((marker) => lowered.includes(marker));
}
/**
 * Wire state management into Pi's lifecycle:
 * - session_start: GC managed caches (unless FEYNMAN_OUTPUTS_GC=off)
 * - tool_call: enforce plan-slug locks on write/edit; log subagent job starts
 * - tool_result: record checkpoints, close out job events, spill large results
 */
export function registerStateManagement(pi: ExtensionAPI): void {
  pi.on("session_start", async (_event, ctx) => {
    if (process.env.FEYNMAN_OUTPUTS_GC === "off") return;
    collectManagedGc(ctx.cwd);
  });
  pi.on("tool_call", async (event: ToolCallEvent, ctx) => {
    const sessionId = ctx.sessionManager.getSessionId();
    // Both write and edit against a managed plan path must hold the slug lock.
    if (isToolCallEventType("write", event)) {
      const claim = claimPlanSlug(ctx.cwd, sessionId, event.input.path);
      if (!claim.ok) return { block: true, reason: claim.reason };
    }
    if (isToolCallEventType("edit", event)) {
      const claim = claimPlanSlug(ctx.cwd, sessionId, event.input.path);
      if (!claim.ok) return { block: true, reason: claim.reason };
    }
    if (looksLikeSubagentTool(event.toolName)) {
      recordJobEvent(ctx, event.toolName, "running", event.input);
    }
    return undefined;
  });
  pi.on("tool_result", async (event, ctx): Promise<ToolResultPatch | undefined> => {
    recordCheckpoint(ctx, event.toolName, event.isError);
    if (looksLikeSubagentTool(event.toolName)) {
      recordJobEvent(ctx, event.toolName, event.isError ? "failed" : "done", event.details ?? event.content);
    }
    // Returning a patch (or undefined for "no change") lets Pi swap in the spill pointer.
    return spillLargeCustomToolResult(ctx.cwd, event.toolName, event.toolCallId, event.content as ToolResultContent, event.details);
  });
}

1105
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
{
"name": "@companion-ai/feynman",
"version": "0.2.27",
"version": "0.2.34",
"description": "Research-first CLI agent built on Pi and alphaXiv",
"license": "MIT",
"type": "module",
@@ -61,16 +61,16 @@
"dependencies": {
"@clack/prompts": "^1.2.0",
"@companion-ai/alpha-hub": "^0.1.3",
"@mariozechner/pi-ai": "^0.66.1",
"@mariozechner/pi-coding-agent": "^0.66.1",
"@sinclair/typebox": "^0.34.48",
"dotenv": "^17.3.1"
"@mariozechner/pi-ai": "^0.67.6",
"@mariozechner/pi-coding-agent": "^0.67.6",
"@sinclair/typebox": "^0.34.49",
"dotenv": "^17.4.2"
},
"overrides": {
"basic-ftp": "5.2.2",
"basic-ftp": "5.3.0",
"@modelcontextprotocol/sdk": {
"@hono/node-server": "1.19.13",
"hono": "4.12.12"
"@hono/node-server": "1.19.14",
"hono": "4.12.14"
},
"express": {
"router": {
@@ -80,16 +80,17 @@
"proxy-agent": {
"pac-proxy-agent": {
"get-uri": {
"basic-ftp": "5.2.2"
"basic-ftp": "5.3.0"
}
}
},
"protobufjs": "7.5.5",
"minimatch": {
"brace-expansion": "5.0.5"
}
},
"devDependencies": {
"@types/node": "^25.5.0",
"@types/node": "^25.6.0",
"tsx": "^4.21.0",
"typescript": "^5.9.3"
},

View File

@@ -4,205 +4,177 @@ args: <topic>
section: Research Workflows
topLevelCli: true
---
Run a deep research workflow for: $@
Run deep research for: $@
You are the Lead Researcher. You plan, delegate, evaluate, verify, write, and cite. Internal orchestration is invisible to the user unless they ask.
This is an execution request, not a request to explain or implement the workflow instructions.
Execute the workflow. Do not answer by describing the protocol, do not explain these instructions, do not restate the protocol, and do not ask for confirmation. Do not stop after planning. Your first actions should be tool calls that create directories and write the plan artifact.
## 1. Plan
## Required Artifacts
Analyze the research question using extended thinking. Develop a research strategy:
- Key questions that must be answered
- Evidence types needed (papers, web, code, data, docs)
- Sub-questions disjoint enough to parallelize
- Source types and time periods that matter
- Acceptance criteria: what evidence would make the answer "sufficient"
Derive a short slug from the topic: lowercase, hyphenated, no filler words, at most 5 words.
Derive a short slug from the topic (lowercase, hyphens, no filler words, ≤5 words — e.g. "cloud-sandbox-pricing" not "deepresearch-plan"). Write the plan to `outputs/.plans/<slug>.md` as a self-contained artifact. Use this same slug for all artifacts in this run.
If `CHANGELOG.md` exists, read the most recent relevant entries before finalizing the plan. Once the workflow becomes multi-round or spans enough work to merit resume support, append concise entries to `CHANGELOG.md` after meaningful progress and before stopping.
Every run must leave these files on disk:
- `outputs/.plans/<slug>.md`
- `outputs/.drafts/<slug>-draft.md`
- `outputs/.drafts/<slug>-cited.md`
- `outputs/<slug>.md` or `papers/<slug>.md`
- `outputs/<slug>.provenance.md` or `papers/<slug>.provenance.md`
```markdown
# Research Plan: [topic]
If any capability fails, continue in degraded mode and still write a blocked or partial final output and provenance sidecar. Never end with chat-only output. Never end with only an explanation in chat. Use `Verification: BLOCKED` when verification could not be completed.
## Questions
1. ...
## Step 1: Plan
## Strategy
- Researcher allocations and dimensions
- Expected rounds
Create `outputs/.plans/<slug>.md` immediately. The plan must include:
- Key questions
- Evidence needed
- Scale decision
- Task ledger
- Verification log
- Decision log
## Acceptance Criteria
- [ ] All key questions answered with ≥2 independent sources
- [ ] Contradictions identified and addressed
- [ ] No single-source claims on critical findings
Make the scale decision before assigning owners in the plan. If the topic is a narrow "what is X" explainer, the plan must use lead-owned direct search tasks only; do not allocate researcher subagents in the task ledger.
## Task Ledger
| ID | Owner | Task | Status | Output |
|---|---|---|---|---|
| T1 | lead / researcher | ... | todo | ... |
Also save the plan with `memory_remember` using key `deepresearch.<slug>.plan` if that tool is available. If it is not available, continue without it.
## Verification Log
| Item | Method | Status | Evidence |
|---|---|---|---|
| Critical claim / computation / figure | source cross-read / rerun / direct fetch / code check | pending | path or URL |
After writing the plan, continue immediately. Do not pause for approval.
## Decision Log
(Updated as the workflow progresses)
```
## Step 2: Scale
Also save the plan with `memory_remember` (type: `fact`, key: `deepresearch.<slug>.plan`) so it survives context truncation.
Use direct search for:
- Single fact or narrow question, including "what is X" explainers
- Work you can answer with 3-10 tool calls
Briefly summarize the plan to the user and continue immediately. Do not ask for confirmation or wait for a proceed response unless the user explicitly requested plan review.
For "what is X" explainer topics, you MUST NOT spawn researcher subagents unless the user explicitly asks for comprehensive coverage, current landscape, benchmarks, or production deployment.
Do not inflate a simple explainer into a multi-agent survey.
Do not stop after planning. If live search, subagents, web access, alphaXiv, or any other capability is unavailable, continue in degraded mode and write a durable blocked/partial report that records exactly which capabilities failed.
Use subagents only when decomposition clearly helps:
- Direct comparison of 2-3 items: 2 `researcher` subagents
- Broad survey or multi-faceted topic: 3-4 `researcher` subagents
- Complex multi-domain research: 4-6 `researcher` subagents
## 2. Scale decision
## Step 3: Gather Evidence
| Query type | Execution |
|---|---|
| Single fact or narrow question | Search directly yourself, no subagents, 3-10 tool calls |
| Direct comparison (2-3 items) | 2 parallel `researcher` subagents |
| Broad survey or multi-faceted topic | 3-4 parallel `researcher` subagents |
| Complex multi-domain research | 4-6 parallel `researcher` subagents |
Avoid crash-prone PDF parsing in this workflow. Do not call `alpha_get_paper` and do not fetch `.pdf` URLs unless the user explicitly asks for PDF extraction. Prefer paper metadata, abstracts, HTML pages, official docs, and web snippets. If only a PDF exists, cite the PDF URL from search metadata and mark full-text PDF parsing as blocked instead of fetching it.
Never spawn subagents for work you can do in 5 tool calls.
If direct search was chosen:
- Skip researcher spawning entirely.
- Search and fetch sources yourself.
- Use multiple search terms/angles before drafting. Minimum: 3 distinct queries for direct-mode research, covering definition/history, mechanism/formula, and current usage/comparison when relevant.
- Record the exact search terms used in `<slug>-research-direct.md`.
- Write notes to `<slug>-research-direct.md`.
- Continue to synthesis.
## 3. Spawn researchers
If subagents were chosen:
- Write a per-researcher brief first, such as `outputs/.plans/<slug>-T1.md`.
- Keep `subagent` tool-call JSON small and valid.
- Do not place multi-paragraph instructions inside the `subagent` JSON.
- Use only supported `subagent` keys. Do not add extra keys such as `artifacts` unless the tool schema explicitly exposes them.
- Always set `failFast: false`.
- Do not name exact tool commands in subagent tasks unless those tool names are visible in the current tool set.
- Prefer broad guidance such as "use paper search and web search"; if a PDF parser or paper fetch fails, the researcher must continue from metadata, abstracts, and web sources and mark PDF parsing as blocked.
Launch parallel `researcher` subagents via `subagent`. Each gets a structured brief with:
- **Objective:** what to find
- **Output format:** numbered sources, evidence table, inline source references
- **Tool guidance:** which search tools to prioritize
- **Task boundaries:** what NOT to cover (another researcher handles that)
- **Task IDs:** the specific ledger rows they own and must report back on
Example shape:
Assign each researcher a clearly disjoint dimension — different source types, geographic scopes, time periods, or technical angles. Never duplicate coverage.
```
```json
{
tasks: [
{ agent: "researcher", task: "...", output: "<slug>-research-web.md" },
{ agent: "researcher", task: "...", output: "<slug>-research-papers.md" }
"tasks": [
{ "agent": "researcher", "task": "Read outputs/.plans/<slug>-T1.md and write <slug>-research-web.md.", "output": "<slug>-research-web.md" },
{ "agent": "researcher", "task": "Read outputs/.plans/<slug>-T2.md and write <slug>-research-papers.md.", "output": "<slug>-research-papers.md" }
],
concurrency: 4,
failFast: false
"concurrency": 4,
"failFast": false
}
```
Researchers write full outputs to files and pass references back — do not have them return full content into your context.
Researchers must not silently merge or skip assigned tasks. If something is impossible or redundant, mark the ledger row `blocked` or `superseded` with a note.
After evidence gathering, update the plan ledger and verification log. If research failed, record exactly what failed and proceed with a blocked or partial draft.
## 4. Evaluate and loop
## Step 4: Draft
After researchers return, read their output files and critically assess:
- Which plan questions remain unanswered?
- Which answers rest on only one source?
- Are there contradictions needing resolution?
- Is any key angle missing entirely?
- Did every assigned ledger task actually get completed, blocked, or explicitly superseded?
Write the report yourself. Do not delegate synthesis.
If gaps are significant, spawn another targeted batch of researchers. No fixed cap on rounds — iterate until evidence is sufficient or sources are exhausted.
Save to `outputs/.drafts/<slug>-draft.md`.
Update the plan artifact (`outputs/.plans/<slug>.md`) task ledger, verification log, and decision log after each round.
When the work spans multiple rounds, also append a concise chronological entry to `CHANGELOG.md` covering what changed, what was verified, what remains blocked, and the next recommended step.
Include:
- Executive summary
- Findings organized by question/theme
- Evidence-backed caveats and disagreements
- Open questions
- No invented sources, results, figures, benchmarks, images, charts, or tables
Most topics need 1-2 rounds. Stop when additional rounds would not materially change conclusions.
Before citation, sweep the draft:
- Every critical claim, number, figure, table, or benchmark must map to a source URL, research note, raw artifact path, or command/script output.
- Remove or downgrade unsupported claims.
- Mark inferences as inferences.
If no researcher files can be produced because tools, subagents, or network access failed, create `outputs/.drafts/<slug>-draft.md` yourself as a blocked report with:
- what was requested,
- which capabilities failed,
- what evidence was and was not gathered,
- a proposed source-gathering plan,
- no invented sources or results.
## Step 5: Cite
## 5. Write the report
If direct search/no researcher subagents was chosen:
- Do citation yourself.
- Verify reachable HTML/doc URLs with available fetch/search tools.
- Copy or rewrite `outputs/.drafts/<slug>-draft.md` to `outputs/.drafts/<slug>-cited.md` with inline citations and a Sources section.
- Do not spawn the `verifier` subagent for simple direct-search runs.
Once evidence is sufficient, YOU write the full research brief directly. Do not delegate writing to another agent. Read the research files, synthesize the findings, and produce a complete document:
If researcher subagents were used, run the `verifier` agent after the draft exists. This step is mandatory and must complete before any reviewer runs. Do not run the `verifier` and `reviewer` in the same parallel `subagent` call.
```markdown
# Title
Use this shape:
## Executive Summary
2-3 paragraph overview of key findings.
## Section 1: ...
Detailed findings organized by theme or question.
## Section N: ...
## Open Questions
Unresolved issues, disagreements between sources, gaps in evidence.
```json
{
"agent": "verifier",
"task": "Add inline citations to outputs/.drafts/<slug>-draft.md using the research files as source material. Verify every URL. Write the complete cited brief to outputs/.drafts/<slug>-cited.md.",
"output": "outputs/.drafts/<slug>-cited.md"
}
```
When the research includes quantitative data (benchmarks, performance comparisons, trends), generate charts using `pi-charts`. Use Mermaid diagrams for architectures and processes. Every visual must have a caption and reference the underlying data.
After the verifier returns, verify on disk that `outputs/.drafts/<slug>-cited.md` exists. If the verifier wrote elsewhere, find the cited file and move or copy it to `outputs/.drafts/<slug>-cited.md`.
Before finalizing the draft, do a claim sweep:
- map each critical claim, number, and figure to its supporting source or artifact in the verification log
- downgrade or remove anything that cannot be grounded
- label inferences as inferences
- if code or calculations were involved, record which checks were actually run and which remain unverified
## Step 6: Review
Save this draft to `outputs/.drafts/<slug>-draft.md`.
If direct search/no researcher subagents was chosen:
- Review the cited draft yourself.
- Write `<slug>-verification.md` with FATAL / MAJOR / MINOR findings and the checks performed.
- Fix FATAL issues before delivery.
- Do not spawn the `reviewer` subagent for simple direct-search runs.
## 6. Cite
If researcher subagents were used, only after `outputs/.drafts/<slug>-cited.md` exists, run the `reviewer` agent against it.
Spawn the `verifier` agent to post-process YOUR draft. The verifier agent adds inline citations, verifies every source URL, and produces the final output:
Use this shape:
```
{ agent: "verifier", task: "Add inline citations to <slug>-draft.md using the research files as source material. Verify every URL.", output: "<slug>-brief.md" }
```json
{
"agent": "reviewer",
"task": "Verify outputs/.drafts/<slug>-cited.md. Flag unsupported claims, logical gaps, single-source critical claims, and overstated confidence. This is a verification pass, not a peer review.",
"output": "<slug>-verification.md"
}
```
The verifier agent does not rewrite the report — it only anchors claims to sources and builds the numbered Sources section.
If the reviewer flags FATAL issues, fix them before delivery and run one more review pass. Note MAJOR issues in Open Questions. Accept MINOR issues.
## 7. Verify
When applying reviewer fixes, do not issue one giant `edit` tool call with many replacements. Use small localized edits only for 1-3 simple corrections. For section rewrites, table rewrites, or more than 3 substantive fixes, read the cited draft and write a corrected full file to `outputs/.drafts/<slug>-revised.md` instead.
Spawn the `reviewer` agent against the cited draft. The reviewer checks for:
- Unsupported claims that slipped past citation
- Logical gaps or contradictions between sections
- Single-source claims on critical findings
- Overstated confidence relative to evidence quality
The final candidate is `outputs/.drafts/<slug>-revised.md` if it exists; otherwise it is `outputs/.drafts/<slug>-cited.md`.
```
{ agent: "reviewer", task: "Verify <slug>-brief.md — flag any claims that lack sufficient source backing, identify logical gaps, and check that confidence levels match evidence strength. This is a verification pass, not a peer review.", output: "<slug>-verification.md" }
```
## Step 7: Deliver
If the reviewer flags FATAL issues, fix them in the brief before delivering. MAJOR issues get noted in the Open Questions section. MINOR issues are accepted.
After fixes, run at least one more review-style verification pass if any FATAL issues were found. Do not assume one fix solved everything.
Copy the final candidate to:
- `papers/<slug>.md` for paper-style drafts
- `outputs/<slug>.md` for everything else
## 8. Deliver
Copy the final cited and verified output to the appropriate folder:
- Paper-style drafts → `papers/`
- Everything else → `outputs/`
Save the final output as `<slug>.md` (in `outputs/` or `papers/` per the rule above).
Write a provenance record alongside it as `<slug>.provenance.md`:
Write provenance next to it as `<slug>.provenance.md`:
```markdown
# Provenance: [topic]
- **Date:** [date]
- **Rounds:** [number of researcher rounds]
- **Sources consulted:** [total unique sources across all research files]
- **Sources accepted:** [sources that survived citation verification]
- **Sources rejected:** [dead links, unverifiable, or removed]
- **Verification:** [PASS / PASS WITH NOTES — summary of reviewer findings]
- **Rounds:** [number of research rounds]
- **Sources consulted:** [count and/or list]
- **Sources accepted:** [count and/or list]
- **Sources rejected:** [dead, unverifiable, or removed]
- **Verification:** [PASS / PASS WITH NOTES / BLOCKED]
- **Plan:** outputs/.plans/<slug>.md
- **Research files:** [list of intermediate <slug>-research-*.md files]
- **Research files:** [files used]
```
Before you stop, verify on disk that all of these exist:
- `outputs/.plans/<slug>.md`
- `outputs/.drafts/<slug>-draft.md`
- `<slug>-brief.md` intermediate cited brief
- `outputs/<slug>.md` or `papers/<slug>.md` final promoted deliverable
- `outputs/<slug>.provenance.md` or `papers/<slug>.provenance.md` provenance sidecar
Before responding, verify on disk that all required artifacts exist. If verification could not be completed, set `Verification: BLOCKED` or `PASS WITH NOTES` and list the missing checks.
Do not stop at `<slug>-brief.md` alone. If the cited brief exists but the promoted final output or provenance sidecar does not, create them before responding.
If full verification could not be completed, still create the final deliverable and provenance sidecar with `Verification: BLOCKED` or `PASS WITH NOTES` and list the missing checks. Never end with only an explanation in chat.
## Background execution
If the user wants unattended execution or the sweep will clearly take a while:
- Launch the full workflow via `subagent` using `clarify: false, async: true`
- Report the async ID and how to check status with `subagent_status`
Final response should be brief: link the final file, provenance file, and any blocked checks.

View File

@@ -110,7 +110,7 @@ This usually means the release exists, but not all platform bundles were uploade
Workarounds:
- try again after the release finishes publishing
- pass the latest published version explicitly, e.g.:
& ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.27
& ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.31
"@
}

View File

@@ -261,7 +261,7 @@ This usually means the release exists, but not all platform bundles were uploade
Workarounds:
- try again after the release finishes publishing
- pass the latest published version explicitly, e.g.:
curl -fsSL https://feynman.is/install | bash -s -- 0.2.27
curl -fsSL https://feynman.is/install | bash -s -- 0.2.31
EOF
exit 1
fi

View File

@@ -5,11 +5,13 @@ export const PI_SUBAGENTS_PATCH_TARGETS = [
"run-history.ts",
"skills.ts",
"chain-clarify.ts",
"subagent-executor.ts",
"schemas.ts",
];
const RESOLVE_PI_AGENT_DIR_HELPER = [
"function resolvePiAgentDir(): string {",
' const configured = process.env.PI_CODING_AGENT_DIR?.trim();',
' const configured = process.env.FEYNMAN_CODING_AGENT_DIR?.trim() || process.env.PI_CODING_AGENT_DIR?.trim();',
' if (!configured) return path.join(os.homedir(), ".pi", "agent");',
' return configured.startsWith("~/") ? path.join(os.homedir(), configured.slice(2)) : configured;',
"}",
@@ -94,6 +96,11 @@ export function patchPiSubagentsSource(relativePath, source) {
'const configPath = path.join(os.homedir(), ".pi", "agent", "extensions", "subagent", "config.json");',
'const configPath = path.join(resolvePiAgentDir(), "extensions", "subagent", "config.json");',
);
patched = replaceAll(
patched,
"• PARALLEL: { tasks: [{agent,task,count?}, ...], concurrency?: number, worktree?: true } - concurrent execution (worktree: isolate each task in a git worktree)",
"• PARALLEL: { tasks: [{agent,task,count?,output?}, ...], concurrency?: number, worktree?: true } - concurrent execution (output: per-task file target, worktree: isolate each task in a git worktree)",
);
break;
case "agents.ts":
patched = replaceAll(
@@ -190,6 +197,138 @@ export function patchPiSubagentsSource(relativePath, source) {
'const dir = path.join(resolvePiAgentDir(), "agents");',
);
break;
case "subagent-executor.ts":
patched = replaceAll(
patched,
[
"\tcwd?: string;",
"\tcount?: number;",
"\tmodel?: string;",
"\tskill?: string | string[] | boolean;",
].join("\n"),
[
"\tcwd?: string;",
"\tcount?: number;",
"\tmodel?: string;",
"\tskill?: string | string[] | boolean;",
"\toutput?: string | false;",
].join("\n"),
);
patched = replaceAll(
patched,
[
"\t\t\tcwd: task.cwd,",
"\t\t\t...(modelOverrides[index] ? { model: modelOverrides[index] } : {}),",
].join("\n"),
[
"\t\t\tcwd: task.cwd,",
"\t\t\toutput: task.output,",
"\t\t\t...(modelOverrides[index] ? { model: modelOverrides[index] } : {}),",
].join("\n"),
);
patched = replaceAll(
patched,
[
"\t\tcwd: task.cwd,",
"\t\t...(modelOverrides[index] ? { model: modelOverrides[index] } : {}),",
].join("\n"),
[
"\t\tcwd: task.cwd,",
"\t\toutput: task.output,",
"\t\t...(modelOverrides[index] ? { model: modelOverrides[index] } : {}),",
].join("\n"),
);
patched = replaceAll(
patched,
[
"\t\t\t\tcwd: t.cwd,",
"\t\t\t\t...(modelOverrides[i] ? { model: modelOverrides[i] } : {}),",
].join("\n"),
[
"\t\t\t\tcwd: t.cwd,",
"\t\t\t\toutput: t.output,",
"\t\t\t\t...(modelOverrides[i] ? { model: modelOverrides[i] } : {}),",
].join("\n"),
);
patched = replaceAll(
patched,
[
"\t\tcwd: t.cwd,",
"\t\t...(modelOverrides[i] ? { model: modelOverrides[i] } : {}),",
].join("\n"),
[
"\t\tcwd: t.cwd,",
"\t\toutput: t.output,",
"\t\t...(modelOverrides[i] ? { model: modelOverrides[i] } : {}),",
].join("\n"),
);
patched = replaceAll(
patched,
[
"\t\tconst behaviors = agentConfigs.map((c, i) =>",
"\t\t\tresolveStepBehavior(c, { skills: skillOverrides[i] }),",
"\t\t);",
].join("\n"),
[
"\t\tconst behaviors = agentConfigs.map((c, i) =>",
"\t\t\tresolveStepBehavior(c, { output: tasks[i]?.output, skills: skillOverrides[i] }),",
"\t\t);",
].join("\n"),
);
patched = replaceAll(
patched,
"\tconst behaviors = agentConfigs.map((config) => resolveStepBehavior(config, {}));",
"\tconst behaviors = agentConfigs.map((config, i) => resolveStepBehavior(config, { output: tasks[i]?.output, skills: skillOverrides[i] }));",
);
patched = replaceAll(
patched,
[
"\t\tconst taskCwd = resolveParallelTaskCwd(task, input.paramsCwd, input.worktreeSetup, index);",
"\t\treturn runSync(input.ctx.cwd, input.agents, task.agent, input.taskTexts[index]!, {",
].join("\n"),
[
"\t\tconst taskCwd = resolveParallelTaskCwd(task, input.paramsCwd, input.worktreeSetup, index);",
"\t\tconst outputPath = typeof input.behaviors[index]?.output === \"string\"",
"\t\t\t? resolveSingleOutputPath(input.behaviors[index]?.output, input.ctx.cwd, taskCwd)",
"\t\t\t: undefined;",
"\t\tconst taskText = injectSingleOutputInstruction(input.taskTexts[index]!, outputPath);",
"\t\treturn runSync(input.ctx.cwd, input.agents, task.agent, taskText, {",
].join("\n"),
);
patched = replaceAll(
patched,
[
"\t\t\tmaxOutput: input.maxOutput,",
"\t\t\tmaxSubagentDepth: input.maxSubagentDepths[index],",
].join("\n"),
[
"\t\t\tmaxOutput: input.maxOutput,",
"\t\t\toutputPath,",
"\t\t\tmaxSubagentDepth: input.maxSubagentDepths[index],",
].join("\n"),
);
break;
case "schemas.ts":
patched = replaceAll(
patched,
[
"\tcwd: Type.Optional(Type.String()),",
'\tcount: Type.Optional(Type.Integer({ minimum: 1, description: "Repeat this parallel task N times with the same settings." })),',
'\tmodel: Type.Optional(Type.String({ description: "Override model for this task (e.g. \'google/gemini-3-pro\')" })),',
].join("\n"),
[
"\tcwd: Type.Optional(Type.String()),",
'\tcount: Type.Optional(Type.Integer({ minimum: 1, description: "Repeat this parallel task N times with the same settings." })),',
'\toutput: Type.Optional(Type.Any({ description: "Output file for this parallel task (string), or false to disable. Relative paths resolve against cwd." })),',
'\tmodel: Type.Optional(Type.String({ description: "Override model for this task (e.g. \'google/gemini-3-pro\')" })),',
].join("\n"),
);
patched = replaceAll(
patched,
'tasks: Type.Optional(Type.Array(TaskItem, { description: "PARALLEL mode: [{agent, task, count?}, ...]" })),',
'tasks: Type.Optional(Type.Array(TaskItem, { description: "PARALLEL mode: [{agent, task, count?, output?}, ...]" })),',
);
break;
default:
return source;
}
@@ -198,5 +337,5 @@ export function patchPiSubagentsSource(relativePath, source) {
return source;
}
return injectResolvePiAgentDirHelper(patched);
return patched.includes("resolvePiAgentDir()") ? injectResolvePiAgentDirHelper(patched) : patched;
}

View File

@@ -1,4 +1,5 @@
import { existsSync, mkdirSync, readdirSync, readFileSync, rmSync, statSync, writeFileSync } from "node:fs";
import { createHash } from "node:crypto";
import { resolve } from "node:path";
import { spawnSync } from "node:child_process";
@@ -6,6 +7,8 @@ import { stripPiSubagentBuiltinModelSource } from "./lib/pi-subagents-patch.mjs"
const appRoot = resolve(import.meta.dirname, "..");
const settingsPath = resolve(appRoot, ".feynman", "settings.json");
const packageJsonPath = resolve(appRoot, "package.json");
const packageLockPath = resolve(appRoot, "package-lock.json");
const feynmanDir = resolve(appRoot, ".feynman");
const workspaceDir = resolve(appRoot, ".feynman", "npm");
const workspaceNodeModulesDir = resolve(workspaceDir, "node_modules");
@@ -13,16 +16,29 @@ const manifestPath = resolve(workspaceDir, ".runtime-manifest.json");
const workspacePackageJsonPath = resolve(workspaceDir, "package.json");
const workspaceArchivePath = resolve(feynmanDir, "runtime-workspace.tgz");
const PRUNE_VERSION = 4;
// Pi runtime packages that are always installed into the managed workspace at
// the exact version recorded in the app's package-lock.json (resolved via
// readLockedPackageVersion), so the runtime matches the shipped dependency set.
const PINNED_RUNTIME_PACKAGES = [
	"@mariozechner/pi-agent-core",
	"@mariozechner/pi-ai",
	"@mariozechner/pi-coding-agent",
	"@mariozechner/pi-tui",
];
function readPackageSpecs() {
const settings = JSON.parse(readFileSync(settingsPath, "utf8"));
if (!Array.isArray(settings.packages)) {
return [];
const packageSpecs = Array.isArray(settings.packages)
? settings.packages
.filter((value) => typeof value === "string" && value.startsWith("npm:"))
.map((value) => value.slice(4))
: [];
for (const packageName of PINNED_RUNTIME_PACKAGES) {
const version = readLockedPackageVersion(packageName);
if (version) {
packageSpecs.push(`${packageName}@${version}`);
}
}
return settings.packages
.filter((value) => typeof value === "string" && value.startsWith("npm:"))
.map((value) => value.slice(4));
return Array.from(new Set(packageSpecs));
}
function parsePackageName(spec) {
@@ -30,10 +46,41 @@ function parsePackageName(spec) {
return match?.[1] ?? spec;
}
// Look up the version of a package as pinned in the app's package-lock.json.
// Returns undefined when the lockfile is missing, unparseable, or has no
// entry for the package.
function readLockedPackageVersion(packageName) {
	if (!existsSync(packageLockPath)) return undefined;
	try {
		const lockfile = JSON.parse(readFileSync(packageLockPath, "utf8"));
		const version = lockfile.packages?.[`node_modules/${packageName}`]?.version;
		return typeof version === "string" ? version : undefined;
	} catch {
		// Treat a corrupt or unreadable lockfile the same as an absent one.
		return undefined;
	}
}
// Shallow element-wise equality of two arrays (same length, same items by ===).
function arraysMatch(left, right) {
	if (left.length !== right.length) {
		return false;
	}
	return left.every((item, i) => item === right[i]);
}
// SHA-256 hex digest of a file's contents, or null when the file does not exist.
function hashFile(filePath) {
	if (!existsSync(filePath)) {
		return null;
	}
	const digest = createHash("sha256");
	digest.update(readFileSync(filePath));
	return digest.digest("hex");
}
// Combined fingerprint of the runtime's input files (package.json,
// package-lock.json, settings.json). Each file contributes its path and its
// content hash ("missing" when absent), NUL-separated, so both renames and
// content changes invalidate the workspace.
function getRuntimeInputHash() {
	const digest = createHash("sha256");
	const inputs = [packageJsonPath, packageLockPath, settingsPath];
	for (const file of inputs) {
		digest.update(file);
		digest.update("\0");
		digest.update(hashFile(file) ?? "missing");
		digest.update("\0");
	}
	return digest.digest("hex");
}
function workspaceIsCurrent(packageSpecs) {
if (!existsSync(manifestPath) || !existsSync(workspaceNodeModulesDir)) {
return false;
@@ -44,6 +91,9 @@ function workspaceIsCurrent(packageSpecs) {
if (!Array.isArray(manifest.packageSpecs) || !arraysMatch(manifest.packageSpecs, packageSpecs)) {
return false;
}
if (manifest.runtimeInputHash !== getRuntimeInputHash()) {
return false;
}
if (
manifest.nodeAbi !== process.versions.modules ||
manifest.platform !== process.platform ||
@@ -97,8 +147,8 @@ function prepareWorkspace(packageSpecs) {
const result = spawnSync(
process.env.npm_execpath ? process.execPath : "npm",
process.env.npm_execpath
? [process.env.npm_execpath, "install", "--prefer-offline", "--no-audit", "--no-fund", "--no-dry-run", "--loglevel", "error", "--prefix", workspaceDir, ...packageSpecs]
: ["install", "--prefer-offline", "--no-audit", "--no-fund", "--no-dry-run", "--loglevel", "error", "--prefix", workspaceDir, ...packageSpecs],
? [process.env.npm_execpath, "install", "--prefer-online", "--no-audit", "--no-fund", "--no-dry-run", "--legacy-peer-deps", "--loglevel", "error", "--prefix", workspaceDir, ...packageSpecs]
: ["install", "--prefer-online", "--no-audit", "--no-fund", "--no-dry-run", "--legacy-peer-deps", "--loglevel", "error", "--prefix", workspaceDir, ...packageSpecs],
{ stdio: "inherit", env: childNpmInstallEnv() },
);
if (result.status !== 0) {
@@ -110,15 +160,16 @@ function writeManifest(packageSpecs) {
writeFileSync(
manifestPath,
JSON.stringify(
{
packageSpecs,
generatedAt: new Date().toISOString(),
nodeAbi: process.versions.modules,
nodeVersion: process.version,
platform: process.platform,
arch: process.arch,
pruneVersion: PRUNE_VERSION,
},
{
packageSpecs,
runtimeInputHash: getRuntimeInputHash(),
generatedAt: new Date().toISOString(),
nodeAbi: process.versions.modules,
nodeVersion: process.version,
platform: process.platform,
arch: process.arch,
pruneVersion: PRUNE_VERSION,
},
null,
2,
) + "\n",

View File

@@ -558,6 +558,7 @@ export async function main(): Promise<void> {
normalizeFeynmanSettings(feynmanSettingsPath, bundledSettingsPath, thinkingLevel, feynmanAuthPath);
}
const workflowCommandNames = new Set(readPromptSpecs(appRoot).filter((s) => s.topLevelCli).map((s) => s.name));
await launchPiChat({
appRoot,
workingDir,
@@ -568,6 +569,6 @@ export async function main(): Promise<void> {
thinkingLevel,
explicitModelSpec,
oneShotPrompt: values.prompt,
initialPrompt: resolveInitialPrompt(command, rest, values.prompt, new Set(readPromptSpecs(appRoot).filter((s) => s.topLevelCli).map((s) => s.name))),
initialPrompt: resolveInitialPrompt(command, rest, values.prompt, workflowCommandNames),
});
}

View File

@@ -123,6 +123,8 @@ export function buildPiEnv(options: PiRuntimeOptions): NodeJS.ProcessEnv {
FEYNMAN_BIN_PATH: resolve(options.appRoot, "bin", "feynman.js"),
FEYNMAN_NPM_PREFIX: feynmanNpmPrefixPath,
// Ensure the Pi child process uses Feynman's agent dir for auth/models/settings.
// Patched Pi uses FEYNMAN_CODING_AGENT_DIR; upstream Pi uses PI_CODING_AGENT_DIR.
FEYNMAN_CODING_AGENT_DIR: options.feynmanAgentDir,
PI_CODING_AGENT_DIR: options.feynmanAgentDir,
PANDOC_PATH: process.env.PANDOC_PATH ?? resolveExecutable("pandoc", PANDOC_FALLBACK_PATHS),
PI_HARDWARE_CURSOR: process.env.PI_HARDWARE_CURSOR ?? "1",

View File

@@ -127,19 +127,6 @@ export function normalizeFeynmanSettings(
settings.theme = "feynman";
settings.quietStartup = true;
settings.collapseChangelog = true;
settings.compaction = {
enabled: true,
reserveTokens: 16384,
keepRecentTokens: 20000,
...(settings.compaction && typeof settings.compaction === "object" ? settings.compaction : {}),
};
settings.retry = {
enabled: true,
maxRetries: 3,
baseDelayMs: 2000,
maxDelayMs: 60000,
...(settings.retry && typeof settings.retry === "object" ? settings.retry : {}),
};
const supportedCorePackages = filterPackageSourcesForCurrentNode(CORE_PACKAGE_SOURCES);
if (!Array.isArray(settings.packages) || settings.packages.length === 0) {
settings.packages = supportedCorePackages;

View File

@@ -12,11 +12,6 @@ import { buildModelStatusSnapshotFromRecords, getAvailableModelRecords, getSuppo
import { createModelRegistry, getModelsJsonPath } from "../model/registry.js";
import { getConfiguredServiceTier } from "../model/service-tier.js";
type ContextRiskSummary = {
level: "low" | "medium" | "high" | "unknown";
lines: string[];
};
function findProvidersMissingApiKey(modelsJsonPath: string): string[] {
try {
const raw = readFileSync(modelsJsonPath, "utf8").trim();
@@ -40,50 +35,6 @@ function findProvidersMissingApiKey(modelsJsonPath: string): string[] {
}
}
// Walk a nested settings object along `path` and return the finite number found
// there, or `fallback` when the path is missing, not indexable, or the value is
// not a finite number (strings, NaN, Infinity all fall back).
function numberSetting(settings: Record<string, unknown>, path: string[], fallback: number): number {
	let cursor: unknown = settings;
	for (const segment of path) {
		const indexable = Boolean(cursor) && typeof cursor === "object";
		if (!indexable) {
			return fallback;
		}
		cursor = (cursor as Record<string, unknown>)[segment];
	}
	if (typeof cursor === "number" && Number.isFinite(cursor)) {
		return cursor;
	}
	return fallback;
}
/**
 * Summarizes how likely the active model is to hit context-window pressure.
 *
 * Risk is bucketed purely by the raw context window (<64k high, <128k medium,
 * otherwise low); compaction/retry knobs are read from settings with the same
 * defaults Feynman seeds (reserve 16384, keepRecent 20000, maxRetries 3).
 * Returns "unknown" with a single explanatory line when no model is active.
 */
export function buildContextRiskSummary(
	settings: Record<string, unknown>,
	model: { provider: string; id: string; contextWindow: number; maxTokens: number; reasoning: boolean } | undefined,
): ContextRiskSummary {
	if (!model) {
		return {
			level: "unknown",
			lines: ["context risk: unknown (no active model)"],
		};
	}
	const reserveTokens = numberSetting(settings, ["compaction", "reserveTokens"], 16384);
	const keepRecentTokens = numberSetting(settings, ["compaction", "keepRecentTokens"], 20000);
	const retryMax = numberSetting(settings, ["retry", "maxRetries"], 3);
	// Tokens available before Pi's compaction reserve kicks in (never negative).
	const usableWindow = Math.max(0, model.contextWindow - reserveTokens);
	let level: ContextRiskSummary["level"];
	if (model.contextWindow < 64_000) {
		level = "high";
	} else if (model.contextWindow < 128_000) {
		level = "medium";
	} else {
		level = "low";
	}
	const lines = [
		`context risk: ${level}`,
		` model: ${model.provider}/${model.id}`,
		` context window: ${model.contextWindow}`,
		` usable before Pi compaction reserve: ${usableWindow}`,
		` Pi compaction: reserve=${reserveTokens}, keepRecent=${keepRecentTokens}`,
		` Pi retry: maxRetries=${retryMax}`,
		` reasoning: ${model.reasoning ? "supported" : "off/not supported"}`,
	];
	return { level, lines };
}
export type DoctorOptions = {
settingsPath: string;
authPath: string;
@@ -213,10 +164,6 @@ export function runDoctor(options: DoctorOptions): void {
: "not set"}`,
);
const modelStatus = collectStatusSnapshot(options);
const currentModel = typeof settings.defaultProvider === "string" && typeof settings.defaultModel === "string"
? modelRegistry.find(settings.defaultProvider, settings.defaultModel)
: undefined;
const contextRisk = buildContextRiskSummary(settings, currentModel);
console.log(`default model valid: ${modelStatus.modelValid ? "yes" : "no"}`);
console.log(`authenticated providers: ${modelStatus.authenticatedProviderCount}`);
console.log(`authenticated models: ${modelStatus.authenticatedModelCount}`);
@@ -225,9 +172,6 @@ export function runDoctor(options: DoctorOptions): void {
if (modelStatus.recommendedModelReason) {
console.log(` why: ${modelStatus.recommendedModelReason}`);
}
for (const line of contextRisk.lines) {
console.log(line);
}
const modelsError = modelRegistry.getError();
if (modelsError) {
console.log("models.json: error");

View File

@@ -65,11 +65,47 @@ test("deepresearch workflow requires durable artifacts even when blocked", () =>
assert.match(systemPrompt, /Do not claim you are only a static model/i);
assert.match(systemPrompt, /write the requested durable artifact/i);
assert.match(deepResearchPrompt, /Do not stop after planning/i);
assert.match(deepResearchPrompt, /not a request to explain or implement/i);
assert.match(deepResearchPrompt, /Do not answer by describing the protocol/i);
assert.match(deepResearchPrompt, /degraded mode/i);
assert.match(deepResearchPrompt, /Verification: BLOCKED/i);
assert.match(deepResearchPrompt, /Never end with only an explanation in chat/i);
});
// Guards the deepresearch prompt text: the citation pass must finish before
// any reviewer runs, verifier/reviewer may not share one parallel call, and
// revisions must land in draft files rather than one oversized edit.
test("deepresearch citation and review stages are sequential and avoid giant edits", () => {
	const deepResearchPrompt = readFileSync(join(repoRoot, "prompts", "deepresearch.md"), "utf8");
	assert.match(deepResearchPrompt, /must complete before any reviewer runs/i);
	assert.match(deepResearchPrompt, /Do not run the `verifier` and `reviewer` in the same parallel `subagent` call/i);
	assert.match(deepResearchPrompt, /outputs\/\.drafts\/<slug>-cited\.md/i);
	assert.match(deepResearchPrompt, /do not issue one giant `edit` tool call/i);
	assert.match(deepResearchPrompt, /outputs\/\.drafts\/<slug>-revised\.md/i);
	assert.match(deepResearchPrompt, /The final candidate is `outputs\/\.drafts\/<slug>-revised\.md` if it exists/i);
});
// Guards the deepresearch prompt's scale rules: narrow explainers stay
// lead-owned (no researcher subagents), direct search still requires multiple
// recorded queries, and subagent tool-call JSON stays small and schema-clean.
test("deepresearch keeps subagent tool calls small and skips subagents for narrow explainers", () => {
	const deepResearchPrompt = readFileSync(join(repoRoot, "prompts", "deepresearch.md"), "utf8");
	assert.match(deepResearchPrompt, /including "what is X" explainers/i);
	assert.match(deepResearchPrompt, /Make the scale decision before assigning owners/i);
	assert.match(deepResearchPrompt, /lead-owned direct search tasks only/i);
	assert.match(deepResearchPrompt, /MUST NOT spawn researcher subagents/i);
	assert.match(deepResearchPrompt, /Do not inflate a simple explainer into a multi-agent survey/i);
	assert.match(deepResearchPrompt, /Skip researcher spawning entirely/i);
	// Direct (lead-owned) research must still be multi-angle and auditable.
	assert.match(deepResearchPrompt, /Use multiple search terms\/angles before drafting/i);
	assert.match(deepResearchPrompt, /Minimum: 3 distinct queries/i);
	assert.match(deepResearchPrompt, /Record the exact search terms used/i);
	assert.match(deepResearchPrompt, /<slug>-research-direct\.md/i);
	assert.match(deepResearchPrompt, /Do not call `alpha_get_paper`/i);
	assert.match(deepResearchPrompt, /do not fetch `\.pdf` URLs/i);
	// Subagent call hygiene: small JSON, briefs on disk, no invented keys.
	assert.match(deepResearchPrompt, /Keep `subagent` tool-call JSON small and valid/i);
	assert.match(deepResearchPrompt, /write a per-researcher brief first/i);
	assert.match(deepResearchPrompt, /Do not place multi-paragraph instructions inside the `subagent` JSON/i);
	assert.match(deepResearchPrompt, /Do not add extra keys such as `artifacts`/i);
	assert.match(deepResearchPrompt, /always set `failFast: false`/i);
	assert.match(deepResearchPrompt, /if a PDF parser or paper fetch fails/i);
});
test("workflow prompts do not introduce implicit confirmation gates", () => {
const workflowPrompts = [
"audit.md",

View File

@@ -243,6 +243,10 @@ test("updateConfiguredPackages batches multiple npm updates into a single instal
` console.log(resolve(${JSON.stringify(root)}, "npm-global", "lib", "node_modules"));`,
` process.exit(0);`,
`}`,
`if (args.length >= 4 && args[0] === "view" && args[2] === "version" && args[3] === "--json") {`,
` console.log(JSON.stringify("2.0.0"));`,
` process.exit(0);`,
`}`,
`appendFileSync(${JSON.stringify(logPath)}, JSON.stringify(args) + "\\n", "utf8");`,
"process.exit(0);",
].join("\n"));
@@ -290,6 +294,10 @@ test("updateConfiguredPackages skips native package updates on unsupported Node
` console.log(resolve(${JSON.stringify(root)}, "npm-global", "lib", "node_modules"));`,
` process.exit(0);`,
`}`,
`if (args.length >= 4 && args[0] === "view" && args[2] === "version" && args[3] === "--json") {`,
` console.log(JSON.stringify("2.0.0"));`,
` process.exit(0);`,
`}`,
`appendFileSync(${JSON.stringify(logPath)}, JSON.stringify(args) + "\\n", "utf8");`,
"process.exit(0);",
].join("\n"));

View File

@@ -54,6 +54,7 @@ test("buildPiEnv wires Feynman paths into the Pi environment", () => {
assert.equal(env.FEYNMAN_NPM_PREFIX, "/home/.feynman/npm-global");
assert.equal(env.NPM_CONFIG_PREFIX, "/home/.feynman/npm-global");
assert.equal(env.npm_config_prefix, "/home/.feynman/npm-global");
assert.equal(env.FEYNMAN_CODING_AGENT_DIR, "/home/.feynman/agent");
assert.equal(env.PI_CODING_AGENT_DIR, "/home/.feynman/agent");
assert.ok(
env.PATH?.startsWith(

View File

@@ -83,7 +83,7 @@ for (const scenario of CASES) {
const patched = patchPiSubagentsSource(scenario.file, scenario.input);
assert.match(patched, /function resolvePiAgentDir\(\): string \{/);
assert.match(patched, /process\.env\.PI_CODING_AGENT_DIR\?\.trim\(\)/);
assert.match(patched, /process\.env\.FEYNMAN_CODING_AGENT_DIR\?\.trim\(\) \|\| process\.env\.PI_CODING_AGENT_DIR\?\.trim\(\)/);
assert.ok(patched.includes(scenario.expected));
assert.ok(!patched.includes(scenario.original));
});
@@ -141,6 +141,139 @@ test("patchPiSubagentsSource rewrites modern agents.ts discovery paths", () => {
assert.ok(!patched.includes('fs.existsSync(userDirNew) ? userDirNew : userDirOld'));
});
// Feeds a synthetic subagent-executor.ts through the patcher and checks that
// the TaskParam interface gains an `output` field, the parallel task mapping
// forwards `task.output`, and no agent-dir shim is injected into this file.
test("patchPiSubagentsSource preserves output on top-level parallel tasks", () => {
	const input = [
		"interface TaskParam {",
		"\tagent: string;",
		"\ttask: string;",
		"\tcwd?: string;",
		"\tcount?: number;",
		"\tmodel?: string;",
		"\tskill?: string | string[] | boolean;",
		"}",
		"function run(params: { tasks: TaskParam[] }) {",
		"\tconst modelOverrides = params.tasks.map(() => undefined);",
		"\tconst skillOverrides = params.tasks.map(() => undefined);",
		"\tconst parallelTasks = params.tasks.map((task, index) => ({",
		"\t\tagent: task.agent,",
		"\t\ttask: params.context === \"fork\" ? wrapForkTask(task.task) : task.task,",
		"\t\tcwd: task.cwd,",
		"\t\t...(modelOverrides[index] ? { model: modelOverrides[index] } : {}),",
		"\t\t...(skillOverrides[index] !== undefined ? { skill: skillOverrides[index] } : {}),",
		"\t}));",
		"}",
	].join("\n");
	const patched = patchPiSubagentsSource("subagent-executor.ts", input);
	assert.match(patched, /output\?: string \| false;/);
	assert.match(patched, /\n\t\toutput: task\.output,/);
	assert.doesNotMatch(patched, /resolvePiAgentDir/);
});
// Variant of the parallel-task patch check using the async handoff shape,
// where tasks are named `t` instead of `task` in the map callback.
test("patchPiSubagentsSource preserves output in async parallel task handoff", () => {
	const input = [
		"function run(tasks: TaskParam[]) {",
		"\tconst modelOverrides = tasks.map(() => undefined);",
		"\tconst skillOverrides = tasks.map(() => undefined);",
		"\tconst parallelTasks = tasks.map((t, i) => ({",
		"\t\tagent: t.agent,",
		"\t\ttask: params.context === \"fork\" ? wrapForkTask(taskTexts[i]!) : taskTexts[i]!,",
		"\t\tcwd: t.cwd,",
		"\t\t...(modelOverrides[i] ? { model: modelOverrides[i] } : {}),",
		"\t\t...(skillOverrides[i] !== undefined ? { skill: skillOverrides[i] } : {}),",
		"\t}));",
		"}",
	].join("\n");
	const patched = patchPiSubagentsSource("subagent-executor.ts", input);
	assert.match(patched, /\n\t\toutput: t\.output,/);
});
// Both resolveStepBehavior call sites (the clarify branch and the default
// branch) must be rewritten to thread the per-task output path through;
// the bare `resolveStepBehavior(config, {})` form must no longer survive.
test("patchPiSubagentsSource uses task output when resolving foreground parallel behavior", () => {
	const input = [
		"async function run(tasks: TaskParam[]) {",
		"\tconst skillOverrides = tasks.map((t) => normalizeSkillInput(t.skill));",
		"\tif (params.clarify === true && ctx.hasUI) {",
		"\t\tconst behaviors = agentConfigs.map((c, i) =>",
		"\t\t\tresolveStepBehavior(c, { skills: skillOverrides[i] }),",
		"\t\t);",
		"\t}",
		"\tconst behaviors = agentConfigs.map((config) => resolveStepBehavior(config, {}));",
		"}",
	].join("\n");
	const patched = patchPiSubagentsSource("subagent-executor.ts", input);
	assert.match(patched, /resolveStepBehavior\(c, \{ output: tasks\[i\]\?\.output, skills: skillOverrides\[i\] \}\)/);
	assert.match(patched, /resolveStepBehavior\(config, \{ output: tasks\[i\]\?\.output, skills: skillOverrides\[i\] \}\)/);
	assert.doesNotMatch(patched, /resolveStepBehavior\(config, \{\}\)/);
});
// The foreground parallel runner must be patched to compute each task's
// output path from behaviors, inject the output instruction into the task
// text, and pass both the rewritten text and `outputPath` into runSync.
test("patchPiSubagentsSource passes foreground parallel output paths into runSync", () => {
	const input = [
		"async function runForegroundParallelTasks(input: ForegroundParallelRunInput): Promise<SingleResult[]> {",
		"\treturn mapConcurrent(input.tasks, input.concurrencyLimit, async (task, index) => {",
		"\t\tconst overrideSkills = input.skillOverrides[index];",
		"\t\tconst effectiveSkills = overrideSkills === undefined ? input.behaviors[index]?.skills : overrideSkills;",
		"\t\tconst taskCwd = resolveParallelTaskCwd(task, input.paramsCwd, input.worktreeSetup, index);",
		"\t\treturn runSync(input.ctx.cwd, input.agents, task.agent, input.taskTexts[index]!, {",
		"\t\t\tcwd: taskCwd,",
		"\t\t\tsignal: input.signal,",
		"\t\t\tmaxOutput: input.maxOutput,",
		"\t\t\tmaxSubagentDepth: input.maxSubagentDepths[index],",
		"\t\t});",
		"\t});",
		"}",
	].join("\n");
	const patched = patchPiSubagentsSource("subagent-executor.ts", input);
	assert.match(patched, /const outputPath = typeof input\.behaviors\[index\]\?\.output === "string"/);
	assert.match(patched, /const taskText = injectSingleOutputInstruction\(input\.taskTexts\[index\]!, outputPath\)/);
	assert.match(patched, /runSync\(input\.ctx\.cwd, input\.agents, task\.agent, taskText, \{/);
	assert.match(patched, /\n\t\t\toutputPath,/);
});
// The TypeBox schema patch must add an `output` member to TaskItem and update
// the parallel-mode description to advertise it; schemas.ts gets no agent-dir
// shim. (Some source strings intentionally carry trailing spaces.)
test("patchPiSubagentsSource documents output in top-level task schema", () => {
	const input = [
		"export const TaskItem = Type.Object({ ",
		"\tagent: Type.String(), ",
		"\ttask: Type.String(), ",
		"\tcwd: Type.Optional(Type.String()),",
		"\tcount: Type.Optional(Type.Integer({ minimum: 1, description: \"Repeat this parallel task N times with the same settings.\" })),",
		"\tmodel: Type.Optional(Type.String({ description: \"Override model for this task (e.g. 'google/gemini-3-pro')\" })),",
		"\tskill: Type.Optional(SkillOverride),",
		"});",
		"export const SubagentParams = Type.Object({",
		"\ttasks: Type.Optional(Type.Array(TaskItem, { description: \"PARALLEL mode: [{agent, task, count?}, ...]\" })),",
		"});",
	].join("\n");
	const patched = patchPiSubagentsSource("schemas.ts", input);
	assert.match(patched, /output: Type\.Optional\(Type\.Any/);
	assert.match(patched, /count\?, output\?/);
	assert.doesNotMatch(patched, /resolvePiAgentDir/);
});
// The help-text patch for index.ts must mention the per-task `output?` option
// and describe it as a file target, without injecting the agent-dir resolver.
test("patchPiSubagentsSource documents output in top-level parallel help", () => {
	const input = [
		'import * as os from "node:os";',
		'import * as path from "node:path";',
		"const help = `",
		"• PARALLEL: { tasks: [{agent,task,count?}, ...], concurrency?: number, worktree?: true } - concurrent execution (worktree: isolate each task in a git worktree)",
		"`;",
	].join("\n");
	const patched = patchPiSubagentsSource("index.ts", input);
	assert.match(patched, /output\?/);
	assert.match(patched, /per-task file target/);
	assert.doesNotMatch(patched, /function resolvePiAgentDir/);
});
test("stripPiSubagentBuiltinModelSource removes built-in model pins", () => {
const input = [
"---",

View File

@@ -1,156 +0,0 @@
import test from "node:test";
import assert from "node:assert/strict";
import { existsSync, mkdirSync, mkdtempSync, readFileSync, utimesSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { dirname, join, resolve } from "node:path";
import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
import { formatToolResultWithSpillover } from "../extensions/research-tools/alpha.js";
import { autoLogPath, writeAutoLogEntry } from "../extensions/research-tools/autolog.js";
import { computeContextPosture } from "../extensions/research-tools/context.js";
import { buildResumePacket } from "../extensions/research-tools/resume.js";
import { buildContextRiskSummary } from "../src/setup/doctor.js";
import { claimPlanSlug, collectManagedGc, spillLargeCustomToolResult } from "../extensions/research-tools/state.js";
// Minimal stand-in for Pi's ExtensionContext: a small model with a 32k window
// reported at 75% usage and a fixed session id. The `as unknown as` cast is
// deliberate — only the fields the extensions under test read are provided.
function fakeCtx(cwd: string): ExtensionContext {
	return {
		cwd,
		model: {
			provider: "test",
			id: "small",
			contextWindow: 32_000,
		},
		getContextUsage: () => ({
			tokens: 24_000,
			contextWindow: 32_000,
			percent: 75,
		}),
		sessionManager: {
			getSessionId: () => "session-1",
		},
	} as unknown as ExtensionContext;
}
// Oversized alphaXiv tool output must be redirected into outputs/.cache/ and
// replaced by a small JSON pointer marked `feynman_spillover`.
test("alpha tool spillover writes oversized output to outputs cache", () => {
	const root = mkdtempSync(join(tmpdir(), "feynman-spill-"));
	// Shrink the cap via env var so a 500-char payload trips the spillover.
	const originalCap = process.env.FEYNMAN_TOOL_OUTPUT_CAP_CHARS;
	process.env.FEYNMAN_TOOL_OUTPUT_CAP_CHARS = "64";
	try {
		const result = formatToolResultWithSpillover(fakeCtx(root), "alpha_get_paper", { text: "x".repeat(500) });
		const parsed = JSON.parse(result.text) as { path: string; feynman_spillover: boolean };
		assert.equal(parsed.feynman_spillover, true);
		assert.equal(existsSync(parsed.path), true);
		// Full payload is preserved on disk, and the path encodes the tool name.
		assert.match(readFileSync(parsed.path, "utf8"), /xxxxx/);
		assert.match(parsed.path, /outputs\/\.cache\/alpha_get_paper-/);
	} finally {
		// Restore the env var so later tests see the original cap.
		if (originalCap === undefined) {
			delete process.env.FEYNMAN_TOOL_OUTPUT_CAP_CHARS;
		} else {
			process.env.FEYNMAN_TOOL_OUTPUT_CAP_CHARS = originalCap;
		}
	}
});
// The posture report must mirror fakeCtx's getContextUsage numbers directly
// (24k of 32k) and derive its flags from them rather than re-estimating.
test("context_report posture uses Pi context usage directly", () => {
	const report = computeContextPosture(fakeCtx("/tmp"));
	assert.equal(report.model, "test/small");
	assert.equal(report.contextWindow, 32_000);
	assert.equal(report.estimatedInputTokens, 24_000);
	assert.equal(report.compactionThresholdHit, true);
	assert.equal(report.recommendedMaxWorkers, 1);
});
// A single autolog entry must land at the dated JSONL path under notes/ and
// round-trip through JSON unchanged.
test("autolog writes dated jsonl entries under notes", () => {
	const root = mkdtempSync(join(tmpdir(), "feynman-autolog-"));
	writeAutoLogEntry(root, { role: "user", text: "hello" });
	const path = autoLogPath(root);
	assert.equal(existsSync(path), true);
	assert.deepEqual(JSON.parse(readFileSync(path, "utf8").trim()), { role: "user", text: "hello" });
});
// With a plan, job-state file, and CHANGELOG on disk, the resume packet must
// be produced and mention both the recent plan and the changelog tail.
test("resume packet summarizes recent plans and changelog from disk", () => {
	const root = mkdtempSync(join(tmpdir(), "feynman-resume-"));
	mkdirSync(resolve(root, "outputs", ".plans"), { recursive: true });
	mkdirSync(resolve(root, "outputs", ".state"), { recursive: true });
	const planPath = resolve(root, "outputs", ".plans", "demo.md");
	const statePath = resolve(root, "outputs", ".state", "demo.jobs.jsonl");
	writeFileSyncSafe(planPath, "# Plan\n\n- next step");
	writeFileSyncSafe(statePath, "{\"status\":\"running\"}\n");
	writeFileSyncSafe(resolve(root, "CHANGELOG.md"), "## Entry\n- verified\n");
	const packet = buildResumePacket(root);
	assert.ok(packet);
	assert.match(packet!, /Recent plans/);
	assert.match(packet!, /demo\.md/);
	assert.match(packet!, /CHANGELOG tail/);
});
// A 32k-window model must be bucketed as high risk, and the summary lines
// must echo the compaction/retry values passed in settings (not the defaults).
test("doctor context risk uses Pi model context window and compaction settings", () => {
	const summary = buildContextRiskSummary(
		{ compaction: { reserveTokens: 4096, keepRecentTokens: 8000 }, retry: { maxRetries: 2 } },
		{ provider: "local", id: "qwen", contextWindow: 32_000, maxTokens: 4096, reasoning: true },
	);
	assert.equal(summary.level, "high");
	assert.match(summary.lines.join("\n"), /Pi compaction: reserve=4096, keepRecent=8000/);
	assert.match(summary.lines.join("\n"), /Pi retry: maxRetries=2/);
});
// Claiming a plan slug for a path that already exists (written by some other
// session) must be refused with an explanatory reason.
test("slug lock blocks overwriting an existing plan from another session", () => {
	const root = mkdtempSync(join(tmpdir(), "feynman-slug-"));
	const planPath = resolve(root, "outputs", ".plans", "demo.md");
	writeFileSyncSafe(planPath, "# Existing\n");
	const result = claimPlanSlug(root, "session-2", "outputs/.plans/demo.md");
	assert.equal(result.ok, false);
	if (!result.ok) {
		assert.match(result.reason, /Plan already exists/);
	}
});
// A cache file backdated 30 days (beyond the 14-day retention) must be listed
// by a dry run without deletion, then actually deleted on the real pass.
test("managed cache gc deletes stale cache files and honors dry-run", () => {
	const root = mkdtempSync(join(tmpdir(), "feynman-gc-"));
	const cachePath = resolve(root, "outputs", ".cache", "old.md");
	writeFileSyncSafe(cachePath, "old");
	// Backdate atime/mtime so the file looks 30 days old.
	const old = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000);
	utimesSync(cachePath, old, old);
	const preview = collectManagedGc(root, Date.now(), 14, { dryRun: true });
	assert.equal(preview.deleted.length, 1);
	assert.equal(existsSync(cachePath), true);
	const actual = collectManagedGc(root, Date.now(), 14);
	assert.equal(actual.deleted.length, 1);
	assert.equal(existsSync(cachePath), false);
});
// Oversized custom/subagent tool results must spill to outputs/.runs/ with a
// path that encodes the tool name and call id, returning a spillover pointer.
test("large custom tool results spill to outputs runs", () => {
	const root = mkdtempSync(join(tmpdir(), "feynman-subagent-spill-"));
	// Shrink the cap via env var so a 200-char payload trips the spillover.
	const originalCap = process.env.FEYNMAN_CUSTOM_TOOL_CAP_CHARS;
	process.env.FEYNMAN_CUSTOM_TOOL_CAP_CHARS = "50";
	try {
		const result = spillLargeCustomToolResult(
			root,
			"subagent",
			"call-1",
			[{ type: "text", text: "x".repeat(200) }],
			{ ok: true },
		);
		assert.ok(result);
		const parsed = JSON.parse(result!.content[0]!.text) as { path: string; feynman_spillover: boolean };
		assert.equal(parsed.feynman_spillover, true);
		assert.match(parsed.path, /outputs\/\.runs\/subagent-call-1-/);
		assert.equal(existsSync(parsed.path), true);
	} finally {
		// Restore the env var so later tests see the original cap.
		if (originalCap === undefined) {
			delete process.env.FEYNMAN_CUSTOM_TOOL_CAP_CHARS;
		} else {
			process.env.FEYNMAN_CUSTOM_TOOL_CAP_CHARS = originalCap;
		}
	}
});
/**
 * Writes `text` to `path` as UTF-8, creating any missing parent directories
 * first so tests can target nested fixture paths in one call.
 */
function writeFileSyncSafe(path: string, text: string): void {
	const parent = dirname(path);
	mkdirSync(parent, { recursive: true });
	writeFileSync(path, text, "utf8");
}

View File

@@ -1544,9 +1544,9 @@
}
},
"node_modules/@hono/node-server": {
"version": "1.19.13",
"resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.13.tgz",
"integrity": "sha512-TsQLe4i2gvoTtrHje625ngThGBySOgSK3Xo2XRYOdqGN1teR8+I7vchQC46uLJi8OF62YTYA3AhSpumtkhsaKQ==",
"version": "1.19.14",
"resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.14.tgz",
"integrity": "sha512-GwtvgtXxnWsucXvbQXkRgqksiH2Qed37H9xHZocE5sA3N8O8O8/8FA3uclQXxXVzc9XBZuEOMK7+r02FmSpHtw==",
"license": "MIT",
"engines": {
"node": ">=18.14.1"
@@ -7998,9 +7998,9 @@
}
},
"node_modules/hono": {
"version": "4.12.12",
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.12.tgz",
"integrity": "sha512-p1JfQMKaceuCbpJKAPKVqyqviZdS0eUxH9v82oWo1kb9xjQ5wA6iP3FNVAPDFlz5/p7d45lO+BpSk1tuSZMF4Q==",
"version": "4.12.14",
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.14.tgz",
"integrity": "sha512-am5zfg3yu6sqn5yjKBNqhnTX7Cv+m00ox+7jbaKkrLMRJ4rAdldd1xPd/JzbBWspqaQv6RSTrgFN95EsfhC+7w==",
"license": "MIT",
"engines": {
"node": ">=16.9.0"

View File

@@ -36,8 +36,8 @@
},
"overrides": {
"@modelcontextprotocol/sdk": {
"@hono/node-server": "1.19.13",
"hono": "4.12.12"
"@hono/node-server": "1.19.14",
"hono": "4.12.14"
},
"router": {
"path-to-regexp": "8.4.2"

View File

@@ -261,7 +261,7 @@ This usually means the release exists, but not all platform bundles were uploade
Workarounds:
- try again after the release finishes publishing
- pass the latest published version explicitly, e.g.:
curl -fsSL https://feynman.is/install | bash -s -- 0.2.27
curl -fsSL https://feynman.is/install | bash -s -- 0.2.31
EOF
exit 1
fi

View File

@@ -110,7 +110,7 @@ This usually means the release exists, but not all platform bundles were uploade
Workarounds:
- try again after the release finishes publishing
- pass the latest published version explicitly, e.g.:
& ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.27
& ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.31
"@
}

View File

@@ -117,13 +117,13 @@ These installers download the bundled `skills/` and `prompts/` trees plus the re
The one-line installer already targets the latest tagged release. To pin an exact version, pass it explicitly:
```bash
curl -fsSL https://feynman.is/install | bash -s -- 0.2.27
curl -fsSL https://feynman.is/install | bash -s -- 0.2.31
```
On Windows:
```powershell
& ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.27
& ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.31
```
## Post-install setup

View File

@@ -22,7 +22,9 @@ These are installed by default with every Feynman installation. They provide the
| `pi-mermaid` | Render Mermaid diagrams in the terminal UI |
| `@aliou/pi-processes` | Manage long-running experiments, background tasks, and log tailing |
| `pi-zotero` | Integration with Zotero for citation library management |
| `@kaiserlich-dev/pi-session-search` | Indexed session recall with summarize and resume UI. Powers session lookup |
| `pi-schedule-prompt` | Schedule recurring and deferred research jobs. Powers the `/watch` workflow |
| `@samfp/pi-memory` | Pi-managed preference and correction memory across sessions |
| `@tmustier/pi-ralph-wiggum` | Long-running agent loops for iterative development. Powers `/autoresearch` |
These packages are updated together when you run `feynman update`. You do not need to install them individually.
@@ -34,8 +36,6 @@ Install on demand with `feynman packages install <preset>`. These extend Feynman
| Package | Preset | Purpose |
| --- | --- | --- |
| `pi-generative-ui` | `generative-ui` | Interactive HTML-style widgets for rich output |
| `@kaiserlich-dev/pi-session-search` | `session-search` | Indexed session recall with summarize and resume UI. Powers `/search` |
| `@samfp/pi-memory` | `memory` | Automatic preference and correction memory across sessions |
## Installing and managing packages
@@ -48,17 +48,9 @@ feynman packages list
Install a specific optional preset:
```bash
feynman packages install session-search
feynman packages install memory
feynman packages install generative-ui
```
Install all optional packages at once:
```bash
feynman packages install all-extras
```
## Updating packages
Update all installed packages to their latest versions: