diff --git a/README.md b/README.md index 84126d3..d7da806 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ curl -fsSL https://feynman.is/install | bash irm https://feynman.is/install.ps1 | iex ``` -The one-line installer fetches the latest tagged release. To pin a version, pass it explicitly, for example `curl -fsSL https://feynman.is/install | bash -s -- 0.2.26`. +The one-line installer fetches the latest tagged release. To pin a version, pass it explicitly, for example `curl -fsSL https://feynman.is/install | bash -s -- 0.2.27`. The installer downloads a standalone native bundle with its own Node.js runtime. @@ -35,7 +35,7 @@ To uninstall the standalone app, remove the launcher and runtime bundle, then op Local models are supported through the setup flow. For LM Studio, run `feynman setup`, choose `LM Studio`, and keep the default `http://localhost:1234/v1` unless you changed the server port. For LiteLLM, choose `LiteLLM Proxy` and keep the default `http://localhost:4000/v1`. For Ollama or vLLM, choose `Custom provider (baseUrl + API key)`, use `openai-completions`, and point it at the local `/v1` endpoint. -Feynman uses Pi's own runtime hooks for context hygiene: Pi compaction/retry settings are seeded by default, `context_report` exposes the current Pi context usage to the model, oversized alphaXiv tool returns spill to `outputs/.cache/`, and a bounded resume packet is injected from `outputs/.plans/`, `outputs/.state/`, and `CHANGELOG.md` when those files exist. Automatic session logging writes JSONL snippets to `notes/feynman-autolog/`; set `FEYNMAN_AUTO_LOG=off` to disable it or `FEYNMAN_AUTO_LOG=full` for full text. +Feynman uses Pi's own runtime hooks for context hygiene: Pi compaction/retry settings are seeded by default, `context_report` exposes the current Pi context usage to the model, oversized alphaXiv tool returns spill to `outputs/.cache/`, oversized custom/subagent returns spill to `outputs/.runs/`, and a bounded resume packet is injected from `outputs/.plans/`, `outputs/.state/`, and `CHANGELOG.md` when those files exist. Automatic session logging writes JSONL snippets to `notes/feynman-autolog/`; set `FEYNMAN_AUTO_LOG=off` to disable it or `FEYNMAN_AUTO_LOG=full` for full text. Feynman also locks new plan slugs under `outputs/.state/` to prevent concurrent workflow collisions and garbage-collects stale managed caches on startup. ### Skills Only diff --git a/extensions/research-tools.ts b/extensions/research-tools.ts index fb46c93..ae1d5b9 100644 --- a/extensions/research-tools.ts +++ b/extensions/research-tools.ts @@ -10,6 +10,7 @@ import { registerHelpCommand } from "./research-tools/help.js"; import { registerInitCommand, registerOutputsCommand } from "./research-tools/project.js"; import { registerResumePacket } from "./research-tools/resume.js"; import { registerServiceTierControls } from "./research-tools/service-tier.js"; +import { registerStateManagement } from "./research-tools/state.js"; export default function researchTools(pi: ExtensionAPI): void { const cache: { agentSummaryPromise?: Promise<{ agents: string[]; chains: string[] }> } = {}; @@ -29,4 +30,5 @@ export default function researchTools(pi: ExtensionAPI): void { registerOutputsCommand(pi); registerResumePacket(pi); registerServiceTierControls(pi); + registerStateManagement(pi); } diff --git a/extensions/research-tools/project.ts b/extensions/research-tools/project.ts index 4530846..9dfcb2c 100644 --- a/extensions/research-tools/project.ts +++ b/extensions/research-tools/project.ts @@ -5,6 +5,7 @@ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { getExtensionCommandSpec } from "../../metadata/commands.mjs"; import { buildProjectAgentsTemplate, buildSessionLogsReadme } from "./project-scaffold.js"; +import { collectManagedGc } from "./state.js"; async function pathExists(path: string): Promise { try { @@ -104,7 +105,15 @@ export function registerInitCommand(pi: ExtensionAPI): void { export function registerOutputsCommand(pi: ExtensionAPI): void { pi.registerCommand("outputs", { description: "Browse all research artifacts (papers, outputs, experiments, notes).", - handler: async (_args, ctx) => { + handler: async (args, ctx) => { + const trimmedArgs = args.trim(); + if (trimmedArgs === "gc" || trimmedArgs === "gc --dry-run") { + const dryRun = trimmedArgs.includes("--dry-run"); + const result = collectManagedGc(ctx.cwd, Date.now(), undefined, { dryRun }); + ctx.ui.notify(`${dryRun ? "Would remove" : "Removed"} ${result.deleted.length} managed cache file(s).`, "info"); + return; + } + const items = await collectArtifacts(ctx.cwd); if (items.length === 0) { ctx.ui.notify("No artifacts found. Use /lit, /draft, /review, or /deepresearch to create some.", "info"); diff --git a/extensions/research-tools/state.ts b/extensions/research-tools/state.ts new file mode 100644 index 0000000..d8bbd93 --- /dev/null +++ b/extensions/research-tools/state.ts @@ -0,0 +1,276 @@ +import { createHash } from "node:crypto"; +import { appendFileSync, existsSync, mkdirSync, readdirSync, readFileSync, rmSync, statSync, writeFileSync } from "node:fs"; +import { basename, dirname, relative, resolve } from "node:path"; + +import { isToolCallEventType, type ExtensionAPI, type ExtensionContext, type ToolCallEvent } from "@mariozechner/pi-coding-agent"; + +type SlugLock = { + pid: number; + sessionId: string; + startedAt: string; + planPath: string; +}; + +type GcResult = { + deleted: string[]; + kept: string[]; +}; + +type SpillResult = { + content: { type: "text"; text: string }[]; + details: unknown; +} | undefined; + +type ToolResultPatch = { + content?: { type: "text"; text: string }[]; + details?: unknown; + isError?: boolean; +}; + +const BUILT_IN_TOOL_NAMES = new Set(["bash", "read", "write", "edit", "grep", "find", "ls"]); + +function isPathInside(parent: string, child: string): boolean { + const rel = relative(parent, child); + return rel === "" || (!rel.startsWith("..") && !rel.startsWith("/")); +} + +function pidIsLive(pid: number): boolean { + if (!Number.isInteger(pid) || pid <= 0) return false; + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } +} + +function readLock(path: string): SlugLock | undefined { + try { + return JSON.parse(readFileSync(path, "utf8")) as SlugLock; + } catch { + return undefined; + } +} + +function lockIsLive(lock: SlugLock | undefined, timeoutMs: number, now = Date.now()): boolean { + if (!lock) return false; + const started = Date.parse(lock.startedAt); + if (!Number.isFinite(started) || now - started > timeoutMs) return false; + return pidIsLive(lock.pid); +} + +function planPathInfo(cwd: string, inputPath: string): { absPath: string; slug: string; lockPath: string } | undefined { + const absPath = resolve(cwd, inputPath); + const plansRoot = resolve(cwd, "outputs", ".plans"); + if (!isPathInside(plansRoot, absPath) || !absPath.endsWith(".md")) return undefined; + const slug = basename(absPath, ".md"); + const lockPath = resolve(cwd, "outputs", ".state", `${slug}.lock`); + return { absPath, slug, lockPath }; +} + +export function claimPlanSlug( + cwd: string, + sessionId: string, + inputPath: string, + options?: { timeoutMinutes?: number; strategy?: "suffix" | "error" | "overwrite"; now?: number }, +): { ok: true; lockPath?: string } | { ok: false; reason: string } { + const info = planPathInfo(cwd, inputPath); + if (!info) return { ok: true }; + + const strategy = options?.strategy ?? (process.env.FEYNMAN_SLUG_COLLISION_STRATEGY as "suffix" | "error" | "overwrite" | undefined) ?? "error"; + if (strategy === "overwrite") return { ok: true }; + + const timeoutMinutes = options?.timeoutMinutes ?? (Number(process.env.FEYNMAN_SLUG_LOCK_TIMEOUT_MINUTES) || 30); + const timeoutMs = timeoutMinutes * 60_000; + const existingLock = readLock(info.lockPath); + const live = lockIsLive(existingLock, timeoutMs, options?.now); + if (live && existingLock?.sessionId !== sessionId) { + return { + ok: false, + reason: `Slug "${info.slug}" is locked by another Feynman session. Use a unique slug such as ${info.slug}-2, or wait for ${info.lockPath} to expire.`, + }; + } + if (existsSync(info.absPath) && existingLock?.sessionId !== sessionId) { + return { + ok: false, + reason: `Plan already exists at ${relative(cwd, info.absPath)}. Use a unique slug such as ${info.slug}-2 to avoid overwriting another run.`, + }; + } + + mkdirSync(dirname(info.lockPath), { recursive: true }); + writeFileSync( + info.lockPath, + JSON.stringify({ + pid: process.pid, + sessionId, + startedAt: new Date(options?.now ?? Date.now()).toISOString(), + planPath: info.absPath, + }, null, 2) + "\n", + "utf8", + ); + return { ok: true, lockPath: info.lockPath }; +} + +function managedRetentionDays(): number { + const raw = Number(process.env.FEYNMAN_CACHE_RETENTION_DAYS); + return Number.isFinite(raw) && raw >= 0 ? raw : 14; +} + +function gcIgnored(path: string): boolean { + if (path.endsWith(".gcignore")) return true; + try { + return /^---[\s\S]*?retain:\s*true/im.test(readFileSync(path, "utf8").slice(0, 500)); + } catch { + return false; + } +} + +export function collectManagedGc( + cwd: string, + now = Date.now(), + retentionDays = managedRetentionDays(), + options?: { dryRun?: boolean }, +): GcResult { + const roots = [ + resolve(cwd, "outputs", ".cache"), + resolve(cwd, "outputs", ".runs"), + resolve(cwd, "outputs", ".notes"), + ]; + const cutoff = now - retentionDays * 24 * 60 * 60 * 1000; + const result: GcResult = { deleted: [], kept: [] }; + + const visit = (path: string) => { + if (!existsSync(path)) return; + for (const entry of readdirSync(path, { withFileTypes: true })) { + const child = resolve(path, entry.name); + if (entry.isDirectory()) { + visit(child); + try { + if (readdirSync(child).length === 0) rmSync(child, { recursive: true, force: true }); + } catch {} + continue; + } + if (!entry.isFile()) continue; + const stat = statSync(child); + if (gcIgnored(child) || stat.mtimeMs >= cutoff) { + result.kept.push(child); + continue; + } + if (!options?.dryRun) { + rmSync(child, { force: true }); + } + result.deleted.push(child); + } + }; + + for (const root of roots) visit(root); + return result; +} + +function textFromToolContent(content: ToolResultContent): string { + return content + .map((item) => item.type === "text" ? item.text : "") + .filter(Boolean) + .join("\n"); +} + +type ToolResultContent = Array<{ type: "text"; text: string } | { type: "image"; data: string; mimeType: string }>; + +function customToolOutputCapChars(): number { + const raw = Number(process.env.FEYNMAN_CUSTOM_TOOL_CAP_CHARS); + return Number.isFinite(raw) && raw > 0 ? raw : 24_000; +} + +export function spillLargeCustomToolResult( + cwd: string, + toolName: string, + toolCallId: string, + content: ToolResultContent, + details: unknown, +): SpillResult { + if (BUILT_IN_TOOL_NAMES.has(toolName)) return undefined; + const text = textFromToolContent(content); + const cap = customToolOutputCapChars(); + if (text.length <= cap) return undefined; + + const hash = createHash("sha256").update(text).digest("hex"); + const safeToolName = toolName.replace(/[^a-zA-Z0-9._-]+/g, "-").slice(0, 60) || "tool"; + const path = resolve(cwd, "outputs", ".runs", `${safeToolName}-${toolCallId}-${hash.slice(0, 12)}.md`); + mkdirSync(dirname(path), { recursive: true }); + writeFileSync(path, text, "utf8"); + const pointer = { + feynman_spillover: true, + tool: toolName, + toolCallId, + path, + bytes: Buffer.byteLength(text, "utf8"), + sha256: hash, + head: text.slice(0, Math.min(cap, 2_000)), + note: "Full custom/subagent tool result was written to disk. Read the path in bounded chunks when needed.", + originalDetails: details, + }; + return { + content: [{ type: "text", text: JSON.stringify(pointer, null, 2) }], + details: pointer, + }; +} + +function appendJsonl(path: string, value: unknown): void { + mkdirSync(dirname(path), { recursive: true }); + appendFileSync(path, `${JSON.stringify(value)}\n`, "utf8"); +} + +function recordCheckpoint(ctx: ExtensionContext, toolName: string, isError: boolean): void { + appendJsonl(resolve(ctx.cwd, "outputs", ".state", "feynman.checkpoint.jsonl"), { + timestamp: new Date().toISOString(), + sessionId: ctx.sessionManager.getSessionId(), + toolName, + isError, + context: ctx.getContextUsage?.(), + }); +} + +function recordJobEvent(ctx: ExtensionContext, toolName: string, status: "running" | "done" | "failed", data: unknown): void { + appendJsonl(resolve(ctx.cwd, "outputs", ".state", "subagent.jobs.jsonl"), { + timestamp: new Date().toISOString(), + sessionId: ctx.sessionManager.getSessionId(), + toolName, + status, + data, + }); +} + +function looksLikeSubagentTool(toolName: string): boolean { + return /subagent|parallel|chain|run/i.test(toolName); +} + +export function registerStateManagement(pi: ExtensionAPI): void { + pi.on("session_start", async (_event, ctx) => { + if (process.env.FEYNMAN_OUTPUTS_GC === "off") return; + collectManagedGc(ctx.cwd); + }); + + pi.on("tool_call", async (event: ToolCallEvent, ctx) => { + const sessionId = ctx.sessionManager.getSessionId(); + if (isToolCallEventType("write", event)) { + const claim = claimPlanSlug(ctx.cwd, sessionId, event.input.path); + if (!claim.ok) return { block: true, reason: claim.reason }; + } + if (isToolCallEventType("edit", event)) { + const claim = claimPlanSlug(ctx.cwd, sessionId, event.input.path); + if (!claim.ok) return { block: true, reason: claim.reason }; + } + if (looksLikeSubagentTool(event.toolName)) { + recordJobEvent(ctx, event.toolName, "running", event.input); + } + return undefined; + }); + + pi.on("tool_result", async (event, ctx): Promise => { + recordCheckpoint(ctx, event.toolName, event.isError); + if (looksLikeSubagentTool(event.toolName)) { + recordJobEvent(ctx, event.toolName, event.isError ? "failed" : "done", event.details ?? event.content); + } + return spillLargeCustomToolResult(ctx.cwd, event.toolName, event.toolCallId, event.content as ToolResultContent, event.details); + }); +} diff --git a/package-lock.json b/package-lock.json index c7692a7..066c7ee 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@companion-ai/feynman", - "version": "0.2.26", + "version": "0.2.27", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@companion-ai/feynman", - "version": "0.2.26", + "version": "0.2.27", "hasInstallScript": true, "license": "MIT", "dependencies": { diff --git a/package.json b/package.json index 2fc48ab..ea284f0 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@companion-ai/feynman", - "version": "0.2.26", + "version": "0.2.27", "description": "Research-first CLI agent built on Pi and alphaXiv", "license": "MIT", "type": "module", diff --git a/scripts/install/install.ps1 b/scripts/install/install.ps1 index 5d9c1b0..eae85b9 100644 --- a/scripts/install/install.ps1 +++ b/scripts/install/install.ps1 @@ -110,7 +110,7 @@ This usually means the release exists, but not all platform bundles were uploade Workarounds: - try again after the release finishes publishing - pass the latest published version explicitly, e.g.: - & ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.26 + & ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.27 "@ } diff --git a/scripts/install/install.sh b/scripts/install/install.sh index 61cc550..06dd1b2 100644 --- a/scripts/install/install.sh +++ b/scripts/install/install.sh @@ -261,7 +261,7 @@ This usually means the release exists, but not all platform bundles were uploade Workarounds: - try again after the release finishes publishing - pass the latest published version explicitly, e.g.: - curl -fsSL https://feynman.is/install | bash -s -- 0.2.26 + curl -fsSL https://feynman.is/install | bash -s -- 0.2.27 EOF exit 1 fi diff --git a/tests/research-tools-extension.test.ts b/tests/research-tools-extension.test.ts index 6b8a822..303df04 100644 --- a/tests/research-tools-extension.test.ts +++ b/tests/research-tools-extension.test.ts @@ -1,6 +1,6 @@ import test from "node:test"; import assert from "node:assert/strict"; -import { existsSync, mkdirSync, mkdtempSync, readFileSync, writeFileSync } from "node:fs"; +import { existsSync, mkdirSync, mkdtempSync, readFileSync, utimesSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { dirname, join, resolve } from "node:path"; @@ -11,6 +11,7 @@ import { autoLogPath, writeAutoLogEntry } from "../extensions/research-tools/aut import { computeContextPosture } from "../extensions/research-tools/context.js"; import { buildResumePacket } from "../extensions/research-tools/resume.js"; import { buildContextRiskSummary } from "../src/setup/doctor.js"; +import { claimPlanSlug, collectManagedGc, spillLargeCustomToolResult } from "../extensions/research-tools/state.js"; function fakeCtx(cwd: string): ExtensionContext { return { @@ -94,6 +95,61 @@ test("doctor context risk uses Pi model context window and compaction settings", assert.match(summary.lines.join("\n"), /Pi retry: maxRetries=2/); }); +test("slug lock blocks overwriting an existing plan from another session", () => { + const root = mkdtempSync(join(tmpdir(), "feynman-slug-")); + const planPath = resolve(root, "outputs", ".plans", "demo.md"); + writeFileSyncSafe(planPath, "# Existing\n"); + + const result = claimPlanSlug(root, "session-2", "outputs/.plans/demo.md"); + + assert.equal(result.ok, false); + if (!result.ok) { + assert.match(result.reason, /Plan already exists/); + } +}); + +test("managed cache gc deletes stale cache files and honors dry-run", () => { + const root = mkdtempSync(join(tmpdir(), "feynman-gc-")); + const cachePath = resolve(root, "outputs", ".cache", "old.md"); + writeFileSyncSafe(cachePath, "old"); + const old = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000); + utimesSync(cachePath, old, old); + + const preview = collectManagedGc(root, Date.now(), 14, { dryRun: true }); + assert.equal(preview.deleted.length, 1); + assert.equal(existsSync(cachePath), true); + + const actual = collectManagedGc(root, Date.now(), 14); + assert.equal(actual.deleted.length, 1); + assert.equal(existsSync(cachePath), false); +}); + +test("large custom tool results spill to outputs runs", () => { + const root = mkdtempSync(join(tmpdir(), "feynman-subagent-spill-")); + const originalCap = process.env.FEYNMAN_CUSTOM_TOOL_CAP_CHARS; + process.env.FEYNMAN_CUSTOM_TOOL_CAP_CHARS = "50"; + try { + const result = spillLargeCustomToolResult( + root, + "subagent", + "call-1", + [{ type: "text", text: "x".repeat(200) }], + { ok: true }, + ); + assert.ok(result); + const parsed = JSON.parse(result!.content[0]!.text) as { path: string; feynman_spillover: boolean }; + assert.equal(parsed.feynman_spillover, true); + assert.match(parsed.path, /outputs\/\.runs\/subagent-call-1-/); + assert.equal(existsSync(parsed.path), true); + } finally { + if (originalCap === undefined) { + delete process.env.FEYNMAN_CUSTOM_TOOL_CAP_CHARS; + } else { + process.env.FEYNMAN_CUSTOM_TOOL_CAP_CHARS = originalCap; + } + } +}); + function writeFileSyncSafe(path: string, text: string): void { mkdirSync(dirname(path), { recursive: true }); writeFileSync(path, text, "utf8"); diff --git a/website/public/install b/website/public/install index 61cc550..06dd1b2 100644 --- a/website/public/install +++ b/website/public/install @@ -261,7 +261,7 @@ This usually means the release exists, but not all platform bundles were uploade Workarounds: - try again after the release finishes publishing - pass the latest published version explicitly, e.g.: - curl -fsSL https://feynman.is/install | bash -s -- 0.2.26 + curl -fsSL https://feynman.is/install | bash -s -- 0.2.27 EOF exit 1 fi diff --git a/website/public/install.ps1 b/website/public/install.ps1 index 5d9c1b0..eae85b9 100644 --- a/website/public/install.ps1 +++ b/website/public/install.ps1 @@ -110,7 +110,7 @@ This usually means the release exists, but not all platform bundles were uploade Workarounds: - try again after the release finishes publishing - pass the latest published version explicitly, e.g.: - & ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.26 + & ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.27 "@ } diff --git a/website/src/content/docs/getting-started/installation.md b/website/src/content/docs/getting-started/installation.md index faddf86..ca10359 100644 --- a/website/src/content/docs/getting-started/installation.md +++ b/website/src/content/docs/getting-started/installation.md @@ -117,13 +117,13 @@ These installers download the bundled `skills/` and `prompts/` trees plus the re The one-line installer already targets the latest tagged release. To pin an exact version, pass it explicitly: ```bash -curl -fsSL https://feynman.is/install | bash -s -- 0.2.26 +curl -fsSL https://feynman.is/install | bash -s -- 0.2.27 ``` On Windows: ```powershell -& ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.26 +& ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.27 ``` ## Post-install setup