Initial Feynman research agent scaffold
This commit is contained in:
8
.env.example
Normal file
8
.env.example
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
# Optional runtime defaults for Feynman.
|
||||||
|
# Provider credentials are read by pi-coding-agent in the usual ways.
|
||||||
|
|
||||||
|
FEYNMAN_MODEL=
|
||||||
|
FEYNMAN_THINKING=medium
|
||||||
|
|
||||||
|
OPENAI_API_KEY=
|
||||||
|
ANTHROPIC_API_KEY=
|
||||||
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
node_modules
|
||||||
|
.env
|
||||||
|
.feynman
|
||||||
|
outputs/*
|
||||||
|
!outputs/.gitkeep
|
||||||
12
.pi/settings.json
Normal file
12
.pi/settings.json
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"packages": [
|
||||||
|
"npm:pi-subagents",
|
||||||
|
"npm:pi-docparser",
|
||||||
|
"npm:pi-web-access",
|
||||||
|
"npm:pi-markdown-preview",
|
||||||
|
"npm:@kaiserlich-dev/pi-session-search",
|
||||||
|
"npm:@aliou/pi-processes",
|
||||||
|
"npm:pi-wandb",
|
||||||
|
"npm:pi-zotero"
|
||||||
|
]
|
||||||
|
}
|
||||||
87
README.md
Normal file
87
README.md
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
# Feynman
|
||||||
|
|
||||||
|
`feynman` is a research-first agent scaffold built on `@mariozechner/pi-coding-agent`.
|
||||||
|
|
||||||
|
It keeps the useful parts of a coding agent:
|
||||||
|
- file access
|
||||||
|
- shell execution
|
||||||
|
- persistent sessions
|
||||||
|
- skills
|
||||||
|
- custom extensions
|
||||||
|
|
||||||
|
But it biases the runtime toward research work:
|
||||||
|
- literature review
|
||||||
|
- paper lookup
|
||||||
|
- replication planning
|
||||||
|
- experiment design
|
||||||
|
- writing notes and reports
|
||||||
|
|
||||||
|
The primary paper backend is `@companion-ai/alpha-hub` and your alphaXiv account.
|
||||||
|
The rest of the workflow is augmented through a curated `.pi/settings.json` package stack.
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /Users/advaitpaliwal/Companion/Code/feynman
|
||||||
|
npm install
|
||||||
|
cp .env.example .env
|
||||||
|
npm run start
|
||||||
|
```
|
||||||
|
|
||||||
|
If you already use `pi`, Feynman will reuse the usual auth/config locations for model access.
|
||||||
|
|
||||||
|
Before deep paper work, make sure alphaXiv auth is set up:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npx @companion-ai/alpha-hub login
|
||||||
|
```
|
||||||
|
|
||||||
|
## Commands
|
||||||
|
|
||||||
|
Inside the REPL:
|
||||||
|
|
||||||
|
- `/help` shows local commands
|
||||||
|
- `/new` starts a new persisted session
|
||||||
|
- `/exit` quits
|
||||||
|
- `/lit-review <topic>` expands the literature-review prompt template
|
||||||
|
- `/replicate <paper or claim>` expands the replication prompt template
|
||||||
|
- `/reading-list <topic>` expands the reading-list prompt template
|
||||||
|
- `/paper-code-audit <item>` expands the paper/code audit prompt template
|
||||||
|
- `/paper-draft <topic>` expands the paper-style writing prompt template
|
||||||
|
|
||||||
|
## Custom Tools
|
||||||
|
|
||||||
|
The starter extension adds:
|
||||||
|
|
||||||
|
- `alpha_search` for alphaXiv-backed paper discovery
|
||||||
|
- `alpha_get_paper` for fetching paper reports or raw text
|
||||||
|
- `alpha_ask_paper` for targeted paper Q&A
|
||||||
|
- `alpha_annotate_paper` for persistent local notes
|
||||||
|
- `alpha_list_annotations` for recall across sessions
|
||||||
|
- `alpha_read_code` for reading a paper repository
|
||||||
|
|
||||||
|
Feynman uses `@companion-ai/alpha-hub` directly in-process rather than shelling out to the CLI.
|
||||||
|
|
||||||
|
## Curated Pi Stack
|
||||||
|
|
||||||
|
Feynman loads a lean research stack from [.pi/settings.json](/Users/advaitpaliwal/Companion/Code/feynman/.pi/settings.json):
|
||||||
|
|
||||||
|
- `pi-subagents` for parallel literature gathering and decomposition
|
||||||
|
- `pi-docparser` for PDFs, Office docs, spreadsheets, and images
|
||||||
|
- `pi-web-access` for broader web, GitHub, PDF, and media access
|
||||||
|
- `pi-markdown-preview` for polished Markdown and LaTeX-heavy research writeups
|
||||||
|
- `@kaiserlich-dev/pi-session-search` for recall across long-running research threads
|
||||||
|
- `@aliou/pi-processes` for long-running experiments and log tails
|
||||||
|
- `pi-wandb` for experiment tracking
|
||||||
|
- `pi-zotero` for citation-library workflows
|
||||||
|
|
||||||
|
## Layout
|
||||||
|
|
||||||
|
```text
|
||||||
|
feynman/
|
||||||
|
├── extensions/ # Custom research tools
|
||||||
|
├── papers/ # Polished paper-style drafts and writeups
|
||||||
|
├── prompts/ # Slash-style prompt templates
|
||||||
|
├── skills/ # Research workflows
|
||||||
|
└── src/ # Minimal REPL wrapper around pi-coding-agent
|
||||||
|
```
|
||||||
1
experiments/.gitkeep
Normal file
1
experiments/.gitkeep
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
171
extensions/research-tools.ts
Normal file
171
extensions/research-tools.ts
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
import {
|
||||||
|
annotatePaper,
|
||||||
|
askPaper,
|
||||||
|
clearPaperAnnotation,
|
||||||
|
disconnect,
|
||||||
|
getPaper,
|
||||||
|
listPaperAnnotations,
|
||||||
|
readPaperCode,
|
||||||
|
searchPapers,
|
||||||
|
} from "@companion-ai/alpha-hub/lib";
|
||||||
|
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
||||||
|
import { Type } from "@sinclair/typebox";
|
||||||
|
|
||||||
|
function formatToolText(result: unknown): string {
|
||||||
|
return typeof result === "string" ? result : JSON.stringify(result, null, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
export default function researchTools(pi: ExtensionAPI): void {
|
||||||
|
pi.registerTool({
|
||||||
|
name: "alpha_search",
|
||||||
|
label: "Alpha Search",
|
||||||
|
description: "Search papers through alphaXiv using semantic, keyword, both, agentic, or all retrieval modes.",
|
||||||
|
parameters: Type.Object({
|
||||||
|
query: Type.String({ description: "Paper search query." }),
|
||||||
|
mode: Type.Optional(
|
||||||
|
Type.String({
|
||||||
|
description: "Search mode: semantic, keyword, both, agentic, or all.",
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
}),
|
||||||
|
async execute(_toolCallId, params) {
|
||||||
|
try {
|
||||||
|
const result = await searchPapers(params.query, params.mode?.trim() || "all");
|
||||||
|
return {
|
||||||
|
content: [{ type: "text", text: formatToolText(result) }],
|
||||||
|
details: result,
|
||||||
|
};
|
||||||
|
} finally {
|
||||||
|
await disconnect();
|
||||||
|
}
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
pi.registerTool({
|
||||||
|
name: "alpha_get_paper",
|
||||||
|
label: "Alpha Get Paper",
|
||||||
|
description: "Fetch a paper report or full text, plus any local annotation, using alphaXiv.",
|
||||||
|
parameters: Type.Object({
|
||||||
|
paper: Type.String({
|
||||||
|
description: "arXiv ID, arXiv URL, or alphaXiv URL.",
|
||||||
|
}),
|
||||||
|
fullText: Type.Optional(
|
||||||
|
Type.Boolean({
|
||||||
|
description: "Return raw full text instead of the AI report.",
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
}),
|
||||||
|
async execute(_toolCallId, params) {
|
||||||
|
try {
|
||||||
|
const result = await getPaper(params.paper, { fullText: params.fullText });
|
||||||
|
return {
|
||||||
|
content: [{ type: "text", text: formatToolText(result) }],
|
||||||
|
details: result,
|
||||||
|
};
|
||||||
|
} finally {
|
||||||
|
await disconnect();
|
||||||
|
}
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
pi.registerTool({
|
||||||
|
name: "alpha_ask_paper",
|
||||||
|
label: "Alpha Ask Paper",
|
||||||
|
description: "Ask a targeted question about a paper using alphaXiv's PDF analysis.",
|
||||||
|
parameters: Type.Object({
|
||||||
|
paper: Type.String({
|
||||||
|
description: "arXiv ID, arXiv URL, or alphaXiv URL.",
|
||||||
|
}),
|
||||||
|
question: Type.String({
|
||||||
|
description: "Question to ask about the paper.",
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
async execute(_toolCallId, params) {
|
||||||
|
try {
|
||||||
|
const result = await askPaper(params.paper, params.question);
|
||||||
|
return {
|
||||||
|
content: [{ type: "text", text: formatToolText(result) }],
|
||||||
|
details: result,
|
||||||
|
};
|
||||||
|
} finally {
|
||||||
|
await disconnect();
|
||||||
|
}
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
pi.registerTool({
|
||||||
|
name: "alpha_annotate_paper",
|
||||||
|
label: "Alpha Annotate Paper",
|
||||||
|
description: "Write or clear a persistent local annotation for a paper.",
|
||||||
|
parameters: Type.Object({
|
||||||
|
paper: Type.String({
|
||||||
|
description: "Paper ID to annotate.",
|
||||||
|
}),
|
||||||
|
note: Type.Optional(
|
||||||
|
Type.String({
|
||||||
|
description: "Annotation text. Omit when clear=true.",
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
clear: Type.Optional(
|
||||||
|
Type.Boolean({
|
||||||
|
description: "Clear the existing annotation instead of writing one.",
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
}),
|
||||||
|
async execute(_toolCallId, params) {
|
||||||
|
const result = params.clear
|
||||||
|
? await clearPaperAnnotation(params.paper)
|
||||||
|
: params.note
|
||||||
|
? await annotatePaper(params.paper, params.note)
|
||||||
|
: (() => {
|
||||||
|
throw new Error("Provide either note or clear=true.");
|
||||||
|
})();
|
||||||
|
|
||||||
|
return {
|
||||||
|
content: [{ type: "text", text: formatToolText(result) }],
|
||||||
|
details: result,
|
||||||
|
};
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
pi.registerTool({
|
||||||
|
name: "alpha_list_annotations",
|
||||||
|
label: "Alpha List Annotations",
|
||||||
|
description: "List all persistent local paper annotations.",
|
||||||
|
parameters: Type.Object({}),
|
||||||
|
async execute() {
|
||||||
|
const result = await listPaperAnnotations();
|
||||||
|
return {
|
||||||
|
content: [{ type: "text", text: formatToolText(result) }],
|
||||||
|
details: result,
|
||||||
|
};
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
pi.registerTool({
|
||||||
|
name: "alpha_read_code",
|
||||||
|
label: "Alpha Read Code",
|
||||||
|
description: "Read files from a paper's GitHub repository through alphaXiv.",
|
||||||
|
parameters: Type.Object({
|
||||||
|
githubUrl: Type.String({
|
||||||
|
description: "GitHub repository URL for the paper implementation.",
|
||||||
|
}),
|
||||||
|
path: Type.Optional(
|
||||||
|
Type.String({
|
||||||
|
description: "Repository path to inspect. Use / for the repo overview.",
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
}),
|
||||||
|
async execute(_toolCallId, params) {
|
||||||
|
try {
|
||||||
|
const result = await readPaperCode(params.githubUrl, params.path?.trim() || "/");
|
||||||
|
return {
|
||||||
|
content: [{ type: "text", text: formatToolText(result) }],
|
||||||
|
details: result,
|
||||||
|
};
|
||||||
|
} finally {
|
||||||
|
await disconnect();
|
||||||
|
}
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
1
notes/.gitkeep
Normal file
1
notes/.gitkeep
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
1
outputs/.gitkeep
Normal file
1
outputs/.gitkeep
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
4117
package-lock.json
generated
Normal file
4117
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
44
package.json
Normal file
44
package.json
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
{
|
||||||
|
"name": "feynman",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"private": true,
|
||||||
|
"description": "Research-first agent built on pi-coding-agent",
|
||||||
|
"type": "module",
|
||||||
|
"scripts": {
|
||||||
|
"dev": "tsx src/index.ts",
|
||||||
|
"start": "tsx src/index.ts",
|
||||||
|
"typecheck": "tsc --noEmit"
|
||||||
|
},
|
||||||
|
"keywords": [
|
||||||
|
"pi-package",
|
||||||
|
"research-agent",
|
||||||
|
"literature-review",
|
||||||
|
"experiments"
|
||||||
|
],
|
||||||
|
"pi": {
|
||||||
|
"extensions": [
|
||||||
|
"./extensions"
|
||||||
|
],
|
||||||
|
"skills": [
|
||||||
|
"./skills"
|
||||||
|
],
|
||||||
|
"prompts": [
|
||||||
|
"./prompts"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@companion-ai/alpha-hub": "^0.1.0",
|
||||||
|
"@mariozechner/pi-ai": "^0.56.1",
|
||||||
|
"@mariozechner/pi-coding-agent": "^0.56.1",
|
||||||
|
"@sinclair/typebox": "^0.34.41",
|
||||||
|
"dotenv": "^16.4.7"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/node": "^24.3.0",
|
||||||
|
"tsx": "^4.20.5",
|
||||||
|
"typescript": "^5.7.3"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=20.6.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
1
papers/.gitkeep
Normal file
1
papers/.gitkeep
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
13
prompts/lit-review.md
Normal file
13
prompts/lit-review.md
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
---
|
||||||
|
description: Run a literature review on a topic using paper search and primary-source synthesis.
|
||||||
|
---
|
||||||
|
Investigate the following topic as a literature review: $@
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Use `alpha_search` first.
|
||||||
|
- Use `alpha_get_paper` on the most relevant papers before making strong claims.
|
||||||
|
- Use `alpha_ask_paper` for targeted follow-up questions when the report is not enough.
|
||||||
|
- Prefer primary sources and note when something appears to be a preprint or secondary summary.
|
||||||
|
- Separate consensus, disagreements, and open questions.
|
||||||
|
- When useful, propose concrete next experiments or follow-up reading.
|
||||||
|
- If the user wants an artifact, write the review to disk as markdown.
|
||||||
12
prompts/paper-code-audit.md
Normal file
12
prompts/paper-code-audit.md
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
---
|
||||||
|
description: Compare a paper's claims against its public codebase and identify mismatches, omissions, and reproducibility risks.
|
||||||
|
---
|
||||||
|
Audit the paper and codebase for: $@
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Identify the canonical paper first with `alpha_search` and `alpha_get_paper`.
|
||||||
|
- Extract implementation-sensitive claims with `alpha_ask_paper`.
|
||||||
|
- If a public repo exists, inspect it with `alpha_read_code`.
|
||||||
|
- Compare claimed methods, defaults, metrics, and data handling against the repository.
|
||||||
|
- Call out missing code, mismatches, ambiguous defaults, and reproduction risks.
|
||||||
|
- Save the audit to `outputs/` as markdown.
|
||||||
19
prompts/paper-draft.md
Normal file
19
prompts/paper-draft.md
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
---
|
||||||
|
description: Turn research findings into a polished paper-style draft with equations, sections, and explicit claims.
|
||||||
|
---
|
||||||
|
Write a paper-style draft for: $@
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Ground every claim in inspected sources, experiments, or explicit inference.
|
||||||
|
- Use clean Markdown structure with LaTeX where equations materially help.
|
||||||
|
- Include at minimum:
|
||||||
|
- title
|
||||||
|
- abstract
|
||||||
|
- problem statement
|
||||||
|
- related work
|
||||||
|
- method or synthesis
|
||||||
|
- evidence or experiments
|
||||||
|
- limitations
|
||||||
|
- conclusion
|
||||||
|
- If citations are available, include citation placeholders or references clearly enough to convert later.
|
||||||
|
- Save the draft to `papers/` as markdown.
|
||||||
12
prompts/reading-list.md
Normal file
12
prompts/reading-list.md
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
---
|
||||||
|
description: Build a prioritized reading list on a research topic with rationale for each paper.
|
||||||
|
---
|
||||||
|
Create a research reading list for: $@
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Use `alpha_search` with `all` mode.
|
||||||
|
- Inspect the strongest papers with `alpha_get_paper`.
|
||||||
|
- Use `alpha_ask_paper` when a paper's fit is unclear.
|
||||||
|
- Group papers by role when useful: foundational, strongest recent work, methods, benchmarks, critiques, replication targets.
|
||||||
|
- For each paper, explain why it is on the list.
|
||||||
|
- Save the final reading list to `outputs/` as markdown.
|
||||||
14
prompts/replicate.md
Normal file
14
prompts/replicate.md
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
---
|
||||||
|
description: Plan or execute a replication workflow for a paper, claim, or benchmark.
|
||||||
|
---
|
||||||
|
Design a replication plan for: $@
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Identify the canonical paper or source material first.
|
||||||
|
- Use `alpha_get_paper` for the target paper.
|
||||||
|
- Use `alpha_ask_paper` to extract the exact implementation or evaluation details you still need.
|
||||||
|
- If the paper links code, inspect it with `alpha_read_code`.
|
||||||
|
- Determine what code, datasets, metrics, and environment are needed.
|
||||||
|
- If enough information is available locally, implement and run the replication steps.
|
||||||
|
- Save notes, scripts, and results to disk in a reproducible layout.
|
||||||
|
- Be explicit about what is verified, what is inferred, and what is still missing.
|
||||||
48
skills/research/experiment-design/SKILL.md
Normal file
48
skills/research/experiment-design/SKILL.md
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
---
|
||||||
|
name: experiment-design
|
||||||
|
description: Use this when the task is to turn a vague research idea into a testable experiment, define metrics, choose baselines, or plan ablations.
|
||||||
|
---
|
||||||
|
|
||||||
|
# Experiment Design
|
||||||
|
|
||||||
|
## When To Use
|
||||||
|
|
||||||
|
Use this skill when the user has:
|
||||||
|
- a hypothesis to test
|
||||||
|
- a method to evaluate
|
||||||
|
- an unclear benchmark plan
|
||||||
|
- a need for baselines, ablations, or metrics
|
||||||
|
|
||||||
|
## Procedure
|
||||||
|
|
||||||
|
1. Restate the research question as a falsifiable claim.
|
||||||
|
2. Define:
|
||||||
|
- independent variables
|
||||||
|
- dependent variables
|
||||||
|
- success metrics
|
||||||
|
- baselines
|
||||||
|
- constraints
|
||||||
|
3. Search for prior work first with `alpha_search` so you do not reinvent an obviously flawed setup.
|
||||||
|
4. Use `alpha_get_paper` and `alpha_ask_paper` on the strongest references.
|
||||||
|
5. Prefer the smallest experiment that can meaningfully reduce uncertainty.
|
||||||
|
6. List confounders and failure modes up front.
|
||||||
|
7. If implementation is requested, create the scripts, configs, and logging plan.
|
||||||
|
8. Write the plan to disk before running expensive work.
|
||||||
|
|
||||||
|
## Pitfalls
|
||||||
|
|
||||||
|
- Avoid experiments with no baseline.
|
||||||
|
- Avoid metrics that do not connect to the claim.
|
||||||
|
- Avoid ablations that change multiple variables at once.
|
||||||
|
- Avoid broad plans that cannot be executed with the current environment.
|
||||||
|
|
||||||
|
## Deliverable
|
||||||
|
|
||||||
|
Produce:
|
||||||
|
- hypothesis
|
||||||
|
- setup
|
||||||
|
- baselines
|
||||||
|
- metrics
|
||||||
|
- ablations
|
||||||
|
- risks
|
||||||
|
- next action
|
||||||
52
skills/research/literature-review/SKILL.md
Normal file
52
skills/research/literature-review/SKILL.md
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
---
|
||||||
|
name: literature-review
|
||||||
|
description: Use this when the task is to survey prior work, compare papers, synthesize a field, or build a reading list grounded in primary sources.
|
||||||
|
---
|
||||||
|
|
||||||
|
# Literature Review
|
||||||
|
|
||||||
|
## When To Use
|
||||||
|
|
||||||
|
Use this skill when the user wants:
|
||||||
|
- a research overview
|
||||||
|
- a paper shortlist
|
||||||
|
- a comparison of methods
|
||||||
|
- a synthesis of consensus and disagreement
|
||||||
|
- a source-backed brief on a topic
|
||||||
|
|
||||||
|
## Procedure
|
||||||
|
|
||||||
|
1. Search broadly first with `alpha_search`.
|
||||||
|
2. Pick the strongest candidates by direct relevance, recency, citations, and venue quality.
|
||||||
|
3. Inspect the top papers with `alpha_get_paper` before making concrete claims.
|
||||||
|
4. Use `alpha_ask_paper` for missing methodological or experimental details.
|
||||||
|
5. Build a compact evidence table:
|
||||||
|
- title
|
||||||
|
- year
|
||||||
|
- authors
|
||||||
|
- venue
|
||||||
|
- claim or contribution
|
||||||
|
- important caveats
|
||||||
|
6. Distinguish:
|
||||||
|
- what multiple sources agree on
|
||||||
|
- where methods or findings differ
|
||||||
|
- what remains unresolved
|
||||||
|
7. If the user wants a durable artifact, write a markdown brief to disk.
|
||||||
|
8. If you discover an important gotcha about a paper, save it with `alpha_annotate_paper`.
|
||||||
|
|
||||||
|
## Pitfalls
|
||||||
|
|
||||||
|
- Do not summarize a field from titles alone.
|
||||||
|
- Do not flatten disagreements into fake consensus.
|
||||||
|
- Do not treat recent preprints as established facts without saying so.
|
||||||
|
- Do not cite secondary commentary when a primary source is available.
|
||||||
|
|
||||||
|
## Output Shape
|
||||||
|
|
||||||
|
Prefer this structure:
|
||||||
|
- question
|
||||||
|
- strongest papers
|
||||||
|
- major findings
|
||||||
|
- disagreements or caveats
|
||||||
|
- open questions
|
||||||
|
- recommended next reading or experiments
|
||||||
50
skills/research/paper-code-audit/SKILL.md
Normal file
50
skills/research/paper-code-audit/SKILL.md
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
---
|
||||||
|
name: paper-code-audit
|
||||||
|
description: Use this when the task is to compare a paper against its repository, verify whether claims are implemented, or assess reproducibility risk.
|
||||||
|
---
|
||||||
|
|
||||||
|
# Paper Code Audit
|
||||||
|
|
||||||
|
## When To Use
|
||||||
|
|
||||||
|
Use this skill for:
|
||||||
|
- paper-versus-code verification
|
||||||
|
- implementation gap analysis
|
||||||
|
- reproducibility audits
|
||||||
|
- checking whether public code matches reported results
|
||||||
|
|
||||||
|
## Procedure
|
||||||
|
|
||||||
|
1. Locate the paper with `alpha_search`.
|
||||||
|
2. Load the paper with `alpha_get_paper`.
|
||||||
|
3. Extract implementation-relevant details using `alpha_ask_paper`:
|
||||||
|
- datasets
|
||||||
|
- preprocessing
|
||||||
|
- model architecture
|
||||||
|
- hyperparameters
|
||||||
|
- evaluation protocol
|
||||||
|
4. If the paper links a repository, inspect it using `alpha_read_code`.
|
||||||
|
5. Compare paper claims against code realities:
|
||||||
|
- are all components present
|
||||||
|
- do defaults match the paper
|
||||||
|
- are metrics/eval scripts exposed
|
||||||
|
- are hidden assumptions required
|
||||||
|
6. Record concrete mismatches, not vibes.
|
||||||
|
7. Save the audit in `outputs/`.
|
||||||
|
8. If you find a durable gotcha, save it with `alpha_annotate_paper`.
|
||||||
|
|
||||||
|
## Pitfalls
|
||||||
|
|
||||||
|
- Do not infer repository behavior without opening the relevant files.
|
||||||
|
- Do not assume README claims reflect the actual defaults.
|
||||||
|
- Do not mark something as missing if it exists under another name without checking.
|
||||||
|
|
||||||
|
## Deliverable
|
||||||
|
|
||||||
|
Include:
|
||||||
|
- paper summary
|
||||||
|
- repository coverage
|
||||||
|
- confirmed matches
|
||||||
|
- mismatches or omissions
|
||||||
|
- reproducibility risks
|
||||||
|
- recommended next actions
|
||||||
45
skills/research/paper-writing/SKILL.md
Normal file
45
skills/research/paper-writing/SKILL.md
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
---
|
||||||
|
name: paper-writing
|
||||||
|
description: Use this when the task is to turn research notes, experiments, or a literature review into a polished paper-style writeup with Markdown and LaTeX.
|
||||||
|
---
|
||||||
|
|
||||||
|
# Paper Writing
|
||||||
|
|
||||||
|
## When To Use
|
||||||
|
|
||||||
|
Use this skill for:
|
||||||
|
- research reports that should read like a paper
|
||||||
|
- internal memos with equations or formal structure
|
||||||
|
- polished writeups of experiments or literature reviews
|
||||||
|
- converting rough notes into a coherent draft
|
||||||
|
|
||||||
|
## Procedure
|
||||||
|
|
||||||
|
1. Make sure the underlying claims are already grounded in sources, experiments, or explicit caveats.
|
||||||
|
2. Build the draft around a proper research structure:
|
||||||
|
- title
|
||||||
|
- abstract
|
||||||
|
- introduction or problem statement
|
||||||
|
- related work
|
||||||
|
- approach, synthesis, or methodology
|
||||||
|
- evidence, experiments, or case studies
|
||||||
|
- limitations
|
||||||
|
- conclusion
|
||||||
|
3. Use Markdown by default.
|
||||||
|
4. Use LaTeX only where equations or notation genuinely improve clarity.
|
||||||
|
5. Keep claims falsifiable and scoped.
|
||||||
|
6. Save polished drafts to `papers/`.
|
||||||
|
|
||||||
|
## Pitfalls
|
||||||
|
|
||||||
|
- Do not use LaTeX for decoration.
|
||||||
|
- Do not make a draft look more certain than the evidence supports.
|
||||||
|
- Do not hide missing citations or weak evidence; flag them.
|
||||||
|
|
||||||
|
## Deliverable
|
||||||
|
|
||||||
|
A readable paper-style draft with:
|
||||||
|
- explicit structure
|
||||||
|
- traceable claims
|
||||||
|
- equations only where useful
|
||||||
|
- limitations stated plainly
|
||||||
49
skills/research/reading-list/SKILL.md
Normal file
49
skills/research/reading-list/SKILL.md
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
---
|
||||||
|
name: reading-list
|
||||||
|
description: Use this when the user wants a curated reading sequence, paper shortlist, or tiered set of papers for learning or project onboarding.
|
||||||
|
---
|
||||||
|
|
||||||
|
# Reading List
|
||||||
|
|
||||||
|
## When To Use
|
||||||
|
|
||||||
|
Use this skill for:
|
||||||
|
- getting up to speed on a topic
|
||||||
|
- onboarding into a research area
|
||||||
|
- choosing which papers to read first
|
||||||
|
- constructing a project-specific reading order
|
||||||
|
|
||||||
|
## Procedure
|
||||||
|
|
||||||
|
1. Start with `alpha_search` in `all` mode.
|
||||||
|
2. Inspect the strongest candidates with `alpha_get_paper`.
|
||||||
|
3. Use `alpha_ask_paper` for fit questions like:
|
||||||
|
- what problem does this really solve
|
||||||
|
- what assumptions does it rely on
|
||||||
|
- what prior work does it build on
|
||||||
|
4. Classify papers into roles:
|
||||||
|
- foundational
|
||||||
|
- key recent advances
|
||||||
|
- evaluation or benchmark references
|
||||||
|
- critiques or limitations
|
||||||
|
- likely replication targets
|
||||||
|
5. Order the list intentionally:
|
||||||
|
- start with orientation
|
||||||
|
- move to strongest methods
|
||||||
|
- finish with edges, critiques, or adjacent work
|
||||||
|
6. Write the final list as a durable markdown artifact in `outputs/`.
|
||||||
|
|
||||||
|
## Pitfalls
|
||||||
|
|
||||||
|
- Do not sort purely by citations.
|
||||||
|
- Do not over-index on recency when fundamentals matter.
|
||||||
|
- Do not include papers you have not inspected at all.
|
||||||
|
|
||||||
|
## Deliverable
|
||||||
|
|
||||||
|
For each paper include:
|
||||||
|
- title
|
||||||
|
- year
|
||||||
|
- why it matters
|
||||||
|
- when to read it in the sequence
|
||||||
|
- one caveat or limitation
|
||||||
52
skills/research/replication/SKILL.md
Normal file
52
skills/research/replication/SKILL.md
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
---
|
||||||
|
name: replication
|
||||||
|
description: Use this when the task is to reproduce a paper result, benchmark a claim, rebuild an experiment, or evaluate whether a published result holds in practice.
|
||||||
|
---
|
||||||
|
|
||||||
|
# Replication
|
||||||
|
|
||||||
|
## When To Use
|
||||||
|
|
||||||
|
Use this skill for:
|
||||||
|
- paper reproduction
|
||||||
|
- benchmark recreation
|
||||||
|
- ablation reruns
|
||||||
|
- claim verification through code and experiments
|
||||||
|
|
||||||
|
## Procedure
|
||||||
|
|
||||||
|
1. Identify the canonical source paper and inspect it with `alpha_get_paper`.
|
||||||
|
2. Extract the exact target:
|
||||||
|
- task
|
||||||
|
- dataset
|
||||||
|
- model or method
|
||||||
|
- metrics
|
||||||
|
- hardware or runtime assumptions
|
||||||
|
3. Use `alpha_ask_paper` to pull out the exact details missing from the report.
|
||||||
|
4. If the paper has a public repository, inspect it with `alpha_read_code`.
|
||||||
|
5. Search the local workspace for existing code, notebooks, configs, and datasets.
|
||||||
|
6. Write down the missing pieces explicitly before running anything.
|
||||||
|
7. If the environment is sufficient, implement the minimal runnable reproduction path.
|
||||||
|
8. Run the experiment with built-in file and shell tools.
|
||||||
|
9. Save:
|
||||||
|
- commands used
|
||||||
|
- configs
|
||||||
|
- raw outputs
|
||||||
|
- summarized results
|
||||||
|
10. Compare observed results with the paper and explain gaps.
|
||||||
|
11. If the paper had a practical gotcha, attach it with `alpha_annotate_paper`.
|
||||||
|
|
||||||
|
## Pitfalls
|
||||||
|
|
||||||
|
- Do not claim replication succeeded if key conditions were missing.
|
||||||
|
- Do not compare different metrics as if they were equivalent.
|
||||||
|
- Do not ignore dataset or preprocessing mismatch.
|
||||||
|
- Do not hide failed runs; record them and explain them.
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
A good replication outcome includes:
|
||||||
|
- the exact command path
|
||||||
|
- the data or config used
|
||||||
|
- the observed metrics
|
||||||
|
- a clear statement of match, partial match, or mismatch
|
||||||
34
src/feynman-prompt.ts
Normal file
34
src/feynman-prompt.ts
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
export const FEYNMAN_SYSTEM_PROMPT = `You are Feynman, a research-first AI agent.
|
||||||
|
|
||||||
|
Your job is to investigate questions, read primary sources, design experiments, run them when useful, and produce reproducible written artifacts.
|
||||||
|
|
||||||
|
Operating rules:
|
||||||
|
- Evidence over fluency.
|
||||||
|
- Prefer papers, official documentation, datasets, code, and direct experimental results over commentary.
|
||||||
|
- Separate observations from inferences.
|
||||||
|
- State uncertainty explicitly.
|
||||||
|
- When a claim depends on recent literature or unstable facts, use tools before answering.
|
||||||
|
- When discussing papers, cite title, year, and identifier or URL when possible.
|
||||||
|
- Use the alpha-backed research tools first for literature search, paper reading, paper Q&A, and persistent annotations.
|
||||||
|
- Use the installed Pi research packages for broader web/PDF access, document parsing, session recall, background processes, experiment tracking, citations, and delegated subtasks when they reduce friction.
|
||||||
|
- When an experiment is warranted, write the code or scripts, run them, capture outputs, and save artifacts to disk.
|
||||||
|
- Treat polished scientific communication as part of the job: structure reports cleanly, use Markdown deliberately, and use LaTeX math when equations clarify the argument.
|
||||||
|
- Default artifact locations:
|
||||||
|
- outputs/ for reviews, reading lists, and summaries
|
||||||
|
- experiments/ for runnable experiment code and result logs
|
||||||
|
- notes/ for scratch notes and intermediate synthesis
|
||||||
|
- papers/ for polished paper-style drafts and writeups
|
||||||
|
- Default deliverables should include: summary, strongest evidence, disagreements or gaps, open questions, and recommended next steps.
|
||||||
|
|
||||||
|
Default workflow:
|
||||||
|
1. Clarify the research objective if needed.
|
||||||
|
2. Search for relevant primary sources.
|
||||||
|
3. Inspect the most relevant papers or materials directly.
|
||||||
|
4. Synthesize consensus, disagreements, and missing evidence.
|
||||||
|
5. Design and run experiments when they would resolve uncertainty.
|
||||||
|
6. Write the requested output artifact.
|
||||||
|
|
||||||
|
Style:
|
||||||
|
- Concise, skeptical, and explicit.
|
||||||
|
- Avoid fake certainty.
|
||||||
|
- Do not present unverified claims as facts.`;
|
||||||
212
src/index.ts
Normal file
212
src/index.ts
Normal file
@@ -0,0 +1,212 @@
|
|||||||
|
import "dotenv/config";
|
||||||
|
|
||||||
|
import { mkdirSync } from "node:fs";
|
||||||
|
import { stdin as input, stdout as output } from "node:process";
|
||||||
|
import { dirname, resolve } from "node:path";
|
||||||
|
import { parseArgs } from "node:util";
|
||||||
|
import { fileURLToPath } from "node:url";
|
||||||
|
import readline from "node:readline/promises";
|
||||||
|
|
||||||
|
import {
|
||||||
|
AuthStorage,
|
||||||
|
createAgentSession,
|
||||||
|
createCodingTools,
|
||||||
|
DefaultResourceLoader,
|
||||||
|
ModelRegistry,
|
||||||
|
SessionManager,
|
||||||
|
SettingsManager,
|
||||||
|
} from "@mariozechner/pi-coding-agent";
|
||||||
|
|
||||||
|
import { FEYNMAN_SYSTEM_PROMPT } from "./feynman-prompt.js";
|
||||||
|
|
||||||
|
// Reasoning-effort levels accepted via --thinking or FEYNMAN_THINKING.
type ThinkingLevel = "off" | "low" | "medium" | "high";
|
||||||
|
|
||||||
|
function printHelp(): void {
|
||||||
|
console.log(`Feynman commands:
|
||||||
|
/help Show this help
|
||||||
|
/new Start a fresh persisted session
|
||||||
|
/exit Quit the REPL
|
||||||
|
/lit-review <topic> Expand the literature review prompt template
|
||||||
|
/replicate <paper> Expand the replication prompt template
|
||||||
|
/reading-list <topic> Expand the reading list prompt template
|
||||||
|
/paper-code-audit <item> Expand the paper/code audit prompt template
|
||||||
|
/paper-draft <topic> Expand the paper-style writing prompt template
|
||||||
|
|
||||||
|
CLI flags:
|
||||||
|
--prompt "<text>" Run one prompt and exit
|
||||||
|
--model provider:model Force a specific model
|
||||||
|
--thinking level off | low | medium | high
|
||||||
|
--cwd /path/to/workdir Working directory for tools
|
||||||
|
--session-dir /path Session storage directory`);
|
||||||
|
}
|
||||||
|
|
||||||
|
function parseModelSpec(spec: string, modelRegistry: ModelRegistry) {
|
||||||
|
const trimmed = spec.trim();
|
||||||
|
const separator = trimmed.includes(":") ? ":" : trimmed.includes("/") ? "/" : null;
|
||||||
|
if (!separator) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
const [provider, ...rest] = trimmed.split(separator);
|
||||||
|
const id = rest.join(separator);
|
||||||
|
if (!provider || !id) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
return modelRegistry.find(provider, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
function normalizeThinkingLevel(value: string | undefined): ThinkingLevel | undefined {
|
||||||
|
if (!value) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
const normalized = value.toLowerCase();
|
||||||
|
if (normalized === "off" || normalized === "low" || normalized === "medium" || normalized === "high") {
|
||||||
|
return normalized;
|
||||||
|
}
|
||||||
|
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Entry point: parse CLI flags, wire up the pi-coding-agent session
 * (auth, model registry, resource loader, session manager), then either
 * run a single prompt (--prompt / positionals) or drop into a REPL.
 *
 * Order matters here: settings/auth must exist before the model registry,
 * the resource loader must be reloaded before the session is created.
 */
async function main(): Promise<void> {
  // App root is one directory above the compiled/executed src file.
  const here = dirname(fileURLToPath(import.meta.url));
  const appRoot = resolve(here, "..");

  const { values, positionals } = parseArgs({
    allowPositionals: true,
    options: {
      cwd: { type: "string" },
      help: { type: "boolean" },
      model: { type: "string" },
      "new-session": { type: "boolean" },
      prompt: { type: "string" },
      "session-dir": { type: "string" },
      thinking: { type: "string" },
    },
  });

  if (values.help) {
    printHelp();
    return;
  }

  // Tools operate in --cwd (default: current dir); sessions persist under
  // --session-dir (default: <appRoot>/.feynman/sessions).
  const workingDir = resolve(values.cwd ?? process.cwd());
  const sessionDir = resolve(values["session-dir"] ?? resolve(appRoot, ".feynman", "sessions"));
  mkdirSync(sessionDir, { recursive: true });
  const settingsManager = SettingsManager.create(appRoot);

  const authStorage = AuthStorage.create();
  const modelRegistry = new ModelRegistry(authStorage);
  // CLI flag wins over the FEYNMAN_MODEL env default.
  const explicitModelSpec = values.model ?? process.env.FEYNMAN_MODEL;
  const explicitModel = explicitModelSpec ? parseModelSpec(explicitModelSpec, modelRegistry) : undefined;

  // A spec that was given but did not resolve is a hard error — do not
  // silently fall back to some other model.
  if (explicitModelSpec && !explicitModel) {
    throw new Error(`Unknown model: ${explicitModelSpec}`);
  }

  // With no explicit model, fail fast when no provider is configured at all;
  // otherwise createAgentSession picks a model (NOTE(review): selection
  // policy lives in pi-coding-agent — confirm its default behavior).
  if (!explicitModel) {
    const available = await modelRegistry.getAvailable();
    if (available.length === 0) {
      throw new Error(
        "No models are available. Configure pi auth or export a provider API key before starting Feynman.",
      );
    }
  }

  // CLI flag > FEYNMAN_THINKING env > "medium".
  const thinkingLevel = normalizeThinkingLevel(values.thinking ?? process.env.FEYNMAN_THINKING) ?? "medium";

  // Resources (extensions/prompts/skills) are rooted at the app, not the
  // working dir, so Feynman's bundled assets load regardless of --cwd.
  const resourceLoader = new DefaultResourceLoader({
    cwd: appRoot,
    additionalExtensionPaths: [resolve(appRoot, "extensions")],
    additionalPromptTemplatePaths: [resolve(appRoot, "prompts")],
    additionalSkillPaths: [resolve(appRoot, "skills")],
    settingsManager,
    // Replace the stock coding-agent prompt entirely with Feynman's.
    systemPromptOverride: () => FEYNMAN_SYSTEM_PROMPT,
    appendSystemPromptOverride: () => [],
  });
  await resourceLoader.reload();

  // --new-session forces a fresh session; otherwise resume the most recent
  // one for this working dir (presumably keyed by workingDir — verify in
  // pi-coding-agent's SessionManager).
  const sessionManager = values["new-session"]
    ? SessionManager.create(workingDir, sessionDir)
    : SessionManager.continueRecent(workingDir, sessionDir);

  const { session } = await createAgentSession({
    authStorage,
    cwd: workingDir,
    model: explicitModel,
    modelRegistry,
    resourceLoader,
    sessionManager,
    settingsManager,
    thinkingLevel,
    tools: createCodingTools(workingDir),
  });

  // Stream assistant text to stdout; tool activity goes to stderr so piping
  // stdout captures only the model's answer.
  session.subscribe((event) => {
    if (event.type === "message_update" && event.assistantMessageEvent.type === "text_delta") {
      process.stdout.write(event.assistantMessageEvent.delta);
      return;
    }

    if (event.type === "tool_execution_start") {
      process.stderr.write(`\n[tool] ${event.toolName}\n`);
      return;
    }

    if (event.type === "tool_execution_end" && event.isError) {
      process.stderr.write(`[tool-error] ${event.toolName}\n`);
    }
  });

  // One-shot mode: --prompt wins; bare positionals are joined into a prompt.
  const initialPrompt = values.prompt ?? (positionals.length > 0 ? positionals.join(" ") : undefined);

  if (initialPrompt) {
    await session.prompt(initialPrompt);
    process.stdout.write("\n");
    session.dispose();
    return;
  }

  // Interactive REPL mode.
  console.log("Feynman research agent");
  console.log(`working dir: ${workingDir}`);
  console.log(`session dir: ${sessionDir}`);
  console.log("type /help for commands");

  const rl = readline.createInterface({ input, output });

  try {
    while (true) {
      const line = (await rl.question("feynman> ")).trim();
      if (!line) {
        continue;
      }

      if (line === "/exit" || line === "/quit") {
        break;
      }

      if (line === "/help") {
        printHelp();
        continue;
      }

      if (line === "/new") {
        await session.newSession();
        console.log("started a new session");
        continue;
      }

      // Everything else — including the /lit-review-style template commands
      // from the help text — is forwarded verbatim to the session
      // (presumably expanded by pi's prompt-template machinery; verify).
      await session.prompt(line);
      process.stdout.write("\n");
    }
  } finally {
    // Always release the readline interface and the session, even if a
    // prompt throws mid-loop.
    rl.close();
    session.dispose();
  }
}
|
||||||
|
|
||||||
|
main().catch((error) => {
|
||||||
|
console.error(error instanceof Error ? error.message : String(error));
|
||||||
|
process.exitCode = 1;
|
||||||
|
});
|
||||||
19
tsconfig.json
Normal file
19
tsconfig.json
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2022",
|
||||||
|
"module": "NodeNext",
|
||||||
|
"moduleResolution": "NodeNext",
|
||||||
|
"strict": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"allowSyntheticDefaultImports": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"types": [
|
||||||
|
"node"
|
||||||
|
],
|
||||||
|
"noEmit": true
|
||||||
|
},
|
||||||
|
"include": [
|
||||||
|
"src/**/*.ts",
|
||||||
|
"extensions/**/*.ts"
|
||||||
|
]
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user