Initial Feynman research agent scaffold
This commit is contained in:
8
.env.example
Normal file
8
.env.example
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
# Optional runtime defaults for Feynman.
|
||||||
|
# Provider credentials are read by pi-coding-agent in the usual ways.
|
||||||
|
|
||||||
|
FEYNMAN_MODEL=
|
||||||
|
FEYNMAN_THINKING=medium
|
||||||
|
|
||||||
|
OPENAI_API_KEY=
|
||||||
|
ANTHROPIC_API_KEY=
|
||||||
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
node_modules
|
||||||
|
.env
|
||||||
|
.feynman
|
||||||
|
outputs/*
|
||||||
|
!outputs/.gitkeep
|
||||||
12
.pi/settings.json
Normal file
12
.pi/settings.json
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"packages": [
|
||||||
|
"npm:pi-subagents",
|
||||||
|
"npm:pi-docparser",
|
||||||
|
"npm:pi-web-access",
|
||||||
|
"npm:pi-markdown-preview",
|
||||||
|
"npm:@kaiserlich-dev/pi-session-search",
|
||||||
|
"npm:@aliou/pi-processes",
|
||||||
|
"npm:pi-wandb",
|
||||||
|
"npm:pi-zotero"
|
||||||
|
]
|
||||||
|
}
|
||||||
87
README.md
Normal file
87
README.md
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
# Feynman
|
||||||
|
|
||||||
|
`feynman` is a research-first agent scaffold built on `@mariozechner/pi-coding-agent`.
|
||||||
|
|
||||||
|
It keeps the useful parts of a coding agent:
|
||||||
|
- file access
|
||||||
|
- shell execution
|
||||||
|
- persistent sessions
|
||||||
|
- skills
|
||||||
|
- custom extensions
|
||||||
|
|
||||||
|
But it biases the runtime toward research work:
|
||||||
|
- literature review
|
||||||
|
- paper lookup
|
||||||
|
- replication planning
|
||||||
|
- experiment design
|
||||||
|
- writing notes and reports
|
||||||
|
|
||||||
|
The primary paper backend is `@companion-ai/alpha-hub` and your alphaXiv account.
|
||||||
|
The rest of the workflow is augmented through a curated `.pi/settings.json` package stack.
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /Users/advaitpaliwal/Companion/Code/feynman
|
||||||
|
npm install
|
||||||
|
cp .env.example .env
|
||||||
|
npm run start
|
||||||
|
```
|
||||||
|
|
||||||
|
If you already use `pi`, Feynman will reuse the usual auth/config locations for model access.
|
||||||
|
|
||||||
|
Before deep paper work, make sure alphaXiv auth is set up:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npx @companion-ai/alpha-hub login
|
||||||
|
```
|
||||||
|
|
||||||
|
## Commands
|
||||||
|
|
||||||
|
Inside the REPL:
|
||||||
|
|
||||||
|
- `/help` shows local commands
|
||||||
|
- `/new` starts a new persisted session
|
||||||
|
- `/exit` quits
|
||||||
|
- `/lit-review <topic>` expands the literature-review prompt template
|
||||||
|
- `/replicate <paper or claim>` expands the replication prompt template
|
||||||
|
- `/reading-list <topic>` expands the reading-list prompt template
|
||||||
|
- `/paper-code-audit <item>` expands the paper/code audit prompt template
|
||||||
|
- `/paper-draft <topic>` expands the paper-style writing prompt template
|
||||||
|
|
||||||
|
## Custom Tools
|
||||||
|
|
||||||
|
The starter extension adds:
|
||||||
|
|
||||||
|
- `alpha_search` for alphaXiv-backed paper discovery
|
||||||
|
- `alpha_get_paper` for fetching paper reports or raw text
|
||||||
|
- `alpha_ask_paper` for targeted paper Q&A
|
||||||
|
- `alpha_annotate_paper` for persistent local notes
|
||||||
|
- `alpha_list_annotations` for recall across sessions
|
||||||
|
- `alpha_read_code` for reading a paper repository
|
||||||
|
|
||||||
|
Feynman uses `@companion-ai/alpha-hub` directly in-process rather than shelling out to the CLI.
|
||||||
|
|
||||||
|
## Curated Pi Stack
|
||||||
|
|
||||||
|
Feynman loads a lean research stack from [.pi/settings.json](/Users/advaitpaliwal/Companion/Code/feynman/.pi/settings.json):
|
||||||
|
|
||||||
|
- `pi-subagents` for parallel literature gathering and decomposition
|
||||||
|
- `pi-docparser` for PDFs, Office docs, spreadsheets, and images
|
||||||
|
- `pi-web-access` for broader web, GitHub, PDF, and media access
|
||||||
|
- `pi-markdown-preview` for polished Markdown and LaTeX-heavy research writeups
|
||||||
|
- `@kaiserlich-dev/pi-session-search` for recall across long-running research threads
|
||||||
|
- `@aliou/pi-processes` for long-running experiments and log tails
|
||||||
|
- `pi-wandb` for experiment tracking
|
||||||
|
- `pi-zotero` for citation-library workflows
|
||||||
|
|
||||||
|
## Layout
|
||||||
|
|
||||||
|
```text
|
||||||
|
feynman/
|
||||||
|
├── extensions/ # Custom research tools
|
||||||
|
├── papers/ # Polished paper-style drafts and writeups
|
||||||
|
├── prompts/ # Slash-style prompt templates
|
||||||
|
├── skills/ # Research workflows
|
||||||
|
└── src/ # Minimal REPL wrapper around pi-coding-agent
|
||||||
|
```
|
||||||
1
experiments/.gitkeep
Normal file
1
experiments/.gitkeep
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
171
extensions/research-tools.ts
Normal file
171
extensions/research-tools.ts
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
import {
|
||||||
|
annotatePaper,
|
||||||
|
askPaper,
|
||||||
|
clearPaperAnnotation,
|
||||||
|
disconnect,
|
||||||
|
getPaper,
|
||||||
|
listPaperAnnotations,
|
||||||
|
readPaperCode,
|
||||||
|
searchPapers,
|
||||||
|
} from "@companion-ai/alpha-hub/lib";
|
||||||
|
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
||||||
|
import { Type } from "@sinclair/typebox";
|
||||||
|
|
||||||
|
function formatToolText(result: unknown): string {
|
||||||
|
return typeof result === "string" ? result : JSON.stringify(result, null, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
export default function researchTools(pi: ExtensionAPI): void {
|
||||||
|
pi.registerTool({
|
||||||
|
name: "alpha_search",
|
||||||
|
label: "Alpha Search",
|
||||||
|
description: "Search papers through alphaXiv using semantic, keyword, both, agentic, or all retrieval modes.",
|
||||||
|
parameters: Type.Object({
|
||||||
|
query: Type.String({ description: "Paper search query." }),
|
||||||
|
mode: Type.Optional(
|
||||||
|
Type.String({
|
||||||
|
description: "Search mode: semantic, keyword, both, agentic, or all.",
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
}),
|
||||||
|
async execute(_toolCallId, params) {
|
||||||
|
try {
|
||||||
|
const result = await searchPapers(params.query, params.mode?.trim() || "all");
|
||||||
|
return {
|
||||||
|
content: [{ type: "text", text: formatToolText(result) }],
|
||||||
|
details: result,
|
||||||
|
};
|
||||||
|
} finally {
|
||||||
|
await disconnect();
|
||||||
|
}
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
pi.registerTool({
|
||||||
|
name: "alpha_get_paper",
|
||||||
|
label: "Alpha Get Paper",
|
||||||
|
description: "Fetch a paper report or full text, plus any local annotation, using alphaXiv.",
|
||||||
|
parameters: Type.Object({
|
||||||
|
paper: Type.String({
|
||||||
|
description: "arXiv ID, arXiv URL, or alphaXiv URL.",
|
||||||
|
}),
|
||||||
|
fullText: Type.Optional(
|
||||||
|
Type.Boolean({
|
||||||
|
description: "Return raw full text instead of the AI report.",
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
}),
|
||||||
|
async execute(_toolCallId, params) {
|
||||||
|
try {
|
||||||
|
const result = await getPaper(params.paper, { fullText: params.fullText });
|
||||||
|
return {
|
||||||
|
content: [{ type: "text", text: formatToolText(result) }],
|
||||||
|
details: result,
|
||||||
|
};
|
||||||
|
} finally {
|
||||||
|
await disconnect();
|
||||||
|
}
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
pi.registerTool({
|
||||||
|
name: "alpha_ask_paper",
|
||||||
|
label: "Alpha Ask Paper",
|
||||||
|
description: "Ask a targeted question about a paper using alphaXiv's PDF analysis.",
|
||||||
|
parameters: Type.Object({
|
||||||
|
paper: Type.String({
|
||||||
|
description: "arXiv ID, arXiv URL, or alphaXiv URL.",
|
||||||
|
}),
|
||||||
|
question: Type.String({
|
||||||
|
description: "Question to ask about the paper.",
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
async execute(_toolCallId, params) {
|
||||||
|
try {
|
||||||
|
const result = await askPaper(params.paper, params.question);
|
||||||
|
return {
|
||||||
|
content: [{ type: "text", text: formatToolText(result) }],
|
||||||
|
details: result,
|
||||||
|
};
|
||||||
|
} finally {
|
||||||
|
await disconnect();
|
||||||
|
}
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
pi.registerTool({
|
||||||
|
name: "alpha_annotate_paper",
|
||||||
|
label: "Alpha Annotate Paper",
|
||||||
|
description: "Write or clear a persistent local annotation for a paper.",
|
||||||
|
parameters: Type.Object({
|
||||||
|
paper: Type.String({
|
||||||
|
description: "Paper ID to annotate.",
|
||||||
|
}),
|
||||||
|
note: Type.Optional(
|
||||||
|
Type.String({
|
||||||
|
description: "Annotation text. Omit when clear=true.",
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
clear: Type.Optional(
|
||||||
|
Type.Boolean({
|
||||||
|
description: "Clear the existing annotation instead of writing one.",
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
}),
|
||||||
|
async execute(_toolCallId, params) {
|
||||||
|
const result = params.clear
|
||||||
|
? await clearPaperAnnotation(params.paper)
|
||||||
|
: params.note
|
||||||
|
? await annotatePaper(params.paper, params.note)
|
||||||
|
: (() => {
|
||||||
|
throw new Error("Provide either note or clear=true.");
|
||||||
|
})();
|
||||||
|
|
||||||
|
return {
|
||||||
|
content: [{ type: "text", text: formatToolText(result) }],
|
||||||
|
details: result,
|
||||||
|
};
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
pi.registerTool({
|
||||||
|
name: "alpha_list_annotations",
|
||||||
|
label: "Alpha List Annotations",
|
||||||
|
description: "List all persistent local paper annotations.",
|
||||||
|
parameters: Type.Object({}),
|
||||||
|
async execute() {
|
||||||
|
const result = await listPaperAnnotations();
|
||||||
|
return {
|
||||||
|
content: [{ type: "text", text: formatToolText(result) }],
|
||||||
|
details: result,
|
||||||
|
};
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
pi.registerTool({
|
||||||
|
name: "alpha_read_code",
|
||||||
|
label: "Alpha Read Code",
|
||||||
|
description: "Read files from a paper's GitHub repository through alphaXiv.",
|
||||||
|
parameters: Type.Object({
|
||||||
|
githubUrl: Type.String({
|
||||||
|
description: "GitHub repository URL for the paper implementation.",
|
||||||
|
}),
|
||||||
|
path: Type.Optional(
|
||||||
|
Type.String({
|
||||||
|
description: "Repository path to inspect. Use / for the repo overview.",
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
}),
|
||||||
|
async execute(_toolCallId, params) {
|
||||||
|
try {
|
||||||
|
const result = await readPaperCode(params.githubUrl, params.path?.trim() || "/");
|
||||||
|
return {
|
||||||
|
content: [{ type: "text", text: formatToolText(result) }],
|
||||||
|
details: result,
|
||||||
|
};
|
||||||
|
} finally {
|
||||||
|
await disconnect();
|
||||||
|
}
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
1
notes/.gitkeep
Normal file
1
notes/.gitkeep
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
1
outputs/.gitkeep
Normal file
1
outputs/.gitkeep
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
4117
package-lock.json
generated
Normal file
4117
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
44
package.json
Normal file
44
package.json
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
{
|
||||||
|
"name": "feynman",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"private": true,
|
||||||
|
"description": "Research-first agent built on pi-coding-agent",
|
||||||
|
"type": "module",
|
||||||
|
"scripts": {
|
||||||
|
"dev": "tsx src/index.ts",
|
||||||
|
"start": "tsx src/index.ts",
|
||||||
|
"typecheck": "tsc --noEmit"
|
||||||
|
},
|
||||||
|
"keywords": [
|
||||||
|
"pi-package",
|
||||||
|
"research-agent",
|
||||||
|
"literature-review",
|
||||||
|
"experiments"
|
||||||
|
],
|
||||||
|
"pi": {
|
||||||
|
"extensions": [
|
||||||
|
"./extensions"
|
||||||
|
],
|
||||||
|
"skills": [
|
||||||
|
"./skills"
|
||||||
|
],
|
||||||
|
"prompts": [
|
||||||
|
"./prompts"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@companion-ai/alpha-hub": "^0.1.0",
|
||||||
|
"@mariozechner/pi-ai": "^0.56.1",
|
||||||
|
"@mariozechner/pi-coding-agent": "^0.56.1",
|
||||||
|
"@sinclair/typebox": "^0.34.41",
|
||||||
|
"dotenv": "^16.4.7"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/node": "^24.3.0",
|
||||||
|
"tsx": "^4.20.5",
|
||||||
|
"typescript": "^5.7.3"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=20.6.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
1
papers/.gitkeep
Normal file
1
papers/.gitkeep
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
13
prompts/lit-review.md
Normal file
13
prompts/lit-review.md
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
---
|
||||||
|
description: Run a literature review on a topic using paper search and primary-source synthesis.
|
||||||
|
---
|
||||||
|
Investigate the following topic as a literature review: $@
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Use `alpha_search` first.
|
||||||
|
- Use `alpha_get_paper` on the most relevant papers before making strong claims.
|
||||||
|
- Use `alpha_ask_paper` for targeted follow-up questions when the report is not enough.
|
||||||
|
- Prefer primary sources and note when something appears to be a preprint or secondary summary.
|
||||||
|
- Separate consensus, disagreements, and open questions.
|
||||||
|
- When useful, propose concrete next experiments or follow-up reading.
|
||||||
|
- If the user wants an artifact, write the review to disk as markdown.
|
||||||
12
prompts/paper-code-audit.md
Normal file
12
prompts/paper-code-audit.md
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
---
|
||||||
|
description: Compare a paper's claims against its public codebase and identify mismatches, omissions, and reproducibility risks.
|
||||||
|
---
|
||||||
|
Audit the paper and codebase for: $@
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Identify the canonical paper first with `alpha_search` and `alpha_get_paper`.
|
||||||
|
- Extract implementation-sensitive claims with `alpha_ask_paper`.
|
||||||
|
- If a public repo exists, inspect it with `alpha_read_code`.
|
||||||
|
- Compare claimed methods, defaults, metrics, and data handling against the repository.
|
||||||
|
- Call out missing code, mismatches, ambiguous defaults, and reproduction risks.
|
||||||
|
- Save the audit to `outputs/` as markdown.
|
||||||
19
prompts/paper-draft.md
Normal file
19
prompts/paper-draft.md
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
---
|
||||||
|
description: Turn research findings into a polished paper-style draft with equations, sections, and explicit claims.
|
||||||
|
---
|
||||||
|
Write a paper-style draft for: $@
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Ground every claim in inspected sources, experiments, or explicit inference.
|
||||||
|
- Use clean Markdown structure with LaTeX where equations materially help.
|
||||||
|
- Include at minimum:
|
||||||
|
- title
|
||||||
|
- abstract
|
||||||
|
- problem statement
|
||||||
|
- related work
|
||||||
|
- method or synthesis
|
||||||
|
- evidence or experiments
|
||||||
|
- limitations
|
||||||
|
- conclusion
|
||||||
|
- If citations are available, include citation placeholders or references clearly enough to convert later.
|
||||||
|
- Save the draft to `papers/` as markdown.
|
||||||
12
prompts/reading-list.md
Normal file
12
prompts/reading-list.md
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
---
|
||||||
|
description: Build a prioritized reading list on a research topic with rationale for each paper.
|
||||||
|
---
|
||||||
|
Create a research reading list for: $@
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Use `alpha_search` with `all` mode.
|
||||||
|
- Inspect the strongest papers with `alpha_get_paper`.
|
||||||
|
- Use `alpha_ask_paper` when a paper's fit is unclear.
|
||||||
|
- Group papers by role when useful: foundational, strongest recent work, methods, benchmarks, critiques, replication targets.
|
||||||
|
- For each paper, explain why it is on the list.
|
||||||
|
- Save the final reading list to `outputs/` as markdown.
|
||||||
14
prompts/replicate.md
Normal file
14
prompts/replicate.md
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
---
|
||||||
|
description: Plan or execute a replication workflow for a paper, claim, or benchmark.
|
||||||
|
---
|
||||||
|
Design a replication plan for: $@
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Identify the canonical paper or source material first.
|
||||||
|
- Use `alpha_get_paper` for the target paper.
|
||||||
|
- Use `alpha_ask_paper` to extract the exact implementation or evaluation details you still need.
|
||||||
|
- If the paper links code, inspect it with `alpha_read_code`.
|
||||||
|
- Determine what code, datasets, metrics, and environment are needed.
|
||||||
|
- If enough information is available locally, implement and run the replication steps.
|
||||||
|
- Save notes, scripts, and results to disk in a reproducible layout.
|
||||||
|
- Be explicit about what is verified, what is inferred, and what is still missing.
|
||||||
48
skills/research/experiment-design/SKILL.md
Normal file
48
skills/research/experiment-design/SKILL.md
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
---
|
||||||
|
name: experiment-design
|
||||||
|
description: Use this when the task is to turn a vague research idea into a testable experiment, define metrics, choose baselines, or plan ablations.
|
||||||
|
---
|
||||||
|
|
||||||
|
# Experiment Design
|
||||||
|
|
||||||
|
## When To Use
|
||||||
|
|
||||||
|
Use this skill when the user has:
|
||||||
|
- a hypothesis to test
|
||||||
|
- a method to evaluate
|
||||||
|
- an unclear benchmark plan
|
||||||
|
- a need for baselines, ablations, or metrics
|
||||||
|
|
||||||
|
## Procedure
|
||||||
|
|
||||||
|
1. Restate the research question as a falsifiable claim.
|
||||||
|
2. Define:
|
||||||
|
- independent variables
|
||||||
|
- dependent variables
|
||||||
|
- success metrics
|
||||||
|
- baselines
|
||||||
|
- constraints
|
||||||
|
3. Search for prior work first with `alpha_search` so you do not reinvent an obviously flawed setup.
|
||||||
|
4. Use `alpha_get_paper` and `alpha_ask_paper` on the strongest references.
|
||||||
|
5. Prefer the smallest experiment that can meaningfully reduce uncertainty.
|
||||||
|
6. List confounders and failure modes up front.
|
||||||
|
7. If implementation is requested, create the scripts, configs, and logging plan.
|
||||||
|
8. Write the plan to disk before running expensive work.
|
||||||
|
|
||||||
|
## Pitfalls
|
||||||
|
|
||||||
|
- Avoid experiments with no baseline.
|
||||||
|
- Avoid metrics that do not connect to the claim.
|
||||||
|
- Avoid ablations that change multiple variables at once.
|
||||||
|
- Avoid broad plans that cannot be executed with the current environment.
|
||||||
|
|
||||||
|
## Deliverable
|
||||||
|
|
||||||
|
Produce:
|
||||||
|
- hypothesis
|
||||||
|
- setup
|
||||||
|
- baselines
|
||||||
|
- metrics
|
||||||
|
- ablations
|
||||||
|
- risks
|
||||||
|
- next action
|
||||||
52
skills/research/literature-review/SKILL.md
Normal file
52
skills/research/literature-review/SKILL.md
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
---
|
||||||
|
name: literature-review
|
||||||
|
description: Use this when the task is to survey prior work, compare papers, synthesize a field, or build a reading list grounded in primary sources.
|
||||||
|
---
|
||||||
|
|
||||||
|
# Literature Review
|
||||||
|
|
||||||
|
## When To Use
|
||||||
|
|
||||||
|
Use this skill when the user wants:
|
||||||
|
- a research overview
|
||||||
|
- a paper shortlist
|
||||||
|
- a comparison of methods
|
||||||
|
- a synthesis of consensus and disagreement
|
||||||
|
- a source-backed brief on a topic
|
||||||
|
|
||||||
|
## Procedure
|
||||||
|
|
||||||
|
1. Search broadly first with `alpha_search`.
|
||||||
|
2. Pick the strongest candidates by direct relevance, recency, citations, and venue quality.
|
||||||
|
3. Inspect the top papers with `alpha_get_paper` before making concrete claims.
|
||||||
|
4. Use `alpha_ask_paper` for missing methodological or experimental details.
|
||||||
|
5. Build a compact evidence table:
|
||||||
|
- title
|
||||||
|
- year
|
||||||
|
- authors
|
||||||
|
- venue
|
||||||
|
- claim or contribution
|
||||||
|
- important caveats
|
||||||
|
6. Distinguish:
|
||||||
|
- what multiple sources agree on
|
||||||
|
- where methods or findings differ
|
||||||
|
- what remains unresolved
|
||||||
|
7. If the user wants a durable artifact, write a markdown brief to disk.
|
||||||
|
8. If you discover an important gotcha about a paper, save it with `alpha_annotate_paper`.
|
||||||
|
|
||||||
|
## Pitfalls
|
||||||
|
|
||||||
|
- Do not summarize a field from titles alone.
|
||||||
|
- Do not flatten disagreements into fake consensus.
|
||||||
|
- Do not treat recent preprints as established facts without saying so.
|
||||||
|
- Do not cite secondary commentary when a primary source is available.
|
||||||
|
|
||||||
|
## Output Shape
|
||||||
|
|
||||||
|
Prefer this structure:
|
||||||
|
- question
|
||||||
|
- strongest papers
|
||||||
|
- major findings
|
||||||
|
- disagreements or caveats
|
||||||
|
- open questions
|
||||||
|
- recommended next reading or experiments
|
||||||
50
skills/research/paper-code-audit/SKILL.md
Normal file
50
skills/research/paper-code-audit/SKILL.md
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
---
|
||||||
|
name: paper-code-audit
|
||||||
|
description: Use this when the task is to compare a paper against its repository, verify whether claims are implemented, or assess reproducibility risk.
|
||||||
|
---
|
||||||
|
|
||||||
|
# Paper Code Audit
|
||||||
|
|
||||||
|
## When To Use
|
||||||
|
|
||||||
|
Use this skill for:
|
||||||
|
- paper-versus-code verification
|
||||||
|
- implementation gap analysis
|
||||||
|
- reproducibility audits
|
||||||
|
- checking whether public code matches reported results
|
||||||
|
|
||||||
|
## Procedure
|
||||||
|
|
||||||
|
1. Locate the paper with `alpha_search`.
|
||||||
|
2. Load the paper with `alpha_get_paper`.
|
||||||
|
3. Extract implementation-relevant details using `alpha_ask_paper`:
|
||||||
|
- datasets
|
||||||
|
- preprocessing
|
||||||
|
- model architecture
|
||||||
|
- hyperparameters
|
||||||
|
- evaluation protocol
|
||||||
|
4. If the paper links a repository, inspect it using `alpha_read_code`.
|
||||||
|
5. Compare paper claims against code realities:
|
||||||
|
- are all components present
|
||||||
|
- do defaults match the paper
|
||||||
|
- are metrics/eval scripts exposed
|
||||||
|
- are hidden assumptions required
|
||||||
|
6. Record concrete mismatches, not vibes.
|
||||||
|
7. Save the audit in `outputs/`.
|
||||||
|
8. If you find a durable gotcha, save it with `alpha_annotate_paper`.
|
||||||
|
|
||||||
|
## Pitfalls
|
||||||
|
|
||||||
|
- Do not infer repository behavior without opening the relevant files.
|
||||||
|
- Do not assume README claims reflect the actual defaults.
|
||||||
|
- Do not mark something as missing if it exists under another name without checking.
|
||||||
|
|
||||||
|
## Deliverable
|
||||||
|
|
||||||
|
Include:
|
||||||
|
- paper summary
|
||||||
|
- repository coverage
|
||||||
|
- confirmed matches
|
||||||
|
- mismatches or omissions
|
||||||
|
- reproducibility risks
|
||||||
|
- recommended next actions
|
||||||
45
skills/research/paper-writing/SKILL.md
Normal file
45
skills/research/paper-writing/SKILL.md
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
---
|
||||||
|
name: paper-writing
|
||||||
|
description: Use this when the task is to turn research notes, experiments, or a literature review into a polished paper-style writeup with Markdown and LaTeX.
|
||||||
|
---
|
||||||
|
|
||||||
|
# Paper Writing
|
||||||
|
|
||||||
|
## When To Use
|
||||||
|
|
||||||
|
Use this skill for:
|
||||||
|
- research reports that should read like a paper
|
||||||
|
- internal memos with equations or formal structure
|
||||||
|
- polished writeups of experiments or literature reviews
|
||||||
|
- converting rough notes into a coherent draft
|
||||||
|
|
||||||
|
## Procedure
|
||||||
|
|
||||||
|
1. Make sure the underlying claims are already grounded in sources, experiments, or explicit caveats.
|
||||||
|
2. Build the draft around a proper research structure:
|
||||||
|
- title
|
||||||
|
- abstract
|
||||||
|
- introduction or problem statement
|
||||||
|
- related work
|
||||||
|
- approach, synthesis, or methodology
|
||||||
|
- evidence, experiments, or case studies
|
||||||
|
- limitations
|
||||||
|
- conclusion
|
||||||
|
3. Use Markdown by default.
|
||||||
|
4. Use LaTeX only where equations or notation genuinely improve clarity.
|
||||||
|
5. Keep claims falsifiable and scoped.
|
||||||
|
6. Save polished drafts to `papers/`.
|
||||||
|
|
||||||
|
## Pitfalls
|
||||||
|
|
||||||
|
- Do not use LaTeX for decoration.
|
||||||
|
- Do not make a draft look more certain than the evidence supports.
|
||||||
|
- Do not hide missing citations or weak evidence; flag them.
|
||||||
|
|
||||||
|
## Deliverable
|
||||||
|
|
||||||
|
A readable paper-style draft with:
|
||||||
|
- explicit structure
|
||||||
|
- traceable claims
|
||||||
|
- equations only where useful
|
||||||
|
- limitations stated plainly
|
||||||
49
skills/research/reading-list/SKILL.md
Normal file
49
skills/research/reading-list/SKILL.md
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
---
|
||||||
|
name: reading-list
|
||||||
|
description: Use this when the user wants a curated reading sequence, paper shortlist, or tiered set of papers for learning or project onboarding.
|
||||||
|
---
|
||||||
|
|
||||||
|
# Reading List
|
||||||
|
|
||||||
|
## When To Use
|
||||||
|
|
||||||
|
Use this skill for:
|
||||||
|
- getting up to speed on a topic
|
||||||
|
- onboarding into a research area
|
||||||
|
- choosing which papers to read first
|
||||||
|
- constructing a project-specific reading order
|
||||||
|
|
||||||
|
## Procedure
|
||||||
|
|
||||||
|
1. Start with `alpha_search` in `all` mode.
|
||||||
|
2. Inspect the strongest candidates with `alpha_get_paper`.
|
||||||
|
3. Use `alpha_ask_paper` for fit questions like:
|
||||||
|
- what problem does this really solve
|
||||||
|
- what assumptions does it rely on
|
||||||
|
- what prior work does it build on
|
||||||
|
4. Classify papers into roles:
|
||||||
|
- foundational
|
||||||
|
- key recent advances
|
||||||
|
- evaluation or benchmark references
|
||||||
|
- critiques or limitations
|
||||||
|
- likely replication targets
|
||||||
|
5. Order the list intentionally:
|
||||||
|
- start with orientation
|
||||||
|
- move to strongest methods
|
||||||
|
- finish with edges, critiques, or adjacent work
|
||||||
|
6. Write the final list as a durable markdown artifact in `outputs/`.
|
||||||
|
|
||||||
|
## Pitfalls
|
||||||
|
|
||||||
|
- Do not sort purely by citations.
|
||||||
|
- Do not over-index on recency when fundamentals matter.
|
||||||
|
- Do not include papers you have not inspected at all.
|
||||||
|
|
||||||
|
## Deliverable
|
||||||
|
|
||||||
|
For each paper include:
|
||||||
|
- title
|
||||||
|
- year
|
||||||
|
- why it matters
|
||||||
|
- when to read it in the sequence
|
||||||
|
- one caveat or limitation
|
||||||
52
skills/research/replication/SKILL.md
Normal file
52
skills/research/replication/SKILL.md
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
---
|
||||||
|
name: replication
|
||||||
|
description: Use this when the task is to reproduce a paper result, benchmark a claim, rebuild an experiment, or evaluate whether a published result holds in practice.
|
||||||
|
---
|
||||||
|
|
||||||
|
# Replication
|
||||||
|
|
||||||
|
## When To Use
|
||||||
|
|
||||||
|
Use this skill for:
|
||||||
|
- paper reproduction
|
||||||
|
- benchmark recreation
|
||||||
|
- ablation reruns
|
||||||
|
- claim verification through code and experiments
|
||||||
|
|
||||||
|
## Procedure
|
||||||
|
|
||||||
|
1. Identify the canonical source paper and inspect it with `alpha_get_paper`.
|
||||||
|
2. Extract the exact target:
|
||||||
|
- task
|
||||||
|
- dataset
|
||||||
|
- model or method
|
||||||
|
- metrics
|
||||||
|
- hardware or runtime assumptions
|
||||||
|
3. Use `alpha_ask_paper` to pull out the exact details missing from the report.
|
||||||
|
4. If the paper has a public repository, inspect it with `alpha_read_code`.
|
||||||
|
5. Search the local workspace for existing code, notebooks, configs, and datasets.
|
||||||
|
6. Write down the missing pieces explicitly before running anything.
|
||||||
|
7. If the environment is sufficient, implement the minimal runnable reproduction path.
|
||||||
|
8. Run the experiment with built-in file and shell tools.
|
||||||
|
9. Save:
|
||||||
|
- commands used
|
||||||
|
- configs
|
||||||
|
- raw outputs
|
||||||
|
- summarized results
|
||||||
|
10. Compare observed results with the paper and explain gaps.
|
||||||
|
11. If the paper had a practical gotcha, attach it with `alpha_annotate_paper`.
|
||||||
|
|
||||||
|
## Pitfalls
|
||||||
|
|
||||||
|
- Do not claim replication succeeded if key conditions were missing.
|
||||||
|
- Do not compare different metrics as if they were equivalent.
|
||||||
|
- Do not ignore dataset or preprocessing mismatch.
|
||||||
|
- Do not hide failed runs; record them and explain them.
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
A good replication outcome includes:
|
||||||
|
- the exact command path
|
||||||
|
- the data or config used
|
||||||
|
- the observed metrics
|
||||||
|
- a clear statement of match, partial match, or mismatch
|
||||||
34
src/feynman-prompt.ts
Normal file
34
src/feynman-prompt.ts
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
export const FEYNMAN_SYSTEM_PROMPT = `You are Feynman, a research-first AI agent.
|
||||||
|
|
||||||
|
Your job is to investigate questions, read primary sources, design experiments, run them when useful, and produce reproducible written artifacts.
|
||||||
|
|
||||||
|
Operating rules:
|
||||||
|
- Evidence over fluency.
|
||||||
|
- Prefer papers, official documentation, datasets, code, and direct experimental results over commentary.
|
||||||
|
- Separate observations from inferences.
|
||||||
|
- State uncertainty explicitly.
|
||||||
|
- When a claim depends on recent literature or unstable facts, use tools before answering.
|
||||||
|
- When discussing papers, cite title, year, and identifier or URL when possible.
|
||||||
|
- Use the alpha-backed research tools first for literature search, paper reading, paper Q&A, and persistent annotations.
|
||||||
|
- Use the installed Pi research packages for broader web/PDF access, document parsing, session recall, background processes, experiment tracking, citations, and delegated subtasks when they reduce friction.
|
||||||
|
- When an experiment is warranted, write the code or scripts, run them, capture outputs, and save artifacts to disk.
|
||||||
|
- Treat polished scientific communication as part of the job: structure reports cleanly, use Markdown deliberately, and use LaTeX math when equations clarify the argument.
|
||||||
|
- Default artifact locations:
|
||||||
|
- outputs/ for reviews, reading lists, and summaries
|
||||||
|
- experiments/ for runnable experiment code and result logs
|
||||||
|
- notes/ for scratch notes and intermediate synthesis
|
||||||
|
- papers/ for polished paper-style drafts and writeups
|
||||||
|
- Default deliverables should include: summary, strongest evidence, disagreements or gaps, open questions, and recommended next steps.
|
||||||
|
|
||||||
|
Default workflow:
|
||||||
|
1. Clarify the research objective if needed.
|
||||||
|
2. Search for relevant primary sources.
|
||||||
|
3. Inspect the most relevant papers or materials directly.
|
||||||
|
4. Synthesize consensus, disagreements, and missing evidence.
|
||||||
|
5. Design and run experiments when they would resolve uncertainty.
|
||||||
|
6. Write the requested output artifact.
|
||||||
|
|
||||||
|
Style:
|
||||||
|
- Concise, skeptical, and explicit.
|
||||||
|
- Avoid fake certainty.
|
||||||
|
- Do not present unverified claims as facts.`;
|
||||||
212
src/index.ts
Normal file
212
src/index.ts
Normal file
@@ -0,0 +1,212 @@
|
|||||||
|
import "dotenv/config";
|
||||||
|
|
||||||
|
import { mkdirSync } from "node:fs";
|
||||||
|
import { stdin as input, stdout as output } from "node:process";
|
||||||
|
import { dirname, resolve } from "node:path";
|
||||||
|
import { parseArgs } from "node:util";
|
||||||
|
import { fileURLToPath } from "node:url";
|
||||||
|
import readline from "node:readline/promises";
|
||||||
|
|
||||||
|
import {
|
||||||
|
AuthStorage,
|
||||||
|
createAgentSession,
|
||||||
|
createCodingTools,
|
||||||
|
DefaultResourceLoader,
|
||||||
|
ModelRegistry,
|
||||||
|
SessionManager,
|
||||||
|
SettingsManager,
|
||||||
|
} from "@mariozechner/pi-coding-agent";
|
||||||
|
|
||||||
|
import { FEYNMAN_SYSTEM_PROMPT } from "./feynman-prompt.js";
|
||||||
|
|
||||||
|
// Reasoning-effort levels accepted via --thinking or FEYNMAN_THINKING.
type ThinkingLevel = "off" | "low" | "medium" | "high";
|
||||||
|
|
||||||
|
function printHelp(): void {
|
||||||
|
console.log(`Feynman commands:
|
||||||
|
/help Show this help
|
||||||
|
/new Start a fresh persisted session
|
||||||
|
/exit Quit the REPL
|
||||||
|
/lit-review <topic> Expand the literature review prompt template
|
||||||
|
/replicate <paper> Expand the replication prompt template
|
||||||
|
/reading-list <topic> Expand the reading list prompt template
|
||||||
|
/paper-code-audit <item> Expand the paper/code audit prompt template
|
||||||
|
/paper-draft <topic> Expand the paper-style writing prompt template
|
||||||
|
|
||||||
|
CLI flags:
|
||||||
|
--prompt "<text>" Run one prompt and exit
|
||||||
|
--model provider:model Force a specific model
|
||||||
|
--thinking level off | low | medium | high
|
||||||
|
--cwd /path/to/workdir Working directory for tools
|
||||||
|
--session-dir /path Session storage directory`);
|
||||||
|
}
|
||||||
|
|
||||||
|
function parseModelSpec(spec: string, modelRegistry: ModelRegistry) {
|
||||||
|
const trimmed = spec.trim();
|
||||||
|
const separator = trimmed.includes(":") ? ":" : trimmed.includes("/") ? "/" : null;
|
||||||
|
if (!separator) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
const [provider, ...rest] = trimmed.split(separator);
|
||||||
|
const id = rest.join(separator);
|
||||||
|
if (!provider || !id) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
return modelRegistry.find(provider, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
function normalizeThinkingLevel(value: string | undefined): ThinkingLevel | undefined {
|
||||||
|
if (!value) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
const normalized = value.toLowerCase();
|
||||||
|
if (normalized === "off" || normalized === "low" || normalized === "medium" || normalized === "high") {
|
||||||
|
return normalized;
|
||||||
|
}
|
||||||
|
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Entry point: parse CLI flags, wire up the pi-coding-agent session
 * (auth, model registry, resource loader, session manager), then either
 * run a single prompt (--prompt / positionals) or drop into a REPL.
 *
 * Order matters here: settings/auth must exist before the model registry,
 * the resource loader must be reloaded before the session is created.
 */
async function main(): Promise<void> {
  // App root is one directory above the compiled/executed src file.
  const here = dirname(fileURLToPath(import.meta.url));
  const appRoot = resolve(here, "..");

  const { values, positionals } = parseArgs({
    allowPositionals: true,
    options: {
      cwd: { type: "string" },
      help: { type: "boolean" },
      model: { type: "string" },
      "new-session": { type: "boolean" },
      prompt: { type: "string" },
      "session-dir": { type: "string" },
      thinking: { type: "string" },
    },
  });

  if (values.help) {
    printHelp();
    return;
  }

  // Tools operate in --cwd (default: current dir); sessions persist under
  // --session-dir (default: <appRoot>/.feynman/sessions).
  const workingDir = resolve(values.cwd ?? process.cwd());
  const sessionDir = resolve(values["session-dir"] ?? resolve(appRoot, ".feynman", "sessions"));
  mkdirSync(sessionDir, { recursive: true });
  const settingsManager = SettingsManager.create(appRoot);

  const authStorage = AuthStorage.create();
  const modelRegistry = new ModelRegistry(authStorage);
  // CLI flag wins over the FEYNMAN_MODEL env default.
  const explicitModelSpec = values.model ?? process.env.FEYNMAN_MODEL;
  const explicitModel = explicitModelSpec ? parseModelSpec(explicitModelSpec, modelRegistry) : undefined;

  // A spec that was given but did not resolve is a hard error — do not
  // silently fall back to some other model.
  if (explicitModelSpec && !explicitModel) {
    throw new Error(`Unknown model: ${explicitModelSpec}`);
  }

  // With no explicit model, fail fast when no provider is configured at all;
  // otherwise createAgentSession picks a model (NOTE(review): selection
  // policy lives in pi-coding-agent — confirm its default behavior).
  if (!explicitModel) {
    const available = await modelRegistry.getAvailable();
    if (available.length === 0) {
      throw new Error(
        "No models are available. Configure pi auth or export a provider API key before starting Feynman.",
      );
    }
  }

  // CLI flag > FEYNMAN_THINKING env > "medium".
  const thinkingLevel = normalizeThinkingLevel(values.thinking ?? process.env.FEYNMAN_THINKING) ?? "medium";

  // Resources (extensions/prompts/skills) are rooted at the app, not the
  // working dir, so Feynman's bundled assets load regardless of --cwd.
  const resourceLoader = new DefaultResourceLoader({
    cwd: appRoot,
    additionalExtensionPaths: [resolve(appRoot, "extensions")],
    additionalPromptTemplatePaths: [resolve(appRoot, "prompts")],
    additionalSkillPaths: [resolve(appRoot, "skills")],
    settingsManager,
    // Replace the stock coding-agent prompt entirely with Feynman's.
    systemPromptOverride: () => FEYNMAN_SYSTEM_PROMPT,
    appendSystemPromptOverride: () => [],
  });
  await resourceLoader.reload();

  // --new-session forces a fresh session; otherwise resume the most recent
  // one for this working dir (presumably keyed by workingDir — verify in
  // pi-coding-agent's SessionManager).
  const sessionManager = values["new-session"]
    ? SessionManager.create(workingDir, sessionDir)
    : SessionManager.continueRecent(workingDir, sessionDir);

  const { session } = await createAgentSession({
    authStorage,
    cwd: workingDir,
    model: explicitModel,
    modelRegistry,
    resourceLoader,
    sessionManager,
    settingsManager,
    thinkingLevel,
    tools: createCodingTools(workingDir),
  });

  // Stream assistant text to stdout; tool activity goes to stderr so piping
  // stdout captures only the model's answer.
  session.subscribe((event) => {
    if (event.type === "message_update" && event.assistantMessageEvent.type === "text_delta") {
      process.stdout.write(event.assistantMessageEvent.delta);
      return;
    }

    if (event.type === "tool_execution_start") {
      process.stderr.write(`\n[tool] ${event.toolName}\n`);
      return;
    }

    if (event.type === "tool_execution_end" && event.isError) {
      process.stderr.write(`[tool-error] ${event.toolName}\n`);
    }
  });

  // One-shot mode: --prompt wins; bare positionals are joined into a prompt.
  const initialPrompt = values.prompt ?? (positionals.length > 0 ? positionals.join(" ") : undefined);

  if (initialPrompt) {
    await session.prompt(initialPrompt);
    process.stdout.write("\n");
    session.dispose();
    return;
  }

  // Interactive REPL mode.
  console.log("Feynman research agent");
  console.log(`working dir: ${workingDir}`);
  console.log(`session dir: ${sessionDir}`);
  console.log("type /help for commands");

  const rl = readline.createInterface({ input, output });

  try {
    while (true) {
      const line = (await rl.question("feynman> ")).trim();
      if (!line) {
        continue;
      }

      if (line === "/exit" || line === "/quit") {
        break;
      }

      if (line === "/help") {
        printHelp();
        continue;
      }

      if (line === "/new") {
        await session.newSession();
        console.log("started a new session");
        continue;
      }

      // Everything else — including the /lit-review-style template commands
      // from the help text — is forwarded verbatim to the session
      // (presumably expanded by pi's prompt-template machinery; verify).
      await session.prompt(line);
      process.stdout.write("\n");
    }
  } finally {
    // Always release the readline interface and the session, even if a
    // prompt throws mid-loop.
    rl.close();
    session.dispose();
  }
}
|
||||||
|
|
||||||
|
main().catch((error) => {
|
||||||
|
console.error(error instanceof Error ? error.message : String(error));
|
||||||
|
process.exitCode = 1;
|
||||||
|
});
|
||||||
19
tsconfig.json
Normal file
19
tsconfig.json
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2022",
|
||||||
|
"module": "NodeNext",
|
||||||
|
"moduleResolution": "NodeNext",
|
||||||
|
"strict": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"allowSyntheticDefaultImports": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"types": [
|
||||||
|
"node"
|
||||||
|
],
|
||||||
|
"noEmit": true
|
||||||
|
},
|
||||||
|
"include": [
|
||||||
|
"src/**/*.ts",
|
||||||
|
"extensions/**/*.ts"
|
||||||
|
]
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user