From dbdad94adc3965cc551adcffe04d72890d38bd11 Mon Sep 17 00:00:00 2001 From: Advait Paliwal Date: Sun, 22 Mar 2026 14:36:47 -0700 Subject: [PATCH] Add AI research review workflows --- .pi/agents/review.chain.md | 22 ++++++++++++++++++++++ .pi/agents/reviewer.md | 33 +++++++++++++++++++++++++++++++++ README.md | 6 ++++++ extensions/research-tools.ts | 20 +++++++++++++++++--- prompts/ablate.md | 17 +++++++++++++++++ prompts/rebuttal.md | 18 ++++++++++++++++++ prompts/related.md | 19 +++++++++++++++++++ prompts/review.md | 24 ++++++++++++++++++++++++ src/feynman-prompt.ts | 4 +++- src/index.ts | 4 ++++ 10 files changed, 163 insertions(+), 4 deletions(-) create mode 100644 .pi/agents/review.chain.md create mode 100644 .pi/agents/reviewer.md create mode 100644 prompts/ablate.md create mode 100644 prompts/rebuttal.md create mode 100644 prompts/related.md create mode 100644 prompts/review.md diff --git a/.pi/agents/review.chain.md b/.pi/agents/review.chain.md new file mode 100644 index 0000000..b93e2a1 --- /dev/null +++ b/.pi/agents/review.chain.md @@ -0,0 +1,22 @@ +--- +name: review +description: Gather evidence, verify claims, and simulate a peer review for an AI research artifact. +--- + +## researcher +output: research.md + +Inspect the target paper, draft, code, cited work, and any linked experimental artifacts for {task}. Gather the strongest primary evidence that matters for a review. + +## verifier +reads: research.md +output: verification.md + +Audit research.md for unsupported claims, reproducibility gaps, stale or weak evidence, and paper-code mismatches relevant to {task}. + +## reviewer +reads: research.md+verification.md +output: review.md +progress: true + +Write the final simulated peer review for {task} using research.md and verification.md. Include likely reviewer objections, severity, and a concrete revision plan. 
diff --git a/.pi/agents/reviewer.md b/.pi/agents/reviewer.md new file mode 100644 index 0000000..2a70c38 --- /dev/null +++ b/.pi/agents/reviewer.md @@ -0,0 +1,33 @@ +--- +name: reviewer +description: Simulate a tough but constructive AI research peer reviewer. +thinking: high +output: review.md +defaultProgress: true +--- + +You are Feynman's AI research reviewer. + +Your job is to act like a skeptical but fair peer reviewer for AI/ML systems work. + +Operating rules: +- Evaluate novelty, clarity, empirical rigor, reproducibility, and likely reviewer pushback. +- Do not praise vaguely. Every positive claim should be tied to specific evidence. +- Look for: + - missing or weak baselines + - missing ablations + - evaluation mismatches + - unclear claims of novelty + - weak related-work positioning + - insufficient statistical evidence + - benchmark leakage or contamination risks + - under-specified implementation details + - claims that outrun the experiments +- Produce reviewer-style output with severity and concrete fixes. +- Distinguish between fatal issues, strong concerns, and polish issues. +- Preserve uncertainty. If the draft might pass depending on venue norms, say so explicitly. +- End with a `Sources` section containing direct URLs for anything additionally inspected during review. + +Default output expectations: +- Save the main artifact to `review.md`. +- Optimize for reviewer realism and actionable criticism. 
diff --git a/README.md b/README.md index b151451..1343b4a 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,10 @@ Inside the REPL: - `/new` starts a new persisted session - `/exit` quits - `/lit <topic>` expands the literature-review prompt template +- `/related <topic>` builds the related-work and justification view +- `/review <artifact>` simulates a peer review for an AI research artifact +- `/ablate <artifact>` designs the minimum convincing ablation set +- `/rebuttal <artifact>` drafts a rebuttal and revision matrix - `/replicate <paper or claim>` expands the replication prompt template - `/reading <topic>` expands the reading-list prompt template - `/memo <topic>` expands the general research memo prompt template @@ -109,8 +113,10 @@ Feynman also ships bundled research subagents in `.pi/agents/`: - `researcher` for evidence gathering - `verifier` for claim and source checking +- `reviewer` for peer-review style criticism - `writer` for polished memo and draft writing - `deep` chain for gather → verify → synthesize +- `review` chain for gather → verify → peer review - `auto` chain for plan → gather → verify → draft Feynman uses `@companion-ai/alpha-hub` directly in-process rather than shelling out to the CLI. diff --git a/extensions/research-tools.ts b/extensions/research-tools.ts index ccbfc2d..b4d7e43 100644 --- a/extensions/research-tools.ts +++ b/extensions/research-tools.ts @@ -562,7 +562,16 @@ function buildProjectAgentsTemplate(): string { This file is read automatically at startup. It is the durable project memory for Feynman. ## Project Overview -- State the research question, target artifact, and key datasets here. +- State the research question, target artifact, target venue, and key datasets or benchmarks here. + +## AI Research Context +- Problem statement: +- Core hypothesis: +- Closest prior work: +- Required baselines: +- Required ablations: +- Primary metrics: +- Datasets / benchmarks: ## Ground Rules - Do not modify raw data in \`Data/Raw/\` or equivalent raw-data folders. 
@@ -575,6 +584,11 @@ This file is read automatically at startup. It is the durable project memory for ## Session Logging - Use \`/log\` at the end of meaningful sessions to write a durable session note into \`notes/session-logs/\`. + +## Review Readiness +- Known reviewer concerns: +- Missing experiments: +- Missing writing or framing work: `; } @@ -613,9 +627,9 @@ export default function researchTools(pi: ExtensionAPI): void { const recentActivity = getRecentActivitySummary(ctx); const shortcuts = [ ["/lit", "survey papers on a topic"], - ["/deepresearch", "run a source-heavy research pass"], + ["/review", "simulate a peer review"], ["/draft", "draft a paper-style writeup"], - ["/jobs", "inspect active background work"], + ["/deepresearch", "run a source-heavy research pass"], ]; const lines: string[] = []; diff --git a/prompts/ablate.md b/prompts/ablate.md new file mode 100644 index 0000000..85a7817 --- /dev/null +++ b/prompts/ablate.md @@ -0,0 +1,17 @@ +--- +description: Design the smallest convincing ablation set for an AI research project. +--- +Design an ablation plan for: $@ + +Requirements: +- Identify the exact claims the paper is making. +- For each claim, determine what ablation or control is necessary to support it. +- Prefer the `verifier` subagent when the claim structure is complicated. +- Distinguish: + - must-have ablations + - nice-to-have ablations + - unnecessary experiments +- Call out where benchmark norms imply mandatory controls. +- Optimize for the minimum convincing set, not experiment sprawl. +- Save the plan to `outputs/` as markdown if the user wants a durable artifact. +- End with a `Sources` section containing direct URLs for any external sources used. diff --git a/prompts/rebuttal.md b/prompts/rebuttal.md new file mode 100644 index 0000000..e420ab6 --- /dev/null +++ b/prompts/rebuttal.md @@ -0,0 +1,18 @@ +--- +description: Turn reviewer comments into a structured rebuttal and revision plan for an AI research paper. 
+--- +Prepare a rebuttal workflow for: $@ + +Requirements: +- If reviewer comments are provided, organize them into a response matrix. +- If reviewer comments are not yet provided, infer the likely strongest objections from the current draft and review them before drafting responses. +- Prefer the `reviewer` subagent or the project `review` chain when fresh critical review is still needed. +- For each issue, produce: + - reviewer concern + - whether it is valid + - evidence available now + - paper changes needed + - rebuttal language +- Do not overclaim fixes that have not been implemented. +- Save the rebuttal matrix to `outputs/` as markdown. +- End with a `Sources` section containing direct URLs for all inspected external sources. diff --git a/prompts/related.md b/prompts/related.md new file mode 100644 index 0000000..a1fa068 --- /dev/null +++ b/prompts/related.md @@ -0,0 +1,19 @@ +--- +description: Build a related-work map and justify why an AI research project needs to exist. +--- +Build the related-work and justification view for: $@ + +Requirements: +- Search for the closest and strongest relevant papers first. +- Prefer the `researcher` subagent when the space is broad or moving quickly. +- Identify: + - foundational papers + - closest prior work + - strongest recent competing approaches + - benchmarks and evaluation norms + - critiques or known weaknesses in the area +- For each important paper, explain why it matters to this project. +- Be explicit about what real gap remains after considering the strongest prior work. +- If the project is not differentiated enough, say so clearly. +- Save the artifact to `outputs/` as markdown if the user wants a durable result. +- End with a `Sources` section containing direct URLs. 
diff --git a/prompts/review.md b/prompts/review.md new file mode 100644 index 0000000..99486a8 --- /dev/null +++ b/prompts/review.md @@ -0,0 +1,24 @@ +--- +description: Simulate an AI research peer review with likely objections, severity, and a concrete revision plan. +--- +Review this AI research artifact: $@ + +Requirements: +- Prefer the project `review` chain or the `researcher` + `verifier` + `reviewer` subagents when the artifact is large or the review needs to inspect paper, code, and experiments together. +- Inspect the strongest relevant sources directly before making strong review claims. +- If the artifact is a paper or draft, evaluate: + - novelty and related-work positioning + - clarity of claims + - baseline fairness + - evaluation design + - missing ablations + - reproducibility details + - whether conclusions outrun the evidence +- If code or experiment artifacts exist, compare them against the claimed method and evaluation. +- Produce: + - short verdict + - likely reviewer objections + - severity for each issue + - revision plan in priority order +- Save the review to `outputs/` as markdown. +- End with a `Sources` section containing direct URLs for every inspected external source. diff --git a/src/feynman-prompt.ts b/src/feynman-prompt.ts index 2a2a086..55a0ebe 100644 --- a/src/feynman-prompt.ts +++ b/src/feynman-prompt.ts @@ -16,8 +16,9 @@ Operating rules: - Never answer a latest/current question from arXiv or alpha-backed paper search alone. - For AI model or product claims, prefer official docs/vendor pages plus recent web sources over old papers. - Use the installed Pi research packages for broader web/PDF access, document parsing, citation workflows, background processes, memory, session recall, and delegated subtasks when they reduce friction. -- Feynman ships project subagents for research work. 
Prefer the \`researcher\`, \`verifier\`, and \`writer\` subagents for larger research tasks, and use the project \`deep\` or \`auto\` chains when a multi-step delegated workflow clearly fits. +- Feynman ships project subagents for research work. Prefer the \`researcher\`, \`verifier\`, \`reviewer\`, and \`writer\` subagents for larger research tasks, and use the project \`deep\`, \`review\`, or \`auto\` chains when a multi-step delegated workflow clearly fits. - Use subagents when decomposition meaningfully reduces context pressure or lets you parallelize evidence gathering. For detached long-running work, prefer background subagent execution with \`clarify: false, async: true\`. +- For AI research artifacts, default to pressure-testing the work before polishing it. Use review-style workflows to check novelty positioning, evaluation design, baseline fairness, ablations, reproducibility, and likely reviewer objections. - Use the visualization packages when a chart, diagram, or interactive widget would materially improve understanding. Prefer charts for quantitative comparisons, Mermaid for simple process/architecture diagrams, and interactive HTML widgets for exploratory visual explanations. - Persistent memory is package-backed. Use \`memory_search\` to recall prior preferences and lessons, \`memory_remember\` to store explicit durable facts, and \`memory_lessons\` when prior corrections matter. - If the user says "remember", states a stable preference, or asks for something to be the default in future sessions, call \`memory_remember\`. Do not just say you will remember it. @@ -33,6 +34,7 @@ Operating rules: - When citing papers from alpha-backed tools, prefer direct arXiv or alphaXiv links and include the arXiv ID. - After writing a polished artifact, use \`preview_file\` when the user wants to review it in a browser or PDF viewer. 
- Default toward delivering a concrete artifact when the task naturally calls for one: reading list, memo, audit, experiment log, or draft. +- Strong default AI-research artifacts include: related-work map, peer-review simulation, ablation plan, reproducibility audit, and rebuttal matrix. - Default artifact locations: - outputs/ for reviews, reading lists, and summaries - experiments/ for runnable experiment code and result logs diff --git a/src/index.ts b/src/index.ts index 96569e7..afbb29a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -212,6 +212,10 @@ function printHelp(): void { /new Start a fresh persisted session /exit Quit the REPL /lit Expand the literature review prompt template + /related Map related work and justify the research gap + /review Simulate a peer review for an AI research artifact + /ablate Design the minimum convincing ablation set + /rebuttal Draft a rebuttal and revision matrix /replicate Expand the replication prompt template /reading Expand the reading list prompt template /memo Expand the general research memo prompt template