From fe2422496532853911821a3d70661c065caa18b4 Mon Sep 17 00:00:00 2001 From: Advait Paliwal Date: Wed, 15 Apr 2026 22:45:04 -0700 Subject: [PATCH] Add system-wide guardrails against fabricated results --- .feynman/SYSTEM.md | 2 ++ README.md | 2 +- package-lock.json | 4 ++-- package.json | 2 +- scripts/install/install.ps1 | 2 +- scripts/install/install.sh | 2 +- tests/content-policy.test.ts | 2 ++ website/public/install | 2 +- website/public/install.ps1 | 2 +- website/src/content/docs/getting-started/installation.md | 4 ++-- 10 files changed, 14 insertions(+), 10 deletions(-) diff --git a/.feynman/SYSTEM.md b/.feynman/SYSTEM.md index 87655e0..19a6fcc 100644 --- a/.feynman/SYSTEM.md +++ b/.feynman/SYSTEM.md @@ -24,6 +24,8 @@ Operating rules: - Do not force chain-shaped orchestration onto the user. Multi-agent decomposition is an internal tactic, not the primary UX. - For AI research artifacts, default to pressure-testing the work before polishing it. Use review-style workflows to check novelty positioning, evaluation design, baseline fairness, ablations, reproducibility, and likely reviewer objections. - Do not say `verified`, `confirmed`, `checked`, or `reproduced` unless you actually performed the check and can point to the supporting source, artifact, or command output. +- Never invent or fabricate experimental results, scores, datasets, sample sizes, ablations, benchmark tables, figures, images, charts, or quantitative comparisons. If the user asks for a paper, report, draft, figure, or result and the underlying data is missing, write a clearly labeled placeholder such as `No experimental results are available yet` or `TODO: run experiment`. +- Every quantitative result, figure, table, chart, image, or benchmark claim must trace to at least one explicit source URL, research note, raw artifact path, or script/command output. If provenance is missing, omit the claim or mark it as a planned measurement instead of presenting it as fact. - When a task involves calculations, code, or quantitative outputs, define the minimal test or oracle set before implementation and record the results of those checks before delivery. - If a plot, number, or conclusion looks cleaner than expected, assume it may be wrong until it survives explicit checks. Never smooth curves, drop inconvenient variations, or tune presentation-only outputs without stating that choice. - When a verification pass finds one issue, continue searching for others. Do not stop after the first error unless the whole branch is blocked. diff --git a/README.md b/README.md index 1d9dca9..b277784 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ curl -fsSL https://feynman.is/install | bash irm https://feynman.is/install.ps1 | iex ``` -The one-line installer fetches the latest tagged release. To pin a version, pass it explicitly, for example `curl -fsSL https://feynman.is/install | bash -s -- 0.2.18`. +The one-line installer fetches the latest tagged release. To pin a version, pass it explicitly, for example `curl -fsSL https://feynman.is/install | bash -s -- 0.2.19`. The installer downloads a standalone native bundle with its own Node.js runtime. diff --git a/package-lock.json b/package-lock.json index 0f772b9..ab61688 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@companion-ai/feynman", - "version": "0.2.18", + "version": "0.2.19", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@companion-ai/feynman", - "version": "0.2.18", + "version": "0.2.19", "hasInstallScript": true, "license": "MIT", "dependencies": { diff --git a/package.json b/package.json index 6478bd9..e7fb9bb 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@companion-ai/feynman", - "version": "0.2.18", + "version": "0.2.19", "description": "Research-first CLI agent built on Pi and alphaXiv", "license": "MIT", "type": "module", diff --git a/scripts/install/install.ps1 b/scripts/install/install.ps1 index 7a7f6d0..2032c1f 100644 --- a/scripts/install/install.ps1 +++ b/scripts/install/install.ps1 @@ -110,7 +110,7 @@ This usually means the release exists, but not all platform bundles were uploade Workarounds: - try again after the release finishes publishing - pass the latest published version explicitly, e.g.: - & ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.18 + & ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.19 "@ } diff --git a/scripts/install/install.sh b/scripts/install/install.sh index 0ddda73..41febfd 100644 --- a/scripts/install/install.sh +++ b/scripts/install/install.sh @@ -261,7 +261,7 @@ This usually means the release exists, but not all platform bundles were uploade Workarounds: - try again after the release finishes publishing - pass the latest published version explicitly, e.g.: - curl -fsSL https://feynman.is/install | bash -s -- 0.2.18 + curl -fsSL https://feynman.is/install | bash -s -- 0.2.19 EOF exit 1 fi diff --git a/tests/content-policy.test.ts b/tests/content-policy.test.ts index cf24a54..12c7078 100644 --- a/tests/content-policy.test.ts +++ b/tests/content-policy.test.ts @@ -33,10 +33,12 @@ test("bundled prompts and skills do not contain blocked promotional product cont test("draft workflow explicitly forbids fabricated results and unproven figures", () => { const draftPrompt = readFileSync(join(repoRoot, "prompts", "draft.md"), "utf8"); + const systemPrompt = readFileSync(join(repoRoot, ".feynman", "SYSTEM.md"), "utf8"); const writerPrompt = readFileSync(join(repoRoot, ".feynman", "agents", "writer.md"), "utf8"); const verifierPrompt = readFileSync(join(repoRoot, ".feynman", "agents", "verifier.md"), "utf8"); for (const [label, content] of [ + ["system prompt", systemPrompt], ["draft prompt", draftPrompt], ["writer prompt", writerPrompt], ["verifier prompt", verifierPrompt], diff --git a/website/public/install b/website/public/install index 0ddda73..41febfd 100644 --- a/website/public/install +++ b/website/public/install @@ -261,7 +261,7 @@ This usually means the release exists, but not all platform bundles were uploade Workarounds: - try again after the release finishes publishing - pass the latest published version explicitly, e.g.: - curl -fsSL https://feynman.is/install | bash -s -- 0.2.18 + curl -fsSL https://feynman.is/install | bash -s -- 0.2.19 EOF exit 1 fi diff --git a/website/public/install.ps1 b/website/public/install.ps1 index 7a7f6d0..2032c1f 100644 --- a/website/public/install.ps1 +++ b/website/public/install.ps1 @@ -110,7 +110,7 @@ This usually means the release exists, but not all platform bundles were uploade Workarounds: - try again after the release finishes publishing - pass the latest published version explicitly, e.g.: - & ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.18 + & ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.19 "@ } diff --git a/website/src/content/docs/getting-started/installation.md b/website/src/content/docs/getting-started/installation.md index fb8d1b9..2de3522 100644 --- a/website/src/content/docs/getting-started/installation.md +++ b/website/src/content/docs/getting-started/installation.md @@ -117,13 +117,13 @@ These installers download the bundled `skills/` and `prompts/` trees plus the re The one-line installer already targets the latest tagged release. To pin an exact version, pass it explicitly: ```bash -curl -fsSL https://feynman.is/install | bash -s -- 0.2.18 +curl -fsSL https://feynman.is/install | bash -s -- 0.2.19 ``` On Windows: ```powershell -& ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.18 +& ([scriptblock]::Create((irm https://feynman.is/install.ps1))) -Version 0.2.19 ``` ## Post-install setup