Fix subagent output paths and deepresearch robustness

This commit is contained in:
Advait Paliwal
2026-04-17 18:00:24 -07:00
parent 6f3eeea75b
commit 40939859b9
7 changed files with 190 additions and 13 deletions

View File

@@ -72,6 +72,35 @@ test("deepresearch workflow requires durable artifacts even when blocked", () =>
assert.match(deepResearchPrompt, /Never end with only an explanation in chat/i);
});
// Checks that prompts/deepresearch.md still contains the wording that keeps
// the citation pass ahead of the review pass and routes each stage to its own
// draft file rather than one oversized edit.
test("deepresearch citation and review stages are sequential and avoid giant edits", () => {
	const promptPath = join(repoRoot, "prompts", "deepresearch.md");
	const promptText = readFileSync(promptPath, "utf8");

	// Every pattern must be present in the prompt, matched case-insensitively.
	const requiredPhrases = [
		/must complete before any reviewer runs/i,
		/Do not run the `verifier` and `reviewer` in the same parallel `subagent` call/i,
		/outputs\/\.drafts\/<slug>-cited\.md/i,
		/do not issue one giant `edit` tool call/i,
		/outputs\/\.drafts\/<slug>-revised\.md/i,
		/The final candidate is `outputs\/\.drafts\/<slug>-revised\.md` if it exists/i,
	];

	for (const phrase of requiredPhrases) {
		assert.match(promptText, phrase);
	}
});
// Checks that prompts/deepresearch.md still carries the instructions about
// the scale decision, the direct-search path, and compact `subagent` calls.
test("deepresearch keeps subagent tool calls small and skips subagents for narrow explainers", () => {
	const promptPath = join(repoRoot, "prompts", "deepresearch.md");
	const promptText = readFileSync(promptPath, "utf8");

	// Checked in this order; the first pattern that is missing fails the test.
	const requiredPhrases = [
		/including "what is X" explainers/i,
		/Make the scale decision before assigning owners/i,
		/lead-owned direct search tasks only/i,
		/MUST NOT spawn researcher subagents/i,
		/Do not inflate a simple explainer into a multi-agent survey/i,
		/Skip this section entirely when the scale decision chose direct search\/no subagents/i,
		/<slug>-research-direct\.md/i,
		/Keep `subagent` tool-call JSON small and valid/i,
		/write a per-researcher brief first/i,
		/Do not place multi-paragraph instructions inside the `subagent` JSON/i,
		/Do not add extra keys such as `artifacts`/i,
		/always set `failFast: false`/i,
		/if a PDF parser or paper fetch fails/i,
	];

	for (const phrase of requiredPhrases) {
		assert.match(promptText, phrase);
	}
});
test("workflow prompts do not introduce implicit confirmation gates", () => {
const workflowPrompts = [
"audit.md",

View File

@@ -171,6 +171,71 @@ test("patchPiSubagentsSource preserves output on top-level parallel tasks", () =
assert.doesNotMatch(patched, /resolvePiAgentDir/);
});
// Runs a small `parallelTasks` mapping fixture through the patcher and expects
// an `output: t.output,` property line, indented with two tabs, in the result.
test("patchPiSubagentsSource preserves output in async parallel task handoff", () => {
	const fixtureLines = [
		"function run(tasks: TaskParam[]) {",
		"\tconst modelOverrides = tasks.map(() => undefined);",
		"\tconst skillOverrides = tasks.map(() => undefined);",
		"\tconst parallelTasks = tasks.map((t, i) => ({",
		"\t\tagent: t.agent,",
		"\t\ttask: params.context === \"fork\" ? wrapForkTask(taskTexts[i]!) : taskTexts[i]!,",
		"\t\tcwd: t.cwd,",
		"\t\t...(modelOverrides[i] ? { model: modelOverrides[i] } : {}),",
		"\t\t...(skillOverrides[i] !== undefined ? { skill: skillOverrides[i] } : {}),",
		"\t}));",
		"}",
	];
	const rewritten = patchPiSubagentsSource("subagent-executor.ts", fixtureLines.join("\n"));

	assert.match(rewritten, /\n\t\toutput: t\.output,/);
});
// Runs a fixture with two `resolveStepBehavior` call sites through the patcher
// and expects both to receive `output: tasks[i]?.output` alongside the skill
// override, with no bare `resolveStepBehavior(config, {})` call left behind.
test("patchPiSubagentsSource uses task output when resolving foreground parallel behavior", () => {
	const fixtureLines = [
		"async function run(tasks: TaskParam[]) {",
		"\tconst skillOverrides = tasks.map((t) => normalizeSkillInput(t.skill));",
		"\tif (params.clarify === true && ctx.hasUI) {",
		"\t\tconst behaviors = agentConfigs.map((c, i) =>",
		"\t\t\tresolveStepBehavior(c, { skills: skillOverrides[i] }),",
		"\t\t);",
		"\t}",
		"\tconst behaviors = agentConfigs.map((config) => resolveStepBehavior(config, {}));",
		"}",
	];
	const rewritten = patchPiSubagentsSource("subagent-executor.ts", fixtureLines.join("\n"));

	// Call sites that must appear after patching.
	const expectedCalls = [
		/resolveStepBehavior\(c, \{ output: tasks\[i\]\?\.output, skills: skillOverrides\[i\] \}\)/,
		/resolveStepBehavior\(config, \{ output: tasks\[i\]\?\.output, skills: skillOverrides\[i\] \}\)/,
	];
	for (const expected of expectedCalls) {
		assert.match(rewritten, expected);
	}

	// The original empty-options call must be gone.
	assert.doesNotMatch(rewritten, /resolveStepBehavior\(config, \{\}\)/);
});
// Runs a `runForegroundParallelTasks` fixture through the patcher and expects
// the result to derive `outputPath` from the step behavior, wrap the task text
// with `injectSingleOutputInstruction`, and pass both on to `runSync`.
test("patchPiSubagentsSource passes foreground parallel output paths into runSync", () => {
	const fixtureLines = [
		"async function runForegroundParallelTasks(input: ForegroundParallelRunInput): Promise<SingleResult[]> {",
		"\treturn mapConcurrent(input.tasks, input.concurrencyLimit, async (task, index) => {",
		"\t\tconst overrideSkills = input.skillOverrides[index];",
		"\t\tconst effectiveSkills = overrideSkills === undefined ? input.behaviors[index]?.skills : overrideSkills;",
		"\t\tconst taskCwd = resolveParallelTaskCwd(task, input.paramsCwd, input.worktreeSetup, index);",
		"\t\treturn runSync(input.ctx.cwd, input.agents, task.agent, input.taskTexts[index]!, {",
		"\t\t\tcwd: taskCwd,",
		"\t\t\tsignal: input.signal,",
		"\t\t\tmaxOutput: input.maxOutput,",
		"\t\t\tmaxSubagentDepth: input.maxSubagentDepths[index],",
		"\t\t});",
		"\t});",
		"}",
	];
	const rewritten = patchPiSubagentsSource("subagent-executor.ts", fixtureLines.join("\n"));

	// Fragments the patched source must contain, checked in this order.
	const expectedFragments = [
		/const outputPath = typeof input\.behaviors\[index\]\?\.output === "string"/,
		/const taskText = injectSingleOutputInstruction\(input\.taskTexts\[index\]!, outputPath\)/,
		/runSync\(input\.ctx\.cwd, input\.agents, task\.agent, taskText, \{/,
		/\n\t\t\toutputPath,/,
	];
	for (const fragment of expectedFragments) {
		assert.match(rewritten, fragment);
	}
});
test("patchPiSubagentsSource documents output in top-level task schema", () => {
const input = [
"export const TaskItem = Type.Object({ ",