Fix subagent output paths and improve deepresearch robustness

2026-04-17 18:00:24 -07:00
parent 6f3eeea75b
commit 40939859b9
7 changed files with 190 additions and 13 deletions
--- a/tests/content-policy.test.ts
+++ b/tests/content-policy.test.ts
@@ -72,6 +72,35 @@ test("deepresearch workflow requires durable artifacts even when blocked", () =>
 	assert.match(deepResearchPrompt, /Never end with only an explanation in chat/i);
 });

+test("deepresearch citation and review stages are sequential and avoid giant edits", () => {
+	const deepResearchPrompt = readFileSync(join(repoRoot, "prompts", "deepresearch.md"), "utf8");
+
+	assert.match(deepResearchPrompt, /must complete before any reviewer runs/i);
+	assert.match(deepResearchPrompt, /Do not run the `verifier` and `reviewer` in the same parallel `subagent` call/i);
+	assert.match(deepResearchPrompt, /outputs\/\.drafts\/<slug>-cited\.md/i);
+	assert.match(deepResearchPrompt, /do not issue one giant `edit` tool call/i);
+	assert.match(deepResearchPrompt, /outputs\/\.drafts\/<slug>-revised\.md/i);
+	assert.match(deepResearchPrompt, /The final candidate is `outputs\/\.drafts\/<slug>-revised\.md` if it exists/i);
+});
+
+test("deepresearch keeps subagent tool calls small and skips subagents for narrow explainers", () => {
+	const deepResearchPrompt = readFileSync(join(repoRoot, "prompts", "deepresearch.md"), "utf8");
+
+	assert.match(deepResearchPrompt, /including "what is X" explainers/i);
+	assert.match(deepResearchPrompt, /Make the scale decision before assigning owners/i);
+	assert.match(deepResearchPrompt, /lead-owned direct search tasks only/i);
+	assert.match(deepResearchPrompt, /MUST NOT spawn researcher subagents/i);
+	assert.match(deepResearchPrompt, /Do not inflate a simple explainer into a multi-agent survey/i);
+	assert.match(deepResearchPrompt, /Skip this section entirely when the scale decision chose direct search\/no subagents/i);
+	assert.match(deepResearchPrompt, /<slug>-research-direct\.md/i);
+	assert.match(deepResearchPrompt, /Keep `subagent` tool-call JSON small and valid/i);
+	assert.match(deepResearchPrompt, /write a per-researcher brief first/i);
+	assert.match(deepResearchPrompt, /Do not place multi-paragraph instructions inside the `subagent` JSON/i);
+	assert.match(deepResearchPrompt, /Do not add extra keys such as `artifacts`/i);
+	assert.match(deepResearchPrompt, /always set `failFast: false`/i);
+	assert.match(deepResearchPrompt, /if a PDF parser or paper fetch fails/i);
+});
+
 test("workflow prompts do not introduce implicit confirmation gates", () => {
 	const workflowPrompts = [
 		"audit.md",
--- a/tests/pi-subagents-patch.test.ts
+++ b/tests/pi-subagents-patch.test.ts
@@ -171,6 +171,71 @@ test("patchPiSubagentsSource preserves output on top-level parallel tasks", () =
 	assert.doesNotMatch(patched, /resolvePiAgentDir/);
 });

+test("patchPiSubagentsSource preserves output in async parallel task handoff", () => {
+	const input = [
+		"function run(tasks: TaskParam[]) {",
+		"\tconst modelOverrides = tasks.map(() => undefined);",
+		"\tconst skillOverrides = tasks.map(() => undefined);",
+		"\tconst parallelTasks = tasks.map((t, i) => ({",
+		"\t\tagent: t.agent,",
+		"\t\ttask: params.context === \"fork\" ? wrapForkTask(taskTexts[i]!) : taskTexts[i]!,",
+		"\t\tcwd: t.cwd,",
+		"\t\t...(modelOverrides[i] ? { model: modelOverrides[i] } : {}),",
+		"\t\t...(skillOverrides[i] !== undefined ? { skill: skillOverrides[i] } : {}),",
+		"\t}));",
+		"}",
+	].join("\n");
+
+	const patched = patchPiSubagentsSource("subagent-executor.ts", input);
+
+	assert.match(patched, /\n\t\toutput: t\.output,/);
+});
+
+test("patchPiSubagentsSource uses task output when resolving foreground parallel behavior", () => {
+	const input = [
+		"async function run(tasks: TaskParam[]) {",
+		"\tconst skillOverrides = tasks.map((t) => normalizeSkillInput(t.skill));",
+		"\tif (params.clarify === true && ctx.hasUI) {",
+		"\t\tconst behaviors = agentConfigs.map((c, i) =>",
+		"\t\t\tresolveStepBehavior(c, { skills: skillOverrides[i] }),",
+		"\t\t);",
+		"\t}",
+		"\tconst behaviors = agentConfigs.map((config) => resolveStepBehavior(config, {}));",
+		"}",
+	].join("\n");
+
+	const patched = patchPiSubagentsSource("subagent-executor.ts", input);
+
+	assert.match(patched, /resolveStepBehavior\(c, \{ output: tasks\[i\]\?\.output, skills: skillOverrides\[i\] \}\)/);
+	assert.match(patched, /resolveStepBehavior\(config, \{ output: tasks\[i\]\?\.output, skills: skillOverrides\[i\] \}\)/);
+	assert.doesNotMatch(patched, /resolveStepBehavior\(config, \{\}\)/);
+});
+
+test("patchPiSubagentsSource passes foreground parallel output paths into runSync", () => {
+	const input = [
+		"async function runForegroundParallelTasks(input: ForegroundParallelRunInput): Promise<SingleResult[]> {",
+		"\treturn mapConcurrent(input.tasks, input.concurrencyLimit, async (task, index) => {",
+		"\t\tconst overrideSkills = input.skillOverrides[index];",
+		"\t\tconst effectiveSkills = overrideSkills === undefined ? input.behaviors[index]?.skills : overrideSkills;",
+		"\t\tconst taskCwd = resolveParallelTaskCwd(task, input.paramsCwd, input.worktreeSetup, index);",
+		"\t\treturn runSync(input.ctx.cwd, input.agents, task.agent, input.taskTexts[index]!, {",
+		"\t\t\tcwd: taskCwd,",
+		"\t\t\tsignal: input.signal,",
+		"\t\t\tmaxOutput: input.maxOutput,",
+		"\t\t\tmaxSubagentDepth: input.maxSubagentDepths[index],",
+		"\t\t});",
+		"\t});",
+		"}",
+	].join("\n");
+
+	const patched = patchPiSubagentsSource("subagent-executor.ts", input);
+
+	assert.match(patched, /const outputPath = typeof input\.behaviors\[index\]\?\.output === "string"/);
+	assert.match(patched, /const taskText = injectSingleOutputInstruction\(input\.taskTexts\[index\]!, outputPath\)/);
+	assert.match(patched, /runSync\(input\.ctx\.cwd, input\.agents, task\.agent, taskText, \{/);
+	assert.match(patched, /\n\t\t\toutputPath,/);
+});
+
 test("patchPiSubagentsSource documents output in top-level task schema", () => {
 	const input = [
 		"export const TaskItem = Type.Object({ ",