pi: patch omp to require reasoning_content for OpenRouter reasoning models
DeepSeek V4 Pro (and similar reasoning models reached via OpenRouter) reject multi-turn requests in thinking mode with: 400 The `reasoning_content` in the thinking mode must be passed back to the API. omp's existing Kimi placeholder injection (`requiresReasoningContentForToolCalls`) covered this requirement only for `thinkingFormat === "openai"`. OpenRouter sets `thinkingFormat === "openrouter"`, so the gate never fired even though the underlying providers behind OpenRouter (DeepSeek, Kimi, etc.) all enforce the same invariant. This patch: 1. Extends `requiresReasoningContentForToolCalls` detection: any reasoning-capable model fronted by OpenRouter now sets the flag. 2. Extends the placeholder gate in `convertMessages` to accept `thinkingFormat === "openrouter"` alongside `"openai"`. Cross-provider continuations are the dominant trigger: a conversation warmed up by Anthropic Claude (whose reasoning is redacted/encrypted on the wire) followed by a switch to DeepSeek V4 Pro via OpenRouter. omp cannot synthesize plaintext `reasoning_content` from Anthropic's encrypted blocks, so the placeholder satisfies DeepSeek's validator without fabricating a reasoning trace. Real captured reasoning, when present, short-circuits the placeholder via `hasReasoningField` and survives intact. Side benefit: also closes a latent gap where Kimi-via-OpenRouter (`thinkingFormat === "openrouter"`) had the compat flag set but the placeholder gate silently rejected it. Applies cleanly on top of patch 0001.
This commit is contained in:
@@ -43,6 +43,10 @@ in
|
||||
# nullable unions with `Invalid tool parameters schema : field \`anyOf\`: missing field \`type\``.
|
||||
# Upstream PR: pending; applies cleanly against v14.2.1.
|
||||
../../patches/omp/0001-openai-completions-retry-without-strict-on-deepseek-openrouter.patch
|
||||
# Stub `reasoning_content` on tool-call assistant messages for OpenRouter reasoning models.
|
||||
# Fixes DeepSeek V4 Pro et al. rejecting follow-up requests with `400 The \`reasoning_content\`
|
||||
# in the thinking mode must be passed back to the API`. Mirrors the existing Kimi handling.
|
||||
../../patches/omp/0002-openai-completions-stub-reasoning-content-for-openrouter.patch
|
||||
];
|
||||
}))
|
||||
];
|
||||
|
||||
@@ -0,0 +1,233 @@
|
||||
Subject: [PATCH] fix(openai-completions): require `reasoning_content` for OpenRouter reasoning models
|
||||
|
||||
DeepSeek V4 Pro (and similar reasoning models reached via OpenRouter) reject
|
||||
multi-turn requests in thinking mode with:
|
||||
|
||||
400 The `reasoning_content` in the thinking mode must be passed back to
|
||||
the API.
|
||||
|
||||
omp's existing Kimi placeholder injection (`requiresReasoningContentForToolCalls`)
|
||||
covered this requirement only for `thinkingFormat === "openai"`. OpenRouter
|
||||
sets `thinkingFormat === "openrouter"`, so the gate never fired even though
|
||||
the underlying providers behind OpenRouter (DeepSeek, Kimi, etc.) all enforce
|
||||
the same invariant.
|
||||
|
||||
This patch:
|
||||
|
||||
1. Extends `requiresReasoningContentForToolCalls` detection: any
|
||||
reasoning-capable model fronted by OpenRouter now sets the flag.
|
||||
2. Extends the placeholder gate in `convertMessages` to accept
|
||||
`thinkingFormat === "openrouter"` alongside `"openai"`.
|
||||
|
||||
Cross-provider continuations are the dominant trigger: a conversation warmed
|
||||
up by Anthropic Claude (whose reasoning is redacted/encrypted on the wire)
|
||||
followed by a switch to DeepSeek V4 Pro via OpenRouter. omp cannot
|
||||
synthesize plaintext `reasoning_content` from Anthropic's encrypted blocks,
|
||||
so the placeholder satisfies DeepSeek's validator without fabricating a
|
||||
reasoning trace. Real captured reasoning, when present, short-circuits the
|
||||
placeholder via `hasReasoningField` and survives intact.
|
||||
|
||||
Side benefit: also closes a latent gap where Kimi-via-OpenRouter
|
||||
(`thinkingFormat === "openrouter"`) had the compat flag set but the
|
||||
placeholder gate silently rejected it.
|
||||
|
||||
Regression tests cover:
|
||||
- compat flag detection on OpenRouter reasoning models
|
||||
- opt-out for non-reasoning OpenRouter models
|
||||
- cross-provider redacted-thinking placeholder
|
||||
- Kimi-via-OpenRouter placeholder firing
|
||||
- real reasoning preserved over the placeholder
|
||||
|
||||
Applies cleanly on top of patch 0001.
|
||||
|
||||
---
|
||||
diff --git a/packages/ai/src/providers/openai-completions-compat.ts b/packages/ai/src/providers/openai-completions-compat.ts
|
||||
--- a/packages/ai/src/providers/openai-completions-compat.ts
|
||||
+++ b/packages/ai/src/providers/openai-completions-compat.ts
|
||||
@@ -105,7 +105,14 @@
|
||||
? "qwen"
|
||||
: "openai",
|
||||
reasoningContentField: "reasoning_content",
|
||||
- requiresReasoningContentForToolCalls: isKimiModel,
|
||||
+ // Backends that 400 follow-up requests when prior assistant tool-call turns lack `reasoning_content`:
|
||||
+ // - Kimi: documented invariant on its native API and via OpenCode-Go.
|
||||
+ // - Any reasoning-capable model reached through OpenRouter: DeepSeek V4 Pro and similar enforce
|
||||
+ // this server-side whenever the request is in thinking mode. We can't translate Anthropic's
|
||||
+ // redacted/encrypted reasoning into DeepSeek's plaintext form, so cross-provider continuations
|
||||
+ // rely on a placeholder — see `convertMessages` for the placeholder injection.
|
||||
+ requiresReasoningContentForToolCalls:
|
||||
+ isKimiModel || ((provider === "openrouter" || baseUrl.includes("openrouter.ai")) && Boolean(model.reasoning)),
|
||||
requiresAssistantContentForToolCalls: isKimiModel,
|
||||
openRouterRouting: undefined,
|
||||
vercelGatewayRouting: undefined,
|
||||
diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts
|
||||
--- a/packages/ai/src/providers/openai-completions.ts
|
||||
+++ b/packages/ai/src/providers/openai-completions.ts
|
||||
@@ -1059,12 +1059,21 @@
|
||||
(assistantMsg as any).reasoning_content !== undefined ||
|
||||
(assistantMsg as any).reasoning !== undefined ||
|
||||
(assistantMsg as any).reasoning_text !== undefined;
|
||||
- if (
|
||||
- toolCalls.length > 0 &&
|
||||
+ // Inject a `reasoning_content` placeholder on assistant tool-call turns when the backend
|
||||
+ // rejects history without it. The compat flag captures the rule:
|
||||
+ // - Kimi (native or via OpenCode-Go): chat completion endpoint demands the field.
|
||||
+ // - Reasoning models reached through OpenRouter (e.g. DeepSeek V4 Pro): the underlying
|
||||
+ // provider's thinking-mode validator demands it on every prior assistant turn. omp
|
||||
+ // cannot synthesize real reasoning when the conversation was warmed up by another
|
||||
+ // provider whose reasoning is redacted/encrypted (Anthropic) or simply absent, so we
|
||||
+ // emit a placeholder. Real captured reasoning, when present, is preserved earlier via
|
||||
+ // the `thinkingSignature` echo path and short-circuits via `hasReasoningField`.
|
||||
+ // `thinkingFormat` is gated to formats that consume the field (openai/openrouter chat
|
||||
+ // completions); formats with their own conventions (zai, qwen) are excluded.
|
||||
+ const stubsReasoningContent =
|
||||
compat.requiresReasoningContentForToolCalls &&
|
||||
- compat.thinkingFormat === "openai" &&
|
||||
- !hasReasoningField
|
||||
- ) {
|
||||
+ (compat.thinkingFormat === "openai" || compat.thinkingFormat === "openrouter");
|
||||
+ if (toolCalls.length > 0 && stubsReasoningContent && !hasReasoningField) {
|
||||
const reasoningField = compat.reasoningContentField ?? "reasoning_content";
|
||||
(assistantMsg as any)[reasoningField] = ".";
|
||||
}
|
||||
diff --git a/packages/ai/test/openai-completions-compat.test.ts b/packages/ai/test/openai-completions-compat.test.ts
|
||||
--- a/packages/ai/test/openai-completions-compat.test.ts
|
||||
+++ b/packages/ai/test/openai-completions-compat.test.ts
|
||||
@@ -367,4 +367,137 @@
|
||||
const compat = detectCompat(model);
|
||||
expect(compat.requiresReasoningContentForToolCalls).toBe(true);
|
||||
});
|
||||
+
|
||||
+ it("requires reasoning_content for tool calls on reasoning-capable models via OpenRouter", () => {
|
||||
+ const model: Model<"openai-completions"> = {
|
||||
+ ...(getBundledModel("openrouter", "deepseek/deepseek-v3.2") as Model<"openai-completions">),
|
||||
+ reasoning: true,
|
||||
+ };
|
||||
+ const compat = detectCompat(model);
|
||||
+ expect(compat.thinkingFormat).toBe("openrouter");
|
||||
+ expect(compat.requiresReasoningContentForToolCalls).toBe(true);
|
||||
+ });
|
||||
+
|
||||
+ it("does not require reasoning_content for non-reasoning OpenRouter models", () => {
|
||||
+ const model: Model<"openai-completions"> = {
|
||||
+ ...(getBundledModel("openrouter", "deepseek/deepseek-v3.2") as Model<"openai-completions">),
|
||||
+ reasoning: false,
|
||||
+ };
|
||||
+ const compat = detectCompat(model);
|
||||
+ expect(compat.requiresReasoningContentForToolCalls).toBe(false);
|
||||
+ });
|
||||
+
|
||||
+ it("injects reasoning_content placeholder for OpenRouter reasoning models lacking captured reasoning", () => {
|
||||
+ // Reproduces the failing path from real usage: a conversation generated under Anthropic Claude (whose
|
||||
+ // reasoning is redacted/encrypted) is continued with deepseek/deepseek-v4-pro via OpenRouter. The
|
||||
+ // prior assistant turns persist as ThinkingContent blocks with empty `thinking` text plus an opaque
|
||||
+ // Anthropic signature cookie. omp cannot translate that into DeepSeek's plain-text `reasoning_content`,
|
||||
+ // so the empty thinking block is filtered out and the placeholder fires — satisfying DeepSeek's
|
||||
+ // thinking-mode validator without fabricating a reasoning trace.
|
||||
+ const model: Model<"openai-completions"> = {
|
||||
+ ...(getBundledModel("openrouter", "deepseek/deepseek-v3.2") as Model<"openai-completions">),
|
||||
+ reasoning: true,
|
||||
+ };
|
||||
+ const compat = detectCompat(model);
|
||||
+ const toolCallMessage: AssistantMessage = {
|
||||
+ role: "assistant",
|
||||
+ content: [
|
||||
+ // Anthropic-style redacted thinking block: empty text plus opaque signature.
|
||||
+ // `thinking.trim().length === 0` filters this out before the signature echo can fire.
|
||||
+ { type: "thinking", thinking: "", thinkingSignature: "Ep4CClkIDRgCKkDOpaqueAnthropicCookie" },
|
||||
+ { type: "toolCall", id: "call_anth_to_ds", name: "web_search", arguments: { query: "hi" } },
|
||||
+ ],
|
||||
+ api: model.api,
|
||||
+ provider: model.provider,
|
||||
+ model: model.id,
|
||||
+ usage: {
|
||||
+ input: 0,
|
||||
+ output: 0,
|
||||
+ cacheRead: 0,
|
||||
+ cacheWrite: 0,
|
||||
+ totalTokens: 0,
|
||||
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
+ },
|
||||
+ stopReason: "toolUse",
|
||||
+ timestamp: Date.now(),
|
||||
+ };
|
||||
+ const messages = convertMessages(model, { messages: [toolCallMessage] }, compat);
|
||||
+ const assistant = messages.find(m => m.role === "assistant");
|
||||
+ expect(assistant).toBeDefined();
|
||||
+ expect(Reflect.get(assistant as object, "reasoning_content")).toBe(".");
|
||||
+ });
|
||||
+
|
||||
+ it("injects reasoning_content placeholder for kimi-k2-5 via OpenRouter (closes the kimi-via-openrouter gap)", () => {
|
||||
+ // Before this fix, `requiresReasoningContentForToolCalls` was true for Kimi via OpenRouter but the
|
||||
+ // stub gate only fired when `thinkingFormat === "openai"`. OpenRouter sets thinkingFormat="openrouter",
|
||||
+ // so the stub silently never fired and Kimi-via-OpenRouter conversations 400'd the same way.
|
||||
+ const model: Model<"openai-completions"> = {
|
||||
+ ...getBundledModel("openai", "gpt-4o-mini"),
|
||||
+ api: "openai-completions",
|
||||
+ provider: "openrouter",
|
||||
+ baseUrl: "https://openrouter.ai/api/v1",
|
||||
+ id: "moonshotai/kimi-k2-5",
|
||||
+ reasoning: true,
|
||||
+ };
|
||||
+ const compat = detectCompat(model);
|
||||
+ const toolCallMessage: AssistantMessage = {
|
||||
+ role: "assistant",
|
||||
+ content: [
|
||||
+ { type: "toolCall", id: "call_kimi_or", name: "web_search", arguments: { query: "hi" } },
|
||||
+ ],
|
||||
+ api: model.api,
|
||||
+ provider: model.provider,
|
||||
+ model: model.id,
|
||||
+ usage: {
|
||||
+ input: 0,
|
||||
+ output: 0,
|
||||
+ cacheRead: 0,
|
||||
+ cacheWrite: 0,
|
||||
+ totalTokens: 0,
|
||||
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
+ },
|
||||
+ stopReason: "toolUse",
|
||||
+ timestamp: Date.now(),
|
||||
+ };
|
||||
+ const messages = convertMessages(model, { messages: [toolCallMessage] }, compat);
|
||||
+ const assistant = messages.find(m => m.role === "assistant");
|
||||
+ expect(assistant).toBeDefined();
|
||||
+ expect(Reflect.get(assistant as object, "reasoning_content")).toBe(".");
|
||||
+ });
|
||||
+
|
||||
+ it("preserves real captured reasoning over the placeholder when the assistant has non-empty thinking", () => {
|
||||
+ // Sanity check: the placeholder must not overwrite real reasoning. When the prior assistant turn was
|
||||
+ // generated by the same provider and surfaces plaintext reasoning, the existing thinkingSignature
|
||||
+ // echo path sets `reasoning_content` first, and `hasReasoningField` short-circuits the stub.
|
||||
+ const model: Model<"openai-completions"> = {
|
||||
+ ...(getBundledModel("openrouter", "deepseek/deepseek-v3.2") as Model<"openai-completions">),
|
||||
+ reasoning: true,
|
||||
+ };
|
||||
+ const compat = detectCompat(model);
|
||||
+ const toolCallMessage: AssistantMessage = {
|
||||
+ role: "assistant",
|
||||
+ content: [
|
||||
+ { type: "thinking", thinking: "Step 1: read the file. Step 2: search.", thinkingSignature: "reasoning_content" },
|
||||
+ { type: "toolCall", id: "call_real", name: "web_search", arguments: { query: "hi" } },
|
||||
+ ],
|
||||
+ api: model.api,
|
||||
+ provider: model.provider,
|
||||
+ model: model.id,
|
||||
+ usage: {
|
||||
+ input: 0,
|
||||
+ output: 0,
|
||||
+ cacheRead: 0,
|
||||
+ cacheWrite: 0,
|
||||
+ totalTokens: 0,
|
||||
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
+ },
|
||||
+ stopReason: "toolUse",
|
||||
+ timestamp: Date.now(),
|
||||
+ };
|
||||
+ const messages = convertMessages(model, { messages: [toolCallMessage] }, compat);
|
||||
+ const assistant = messages.find(m => m.role === "assistant");
|
||||
+ expect(assistant).toBeDefined();
|
||||
+ expect(Reflect.get(assistant as object, "reasoning_content")).toBe("Step 1: read the file. Step 2: search.");
|
||||
+ });
|
||||
+
|
||||
});
|
||||
Reference in New Issue
Block a user