pi: patch omp to require reasoning_content for OpenRouter reasoning models
DeepSeek V4 Pro (and similar reasoning models reached via OpenRouter) reject multi-turn requests in thinking mode with: 400 The `reasoning_content` in the thinking mode must be passed back to the API. omp's existing Kimi placeholder injection (`requiresReasoningContentForToolCalls`) covered this requirement only for `thinkingFormat === "openai"`. OpenRouter sets `thinkingFormat === "openrouter"`, so the gate never fired even though the underlying providers behind OpenRouter (DeepSeek, Kimi, etc.) all enforce the same invariant. This patch: 1. Extends `requiresReasoningContentForToolCalls` detection: any reasoning-capable model fronted by OpenRouter now sets the flag. 2. Extends the placeholder gate in `convertMessages` to accept `thinkingFormat === "openrouter"` alongside `"openai"`. Cross-provider continuations are the dominant trigger: a conversation warmed up by Anthropic Claude (whose reasoning is redacted/encrypted on the wire) followed by a switch to DeepSeek V4 Pro via OpenRouter. omp cannot synthesize plaintext `reasoning_content` from Anthropic's encrypted blocks, so the placeholder satisfies DeepSeek's validator without fabricating a reasoning trace. Real captured reasoning, when present, short-circuits the placeholder via `hasReasoningField` and survives intact. Side benefit: also closes a latent gap where Kimi-via-OpenRouter (`thinkingFormat === "openrouter"`) had the compat flag set but the placeholder gate silently rejected it. Applies cleanly on top of patch 0001.
This commit is contained in:
@@ -43,6 +43,10 @@ in
|
||||
# nullable unions with `Invalid tool parameters schema : field \`anyOf\`: missing field \`type\``.
|
||||
# Upstream PR: pending; applies cleanly against v14.2.1.
|
||||
../../patches/omp/0001-openai-completions-retry-without-strict-on-deepseek-openrouter.patch
|
||||
# Stub `reasoning_content` on tool-call assistant messages for OpenRouter reasoning models.
|
||||
# Fixes DeepSeek V4 Pro et al. rejecting follow-up requests with `400 The \`reasoning_content\`
|
||||
# in the thinking mode must be passed back to the API`. Mirrors the existing Kimi handling.
|
||||
../../patches/omp/0002-openai-completions-stub-reasoning-content-for-openrouter.patch
|
||||
];
|
||||
}))
|
||||
];
|
||||
|
||||
@@ -0,0 +1,233 @@
|
||||
Subject: [PATCH] fix(openai-completions): require `reasoning_content` for OpenRouter reasoning models
|
||||
|
||||
DeepSeek V4 Pro (and similar reasoning models reached via OpenRouter) reject
|
||||
multi-turn requests in thinking mode with:
|
||||
|
||||
400 The `reasoning_content` in the thinking mode must be passed back to
|
||||
the API.
|
||||
|
||||
omp's existing Kimi placeholder injection (`requiresReasoningContentForToolCalls`)
|
||||
covered this requirement only for `thinkingFormat === "openai"`. OpenRouter
|
||||
sets `thinkingFormat === "openrouter"`, so the gate never fired even though
|
||||
the underlying providers behind OpenRouter (DeepSeek, Kimi, etc.) all enforce
|
||||
the same invariant.
|
||||
|
||||
This patch:
|
||||
|
||||
1. Extends `requiresReasoningContentForToolCalls` detection: any
|
||||
reasoning-capable model fronted by OpenRouter now sets the flag.
|
||||
2. Extends the placeholder gate in `convertMessages` to accept
|
||||
`thinkingFormat === "openrouter"` alongside `"openai"`.
|
||||
|
||||
Cross-provider continuations are the dominant trigger: a conversation warmed
|
||||
up by Anthropic Claude (whose reasoning is redacted/encrypted on the wire)
|
||||
followed by a switch to DeepSeek V4 Pro via OpenRouter. omp cannot
|
||||
synthesize plaintext `reasoning_content` from Anthropic's encrypted blocks,
|
||||
so the placeholder satisfies DeepSeek's validator without fabricating a
|
||||
reasoning trace. Real captured reasoning, when present, short-circuits the
|
||||
placeholder via `hasReasoningField` and survives intact.
|
||||
|
||||
Side benefit: also closes a latent gap where Kimi-via-OpenRouter
|
||||
(`thinkingFormat === "openrouter"`) had the compat flag set but the
|
||||
placeholder gate silently rejected it.
|
||||
|
||||
Regression tests cover:
|
||||
- compat flag detection on OpenRouter reasoning models
|
||||
- opt-out for non-reasoning OpenRouter models
|
||||
- cross-provider redacted-thinking placeholder
|
||||
- Kimi-via-OpenRouter placeholder firing
|
||||
- real reasoning preserved over the placeholder
|
||||
|
||||
Applies cleanly on top of patch 0001.
|
||||
|
||||
---
|
||||
diff --git a/packages/ai/src/providers/openai-completions-compat.ts b/packages/ai/src/providers/openai-completions-compat.ts
|
||||
--- a/packages/ai/src/providers/openai-completions-compat.ts
|
||||
+++ b/packages/ai/src/providers/openai-completions-compat.ts
|
||||
@@ -105,7 +105,14 @@
|
||||
? "qwen"
|
||||
: "openai",
|
||||
reasoningContentField: "reasoning_content",
|
||||
- requiresReasoningContentForToolCalls: isKimiModel,
|
||||
+ // Backends that 400 follow-up requests when prior assistant tool-call turns lack `reasoning_content`:
|
||||
+ // - Kimi: documented invariant on its native API and via OpenCode-Go.
|
||||
+ // - Any reasoning-capable model reached through OpenRouter: DeepSeek V4 Pro and similar enforce
|
||||
+ // this server-side whenever the request is in thinking mode. We can't translate Anthropic's
|
||||
+ // redacted/encrypted reasoning into DeepSeek's plaintext form, so cross-provider continuations
|
||||
+ // rely on a placeholder — see `convertMessages` for the placeholder injection.
|
||||
+ requiresReasoningContentForToolCalls:
|
||||
+ isKimiModel || ((provider === "openrouter" || baseUrl.includes("openrouter.ai")) && Boolean(model.reasoning)),
|
||||
requiresAssistantContentForToolCalls: isKimiModel,
|
||||
openRouterRouting: undefined,
|
||||
vercelGatewayRouting: undefined,
|
||||
diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts
|
||||
--- a/packages/ai/src/providers/openai-completions.ts
|
||||
+++ b/packages/ai/src/providers/openai-completions.ts
|
||||
@@ -1059,12 +1059,21 @@
|
||||
(assistantMsg as any).reasoning_content !== undefined ||
|
||||
(assistantMsg as any).reasoning !== undefined ||
|
||||
(assistantMsg as any).reasoning_text !== undefined;
|
||||
- if (
|
||||
- toolCalls.length > 0 &&
|
||||
+ // Inject a `reasoning_content` placeholder on assistant tool-call turns when the backend
|
||||
+ // rejects history without it. The compat flag captures the rule:
|
||||
+ // - Kimi (native or via OpenCode-Go): chat completion endpoint demands the field.
|
||||
+ // - Reasoning models reached through OpenRouter (e.g. DeepSeek V4 Pro): the underlying
|
||||
+ // provider's thinking-mode validator demands it on every prior assistant turn. omp
|
||||
+ // cannot synthesize real reasoning when the conversation was warmed up by another
|
||||
+ // provider whose reasoning is redacted/encrypted (Anthropic) or simply absent, so we
|
||||
+ // emit a placeholder. Real captured reasoning, when present, is preserved earlier via
|
||||
+ // the `thinkingSignature` echo path and short-circuits via `hasReasoningField`.
|
||||
+ // `thinkingFormat` is gated to formats that consume the field (openai/openrouter chat
|
||||
+ // completions); formats with their own conventions (zai, qwen) are excluded.
|
||||
+ const stubsReasoningContent =
|
||||
compat.requiresReasoningContentForToolCalls &&
|
||||
- compat.thinkingFormat === "openai" &&
|
||||
- !hasReasoningField
|
||||
- ) {
|
||||
+ (compat.thinkingFormat === "openai" || compat.thinkingFormat === "openrouter");
|
||||
+ if (toolCalls.length > 0 && stubsReasoningContent && !hasReasoningField) {
|
||||
const reasoningField = compat.reasoningContentField ?? "reasoning_content";
|
||||
(assistantMsg as any)[reasoningField] = ".";
|
||||
}
|
||||
diff --git a/packages/ai/test/openai-completions-compat.test.ts b/packages/ai/test/openai-completions-compat.test.ts
|
||||
--- a/packages/ai/test/openai-completions-compat.test.ts
|
||||
+++ b/packages/ai/test/openai-completions-compat.test.ts
|
||||
@@ -367,4 +367,137 @@
|
||||
const compat = detectCompat(model);
|
||||
expect(compat.requiresReasoningContentForToolCalls).toBe(true);
|
||||
});
|
||||
+
|
||||
+ it("requires reasoning_content for tool calls on reasoning-capable models via OpenRouter", () => {
|
||||
+ const model: Model<"openai-completions"> = {
|
||||
+ ...(getBundledModel("openrouter", "deepseek/deepseek-v3.2") as Model<"openai-completions">),
|
||||
+ reasoning: true,
|
||||
+ };
|
||||
+ const compat = detectCompat(model);
|
||||
+ expect(compat.thinkingFormat).toBe("openrouter");
|
||||
+ expect(compat.requiresReasoningContentForToolCalls).toBe(true);
|
||||
+ });
|
||||
+
|
||||
+ it("does not require reasoning_content for non-reasoning OpenRouter models", () => {
|
||||
+ const model: Model<"openai-completions"> = {
|
||||
+ ...(getBundledModel("openrouter", "deepseek/deepseek-v3.2") as Model<"openai-completions">),
|
||||
+ reasoning: false,
|
||||
+ };
|
||||
+ const compat = detectCompat(model);
|
||||
+ expect(compat.requiresReasoningContentForToolCalls).toBe(false);
|
||||
+ });
|
||||
+
|
||||
+ it("injects reasoning_content placeholder for OpenRouter reasoning models lacking captured reasoning", () => {
|
||||
+ // Reproduces the failing path from real usage: a conversation generated under Anthropic Claude (whose
|
||||
+ // reasoning is redacted/encrypted) is continued with deepseek/deepseek-v4-pro via OpenRouter. The
|
||||
+ // prior assistant turns persist as ThinkingContent blocks with empty `thinking` text plus an opaque
|
||||
+ // Anthropic signature cookie. omp cannot translate that into DeepSeek's plain-text `reasoning_content`,
|
||||
+ // so the empty thinking block is filtered out and the placeholder fires — satisfying DeepSeek's
|
||||
+ // thinking-mode validator without fabricating a reasoning trace.
|
||||
+ const model: Model<"openai-completions"> = {
|
||||
+ ...(getBundledModel("openrouter", "deepseek/deepseek-v3.2") as Model<"openai-completions">),
|
||||
+ reasoning: true,
|
||||
+ };
|
||||
+ const compat = detectCompat(model);
|
||||
+ const toolCallMessage: AssistantMessage = {
|
||||
+ role: "assistant",
|
||||
+ content: [
|
||||
+ // Anthropic-style redacted thinking block: empty text plus opaque signature.
|
||||
+ // `thinking.trim().length === 0` filters this out before the signature echo can fire.
|
||||
+ { type: "thinking", thinking: "", thinkingSignature: "Ep4CClkIDRgCKkDOpaqueAnthropicCookie" },
|
||||
+ { type: "toolCall", id: "call_anth_to_ds", name: "web_search", arguments: { query: "hi" } },
|
||||
+ ],
|
||||
+ api: model.api,
|
||||
+ provider: model.provider,
|
||||
+ model: model.id,
|
||||
+ usage: {
|
||||
+ input: 0,
|
||||
+ output: 0,
|
||||
+ cacheRead: 0,
|
||||
+ cacheWrite: 0,
|
||||
+ totalTokens: 0,
|
||||
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
+ },
|
||||
+ stopReason: "toolUse",
|
||||
+ timestamp: Date.now(),
|
||||
+ };
|
||||
+ const messages = convertMessages(model, { messages: [toolCallMessage] }, compat);
|
||||
+ const assistant = messages.find(m => m.role === "assistant");
|
||||
+ expect(assistant).toBeDefined();
|
||||
+ expect(Reflect.get(assistant as object, "reasoning_content")).toBe(".");
|
||||
+ });
|
||||
+
|
||||
+ it("injects reasoning_content placeholder for kimi-k2-5 via OpenRouter (closes the kimi-via-openrouter gap)", () => {
|
||||
+ // Before this fix, `requiresReasoningContentForToolCalls` was true for Kimi via OpenRouter but the
|
||||
+ // stub gate only fired when `thinkingFormat === "openai"`. OpenRouter sets thinkingFormat="openrouter",
|
||||
+ // so the stub silently never fired and Kimi-via-OpenRouter conversations 400'd the same way.
|
||||
+ const model: Model<"openai-completions"> = {
|
||||
+ ...getBundledModel("openai", "gpt-4o-mini"),
|
||||
+ api: "openai-completions",
|
||||
+ provider: "openrouter",
|
||||
+ baseUrl: "https://openrouter.ai/api/v1",
|
||||
+ id: "moonshotai/kimi-k2-5",
|
||||
+ reasoning: true,
|
||||
+ };
|
||||
+ const compat = detectCompat(model);
|
||||
+ const toolCallMessage: AssistantMessage = {
|
||||
+ role: "assistant",
|
||||
+ content: [
|
||||
+ { type: "toolCall", id: "call_kimi_or", name: "web_search", arguments: { query: "hi" } },
|
||||
+ ],
|
||||
+ api: model.api,
|
||||
+ provider: model.provider,
|
||||
+ model: model.id,
|
||||
+ usage: {
|
||||
+ input: 0,
|
||||
+ output: 0,
|
||||
+ cacheRead: 0,
|
||||
+ cacheWrite: 0,
|
||||
+ totalTokens: 0,
|
||||
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
+ },
|
||||
+ stopReason: "toolUse",
|
||||
+ timestamp: Date.now(),
|
||||
+ };
|
||||
+ const messages = convertMessages(model, { messages: [toolCallMessage] }, compat);
|
||||
+ const assistant = messages.find(m => m.role === "assistant");
|
||||
+ expect(assistant).toBeDefined();
|
||||
+ expect(Reflect.get(assistant as object, "reasoning_content")).toBe(".");
|
||||
+ });
|
||||
+
|
||||
+ it("preserves real captured reasoning over the placeholder when the assistant has non-empty thinking", () => {
|
||||
+ // Sanity check: the placeholder must not overwrite real reasoning. When the prior assistant turn was
|
||||
+ // generated by the same provider and surfaces plaintext reasoning, the existing thinkingSignature
|
||||
+ // echo path sets `reasoning_content` first, and `hasReasoningField` short-circuits the stub.
|
||||
+ const model: Model<"openai-completions"> = {
|
||||
+ ...(getBundledModel("openrouter", "deepseek/deepseek-v3.2") as Model<"openai-completions">),
|
||||
+ reasoning: true,
|
||||
+ };
|
||||
+ const compat = detectCompat(model);
|
||||
+ const toolCallMessage: AssistantMessage = {
|
||||
+ role: "assistant",
|
||||
+ content: [
|
||||
+ { type: "thinking", thinking: "Step 1: read the file. Step 2: search.", thinkingSignature: "reasoning_content" },
|
||||
+ { type: "toolCall", id: "call_real", name: "web_search", arguments: { query: "hi" } },
|
||||
+ ],
|
||||
+ api: model.api,
|
||||
+ provider: model.provider,
|
||||
+ model: model.id,
|
||||
+ usage: {
|
||||
+ input: 0,
|
||||
+ output: 0,
|
||||
+ cacheRead: 0,
|
||||
+ cacheWrite: 0,
|
||||
+ totalTokens: 0,
|
||||
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
+ },
|
||||
+ stopReason: "toolUse",
|
||||
+ timestamp: Date.now(),
|
||||
+ };
|
||||
+ const messages = convertMessages(model, { messages: [toolCallMessage] }, compat);
|
||||
+ const assistant = messages.find(m => m.role === "assistant");
|
||||
+ expect(assistant).toBeDefined();
|
||||
+ expect(Reflect.get(assistant as object, "reasoning_content")).toBe("Step 1: read the file. Step 2: search.");
|
||||
+ });
|
||||
+
|
||||
});
|
||||
Reference in New Issue
Block a user