diff --git a/home/progs/pi.nix b/home/progs/pi.nix index fc1eef5..b1528f9 100644 --- a/home/progs/pi.nix +++ b/home/progs/pi.nix @@ -43,6 +43,10 @@ in # nullable unions with `Invalid tool parameters schema : field \`anyOf\`: missing field \`type\``. # Upstream PR: pending; applies cleanly against v14.2.1. ../../patches/omp/0001-openai-completions-retry-without-strict-on-deepseek-openrouter.patch + # Stub `reasoning_content` on tool-call assistant messages for OpenRouter reasoning models. + # Fixes DeepSeek V4 Pro et al. rejecting follow-up requests with `400 The \`reasoning_content\` + # in the thinking mode must be passed back to the API`. Mirrors the existing Kimi handling. + ../../patches/omp/0002-openai-completions-stub-reasoning-content-for-openrouter.patch ]; })) ]; diff --git a/patches/omp/0002-openai-completions-stub-reasoning-content-for-openrouter.patch b/patches/omp/0002-openai-completions-stub-reasoning-content-for-openrouter.patch new file mode 100644 index 0000000..4dc0eb3 --- /dev/null +++ b/patches/omp/0002-openai-completions-stub-reasoning-content-for-openrouter.patch @@ -0,0 +1,233 @@ +Subject: [PATCH] fix(openai-completions): require `reasoning_content` for OpenRouter reasoning models + +DeepSeek V4 Pro (and similar reasoning models reached via OpenRouter) reject +multi-turn requests in thinking mode with: + + 400 The `reasoning_content` in the thinking mode must be passed back to + the API. + +omp's existing kimi placeholder injection (`requiresReasoningContentForToolCalls`) +covered this requirement only for `thinkingFormat === "openai"`. OpenRouter +sets `thinkingFormat === "openrouter"`, so the gate never fired even though +the underlying providers behind OpenRouter (DeepSeek, Kimi, etc.) all enforce +the same invariant. + +This patch: + +1. Extends `requiresReasoningContentForToolCalls` detection: any + reasoning-capable model fronted by OpenRouter now sets the flag. +2. Extends the placeholder gate in `convertMessages` to accept + `thinkingFormat === "openrouter"` alongside `"openai"`. + +Cross-provider continuations are the dominant trigger: a conversation warmed +up by Anthropic Claude (whose reasoning is redacted/encrypted on the wire) +followed by a switch to DeepSeek V4 Pro via OpenRouter. omp cannot +synthesize plaintext `reasoning_content` from Anthropic's encrypted blocks, +so the placeholder satisfies DeepSeek's validator without fabricating a +reasoning trace. Real captured reasoning, when present, short-circuits the +placeholder via `hasReasoningField` and survives intact. + +Side benefit: also closes a latent gap where Kimi-via-OpenRouter +(`thinkingFormat === "openrouter"`) had the compat flag set but the +placeholder gate silently rejected it. + +Regression tests cover: + - compat flag detection on OpenRouter reasoning models + - opt-out for non-reasoning OpenRouter models + - cross-provider redacted-thinking placeholder + - Kimi-via-OpenRouter placeholder firing + - real reasoning preserved over the placeholder + +Applies cleanly on top of patch 0001. + +--- +diff --git a/packages/ai/src/providers/openai-completions-compat.ts b/packages/ai/src/providers/openai-completions-compat.ts +--- a/packages/ai/src/providers/openai-completions-compat.ts ++++ b/packages/ai/src/providers/openai-completions-compat.ts +@@ -105,7 +105,14 @@ + ? "qwen" + : "openai", + reasoningContentField: "reasoning_content", +- requiresReasoningContentForToolCalls: isKimiModel, ++ // Backends that 400 follow-up requests when prior assistant tool-call turns lack `reasoning_content`: ++ // - Kimi: documented invariant on its native API and via OpenCode-Go. ++ // - Any reasoning-capable model reached through OpenRouter: DeepSeek V4 Pro and similar enforce ++ // this server-side whenever the request is in thinking mode. We can't translate Anthropic's ++ // redacted/encrypted reasoning into DeepSeek's plaintext form, so cross-provider continuations ++ // rely on a placeholder — see `convertMessages` for the placeholder injection. ++ requiresReasoningContentForToolCalls: ++ isKimiModel || ((provider === "openrouter" || baseUrl.includes("openrouter.ai")) && Boolean(model.reasoning)), + requiresAssistantContentForToolCalls: isKimiModel, + openRouterRouting: undefined, + vercelGatewayRouting: undefined, +diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts +--- a/packages/ai/src/providers/openai-completions.ts ++++ b/packages/ai/src/providers/openai-completions.ts +@@ -1059,12 +1059,21 @@ + (assistantMsg as any).reasoning_content !== undefined || + (assistantMsg as any).reasoning !== undefined || + (assistantMsg as any).reasoning_text !== undefined; +- if ( +- toolCalls.length > 0 && ++ // Inject a `reasoning_content` placeholder on assistant tool-call turns when the backend ++ // rejects history without it. The compat flag captures the rule: ++ // - Kimi (native or via OpenCode-Go): chat completion endpoint demands the field. ++ // - Reasoning models reached through OpenRouter (e.g. DeepSeek V4 Pro): the underlying ++ // provider's thinking-mode validator demands it on every prior assistant turn. omp ++ // cannot synthesize real reasoning when the conversation was warmed up by another ++ // provider whose reasoning is redacted/encrypted (Anthropic) or simply absent, so we ++ // emit a placeholder. Real captured reasoning, when present, is preserved earlier via ++ // the `thinkingSignature` echo path and short-circuits via `hasReasoningField`. ++ // `thinkingFormat` is gated to formats that consume the field (openai/openrouter chat ++ // completions); formats with their own conventions (zai, qwen) are excluded. ++ const stubsReasoningContent = + compat.requiresReasoningContentForToolCalls && +- compat.thinkingFormat === "openai" && +- !hasReasoningField +- ) { ++ (compat.thinkingFormat === "openai" || compat.thinkingFormat === "openrouter"); ++ if (toolCalls.length > 0 && stubsReasoningContent && !hasReasoningField) { + const reasoningField = compat.reasoningContentField ?? "reasoning_content"; + (assistantMsg as any)[reasoningField] = "."; + } +diff --git a/packages/ai/test/openai-completions-compat.test.ts b/packages/ai/test/openai-completions-compat.test.ts +--- a/packages/ai/test/openai-completions-compat.test.ts ++++ b/packages/ai/test/openai-completions-compat.test.ts +@@ -367,4 +367,137 @@ + const compat = detectCompat(model); + expect(compat.requiresReasoningContentForToolCalls).toBe(true); + }); ++ ++ it("requires reasoning_content for tool calls on reasoning-capable models via OpenRouter", () => { ++ const model: Model<"openai-completions"> = { ++ ...(getBundledModel("openrouter", "deepseek/deepseek-v3.2") as Model<"openai-completions">), ++ reasoning: true, ++ }; ++ const compat = detectCompat(model); ++ expect(compat.thinkingFormat).toBe("openrouter"); ++ expect(compat.requiresReasoningContentForToolCalls).toBe(true); ++ }); ++ ++ it("does not require reasoning_content for non-reasoning OpenRouter models", () => { ++ const model: Model<"openai-completions"> = { ++ ...(getBundledModel("openrouter", "deepseek/deepseek-v3.2") as Model<"openai-completions">), ++ reasoning: false, ++ }; ++ const compat = detectCompat(model); ++ expect(compat.requiresReasoningContentForToolCalls).toBe(false); ++ }); ++ ++ it("injects reasoning_content placeholder for OpenRouter reasoning models lacking captured reasoning", () => { ++ // Reproduces the failing path from real usage: a conversation generated under Anthropic Claude (whose ++ // reasoning is redacted/encrypted) is continued with deepseek/deepseek-v4-pro via OpenRouter. The ++ // prior assistant turns persist as ThinkingContent blocks with empty `thinking` text plus an opaque ++ // Anthropic signature cookie. omp cannot translate that into DeepSeek's plain-text `reasoning_content`, ++ // so the empty thinking block is filtered out and the placeholder fires — satisfying DeepSeek's ++ // thinking-mode validator without fabricating a reasoning trace. ++ const model: Model<"openai-completions"> = { ++ ...(getBundledModel("openrouter", "deepseek/deepseek-v3.2") as Model<"openai-completions">), ++ reasoning: true, ++ }; ++ const compat = detectCompat(model); ++ const toolCallMessage: AssistantMessage = { ++ role: "assistant", ++ content: [ ++ // Anthropic-style redacted thinking block: empty text plus opaque signature. ++ // `thinking.trim().length === 0` filters this out before the signature echo can fire. ++ { type: "thinking", thinking: "", thinkingSignature: "Ep4CClkIDRgCKkDOpaqueAnthropicCookie" }, ++ { type: "toolCall", id: "call_anth_to_ds", name: "web_search", arguments: { query: "hi" } }, ++ ], ++ api: model.api, ++ provider: model.provider, ++ model: model.id, ++ usage: { ++ input: 0, ++ output: 0, ++ cacheRead: 0, ++ cacheWrite: 0, ++ totalTokens: 0, ++ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, ++ }, ++ stopReason: "toolUse", ++ timestamp: Date.now(), ++ }; ++ const messages = convertMessages(model, { messages: [toolCallMessage] }, compat); ++ const assistant = messages.find(m => m.role === "assistant"); ++ expect(assistant).toBeDefined(); ++ expect(Reflect.get(assistant as object, "reasoning_content")).toBe("."); ++ }); ++ ++ it("injects reasoning_content placeholder for kimi-k2-5 via OpenRouter (closes the kimi-via-openrouter gap)", () => { ++ // Before this fix, `requiresReasoningContentForToolCalls` was true for Kimi via OpenRouter but the ++ // stub gate only fired when `thinkingFormat === "openai"`. OpenRouter sets thinkingFormat="openrouter", ++ // so the stub silently never fired and Kimi-via-OpenRouter conversations 400'd the same way. ++ const model: Model<"openai-completions"> = { ++ ...getBundledModel("openai", "gpt-4o-mini"), ++ api: "openai-completions", ++ provider: "openrouter", ++ baseUrl: "https://openrouter.ai/api/v1", ++ id: "moonshotai/kimi-k2-5", ++ reasoning: true, ++ }; ++ const compat = detectCompat(model); ++ const toolCallMessage: AssistantMessage = { ++ role: "assistant", ++ content: [ ++ { type: "toolCall", id: "call_kimi_or", name: "web_search", arguments: { query: "hi" } }, ++ ], ++ api: model.api, ++ provider: model.provider, ++ model: model.id, ++ usage: { ++ input: 0, ++ output: 0, ++ cacheRead: 0, ++ cacheWrite: 0, ++ totalTokens: 0, ++ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, ++ }, ++ stopReason: "toolUse", ++ timestamp: Date.now(), ++ }; ++ const messages = convertMessages(model, { messages: [toolCallMessage] }, compat); ++ const assistant = messages.find(m => m.role === "assistant"); ++ expect(assistant).toBeDefined(); ++ expect(Reflect.get(assistant as object, "reasoning_content")).toBe("."); ++ }); ++ ++ it("preserves real captured reasoning over the placeholder when the assistant has non-empty thinking", () => { ++ // Sanity check: the placeholder must not overwrite real reasoning. When the prior assistant turn was ++ // generated by the same provider and surfaces plaintext reasoning, the existing thinkingSignature ++ // echo path sets `reasoning_content` first, and `hasReasoningField` short-circuits the stub. ++ const model: Model<"openai-completions"> = { ++ ...(getBundledModel("openrouter", "deepseek/deepseek-v3.2") as Model<"openai-completions">), ++ reasoning: true, ++ }; ++ const compat = detectCompat(model); ++ const toolCallMessage: AssistantMessage = { ++ role: "assistant", ++ content: [ ++ { type: "thinking", thinking: "Step 1: read the file. Step 2: search.", thinkingSignature: "reasoning_content" }, ++ { type: "toolCall", id: "call_real", name: "web_search", arguments: { query: "hi" } }, ++ ], ++ api: model.api, ++ provider: model.provider, ++ model: model.id, ++ usage: { ++ input: 0, ++ output: 0, ++ cacheRead: 0, ++ cacheWrite: 0, ++ totalTokens: 0, ++ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, ++ }, ++ stopReason: "toolUse", ++ timestamp: Date.now(), ++ }; ++ const messages = convertMessages(model, { messages: [toolCallMessage] }, compat); ++ const assistant = messages.find(m => m.role === "assistant"); ++ expect(assistant).toBeDefined(); ++ expect(Reflect.get(assistant as object, "reasoning_content")).toBe("Step 1: read the file. Step 2: search."); ++ }); ++ + });