diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index f45b311f6..ebe86fdb5 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -219,6 +219,45 @@ describe("OpenAiHandler", () => { expect(textChunks[0].text).toBe("Test response") }) + it("streams reasoning chunks from delta.reasoning_content", async () => { + mockCreate.mockImplementationOnce(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { choices: [{ delta: { reasoning_content: "thinking..." }, index: 0 }] } + yield { choices: [{ delta: { content: "answer" }, index: 0 }] } + yield { + choices: [{ delta: {}, index: 0 }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + } + }, + })) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, messages)) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "reasoning", text: "thinking..." }) + }) + + it("falls back to delta.reasoning when reasoning_content is absent", async () => { + mockCreate.mockImplementationOnce(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { choices: [{ delta: { reasoning: "router-style thought" }, index: 0 }] } + yield { + choices: [{ delta: {}, index: 0 }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + } + }, + })) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, messages)) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "reasoning", text: "router-style thought" }) + }) + it("should handle tool calls in streaming responses", async () => { mockCreate.mockImplementation(async (options) => { return { diff --git a/src/api/providers/__tests__/requesty.spec.ts b/src/api/providers/__tests__/requesty.spec.ts index feacf3f87..1e272d1b6 100644 --- a/src/api/providers/__tests__/requesty.spec.ts +++ b/src/api/providers/__tests__/requesty.spec.ts @@ -249,6 +249,49 @@ describe("RequestyHandler", () => { await expect(generator.next()).rejects.toThrow("API Error") }) + it("streams reasoning chunks from delta.reasoning_content", async () => { + const handler = new RequestyHandler(mockOptions) + mockCreate.mockResolvedValue({ + async *[Symbol.asyncIterator]() { + yield { id: "1", choices: [{ delta: { reasoning_content: "thinking..." } }] } + yield { id: "1", choices: [{ delta: { content: "answer" } }] } + yield { + id: "1", + choices: [{ delta: {} }], + usage: { prompt_tokens: 1, completion_tokens: 1 }, + } + }, + }) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage("sys", [{ role: "user", content: "hi" }])) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "reasoning", text: "thinking..." }) + }) + + it("falls back to delta.reasoning when reasoning_content is absent", async () => { + const handler = new RequestyHandler(mockOptions) + mockCreate.mockResolvedValue({ + async *[Symbol.asyncIterator]() { + yield { id: "1", choices: [{ delta: { reasoning: "router-style thought" } }] } + yield { + id: "1", + choices: [{ delta: {} }], + usage: { prompt_tokens: 1, completion_tokens: 1 }, + } + }, + }) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage("sys", [{ role: "user", content: "hi" }])) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "reasoning", text: "router-style thought" }) + }) + describe("native tool support", () => { const systemPrompt = "test system prompt" const messages: Anthropic.Messages.MessageParam[] = [ diff --git a/src/api/providers/__tests__/unbound.spec.ts b/src/api/providers/__tests__/unbound.spec.ts index 261968190..8771e636e 100644 --- a/src/api/providers/__tests__/unbound.spec.ts +++ b/src/api/providers/__tests__/unbound.spec.ts @@ -50,6 +50,57 @@ describe("UnboundHandler", () => { ) }) + it("streams reasoning chunks from delta.reasoning_content", async () => { + const mockCreate = (OpenAI as unknown as any)().chat.completions.create + mockCreate.mockResolvedValue({ + async *[Symbol.asyncIterator]() { + yield { choices: [{ delta: { reasoning_content: "thinking..." } }] } + yield { choices: [{ delta: { content: "answer" } }] } + yield { choices: [{ delta: {} }], usage: { prompt_tokens: 1, completion_tokens: 1 } } + }, + }) + + const handler = new UnboundHandler({ + unboundApiKey: "test-key", + unboundModelId: "openai/gpt-4o", + }) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage("system", [{ role: "user", content: "hi" }], { + taskId: "t", + tools: [], + })) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "reasoning", text: "thinking..." }) + }) + + it("falls back to delta.reasoning when reasoning_content is absent", async () => { + const mockCreate = (OpenAI as unknown as any)().chat.completions.create + mockCreate.mockResolvedValue({ + async *[Symbol.asyncIterator]() { + yield { choices: [{ delta: { reasoning: "router-style thought" } }] } + yield { choices: [{ delta: {} }], usage: { prompt_tokens: 1, completion_tokens: 1 } } + }, + }) + + const handler = new UnboundHandler({ + unboundApiKey: "test-key", + unboundModelId: "openai/gpt-4o", + }) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage("system", [{ role: "user", content: "hi" }], { + taskId: "t", + tools: [], + })) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "reasoning", text: "router-style thought" }) + }) + it("identifies itself as Zoo Code in per-request Unbound metadata", async () => { const mockCreate = (OpenAI as unknown as any)().chat.completions.create mockCreate.mockResolvedValue({ diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index e2ffd2916..819fe6c7b 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -16,6 +16,7 @@ import { getModelParams } from "../transform/model-params" import { convertToR1Format } from "../transform/r1-format" import { OpenAiHandler } from "./openai" +import { extractReasoningFromDelta } from "./utils/extract-reasoning" import type { ApiHandlerCreateMessageMetadata } from "../index" // Custom interface for DeepSeek params to support thinking mode @@ -155,11 +156,9 @@ export class DeepSeekHandler extends OpenAiHandler { // Handle reasoning_content from DeepSeek's interleaved thinking // This is the proper way DeepSeek sends thinking content in streaming - if ("reasoning_content" in delta && delta.reasoning_content) { - yield { - type: "reasoning", - text: (delta.reasoning_content as string) || "", - } + const reasoningText = extractReasoningFromDelta(delta) + if (reasoningText) { + yield { type: "reasoning", text: reasoningText } } // Handle tool calls diff --git a/src/api/providers/mimo.ts b/src/api/providers/mimo.ts index f842926b6..2901c2e92 100644 --- a/src/api/providers/mimo.ts +++ b/src/api/providers/mimo.ts @@ -9,6 +9,7 @@ import { convertToR1Format } from "../transform/r1-format" import { getModelParams } from "../transform/model-params" import { calculateApiCostOpenAI } from "../../shared/cost" import { handleProviderError } from "./utils/error-handler" +import { extractReasoningFromDelta } from "./utils/extract-reasoning" import { OpenAiHandler } from "./openai" import type { ApiHandlerCreateMessageMetadata } from "../index" @@ -127,11 +128,9 @@ export class MimoHandler extends OpenAiHandler { } } - if ("reasoning_content" in delta && delta.reasoning_content) { - yield { - type: "reasoning", - text: (delta.reasoning_content as string) || "", - } + const reasoningText = extractReasoningFromDelta(delta) + if (reasoningText) { + yield { type: "reasoning", text: reasoningText } } yield* this.processToolCalls(sanitizedDelta, finishReason, activeToolCallIds) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 532ed38ba..f80544b40 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -24,6 +24,7 @@ import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { getApiRequestTimeout } from "./utils/timeout-config" import { handleOpenAIError } from "./utils/openai-error-handler" +import { extractReasoningFromDelta } from "./utils/extract-reasoning" // TODO: Rename this to OpenAICompatibleHandler. Also, I think the // `OpenAINativeHandler` can subclass from this, since it's obviously @@ -207,11 +208,9 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } } - if ("reasoning_content" in delta && delta.reasoning_content) { - yield { - type: "reasoning", - text: (delta.reasoning_content as string | undefined) || "", - } + const reasoningText = extractReasoningFromDelta(delta) + if (reasoningText) { + yield { type: "reasoning", text: reasoningText } } yield* this.processToolCalls(delta, finishReason, activeToolCallIds) diff --git a/src/api/providers/opencode-go.ts b/src/api/providers/opencode-go.ts index 6b66aa684..43d32e619 100644 --- a/src/api/providers/opencode-go.ts +++ b/src/api/providers/opencode-go.ts @@ -10,6 +10,7 @@ import { convertToOpenAiMessages } from "../transform/openai-format" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { RouterProvider } from "./router-provider" +import { extractReasoningFromDelta } from "./utils/extract-reasoning" /** * API handler for the Opencode "Go" subscription plan. @@ -80,8 +81,9 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio } // Several Go-plan models (GLM, DeepSeek) stream reasoning via this field. - if (delta && "reasoning_content" in delta && delta.reasoning_content) { - yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" } + const reasoningText = extractReasoningFromDelta(delta) + if (reasoningText) { + yield { type: "reasoning", text: reasoningText } } // Emit raw tool call chunks - NativeToolCallParser handles state management. diff --git a/src/api/providers/qwen-code.ts b/src/api/providers/qwen-code.ts index f2a207051..0b7d7598a 100644 --- a/src/api/providers/qwen-code.ts +++ b/src/api/providers/qwen-code.ts @@ -14,6 +14,7 @@ import { convertToOpenAiMessages } from "../transform/openai-format" import { ApiStream } from "../transform/stream" import { BaseProvider } from "./base-provider" +import { extractReasoningFromDelta } from "./utils/extract-reasoning" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" const QWEN_OAUTH_BASE_URL = "https://chat.qwen.ai" @@ -283,11 +284,9 @@ export class QwenCodeHandler extends BaseProvider implements SingleCompletionHan } } - if ("reasoning_content" in delta && delta.reasoning_content) { - yield { - type: "reasoning", - text: (delta.reasoning_content as string | undefined) || "", - } + const reasoningText = extractReasoningFromDelta(delta) + if (reasoningText) { + yield { type: "reasoning", text: reasoningText } } // Handle tool calls in stream - emit partial chunks for NativeToolCallParser diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts index 3e50adf9c..df3dc35af 100644 --- a/src/api/providers/requesty.ts +++ b/src/api/providers/requesty.ts @@ -18,6 +18,7 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". import { toRequestyServiceUrl } from "../../shared/utils/requesty" import { handleOpenAIError } from "./utils/openai-error-handler" import { applyRouterToolPreferences } from "./utils/router-tool-preferences" +import { extractReasoningFromDelta } from "./utils/extract-reasoning" // Requesty usage includes an extra field for Anthropic use cases. // Safely cast the prompt token details section to the appropriate structure. @@ -174,8 +175,9 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan yield { type: "text", text: delta.content } } - if (delta && "reasoning_content" in delta && delta.reasoning_content) { - yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" } + const reasoningText = extractReasoningFromDelta(delta) + if (reasoningText) { + yield { type: "reasoning", text: reasoningText } } // Handle native tool calls diff --git a/src/api/providers/unbound.ts b/src/api/providers/unbound.ts index a1de7dfa1..f0c6fe758 100644 --- a/src/api/providers/unbound.ts +++ b/src/api/providers/unbound.ts @@ -17,6 +17,7 @@ import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { handleOpenAIError } from "./utils/openai-error-handler" import { applyRouterToolPreferences } from "./utils/router-tool-preferences" +import { extractReasoningFromDelta } from "./utils/extract-reasoning" // Unbound usage includes extra fields for Anthropic cache tokens. interface UnboundUsage extends OpenAI.CompletionUsage { @@ -162,8 +163,9 @@ export class UnboundHandler extends BaseProvider implements SingleCompletionHand yield { type: "text", text: delta.content } } - if (delta && "reasoning_content" in delta && delta.reasoning_content) { - yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" } + const reasoningText = extractReasoningFromDelta(delta) + if (reasoningText) { + yield { type: "reasoning", text: reasoningText } } // Handle native tool calls