Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions src/api/providers/__tests__/openai.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,45 @@ describe("OpenAiHandler", () => {
expect(textChunks[0].text).toBe("Test response")
})

it("streams reasoning chunks from delta.reasoning_content", async () => {
	// Simulated provider stream: one reasoning delta, one text delta, then a
	// closing chunk that carries only usage totals.
	async function* fakeStream() {
		yield { choices: [{ delta: { reasoning_content: "thinking..." }, index: 0 }] }
		yield { choices: [{ delta: { content: "answer" }, index: 0 }] }
		yield {
			choices: [{ delta: {}, index: 0 }],
			usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
		}
	}
	mockCreate.mockImplementationOnce(async () => ({ [Symbol.asyncIterator]: fakeStream }))

	// Drain the handler's output so every emitted chunk can be inspected.
	const received: any[] = []
	const stream = handler.createMessage(systemPrompt, messages)
	for await (const emitted of stream) {
		received.push(emitted)
	}

	// The reasoning delta must surface as a dedicated "reasoning" chunk.
	const expectedReasoning = { type: "reasoning", text: "thinking..." }
	expect(received).toContainEqual(expectedReasoning)
})

it("falls back to delta.reasoning when reasoning_content is absent", async () => {
	// Simulated provider stream that uses the `reasoning` field only, followed
	// by a closing chunk that carries usage totals.
	async function* fakeStream() {
		yield { choices: [{ delta: { reasoning: "router-style thought" }, index: 0 }] }
		yield {
			choices: [{ delta: {}, index: 0 }],
			usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
		}
	}
	mockCreate.mockImplementationOnce(async () => ({ [Symbol.asyncIterator]: fakeStream }))

	// Drain the handler's output so every emitted chunk can be inspected.
	const received: any[] = []
	const stream = handler.createMessage(systemPrompt, messages)
	for await (const emitted of stream) {
		received.push(emitted)
	}

	// The `reasoning` text must still be reported as a "reasoning" chunk.
	const expectedReasoning = { type: "reasoning", text: "router-style thought" }
	expect(received).toContainEqual(expectedReasoning)
})

it("should handle tool calls in streaming responses", async () => {
mockCreate.mockImplementation(async (options) => {
return {
Expand Down
43 changes: 43 additions & 0 deletions src/api/providers/__tests__/requesty.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,49 @@ describe("RequestyHandler", () => {
await expect(generator.next()).rejects.toThrow("API Error")
})

it("streams reasoning chunks from delta.reasoning_content", async () => {
	const handler = new RequestyHandler(mockOptions)

	// Simulated provider stream: one reasoning delta, one text delta, then a
	// closing chunk that carries only usage totals.
	async function* fakeStream() {
		yield { id: "1", choices: [{ delta: { reasoning_content: "thinking..." } }] }
		yield { id: "1", choices: [{ delta: { content: "answer" } }] }
		yield {
			id: "1",
			choices: [{ delta: {} }],
			usage: { prompt_tokens: 1, completion_tokens: 1 },
		}
	}
	mockCreate.mockResolvedValue({ [Symbol.asyncIterator]: fakeStream })

	// Drain the handler's output so every emitted chunk can be inspected.
	const received: any[] = []
	const stream = handler.createMessage("sys", [{ role: "user", content: "hi" }])
	for await (const emitted of stream) {
		received.push(emitted)
	}

	// The reasoning delta must surface as a dedicated "reasoning" chunk.
	const expectedReasoning = { type: "reasoning", text: "thinking..." }
	expect(received).toContainEqual(expectedReasoning)
})

it("falls back to delta.reasoning when reasoning_content is absent", async () => {
	const handler = new RequestyHandler(mockOptions)

	// Simulated provider stream that uses the `reasoning` field only, followed
	// by a closing chunk that carries usage totals.
	async function* fakeStream() {
		yield { id: "1", choices: [{ delta: { reasoning: "router-style thought" } }] }
		yield {
			id: "1",
			choices: [{ delta: {} }],
			usage: { prompt_tokens: 1, completion_tokens: 1 },
		}
	}
	mockCreate.mockResolvedValue({ [Symbol.asyncIterator]: fakeStream })

	// Drain the handler's output so every emitted chunk can be inspected.
	const received: any[] = []
	const stream = handler.createMessage("sys", [{ role: "user", content: "hi" }])
	for await (const emitted of stream) {
		received.push(emitted)
	}

	// The `reasoning` text must still be reported as a "reasoning" chunk.
	const expectedReasoning = { type: "reasoning", text: "router-style thought" }
	expect(received).toContainEqual(expectedReasoning)
})

describe("native tool support", () => {
const systemPrompt = "test system prompt"
const messages: Anthropic.Messages.MessageParam[] = [
Expand Down
51 changes: 51 additions & 0 deletions src/api/providers/__tests__/unbound.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,57 @@ describe("UnboundHandler", () => {
)
})

it("streams reasoning chunks from delta.reasoning_content", async () => {
	// Simulated provider stream: one reasoning delta, one text delta, then a
	// closing chunk that carries only usage totals.
	async function* fakeStream() {
		yield { choices: [{ delta: { reasoning_content: "thinking..." } }] }
		yield { choices: [{ delta: { content: "answer" } }] }
		yield { choices: [{ delta: {} }], usage: { prompt_tokens: 1, completion_tokens: 1 } }
	}

	// Obtain the mocked chat.completions.create and have it resolve to the fake stream.
	const openAiMock = OpenAI as unknown as any
	const createMock = openAiMock().chat.completions.create
	createMock.mockResolvedValue({ [Symbol.asyncIterator]: fakeStream })

	const handler = new UnboundHandler({
		unboundApiKey: "test-key",
		unboundModelId: "openai/gpt-4o",
	})

	// Drain the handler's output so every emitted chunk can be inspected.
	const received: any[] = []
	const stream = handler.createMessage("system", [{ role: "user", content: "hi" }], {
		taskId: "t",
		tools: [],
	})
	for await (const emitted of stream) {
		received.push(emitted)
	}

	// The reasoning delta must surface as a dedicated "reasoning" chunk.
	const expectedReasoning = { type: "reasoning", text: "thinking..." }
	expect(received).toContainEqual(expectedReasoning)
})

it("falls back to delta.reasoning when reasoning_content is absent", async () => {
	// Simulated provider stream that uses the `reasoning` field only, followed
	// by a closing chunk that carries usage totals.
	async function* fakeStream() {
		yield { choices: [{ delta: { reasoning: "router-style thought" } }] }
		yield { choices: [{ delta: {} }], usage: { prompt_tokens: 1, completion_tokens: 1 } }
	}

	// Obtain the mocked chat.completions.create and have it resolve to the fake stream.
	const openAiMock = OpenAI as unknown as any
	const createMock = openAiMock().chat.completions.create
	createMock.mockResolvedValue({ [Symbol.asyncIterator]: fakeStream })

	const handler = new UnboundHandler({
		unboundApiKey: "test-key",
		unboundModelId: "openai/gpt-4o",
	})

	// Drain the handler's output so every emitted chunk can be inspected.
	const received: any[] = []
	const stream = handler.createMessage("system", [{ role: "user", content: "hi" }], {
		taskId: "t",
		tools: [],
	})
	for await (const emitted of stream) {
		received.push(emitted)
	}

	// The `reasoning` text must still be reported as a "reasoning" chunk.
	const expectedReasoning = { type: "reasoning", text: "router-style thought" }
	expect(received).toContainEqual(expectedReasoning)
})

it("identifies itself as Zoo Code in per-request Unbound metadata", async () => {
const mockCreate = (OpenAI as unknown as any)().chat.completions.create
mockCreate.mockResolvedValue({
Expand Down
9 changes: 4 additions & 5 deletions src/api/providers/deepseek.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import { getModelParams } from "../transform/model-params"
import { convertToR1Format } from "../transform/r1-format"

import { OpenAiHandler } from "./openai"
import { extractReasoningFromDelta } from "./utils/extract-reasoning"
import type { ApiHandlerCreateMessageMetadata } from "../index"

// Custom interface for DeepSeek params to support thinking mode
Expand Down Expand Up @@ -155,11 +156,9 @@ export class DeepSeekHandler extends OpenAiHandler {

// Handle reasoning text from DeepSeek's interleaved thinking.
// DeepSeek streams thinking content in delta.reasoning_content; the shared helper also accepts delta.reasoning as a fallback.
if ("reasoning_content" in delta && delta.reasoning_content) {
yield {
type: "reasoning",
text: (delta.reasoning_content as string) || "",
}
const reasoningText = extractReasoningFromDelta(delta)
if (reasoningText) {
yield { type: "reasoning", text: reasoningText }
}

// Handle tool calls
Expand Down
9 changes: 4 additions & 5 deletions src/api/providers/mimo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { convertToR1Format } from "../transform/r1-format"
import { getModelParams } from "../transform/model-params"
import { calculateApiCostOpenAI } from "../../shared/cost"
import { handleProviderError } from "./utils/error-handler"
import { extractReasoningFromDelta } from "./utils/extract-reasoning"

import { OpenAiHandler } from "./openai"
import type { ApiHandlerCreateMessageMetadata } from "../index"
Expand Down Expand Up @@ -127,11 +128,9 @@ export class MimoHandler extends OpenAiHandler {
}
}

if ("reasoning_content" in delta && delta.reasoning_content) {
yield {
type: "reasoning",
text: (delta.reasoning_content as string) || "",
}
const reasoningText = extractReasoningFromDelta(delta)
if (reasoningText) {
yield { type: "reasoning", text: reasoningText }
}

yield* this.processToolCalls(sanitizedDelta, finishReason, activeToolCallIds)
Expand Down
9 changes: 4 additions & 5 deletions src/api/providers/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import { BaseProvider } from "./base-provider"
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
import { getApiRequestTimeout } from "./utils/timeout-config"
import { handleOpenAIError } from "./utils/openai-error-handler"
import { extractReasoningFromDelta } from "./utils/extract-reasoning"

// TODO: Rename this to OpenAICompatibleHandler. Also, I think the
// `OpenAINativeHandler` can subclass from this, since it's obviously
Expand Down Expand Up @@ -207,11 +208,9 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
}
}

if ("reasoning_content" in delta && delta.reasoning_content) {
yield {
type: "reasoning",
text: (delta.reasoning_content as string | undefined) || "",
}
const reasoningText = extractReasoningFromDelta(delta)
if (reasoningText) {
yield { type: "reasoning", text: reasoningText }
}

yield* this.processToolCalls(delta, finishReason, activeToolCallIds)
Expand Down
6 changes: 4 additions & 2 deletions src/api/providers/opencode-go.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { convertToOpenAiMessages } from "../transform/openai-format"

import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
import { RouterProvider } from "./router-provider"
import { extractReasoningFromDelta } from "./utils/extract-reasoning"

/**
* API handler for the Opencode "Go" subscription plan.
Expand Down Expand Up @@ -80,8 +81,9 @@ export class OpencodeGoHandler extends RouterProvider implements SingleCompletio
}

// Several Go-plan models (GLM, DeepSeek) stream reasoning via delta.reasoning_content (delta.reasoning is accepted as a fallback).
if (delta && "reasoning_content" in delta && delta.reasoning_content) {
yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" }
const reasoningText = extractReasoningFromDelta(delta)
if (reasoningText) {
yield { type: "reasoning", text: reasoningText }
}

// Emit raw tool call chunks - NativeToolCallParser handles state management.
Expand Down
9 changes: 4 additions & 5 deletions src/api/providers/qwen-code.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import { convertToOpenAiMessages } from "../transform/openai-format"
import { ApiStream } from "../transform/stream"

import { BaseProvider } from "./base-provider"
import { extractReasoningFromDelta } from "./utils/extract-reasoning"
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"

const QWEN_OAUTH_BASE_URL = "https://chat.qwen.ai"
Expand Down Expand Up @@ -283,11 +284,9 @@ export class QwenCodeHandler extends BaseProvider implements SingleCompletionHan
}
}

if ("reasoning_content" in delta && delta.reasoning_content) {
yield {
type: "reasoning",
text: (delta.reasoning_content as string | undefined) || "",
}
const reasoningText = extractReasoningFromDelta(delta)
if (reasoningText) {
yield { type: "reasoning", text: reasoningText }
}

// Handle tool calls in stream - emit partial chunks for NativeToolCallParser
Expand Down
6 changes: 4 additions & 2 deletions src/api/providers/requesty.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ".
import { toRequestyServiceUrl } from "../../shared/utils/requesty"
import { handleOpenAIError } from "./utils/openai-error-handler"
import { applyRouterToolPreferences } from "./utils/router-tool-preferences"
import { extractReasoningFromDelta } from "./utils/extract-reasoning"

// Requesty usage includes an extra field for Anthropic use cases.
// Safely cast the prompt token details section to the appropriate structure.
Expand Down Expand Up @@ -174,8 +175,9 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
yield { type: "text", text: delta.content }
}

if (delta && "reasoning_content" in delta && delta.reasoning_content) {
yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" }
const reasoningText = extractReasoningFromDelta(delta)
if (reasoningText) {
yield { type: "reasoning", text: reasoningText }
}

// Handle native tool calls
Expand Down
6 changes: 4 additions & 2 deletions src/api/providers/unbound.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import { BaseProvider } from "./base-provider"
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
import { handleOpenAIError } from "./utils/openai-error-handler"
import { applyRouterToolPreferences } from "./utils/router-tool-preferences"
import { extractReasoningFromDelta } from "./utils/extract-reasoning"

// Unbound usage includes extra fields for Anthropic cache tokens.
interface UnboundUsage extends OpenAI.CompletionUsage {
Expand Down Expand Up @@ -162,8 +163,9 @@ export class UnboundHandler extends BaseProvider implements SingleCompletionHand
yield { type: "text", text: delta.content }
}

if (delta && "reasoning_content" in delta && delta.reasoning_content) {
yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" }
const reasoningText = extractReasoningFromDelta(delta)
if (reasoningText) {
yield { type: "reasoning", text: reasoningText }
}

// Handle native tool calls
Expand Down
Loading