From 5f2c3dc7618213278c7519e6abc2dbe83b1857b4 Mon Sep 17 00:00:00 2001 From: Mob Code 100 Date: Sun, 14 Jun 2026 05:16:09 +0800 Subject: [PATCH 1/3] feat: add glm-5.2 support Introduces a Max reasoning-effort tier; High is the default and Max is opt-in. --- .changeset/add-glm-5-2-support.md | 5 + packages/types/src/model.ts | 15 ++- packages/types/src/providers/zai.ts | 32 +++++ src/api/providers/__tests__/zai.spec.ts | 128 +++++++++++++++++++ src/api/providers/zai.ts | 18 ++- webview-ui/src/i18n/locales/en/settings.json | 3 +- 6 files changed, 194 insertions(+), 7 deletions(-) create mode 100644 .changeset/add-glm-5-2-support.md diff --git a/.changeset/add-glm-5-2-support.md b/.changeset/add-glm-5-2-support.md new file mode 100644 index 0000000000..26bd162cdf --- /dev/null +++ b/.changeset/add-glm-5-2-support.md @@ -0,0 +1,5 @@ +--- +"zoo-code": minor +--- + +Add GLM-5.2 support with High/Max `reasoning_effort` tiers. The default effort is High (deep reasoning stays opt-in), Max is selected only when the user explicitly picks it, and the parameter is omitted entirely when reasoning is disabled. 
diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts index a86eadfaf9..87ebfaf967 100644 --- a/packages/types/src/model.ts +++ b/packages/types/src/model.ts @@ -23,7 +23,7 @@ export type ReasoningEffortWithMinimal = z.infer { expect(model.info.supportsImages).toBe(false) }) + it("should return GLM-5.2 international model with High/Max effort tiers and 1M context", () => { + const testModelId: InternationalZAiModelId = "glm-5.2" + const handlerWithModel = new ZAiHandler({ + apiModelId: testModelId, + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual(internationalZAiModels[testModelId]) + expect(model.info.contextWindow).toBe(1_000_000) + expect(model.info.maxTokens).toBe(131_072) + expect(model.info.supportsReasoningEffort).toEqual(["disable", "high", "max"]) + expect(model.info.reasoningEffort).toBe("high") + expect(model.info.preserveReasoning).toBe(true) + expect(model.info.supportsMaxTokens).toBe(true) + expect(model.info.inputPrice).toBe(1.4) + expect(model.info.outputPrice).toBe(4.4) + expect(model.info.cacheReadsPrice).toBe(0.26) + }) + it("should return GLM-5-Turbo international model with thinking support", () => { const testModelId: InternationalZAiModelId = "glm-5-turbo" const handlerWithModel = new ZAiHandler({ @@ -231,6 +252,27 @@ describe("ZAiHandler", () => { expect(model.info.supportsImages).toBe(false) }) + it("should return GLM-5.2 China model with High/Max effort tiers and 1M context", () => { + const testModelId: MainlandZAiModelId = "glm-5.2" + const handlerWithModel = new ZAiHandler({ + apiModelId: testModelId, + zaiApiKey: "test-zai-api-key", + zaiApiLine: "china_coding", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual(mainlandZAiModels[testModelId]) + expect(model.info.contextWindow).toBe(1_000_000) + 
expect(model.info.maxTokens).toBe(131_072) + expect(model.info.supportsReasoningEffort).toEqual(["disable", "high", "max"]) + expect(model.info.reasoningEffort).toBe("high") + expect(model.info.preserveReasoning).toBe(true) + expect(model.info.supportsMaxTokens).toBe(true) + expect(model.info.inputPrice).toBe(0.68) + expect(model.info.outputPrice).toBe(2.28) + expect(model.info.cacheReadsPrice).toBe(0.13) + }) + it("should return GLM-4.7 China model with thinking support", () => { const testModelId: MainlandZAiModelId = "glm-4.7" const handlerWithModel = new ZAiHandler({ @@ -573,6 +615,92 @@ describe("ZAiHandler", () => { ) }) + it("should send reasoning_effort:high by default for GLM-5.2 (model default)", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-5.2", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + // No reasoningEffort setting - should use model default (high) + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-5.2", + thinking: { type: "enabled" }, + reasoning_effort: "high", + }), + ) + }) + + it("should send reasoning_effort:max for GLM-5.2 when reasoningEffort is set to max", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-5.2", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + reasoningEffort: "max", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: 
"glm-5.2", + thinking: { type: "enabled" }, + reasoning_effort: "max", + }), + ) + }) + + it("should omit reasoning_effort for GLM-5.2 when reasoningEffort is set to disable", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-5.2", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + reasoningEffort: "disable", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + const callArgs = mockCreate.mock.calls[0][0] + expect(callArgs.thinking).toEqual({ type: "disabled" }) + expect(callArgs.reasoning_effort).toBeUndefined() + }) + it("should disable thinking for GLM-4.7 when reasoningEffort is set to disable", async () => { const handlerWithModel = new ZAiHandler({ apiModelId: "glm-4.7", diff --git a/src/api/providers/zai.ts b/src/api/providers/zai.ts index 113cf655d3..4c4a3c7910 100644 --- a/src/api/providers/zai.ts +++ b/src/api/providers/zai.ts @@ -17,9 +17,13 @@ import { convertToZAiFormat } from "../transform/zai-format" import type { ApiHandlerCreateMessageMetadata } from "../index" import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider" -// Custom interface for Z.ai params to support thinking mode -type ZAiChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParamsStreaming & { +// Custom interface for Z.ai params to support thinking mode and reasoning effort tiers. +// Z.ai accepts the standard `reasoning_effort` ladder (none/minimal/low/medium/high/xhigh/max) +// alongside the GLM-specific `thinking` toggle. Omit the OpenAI-typed `reasoning_effort` so we +// can widen it to include provider-specific values such as "max". 
+type ZAiChatCompletionParams = Omit<OpenAI.Chat.ChatCompletionCreateParamsStreaming, "reasoning_effort"> & { thinking?: { type: "enabled" | "disabled" } + reasoning_effort?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh" | "max" } export class ZAiHandler extends BaseOpenAiCompatibleProvider { @@ -79,6 +83,11 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { ) { const { id: model, info } = this.getModel() + // Resolve the reasoning-effort tier (e.g. "high" | "max" for GLM-5.2) from the user + // setting, falling back to the model's default. Omitted when reasoning is disabled. + const effort = useReasoning ? (this.options.reasoningEffort ?? info.reasoningEffort) : undefined + const reasoningEffort = effort && effort !== "disable" ? effort : undefined + const max_tokens = this.options.modelMaxTokens || (getModelMaxOutputTokens({ @@ -103,11 +112,14 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { stream_options: { include_usage: true }, // Thinking is ON by default for these models, so explicitly disable it when needed. thinking: useReasoning ? { type: "enabled" } : { type: "disabled" }, + reasoning_effort: reasoningEffort, tools: this.convertToolsForOpenAI(metadata?.tools), tool_choice: metadata?.tool_choice, parallel_tool_calls: metadata?.parallelToolCalls ?? 
true, } - return this.client.chat.completions.create(params) + return this.client.chat.completions.create( + params as OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming, + ) } } diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index eec2a564f5..edc416afcf 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -620,7 +620,8 @@ "low": "Low", "medium": "Medium", "high": "High", - "xhigh": "Extra High" + "xhigh": "Extra High", + "max": "Max" }, "verbosity": { "label": "Output Verbosity", From 5ed5288fce2a86b03f7b793636bed21e8b95804b Mon Sep 17 00:00:00 2001 From: Mob Code 100 Date: Sun, 14 Jun 2026 06:14:45 +0800 Subject: [PATCH 2/3] i18n: add max reasoning-effort translations for all 17 non-English locales --- webview-ui/src/i18n/locales/ca/settings.json | 1 + webview-ui/src/i18n/locales/de/settings.json | 1 + webview-ui/src/i18n/locales/es/settings.json | 1 + webview-ui/src/i18n/locales/fr/settings.json | 1 + webview-ui/src/i18n/locales/hi/settings.json | 1 + webview-ui/src/i18n/locales/id/settings.json | 1 + webview-ui/src/i18n/locales/it/settings.json | 1 + webview-ui/src/i18n/locales/ja/settings.json | 1 + webview-ui/src/i18n/locales/ko/settings.json | 1 + webview-ui/src/i18n/locales/nl/settings.json | 1 + webview-ui/src/i18n/locales/pl/settings.json | 1 + webview-ui/src/i18n/locales/pt-BR/settings.json | 1 + webview-ui/src/i18n/locales/ru/settings.json | 1 + webview-ui/src/i18n/locales/tr/settings.json | 1 + webview-ui/src/i18n/locales/vi/settings.json | 1 + webview-ui/src/i18n/locales/zh-CN/settings.json | 1 + webview-ui/src/i18n/locales/zh-TW/settings.json | 3 ++- 17 files changed, 18 insertions(+), 1 deletion(-) diff --git a/webview-ui/src/i18n/locales/ca/settings.json b/webview-ui/src/i18n/locales/ca/settings.json index cd2d1d9c94..5de02a4451 100644 --- a/webview-ui/src/i18n/locales/ca/settings.json +++ 
b/webview-ui/src/i18n/locales/ca/settings.json @@ -544,6 +544,7 @@ "minimal": "Mínim (el més ràpid)", "high": "Alt", "xhigh": "Molt alt", + "max": "Màxim", "medium": "Mitjà", "low": "Baix" }, diff --git a/webview-ui/src/i18n/locales/de/settings.json b/webview-ui/src/i18n/locales/de/settings.json index 89d71a1e73..5d40186752 100644 --- a/webview-ui/src/i18n/locales/de/settings.json +++ b/webview-ui/src/i18n/locales/de/settings.json @@ -544,6 +544,7 @@ "minimal": "Minimal (schnellste)", "high": "Hoch", "xhigh": "Sehr hoch", + "max": "Maximum", "medium": "Mittel", "low": "Niedrig" }, diff --git a/webview-ui/src/i18n/locales/es/settings.json b/webview-ui/src/i18n/locales/es/settings.json index 85491e2cad..f58cd7e952 100644 --- a/webview-ui/src/i18n/locales/es/settings.json +++ b/webview-ui/src/i18n/locales/es/settings.json @@ -544,6 +544,7 @@ "minimal": "Mínimo (el más rápido)", "high": "Alto", "xhigh": "Muy alto", + "max": "Máximo", "medium": "Medio", "low": "Bajo" }, diff --git a/webview-ui/src/i18n/locales/fr/settings.json b/webview-ui/src/i18n/locales/fr/settings.json index 05c2d0f14d..9323727520 100644 --- a/webview-ui/src/i18n/locales/fr/settings.json +++ b/webview-ui/src/i18n/locales/fr/settings.json @@ -544,6 +544,7 @@ "minimal": "Minimal (le plus rapide)", "high": "Élevé", "xhigh": "Très élevé", + "max": "Maximum", "medium": "Moyen", "low": "Faible" }, diff --git a/webview-ui/src/i18n/locales/hi/settings.json b/webview-ui/src/i18n/locales/hi/settings.json index be7141aa7c..79309334dc 100644 --- a/webview-ui/src/i18n/locales/hi/settings.json +++ b/webview-ui/src/i18n/locales/hi/settings.json @@ -544,6 +544,7 @@ "minimal": "न्यूनतम (सबसे तेज़)", "high": "उच्च", "xhigh": "बहुत उच्च", + "max": "अधिकतम", "medium": "मध्यम", "low": "निम्न" }, diff --git a/webview-ui/src/i18n/locales/id/settings.json b/webview-ui/src/i18n/locales/id/settings.json index 5d6030a47d..12fec8249e 100644 --- a/webview-ui/src/i18n/locales/id/settings.json +++ 
b/webview-ui/src/i18n/locales/id/settings.json @@ -544,6 +544,7 @@ "minimal": "Minimal (Tercepat)", "high": "Tinggi", "xhigh": "Sangat tinggi", + "max": "Maksimum", "medium": "Sedang", "low": "Rendah" }, diff --git a/webview-ui/src/i18n/locales/it/settings.json b/webview-ui/src/i18n/locales/it/settings.json index ad6f13afb0..b8bda12cd3 100644 --- a/webview-ui/src/i18n/locales/it/settings.json +++ b/webview-ui/src/i18n/locales/it/settings.json @@ -544,6 +544,7 @@ "minimal": "Minimo (più veloce)", "high": "Alto", "xhigh": "Molto alto", + "max": "Massimo", "medium": "Medio", "low": "Basso" }, diff --git a/webview-ui/src/i18n/locales/ja/settings.json b/webview-ui/src/i18n/locales/ja/settings.json index a5883c4f23..42f24324b6 100644 --- a/webview-ui/src/i18n/locales/ja/settings.json +++ b/webview-ui/src/i18n/locales/ja/settings.json @@ -544,6 +544,7 @@ "minimal": "最小 (最速)", "high": "高", "xhigh": "非常に高い", + "max": "最高", "medium": "中", "low": "低" }, diff --git a/webview-ui/src/i18n/locales/ko/settings.json b/webview-ui/src/i18n/locales/ko/settings.json index 67ffde009e..9f19e30461 100644 --- a/webview-ui/src/i18n/locales/ko/settings.json +++ b/webview-ui/src/i18n/locales/ko/settings.json @@ -544,6 +544,7 @@ "minimal": "최소 (가장 빠름)", "high": "높음", "xhigh": "매우 높음", + "max": "최대", "medium": "중간", "low": "낮음" }, diff --git a/webview-ui/src/i18n/locales/nl/settings.json b/webview-ui/src/i18n/locales/nl/settings.json index bee84a6c4f..bdca878318 100644 --- a/webview-ui/src/i18n/locales/nl/settings.json +++ b/webview-ui/src/i18n/locales/nl/settings.json @@ -544,6 +544,7 @@ "minimal": "Minimaal (Snelst)", "high": "Hoog", "xhigh": "Zeer hoog", + "max": "Maximum", "medium": "Middel", "low": "Laag" }, diff --git a/webview-ui/src/i18n/locales/pl/settings.json b/webview-ui/src/i18n/locales/pl/settings.json index 3a1ff0f4f6..43d76ad8ce 100644 --- a/webview-ui/src/i18n/locales/pl/settings.json +++ b/webview-ui/src/i18n/locales/pl/settings.json @@ -544,6 +544,7 @@ "minimal": "Minimalny 
(najszybszy)", "high": "Wysoki", "xhigh": "Bardzo wysoki", + "max": "Maksymalny", "medium": "Średni", "low": "Niski" }, diff --git a/webview-ui/src/i18n/locales/pt-BR/settings.json b/webview-ui/src/i18n/locales/pt-BR/settings.json index 43e7435828..4ae7fd3a20 100644 --- a/webview-ui/src/i18n/locales/pt-BR/settings.json +++ b/webview-ui/src/i18n/locales/pt-BR/settings.json @@ -544,6 +544,7 @@ "minimal": "Mínimo (mais rápido)", "high": "Alto", "xhigh": "Muito alto", + "max": "Máximo", "medium": "Médio", "low": "Baixo" }, diff --git a/webview-ui/src/i18n/locales/ru/settings.json b/webview-ui/src/i18n/locales/ru/settings.json index 0761f1cec4..c636cb3523 100644 --- a/webview-ui/src/i18n/locales/ru/settings.json +++ b/webview-ui/src/i18n/locales/ru/settings.json @@ -544,6 +544,7 @@ "minimal": "Минимальный (самый быстрый)", "high": "Высокие", "xhigh": "Очень высокие", + "max": "Максимальные", "medium": "Средние", "low": "Низкие" }, diff --git a/webview-ui/src/i18n/locales/tr/settings.json b/webview-ui/src/i18n/locales/tr/settings.json index 78d66e31aa..7e749e751c 100644 --- a/webview-ui/src/i18n/locales/tr/settings.json +++ b/webview-ui/src/i18n/locales/tr/settings.json @@ -544,6 +544,7 @@ "minimal": "Minimal (en hızlı)", "high": "Yüksek", "xhigh": "Çok yüksek", + "max": "Maksimum", "medium": "Orta", "low": "Düşük" }, diff --git a/webview-ui/src/i18n/locales/vi/settings.json b/webview-ui/src/i18n/locales/vi/settings.json index 6aeaaaaea7..7d35057fba 100644 --- a/webview-ui/src/i18n/locales/vi/settings.json +++ b/webview-ui/src/i18n/locales/vi/settings.json @@ -544,6 +544,7 @@ "minimal": "Tối thiểu (nhanh nhất)", "high": "Cao", "xhigh": "Rất cao", + "max": "Tối đa", "medium": "Trung bình", "low": "Thấp" }, diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json index f82dd244b1..7f82234c21 100644 --- a/webview-ui/src/i18n/locales/zh-CN/settings.json +++ b/webview-ui/src/i18n/locales/zh-CN/settings.json @@ -544,6 
+544,7 @@ "minimal": "最小 (最快)", "high": "高", "xhigh": "超高", + "max": "最高", "medium": "中", "low": "低" }, diff --git a/webview-ui/src/i18n/locales/zh-TW/settings.json b/webview-ui/src/i18n/locales/zh-TW/settings.json index fe32aeec71..47e66645e2 100644 --- a/webview-ui/src/i18n/locales/zh-TW/settings.json +++ b/webview-ui/src/i18n/locales/zh-TW/settings.json @@ -560,7 +560,8 @@ "low": "低", "medium": "中", "high": "高", - "xhigh": "超高" + "xhigh": "超高", + "max": "最高" }, "verbosity": { "label": "輸出詳細程度", From 597280c0924347e332759c9d4a97c3384a2463ba Mon Sep 17 00:00:00 2001 From: Mob Code 100 Date: Sun, 14 Jun 2026 16:50:50 +0800 Subject: [PATCH 3/3] fix(zai): fall back to model default when persisted reasoning effort is unsupported Persisted reasoning effort not offered by the current model now falls back to the model default instead of silently disabling reasoning. Also wraps create() in handleOpenAIError for consistency with the base class, and flags GLM-5.2 pricing as provisional (mirrors GLM-5.1). --- packages/types/src/providers/zai.ts | 2 ++ src/api/providers/__tests__/zai.spec.ts | 30 ++++++++++++++++++++++ src/api/providers/zai.ts | 34 +++++++++++++++---------- 3 files changed, 53 insertions(+), 13 deletions(-) diff --git a/packages/types/src/providers/zai.ts b/packages/types/src/providers/zai.ts index 31b0404cc7..c2bd079264 100644 --- a/packages/types/src/providers/zai.ts +++ b/packages/types/src/providers/zai.ts @@ -162,6 +162,7 @@ export const internationalZAiModels = { supportsReasoningEffort: ["disable", "high", "max"], reasoningEffort: "high", preserveReasoning: true, + // TODO: Pricing is from GLM-5.1, should update later. inputPrice: 1.4, outputPrice: 4.4, cacheWritesPrice: 0, @@ -386,6 +387,7 @@ export const mainlandZAiModels = { supportsReasoningEffort: ["disable", "high", "max"], reasoningEffort: "high", preserveReasoning: true, + // TODO: Pricing is from GLM-5.1, should update later. 
inputPrice: 0.68, outputPrice: 2.28, cacheWritesPrice: 0, diff --git a/src/api/providers/__tests__/zai.spec.ts b/src/api/providers/__tests__/zai.spec.ts index 73eed907bf..d355aea33d 100644 --- a/src/api/providers/__tests__/zai.spec.ts +++ b/src/api/providers/__tests__/zai.spec.ts @@ -701,6 +701,36 @@ describe("ZAiHandler", () => { expect(callArgs.reasoning_effort).toBeUndefined() }) + it("should fall back to the model default effort when a persisted value is unsupported", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-5.2", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + reasoningEffort: "medium", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-5.2", + thinking: { type: "enabled" }, + reasoning_effort: "high", + }), + ) + }) + it("should disable thinking for GLM-4.7 when reasoningEffort is set to disable", async () => { const handlerWithModel = new ZAiHandler({ apiModelId: "glm-4.7", diff --git a/src/api/providers/zai.ts b/src/api/providers/zai.ts index 4c4a3c7910..c8f720a971 100644 --- a/src/api/providers/zai.ts +++ b/src/api/providers/zai.ts @@ -11,11 +11,12 @@ import { zaiApiLineConfigs, } from "@roo-code/types" -import { type ApiHandlerOptions, getModelMaxOutputTokens, shouldUseReasoningEffort } from "../../shared/api" +import { type ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api" import { convertToZAiFormat } from "../transform/zai-format" import type { ApiHandlerCreateMessageMetadata } from "../index" import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider" +import { handleOpenAIError } from "./utils/openai-error-handler" // Custom interface for Z.ai params 
to support thinking mode and reasoning effort tiers. // Z.ai accepts the standard `reasoning_effort` ladder (none/minimal/low/medium/high/xhigh/max) @@ -60,12 +61,8 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { const isThinkingModel = Array.isArray(info.supportsReasoningEffort) if (isThinkingModel) { - // For GLM-4.7, thinking is ON by default in the API. - // We need to explicitly disable it when reasoning is off. - const useReasoning = shouldUseReasoningEffort({ model: info, settings: this.options }) - // Create the stream with our custom thinking parameter - return this.createStreamWithThinking(systemPrompt, messages, metadata, useReasoning) + return this.createStreamWithThinking(systemPrompt, messages, metadata) } // For non-thinking models, use the default behavior @@ -79,14 +76,21 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, - useReasoning?: boolean, ) { const { id: model, info } = this.getModel() - // Resolve the reasoning-effort tier (e.g. "high" | "max" for GLM-5.2) from the user - // setting, falling back to the model's default. Omitted when reasoning is disabled. - const effort = useReasoning ? (this.options.reasoningEffort ?? info.reasoningEffort) : undefined + // Fall back to the model default when the resolved effort isn't supported by the model. + const supported = info.supportsReasoningEffort + const raw = + this.options.enableReasoningEffort === false + ? undefined + : (this.options.reasoningEffort ?? info.reasoningEffort) + const effort = + raw && raw !== "disable" && Array.isArray(supported) && !supported.includes(raw) + ? info.reasoningEffort + : raw const reasoningEffort = effort && effort !== "disable" ? 
effort : undefined + const useReasoning = reasoningEffort !== undefined const max_tokens = this.options.modelMaxTokens || @@ -118,8 +122,12 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { parallel_tool_calls: metadata?.parallelToolCalls ?? true, } - return this.client.chat.completions.create( - params as OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming, - ) + try { + return this.client.chat.completions.create( + params as OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming, + ) + } catch (error) { + throw handleOpenAIError(error, this.providerName) + } } }