diff --git a/.changeset/add-glm-5-2-support.md b/.changeset/add-glm-5-2-support.md new file mode 100644 index 000000000..26bd162cd --- /dev/null +++ b/.changeset/add-glm-5-2-support.md @@ -0,0 +1,5 @@ +--- +"zoo-code": minor +--- + +Add GLM-5.2 support with High/Max `reasoning_effort` tiers. The default effort is High (deep reasoning stays opt-in), Max is selected only when the user explicitly picks it, and the parameter is omitted entirely when reasoning is disabled. diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts index a86eadfaf..87ebfaf96 100644 --- a/packages/types/src/model.ts +++ b/packages/types/src/model.ts @@ -23,7 +23,7 @@ export type ReasoningEffortWithMinimal = z.infer { expect(model.info.supportsImages).toBe(false) }) + it("should return GLM-5.2 international model with High/Max effort tiers and 1M context", () => { + const testModelId: InternationalZAiModelId = "glm-5.2" + const handlerWithModel = new ZAiHandler({ + apiModelId: testModelId, + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual(internationalZAiModels[testModelId]) + expect(model.info.contextWindow).toBe(1_000_000) + expect(model.info.maxTokens).toBe(131_072) + expect(model.info.supportsReasoningEffort).toEqual(["disable", "high", "max"]) + expect(model.info.reasoningEffort).toBe("high") + expect(model.info.preserveReasoning).toBe(true) + expect(model.info.supportsMaxTokens).toBe(true) + expect(model.info.inputPrice).toBe(1.4) + expect(model.info.outputPrice).toBe(4.4) + expect(model.info.cacheReadsPrice).toBe(0.26) + }) + it("should return GLM-5-Turbo international model with thinking support", () => { const testModelId: InternationalZAiModelId = "glm-5-turbo" const handlerWithModel = new ZAiHandler({ @@ -233,6 +254,27 @@ describe("ZAiHandler", () => { expect(model.info.supportsImages).toBe(false) }) + it("should return GLM-5.2 China 
model with High/Max effort tiers and 1M context", () => { + const testModelId: MainlandZAiModelId = "glm-5.2" + const handlerWithModel = new ZAiHandler({ + apiModelId: testModelId, + zaiApiKey: "test-zai-api-key", + zaiApiLine: "china_coding", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual(mainlandZAiModels[testModelId]) + expect(model.info.contextWindow).toBe(1_000_000) + expect(model.info.maxTokens).toBe(131_072) + expect(model.info.supportsReasoningEffort).toEqual(["disable", "high", "max"]) + expect(model.info.reasoningEffort).toBe("high") + expect(model.info.preserveReasoning).toBe(true) + expect(model.info.supportsMaxTokens).toBe(true) + expect(model.info.inputPrice).toBe(0.68) + expect(model.info.outputPrice).toBe(2.28) + expect(model.info.cacheReadsPrice).toBe(0.13) + }) + it("should return GLM-4.7 China model with thinking support", () => { const testModelId: MainlandZAiModelId = "glm-4.7" const handlerWithModel = new ZAiHandler({ @@ -575,6 +617,122 @@ describe("ZAiHandler", () => { ) }) + it("should send reasoning_effort:high by default for GLM-5.2 (model default)", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-5.2", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + // No reasoningEffort setting - should use model default (high) + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-5.2", + thinking: { type: "enabled" }, + reasoning_effort: "high", + }), + ) + }) + + it("should send reasoning_effort:max for GLM-5.2 when reasoningEffort is set to max", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-5.2", + 
zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + reasoningEffort: "max", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-5.2", + thinking: { type: "enabled" }, + reasoning_effort: "max", + }), + ) + }) + + it("should omit reasoning_effort for GLM-5.2 when reasoningEffort is set to disable", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-5.2", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + reasoningEffort: "disable", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + const callArgs = mockCreate.mock.calls[0][0] + expect(callArgs.thinking).toEqual({ type: "disabled" }) + expect(callArgs.reasoning_effort).toBeUndefined() + }) + + it("should fall back to the model default effort when a persisted value is unsupported", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-5.2", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + reasoningEffort: "medium", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-5.2", + thinking: { type: "enabled" }, + reasoning_effort: "high", + }), + ) + }) + it("should disable thinking for 
GLM-4.7 when reasoningEffort is set to disable", async () => { const handlerWithModel = new ZAiHandler({ apiModelId: "glm-4.7", diff --git a/src/api/providers/zai.ts b/src/api/providers/zai.ts index 113cf655d..c8f720a97 100644 --- a/src/api/providers/zai.ts +++ b/src/api/providers/zai.ts @@ -11,15 +11,20 @@ import { zaiApiLineConfigs, } from "@roo-code/types" -import { type ApiHandlerOptions, getModelMaxOutputTokens, shouldUseReasoningEffort } from "../../shared/api" +import { type ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api" import { convertToZAiFormat } from "../transform/zai-format" import type { ApiHandlerCreateMessageMetadata } from "../index" import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider" +import { handleOpenAIError } from "./utils/openai-error-handler" -// Custom interface for Z.ai params to support thinking mode -type ZAiChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParamsStreaming & { +// Custom interface for Z.ai params to support thinking mode and reasoning effort tiers. +// Z.ai accepts the standard `reasoning_effort` ladder (none/minimal/low/medium/high/xhigh/max) +// alongside the GLM-specific `thinking` toggle. Omit the OpenAI-typed `reasoning_effort` so we +// can widen it to include provider-specific values such as "max". +type ZAiChatCompletionParams = Omit<OpenAI.Chat.ChatCompletionCreateParamsStreaming, "reasoning_effort"> & { thinking?: { type: "enabled" | "disabled" } + reasoning_effort?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh" | "max" } export class ZAiHandler extends BaseOpenAiCompatibleProvider { @@ -56,12 +61,8 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { const isThinkingModel = Array.isArray(info.supportsReasoningEffort) if (isThinkingModel) { - // For GLM-4.7, thinking is ON by default in the API. - // We need to explicitly disable it when reasoning is off. 
- const useReasoning = shouldUseReasoningEffort({ model: info, settings: this.options }) - // Create the stream with our custom thinking parameter - return this.createStreamWithThinking(systemPrompt, messages, metadata, useReasoning) + return this.createStreamWithThinking(systemPrompt, messages, metadata) } // For non-thinking models, use the default behavior @@ -75,10 +76,22 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, - useReasoning?: boolean, ) { const { id: model, info } = this.getModel() + // Fall back to the model default when the resolved effort isn't supported by the model. + const supported = info.supportsReasoningEffort + const raw = + this.options.enableReasoningEffort === false + ? undefined + : (this.options.reasoningEffort ?? info.reasoningEffort) + const effort = + raw && raw !== "disable" && Array.isArray(supported) && !supported.includes(raw) + ? info.reasoningEffort + : raw + const reasoningEffort = effort && effort !== "disable" ? effort : undefined + const useReasoning = reasoningEffort !== undefined + const max_tokens = this.options.modelMaxTokens || (getModelMaxOutputTokens({ @@ -103,11 +116,18 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { stream_options: { include_usage: true }, // Thinking is ON by default for these models, so explicitly disable it when needed. thinking: useReasoning ? { type: "enabled" } : { type: "disabled" }, + reasoning_effort: reasoningEffort, tools: this.convertToolsForOpenAI(metadata?.tools), tool_choice: metadata?.tool_choice, parallel_tool_calls: metadata?.parallelToolCalls ?? 
true, } - return this.client.chat.completions.create(params) + try { + return this.client.chat.completions.create( + params as OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming, + ) + } catch (error) { + throw handleOpenAIError(error, this.providerName) + } } } diff --git a/webview-ui/src/i18n/locales/ca/settings.json b/webview-ui/src/i18n/locales/ca/settings.json index cd2d1d9c9..5de02a445 100644 --- a/webview-ui/src/i18n/locales/ca/settings.json +++ b/webview-ui/src/i18n/locales/ca/settings.json @@ -544,6 +544,7 @@ "minimal": "Mínim (el més ràpid)", "high": "Alt", "xhigh": "Molt alt", + "max": "Màxim", "medium": "Mitjà", "low": "Baix" }, diff --git a/webview-ui/src/i18n/locales/de/settings.json b/webview-ui/src/i18n/locales/de/settings.json index 89d71a1e7..5d4018675 100644 --- a/webview-ui/src/i18n/locales/de/settings.json +++ b/webview-ui/src/i18n/locales/de/settings.json @@ -544,6 +544,7 @@ "minimal": "Minimal (schnellste)", "high": "Hoch", "xhigh": "Sehr hoch", + "max": "Maximum", "medium": "Mittel", "low": "Niedrig" }, diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index eec2a564f..edc416afc 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -620,7 +620,8 @@ "low": "Low", "medium": "Medium", "high": "High", - "xhigh": "Extra High" + "xhigh": "Extra High", + "max": "Max" }, "verbosity": { "label": "Output Verbosity", diff --git a/webview-ui/src/i18n/locales/es/settings.json b/webview-ui/src/i18n/locales/es/settings.json index 85491e2ca..f58cd7e95 100644 --- a/webview-ui/src/i18n/locales/es/settings.json +++ b/webview-ui/src/i18n/locales/es/settings.json @@ -544,6 +544,7 @@ "minimal": "Mínimo (el más rápido)", "high": "Alto", "xhigh": "Muy alto", + "max": "Máximo", "medium": "Medio", "low": "Bajo" }, diff --git a/webview-ui/src/i18n/locales/fr/settings.json b/webview-ui/src/i18n/locales/fr/settings.json index 05c2d0f14..932372752 
100644 --- a/webview-ui/src/i18n/locales/fr/settings.json +++ b/webview-ui/src/i18n/locales/fr/settings.json @@ -544,6 +544,7 @@ "minimal": "Minimal (le plus rapide)", "high": "Élevé", "xhigh": "Très élevé", + "max": "Maximum", "medium": "Moyen", "low": "Faible" }, diff --git a/webview-ui/src/i18n/locales/hi/settings.json b/webview-ui/src/i18n/locales/hi/settings.json index be7141aa7..79309334d 100644 --- a/webview-ui/src/i18n/locales/hi/settings.json +++ b/webview-ui/src/i18n/locales/hi/settings.json @@ -544,6 +544,7 @@ "minimal": "न्यूनतम (सबसे तेज़)", "high": "उच्च", "xhigh": "बहुत उच्च", + "max": "अधिकतम", "medium": "मध्यम", "low": "निम्न" }, diff --git a/webview-ui/src/i18n/locales/id/settings.json b/webview-ui/src/i18n/locales/id/settings.json index 5d6030a47..12fec8249 100644 --- a/webview-ui/src/i18n/locales/id/settings.json +++ b/webview-ui/src/i18n/locales/id/settings.json @@ -544,6 +544,7 @@ "minimal": "Minimal (Tercepat)", "high": "Tinggi", "xhigh": "Sangat tinggi", + "max": "Maksimum", "medium": "Sedang", "low": "Rendah" }, diff --git a/webview-ui/src/i18n/locales/it/settings.json b/webview-ui/src/i18n/locales/it/settings.json index ad6f13afb..b8bda12cd 100644 --- a/webview-ui/src/i18n/locales/it/settings.json +++ b/webview-ui/src/i18n/locales/it/settings.json @@ -544,6 +544,7 @@ "minimal": "Minimo (più veloce)", "high": "Alto", "xhigh": "Molto alto", + "max": "Massimo", "medium": "Medio", "low": "Basso" }, diff --git a/webview-ui/src/i18n/locales/ja/settings.json b/webview-ui/src/i18n/locales/ja/settings.json index a5883c4f2..42f24324b 100644 --- a/webview-ui/src/i18n/locales/ja/settings.json +++ b/webview-ui/src/i18n/locales/ja/settings.json @@ -544,6 +544,7 @@ "minimal": "最小 (最速)", "high": "高", "xhigh": "非常に高い", + "max": "最高", "medium": "中", "low": "低" }, diff --git a/webview-ui/src/i18n/locales/ko/settings.json b/webview-ui/src/i18n/locales/ko/settings.json index 67ffde009..9f19e3046 100644 --- a/webview-ui/src/i18n/locales/ko/settings.json +++ 
b/webview-ui/src/i18n/locales/ko/settings.json @@ -544,6 +544,7 @@ "minimal": "최소 (가장 빠름)", "high": "높음", "xhigh": "매우 높음", + "max": "최대", "medium": "중간", "low": "낮음" }, diff --git a/webview-ui/src/i18n/locales/nl/settings.json b/webview-ui/src/i18n/locales/nl/settings.json index bee84a6c4..bdca87831 100644 --- a/webview-ui/src/i18n/locales/nl/settings.json +++ b/webview-ui/src/i18n/locales/nl/settings.json @@ -544,6 +544,7 @@ "minimal": "Minimaal (Snelst)", "high": "Hoog", "xhigh": "Zeer hoog", + "max": "Maximum", "medium": "Middel", "low": "Laag" }, diff --git a/webview-ui/src/i18n/locales/pl/settings.json b/webview-ui/src/i18n/locales/pl/settings.json index 3a1ff0f4f..43d76ad8c 100644 --- a/webview-ui/src/i18n/locales/pl/settings.json +++ b/webview-ui/src/i18n/locales/pl/settings.json @@ -544,6 +544,7 @@ "minimal": "Minimalny (najszybszy)", "high": "Wysoki", "xhigh": "Bardzo wysoki", + "max": "Maksymalny", "medium": "Średni", "low": "Niski" }, diff --git a/webview-ui/src/i18n/locales/pt-BR/settings.json b/webview-ui/src/i18n/locales/pt-BR/settings.json index 43e743582..4ae7fd3a2 100644 --- a/webview-ui/src/i18n/locales/pt-BR/settings.json +++ b/webview-ui/src/i18n/locales/pt-BR/settings.json @@ -544,6 +544,7 @@ "minimal": "Mínimo (mais rápido)", "high": "Alto", "xhigh": "Muito alto", + "max": "Máximo", "medium": "Médio", "low": "Baixo" }, diff --git a/webview-ui/src/i18n/locales/ru/settings.json b/webview-ui/src/i18n/locales/ru/settings.json index 0761f1cec..c636cb352 100644 --- a/webview-ui/src/i18n/locales/ru/settings.json +++ b/webview-ui/src/i18n/locales/ru/settings.json @@ -544,6 +544,7 @@ "minimal": "Минимальный (самый быстрый)", "high": "Высокие", "xhigh": "Очень высокие", + "max": "Максимальные", "medium": "Средние", "low": "Низкие" }, diff --git a/webview-ui/src/i18n/locales/tr/settings.json b/webview-ui/src/i18n/locales/tr/settings.json index 78d66e31a..7e749e751 100644 --- a/webview-ui/src/i18n/locales/tr/settings.json +++ 
b/webview-ui/src/i18n/locales/tr/settings.json @@ -544,6 +544,7 @@ "minimal": "Minimal (en hızlı)", "high": "Yüksek", "xhigh": "Çok yüksek", + "max": "Maksimum", "medium": "Orta", "low": "Düşük" }, diff --git a/webview-ui/src/i18n/locales/vi/settings.json b/webview-ui/src/i18n/locales/vi/settings.json index 6aeaaaaea..7d35057fb 100644 --- a/webview-ui/src/i18n/locales/vi/settings.json +++ b/webview-ui/src/i18n/locales/vi/settings.json @@ -544,6 +544,7 @@ "minimal": "Tối thiểu (nhanh nhất)", "high": "Cao", "xhigh": "Rất cao", + "max": "Tối đa", "medium": "Trung bình", "low": "Thấp" }, diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json index f82dd244b..7f82234c2 100644 --- a/webview-ui/src/i18n/locales/zh-CN/settings.json +++ b/webview-ui/src/i18n/locales/zh-CN/settings.json @@ -544,6 +544,7 @@ "minimal": "最小 (最快)", "high": "高", "xhigh": "超高", + "max": "最高", "medium": "中", "low": "低" }, diff --git a/webview-ui/src/i18n/locales/zh-TW/settings.json b/webview-ui/src/i18n/locales/zh-TW/settings.json index fe32aeec7..47e66645e 100644 --- a/webview-ui/src/i18n/locales/zh-TW/settings.json +++ b/webview-ui/src/i18n/locales/zh-TW/settings.json @@ -560,7 +560,8 @@ "low": "低", "medium": "中", "high": "高", - "xhigh": "超高" + "xhigh": "超高", + "max": "最高" }, "verbosity": { "label": "輸出詳細程度",