Zoo-Code-Org · MobCode100 · Jun 13, 2026 · Jun 13, 2026 · Jun 14, 2026
@@ -0,0 +1,5 @@
+---
+"zoo-code": minor
+---
+
+Add GLM-5.2 support with High/Max `reasoning_effort` tiers. The default effort is High; Max, the deepest reasoning tier, stays opt-in and is sent only when the user explicitly selects it. When reasoning is disabled, thinking is turned off and the `reasoning_effort` parameter is omitted from the request entirely.
@@ -23,7 +23,7 @@ export type ReasoningEffortWithMinimal = z.infer<typeof reasoningEffortWithMinim
  * Extended Reasoning Effort (includes "none" and "minimal")
  * Note: "disable" is a UI/control value, not a value sent as effort
  */
-export const reasoningEffortsExtended = ["none", "minimal", "low", "medium", "high", "xhigh"] as const
+export const reasoningEffortsExtended = ["none", "minimal", "low", "medium", "high", "xhigh", "max"] as const
 
 export const reasoningEffortExtendedSchema = z.enum(reasoningEffortsExtended)
 
@@ -32,7 +32,16 @@ export type ReasoningEffortExtended = z.infer<typeof reasoningEffortExtendedSche
 /**
  * Reasoning Effort user setting (includes "disable")
  */
-export const reasoningEffortSettingValues = ["disable", "none", "minimal", "low", "medium", "high", "xhigh"] as const
+export const reasoningEffortSettingValues = [
+	"disable",
+	"none",
+	"minimal",
+	"low",
+	"medium",
+	"high",
+	"xhigh",
+	"max",
+] as const
 export const reasoningEffortSettingSchema = z.enum(reasoningEffortSettingValues)
 
 /**
@@ -93,7 +102,7 @@ export const modelInfoSchema = z.object({
 	defaultTemperature: z.number().optional(),
 	requiredReasoningBudget: z.boolean().optional(),
 	supportsReasoningEffort: z
-		.union([z.boolean(), z.array(z.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh"]))])
+		.union([z.boolean(), z.array(z.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh", "max"]))])
 		.optional(),
 	requiredReasoningEffort: z.boolean().optional(),
 	preserveReasoning: z.boolean().optional(),

@@ -153,6 +153,23 @@ export const internationalZAiModels = {
 		description:
 			"GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. It delivers top-tier reasoning, coding, and agentic performance.",
 	},
+	"glm-5.2": {
+		maxTokens: 131_072,
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsMaxTokens: true,
+		supportsReasoningEffort: ["disable", "high", "max"],
+		reasoningEffort: "high",
+		preserveReasoning: true,
+		// TODO: Pricing is copied from GLM-5.1 as a placeholder; replace it with GLM-5.2 pricing.
+		inputPrice: 1.4,
+		outputPrice: 4.4,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.26,
+		description:
+			"GLM-5.2 is Zhipu's flagship model with a 1M context window, 128k max output, and dual thinking-effort modes (High/Max). It delivers top-tier long-context reasoning, coding, and agentic performance for extended engineering sessions.",
+	},
 	"glm-5-turbo": {
 		maxTokens: 131_072,
 		contextWindow: 202_752,
@@ -361,6 +378,23 @@ export const mainlandZAiModels = {
 		description:
 			"GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. It delivers top-tier reasoning, coding, and agentic performance.",
 	},
+	"glm-5.2": {
+		maxTokens: 131_072,
+		contextWindow: 1_000_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsMaxTokens: true,
+		supportsReasoningEffort: ["disable", "high", "max"],
+		reasoningEffort: "high",
+		preserveReasoning: true,
+		// TODO: Pricing is copied from GLM-5.1 as a placeholder; replace it with GLM-5.2 pricing.
+		inputPrice: 0.68,
+		outputPrice: 2.28,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.13,
+		description:
+			"GLM-5.2 is Zhipu's flagship model with a 1M context window, 128k max output, and dual thinking-effort modes (High/Max). It delivers top-tier long-context reasoning, coding, and agentic performance for extended engineering sessions.",
+	},
 	"glm-5-turbo": {
 		maxTokens: 131_072,
 		contextWindow: 202_752,

@@ -116,6 +116,27 @@ describe("ZAiHandler", () => {
 			expect(model.info.supportsImages).toBe(false)
 		})
 
+		it("should return GLM-5.2 international model with High/Max effort tiers and 1M context", () => {
+			const testModelId: InternationalZAiModelId = "glm-5.2"
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: testModelId,
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "international_coding",
+			})
+			const model = handlerWithModel.getModel()
+			expect(model.id).toBe(testModelId)
+			expect(model.info).toEqual(internationalZAiModels[testModelId])
+			expect(model.info.contextWindow).toBe(1_000_000)
+			expect(model.info.maxTokens).toBe(131_072)
+			expect(model.info.supportsReasoningEffort).toEqual(["disable", "high", "max"])
+			expect(model.info.reasoningEffort).toBe("high")
+			expect(model.info.preserveReasoning).toBe(true)
+			expect(model.info.supportsMaxTokens).toBe(true)
+			expect(model.info.inputPrice).toBe(1.4)
+			expect(model.info.outputPrice).toBe(4.4)
+			expect(model.info.cacheReadsPrice).toBe(0.26)
+		})
+
 		it("should return GLM-5-Turbo international model with thinking support", () => {
 			const testModelId: InternationalZAiModelId = "glm-5-turbo"
 			const handlerWithModel = new ZAiHandler({
@@ -231,6 +252,27 @@ describe("ZAiHandler", () => {
 			expect(model.info.supportsImages).toBe(false)
 		})
 
+		it("should return GLM-5.2 China model with High/Max effort tiers and 1M context", () => {
+			const testModelId: MainlandZAiModelId = "glm-5.2"
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: testModelId,
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "china_coding",
+			})
+			const model = handlerWithModel.getModel()
+			expect(model.id).toBe(testModelId)
+			expect(model.info).toEqual(mainlandZAiModels[testModelId])
+			expect(model.info.contextWindow).toBe(1_000_000)
+			expect(model.info.maxTokens).toBe(131_072)
+			expect(model.info.supportsReasoningEffort).toEqual(["disable", "high", "max"])
+			expect(model.info.reasoningEffort).toBe("high")
+			expect(model.info.preserveReasoning).toBe(true)
+			expect(model.info.supportsMaxTokens).toBe(true)
+			expect(model.info.inputPrice).toBe(0.68)
+			expect(model.info.outputPrice).toBe(2.28)
+			expect(model.info.cacheReadsPrice).toBe(0.13)
+		})
+
 		it("should return GLM-4.7 China model with thinking support", () => {
 			const testModelId: MainlandZAiModelId = "glm-4.7"
 			const handlerWithModel = new ZAiHandler({
@@ -573,6 +615,122 @@ describe("ZAiHandler", () => {
 			)
 		})
 
+		it("should send reasoning_effort:high by default for GLM-5.2 (model default)", async () => {
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: "glm-5.2",
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "international_coding",
+				// No reasoningEffort setting - should use model default (high)
+			})
+
+			mockCreate.mockImplementationOnce(() => {
+				return {
+					[Symbol.asyncIterator]: () => ({
+						async next() {
+							return { done: true }
+						},
+					}),
+				}
+			})
+
+			const messageGenerator = handlerWithModel.createMessage("system prompt", [])
+			await messageGenerator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "glm-5.2",
+					thinking: { type: "enabled" },
+					reasoning_effort: "high",
+				}),
+			)
+		})
+
+		it("should send reasoning_effort:max for GLM-5.2 when reasoningEffort is set to max", async () => {
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: "glm-5.2",
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "international_coding",
+				reasoningEffort: "max",
+			})
+
+			mockCreate.mockImplementationOnce(() => {
+				return {
+					[Symbol.asyncIterator]: () => ({
+						async next() {
+							return { done: true }
+						},
+					}),
+				}
+			})
+
+			const messageGenerator = handlerWithModel.createMessage("system prompt", [])
+			await messageGenerator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "glm-5.2",
+					thinking: { type: "enabled" },
+					reasoning_effort: "max",
+				}),
+			)
+		})
+
+		it("should omit reasoning_effort for GLM-5.2 when reasoningEffort is set to disable", async () => {
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: "glm-5.2",
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "international_coding",
+				reasoningEffort: "disable",
+			})
+
+			mockCreate.mockImplementationOnce(() => {
+				return {
+					[Symbol.asyncIterator]: () => ({
+						async next() {
+							return { done: true }
+						},
+					}),
+				}
+			})
+
+			const messageGenerator = handlerWithModel.createMessage("system prompt", [])
+			await messageGenerator.next()
+
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.thinking).toEqual({ type: "disabled" })
+			expect(callArgs.reasoning_effort).toBeUndefined()
+		})
+
+		it("should fall back to the model default effort when a persisted value is unsupported", async () => {
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: "glm-5.2",
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "international_coding",
+				reasoningEffort: "medium",
+			})
+
+			mockCreate.mockImplementationOnce(() => {
+				return {
+					[Symbol.asyncIterator]: () => ({
+						async next() {
+							return { done: true }
+						},
+					}),
+				}
+			})
+
+			const messageGenerator = handlerWithModel.createMessage("system prompt", [])
+			await messageGenerator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "glm-5.2",
+					thinking: { type: "enabled" },
+					reasoning_effort: "high",
+				}),
+			)
+		})
+
 		it("should disable thinking for GLM-4.7 when reasoningEffort is set to disable", async () => {
 			const handlerWithModel = new ZAiHandler({
 				apiModelId: "glm-4.7",

@@ -11,15 +11,20 @@ import {
 	zaiApiLineConfigs,
 } from "@roo-code/types"
 
-import { type ApiHandlerOptions, getModelMaxOutputTokens, shouldUseReasoningEffort } from "../../shared/api"
+import { type ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api"
 import { convertToZAiFormat } from "../transform/zai-format"
 
 import type { ApiHandlerCreateMessageMetadata } from "../index"
 import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider"
+import { handleOpenAIError } from "./utils/openai-error-handler"
 
-// Custom interface for Z.ai params to support thinking mode
-type ZAiChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParamsStreaming & {
+// Custom params type for Z.ai requests, adding thinking mode and reasoning effort tiers.
+// Z.ai takes a `reasoning_effort` value from the extended ladder
+// (none/minimal/low/medium/high/xhigh/max) alongside the GLM-specific `thinking` toggle.
+// The OpenAI-typed `reasoning_effort` is omitted so it can be widened to include "max".
+type ZAiChatCompletionParams = Omit<OpenAI.Chat.ChatCompletionCreateParamsStreaming, "reasoning_effort"> & {
 	thinking?: { type: "enabled" | "disabled" }
+	reasoning_effort?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh" | "max"
 }
 
 export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
@@ -56,12 +61,8 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
 		const isThinkingModel = Array.isArray(info.supportsReasoningEffort)
 
 		if (isThinkingModel) {
-			// For GLM-4.7, thinking is ON by default in the API.
-			// We need to explicitly disable it when reasoning is off.
-			const useReasoning = shouldUseReasoningEffort({ model: info, settings: this.options })
-
 			// Create the stream with our custom thinking parameter
-			return this.createStreamWithThinking(systemPrompt, messages, metadata, useReasoning)
+			return this.createStreamWithThinking(systemPrompt, messages, metadata)
 		}
 
 		// For non-thinking models, use the default behavior
@@ -75,10 +76,22 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
-		useReasoning?: boolean,
 	) {
 		const { id: model, info } = this.getModel()
 
+		// Fall back to the model default when the resolved effort isn't supported by the model.
+		const supported = info.supportsReasoningEffort
+		const raw =
+			this.options.enableReasoningEffort === false
+				? undefined
+				: (this.options.reasoningEffort ?? info.reasoningEffort)
+		const effort =
+			raw && raw !== "disable" && Array.isArray(supported) && !supported.includes(raw)
+				? info.reasoningEffort
+				: raw
+		const reasoningEffort = effort && effort !== "disable" ? effort : undefined
+		const useReasoning = reasoningEffort !== undefined
+
 		const max_tokens =
 			this.options.modelMaxTokens ||
 			(getModelMaxOutputTokens({
@@ -103,11 +116,18 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
 			stream_options: { include_usage: true },
 			// Thinking is ON by default for these models, so explicitly disable it when needed.
 			thinking: useReasoning ? { type: "enabled" } : { type: "disabled" },
+			reasoning_effort: reasoningEffort,
 			tools: this.convertToolsForOpenAI(metadata?.tools),
 			tool_choice: metadata?.tool_choice,
 			parallel_tool_calls: metadata?.parallelToolCalls ?? true,
 		}
 
-		return this.client.chat.completions.create(params)
+		try {
+			return this.client.chat.completions.create(
+				params as OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming,
+			)
+		} catch (error) {
+			throw handleOpenAIError(error, this.providerName)
+		}
 	}
 }
@@ -620,7 +620,8 @@
 			"low": "Low",
 			"medium": "Medium",
 			"high": "High",
-			"xhigh": "Extra High"
+			"xhigh": "Extra High",
+			"max": "Max"
 		},
 		"verbosity": {
 			"label": "Output Verbosity",