MoonshotAI · guglxni · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026
diff --git a/.changeset/azure-foundry-provider.md b/.changeset/azure-foundry-provider.md
@@ -0,0 +1,5 @@
+---
+"@moonshot-ai/kimi-code": minor
+---
+
+Add an `azure-foundry` provider type for Microsoft Foundry model deployments via the OpenAI v1-compatible route. Clamp completion budgets against the model's shared input+output context window. For Foundry-hosted Kimi reasoning models, send `max_completion_tokens` (not `max_tokens`) so reasoning and visible output use separate budgets, and enable reasoning via `reasoning_effort` only — Foundry rejects Moonshot's proprietary `thinking` parameter.
diff --git a/docs/en/configuration/config-files.md b/docs/en/configuration/config-files.md
@@ -100,7 +100,7 @@ Each entry in the `providers` table defines an API provider, keyed by a unique n
 
 | Field | Type | Required | Description |
 | --- | --- | --- | --- |
-| `type` | `string` | Yes | Provider type: `kimi`, `anthropic`, `openai`, `openai_responses`, `google-genai`, `vertexai` |
+| `type` | `string` | Yes | Provider type: `kimi`, `anthropic`, `openai`, `openai_responses`, `azure-foundry`, `google-genai`, `vertexai` |
 | `api_key` | `string` | No | API key, written in plain text in the config file |
 | `base_url` | `string` | No | API base URL |
 | `oauth` | `table` | No | OAuth credential reference (`storage` and `key` fields); injected automatically by the login flow — normally no need to write this by hand |

diff --git a/docs/en/configuration/env-vars.md b/docs/en/configuration/env-vars.md
@@ -58,6 +58,8 @@ Key names per provider:
 | `ANTHROPIC_BASE_URL` | Anthropic | Follows Anthropic SDK default |
 | `OPENAI_API_KEY` | OpenAI (`openai` and `openai_responses`) | None |
 | `OPENAI_BASE_URL` | OpenAI (`openai` and `openai_responses`) | `https://api.openai.com/v1` |
+| `AZURE_FOUNDRY_API_KEY` | Microsoft Foundry (`azure-foundry`) | None |
+| `AZURE_FOUNDRY_BASE_URL` | Microsoft Foundry (`azure-foundry`) | None |
 | `GOOGLE_API_KEY` | Google GenAI, Vertex AI | None |
 | `VERTEXAI_API_KEY` | Vertex AI | None |
 | `GOOGLE_CLOUD_PROJECT` | Vertex AI | None |

diff --git a/docs/en/configuration/providers.md b/docs/en/configuration/providers.md
@@ -12,6 +12,7 @@ The `type` field in the `providers` table determines which protocol implementati
 | `anthropic` | Anthropic Messages | Claude model family |
 | `openai` | OpenAI Chat Completions | OpenAI and compatible services, DeepSeek, Qwen, etc. |
 | `openai_responses` | OpenAI Responses API | OpenAI's newer Responses interface |
+| `azure-foundry` | Microsoft Foundry (OpenAI v1) | Microsoft Foundry model deployments (GPT, DeepSeek, Llama, Mistral, etc.) |
 | `google-genai` | Google GenAI | Gemini API |
 | `vertexai` | Google GenAI on Vertex | Google Cloud Vertex AI |
 
@@ -107,6 +108,30 @@ base_url = "https://api.openai.com/v1"
 api_key = "sk-xxxxx"
 ```
 
+## `azure-foundry`
+
+For connecting to [Microsoft Foundry](https://learn.microsoft.com/en-us/azure/foundry/) model deployments through the OpenAI v1-compatible inference route. Foundry hosts multiple model families — OpenAI GPT, DeepSeek, Meta Llama, Mistral, and others sold directly by Azure — not just OpenAI models. Put the model ID from your Foundry deployment in `[models.<alias>]`.
+
+Microsoft recommends the OpenAI v1 route for third-party SDKs and custom applications. See [Integrate Microsoft Foundry with your applications](https://learn.microsoft.com/en-us/azure/foundry/how-to/integrate-with-other-apps).
+
+- Recommended `base_url`: `https://{resource}.openai.azure.com/openai/v1`
+- Credential key names: `AZURE_FOUNDRY_API_KEY`, `AZURE_FOUNDRY_BASE_URL`
+- Auth: sends the Foundry `api-key` header
+
+```toml
+[providers.foundry]
+type = "azure-foundry"
+base_url = "https://YOUR-RESOURCE.openai.azure.com/openai/v1"
+api_key = "YOUR_KEY"
+
+[models.foundry-gpt4o]
+provider = "foundry"
+model = "gpt-4o"
+max_context_size = 128000
+```
+
+Third-party reasoning models on Foundry work the same way as on the generic `openai` provider: set `reasoning_key` on the model alias when your gateway returns reasoning content under a non-standard field name.
+
 ## `google-genai`
 
 For connecting directly to the Google Gemini API. Thinking, vision, and multimodal capabilities are auto-detected by model name.

diff --git a/docs/zh/configuration/config-files.md b/docs/zh/configuration/config-files.md
@@ -100,7 +100,7 @@ timeout = 5
 
 | 字段 | 类型 | 必填 | 说明 |
 | --- | --- | --- | --- |
-| `type` | `string` | 是 | 供应商类型：`kimi`、`anthropic`、`openai`、`openai_responses`、`google-genai`、`vertexai` |
+| `type` | `string` | 是 | 供应商类型：`kimi`、`anthropic`、`openai`、`openai_responses`、`azure-foundry`、`google-genai`、`vertexai` |
 | `api_key` | `string` | 否 | API 密钥，明文写在配置文件里 |
 | `base_url` | `string` | 否 | API 基础 URL |
 | `oauth` | `table` | 否 | OAuth 凭据引用（`storage`、`key` 两个字段），由登录流程自动注入，通常无需手写 |

diff --git a/docs/zh/configuration/env-vars.md b/docs/zh/configuration/env-vars.md
@@ -58,6 +58,8 @@ KIMI_BASE_URL = "https://api.moonshot.ai/v1"
 | `ANTHROPIC_BASE_URL` | Anthropic | Anthropic SDK 默认值 |
 | `OPENAI_API_KEY` | OpenAI（`openai` 和 `openai_responses`） | 无 |
 | `OPENAI_BASE_URL` | OpenAI（`openai` 和 `openai_responses`） | `https://api.openai.com/v1` |
+| `AZURE_FOUNDRY_API_KEY` | Microsoft Foundry（`azure-foundry`） | 无 |
+| `AZURE_FOUNDRY_BASE_URL` | Microsoft Foundry（`azure-foundry`） | 无 |
 | `GOOGLE_API_KEY` | Google GenAI、Vertex AI | 无 |
 | `VERTEXAI_API_KEY` | Vertex AI | 无 |
 | `GOOGLE_CLOUD_PROJECT` | Vertex AI | 无 |

diff --git a/docs/zh/configuration/providers.md b/docs/zh/configuration/providers.md
@@ -12,6 +12,7 @@ Kimi Code CLI 支持同时接入多家 LLM 平台——用 Kimi Code 托管服
 | `anthropic` | Anthropic Messages | Claude 系列模型 |
 | `openai` | OpenAI Chat Completions | OpenAI 及兼容服务、DeepSeek、Qwen 等 |
 | `openai_responses` | OpenAI Responses API | OpenAI 较新的 Responses 接口 |
+| `azure-foundry` | Microsoft Foundry（OpenAI v1） | Microsoft Foundry 模型部署（GPT、DeepSeek、Llama、Mistral 等） |
 | `google-genai` | Google GenAI | Gemini API |
 | `vertexai` | Google GenAI on Vertex | Google Cloud Vertex AI |
 
@@ -107,6 +108,30 @@ base_url = "https://api.openai.com/v1"
 api_key = "sk-xxxxx"
 ```
 
+## `azure-foundry`
+
+用于连接 [Microsoft Foundry](https://learn.microsoft.com/en-us/azure/foundry/) 上的模型部署，走 OpenAI v1 兼容推理路由。Foundry 托管多种模型家族——OpenAI GPT、DeepSeek、Meta Llama、Mistral 等 Azure 直售模型，并非只有 OpenAI。在 `[models.<alias>]` 中填写 Foundry 部署的模型 ID。
+
+Microsoft 建议第三方 SDK 和自定义应用使用 OpenAI v1 路由。详见 [Integrate Microsoft Foundry with your applications](https://learn.microsoft.com/en-us/azure/foundry/how-to/integrate-with-other-apps)。
+
+- 推荐 `base_url`：`https://{resource}.openai.azure.com/openai/v1`
+- 凭证键名：`AZURE_FOUNDRY_API_KEY`、`AZURE_FOUNDRY_BASE_URL`
+- 认证：发送 Foundry 的 `api-key` 请求头
+
+```toml
+[providers.foundry]
+type = "azure-foundry"
+base_url = "https://YOUR-RESOURCE.openai.azure.com/openai/v1"
+api_key = "YOUR_KEY"
+
+[models.foundry-gpt4o]
+provider = "foundry"
+model = "gpt-4o"
+max_context_size = 128000
+```
+
+Foundry 上的第三方推理模型与通用 `openai` 供应商用法相同：若网关以非标准字段返回推理内容，可在模型别名上设置 `reasoning_key`。
+
 ## `google-genai`
 
 用于直连 Google Gemini API。thinking、视觉及多模态能力按模型名自动识别。

diff --git a/packages/agent-core/src/agent/turn/index.ts b/packages/agent-core/src/agent/turn/index.ts
@@ -40,6 +40,11 @@ import { USER_PROMPT_ORIGIN, type PromptOrigin } from '../context';
 import { renderUserPromptHookBlockResult, renderUserPromptHookResult } from '../../session/hooks';
 import { canonicalTelemetryArgs, isPlainRecord } from './canonical-args';
 import { ToolCallDeduplicator } from './tool-dedup';
+import {
+  hasToolResultsSinceLastUserMessage,
+  TOOL_STALL_RECOVERY_NAME,
+  TOOL_STALL_RECOVERY_TEXT,
+} from './tool-stall-recovery';
 
 interface ActiveTurn {
   readonly turnId: number;
@@ -613,6 +618,7 @@ export class TurnFlow {
   private async runStepLoop(turnId: number, signal: AbortSignal): Promise<LoopTurnStopReason> {
     let stopHookContinuationUsed = false;
     let goalOutcomeMessageContinuationUsed = false;
+    let toolStallContinuationUsed = false;
     const deduper = new ToolCallDeduplicator({ telemetry: this.agent.telemetry });
     await this.agent.mcp?.waitForInitialLoad(signal);
     // Surface the active goal at the start of the turn (append-only; no-op when
@@ -679,6 +685,29 @@ export class TurnFlow {
                 return { continue: true };
               }
 
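+              // 3b. Recover once when the model ends a step without tools after
+              //     tool results already landed in the same turn (common with
+              //     shared-window thinking models that stop after long reasoning). NOTE(review): a genuine final answer after tool use also matches — confirm the extra step is intended.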
+              if (
+                !toolStallContinuationUsed &&
+                ctx.stopReason === 'end_turn' &&
+                ctx.stepNumber > 1 &&
+                hasToolResultsSinceLastUserMessage(this.agent.context.messages)
+              ) {
+                if (!hasStepBudgetRemaining(loopControl?.maxStepsPerTurn, ctx.stepNumber)) {
+                  return { continue: false };
+                }
+                toolStallContinuationUsed = true;
+                this.agent.context.appendUserMessage(
+                  [{ type: 'text', text: TOOL_STALL_RECOVERY_TEXT }],
+                  {
+                    kind: 'system_trigger',
+                    name: TOOL_STALL_RECOVERY_NAME,
+                  },
+                );
+                return { continue: true };
+              }
+
               // 3. The external Stop hook gets exactly one continuation; the cap
               //    is intentionally separate from (and does not cap) goal mode.
               if (!stopHookContinuationUsed) {

diff --git a/packages/agent-core/src/agent/turn/tool-stall-recovery.ts b/packages/agent-core/src/agent/turn/tool-stall-recovery.ts
@@ -0,0 +1,20 @@
+import type { Message } from '@moonshot-ai/kosong';
+
+export const TOOL_STALL_RECOVERY_NAME = 'tool_stall_recovery';
+
+export const TOOL_STALL_RECOVERY_TEXT =
+  '<system-reminder>\n' +
+  'Your previous step ended without calling any tools even though more work remains on the user request. ' +
+  'Call the appropriate tools now instead of only describing what you plan to do next.\n' +
+  '</system-reminder>';
+
+/** Walks the history newest-first: true if a `tool` message is reached before any `user` message; false otherwise, including for an empty history. */
+export function hasToolResultsSinceLastUserMessage(messages: readonly Message[]): boolean {
+  for (let index = messages.length - 1; index >= 0; index -= 1) {
+    const message = messages[index];
+    if (message === undefined) continue;
+    if (message.role === 'user') return false; // reached the latest user message first
+    if (message.role === 'tool') return true; // a tool result is newer than any user message
+  }
+  return false;
+}
diff --git a/packages/agent-core/src/config/kimi-env-params.ts b/packages/agent-core/src/config/kimi-env-params.ts
@@ -50,8 +50,10 @@ export function applyKimiEnvThinkingKeep(
   thinkingLevel: ThinkingEffort,
   env: Env = process.env,
 ): ChatProvider {
-  if (!(provider instanceof KimiChatProvider)) return provider;
   const keep = env['KIMI_MODEL_THINKING_KEEP']?.trim();
   if (keep === undefined || keep.length === 0 || thinkingLevel === 'off') return provider;
-  return provider.withExtraBody({ thinking: { keep } });
+  if (provider instanceof KimiChatProvider) {
+    return provider.withExtraBody({ thinking: { keep } });
+  }
+  return provider;
 }
diff --git a/packages/agent-core/src/config/schema.ts b/packages/agent-core/src/config/schema.ts
@@ -5,6 +5,7 @@ import { z } from 'zod';
 
 export const ProviderTypeSchema = z.enum([
   'anthropic',
+  'azure-foundry',
   'openai',
   'kimi',
   'google-genai',

diff --git a/packages/agent-core/src/services/modelCatalog/modelCatalogService.ts b/packages/agent-core/src/services/modelCatalog/modelCatalogService.ts
@@ -214,6 +214,8 @@ function hasConfiguredApiKey(provider: ProviderConfig): boolean {
     case 'openai':
     case 'openai_responses':
       return nonEmpty(provider.env?.['OPENAI_API_KEY']) !== undefined;
+    case 'azure-foundry':
+      return nonEmpty(provider.env?.['AZURE_FOUNDRY_API_KEY']) !== undefined;
     case 'kimi':
       return nonEmpty(provider.env?.['KIMI_API_KEY']) !== undefined;
     case 'google-genai':

diff --git a/packages/agent-core/src/session/provider-manager.ts b/packages/agent-core/src/session/provider-manager.ts
@@ -109,6 +109,7 @@ export class ProviderManager implements ModelProvider {
       alias.model,
       this.options.kimiRequestHeaders,
       alias.maxOutputSize,
+      alias.maxContextSize,
       alias.reasoningKey,
       this.options.promptCacheKey,
       alias.adaptiveThinking,
@@ -221,6 +222,7 @@ function toKosongProviderConfig(
   model: string,
   kimiRequestHeaders: Record<string, string> | undefined,
   maxOutputSize: number | undefined,
+  maxContextSize: number | undefined,
   reasoningKey: string | undefined,
   promptCacheKey: string | undefined,
   adaptiveThinking: boolean | undefined,
@@ -245,6 +247,24 @@ function toKosongProviderConfig(
         reasoningKey,
         ...defaultHeadersField(provider.customHeaders),
       };
+    case 'azure-foundry': {
+      const baseUrl = providerValue(provider.baseUrl, provider.env, 'AZURE_FOUNDRY_BASE_URL');
+      if (baseUrl === undefined) {
+        throw new KimiError(
+          ErrorCodes.MODEL_CONFIG_INVALID,
+          'Provider type "azure-foundry" requires base_url (or AZURE_FOUNDRY_BASE_URL in [providers.<name>.env]). Example: https://YOUR-RESOURCE.openai.azure.com/openai/v1',
+        );
+      }
+      return {
+        type: 'azure-foundry',
+        model,
+        baseUrl,
+        apiKey: providerApiKey(provider),
+        reasoningKey,
+        sharedContextWindowTokens: maxContextSize,
+        ...defaultHeadersField(provider.customHeaders),
+      };
+    }
     case 'kimi':
       return {
         type: 'kimi',
@@ -306,6 +326,8 @@ function providerApiKey(provider: ProviderConfig): string | undefined {
     case 'openai':
     case 'openai_responses':
       return providerValue(provider.apiKey, provider.env, 'OPENAI_API_KEY');
+    case 'azure-foundry':
+      return providerValue(provider.apiKey, provider.env, 'AZURE_FOUNDRY_API_KEY');
     case 'kimi':
       return providerValue(provider.apiKey, provider.env, 'KIMI_API_KEY');
     case 'google-genai':

diff --git a/packages/agent-core/src/utils/completion-budget.ts b/packages/agent-core/src/utils/completion-budget.ts
@@ -50,18 +50,21 @@ function parseEnvBudget(raw: string | undefined): EnvBudget {
 
 /**
  * Compute the effective `max_completion_tokens` cap.
+ *
+ * Uses the explicit hard cap, else the reserved-context fallback, else the
+ * context window (or a default when unknown); clamped to a known window and
+ * floored at MIN_FLOOR. Shared-window providers reject requests where input tokens plus max_completion_tokens exceed the total window.
  */
 export function computeCompletionBudgetCap(args: {
   readonly budget: CompletionBudgetConfig;
   readonly capability: ModelCapability | undefined;
 }): number {
   const maxCtx = args.capability?.max_context_tokens ?? 0;
-  // The provider backend computes the safe request-specific value from the
-  // serialized prompt. Locally using the largest cap avoids cutting off
-  // thinking before the model produces a summary.
-  const cap =
+  const requested =
     args.budget.hardCap ??
-    (maxCtx > 0 ? maxCtx : args.budget.fallback ?? DEFAULT_UNKNOWN_CONTEXT_FALLBACK);
+    args.budget.fallback ??
+    (maxCtx > 0 ? maxCtx : DEFAULT_UNKNOWN_CONTEXT_FALLBACK);
+  const cap = maxCtx > 0 ? Math.min(requested, maxCtx) : requested;
   return Math.max(MIN_FLOOR, cap);
 }
 

diff --git a/packages/agent-core/test/agent/tool-stall-recovery.test.ts b/packages/agent-core/test/agent/tool-stall-recovery.test.ts
@@ -0,0 +1,32 @@
+import type { Message } from '@moonshot-ai/kosong';
+import { describe, expect, it } from 'vitest';
+
+import { hasToolResultsSinceLastUserMessage } from '../../src/agent/turn/tool-stall-recovery';
+
+describe('hasToolResultsSinceLastUserMessage', () => {
+  it('returns false when the latest user message has no trailing tool results', () => {
+    const messages: Message[] = [
+      { role: 'user', content: [{ type: 'text', text: 'hi' }], toolCalls: [] },
+      { role: 'assistant', content: [{ type: 'text', text: 'hello' }], toolCalls: [] },
+    ];
+    expect(hasToolResultsSinceLastUserMessage(messages)).toBe(false);
+  });
+
+  it('returns true when tool results follow the latest user message', () => {
+    const messages: Message[] = [
+      { role: 'user', content: [{ type: 'text', text: 'explore' }], toolCalls: [] },
+      {
+        role: 'assistant',
+        content: [{ type: 'text', text: 'reading' }],
+        toolCalls: [{ type: 'function', id: 'call_1', name: 'Read', arguments: '{}' }],
+      },
+      { role: 'tool', content: [{ type: 'text', text: 'file contents' }], toolCalls: [], toolCallId: 'call_1' },
+      {
+        role: 'assistant',
+        content: [{ type: 'text', text: 'I will continue' }],
+        toolCalls: [],
+      },
+    ];
+    expect(hasToolResultsSinceLastUserMessage(messages)).toBe(true);
+  });
+});
diff --git a/packages/agent-core/test/agent/turn.test.ts b/packages/agent-core/test/agent/turn.test.ts
@@ -112,6 +112,29 @@ describe('Agent turn flow', () => {
     });
   });
 
+  it('continues once after a post-tool step ends without further tool calls', async () => {
+    const ctx = testAgent({ kaos: createCommandKaos('ok') });
+    ctx.configure({ tools: ['Bash'] });
+    await ctx.rpc.setPermission({ mode: 'yolo' });
+
+    ctx.mockNextResponse(
+      { type: 'text', text: 'Running first command.' },
+      bashCallWithId('call_1', 'printf ok'),
+    );
+    ctx.mockNextResponse({ type: 'text', text: 'I will continue exploring.' });
+    ctx.mockNextResponse(
+      { type: 'text', text: 'Continuing with another command.' },
+      bashCallWithId('call_2', 'printf more'),
+    );
+    ctx.mockNextResponse({ type: 'text', text: 'Done.' });
+
+    await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Explore the repo' }] });
+    await ctx.untilTurnEnd();
+
+    expect(ctx.llmCalls).toHaveLength(4);
+    expect(JSON.stringify(ctx.llmCalls[2]?.history ?? [])).toContain('system-reminder');
+  });
+
   it('tracks cross-step duplicate tool-call detection telemetry', async () => {
     const records: TelemetryRecord[] = [];
     const ctx = testAgent({

diff --git a/packages/agent-core/test/config/kimi-env-params.test.ts b/packages/agent-core/test/config/kimi-env-params.test.ts
@@ -1,4 +1,4 @@
-import { type ChatProvider, KimiChatProvider } from '@moonshot-ai/kosong';
+import { createProvider, type ChatProvider, KimiChatProvider } from '@moonshot-ai/kosong';
 import { describe, expect, it } from 'vitest';
 
 import { applyKimiEnvSamplingParams, applyKimiEnvThinkingKeep } from '../../src/config/kimi-env-params';
@@ -8,6 +8,15 @@ function kimi(): KimiChatProvider {
   return new KimiChatProvider({ model: 'kimi-k2', apiKey: 'k' });
 }
 
+function foundryKimi(): ChatProvider {
+  return createProvider({
+    type: 'azure-foundry',
+    model: 'Kimi-K2.6',
+    apiKey: 'k',
+    baseUrl: 'https://example.openai.azure.com/openai/v1',
+  });
+}
+
 interface KimiGenerationState {
   temperature?: number;
   top_p?: number;
@@ -63,6 +72,12 @@ describe('applyKimiEnvThinkingKeep', () => {
     expect(genState(out).extra_body?.thinking?.keep).toBe('all');
   });
 
+  it('does not inject thinking.keep for Foundry-hosted Kimi models', () => {
+    const provider = foundryKimi();
+    const out = applyKimiEnvThinkingKeep(provider, 'high', { KIMI_MODEL_THINKING_KEEP: 'all' });
+    expect(out).toBe(provider);
+  });
+
   it('does NOT inject thinking.keep when thinking is off', () => {
     const out = applyKimiEnvThinkingKeep(kimi(), 'off', { KIMI_MODEL_THINKING_KEEP: 'all' });
     expect(genState(out).extra_body).toBeUndefined();