From 72e74476be556ecac42033af8362479c599d5fe9 Mon Sep 17 00:00:00 2001
From: "Hanna Paasivirta (OpenFn)" <hanna@openfn.org>
Date: Tue, 16 Jun 2026 01:53:18 +0900
Subject: [PATCH 1/4] use env var for model selection

---
 pyproject.toml                                |  1 +
 services/doc_agent_chat/agent.py              |  4 +--
 services/doc_agent_chat/config.yaml           |  4 ++-
 services/global_chat/config.yaml              |  4 ++-
 services/global_chat/planner.py               |  4 +--
 services/job_chat/job_chat.py                 |  4 +--
 services/job_chat/rag.yaml                    |  5 ++-
 services/models.py                            | 36 ++++++++++++++++---
 services/streaming_util.py                    |  2 +-
 services/testing/judge.py                     |  6 ++--
 services/tests/__init__.py                    |  0
 services/tests/unit/__init__.py               |  0
 services/tests/unit/test_models.py            | 33 +++++++++++++++++
 .../workflow_chat/gen_project_config.yaml     |  4 ++-
 services/workflow_chat/workflow_chat.py       |  4 +--
 15 files changed, 90 insertions(+), 21 deletions(-)
 create mode 100644 services/tests/__init__.py
 create mode 100644 services/tests/unit/__init__.py
 create mode 100644 services/tests/unit/test_models.py

diff --git a/pyproject.toml b/pyproject.toml
index 91f181fa..fff1e25b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -49,6 +49,7 @@ testpaths = [
     "services/workflow_chat/tests",
     "services/job_chat/tests",
     "services/search_docsite/tests",
+    "services/tests",
     "services/tools",
 ]
 
diff --git a/services/doc_agent_chat/agent.py b/services/doc_agent_chat/agent.py
index d9e24856..48ea9a93 100644
--- a/services/doc_agent_chat/agent.py
+++ b/services/doc_agent_chat/agent.py
@@ -7,7 +7,7 @@
 from doc_agent_chat.prompt import build_system_prompt
 from doc_agent_chat.tools import TOOL_DEFINITIONS, search_documents, format_search_results_as_documents
 from doc_agent_chat.config_loader import ConfigLoader
-from models import resolve_model
+from models import preferred_chat_model
 
 logger = create_logger("agent")
 
@@ -24,7 +24,7 @@ def __init__(self, api_key: Optional[str] = None):
             raise ValueError("API key must be provided")
 
         self.client = Anthropic(api_key=self.api_key)
-        self.model = resolve_model(config.get("model", "claude-fable"))
+        self.model = preferred_chat_model(config.get("model"))
         self.max_tokens = config.get("max_tokens", 49152)
         self.max_tool_calls = config.get("max_tool_calls", 10)
         self.search_top_k = config.get("search_top_k", 5)
diff --git a/services/doc_agent_chat/config.yaml b/services/doc_agent_chat/config.yaml
index 389bf4b0..907d3c19 100644
--- a/services/doc_agent_chat/config.yaml
+++ b/services/doc_agent_chat/config.yaml
@@ -1,5 +1,7 @@
 config_version: 1.0
-model: claude-fable
+# Chat model is set centrally in services/models.py (CHAT_MODEL_DEFAULT) and can
+# be overridden at runtime via the APOLLO_CHAT_MODEL env var. Add a `model:` key
+# here only to pin this service to a specific model.
 max_tokens: 49152
 max_tool_calls: 10
 search_top_k: 5
diff --git a/services/global_chat/config.yaml b/services/global_chat/config.yaml
index 376ea662..1f570a25 100644
--- a/services/global_chat/config.yaml
+++ b/services/global_chat/config.yaml
@@ -7,7 +7,9 @@ router:
   temperature: 0.0
 
 # Planner configuration (complex orchestration)
+# Chat model is set centrally in services/models.py (CHAT_MODEL_DEFAULT) and can
+# be overridden at runtime via the APOLLO_CHAT_MODEL env var. Add a `model:` key
+# here only to pin the planner to a specific model.
 planner:
-  model: "claude-fable"
   max_tokens: 24576
   max_tool_calls: 10
diff --git a/services/global_chat/planner.py b/services/global_chat/planner.py
index 4918aae3..6d1fb4af 100644
--- a/services/global_chat/planner.py
+++ b/services/global_chat/planner.py
@@ -24,7 +24,7 @@
     STATUS_PLANNING,
 )
 from global_chat.config_loader import ConfigLoader
-from models import resolve_model
+from models import preferred_chat_model
 from global_chat.tools.tool_definitions import TOOL_DEFINITIONS
 from global_chat.yaml_utils import stitch_job_code, redact_job_bodies, find_job_in_yaml
 from tools.search_documentation.search_documentation import search_documentation_tool
@@ -60,7 +60,7 @@ def __init__(self, config_loader: ConfigLoader, api_key: Optional[str] = None):
         self.tools = TOOL_DEFINITIONS
 
         planner_config = config_loader.config.get("planner", {})
-        self.model = resolve_model(planner_config.get("model", "claude-fable"))
+        self.model = preferred_chat_model(planner_config.get("model"))
         self.max_tokens = planner_config.get("max_tokens", 24576)
         self.max_tool_calls = planner_config.get("max_tool_calls", 20)
 
diff --git a/services/job_chat/job_chat.py b/services/job_chat/job_chat.py
index 7da4d13f..522bfe4b 100644
--- a/services/job_chat/job_chat.py
+++ b/services/job_chat/job_chat.py
@@ -29,13 +29,13 @@
     STATUS_WORKING,
     STATUS_WRITING_CODE,
 )
-from models import resolve_model
+from models import preferred_chat_model
 
 _dir = os.path.dirname(os.path.abspath(__file__))
 with open(os.path.join(_dir, "rag.yaml")) as _f:
     _service_config = yaml.safe_load(_f)
 
-_MODEL = resolve_model(_service_config.get("model", "claude-fable"))
+_MODEL = preferred_chat_model(_service_config.get("model"))
 
 logger = create_logger("job_chat")
 
diff --git a/services/job_chat/rag.yaml b/services/job_chat/rag.yaml
index 82546592..0f251d19 100644
--- a/services/job_chat/rag.yaml
+++ b/services/job_chat/rag.yaml
@@ -1,5 +1,8 @@
 config_version: 1.0
-model: "claude-fable"
+# Main chat model is set centrally in services/models.py (CHAT_MODEL_DEFAULT) and
+# can be overridden at runtime via the APOLLO_CHAT_MODEL env var. Add a `model:`
+# key here only to pin this service. (The llm_* keys below are the smaller
+# RAG/retrieval models and are unaffected by that default.)
 llm_search_decision: "claude-sonnet"
 llm_retrieval: "claude-sonnet"
 threshold: 0.8
diff --git a/services/models.py b/services/models.py
index 10abd40f..56c2e275 100644
--- a/services/models.py
+++ b/services/models.py
@@ -3,18 +3,15 @@
 Update values here to change models used across all services.
 """
 
+import os
+
 CLAUDE_MODELS: dict[str, str] = {
     "claude-opus":   "claude-opus-4-8",
-    # Fable rejects temperature/top_p/top_k and any explicit `thinking`
-    # config other than {"type": "adaptive"}; tokenizer yields ~30% more
-    # tokens than Sonnet/Opus for the same content.
-    "claude-fable":  "claude-fable-5",
     "claude-sonnet": "claude-sonnet-4-6",
     "claude-haiku":  "claude-haiku-4-5-20251001",
 }
 
 CLAUDE_OPUS:   str = CLAUDE_MODELS["claude-opus"]
-CLAUDE_FABLE:  str = CLAUDE_MODELS["claude-fable"]
 CLAUDE_SONNET: str = CLAUDE_MODELS["claude-sonnet"]
 CLAUDE_HAIKU:  str = CLAUDE_MODELS["claude-haiku"]
 
@@ -22,3 +19,32 @@
 def resolve_model(alias: str) -> str:
     """Resolve a model alias to its full ID. Passes through unknown strings unchanged."""
     return CLAUDE_MODELS.get(alias, alias)
+
+
+# --- Main chat model selection ----------------------------------------------
+#
+# The "main chat model" is the large model that drives user-facing chat
+# (job_chat, workflow_chat, doc_agent_chat, and the global_chat planner). It is
+# distinct from the smaller models used for RAG/routing (haiku/sonnet), which
+# are configured directly and are NOT affected by the helper below.
+
+# Env var that overrides the main chat model at runtime, so we can switch the
+# live model without a redeploy. Holds a model alias or full ID.
+CHAT_MODEL_ENV = "APOLLO_CHAT_MODEL"
+
+# Default main chat model when neither the env var nor a service config overrides it.
+CHAT_MODEL_DEFAULT = CLAUDE_OPUS
+
+
+def preferred_chat_model(config_value: str | None = None) -> str:
+    """Resolve the main chat model.
+
+    Precedence: APOLLO_CHAT_MODEL env var > per-service config value > CLAUDE_OPUS.
+    The env var lets us switch the live chat model without redeploying.
+    """
+    override = os.getenv(CHAT_MODEL_ENV)
+    if override:
+        return resolve_model(override)
+    if config_value:
+        return resolve_model(config_value)
+    return CHAT_MODEL_DEFAULT
diff --git a/services/streaming_util.py b/services/streaming_util.py
index 7b6a4e78..045aa102 100644
--- a/services/streaming_util.py
+++ b/services/streaming_util.py
@@ -96,7 +96,7 @@ class StreamManager:
     block lifecycle and index tracking.
 
     Example usage:
-        manager = StreamManager(model=resolve_model("claude-fable"))
+        manager = StreamManager(model=resolve_model("claude-opus"))
         manager.start_stream()
         manager.send_thinking("Researching...")
         manager.send_text("Here's what I found...")
diff --git a/services/testing/judge.py b/services/testing/judge.py
index 747034d9..4600a76b 100644
--- a/services/testing/judge.py
+++ b/services/testing/judge.py
@@ -34,11 +34,11 @@
 
 from anthropic import Anthropic
 
-from models import CLAUDE_FABLE
+from models import CLAUDE_OPUS
 from testing.judges import load_judge
 
 
-DEFAULT_MODEL = CLAUDE_FABLE
+DEFAULT_MODEL = CLAUDE_OPUS
 DEFAULT_JUDGE = "general"
 
 
@@ -277,7 +277,7 @@ def evaluate(
             guessing.
         judge: Name of the judge (file at services/testing/judges/<name>.md).
             Defaults to "general".
-        model: Model to use. Defaults to CLAUDE_FABLE from services/models.py.
+        model: Model to use. Defaults to CLAUDE_OPUS from services/models.py.
         client: Optional Anthropic client. Constructed from env if not given.
 
     Returns:
diff --git a/services/tests/__init__.py b/services/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/services/tests/unit/__init__.py b/services/tests/unit/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/services/tests/unit/test_models.py b/services/tests/unit/test_models.py
new file mode 100644
index 00000000..27431da9
--- /dev/null
+++ b/services/tests/unit/test_models.py
@@ -0,0 +1,33 @@
+"""Unit tests for the central chat-model selection in `services/models.py`.
+
+No real model calls — pure resolution logic. The repo-root conftest marks
+everything under a `unit/` dir as `unit` and blocks real client construction.
+"""
+
+import models as m
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _clear_env(monkeypatch):
+    """Ensure a stray APOLLO_CHAT_MODEL in the real environment can't skew tests."""
+    monkeypatch.delenv(m.CHAT_MODEL_ENV, raising=False)
+
+
+# --- preferred_chat_model: precedence ---------------------------------------
+
+def test_preferred_defaults_to_opus_when_nothing_set():
+    assert m.preferred_chat_model() == m.CLAUDE_OPUS
+    assert m.preferred_chat_model(None) == m.CLAUDE_OPUS
+
+
+def test_preferred_uses_config_value_when_no_env():
+    assert m.preferred_chat_model("claude-sonnet") == m.CLAUDE_SONNET
+    # full IDs pass through unchanged
+    assert m.preferred_chat_model("claude-opus-4-7") == "claude-opus-4-7"
+
+
+def test_preferred_env_overrides_config(monkeypatch):
+    # Also proves the env value is alias-resolved ("claude-sonnet" -> full ID).
+    monkeypatch.setenv(m.CHAT_MODEL_ENV, "claude-sonnet")
+    assert m.preferred_chat_model("claude-opus") == m.CLAUDE_SONNET
diff --git a/services/workflow_chat/gen_project_config.yaml b/services/workflow_chat/gen_project_config.yaml
index a88d652d..1b84e0b6 100644
--- a/services/workflow_chat/gen_project_config.yaml
+++ b/services/workflow_chat/gen_project_config.yaml
@@ -1,4 +1,6 @@
 config_version: 1.0
-model: "claude-fable"
+# Chat model is set centrally in services/models.py (CHAT_MODEL_DEFAULT) and can
+# be overridden at runtime via the APOLLO_CHAT_MODEL env var. Add a `model:` key
+# here only to pin this service to a specific model.
 threshold: 0.7
 top_k: 5
diff --git a/services/workflow_chat/workflow_chat.py b/services/workflow_chat/workflow_chat.py
index 08571bb6..cadd78dd 100644
--- a/services/workflow_chat/workflow_chat.py
+++ b/services/workflow_chat/workflow_chat.py
@@ -6,13 +6,13 @@
 from typing import List, Optional, Dict, Any
 import yaml
 from dataclasses import dataclass
-from models import resolve_model
+from models import preferred_chat_model
 
 _dir = os.path.dirname(os.path.abspath(__file__))
 with open(os.path.join(_dir, "gen_project_config.yaml")) as _f:
     _service_config = yaml.safe_load(_f)
 
-_MODEL = resolve_model(_service_config.get("model", "claude-fable"))
+_MODEL = preferred_chat_model(_service_config.get("model"))
 
 # JSON schema for structured outputs — guarantees valid JSON from the API
 _OUTPUT_SCHEMA = {

From d8e2a2cc1c286c5f9c3e7e3b9755ff61b94700d0 Mon Sep 17 00:00:00 2001
From: "Hanna Paasivirta (OpenFn)" <hanna@openfn.org>
Date: Tue, 16 Jun 2026 03:00:50 +0900
Subject: [PATCH 2/4] use service-specific model settings

---
 services/doc_agent_chat/agent.py              |  2 +-
 services/doc_agent_chat/config.yaml           |  5 +-
 services/global_chat/config.yaml              |  5 +-
 services/global_chat/planner.py               |  2 +-
 services/job_chat/job_chat.py                 |  7 +--
 services/job_chat/rag.yaml                    |  7 +--
 services/models.py                            | 55 ++++++++++++++-----
 services/tests/unit/test_models.py            | 49 ++++++++++++-----
 .../workflow_chat/gen_project_config.yaml     |  5 +-
 services/workflow_chat/workflow_chat.py       |  6 +-
 10 files changed, 89 insertions(+), 54 deletions(-)

diff --git a/services/doc_agent_chat/agent.py b/services/doc_agent_chat/agent.py
index 48ea9a93..2e06e437 100644
--- a/services/doc_agent_chat/agent.py
+++ b/services/doc_agent_chat/agent.py
@@ -24,7 +24,7 @@ def __init__(self, api_key: Optional[str] = None):
             raise ValueError("API key must be provided")
 
         self.client = Anthropic(api_key=self.api_key)
-        self.model = preferred_chat_model(config.get("model"))
+        self.model = preferred_chat_model("doc_agent_chat")
         self.max_tokens = config.get("max_tokens", 49152)
         self.max_tool_calls = config.get("max_tool_calls", 10)
         self.search_top_k = config.get("search_top_k", 5)
diff --git a/services/doc_agent_chat/config.yaml b/services/doc_agent_chat/config.yaml
index 907d3c19..000dc0ed 100644
--- a/services/doc_agent_chat/config.yaml
+++ b/services/doc_agent_chat/config.yaml
@@ -1,7 +1,6 @@
 config_version: 1.0
-# Chat model is set centrally in services/models.py (CHAT_MODEL_DEFAULT) and can
-# be overridden at runtime via the APOLLO_CHAT_MODEL env var. Add a `model:` key
-# here only to pin this service to a specific model.
+# The chat model is configured in services/models.py (the default plus the
+# APOLLO_CHAT_MODEL env override), not here.
 max_tokens: 49152
 max_tool_calls: 10
 search_top_k: 5
diff --git a/services/global_chat/config.yaml b/services/global_chat/config.yaml
index 1f570a25..f57db3c5 100644
--- a/services/global_chat/config.yaml
+++ b/services/global_chat/config.yaml
@@ -7,9 +7,8 @@ router:
   temperature: 0.0
 
 # Planner configuration (complex orchestration)
-# Chat model is set centrally in services/models.py (CHAT_MODEL_DEFAULT) and can
-# be overridden at runtime via the APOLLO_CHAT_MODEL env var. Add a `model:` key
-# here only to pin the planner to a specific model.
+# The planner's chat model is configured in services/models.py (the default plus
+# the APOLLO_CHAT_MODEL env override), not here.
 planner:
   max_tokens: 24576
   max_tool_calls: 10
diff --git a/services/global_chat/planner.py b/services/global_chat/planner.py
index 6d1fb4af..9e806a91 100644
--- a/services/global_chat/planner.py
+++ b/services/global_chat/planner.py
@@ -60,7 +60,7 @@ def __init__(self, config_loader: ConfigLoader, api_key: Optional[str] = None):
         self.tools = TOOL_DEFINITIONS
 
         planner_config = config_loader.config.get("planner", {})
-        self.model = preferred_chat_model(planner_config.get("model"))
+        self.model = preferred_chat_model("global_chat")
         self.max_tokens = planner_config.get("max_tokens", 24576)
         self.max_tool_calls = planner_config.get("max_tool_calls", 20)
 
diff --git a/services/job_chat/job_chat.py b/services/job_chat/job_chat.py
index 522bfe4b..cc569590 100644
--- a/services/job_chat/job_chat.py
+++ b/services/job_chat/job_chat.py
@@ -1,7 +1,6 @@
 import os
 import json
 import re
-import yaml
 from typing import List, Optional, Dict, Any
 from dataclasses import dataclass
 import httpx
@@ -31,11 +30,7 @@
 )
 from models import preferred_chat_model
 
-_dir = os.path.dirname(os.path.abspath(__file__))
-with open(os.path.join(_dir, "rag.yaml")) as _f:
-    _service_config = yaml.safe_load(_f)
-
-_MODEL = preferred_chat_model(_service_config.get("model"))
+_MODEL = preferred_chat_model("job_chat")
 
 logger = create_logger("job_chat")
 
diff --git a/services/job_chat/rag.yaml b/services/job_chat/rag.yaml
index 0f251d19..6c225aea 100644
--- a/services/job_chat/rag.yaml
+++ b/services/job_chat/rag.yaml
@@ -1,8 +1,7 @@
 config_version: 1.0
-# Main chat model is set centrally in services/models.py (CHAT_MODEL_DEFAULT) and
-# can be overridden at runtime via the APOLLO_CHAT_MODEL env var. Add a `model:`
-# key here only to pin this service. (The llm_* keys below are the smaller
-# RAG/retrieval models and are unaffected by that default.)
+# The main chat model is configured in services/models.py (per-service default
+# plus APOLLO_CHAT_MODEL / APOLLO_JOB_CHAT_MODEL env overrides), not here. The
+# llm_* keys below are the smaller RAG/retrieval models and are separate.
 llm_search_decision: "claude-sonnet"
 llm_retrieval: "claude-sonnet"
 threshold: 0.8
diff --git a/services/models.py b/services/models.py
index 56c2e275..a2ffbf0d 100644
--- a/services/models.py
+++ b/services/models.py
@@ -26,25 +26,50 @@ def resolve_model(alias: str) -> str:
 # The "main chat model" is the large model that drives user-facing chat
 # (job_chat, workflow_chat, doc_agent_chat, and the global_chat planner). It is
 # distinct from the smaller models used for RAG/routing (haiku/sonnet), which
-# are configured directly and are NOT affected by the helper below.
+# are configured directly and are NOT affected by the helpers below.
+#
+# The whole per-service model story lives here on purpose, so there is one place
+# to read what each service uses and how to override it. Nothing is configured
+# in the service yamls.
+
+# Default chat model for any service without its own entry below.
+CHAT_MODEL_DEFAULT = CLAUDE_OPUS
 
-# Env var that overrides the main chat model at runtime, so we can switch the
-# live model without a redeploy. Holds a model alias or full ID.
+# Global override env var. When set, forces every chat service to this model
+# (except a service that has its own env var set — see precedence below).
 CHAT_MODEL_ENV = "APOLLO_CHAT_MODEL"
 
-# Default main chat model when neither the env var nor a service config overrides it.
-CHAT_MODEL_DEFAULT = CLAUDE_OPUS
+# Per-service model config. `default` is the built-in choice; `env`, if set at
+# runtime, overrides it (and the global env var) for that service only.
+# Services not listed use CHAT_MODEL_DEFAULT and only honour CHAT_MODEL_ENV.
+CHAT_SERVICE_MODELS: dict[str, dict[str, str]] = {
+    # workflow_chat forces JSON/YAML output via structured outputs; Sonnet
+    # handles that better than Opus today, so it defaults to Sonnet.
+    "workflow_chat": {"default": CLAUDE_SONNET, "env": "APOLLO_WORKFLOW_CHAT_MODEL"},
+    "job_chat":      {"default": CLAUDE_OPUS,   "env": "APOLLO_JOB_CHAT_MODEL"},
+}
 
 
-def preferred_chat_model(config_value: str | None = None) -> str:
-    """Resolve the main chat model.
+def preferred_chat_model(service: str | None = None) -> str:
+    """Resolve the main chat model for `service`.
 
-    Precedence: APOLLO_CHAT_MODEL env var > per-service config value > CLAUDE_OPUS.
-    The env var lets us switch the live chat model without redeploying.
+    Precedence (most specific wins):
+        per-service env var  >  global env var (APOLLO_CHAT_MODEL)
+                             >  per-service default  >  CHAT_MODEL_DEFAULT
+
+    So APOLLO_CHAT_MODEL is a "force everything" switch, while a per-service env
+    var (e.g. APOLLO_WORKFLOW_CHAT_MODEL) pins that one service against it. All
+    env vars are optional; with none set, each service uses its default. The env
+    vars let us switch the live model without redeploying.
     """
-    override = os.getenv(CHAT_MODEL_ENV)
-    if override:
-        return resolve_model(override)
-    if config_value:
-        return resolve_model(config_value)
-    return CHAT_MODEL_DEFAULT
+    cfg = CHAT_SERVICE_MODELS.get(service, {})
+
+    service_override = os.getenv(cfg["env"]) if cfg.get("env") else None
+    if service_override:
+        return resolve_model(service_override)
+
+    global_override = os.getenv(CHAT_MODEL_ENV)
+    if global_override:
+        return resolve_model(global_override)
+
+    return cfg.get("default", CHAT_MODEL_DEFAULT)
diff --git a/services/tests/unit/test_models.py b/services/tests/unit/test_models.py
index 27431da9..bf19e000 100644
--- a/services/tests/unit/test_models.py
+++ b/services/tests/unit/test_models.py
@@ -1,33 +1,56 @@
 """Unit tests for the central chat-model selection in `services/models.py`.
 
-No real model calls — pure resolution logic. The repo-root conftest marks
+No real model calls, pure resolution logic. The repo-root conftest marks
 everything under a `unit/` dir as `unit` and blocks real client construction.
 """
 
 import models as m
 import pytest
 
+_WORKFLOW_ENV = m.CHAT_SERVICE_MODELS["workflow_chat"]["env"]
+
 
 @pytest.fixture(autouse=True)
 def _clear_env(monkeypatch):
-    """Ensure a stray APOLLO_CHAT_MODEL in the real environment can't skew tests."""
+    """Clear the global and all per-service overrides so the real environment
+    can't skew tests."""
     monkeypatch.delenv(m.CHAT_MODEL_ENV, raising=False)
+    for cfg in m.CHAT_SERVICE_MODELS.values():
+        monkeypatch.delenv(cfg["env"], raising=False)
+
+
+# --- defaults ---------------------------------------------------------------
+
+def test_unlisted_service_uses_global_default():
+    # A service with no entry (or none at all) falls back to CHAT_MODEL_DEFAULT.
+    assert m.preferred_chat_model() == m.CHAT_MODEL_DEFAULT
+    assert m.preferred_chat_model("doc_agent_chat") == m.CHAT_MODEL_DEFAULT
 
 
-# --- preferred_chat_model: precedence ---------------------------------------
+def test_per_service_defaults():
+    assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_SONNET
+    assert m.preferred_chat_model("job_chat") == m.CLAUDE_OPUS
 
-def test_preferred_defaults_to_opus_when_nothing_set():
-    assert m.preferred_chat_model() == m.CLAUDE_OPUS
-    assert m.preferred_chat_model(None) == m.CLAUDE_OPUS
 
+# --- precedence -------------------------------------------------------------
 
-def test_preferred_uses_config_value_when_no_env():
-    assert m.preferred_chat_model("claude-sonnet") == m.CLAUDE_SONNET
-    # full IDs pass through unchanged
-    assert m.preferred_chat_model("claude-opus-4-7") == "claude-opus-4-7"
+def test_per_service_env_overrides_its_default(monkeypatch):
+    # Also proves the env value is alias-resolved ("claude-opus" -> full ID).
+    monkeypatch.setenv(_WORKFLOW_ENV, "claude-opus")
+    assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_OPUS
 
 
-def test_preferred_env_overrides_config(monkeypatch):
-    # Also proves the env value is alias-resolved ("claude-sonnet" -> full ID).
+def test_global_env_overrides_defaults(monkeypatch):
     monkeypatch.setenv(m.CHAT_MODEL_ENV, "claude-sonnet")
-    assert m.preferred_chat_model("claude-opus") == m.CLAUDE_SONNET
+    # applies to a service with no per-service env set...
+    assert m.preferred_chat_model("job_chat") == m.CLAUDE_SONNET
+    # ...and to an unlisted service
+    assert m.preferred_chat_model("doc_agent_chat") == m.CLAUDE_SONNET
+
+
+def test_per_service_env_beats_global_env(monkeypatch):
+    # Global says "force everything to opus", but workflow pins itself to sonnet.
+    monkeypatch.setenv(m.CHAT_MODEL_ENV, "claude-opus")
+    monkeypatch.setenv(_WORKFLOW_ENV, "claude-sonnet")
+    assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_SONNET  # per-service wins
+    assert m.preferred_chat_model("job_chat") == m.CLAUDE_OPUS         # global applies here
diff --git a/services/workflow_chat/gen_project_config.yaml b/services/workflow_chat/gen_project_config.yaml
index 1b84e0b6..2f8cbae8 100644
--- a/services/workflow_chat/gen_project_config.yaml
+++ b/services/workflow_chat/gen_project_config.yaml
@@ -1,6 +1,5 @@
 config_version: 1.0
-# Chat model is set centrally in services/models.py (CHAT_MODEL_DEFAULT) and can
-# be overridden at runtime via the APOLLO_CHAT_MODEL env var. Add a `model:` key
-# here only to pin this service to a specific model.
+# The chat model is configured in services/models.py (per-service default plus
+# APOLLO_CHAT_MODEL / APOLLO_WORKFLOW_CHAT_MODEL env overrides), not here.
 threshold: 0.7
 top_k: 5
diff --git a/services/workflow_chat/workflow_chat.py b/services/workflow_chat/workflow_chat.py
index cadd78dd..281a2af0 100644
--- a/services/workflow_chat/workflow_chat.py
+++ b/services/workflow_chat/workflow_chat.py
@@ -8,11 +8,7 @@
 from dataclasses import dataclass
 from models import preferred_chat_model
 
-_dir = os.path.dirname(os.path.abspath(__file__))
-with open(os.path.join(_dir, "gen_project_config.yaml")) as _f:
-    _service_config = yaml.safe_load(_f)
-
-_MODEL = preferred_chat_model(_service_config.get("model"))
+_MODEL = preferred_chat_model("workflow_chat")
 
 # JSON schema for structured outputs — guarantees valid JSON from the API
 _OUTPUT_SCHEMA = {

From 455b36ec85d3109642a376548917f6482c1c243b Mon Sep 17 00:00:00 2001
From: "Hanna Paasivirta (OpenFn)" <hanna@openfn.org>
Date: Tue, 16 Jun 2026 03:04:22 +0900
Subject: [PATCH 3/4] add env example

---
 .env.example | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.env.example b/.env.example
index 727672a7..03287f62 100644
--- a/.env.example
+++ b/.env.example
@@ -10,6 +10,13 @@ LANGFUSE_SECRET_KEY=sk-lf-...
 LANGFUSE_PUBLIC_KEY=pk-lf-...
 LANGFUSE_BASE_URL=https://cloud.langfuse.com
 
+# Chat model overrides (optional). Defaults live in services/models.py; set any
+# of these to switch the live model without redeploying. Accepts an alias
+# (claude-opus, claude-sonnet) or a full model ID.
+# APOLLO_CHAT_MODEL=          # global: forces all chat services to this model
+# APOLLO_WORKFLOW_CHAT_MODEL= # workflow_chat only (overrides the global var)
+# APOLLO_JOB_CHAT_MODEL=      # job_chat only (overrides the global var)
+
 # HF_ACCESS_TOKEN=hf_YOUR-API-KEY-HERE # llama2 base
 # ZILLIZ_URI =  https://in01-XXXXXXXXXXXXX.aws-us-west-2.vectordb.zillizcloud.com:XXXXX
 # ZILLIZ_TOKEN =db_admin:password (or ApiKey)

From 0f7dcd25a1003c005a091071017face7e54ffc4f Mon Sep 17 00:00:00 2001
From: "Hanna Paasivirta (OpenFn)" <hanna@openfn.org>
Date: Tue, 16 Jun 2026 03:38:19 +0900
Subject: [PATCH 4/4] use one env var per chat service, drop the global override

---
 .env.example                                  | 12 +++----
 services/doc_agent_chat/config.yaml           |  4 +--
 services/global_chat/config.yaml              |  2 +-
 services/job_chat/rag.yaml                    |  6 ++--
 services/models.py                            | 34 +++++++------------
 services/tests/unit/test_models.py            | 34 ++++++-------------
 .../workflow_chat/gen_project_config.yaml     |  4 +--
 7 files changed, 37 insertions(+), 59 deletions(-)

diff --git a/.env.example b/.env.example
index 03287f62..4c13b535 100644
--- a/.env.example
+++ b/.env.example
@@ -10,12 +10,12 @@ LANGFUSE_SECRET_KEY=sk-lf-...
 LANGFUSE_PUBLIC_KEY=pk-lf-...
 LANGFUSE_BASE_URL=https://cloud.langfuse.com
 
-# Chat model overrides (optional). Defaults live in services/models.py; set any
-# of these to switch the live model without redeploying. Accepts an alias
-# (claude-opus, claude-sonnet) or a full model ID.
-# APOLLO_CHAT_MODEL=          # global: forces all chat services to this model
-# APOLLO_WORKFLOW_CHAT_MODEL= # workflow_chat only (overrides the global var)
-# APOLLO_JOB_CHAT_MODEL=      # job_chat only (overrides the global var)
+# Chat model overrides (optional, one per service). Defaults live in
+# services/models.py; set a var to switch that service's live model without
+# redeploying. Accepts an alias (claude-opus, claude-sonnet) or a full model ID.
+# APOLLO_GLOBAL_CHAT_MODEL=   # global_chat planner
+# APOLLO_WORKFLOW_CHAT_MODEL= # workflow_chat
+# APOLLO_JOB_CHAT_MODEL=      # job_chat
 
 # HF_ACCESS_TOKEN=hf_YOUR-API-KEY-HERE # llama2 base
 # ZILLIZ_URI =  https://in01-XXXXXXXXXXXXX.aws-us-west-2.vectordb.zillizcloud.com:XXXXX
diff --git a/services/doc_agent_chat/config.yaml b/services/doc_agent_chat/config.yaml
index 000dc0ed..85202aec 100644
--- a/services/doc_agent_chat/config.yaml
+++ b/services/doc_agent_chat/config.yaml
@@ -1,6 +1,6 @@
 config_version: 1.0
-# The chat model is configured in services/models.py (the default plus the
-# APOLLO_CHAT_MODEL env override), not here.
+# The chat model is set in services/models.py (CHAT_MODEL_DEFAULT), not here.
+# doc_agent_chat has no per-service env override.
 max_tokens: 49152
 max_tool_calls: 10
 search_top_k: 5
diff --git a/services/global_chat/config.yaml b/services/global_chat/config.yaml
index f57db3c5..ef59fc80 100644
--- a/services/global_chat/config.yaml
+++ b/services/global_chat/config.yaml
@@ -8,7 +8,7 @@ router:
 
 # Planner configuration (complex orchestration)
 # The planner's chat model is configured in services/models.py (the default plus
-# the APOLLO_CHAT_MODEL env override), not here.
+# the APOLLO_GLOBAL_CHAT_MODEL env override), not here.
 planner:
   max_tokens: 24576
   max_tool_calls: 10
diff --git a/services/job_chat/rag.yaml b/services/job_chat/rag.yaml
index 6c225aea..9d16daed 100644
--- a/services/job_chat/rag.yaml
+++ b/services/job_chat/rag.yaml
@@ -1,7 +1,7 @@
 config_version: 1.0
-# The main chat model is configured in services/models.py (per-service default
-# plus APOLLO_CHAT_MODEL / APOLLO_JOB_CHAT_MODEL env overrides), not here. The
-# llm_* keys below are the smaller RAG/retrieval models and are separate.
+# The main chat model is configured in services/models.py (the default plus the
+# APOLLO_JOB_CHAT_MODEL env override), not here. The llm_* keys below are the
+# smaller RAG/retrieval models and are separate.
 llm_search_decision: "claude-sonnet"
 llm_retrieval: "claude-sonnet"
 threshold: 0.8
diff --git a/services/models.py b/services/models.py
index a2ffbf0d..e9777598 100644
--- a/services/models.py
+++ b/services/models.py
@@ -35,41 +35,33 @@ def resolve_model(alias: str) -> str:
 # Default chat model for any service without its own entry below.
 CHAT_MODEL_DEFAULT = CLAUDE_OPUS
 
-# Global override env var. When set, forces every chat service to this model
-# (except a service that has its own env var set — see precedence below).
-CHAT_MODEL_ENV = "APOLLO_CHAT_MODEL"
-
 # Per-service model config. `default` is the built-in choice; `env`, if set at
-# runtime, overrides it (and the global env var) for that service only.
-# Services not listed use CHAT_MODEL_DEFAULT and only honour CHAT_MODEL_ENV.
+# runtime, overrides it for that service only (one env var per service, no
+# global override). Services not listed (e.g. doc_agent_chat) use
+# CHAT_MODEL_DEFAULT and have no runtime override.
 CHAT_SERVICE_MODELS: dict[str, dict[str, str]] = {
     # workflow_chat forces JSON/YAML output via structured outputs; Sonnet
     # handles that better than Opus today, so it defaults to Sonnet.
     "workflow_chat": {"default": CLAUDE_SONNET, "env": "APOLLO_WORKFLOW_CHAT_MODEL"},
     "job_chat":      {"default": CLAUDE_OPUS,   "env": "APOLLO_JOB_CHAT_MODEL"},
+    "global_chat":   {"default": CLAUDE_OPUS,   "env": "APOLLO_GLOBAL_CHAT_MODEL"},
 }
 
 
 def preferred_chat_model(service: str | None = None) -> str:
     """Resolve the main chat model for `service`.
 
-    Precedence (most specific wins):
-        per-service env var  >  global env var (APOLLO_CHAT_MODEL)
-                             >  per-service default  >  CHAT_MODEL_DEFAULT
-
-    So APOLLO_CHAT_MODEL is a "force everything" switch, while a per-service env
-    var (e.g. APOLLO_WORKFLOW_CHAT_MODEL) pins that one service against it. All
-    env vars are optional; with none set, each service uses its default. The env
-    vars let us switch the live model without redeploying.
+    Precedence: the service's env var if set and non-empty, else its per-service
+    default, else CHAT_MODEL_DEFAULT. Each service's env var (e.g.
+    APOLLO_WORKFLOW_CHAT_MODEL) is optional and lets us switch that one
+    service's live model without redeploying.
     """
     cfg = CHAT_SERVICE_MODELS.get(service, {})
 
-    service_override = os.getenv(cfg["env"]) if cfg.get("env") else None
-    if service_override:
-        return resolve_model(service_override)
-
-    global_override = os.getenv(CHAT_MODEL_ENV)
-    if global_override:
-        return resolve_model(global_override)
+    env_name = cfg.get("env")
+    if env_name:
+        override = os.getenv(env_name)
+        if override:
+            return resolve_model(override)
 
     return cfg.get("default", CHAT_MODEL_DEFAULT)
diff --git a/services/tests/unit/test_models.py b/services/tests/unit/test_models.py
index bf19e000..b7639a22 100644
--- a/services/tests/unit/test_models.py
+++ b/services/tests/unit/test_models.py
@@ -12,17 +12,13 @@
 
 @pytest.fixture(autouse=True)
 def _clear_env(monkeypatch):
-    """Clear the global and all per-service overrides so the real environment
-    can't skew tests."""
-    monkeypatch.delenv(m.CHAT_MODEL_ENV, raising=False)
+    """Clear all per-service overrides so the real environment can't skew tests."""
     for cfg in m.CHAT_SERVICE_MODELS.values():
         monkeypatch.delenv(cfg["env"], raising=False)
 
 
-# --- defaults ---------------------------------------------------------------
-
-def test_unlisted_service_uses_global_default():
-    # A service with no entry (or none at all) falls back to CHAT_MODEL_DEFAULT.
+def test_unlisted_service_uses_default():
+    # An unlisted service (e.g. doc_agent_chat), or no service at all, uses the default.
     assert m.preferred_chat_model() == m.CHAT_MODEL_DEFAULT
     assert m.preferred_chat_model("doc_agent_chat") == m.CHAT_MODEL_DEFAULT
 
@@ -30,27 +26,17 @@ def test_unlisted_service_uses_global_default():
 def test_per_service_defaults():
     assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_SONNET
     assert m.preferred_chat_model("job_chat") == m.CLAUDE_OPUS
+    assert m.preferred_chat_model("global_chat") == m.CLAUDE_OPUS
 
 
-# --- precedence -------------------------------------------------------------
-
-def test_per_service_env_overrides_its_default(monkeypatch):
+def test_env_var_overrides_its_service_default(monkeypatch):
     # Also proves the env value is alias-resolved ("claude-opus" -> full ID).
     monkeypatch.setenv(_WORKFLOW_ENV, "claude-opus")
     assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_OPUS
 
 
-def test_global_env_overrides_defaults(monkeypatch):
-    monkeypatch.setenv(m.CHAT_MODEL_ENV, "claude-sonnet")
-    # applies to a service with no per-service env set...
-    assert m.preferred_chat_model("job_chat") == m.CLAUDE_SONNET
-    # ...and to an unlisted service
-    assert m.preferred_chat_model("doc_agent_chat") == m.CLAUDE_SONNET
-
-
-def test_per_service_env_beats_global_env(monkeypatch):
-    # Global says "force everything to opus", but workflow pins itself to sonnet.
-    monkeypatch.setenv(m.CHAT_MODEL_ENV, "claude-opus")
-    monkeypatch.setenv(_WORKFLOW_ENV, "claude-sonnet")
-    assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_SONNET  # per-service wins
-    assert m.preferred_chat_model("job_chat") == m.CLAUDE_OPUS         # global applies here
+def test_env_var_is_scoped_to_one_service(monkeypatch):
+    # Setting one service's var must not affect another service.
+    monkeypatch.setenv(_WORKFLOW_ENV, "claude-haiku")
+    assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_HAIKU
+    assert m.preferred_chat_model("job_chat") == m.CLAUDE_OPUS  # unaffected
diff --git a/services/workflow_chat/gen_project_config.yaml b/services/workflow_chat/gen_project_config.yaml
index 2f8cbae8..2cead7fd 100644
--- a/services/workflow_chat/gen_project_config.yaml
+++ b/services/workflow_chat/gen_project_config.yaml
@@ -1,5 +1,5 @@
 config_version: 1.0
-# The chat model is configured in services/models.py (per-service default plus
-# APOLLO_CHAT_MODEL / APOLLO_WORKFLOW_CHAT_MODEL env overrides), not here.
+# The chat model is configured in services/models.py (the default plus the
+# APOLLO_WORKFLOW_CHAT_MODEL env override), not here.
 threshold: 0.7
 top_k: 5