From 72e74476be556ecac42033af8362479c599d5fe9 Mon Sep 17 00:00:00 2001 From: "Hanna Paasivirta (OpenFn)" Date: Tue, 16 Jun 2026 01:53:18 +0900 Subject: [PATCH 1/4] use env var for model selection --- pyproject.toml | 1 + services/doc_agent_chat/agent.py | 4 +-- services/doc_agent_chat/config.yaml | 4 ++- services/global_chat/config.yaml | 4 ++- services/global_chat/planner.py | 4 +-- services/job_chat/job_chat.py | 4 +-- services/job_chat/rag.yaml | 5 ++- services/models.py | 36 ++++++++++++++++--- services/streaming_util.py | 2 +- services/testing/judge.py | 6 ++-- services/tests/__init__.py | 0 services/tests/unit/__init__.py | 0 services/tests/unit/test_models.py | 33 +++++++++++++++++ .../workflow_chat/gen_project_config.yaml | 4 ++- services/workflow_chat/workflow_chat.py | 4 +-- 15 files changed, 90 insertions(+), 21 deletions(-) create mode 100644 services/tests/__init__.py create mode 100644 services/tests/unit/__init__.py create mode 100644 services/tests/unit/test_models.py diff --git a/pyproject.toml b/pyproject.toml index 91f181fa..fff1e25b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ testpaths = [ "services/workflow_chat/tests", "services/job_chat/tests", "services/search_docsite/tests", + "services/tests", "services/tools", ] diff --git a/services/doc_agent_chat/agent.py b/services/doc_agent_chat/agent.py index d9e24856..48ea9a93 100644 --- a/services/doc_agent_chat/agent.py +++ b/services/doc_agent_chat/agent.py @@ -7,7 +7,7 @@ from doc_agent_chat.prompt import build_system_prompt from doc_agent_chat.tools import TOOL_DEFINITIONS, search_documents, format_search_results_as_documents from doc_agent_chat.config_loader import ConfigLoader -from models import resolve_model +from models import preferred_chat_model logger = create_logger("agent") @@ -24,7 +24,7 @@ def __init__(self, api_key: Optional[str] = None): raise ValueError("API key must be provided") self.client = Anthropic(api_key=self.api_key) - self.model = resolve_model(config.get("model", "claude-fable")) + self.model = preferred_chat_model(config.get("model")) self.max_tokens = config.get("max_tokens", 49152) self.max_tool_calls = config.get("max_tool_calls", 10) self.search_top_k = config.get("search_top_k", 5) diff --git a/services/doc_agent_chat/config.yaml b/services/doc_agent_chat/config.yaml index 389bf4b0..907d3c19 100644 --- a/services/doc_agent_chat/config.yaml +++ b/services/doc_agent_chat/config.yaml @@ -1,5 +1,7 @@ config_version: 1.0 -model: claude-fable +# Chat model is set centrally in services/models.py (CHAT_MODEL_DEFAULT) and can +# be overridden at runtime via the APOLLO_CHAT_MODEL env var. Add a `model:` key +# here only to pin this service to a specific model. max_tokens: 49152 max_tool_calls: 10 search_top_k: 5 diff --git a/services/global_chat/config.yaml b/services/global_chat/config.yaml index 376ea662..1f570a25 100644 --- a/services/global_chat/config.yaml +++ b/services/global_chat/config.yaml @@ -7,7 +7,9 @@ router: temperature: 0.0 # Planner configuration (complex orchestration) +# Chat model is set centrally in services/models.py (CHAT_MODEL_DEFAULT) and can +# be overridden at runtime via the APOLLO_CHAT_MODEL env var. Add a `model:` key +# here only to pin the planner to a specific model. planner: - model: "claude-fable" max_tokens: 24576 max_tool_calls: 10 diff --git a/services/global_chat/planner.py b/services/global_chat/planner.py index 4918aae3..6d1fb4af 100644 --- a/services/global_chat/planner.py +++ b/services/global_chat/planner.py @@ -24,7 +24,7 @@ STATUS_PLANNING, ) from global_chat.config_loader import ConfigLoader -from models import resolve_model +from models import preferred_chat_model from global_chat.tools.tool_definitions import TOOL_DEFINITIONS from global_chat.yaml_utils import stitch_job_code, redact_job_bodies, find_job_in_yaml from tools.search_documentation.search_documentation import search_documentation_tool @@ -60,7 +60,7 @@ def __init__(self, config_loader: ConfigLoader, api_key: Optional[str] = None): self.tools = TOOL_DEFINITIONS planner_config = config_loader.config.get("planner", {}) - self.model = resolve_model(planner_config.get("model", "claude-fable")) + self.model = preferred_chat_model(planner_config.get("model")) self.max_tokens = planner_config.get("max_tokens", 24576) self.max_tool_calls = planner_config.get("max_tool_calls", 20) diff --git a/services/job_chat/job_chat.py b/services/job_chat/job_chat.py index 7da4d13f..522bfe4b 100644 --- a/services/job_chat/job_chat.py +++ b/services/job_chat/job_chat.py @@ -29,13 +29,13 @@ STATUS_WORKING, STATUS_WRITING_CODE, ) -from models import resolve_model +from models import preferred_chat_model _dir = os.path.dirname(os.path.abspath(__file__)) with open(os.path.join(_dir, "rag.yaml")) as _f: _service_config = yaml.safe_load(_f) -_MODEL = resolve_model(_service_config.get("model", "claude-fable")) +_MODEL = preferred_chat_model(_service_config.get("model")) logger = create_logger("job_chat") diff --git a/services/job_chat/rag.yaml b/services/job_chat/rag.yaml index 82546592..0f251d19 100644 --- a/services/job_chat/rag.yaml +++ b/services/job_chat/rag.yaml @@ -1,5 +1,8 @@ config_version: 1.0 -model: "claude-fable" +# Main chat model is set centrally in services/models.py (CHAT_MODEL_DEFAULT) and +# can be overridden at runtime via the APOLLO_CHAT_MODEL env var. Add a `model:` +# key here only to pin this service. (The llm_* keys below are the smaller +# RAG/retrieval models and are unaffected by that default.) llm_search_decision: "claude-sonnet" llm_retrieval: "claude-sonnet" threshold: 0.8 diff --git a/services/models.py b/services/models.py index 10abd40f..56c2e275 100644 --- a/services/models.py +++ b/services/models.py @@ -3,18 +3,15 @@ Update values here to change models used across all services. """ +import os + CLAUDE_MODELS: dict[str, str] = { "claude-opus": "claude-opus-4-8", - # Fable rejects temperature/top_p/top_k and any explicit `thinking` - # config other than {"type": "adaptive"}; tokenizer yields ~30% more - # tokens than Sonnet/Opus for the same content. - "claude-fable": "claude-fable-5", "claude-sonnet": "claude-sonnet-4-6", "claude-haiku": "claude-haiku-4-5-20251001", } CLAUDE_OPUS: str = CLAUDE_MODELS["claude-opus"] -CLAUDE_FABLE: str = CLAUDE_MODELS["claude-fable"] CLAUDE_SONNET: str = CLAUDE_MODELS["claude-sonnet"] CLAUDE_HAIKU: str = CLAUDE_MODELS["claude-haiku"] @@ -22,3 +19,32 @@ def resolve_model(alias: str) -> str: """Resolve a model alias to its full ID. Passes through unknown strings unchanged.""" return CLAUDE_MODELS.get(alias, alias) + + +# --- Main chat model selection ---------------------------------------------- +# +# The "main chat model" is the large model that drives user-facing chat +# (job_chat, workflow_chat, doc_agent_chat, and the global_chat planner). It is +# distinct from the smaller models used for RAG/routing (haiku/sonnet), which +# are configured directly and are NOT affected by the helper below. + +# Env var that overrides the main chat model at runtime, so we can switch the +# live model without a redeploy. Holds a model alias or full ID. +CHAT_MODEL_ENV = "APOLLO_CHAT_MODEL" + +# Default main chat model when neither the env var nor a service config overrides it. +CHAT_MODEL_DEFAULT = CLAUDE_OPUS + + +def preferred_chat_model(config_value: str | None = None) -> str: + """Resolve the main chat model. + + Precedence: APOLLO_CHAT_MODEL env var > per-service config value > CLAUDE_OPUS. + The env var lets us switch the live chat model without redeploying. + """ + override = os.getenv(CHAT_MODEL_ENV) + if override: + return resolve_model(override) + if config_value: + return resolve_model(config_value) + return CHAT_MODEL_DEFAULT diff --git a/services/streaming_util.py b/services/streaming_util.py index 7b6a4e78..045aa102 100644 --- a/services/streaming_util.py +++ b/services/streaming_util.py @@ -96,7 +96,7 @@ class StreamManager: block lifecycle and index tracking. Example usage: - manager = StreamManager(model=resolve_model("claude-fable")) + manager = StreamManager(model=resolve_model("claude-opus")) manager.start_stream() manager.send_thinking("Researching...") manager.send_text("Here's what I found...") diff --git a/services/testing/judge.py b/services/testing/judge.py index 747034d9..4600a76b 100644 --- a/services/testing/judge.py +++ b/services/testing/judge.py @@ -34,11 +34,11 @@ from anthropic import Anthropic -from models import CLAUDE_FABLE +from models import CLAUDE_OPUS from testing.judges import load_judge -DEFAULT_MODEL = CLAUDE_FABLE +DEFAULT_MODEL = CLAUDE_OPUS DEFAULT_JUDGE = "general" @@ -277,7 +277,7 @@ def evaluate( guessing. judge: Name of the judge (file at services/testing/judges/.md). Defaults to "general". - model: Model to use. Defaults to CLAUDE_FABLE from services/models.py. + model: Model to use. Defaults to CLAUDE_OPUS from services/models.py. client: Optional Anthropic client. Constructed from env if not given. Returns: diff --git a/services/tests/__init__.py b/services/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/services/tests/unit/__init__.py b/services/tests/unit/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/services/tests/unit/test_models.py b/services/tests/unit/test_models.py new file mode 100644 index 00000000..27431da9 --- /dev/null +++ b/services/tests/unit/test_models.py @@ -0,0 +1,33 @@ +"""Unit tests for the central chat-model selection in `services/models.py`. + +No real model calls — pure resolution logic. The repo-root conftest marks +everything under a `unit/` dir as `unit` and blocks real client construction. +""" + +import models as m +import pytest + + +@pytest.fixture(autouse=True) +def _clear_env(monkeypatch): + """Ensure a stray APOLLO_CHAT_MODEL in the real environment can't skew tests.""" + monkeypatch.delenv(m.CHAT_MODEL_ENV, raising=False) + + +# --- preferred_chat_model: precedence --------------------------------------- + +def test_preferred_defaults_to_opus_when_nothing_set(): + assert m.preferred_chat_model() == m.CLAUDE_OPUS + assert m.preferred_chat_model(None) == m.CLAUDE_OPUS + + +def test_preferred_uses_config_value_when_no_env(): + assert m.preferred_chat_model("claude-sonnet") == m.CLAUDE_SONNET + # full IDs pass through unchanged + assert m.preferred_chat_model("claude-opus-4-7") == "claude-opus-4-7" + + +def test_preferred_env_overrides_config(monkeypatch): + # Also proves the env value is alias-resolved ("claude-sonnet" -> full ID). + monkeypatch.setenv(m.CHAT_MODEL_ENV, "claude-sonnet") + assert m.preferred_chat_model("claude-opus") == m.CLAUDE_SONNET diff --git a/services/workflow_chat/gen_project_config.yaml b/services/workflow_chat/gen_project_config.yaml index a88d652d..1b84e0b6 100644 --- a/services/workflow_chat/gen_project_config.yaml +++ b/services/workflow_chat/gen_project_config.yaml @@ -1,4 +1,6 @@ config_version: 1.0 -model: "claude-fable" +# Chat model is set centrally in services/models.py (CHAT_MODEL_DEFAULT) and can +# be overridden at runtime via the APOLLO_CHAT_MODEL env var. Add a `model:` key +# here only to pin this service to a specific model. threshold: 0.7 top_k: 5 diff --git a/services/workflow_chat/workflow_chat.py b/services/workflow_chat/workflow_chat.py index 08571bb6..cadd78dd 100644 --- a/services/workflow_chat/workflow_chat.py +++ b/services/workflow_chat/workflow_chat.py @@ -6,13 +6,13 @@ from typing import List, Optional, Dict, Any import yaml from dataclasses import dataclass -from models import resolve_model +from models import preferred_chat_model _dir = os.path.dirname(os.path.abspath(__file__)) with open(os.path.join(_dir, "gen_project_config.yaml")) as _f: _service_config = yaml.safe_load(_f) -_MODEL = resolve_model(_service_config.get("model", "claude-fable")) +_MODEL = preferred_chat_model(_service_config.get("model")) # JSON schema for structured outputs — guarantees valid JSON from the API _OUTPUT_SCHEMA = { From d8e2a2cc1c286c5f9c3e7e3b9755ff61b94700d0 Mon Sep 17 00:00:00 2001 From: "Hanna Paasivirta (OpenFn)" Date: Tue, 16 Jun 2026 03:00:50 +0900 Subject: [PATCH 2/4] use service specific model settings --- services/doc_agent_chat/agent.py | 2 +- services/doc_agent_chat/config.yaml | 5 +- services/global_chat/config.yaml | 5 +- services/global_chat/planner.py | 2 +- services/job_chat/job_chat.py | 7 +-- services/job_chat/rag.yaml | 7 +-- services/models.py | 55 ++++++++++++++----- services/tests/unit/test_models.py | 49 ++++++++++++----- .../workflow_chat/gen_project_config.yaml | 5 +- services/workflow_chat/workflow_chat.py | 6 +- 10 files changed, 89 insertions(+), 54 deletions(-) diff --git a/services/doc_agent_chat/agent.py b/services/doc_agent_chat/agent.py index 48ea9a93..2e06e437 100644 --- a/services/doc_agent_chat/agent.py +++ b/services/doc_agent_chat/agent.py @@ -24,7 +24,7 @@ def __init__(self, api_key: Optional[str] = None): raise ValueError("API key must be provided") self.client = Anthropic(api_key=self.api_key) - self.model = preferred_chat_model(config.get("model")) + self.model = preferred_chat_model("doc_agent_chat") self.max_tokens = config.get("max_tokens", 49152) self.max_tool_calls = config.get("max_tool_calls", 10) self.search_top_k = config.get("search_top_k", 5) diff --git a/services/doc_agent_chat/config.yaml b/services/doc_agent_chat/config.yaml index 907d3c19..000dc0ed 100644 --- a/services/doc_agent_chat/config.yaml +++ b/services/doc_agent_chat/config.yaml @@ -1,7 +1,6 @@ config_version: 1.0 -# Chat model is set centrally in services/models.py (CHAT_MODEL_DEFAULT) and can -# be overridden at runtime via the APOLLO_CHAT_MODEL env var. Add a `model:` key -# here only to pin this service to a specific model. +# The chat model is configured in services/models.py (the default plus the +# APOLLO_CHAT_MODEL env override), not here. max_tokens: 49152 max_tool_calls: 10 search_top_k: 5 diff --git a/services/global_chat/config.yaml b/services/global_chat/config.yaml index 1f570a25..f57db3c5 100644 --- a/services/global_chat/config.yaml +++ b/services/global_chat/config.yaml @@ -7,9 +7,8 @@ router: temperature: 0.0 # Planner configuration (complex orchestration) -# Chat model is set centrally in services/models.py (CHAT_MODEL_DEFAULT) and can -# be overridden at runtime via the APOLLO_CHAT_MODEL env var. Add a `model:` key -# here only to pin the planner to a specific model. +# The planner's chat model is configured in services/models.py (the default plus +# the APOLLO_CHAT_MODEL env override), not here. planner: max_tokens: 24576 max_tool_calls: 10 diff --git a/services/global_chat/planner.py b/services/global_chat/planner.py index 6d1fb4af..9e806a91 100644 --- a/services/global_chat/planner.py +++ b/services/global_chat/planner.py @@ -60,7 +60,7 @@ def __init__(self, config_loader: ConfigLoader, api_key: Optional[str] = None): self.tools = TOOL_DEFINITIONS planner_config = config_loader.config.get("planner", {}) - self.model = preferred_chat_model(planner_config.get("model")) + self.model = preferred_chat_model("global_chat") self.max_tokens = planner_config.get("max_tokens", 24576) self.max_tool_calls = planner_config.get("max_tool_calls", 20) diff --git a/services/job_chat/job_chat.py b/services/job_chat/job_chat.py index 522bfe4b..cc569590 100644 --- a/services/job_chat/job_chat.py +++ b/services/job_chat/job_chat.py @@ -1,7 +1,6 @@ import os import json import re -import yaml from typing import List, Optional, Dict, Any from dataclasses import dataclass import httpx @@ -31,11 +30,7 @@ ) from models import preferred_chat_model -_dir = os.path.dirname(os.path.abspath(__file__)) -with open(os.path.join(_dir, "rag.yaml")) as _f: - _service_config = yaml.safe_load(_f) - -_MODEL = preferred_chat_model(_service_config.get("model")) +_MODEL = preferred_chat_model("job_chat") logger = create_logger("job_chat") diff --git a/services/job_chat/rag.yaml b/services/job_chat/rag.yaml index 0f251d19..6c225aea 100644 --- a/services/job_chat/rag.yaml +++ b/services/job_chat/rag.yaml @@ -1,8 +1,7 @@ config_version: 1.0 -# Main chat model is set centrally in services/models.py (CHAT_MODEL_DEFAULT) and -# can be overridden at runtime via the APOLLO_CHAT_MODEL env var. Add a `model:` -# key here only to pin this service. (The llm_* keys below are the smaller -# RAG/retrieval models and are unaffected by that default.) +# The main chat model is configured in services/models.py (per-service default +# plus APOLLO_CHAT_MODEL / APOLLO_JOB_CHAT_MODEL env overrides), not here. The +# llm_* keys below are the smaller RAG/retrieval models and are separate. llm_search_decision: "claude-sonnet" llm_retrieval: "claude-sonnet" threshold: 0.8 diff --git a/services/models.py b/services/models.py index 56c2e275..a2ffbf0d 100644 --- a/services/models.py +++ b/services/models.py @@ -26,25 +26,50 @@ def resolve_model(alias: str) -> str: # The "main chat model" is the large model that drives user-facing chat # (job_chat, workflow_chat, doc_agent_chat, and the global_chat planner). It is # distinct from the smaller models used for RAG/routing (haiku/sonnet), which -# are configured directly and are NOT affected by the helper below. +# are configured directly and are NOT affected by the helpers below. +# +# The whole per-service model story lives here on purpose, so there is one place +# to read what each service uses and how to override it. Nothing is configured +# in the service yamls. + +# Default chat model for any service without its own entry below. +CHAT_MODEL_DEFAULT = CLAUDE_OPUS -# Env var that overrides the main chat model at runtime, so we can switch the -# live model without a redeploy. Holds a model alias or full ID. +# Global override env var. When set, forces every chat service to this model +# (except a service that has its own env var set — see precedence below). CHAT_MODEL_ENV = "APOLLO_CHAT_MODEL" -# Default main chat model when neither the env var nor a service config overrides it. -CHAT_MODEL_DEFAULT = CLAUDE_OPUS +# Per-service model config. `default` is the built-in choice; `env`, if set at +# runtime, overrides it (and the global env var) for that service only. +# Services not listed use CHAT_MODEL_DEFAULT and only honour CHAT_MODEL_ENV. +CHAT_SERVICE_MODELS: dict[str, dict[str, str]] = { + # workflow_chat forces JSON/YAML output via structured outputs; Sonnet + # handles that better than Opus today, so it defaults to Sonnet. + "workflow_chat": {"default": CLAUDE_SONNET, "env": "APOLLO_WORKFLOW_CHAT_MODEL"}, + "job_chat": {"default": CLAUDE_OPUS, "env": "APOLLO_JOB_CHAT_MODEL"}, +} -def preferred_chat_model(config_value: str | None = None) -> str: - """Resolve the main chat model. +def preferred_chat_model(service: str | None = None) -> str: + """Resolve the main chat model for `service`. - Precedence: APOLLO_CHAT_MODEL env var > per-service config value > CLAUDE_OPUS. - The env var lets us switch the live chat model without redeploying. + Precedence (most specific wins): + per-service env var > global env var (APOLLO_CHAT_MODEL) + > per-service default > CHAT_MODEL_DEFAULT + + So APOLLO_CHAT_MODEL is a "force everything" switch, while a per-service env + var (e.g. APOLLO_WORKFLOW_CHAT_MODEL) pins that one service against it. All + env vars are optional; with none set, each service uses its default. The env + vars let us switch the live model without redeploying. """ - override = os.getenv(CHAT_MODEL_ENV) - if override: - return resolve_model(override) - if config_value: - return resolve_model(config_value) - return CHAT_MODEL_DEFAULT + cfg = CHAT_SERVICE_MODELS.get(service, {}) + + service_override = os.getenv(cfg["env"]) if cfg.get("env") else None + if service_override: + return resolve_model(service_override) + + global_override = os.getenv(CHAT_MODEL_ENV) + if global_override: + return resolve_model(global_override) + + return cfg.get("default", CHAT_MODEL_DEFAULT) diff --git a/services/tests/unit/test_models.py b/services/tests/unit/test_models.py index 27431da9..bf19e000 100644 --- a/services/tests/unit/test_models.py +++ b/services/tests/unit/test_models.py @@ -1,33 +1,56 @@ """Unit tests for the central chat-model selection in `services/models.py`. -No real model calls — pure resolution logic. The repo-root conftest marks +No real model calls, pure resolution logic. The repo-root conftest marks everything under a `unit/` dir as `unit` and blocks real client construction. """ import models as m import pytest +_WORKFLOW_ENV = m.CHAT_SERVICE_MODELS["workflow_chat"]["env"] + @pytest.fixture(autouse=True) def _clear_env(monkeypatch): - """Ensure a stray APOLLO_CHAT_MODEL in the real environment can't skew tests.""" + """Clear the global and all per-service overrides so the real environment + can't skew tests.""" monkeypatch.delenv(m.CHAT_MODEL_ENV, raising=False) + for cfg in m.CHAT_SERVICE_MODELS.values(): + monkeypatch.delenv(cfg["env"], raising=False) + + +# --- defaults --------------------------------------------------------------- + +def test_unlisted_service_uses_global_default(): + # A service with no entry (or none at all) falls back to CHAT_MODEL_DEFAULT. + assert m.preferred_chat_model() == m.CHAT_MODEL_DEFAULT + assert m.preferred_chat_model("doc_agent_chat") == m.CHAT_MODEL_DEFAULT -# --- preferred_chat_model: precedence --------------------------------------- +def test_per_service_defaults(): + assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_SONNET + assert m.preferred_chat_model("job_chat") == m.CLAUDE_OPUS -def test_preferred_defaults_to_opus_when_nothing_set(): - assert m.preferred_chat_model() == m.CLAUDE_OPUS - assert m.preferred_chat_model(None) == m.CLAUDE_OPUS +# --- precedence ------------------------------------------------------------- -def test_preferred_uses_config_value_when_no_env(): - assert m.preferred_chat_model("claude-sonnet") == m.CLAUDE_SONNET - # full IDs pass through unchanged - assert m.preferred_chat_model("claude-opus-4-7") == "claude-opus-4-7" +def test_per_service_env_overrides_its_default(monkeypatch): + # Also proves the env value is alias-resolved ("claude-opus" -> full ID). + monkeypatch.setenv(_WORKFLOW_ENV, "claude-opus") + assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_OPUS -def test_preferred_env_overrides_config(monkeypatch): - # Also proves the env value is alias-resolved ("claude-sonnet" -> full ID). +def test_global_env_overrides_defaults(monkeypatch): monkeypatch.setenv(m.CHAT_MODEL_ENV, "claude-sonnet") - assert m.preferred_chat_model("claude-opus") == m.CLAUDE_SONNET + # applies to a service with no per-service env set... + assert m.preferred_chat_model("job_chat") == m.CLAUDE_SONNET + # ...and to an unlisted service + assert m.preferred_chat_model("doc_agent_chat") == m.CLAUDE_SONNET + + +def test_per_service_env_beats_global_env(monkeypatch): + # Global says "force everything to opus", but workflow pins itself to sonnet. + monkeypatch.setenv(m.CHAT_MODEL_ENV, "claude-opus") + monkeypatch.setenv(_WORKFLOW_ENV, "claude-sonnet") + assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_SONNET # per-service wins + assert m.preferred_chat_model("job_chat") == m.CLAUDE_OPUS # global applies here diff --git a/services/workflow_chat/gen_project_config.yaml b/services/workflow_chat/gen_project_config.yaml index 1b84e0b6..2f8cbae8 100644 --- a/services/workflow_chat/gen_project_config.yaml +++ b/services/workflow_chat/gen_project_config.yaml @@ -1,6 +1,5 @@ config_version: 1.0 -# Chat model is set centrally in services/models.py (CHAT_MODEL_DEFAULT) and can -# be overridden at runtime via the APOLLO_CHAT_MODEL env var. Add a `model:` key -# here only to pin this service to a specific model. +# The chat model is configured in services/models.py (per-service default plus +# APOLLO_CHAT_MODEL / APOLLO_WORKFLOW_CHAT_MODEL env overrides), not here. threshold: 0.7 top_k: 5 diff --git a/services/workflow_chat/workflow_chat.py b/services/workflow_chat/workflow_chat.py index cadd78dd..281a2af0 100644 --- a/services/workflow_chat/workflow_chat.py +++ b/services/workflow_chat/workflow_chat.py @@ -8,11 +8,7 @@ from dataclasses import dataclass from models import preferred_chat_model -_dir = os.path.dirname(os.path.abspath(__file__)) -with open(os.path.join(_dir, "gen_project_config.yaml")) as _f: - _service_config = yaml.safe_load(_f) - -_MODEL = preferred_chat_model(_service_config.get("model")) +_MODEL = preferred_chat_model("workflow_chat") # JSON schema for structured outputs — guarantees valid JSON from the API _OUTPUT_SCHEMA = { From 455b36ec85d3109642a376548917f6482c1c243b Mon Sep 17 00:00:00 2001 From: "Hanna Paasivirta (OpenFn)" Date: Tue, 16 Jun 2026 03:04:22 +0900 Subject: [PATCH 3/4] add env example --- .env.example | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.env.example b/.env.example index 727672a7..03287f62 100644 --- a/.env.example +++ b/.env.example @@ -10,6 +10,13 @@ LANGFUSE_SECRET_KEY=sk-lf-... LANGFUSE_PUBLIC_KEY=pk-lf-... LANGFUSE_BASE_URL=https://cloud.langfuse.com +# Chat model overrides (optional). Defaults live in services/models.py; set any +# of these to switch the live model without redeploying. Accepts an alias +# (claude-opus, claude-sonnet) or a full model ID. +# APOLLO_CHAT_MODEL= # global: forces all chat services to this model +# APOLLO_WORKFLOW_CHAT_MODEL= # workflow_chat only (overrides the global var) +# APOLLO_JOB_CHAT_MODEL= # job_chat only (overrides the global var) + # HF_ACCESS_TOKEN=hf_YOUR-API-KEY-HERE # llama2 base # ZILLIZ_URI = https://in01-XXXXXXXXXXXXX.aws-us-west-2.vectordb.zillizcloud.com:XXXXX # ZILLIZ_TOKEN =db_admin:password (or ApiKey) From 0f7dcd25a1003c005a091071017face7e54ffc4f Mon Sep 17 00:00:00 2001 From: "Hanna Paasivirta (OpenFn)" Date: Tue, 16 Jun 2026 03:38:19 +0900 Subject: [PATCH 4/4] use three vars --- .env.example | 12 +++---- services/doc_agent_chat/config.yaml | 4 +-- services/global_chat/config.yaml | 2 +- services/job_chat/rag.yaml | 6 ++-- services/models.py | 34 +++++++------------ services/tests/unit/test_models.py | 34 ++++++------------- .../workflow_chat/gen_project_config.yaml | 4 +-- 7 files changed, 37 insertions(+), 59 deletions(-) diff --git a/.env.example b/.env.example index 03287f62..4c13b535 100644 --- a/.env.example +++ b/.env.example @@ -10,12 +10,12 @@ LANGFUSE_SECRET_KEY=sk-lf-... LANGFUSE_PUBLIC_KEY=pk-lf-... LANGFUSE_BASE_URL=https://cloud.langfuse.com -# Chat model overrides (optional). Defaults live in services/models.py; set any -# of these to switch the live model without redeploying. Accepts an alias -# (claude-opus, claude-sonnet) or a full model ID. -# APOLLO_CHAT_MODEL= # global: forces all chat services to this model -# APOLLO_WORKFLOW_CHAT_MODEL= # workflow_chat only (overrides the global var) -# APOLLO_JOB_CHAT_MODEL= # job_chat only (overrides the global var) +# Chat model overrides (optional, one per service). Defaults live in +# services/models.py; set a var to switch that service's live model without +# redeploying. Accepts an alias (claude-opus, claude-sonnet) or a full model ID. +# APOLLO_GLOBAL_CHAT_MODEL= # global_chat planner +# APOLLO_WORKFLOW_CHAT_MODEL= # workflow_chat +# APOLLO_JOB_CHAT_MODEL= # job_chat # HF_ACCESS_TOKEN=hf_YOUR-API-KEY-HERE # llama2 base # ZILLIZ_URI = https://in01-XXXXXXXXXXXXX.aws-us-west-2.vectordb.zillizcloud.com:XXXXX diff --git a/services/doc_agent_chat/config.yaml b/services/doc_agent_chat/config.yaml index 000dc0ed..85202aec 100644 --- a/services/doc_agent_chat/config.yaml +++ b/services/doc_agent_chat/config.yaml @@ -1,6 +1,6 @@ config_version: 1.0 -# The chat model is configured in services/models.py (the default plus the -# APOLLO_CHAT_MODEL env override), not here. +# The chat model is configured in services/models.py (the default; doc_agent has +# no per-service env override), not here. max_tokens: 49152 max_tool_calls: 10 search_top_k: 5 diff --git a/services/global_chat/config.yaml b/services/global_chat/config.yaml index f57db3c5..ef59fc80 100644 --- a/services/global_chat/config.yaml +++ b/services/global_chat/config.yaml @@ -8,7 +8,7 @@ router: # Planner configuration (complex orchestration) # The planner's chat model is configured in services/models.py (the default plus -# the APOLLO_CHAT_MODEL env override), not here. +# the APOLLO_GLOBAL_CHAT_MODEL env override), not here. planner: max_tokens: 24576 max_tool_calls: 10 diff --git a/services/job_chat/rag.yaml b/services/job_chat/rag.yaml index 6c225aea..9d16daed 100644 --- a/services/job_chat/rag.yaml +++ b/services/job_chat/rag.yaml @@ -1,7 +1,7 @@ config_version: 1.0 -# The main chat model is configured in services/models.py (per-service default -# plus APOLLO_CHAT_MODEL / APOLLO_JOB_CHAT_MODEL env overrides), not here. The -# llm_* keys below are the smaller RAG/retrieval models and are separate. +# The main chat model is configured in services/models.py (the default plus the +# APOLLO_JOB_CHAT_MODEL env override), not here. The llm_* keys below are the +# smaller RAG/retrieval models and are separate. llm_search_decision: "claude-sonnet" llm_retrieval: "claude-sonnet" threshold: 0.8 diff --git a/services/models.py b/services/models.py index a2ffbf0d..e9777598 100644 --- a/services/models.py +++ b/services/models.py @@ -35,41 +35,33 @@ def resolve_model(alias: str) -> str: # Default chat model for any service without its own entry below. CHAT_MODEL_DEFAULT = CLAUDE_OPUS -# Global override env var. When set, forces every chat service to this model -# (except a service that has its own env var set — see precedence below). -CHAT_MODEL_ENV = "APOLLO_CHAT_MODEL" - # Per-service model config. `default` is the built-in choice; `env`, if set at -# runtime, overrides it (and the global env var) for that service only. -# Services not listed use CHAT_MODEL_DEFAULT and only honour CHAT_MODEL_ENV. +# runtime, overrides it for that service only (one env var per service, no +# global override). Services not listed (e.g. doc_agent_chat) use +# CHAT_MODEL_DEFAULT and have no runtime override. CHAT_SERVICE_MODELS: dict[str, dict[str, str]] = { # workflow_chat forces JSON/YAML output via structured outputs; Sonnet # handles that better than Opus today, so it defaults to Sonnet. "workflow_chat": {"default": CLAUDE_SONNET, "env": "APOLLO_WORKFLOW_CHAT_MODEL"}, "job_chat": {"default": CLAUDE_OPUS, "env": "APOLLO_JOB_CHAT_MODEL"}, + "global_chat": {"default": CLAUDE_OPUS, "env": "APOLLO_GLOBAL_CHAT_MODEL"}, } def preferred_chat_model(service: str | None = None) -> str: """Resolve the main chat model for `service`. - Precedence (most specific wins): - per-service env var > global env var (APOLLO_CHAT_MODEL) - > per-service default > CHAT_MODEL_DEFAULT - - So APOLLO_CHAT_MODEL is a "force everything" switch, while a per-service env - var (e.g. APOLLO_WORKFLOW_CHAT_MODEL) pins that one service against it. All - env vars are optional; with none set, each service uses its default. The env - vars let us switch the live model without redeploying. + Precedence: the service's env var if set, else its per-service default, else + CHAT_MODEL_DEFAULT. Each service's env var (e.g. APOLLO_WORKFLOW_CHAT_MODEL) + is optional and lets us switch that one service's live model without + redeploying. """ cfg = CHAT_SERVICE_MODELS.get(service, {}) - service_override = os.getenv(cfg["env"]) if cfg.get("env") else None - if service_override: - return resolve_model(service_override) - - global_override = os.getenv(CHAT_MODEL_ENV) - if global_override: - return resolve_model(global_override) + env_name = cfg.get("env") + if env_name: + override = os.getenv(env_name) + if override: + return resolve_model(override) return cfg.get("default", CHAT_MODEL_DEFAULT) diff --git a/services/tests/unit/test_models.py b/services/tests/unit/test_models.py index bf19e000..b7639a22 100644 --- a/services/tests/unit/test_models.py +++ b/services/tests/unit/test_models.py @@ -12,17 +12,13 @@ @pytest.fixture(autouse=True) def _clear_env(monkeypatch): - """Clear the global and all per-service overrides so the real environment - can't skew tests.""" - monkeypatch.delenv(m.CHAT_MODEL_ENV, raising=False) + """Clear all per-service overrides so the real environment can't skew tests.""" for cfg in m.CHAT_SERVICE_MODELS.values(): monkeypatch.delenv(cfg["env"], raising=False) -# --- defaults --------------------------------------------------------------- - -def test_unlisted_service_uses_global_default(): - # A service with no entry (or none at all) falls back to CHAT_MODEL_DEFAULT. +def test_unlisted_service_uses_default(): + # A service with no entry (e.g. doc_agent_chat, or none at all) uses the default. assert m.preferred_chat_model() == m.CHAT_MODEL_DEFAULT assert m.preferred_chat_model("doc_agent_chat") == m.CHAT_MODEL_DEFAULT @@ -30,27 +26,17 @@ def test_unlisted_service_uses_global_default(): def test_per_service_defaults(): assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_SONNET assert m.preferred_chat_model("job_chat") == m.CLAUDE_OPUS + assert m.preferred_chat_model("global_chat") == m.CLAUDE_OPUS -# --- precedence ------------------------------------------------------------- - -def test_per_service_env_overrides_its_default(monkeypatch): +def test_env_var_overrides_its_service_default(monkeypatch): # Also proves the env value is alias-resolved ("claude-opus" -> full ID). monkeypatch.setenv(_WORKFLOW_ENV, "claude-opus") assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_OPUS -def test_global_env_overrides_defaults(monkeypatch): - monkeypatch.setenv(m.CHAT_MODEL_ENV, "claude-sonnet") - # applies to a service with no per-service env set... - assert m.preferred_chat_model("job_chat") == m.CLAUDE_SONNET - # ...and to an unlisted service - assert m.preferred_chat_model("doc_agent_chat") == m.CLAUDE_SONNET - - -def test_per_service_env_beats_global_env(monkeypatch): - # Global says "force everything to opus", but workflow pins itself to sonnet. - monkeypatch.setenv(m.CHAT_MODEL_ENV, "claude-opus") - monkeypatch.setenv(_WORKFLOW_ENV, "claude-sonnet") - assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_SONNET # per-service wins - assert m.preferred_chat_model("job_chat") == m.CLAUDE_OPUS # global applies here +def test_env_var_is_scoped_to_one_service(monkeypatch): + # Setting one service's var must not affect another service. + monkeypatch.setenv(_WORKFLOW_ENV, "claude-haiku") + assert m.preferred_chat_model("workflow_chat") == m.CLAUDE_HAIKU + assert m.preferred_chat_model("job_chat") == m.CLAUDE_OPUS # unaffected diff --git a/services/workflow_chat/gen_project_config.yaml b/services/workflow_chat/gen_project_config.yaml index 2f8cbae8..2cead7fd 100644 --- a/services/workflow_chat/gen_project_config.yaml +++ b/services/workflow_chat/gen_project_config.yaml @@ -1,5 +1,5 @@ config_version: 1.0 -# The chat model is configured in services/models.py (per-service default plus -# APOLLO_CHAT_MODEL / APOLLO_WORKFLOW_CHAT_MODEL env overrides), not here. +# The chat model is configured in services/models.py (the default plus the +# APOLLO_WORKFLOW_CHAT_MODEL env override), not here. threshold: 0.7 top_k: 5