diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 098a53a9..28d64166 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -6,18 +6,16 @@ Fixes #
 
 ## Implementation Details
 
-A more detailed breakdown of the changes, including motivations (if not provided in the issue).
+A more detailed breakdown of the changes, including motivations (if not provided
+in the issue).
 
 ## AI Usage
 
-Please disclose how you've used AI in this work (it's cool, we just want to know!):
+Please disclose whether you've used AI in this work (it's cool, we just want to
+know!):
 
-- [ ] Code generation (copilot but not intellisense)
-- [ ] Learning or fact checking
-- [ ] Strategy / design
-- [ ] Optimisation / refactoring
-- [ ] Translation / spellchecking / doc gen
-- [ ] Other
-- [ ] I have not used AI
+- [ ] Yes, I have used AI
+- [ ] No, I have not used AI
 
-You can read more details in our [Responsible AI Policy](https://www.openfn.org/ai#pull-request-templates)
+You can read more details in our
+[Responsible AI Policy](https://www.openfn.org/ai#pull-request-templates)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5b857b77..bcfdeeb0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,17 @@
 # apollo
 
+## 1.5.0
+
+### Minor Changes
+
+- 5cd94ea: upgrade to opus in planner and job chat
+
+### Patch Changes
+
+- 01754a0: Add version metadata to HTTP headers, chat payloads (meta key), and
+  langfuse traces
+- 01754a0: For langfuse traces with code, add a `has_code_attachment` tag
+
 ## 1.4.0
 
 ### Minor Changes
diff --git a/package.json b/package.json
index fed1c224..d25cbc20 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
   "name": "apollo",
   "module": "platform/index.ts",
-  "version": "1.4.0",
+  "version": "1.5.0",
   "type": "module",
   "scripts": {
     "start": "NODE_ENV=production bun platform/src/index.ts",
diff --git a/platform/src/bridge.ts b/platform/src/bridge.ts
index cbb46abf..d5e13d0c 100644
--- a/platform/src/bridge.ts
+++ b/platform/src/bridge.ts
@@ -3,6 +3,7 @@ import path from "node:path";
 import { spawn } from "node:child_process";
 import { rm } from "node:fs/promises";
 import { getInternalToken } from "./auth/internal-token";
+import pkg from "../../package.json";
 
 /**
   Run a python script
@@ -46,7 +47,13 @@ export const run = async (
       // Hand the internal token to the child explicitly so its apollo() self-calls
       // are recognised by the auth hook. Spawned from here (the honest owner) rather than
       // written back onto this process's env.
-      { env: { ...process.env, APOLLO_INTERNAL_TOKEN: getInternalToken() } }
+      {
+        env: {
+          ...process.env,
+          APOLLO_INTERNAL_TOKEN: getInternalToken(),
+          APOLLO_VERSION: pkg.version,
+        },
+      }
     );
 
     proc.on("error", async (err) => {
diff --git a/platform/src/middleware/dir.tsx b/platform/src/middleware/dir.tsx
index 4d7df8fb..4bc2e2ee 100644
--- a/platform/src/middleware/dir.tsx
+++ b/platform/src/middleware/dir.tsx
@@ -24,6 +24,8 @@ export default async (app: Elysia) => {
   // jsx templates!
   // https://elysiajs.com/patterns/mvc.html#view
 
+  app.head("/", () => new Response(null, { status: 200 }));
+
   const modules = await describeModules(path.resolve("./services"));
   app.get("/", () => {
     return (
diff --git a/platform/src/middleware/services.ts b/platform/src/middleware/services.ts
index c09e06ad..05379844 100644
--- a/platform/src/middleware/services.ts
+++ b/platform/src/middleware/services.ts
@@ -74,6 +74,8 @@ export default async (app: Elysia, port: number, auth: InstanceAuth) => {
       const { name, readme } = m;
       console.log(" - mounted /services/" + name);
 
+      app.head(name, () => new Response(null, { status: 200 }));
+
       // simple post
       app.post(name, async (ctx) => {
         console.log(`POST /services/${name}: ${ctx.uuid}`);
diff --git a/platform/src/server.ts b/platform/src/server.ts
index 7c99b2d8..cbd8f3a1 100644
--- a/platform/src/server.ts
+++ b/platform/src/server.ts
@@ -11,6 +11,7 @@ import { captureException } from "./util/sentry";
 import { clientsDbUrl, closeDb } from "./db";
 import { runMigrations } from "./db/migrate";
 import { randomUUID } from "node:crypto";
+import pkg from "../../package.json";
 
 export default async (
   port: number | string = 3000,
@@ -23,6 +24,7 @@ export default async (
   app.use(html());
 
   app.derive(() => ({ start: Date.now(), uuid: randomUUID() }));
+  app.onAfterHandle(({ set }) => { set.headers["X-Api-Version"] = pkg.version; });
   app.onAfterHandle(logRequest);
 
   // Report unhandled throws to Sentry, then return nothing so Elysia produces
diff --git a/platform/test/server.test.ts b/platform/test/server.test.ts
index 36e3947e..15667f0a 100644
--- a/platform/test/server.test.ts
+++ b/platform/test/server.test.ts
@@ -1,4 +1,12 @@
-import { afterEach, beforeEach, describe, expect, it, setSystemTime, spyOn } from "bun:test";
+import {
+  afterEach,
+  beforeEach,
+  describe,
+  expect,
+  it,
+  setSystemTime,
+  spyOn,
+} from "bun:test";
 import { randomBytes } from "node:crypto";
 import { Elysia } from "elysia";
 import setup from "../src/server";
@@ -8,6 +16,7 @@ import { InstanceAuth, type Client } from "../src/auth/instance-auth";
 import { hashToken } from "../src/auth/hash";
 import { internalAuthHeader } from "../src/auth/internal-token";
 import { encryptKey } from "../src/util/instance-key-crypto";
+import pkg from "../../package.json";
 
 const port = 9865;
 
@@ -59,6 +68,19 @@ describe("Main server", () => {
     expect(status).toBe(200);
   });
 
+  it("includes X-Api-Version header on every response", async () => {
+    const response = await app.handle(get(""));
+    expect(response.headers.get("X-Api-Version")).toBe(pkg.version);
+  });
+
+  it("responds to HEAD at root with 200 and no body", async () => {
+    const response = await app.handle(
+      new Request(`${baseUrl}/`, { method: "HEAD" })
+    );
+    expect(response.status).toBe(200);
+    expect(await response.text()).toBe("");
+  });
+
   // send messages through a web socket
 });
 
@@ -66,6 +88,13 @@ describe("Main server", () => {
 // but we can use the test echo service at least
 describe("Python Services", () => {
   describe("Python echo", () => {
+    it("responds to HEAD with 200 and no body", async () => {
+      const req = new Request(`${baseUrl}/services/echo`, { method: "HEAD" });
+      const response = await app.handle(req);
+      expect(response.status).toBe(200);
+      expect(await response.text()).toBe("");
+    });
+
     it("returns a 200", async () => {
       const json = { x: 1 };
       const response = await app.handle(post("services/echo", json));
@@ -121,13 +150,13 @@ describe("Python Services", () => {
       );
 
       expect(response.status).toBe(429);
-      
+
       const body = await response.json();
       expect(body).toEqual({
         code: 429,
         type: "RATE_LIMIT",
         message: "Rate limit exceeded, please try again later",
-        details: { retry_after: 60 }
+        details: { retry_after: 60 },
       });
     });
 
@@ -137,7 +166,7 @@ describe("Python Services", () => {
       );
 
       expect(response.status).toBe(500);
-      
+
       const body = await response.json();
       expect(body.code).toBe(500);
       expect(body.type).toBe("INTERNAL_ERROR");
@@ -161,7 +190,9 @@ describe("Sentry", () => {
   // No SENTRY_DSN is set in the test env, so the helper was never initialised.
   it("captureException is a silent no-op when no DSN is configured", () => {
     expect(() => captureException(new Error("test"))).not.toThrow();
-    expect(() => captureException("not even an error", { foo: 1 })).not.toThrow();
+    expect(() =>
+      captureException("not even an error", { foo: 1 })
+    ).not.toThrow();
   });
 
   // Mirrors the onError hook server.ts registers: report, return nothing, and
@@ -172,7 +203,9 @@ describe("Sentry", () => {
         throw new Error("kaboom");
       });
 
-    const withHook = boom(new Elysia().onError(({ error }) => captureException(error)));
+    const withHook = boom(
+      new Elysia().onError(({ error }) => captureException(error))
+    );
     const without = boom(new Elysia());
 
     const a = await withHook.handle(new Request("http://localhost/boom"));
@@ -229,7 +262,9 @@ describe("Instance authentication", () => {
 
   // Row 3 — unknown but sk-ant-shaped: bring-your-own key, forwarded unchanged.
   it("forwards an unknown sk-ant-shaped key unchanged (bring-your-own)", async () => {
-    const res = await app.handle(postKey("services/echo", { x: 1 }, "sk-ant-byo"));
+    const res = await app.handle(
+      postKey("services/echo", { x: 1 }, "sk-ant-byo")
+    );
     expect(res.status).toBe(200);
     const body = await res.json();
     expect(body.api_key).toBe("sk-ant-byo");
@@ -238,7 +273,9 @@ describe("Instance authentication", () => {
   // Row 3b — unknown and NOT sk-ant-shaped: a likely Lightning credential; reject
   // rather than forward it to the LLM.
   it("rejects an unknown non-sk-ant- key with 401 (never forwarded)", async () => {
-    const res = await app.handle(postKey("services/echo", { x: 1 }, "lightning-cred-unknown"));
+    const res = await app.handle(
+      postKey("services/echo", { x: 1 }, "lightning-cred-unknown")
+    );
     expect(res.status).toBe(401);
     const body = await res.json();
     expect(body.code).toBe(401);
@@ -255,8 +292,12 @@ describe("Instance authentication", () => {
 
   // Row 1 and row 3 coexist: known-client swap and bring-your-own forward in one run.
   it("serves the known-client swap and the bring-your-own forward side by side", async () => {
-    const swapped = await (await app.handle(postKey("services/echo", { x: 1 }, ALPHA))).json();
-    const forwarded = await (await app.handle(postKey("services/echo", { x: 1 }, "sk-ant-byo"))).json();
+    const swapped = await (
+      await app.handle(postKey("services/echo", { x: 1 }, ALPHA))
+    ).json();
+    const forwarded = await (
+      await app.handle(postKey("services/echo", { x: 1 }, "sk-ant-byo"))
+    ).json();
     expect(swapped.api_key).toBe("sk-ant-stored-alpha");
     expect(forwarded.api_key).toBe("sk-ant-byo");
   });
@@ -279,13 +320,18 @@ describe("Instance authentication", () => {
       const req = new Request(`${baseUrl}/services/echo`, {
         method: "POST",
         body: JSON.stringify({ x: 9 }),
-        headers: { "Content-Type": "application/json", ...internalAuthHeader() },
+        headers: {
+          "Content-Type": "application/json",
+          ...internalAuthHeader(),
+        },
       });
       const res = await app.handle(req);
       expect(res.status).toBe(200);
       // Correct token: the mismatch warn must not fire.
       expect(
-        warn.mock.calls.some(([m]) => String(m).includes("internal token MISMATCH"))
+        warn.mock.calls.some(([m]) =>
+          String(m).includes("internal token MISMATCH")
+        )
       ).toBe(false);
     } finally {
       warn.mockRestore();
@@ -298,7 +344,10 @@ describe("Instance authentication", () => {
       const req = new Request(`${baseUrl}/services/echo`, {
         method: "POST",
         body: JSON.stringify({ x: 9 }),
-        headers: { "Content-Type": "application/json", "x-apollo-internal": "nope" },
+        headers: {
+          "Content-Type": "application/json",
+          "x-apollo-internal": "nope",
+        },
       });
       const res = await app.handle(req);
       expect(res.status).toBe(401);
@@ -319,7 +368,10 @@ describe("Instance authentication", () => {
       const req = new Request(`${baseUrl}/services/echo`, {
         method: "POST",
         body: JSON.stringify({ x: 9 }),
-        headers: { "Content-Type": "application/json", "x-apollo-internal": "nope" },
+        headers: {
+          "Content-Type": "application/json",
+          "x-apollo-internal": "nope",
+        },
       });
       const res = await app.handle(req);
       // Behaviour unchanged: a forged internal header still rejects.
@@ -340,12 +392,17 @@ describe("Instance authentication", () => {
         // ALPHA is a known, otherwise-valid credential; the wrong internal
         // header must still reject it rather than authenticate via api_key.
         body: JSON.stringify({ x: 9, api_key: ALPHA }),
-        headers: { "Content-Type": "application/json", "x-apollo-internal": "nope" },
+        headers: {
+          "Content-Type": "application/json",
+          "x-apollo-internal": "nope",
+        },
       });
       const res = await app.handle(req);
       expect(res.status).toBe(401);
       expect(
-        warn.mock.calls.some(([m]) => String(m).includes("internal token MISMATCH"))
+        warn.mock.calls.some(([m]) =>
+          String(m).includes("internal token MISMATCH")
+        )
       ).toBe(true);
     } finally {
       warn.mockRestore();
@@ -356,10 +413,14 @@ describe("Instance authentication", () => {
     const warn = spyOn(console, "warn").mockImplementation(() => {});
     try {
       // An unknown non-sk-ant- key takes the explicit-fail path (no internal header).
-      const res = await app.handle(postKey("services/echo", { x: 1 }, "lightning-cred-unknown"));
+      const res = await app.handle(
+        postKey("services/echo", { x: 1 }, "lightning-cred-unknown")
+      );
       expect(res.status).toBe(401);
       expect(
-        warn.mock.calls.some(([m]) => String(m).includes("internal token MISMATCH"))
+        warn.mock.calls.some(([m]) =>
+          String(m).includes("internal token MISMATCH")
+        )
       ).toBe(false);
     } finally {
       warn.mockRestore();
@@ -495,7 +556,9 @@ describe("Instance auth — DB-down forward path", () => {
 
   // Row 7
   it("forwards an unknown sk-ant- key when the DB is down", async () => {
-    const res = await app.handle(post("services/echo", { x: 1, api_key: "sk-ant-byo" }));
+    const res = await app.handle(
+      post("services/echo", { x: 1, api_key: "sk-ant-byo" })
+    );
     expect(res.status).toBe(200);
     const body = await res.json();
     expect(body.api_key).toBe("sk-ant-byo");
@@ -503,7 +566,9 @@ describe("Instance auth — DB-down forward path", () => {
 
   // Row 8
   it("rejects an unknown non-sk-ant- key when the DB is down (never forwarded)", async () => {
-    const res = await app.handle(post("services/echo", { x: 1, api_key: "lightning-cred-unknown" }));
+    const res = await app.handle(
+      post("services/echo", { x: 1, api_key: "lightning-cred-unknown" })
+    );
     expect(res.status).toBe(401);
   });
 });
@@ -519,7 +584,7 @@ describe("Instance auth — lookup never came up (dbReady false)", () => {
       request: { headers: { get: () => null } },
       body: { api_key: apiKey },
       set: { status: 200 },
-    }) as any;
+    } as any);
 
   it("forwards an unknown sk-ant- key via the shape rule (no DB)", async () => {
     const auth = new InstanceAuth();
@@ -556,7 +621,7 @@ describe("Instance auth cache refresh", () => {
       request: { headers: { get: () => null } },
       body: apiKey ? { api_key: apiKey } : {},
       set: { status: 200 },
-    }) as any;
+    } as any);
   const tick = () => new Promise((r) => setTimeout(r, 10));
   const settle = () => new Promise((r) => setTimeout(r, 40));
   const TTL_MS = 60_000;
@@ -765,8 +830,16 @@ describe("Instance auth cache refresh", () => {
       expect(ctx.lightningClient).toBeUndefined();
       // ALPHA is not sk-ant-shaped and the evicted-then-failed lookup could not verify it, so we 503 (our outage) rather than a misleading 401.
       expect(ctx.set.status).toBe(503);
-      expect(warn.mock.calls.some(([m]) => String(m).includes("max-staleness ceiling"))).toBe(true);
-      expect(error.mock.calls.some(([m]) => String(m).includes("client lookup failed"))).toBe(true);
+      expect(
+        warn.mock.calls.some(([m]) =>
+          String(m).includes("max-staleness ceiling")
+        )
+      ).toBe(true);
+      expect(
+        error.mock.calls.some(([m]) =>
+          String(m).includes("client lookup failed")
+        )
+      ).toBe(true);
     } finally {
       warn.mockRestore();
       error.mockRestore();
@@ -887,7 +960,8 @@ describe("Instance auth key encryption", () => {
       auth.rowToClient({ name: "enc", anthropic_api_key: enc })?.anthropicKey
     ).toBe("sk-ant-secret");
     expect(
-      auth.rowToClient({ name: "plain", anthropic_api_key: "sk-ant-plain" })?.anthropicKey
+      auth.rowToClient({ name: "plain", anthropic_api_key: "sk-ant-plain" })
+        ?.anthropicKey
     ).toBe("sk-ant-plain");
     expect(
       auth.rowToClient({ name: "none", anthropic_api_key: null })?.anthropicKey
@@ -900,7 +974,9 @@ describe("Instance auth key encryption", () => {
       const enc = encryptKey("sk-ant-secret", randomBytes(32)); // encrypted with key A
       const auth = new InstanceAuth({ encKey: randomBytes(32) }); // holds a different key
 
-      expect(auth.rowToClient({ name: "bad", anthropic_api_key: enc })).toBeNull();
+      expect(
+        auth.rowToClient({ name: "bad", anthropic_api_key: enc })
+      ).toBeNull();
     } finally {
       error.mockRestore();
     }
@@ -912,7 +988,9 @@ describe("Instance auth key encryption", () => {
       const auth = new InstanceAuth({ encKey: null });
       const enc = encryptKey("sk-ant-secret", randomBytes(32));
 
-      expect(auth.rowToClient({ name: "bad", anthropic_api_key: enc })).toBeNull();
+      expect(
+        auth.rowToClient({ name: "bad", anthropic_api_key: enc })
+      ).toBeNull();
     } finally {
       error.mockRestore();
     }
@@ -929,7 +1007,9 @@ describe("Instance auth key encryption", () => {
       const enc = encryptKey("sk-ant-secret", randomBytes(32));
 
       // Behaviour unchanged: the row still drops to a miss.
-      expect(auth.rowToClient({ name: "missing", anthropic_api_key: enc })).toBeNull();
+      expect(
+        auth.rowToClient({ name: "missing", anthropic_api_key: enc })
+      ).toBeNull();
 
       const extras = capturedExtras(capture, "missing-enc-key");
       expect(extras).toBeDefined();
@@ -948,7 +1028,9 @@ describe("Instance auth key encryption", () => {
       const enc = encryptKey("sk-ant-secret", randomBytes(32)); // encrypted with key A
       const auth = new InstanceAuth({ encKey: randomBytes(32) }); // holds a different key
 
-      expect(auth.rowToClient({ name: "corrupt", anthropic_api_key: enc })).toBeNull();
+      expect(
+        auth.rowToClient({ name: "corrupt", anthropic_api_key: enc })
+      ).toBeNull();
 
       const extras = capturedExtras(capture, "decrypt-error");
       expect(extras).toBeDefined();
diff --git a/services/echo/echo.py b/services/echo/echo.py
index fba08b27..90ecaf32 100644
--- a/services/echo/echo.py
+++ b/services/echo/echo.py
@@ -1,7 +1,11 @@
+from util import ApolloError
 from .log import log
 
-# Simple python service to echo requests back to the caller
-# Used in test
+# Sample python service to echo requests back to the caller
 def main(x):
+    # Reject an empty payload with a 400 so callers get a clear error to diagnose.
+    # A payload holding only "session_id" counts as empty: that key is system-set.
+    if not x or set(x.keys()) == {"session_id"}:
+        raise ApolloError(code=400, message="payload is required", type="BAD_REQUEST")
     log(x)
     return x
diff --git a/services/entry.py b/services/entry.py
index eb26d355..cb0c7f7f 100644
--- a/services/entry.py
+++ b/services/entry.py
@@ -27,7 +27,7 @@ def _should_export_span(span):
     return is_default_export_span(span)
 
 
-langfuse = Langfuse(should_export_span=_should_export_span)
+langfuse = Langfuse(should_export_span=_should_export_span, release=os.getenv("APOLLO_VERSION", "unknown"))
 
 env = os.getenv('ENVIRONMENT', 'unknown')
 trace_rates = {
diff --git a/services/global_chat/config.yaml b/services/global_chat/config.yaml
index a8714f58..d2f26a51 100644
--- a/services/global_chat/config.yaml
+++ b/services/global_chat/config.yaml
@@ -8,7 +8,6 @@ router:
 
 # Planner configuration (complex orchestration)
 planner:
-  model: "claude-sonnet"
-  max_tokens: 8192
-  temperature: 1.0
+  model: "claude-opus"
+  max_tokens: 24576
   max_tool_calls: 10
diff --git a/services/global_chat/global_chat.py b/services/global_chat/global_chat.py
index 3e085b9c..b2c4add4 100644
--- a/services/global_chat/global_chat.py
+++ b/services/global_chat/global_chat.py
@@ -13,7 +13,7 @@
 sys.path.append(str(Path(__file__).parent.parent))
 
 from langfuse import observe, propagate_attributes, get_client as get_langfuse_client
-from util import ApolloError, create_logger
+from util import ApolloError, create_logger, APOLLO_VERSION
 from langfuse_util import should_track, build_tags
 from global_chat.config_loader import ConfigLoader
 from global_chat.router import RouterAgent
@@ -111,7 +111,10 @@ def main(data_dict: dict) -> dict:
                 "attachments": result.attachments,
                 "history": result.history,
                 "usage": result.usage,
-                "meta": result.meta
+                "meta": {
+                **result.meta,
+                "apollo_version": APOLLO_VERSION
+                }
             }
 
     except ApolloError as e:
diff --git a/services/global_chat/planner.py b/services/global_chat/planner.py
index 7eb3bd9b..ee4fe80f 100644
--- a/services/global_chat/planner.py
+++ b/services/global_chat/planner.py
@@ -6,6 +6,7 @@
 from typing import List, Dict, Optional
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass
+import httpx
 from anthropic import Anthropic
 import sentry_sdk
 
@@ -59,9 +60,8 @@ def __init__(self, config_loader: ConfigLoader, api_key: Optional[str] = None):
         self.tools = TOOL_DEFINITIONS
 
         planner_config = config_loader.config.get("planner", {})
-        self.model = resolve_model(planner_config.get("model", "claude-sonnet"))
-        self.max_tokens = planner_config.get("max_tokens", 8192)
-        self.temperature = planner_config.get("temperature", 1.0)
+        self.model = resolve_model(planner_config.get("model", "claude-opus"))
+        self.max_tokens = planner_config.get("max_tokens", 24576)
         self.max_tool_calls = planner_config.get("max_tool_calls", 20)
 
         self.current_yaml: Optional[str] = None
@@ -285,6 +285,7 @@ def _call_api(self, system_prompt, messages, stream):
                 messages=messages,
                 tools=self.tools,
                 thinking={"type": "adaptive"},
+                output_config={"effort": "medium"},
             ) as stream_obj:
                 for event in stream_obj:
                     if event.type == "content_block_delta":
@@ -299,7 +300,11 @@ def _call_api(self, system_prompt, messages, stream):
                 messages=messages,
                 tools=self.tools,
                 thinking={"type": "adaptive"},
-                output_config={"effort": "high"},
+                output_config={"effort": "medium"},
+                # Per-request timeout (same values as the SDK default):
+                # required for non-streaming calls with max_tokens > ~21k,
+                # which the SDK otherwise rejects.
+                timeout=httpx.Timeout(600.0, connect=5.0),
                 betas=["context-management-2025-06-27"],
                 context_management={
                     "edits": [
diff --git a/services/job_chat/job_chat.py b/services/job_chat/job_chat.py
index 31c11440..6e8d111b 100644
--- a/services/job_chat/job_chat.py
+++ b/services/job_chat/job_chat.py
@@ -4,6 +4,7 @@
 import yaml
 from typing import List, Optional, Dict, Any
 from dataclasses import dataclass
+import httpx
 from anthropic import (
     Anthropic,
     APIConnectionError,
@@ -18,7 +19,7 @@
 import sentry_sdk
 from langfuse import observe, propagate_attributes, get_client as get_langfuse_client
 from langfuse_util import should_track, build_tags
-from util import ApolloError, create_logger, AdaptorSpecifier, add_page_prefix
+from util import ApolloError, create_logger, AdaptorSpecifier, add_page_prefix, APOLLO_VERSION
 from .prompt import build_prompt, build_error_correction_prompt
 from .old_prompt import build_old_prompt
 from streaming_util import (
@@ -138,7 +139,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "Payload":
 @dataclass
 class ChatConfig:
     model: str = _MODEL
-    max_tokens: int = 16384
+    max_tokens: int = 24576
     api_key: Optional[str] = None
 
 
@@ -288,6 +289,10 @@ def generate(
                         max_tokens=self.config.max_tokens, messages=prompt, model=self.config.model, system=system_message,
                         thinking={"type": "adaptive"},
                         output_config=output_config,
+                        # Per-request timeout (same values as the SDK default):
+                        # required for non-streaming calls with max_tokens > ~21k,
+                        # which the SDK otherwise rejects.
+                        timeout=httpx.Timeout(600.0, connect=5.0),
                         **tool_kwargs
                     )
                     message = self.client.messages.create(**create_kwargs)
@@ -668,7 +673,7 @@ def main(data_dict: dict) -> dict:
                 "suggested_code": result.suggested_code,
                 "history": result.history,
                 "usage": result.usage,
-                "meta": {"rag": result.rag}
+                "meta": {"rag": result.rag, "apollo_version": APOLLO_VERSION}
             }
 
             if result.diff:
diff --git a/services/job_chat/rag.yaml b/services/job_chat/rag.yaml
index 9a6b6c39..13caa0de 100644
--- a/services/job_chat/rag.yaml
+++ b/services/job_chat/rag.yaml
@@ -1,5 +1,5 @@
 config_version: 1.0
-model: "claude-sonnet"
+model: "claude-opus"
 llm_search_decision: "claude-sonnet"
 llm_retrieval: "claude-sonnet"
 threshold: 0.8
diff --git a/services/util.py b/services/util.py
index ca08b6f3..45d6d0f7 100644
--- a/services/util.py
+++ b/services/util.py
@@ -7,6 +7,8 @@
 import psycopg2
 import requests
 
+APOLLO_VERSION = os.getenv("APOLLO_VERSION", "unknown")
+
 # Adaptor parsing constants
 SCOPED_ADAPTOR_MIN_PARTS = 3
 SHORTHAND_ADAPTOR_PARTS = 2
diff --git a/services/workflow_chat/workflow_chat.py b/services/workflow_chat/workflow_chat.py
index 69b554d9..4266e2d0 100644
--- a/services/workflow_chat/workflow_chat.py
+++ b/services/workflow_chat/workflow_chat.py
@@ -43,7 +43,7 @@
 import sentry_sdk
 from langfuse import observe, propagate_attributes, get_client as get_langfuse_client
 from langfuse_util import should_track, build_tags
-from util import ApolloError, create_logger, add_page_prefix
+from util import ApolloError, create_logger, add_page_prefix, APOLLO_VERSION
 from .gen_project_prompt import build_prompt
 from workflow_chat.available_adaptors import get_available_adaptors
 from streaming_util import (
@@ -694,7 +694,8 @@ def main(data_dict: dict) -> dict:
                 "response": result.content,
                 "response_yaml": result.content_yaml,
                 "history": result.history,
-                "usage": result.usage
+                "usage": result.usage,
+                "meta": {"apollo_version": APOLLO_VERSION}
             }
 
             return response_dict