diff --git a/Dockerfile b/Dockerfile index ff3b697bf..1b9b880f1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -140,6 +140,21 @@ RUN if [ "$PRUNE_MODULES" = "true" ]; then \ # Stage 2: runtime # Minimal image with only what the node needs at run time. Runs as non-root. # ============================================================================= +# ── wstcp (TLSNotary websocket↔TCP proxy) ───────────────────────────── +# The TLSNotary flow spawns `wstcp` on demand to relay the browser's +# WebSocket to the target TLS server (see proxyManager.ts). The runtime +# image ships no Rust toolchain, so the on-demand `cargo install wstcp` +# fallback in ensureWstcp() cannot run — bake the binary in instead. +# Without it the proxy never binds its port and every verification fails +# with an nginx 502 / CloseEvent 1006 on the prover. +FROM rust:1-slim AS wstcp +# Pin the version so an upstream wstcp release can't silently change +# behaviour or break the build. `--locked` is intentionally omitted: the +# crate's bundled Cargo.lock pins dependency versions that no longer +# compile on the current toolchain, so we let cargo resolve compatible +# deps for this exact wstcp version. +RUN cargo install wstcp --version 0.2.1 --root /wstcp + FROM oven/bun:1.3-debian AS runtime # OCI image metadata. @@ -178,6 +193,11 @@ RUN chmod 0755 /app/scripts/docker-entrypoint.sh \ && chown demos:demos /app /app/data /app/logs /app/state \ && chmod 0755 /app /app/data /app/logs /app/state +# TLSNotary proxy binary, baked in so the on-demand proxy can spawn +# without a Rust toolchain. Lands at $HOME/.cargo/bin/wstcp (HOME=/app) — +# the exact path proxyManager.ts::ensureWstcp() probes via `test -x`. +COPY --from=wstcp --chown=demos:demos /wstcp/bin/wstcp /app/.cargo/bin/wstcp + # Build-time provenance. These ARGs are populated by the build driver # (compose passes `git rev-parse HEAD` + `git rev-parse --abbrev-ref HEAD` # + `git diff --quiet; echo $?` + an ISO timestamp). 
They land in the diff --git a/docker-compose.devnet.yml b/docker-compose.devnet.yml index 7c623e91d..80f4b26df 100644 --- a/docker-compose.devnet.yml +++ b/docker-compose.devnet.yml @@ -56,6 +56,11 @@ services: container_name: demos-node-devnet environment: TLSNOTARY_PORT: 7147 + # wstcp proxy window, offset +100 from mainnet (55000-55063) so the + # two stacks don't fight over the same host ports. The allocation + # range and the published range below read these same values. + TLSNOTARY_PROXY_PORT_MIN: 55100 + TLSNOTARY_PROXY_PORT_MAX: 55163 networks: !override - demos-network-devnet # node_data/node_logs/node_state are isolated by the top-level `name:` @@ -69,6 +74,11 @@ services: - "53651:${OMNI_PORT:-53551}" - "3105:${RPC_SIGNALING_PORT:-3005}" - "9190:9090" + # wstcp TLSNotary proxy range (devnet window), localhost-bound so only + # the host reverse proxy reaches it. MUST match TLSNOTARY_PROXY_PORT_MIN/ + # MAX above. Host:container are equal (no offset) — the node advertises + # the container-internal port and nginx forwards /tlsn// to it. + - "127.0.0.1:55100-55163:55100-55163" prometheus: container_name: demos-prometheus-devnet @@ -101,6 +111,11 @@ services: caddy: container_name: demos-caddy-devnet + environment: + # Devnet remaps the notary to 7147; Caddy must see the same value or its + # /tlsn// route falls back to 7047 → 502. Merges with (and + # overrides) the base caddy environment. + TLSNOTARY_PORT: 7147 networks: !override - demos-network-devnet ports: !override diff --git a/docker-compose.yml b/docker-compose.yml index ed774c77a..f59be4641 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -173,6 +173,13 @@ services: TLSNOTARY_MODE: ${TLSNOTARY_MODE:-docker} TLSNOTARY_FATAL: ${TLSNOTARY_FATAL:-false} TLSNOTARY_SIGNING_KEY: ${TLSNOTARY_SIGNING_KEY:-} + # wstcp proxy port window. 
The node spawns dynamic wstcp proxies in + # this range and the host reverse proxy forwards /tlsn// to them, + # so the SAME range must be host-published in `ports:` below — kept + # narrow to avoid a 2000-port mapping. Both stanzas read these vars so + # the allocation range and the published range can never drift. + TLSNOTARY_PROXY_PORT_MIN: ${TLSNOTARY_PROXY_PORT_MIN:-55000} + TLSNOTARY_PROXY_PORT_MAX: ${TLSNOTARY_PROXY_PORT_MAX:-55063} # Logging & misc LOG_LEVEL: ${LOG_LEVEL:-info} PROD: ${PROD:-false} @@ -198,6 +205,12 @@ services: - "${RPC_PORT:-53550}:${RPC_PORT:-53550}" - "${OMNI_PORT:-53551}:${OMNI_PORT:-53551}" - "${RPC_SIGNALING_PORT:-3005}:${RPC_SIGNALING_PORT:-3005}" + # wstcp TLSNotary proxy range, bound to 127.0.0.1 so ONLY the host's + # reverse proxy can reach it (not the public internet). The node + # allocates dynamic proxy ports here and nginx forwards + # /tlsn// to them — without this mapping every verification + # 502s. MUST match TLSNOTARY_PROXY_PORT_MIN/MAX in `environment:`. + - "127.0.0.1:${TLSNOTARY_PROXY_PORT_MIN:-55000}-${TLSNOTARY_PROXY_PORT_MAX:-55063}:${TLSNOTARY_PROXY_PORT_MIN:-55000}-${TLSNOTARY_PROXY_PORT_MAX:-55063}" # MCP (Model Context Protocol) intentionally NOT host-published. # The server binds `localhost` inside the container (src/index.ts) # and the SDK has no built-in authentication — publishing this port @@ -427,6 +440,10 @@ services: # Epic 12 T14 — selects which tlsnotary-modes/*.caddy snippet # is imported. Allowed: subpath (default), subdomain, direct. TLSNOTARY_PROXY_MODE: ${TLSNOTARY_PROXY_MODE:-subpath} + # The subpath snippet routes /tlsn// via {$TLSNOTARY_PORT}, + # which Caddy reads from its OWN env — so it must be set here, or the + # notary leg falls back to 7047 and 502s on port-remapped stacks (devnet). 
+ TLSNOTARY_PORT: ${TLSNOTARY_PORT:-7047} volumes: - ./monitoring/caddy/Caddyfile:/etc/caddy/Caddyfile:ro - ./monitoring/caddy/tlsnotary-modes:/etc/caddy/tlsnotary-modes:ro diff --git a/docs/runbooks/tlsn-reverse-proxy.md b/docs/runbooks/tlsn-reverse-proxy.md new file mode 100644 index 000000000..8659d701f --- /dev/null +++ b/docs/runbooks/tlsn-reverse-proxy.md @@ -0,0 +1,89 @@ +--- +type: runbook +title: TLSNotary reverse-proxy routing (/tlsn/<port>/) +status: active +--- + +# TLSNotary reverse-proxy routing + +The browser SDK opens `wss://<host>/tlsn/<port>/` for both the notary and +each per-target **wstcp** proxy. The node's public reverse proxy must route +`/tlsn/<port>/` to the matching local port **as a WebSocket** (HTTP upgrade). If +it doesn't, the attest websocket closes with `CloseEvent: { code: 1006 }`. + +This is required on **every** node that serves TLSN to a browser. The node image +(with `wstcp` baked in — see the Dockerfile `wstcp` stage) and the published proxy +port range are repo-managed; the reverse proxy is the only piece an operator may +need to touch by hand. + +## ⚠️ The one non-obvious gotcha + +**`wstcp` 0.2.1 checks the `Connection` header case-sensitively** — it requires +`Connection: Upgrade` (capital U). The common nginx snippet uses +`proxy_set_header Connection "upgrade";` (lowercase), which `wstcp` rejects: + +``` +Invalid WebSocket handshake request: assertion failed: `values.any(|v| v.trim() == "Upgrade")`; value="upgrade" +``` + +→ the handshake never completes, every `/tlsn/<port>/` 502s, and the browser sees +`CloseEvent 1006`. Use **capital `Upgrade`**. + +## Caddy (repo-managed — automatic) + +Deploying with the `proxy` compose profile applies +`monitoring/caddy/tlsnotary-modes/subpath.caddy`, which routes `/tlsn/<port>/` +for the notary and the wstcp proxies. Caddy's `reverse_proxy` performs the +upgrade with the correct casing — no manual step. Just redeploy. 
+ +## nginx (hand-maintained nodes) + +Add this `location` inside the node's `listen 443 ssl; server_name <domain>;` +block, then `sudo nginx -t && sudo systemctl reload nginx`: + +```nginx +# Dynamic wstcp TLSNotary proxies: /tlsn/<port>/ -> 127.0.0.1:<port> +# The 5[567]\d{3} class restricts to the 55000-57999 allocation window so this +# can't be used to reach arbitrary node ports (RPC 53550, MCP 3001, ...). +location ~ ^/tlsn/(5[567]\d{3})/?(.*)$ { + proxy_pass http://127.0.0.1:$1/$2$is_args$args; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "Upgrade"; # capital U — wstcp is case-sensitive + proxy_set_header Host $host; + proxy_read_timeout 120s; +} +``` + +If the notary port is outside `55000-57999` (e.g. `7047`/`7147`), give it its own +`location = /tlsn/<notary-port>/ { ... }` block, or widen the class accordingly. + +Prerequisites (both already handled by deploying this branch): + +1. **wstcp baked into the image** — `docker exec <container> ls /app/.cargo/bin/wstcp` + must succeed. +2. **Proxy port range published on loopback** so nginx can reach it — e.g. + `127.0.0.1:55000-55063` (mainnet) / `127.0.0.1:55100-55163` (devnet), matching + `TLSNOTARY_PROXY_PORT_MIN/MAX`. Confirm with + `docker port <container> | grep 55` and `sudo ss -ltn | grep ':55'`. + +## Verify + +```bash +# expect HTTP 101 (Switching Protocols) +curl -s -o /dev/null -w '%{http_code}\n' -k \ + -H "Connection: Upgrade" -H "Upgrade: websocket" \ + -H "Sec-WebSocket-Version: 13" -H "Sec-WebSocket-Key: dGhlIHNhbXBsZSBub25jZQ==" \ + https://<host>/tlsn/<port>/ +``` + +`101` → routing + upgrade are correct. `502` → the location is missing, the port +isn't published/reachable, or `Connection` is lowercase. + +## Note: HTTPS RPC + +The browser client (HTTPS page) cannot call a plain-HTTP node RPC (mixed +content). The node RPC must be reachable over HTTPS at the URL the client is +built with. 
Production nodes already terminate TLS at the public domain; only +custom-port dev setups (`...:53650` with no TLS) need a dedicated `listen +ssl` block in front of the RPC. diff --git a/monitoring/caddy/tlsnotary-modes/subpath.caddy b/monitoring/caddy/tlsnotary-modes/subpath.caddy index 7ef1d21f7..ec814d791 100644 --- a/monitoring/caddy/tlsnotary-modes/subpath.caddy +++ b/monitoring/caddy/tlsnotary-modes/subpath.caddy @@ -1,10 +1,33 @@ -# TLSNotary route — subpath mode (default). -# Mount at https://${PROXY_DOMAIN}/tlsnotary/. Caddy strips the prefix -# before forwarding so the notary container stays at root. Works when -# upstream notary has no sub-path assumptions in its WS framing. +# TLSNotary routes — subpath mode (default). +# +# The node advertises EVERY TLSN endpoint under /tlsn// (buildWsUrl's +# default path), and two upstreams live behind that prefix: +# - the notary websocket — `tlsnotary` container on $TLSNOTARY_PORT +# - the per-target wstcp proxies — spawned in the `node` container on a +# dynamic port in TLSNOTARY_PROXY_PORT_MIN..MAX (wstcp binds 0.0.0.0, +# so it's reachable by service name on the compose network) +# Both must be routed or the SDK's attest() websocket closes with 1006. +# Caddy upgrades the websocket automatically; no extra headers needed. # # Selected when TLSNOTARY_PROXY_MODE=subpath (or unset). +# Notary — legacy /tlsnotary/* and the advertised /tlsn//. +# Honour $TLSNOTARY_PORT: the dev stack remaps it (e.g. 7147), so the +# hard-coded 7047 here is what made the notary leg 502 on remapped hosts. handle_path /tlsnotary/* { - reverse_proxy tlsnotary:7047 + reverse_proxy tlsnotary:{$TLSNOTARY_PORT:7047} +} +handle_path /tlsn/{$TLSNOTARY_PORT:7047}/* { + reverse_proxy tlsnotary:{$TLSNOTARY_PORT:7047} +} + +# Dynamic per-target wstcp proxies: /tlsn// -> node:. +# The literal notary path above is more specific, so it wins for the +# notary port; everything else under /tlsn// lands here. 
+# Restrict to the 55000-57999 wstcp allocation window so this can't proxy +# arbitrary node ports (RPC 53550, MCP 3001, metrics 9090, ...) publicly. +@tlsnproxy path_regexp tlsnport ^/tlsn/(5[567]\d{3})(/.*)?$ +handle @tlsnproxy { + uri strip_prefix /tlsn/{re.tlsnport.1} + reverse_proxy node:{re.tlsnport.1} } diff --git a/src/features/tlsnotary/constants.ts b/src/features/tlsnotary/constants.ts index f3a990711..e6e145b49 100644 --- a/src/features/tlsnotary/constants.ts +++ b/src/features/tlsnotary/constants.ts @@ -49,10 +49,16 @@ export const SIGNING_KEY_FILE_MODE = 0o600 * Configuration constants for port allocation and proxy lifecycle */ export const PORT_CONFIG = { - /** Minimum port number in the allocation range */ - PORT_MIN: 55000, - /** Maximum port number in the allocation range */ - PORT_MAX: 57000, + /** + * Minimum/maximum port for wstcp proxy allocation. Overridable via env so + * the published host range (docker-compose `ports:`) can be narrowed to a + * window the reverse proxy can actually reach — the proxies bind dynamic + * ports in this range and nginx forwards `/tlsn//` to them, so the + * range MUST be host-reachable or every verification 502s. Defaults keep + * the historical 55000-57000 behaviour. 
+ */ + PORT_MIN: Number(process.env.TLSNOTARY_PROXY_PORT_MIN) || 55000, + PORT_MAX: Number(process.env.TLSNOTARY_PROXY_PORT_MAX) || 57000, /** Idle timeout before a proxy is considered stale (30 seconds) */ IDLE_TIMEOUT_MS: 30000, /** Maximum number of spawn retry attempts */ diff --git a/src/features/tlsnotary/portAllocator.ts b/src/features/tlsnotary/portAllocator.ts index 749826ddb..c08b26fcf 100644 --- a/src/features/tlsnotary/portAllocator.ts +++ b/src/features/tlsnotary/portAllocator.ts @@ -10,17 +10,10 @@ // REVIEW: TLSNotary port pool management for wstcp proxy instances import * as net from "net" import log from "@/utilities/logger" - -/** - * Configuration constants for port allocation - */ -export const PORT_CONFIG = { - PORT_MIN: 55000, - PORT_MAX: 57000, - IDLE_TIMEOUT_MS: 30000, // 30 seconds - MAX_SPAWN_RETRIES: 3, - SPAWN_TIMEOUT_MS: 5000, // 5 seconds to wait for wstcp to start -} +// Single source of truth for the proxy port window, so the env-overridable +// range (TLSNOTARY_PROXY_PORT_MIN/MAX) actually drives allocation — a local +// hardcoded copy here would silently ignore the published host range. +import { PORT_CONFIG } from "./constants" /** * Port pool state interface diff --git a/src/features/tlsnotary/proxyManager.ts b/src/features/tlsnotary/proxyManager.ts index 7110b2287..a954ca23b 100644 --- a/src/features/tlsnotary/proxyManager.ts +++ b/src/features/tlsnotary/proxyManager.ts @@ -36,12 +36,12 @@ import { promisify } from "util" import log from "@/utilities/logger" import { getSharedState } from "@/utilities/sharedState" import { - PORT_CONFIG, initPortPool, allocatePort, releasePort, type PortPoolState, } from "./portAllocator" +import { PORT_CONFIG } from "./constants" const execAsync = promisify(exec)