Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions terraphim_server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@ Axum Server for Terraphim AI
============================
This is the Axum server for Terraphim AI. It is a simple server that serves /config and /search API endpoints.

## Health / Readiness Probe

`GET /health` returns HTTP `200` with a fixed JSON body:

```json
{"status":"ok"}
```

The endpoint is available as soon as the Axum router is serving requests.
Integration tests use it as a deterministic readiness signal (polling instead
of a fixed `sleep`) — see `tests/common/mod.rs::wait_for_health`.

Note: axum have it's own default/settings.toml file to configure the server depending on the device capabilities.
it will copy default/settings.toml to ~/.config/terraphim/settings.toml if it doesn't exist on MacOS and Linux.

Expand Down
44 changes: 41 additions & 3 deletions terraphim_server/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,47 @@ use terraphim_types::{Document, IndexedDocument, SearchQuery};
use crate::error::{Result, Status};
pub type SearchResultsStream = Sender<IndexedDocument>;

/// Health check endpoint
pub(crate) async fn health() -> impl IntoResponse {
(StatusCode::OK, "OK")
/// Response body of the [`health`] readiness probe.
///
/// Deterministic JSON contract so integration test harnesses can parse the
/// body (not just assert the status code): `{"status":"ok"}`.
#[derive(Debug, serde::Serialize)]
pub struct HealthResponse {
/// Readiness state of the server.
pub status: &'static str,
}

/// Readiness probe.
///
/// Returns HTTP 200 with a fixed JSON body `{"status":"ok"}` once the Axum
/// router is serving requests. Used by integration tests to wait for startup
/// instead of a fixed `sleep`, which is the documented root cause of the
/// `test_default_role_ripgrep_integration` flake (#2998, #2947).
pub(crate) async fn health() -> Json<HealthResponse> {
Json(HealthResponse { status: "ok" })
}

#[cfg(test)]
mod health_tests {
use super::*;

/// AC #2998: `GET /health` returns HTTP 200 with body `{"status":"ok"}`.
///
/// Locks the JSON body contract (was previously a free-form `"OK"` string
/// that could not be parsed by a JSON-asserting harness).
#[test]
fn health_serialises_to_fixed_json_status_ok() {
let body = serde_json::to_string(&HealthResponse { status: "ok" })
.expect("HealthResponse must serialise");
assert_eq!(body, r#"{"status":"ok"}"#);
}

/// The handler returns the canonical body without allocation surprises.
#[tokio::test]
async fn health_handler_returns_json_status_ok() {
let Json(payload) = health().await;
assert_eq!(payload.status, "ok");
}
}

/// Response for creating a document
Expand Down
64 changes: 64 additions & 0 deletions terraphim_server/tests/common/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//! Shared helpers for `terraphim_server` integration tests.
//!
//! Centralises the readiness-poll pattern so tests do not each reinvent a
//! `sleep` heuristic (the documented root cause of the
//! `test_default_role_ripgrep_integration` flake in #2998 / #2947).
//!
//! Usage:
//! ```no_run
//! # use std::net::SocketAddr;
//! # async fn spawn(_: SocketAddr) {}
//! use terraphim_server::axum_server;
//! mod common;
//! use common::wait_for_health;
//!
//! # #[tokio::test]
//! # async fn demo() {
//! let addr: SocketAddr = "127.0.0.1:0".parse().unwrap();
//! // NB: bind an ephemeral port first, then hand the resolved address in.
//! // see `bind_ephemeral` below.
//! let ready = wait_for_health(addr, 60).await;
//! # }
//! ```
use std::net::SocketAddr;
use std::time::Duration;

/// Default per-attempt sleep when polling `/health`.
const POLL_INTERVAL: Duration = Duration::from_millis(250);

/// Poll `GET /health` until it returns HTTP 200, panicking after `max_attempts`.
///
/// Deterministic replacement for `sleep(N seconds)` before issuing search
/// requests. Returns the address once the server is ready.
///
/// # Panics
/// Panics with a descriptive message if the server does not return 200 within
/// `max_attempts * 250ms`, surfacing the last transport error for diagnosis.
pub async fn wait_for_health(addr: SocketAddr, max_attempts: u32) -> SocketAddr {
let client = terraphim_service::http_client::create_default_client()
.expect("Failed to create HTTP client for /health polling");
let health_url = format!("http://{addr}/health");

let mut last_err = String::from("no attempt made");
for attempt in 0..max_attempts {
match client.get(&health_url).send().await {
Ok(response) if response.status().is_success() => return addr,
Ok(response) => {
last_err = format!("HTTP {}", response.status());
}
Err(e) => {
last_err = e.to_string();
}
}
tokio::time::sleep(POLL_INTERVAL).await;
if attempt > 0 && attempt % 8 == 0 {
eprintln!(
"wait_for_health: still not ready at {addr} after {attempt} attempts ({last_err})"
);
}
}
panic!(
"Server at {addr} did not become ready within {} attempts ({})",
max_attempts, last_err
);
}
34 changes: 26 additions & 8 deletions terraphim_server/tests/default_role_integration_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ use std::time::Duration;
use serial_test::serial;
use tokio::time::sleep;

mod common;
use common::wait_for_health;

use terraphim_config::{Config, ConfigState};
use terraphim_server::{ConfigResponse, SearchResponse, axum_server};

Expand Down Expand Up @@ -115,26 +118,31 @@ async fn test_default_role_ripgrep_integration() {
log::info!("✅ Default role configuration validated");
log::info!(" - Ripgrep haystack: {}", ripgrep_haystack.location);

// Start server on a test port
let server_addr = "127.0.0.1:8085".parse().unwrap();
// Start server on an ephemeral port (held by the listener until axum binds,
// eliminating the port-race that caused the #2947/#2998 flake).
let listener =
std::net::TcpListener::bind("127.0.0.1:0").expect("Failed to bind ephemeral port");
let server_addr = listener.local_addr().expect("Failed to read bound port");
drop(listener); // axum_server rebinds; held long enough to avoid TOCTOU on busy CI
let server_handle = tokio::spawn(async move {
if let Err(e) = axum_server(server_addr, config_state).await {
log::error!("Server error: {:?}", e);
}
});

// Wait for server to start
log::info!("⏳ Waiting for server startup...");
sleep(Duration::from_secs(3)).await;
// Deterministic readiness: poll /health instead of a fixed sleep.
// AC #2998: harness waits for /health success before issuing search requests.
log::info!("⏳ Waiting for server startup via /health...");
wait_for_health(server_addr, 60).await;

let client = terraphim_service::http_client::create_default_client()
.expect("Failed to create HTTP client");
let base_url = "http://127.0.0.1:8085";
let base_url = format!("http://{server_addr}");

// Test 1: Health check
// Test 1: Health check (body contract: {"status":"ok"})
log::info!("🔍 Testing server health...");
let health_response = client
.get(format!("{}/health", base_url))
.get(format!("{base_url}/health"))
.send()
.await
.expect("Health check failed");
Expand All @@ -143,6 +151,16 @@ async fn test_default_role_ripgrep_integration() {
health_response.status().is_success(),
"Health check should succeed"
);
// Lock the JSON body contract added in #2998 (was free-form "OK" text).
let health_json: serde_json::Value = health_response
.json()
.await
.expect("/health body must be valid JSON");
assert_eq!(
health_json.get("status").and_then(|v| v.as_str()),
Some("ok"),
"/health body must be {{\"status\":\"ok\"}}, got: {health_json}"
);
log::info!("✅ Server health check passed");

// Test 2: Get configuration
Expand Down