Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .github/workflows/rs_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -182,3 +182,17 @@ jobs:
with:
token: ${{ secrets.GITHUB_TOKEN }}
working-directory: bottlecap

# Guards cold-start H1 (PR #1276): the default build must keep reqwest on
# compiled-in webpki roots (no per-build native-cert filesystem scan), and the
# FIPS build must keep native roots. Reads the resolved feature graph only
# (no compilation), so it's fast and needs no protoc.
tls-root-features:
name: TLS Root Features
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions-rust-lang/setup-rust-toolchain@150fca883cd4034361b621bd4e6a9d34e5143606 # v1.15.4
with:
cache: false
- run: ./scripts/verify_tls_root_features.sh
2 changes: 2 additions & 0 deletions bottlecap/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 18 additions & 1 deletion bottlecap/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ tower = { version = "0.5", features = ["util"] }
mock_instant = "0.6"
serial_test = "3.1"
tempfile = "3.20"
# Local TLS server for the http.rs root-trust tests (H1: webpki default +
# tls_cert_file mitigation). Already present transitively via reqwest's
# hyper-rustls, so promoting it to a dev-dep adds nothing to the shipped binary.
tokio-rustls = { version = "0.26", default-features = false, features = ["logging", "tls12"] }
# fake-intake test harness: decode msgpack+gzip stats payloads on arrival
rmp-serde = { version = "1.3.1", default-features = false }
flate2 = { version = "1.1", default-features = false, features = ["rust_backend"] }
Expand Down Expand Up @@ -129,7 +133,20 @@ tikv-jemallocator = "0.5"

[features]
default = [
"reqwest/rustls-tls-native-roots",
# Use webpki-roots (compiled-in Mozilla CA bundle) rather than
# rustls-tls-native-roots for the non-FIPS reqwest clients. native-roots
# calls rustls_native_certs::load_native_certs() — a filesystem cert-bundle
# scan/parse — on *every* reqwest::Client::build(), and bottlecap builds
# three reqwest clients during cold-start init (the register client in
# bin/bottlecap/main.rs, the shared flush client in src/http.rs, and the
# secrets-decrypt client in src/secrets/decrypt.rs for KMS/SM/SSM/STS).
# webpki-roots removes that per-build scan. Trade-off: webpki trusts only the
# public Mozilla roots, so private/internal OS-installed CAs are no longer
# picked up implicitly; users relying on those must point DD_PROXY_HTTPS at a
# trusted intercept or supply the CA via tls_cert_file (which still layers on
# top via add_root_certificate in src/http.rs). FIPS builds keep native-roots
# (see the `fips` feature) and are unaffected.
"reqwest/rustls-tls-webpki-roots",
"datadog-fips/default",
"datadog-agent-config/https",
"libdd-common/https",
Expand Down
236 changes: 236 additions & 0 deletions bottlecap/src/http.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,9 @@ pub fn headers_to_map(headers: &HeaderMap) -> HashMap<String, String> {
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use std::sync::atomic::{AtomicUsize, Ordering};
use tempfile::NamedTempFile;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::TcpListener;

Expand Down Expand Up @@ -245,4 +247,238 @@ mod tests {
"expected 2 fresh TCP connections, got {opened} (pooling not disabled?)"
);
}

// ---------------------------------------------------------------------
// TLS root-trust tests for cold-start hypothesis H1 (PR #1276). The
// non-FIPS build trusts compiled-in webpki roots; `tls_cert_file` is the
// supported mitigation for environments behind a private CA (e.g. a
// TLS-intercepting DD_PROXY_HTTPS). These pin both halves of that contract
// so the trust model can't regress silently.
// ---------------------------------------------------------------------

// Self-contained test PKI, generated offline with OpenSSL (ECDSA P-256,
// valid ~100 years). The CA is a *private* root deliberately NOT in the
// webpki/Mozilla bundle, so it exercises exactly the path this PR changes.
// Regenerate with:
// openssl ecparam -name prime256v1 -genkey -noout -out ca.key
// openssl req -x509 -new -key ca.key -sha256 -days 36500 \
// -subj "/CN=Bottlecap Test CA" \
// -addext "basicConstraints=critical,CA:TRUE" \
// -addext "keyUsage=critical,keyCertSign,cRLSign" -out ca.crt
// openssl ecparam -name prime256v1 -genkey -noout -out leaf.key
// openssl req -new -key leaf.key -subj "/CN=localhost" -out leaf.csr
// printf 'basicConstraints=CA:FALSE\nkeyUsage=critical,digitalSignature\nextendedKeyUsage=serverAuth\nsubjectAltName=DNS:localhost\n' > leaf.ext
// openssl x509 -req -in leaf.csr -CA ca.crt -CAkey ca.key -CAcreateserial \
// -sha256 -days 36500 -extfile leaf.ext -out leaf.crt
// openssl pkcs8 -topk8 -nocrypt -in leaf.key -out leaf.pk8.pem
const TEST_CA_CERT_PEM: &str = r"
-----BEGIN CERTIFICATE-----
MIIBnzCCAUWgAwIBAgIUf5l+dmNjL/xU10EM0qk4Iseh3nQwCgYIKoZIzj0EAwIw
HDEaMBgGA1UEAwwRQm90dGxlY2FwIFRlc3QgQ0EwIBcNMjYwNjI1MTgzNjMwWhgP
MjEyNjA2MDExODM2MzBaMBwxGjAYBgNVBAMMEUJvdHRsZWNhcCBUZXN0IENBMFkw
EwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEK9O0WnLz1pRAD3RZzYzsCf8vEieLLwnV
D8RNYJBaEBZBJbE+4snr+0vhVm2mGtIojZTJ0bc5Z98JHpZ57LBpwqNjMGEwHQYD
VR0OBBYEFJtZHbWEv1FI0KacM/4ctJkUuMg2MB8GA1UdIwQYMBaAFJtZHbWEv1FI
0KacM/4ctJkUuMg2MA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMAoG
CCqGSM49BAMCA0gAMEUCIQDFLc+zjel9LGmytihROgErrPc6WDxmziypva+k2cSN
CAIgYb1nsDag2/bwlzf4OOcYvqU9xNRdXMarkRxn4o5rPR4=
-----END CERTIFICATE-----
";
const TEST_SERVER_CERT_PEM: &str = r"
-----BEGIN CERTIFICATE-----
MIIBvTCCAWSgAwIBAgIUOkYng/RZxQcYlMY5I+RAXzR7dSkwCgYIKoZIzj0EAwIw
HDEaMBgGA1UEAwwRQm90dGxlY2FwIFRlc3QgQ0EwIBcNMjYwNjI1MTgzNjMwWhgP
MjEyNjA2MDExODM2MzBaMBQxEjAQBgNVBAMMCWxvY2FsaG9zdDBZMBMGByqGSM49
AgEGCCqGSM49AwEHA0IABOuKun0roY5MAEOgA3NNebQa9l56HVsNFwGJ4a5chM2T
s+vToAoyflPMZfxuS6PUv2WHrahmUH5WZKr0XaT/RIijgYkwgYYwCQYDVR0TBAIw
ADAOBgNVHQ8BAf8EBAMCB4AwEwYDVR0lBAwwCgYIKwYBBQUHAwEwFAYDVR0RBA0w
C4IJbG9jYWxob3N0MB0GA1UdDgQWBBTcqKduDXHEOeUXAL6fqwAYl50VVTAfBgNV
HSMEGDAWgBSbWR21hL9RSNCmnDP+HLSZFLjINjAKBggqhkjOPQQDAgNHADBEAiAT
nwOMjOwnQoeLvZgHhNqrzlVGNqnT4FPudJTgTKrAAAIgVMw8wqk70JXLm4pEdN8/
oMUo5j7nV6S8Hd02b5hW5pM=
-----END CERTIFICATE-----
";
const TEST_SERVER_KEY_PEM: &str = r"
-----BEGIN PRIVATE KEY-----
MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgkflx4lEcI8Fafj8E
EgBejgnIKCAkaQXo+p0h3a6IsQOhRANCAATrirp9K6GOTABDoANzTXm0GvZeeh1b
DRcBieGuXITNk7Pr06AKMn5TzGX8bkuj1L9lh62oZlB+VmSq9F2k/0SI
-----END PRIVATE KEY-----
";

/// Installs a process-default rustls `CryptoProvider` matching the build's
/// TLS backend (mirrors `traces/http_client.rs`); idempotent across tests.
fn ensure_crypto_provider() {
#[cfg(feature = "fips")]
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
#[cfg(not(feature = "fips"))]
let _ = rustls::crypto::ring::default_provider().install_default();
}

/// Spawns a local HTTPS (HTTP/1.1-over-TLS) server presenting the test leaf
/// certificate (signed by `TEST_CA_CERT_PEM`); returns the bound port.
async fn spawn_tls_server() -> u16 {
ensure_crypto_provider();
let certs = rustls_pemfile::certs(&mut BufReader::new(TEST_SERVER_CERT_PEM.as_bytes()))
.collect::<Result<Vec<_>, _>>()
.expect("parse test server cert");
let key = rustls_pemfile::private_key(&mut BufReader::new(TEST_SERVER_KEY_PEM.as_bytes()))
.expect("read test server key")
.expect("test server key present");
let server_config = rustls::ServerConfig::builder()
.with_no_client_auth()
.with_single_cert(certs, key)
.expect("build server TLS config");
let acceptor = tokio_rustls::TlsAcceptor::from(Arc::new(server_config));

let listener = TcpListener::bind("127.0.0.1:0")
.await
.expect("bind tls listener");
let port = listener.local_addr().expect("listener local_addr").port();
tokio::spawn(async move {
loop {
let Ok((sock, _)) = listener.accept().await else {
return;
};
let acceptor = acceptor.clone();
tokio::spawn(async move {
let Ok(mut tls) = acceptor.accept(sock).await else {
return;
};
let mut buf = vec![0u8; 8192];
let mut accumulated = Vec::new();
loop {
let n = match tls.read(&mut buf).await {
Ok(0) | Err(_) => return,
Ok(n) => n,
};
accumulated.extend_from_slice(&buf[..n]);
while let Some(idx) = accumulated.windows(4).position(|w| w == b"\r\n\r\n")
{
accumulated.drain(..idx + 4);
if tls
.write_all(
b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\nConnection: keep-alive\r\n\r\nok",
)
.await
.is_err()
{
return;
}
}
}
});
}
});
port
}

fn ca_temp_file() -> NamedTempFile {
let mut f = NamedTempFile::new().expect("create temp CA file");
f.write_all(TEST_CA_CERT_PEM.as_bytes())
.expect("write temp CA file");
f
}

/// `tls_cert_file` must add the private CA *on top of* the webpki roots, so a
/// server presenting a privately-signed cert is trusted. This is the
/// documented mitigation for private-CA / TLS-intercepting-proxy setups.
#[tokio::test]
async fn tls_cert_file_trusts_private_ca() {
let port = spawn_tls_server().await;
let ca = ca_temp_file();
let cfg = config::Config {
http_protocol: Some("http1".to_string()),
flush_timeout: 5,
tls_cert_file: Some(ca.path().to_str().expect("ca path utf8").to_string()),
..config::Config::default()
};
let client = get_client(&Arc::new(cfg));
let resp = client
.get(format!("https://localhost:{port}/"))
.send()
.await
.expect("request should succeed when tls_cert_file trusts the private CA");
assert_eq!(resp.status(), 200);
}

/// Without `tls_cert_file`, the default root set (webpki on non-FIPS builds)
/// must reject a privately-signed cert. Pins the post-PR trust behavior:
/// private-CA users must opt in via `tls_cert_file`.
#[tokio::test]
async fn private_ca_rejected_without_tls_cert_file() {
let port = spawn_tls_server().await;
let cfg = config::Config {
http_protocol: Some("http1".to_string()),
flush_timeout: 5,
..config::Config::default()
};
let client = get_client(&Arc::new(cfg));
let result = client
.get(format!("https://localhost:{port}/"))
.send()
.await;
assert!(
result.is_err(),
"default roots must reject a privately-signed cert without tls_cert_file"
);
}

/// `skip_ssl_validation` remains a full escape hatch: an untrusted cert is
/// accepted regardless of the configured root set.
#[tokio::test]
async fn skip_ssl_validation_accepts_untrusted_cert() {
let port = spawn_tls_server().await;
let cfg = config::Config {
http_protocol: Some("http1".to_string()),
flush_timeout: 5,
skip_ssl_validation: true,
..config::Config::default()
};
let client = get_client(&Arc::new(cfg));
let resp = client
.get(format!("https://localhost:{port}/"))
.send()
.await
.expect("request should succeed when skip_ssl_validation is set");
assert_eq!(resp.status(), 200);
}

#[test]
fn load_custom_cert_parses_single() {
let ca = ca_temp_file();
let certs = load_custom_cert(ca.path().to_str().expect("path utf8"))
.expect("should parse one certificate");
assert_eq!(certs.len(), 1);
}

#[test]
fn load_custom_cert_parses_multiple() {
let mut f = NamedTempFile::new().expect("temp file");
f.write_all(format!("{TEST_CA_CERT_PEM}{TEST_SERVER_CERT_PEM}").as_bytes())
.expect("write certs");
let certs = load_custom_cert(f.path().to_str().expect("path utf8"))
.expect("should parse two certificates");
assert_eq!(certs.len(), 2);
}

#[test]
fn load_custom_cert_empty_file_errors() {
let f = NamedTempFile::new().expect("temp file");
assert!(
load_custom_cert(f.path().to_str().expect("path utf8")).is_err(),
"empty file must error"
);
}

#[test]
fn load_custom_cert_non_pem_errors() {
let mut f = NamedTempFile::new().expect("temp file");
f.write_all(b"this is not a certificate")
.expect("write garbage");
assert!(
load_custom_cert(f.path().to_str().expect("path utf8")).is_err(),
"non-PEM content must error"
);
}
}
12 changes: 12 additions & 0 deletions bottlecap/src/secrets/decrypt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ pub async fn resolve_secrets(
{
let before_decrypt = Instant::now();

// Dedicated client for *direct* AWS control-plane calls (KMS / Secrets
// Manager / SSM / STS, and the SnapStart container-credentials endpoint).
// It intentionally uses the default root set (compiled-in webpki roots on
// non-FIPS builds; see bottlecap/Cargo.toml) with no proxy and no
// tls_cert_file: AWS endpoints present certificates that chain to public
// AWS CAs, and DD_PROXY_HTTPS / tls_cert_file are for *Datadog* egress, not
// AWS API traffic (which goes direct, or through VPC endpoints that also
// present public certs). skip_ssl_validation is deliberately not applied
// here either — we do not disable certificate validation on the calls that
// fetch credentials and secrets. The delegated-auth path (dd_org_uuid)
// talks to a Datadog endpoint and so uses `shared_client` below, which does
// honor the proxy and tls_cert_file settings.
let builder = match create_reqwest_client_builder() {
Ok(builder) => builder,
Err(err) => {
Expand Down
52 changes: 52 additions & 0 deletions scripts/verify_tls_root_features.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env bash
#
# Guards the TLS trust configuration from cold-start hypothesis H1 (PR #1276).
#
# The non-FIPS (default) build must trust the compiled-in webpki root bundle so
# it does NOT run rustls_native_certs::load_native_certs() — a per-build
# filesystem cert scan — on the cold-start critical path. The FIPS build must
# keep native roots. Cargo unifies features additively across the dependency
# graph, so a dependency added later could silently flip reqwest's root source
# (re-introducing the scan, or cross-linking webpki into FIPS). This catches
# that before it ships. Errors are handled explicitly (no `set -e`) so a
# non-matching grep can't masquerade as a build failure.
set -uo pipefail

cd "$(dirname "$0")/../bottlecap" || exit 2

fail() {
echo "FAIL: $1" >&2
exit 1
}

# Activated feature list for reqwest under a given bottlecap feature set.
reqwest_feature_line() { # $1 = bottlecap feature set
cargo tree --no-default-features --features "$1" -i reqwest -f '{p}|{f}' 2>/dev/null \
| grep -m1 '^reqwest ' | sed 's/^[^|]*|//'
}

echo "[tls-roots] default build → reqwest must use webpki roots, not native"
def="$(reqwest_feature_line default)"
[ -n "$def" ] || fail "could not determine reqwest features for the default build"
case ",$def," in
*,rustls-tls-webpki-roots,*) : ;;
*) fail "default build is missing reqwest/rustls-tls-webpki-roots (got: $def)" ;;
esac
case "$def" in
*rustls-tls-native-roots*)
fail "default build pulls reqwest native roots — re-introduces the cold-start native-cert scan (got: $def)" ;;
esac

echo "[tls-roots] fips build → reqwest must use native roots, not webpki"
fips="$(reqwest_feature_line fips)"
[ -n "$fips" ] || fail "could not determine reqwest features for the fips build"
case "$fips" in
*rustls-tls-native-roots*) : ;;
*) fail "fips build is missing reqwest native roots (got: $fips)" ;;
esac
case ",$fips," in
*,rustls-tls-webpki-roots,*)
fail "webpki roots cross-linked into the fips build (got: $fips)" ;;
esac

echo "[tls-roots] OK — reqwest root sources are correct for both builds"
Loading