Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
d46a7ec
feat(init): add cold-start init-phase timing instrumentation
duncanista Jun 23, 2026
3748a14
perf(build): eager symbol binding (-z,now,-z,relro) for faster init
duncanista Jun 24, 2026
81e6985
perf(build): set jemalloc narenas:1 to trim init metadata + RSS
duncanista Jun 24, 2026
f3a5314
perf(build): set -Ctarget-cpu for Graviton2 / x86-64-v2
duncanista Jun 24, 2026
70749fc
chore(build): honor pinned toolchain, drop dead UPX install
duncanista Jun 24, 2026
b5de2b8
feat(http): share one TLS root config across init HTTP clients
duncanista Jun 24, 2026
a75e3cb
perf(runtime): size Tokio worker pool from Lambda memory tier
duncanista Jun 24, 2026
f6ec9db
perf(tags): cache tag strings; hoist per-call regexes to LazyLock
duncanista Jun 24, 2026
a5a3a5d
chore(deps): unify duplicate crate versions (nix, thiserror, semconv)
duncanista Jun 24, 2026
1144e14
perf(init): issue telemetry subscribe earlier, overlapping service co…
duncanista Jun 24, 2026
48694df
perf(init): parallelize init HTTP client construction
duncanista Jun 24, 2026
47f251b
perf(appsec): build WAF off the init critical path
duncanista Jun 24, 2026
ee1bd39
chore(build): drop inert --component rust-src flag
duncanista Jun 24, 2026
5bb8141
Merge remote-tracking branch 'origin/jordan.gonzalez/eager-binding/fe…
duncanista Jun 24, 2026
f99a3d5
Merge remote-tracking branch 'origin/jordan.gonzalez/target-cpu/featu…
duncanista Jun 24, 2026
7aa1984
Merge remote-tracking branch 'origin/jordan.gonzalez/build-hygiene/fe…
duncanista Jun 24, 2026
a58a2c6
Merge remote-tracking branch 'origin/jordan.gonzalez/tls-shared-roots…
duncanista Jun 24, 2026
2cbe08a
Merge remote-tracking branch 'origin/jordan.gonzalez/dedup-crates/fea…
duncanista Jun 24, 2026
f8b447a
Merge remote-tracking branch 'origin/jordan.gonzalez/tokio-runtime/fe…
duncanista Jun 24, 2026
f78d053
Merge remote-tracking branch 'origin/jordan.gonzalez/http-client-reus…
duncanista Jun 24, 2026
22e6a64
Merge remote-tracking branch 'origin/jordan.gonzalez/init-reorder/fea…
duncanista Jun 24, 2026
cc0e1aa
Merge remote-tracking branch 'origin/jordan.gonzalez/appsec-defer/fea…
duncanista Jun 24, 2026
9dc28e9
Merge remote-tracking branch 'origin/jordan.gonzalez/tag-regex-cleanu…
duncanista Jun 24, 2026
8b565fe
fixup(integration): drop stray config arg at start_api_runtime_proxy …
duncanista Jun 24, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 19 additions & 42 deletions bottlecap/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 17 additions & 5 deletions bottlecap/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ lazy_static = { version = "1.5", default-features = false }
log = { version = "0.4", default-features = false }
mime = { version = "0.3", default-features = false }
multipart = { version = "0.18", default-features = false, features = ["server"] }
nix = { version = "0.26", default-features = false, features = ["feature", "fs"] }
nix = { version = "0.29", default-features = false, features = ["feature", "fs"] }
ordered_hash_map = { version = "0.4", default-features = false }
regex = { version = "1.10", default-features = false }
reqwest = { version = "0.12.11", features = ["json", "http2"], default-features = false }
serde = { version = "1.0", default-features = false, features = ["derive"] }
serde_json = { version = "1.0", default-features = false, features = ["alloc"] }
thiserror = { version = "1.0", default-features = false }
thiserror = { version = "2.0", default-features = false }
# Transitive dependency (pulled in via cookie). Pinned to >=0.3.47 so cargo audit / CI passes (RUSTSEC-2026-0009).
time = { version = "0.3.47", default-features = false }
tokio = { version = "1.47", default-features = false, features = ["macros", "rt-multi-thread", "time"] }
Expand All @@ -46,7 +46,7 @@ rustls-webpki = { version = "0.103.13", default-features = false }
rustls-pemfile = { version = "2.0", default-features = false, features = ["std"] }
rustls-pki-types = { version = "1.0", default-features = false }
hyper-rustls = { version = "0.27.7", default-features = false }
rand = { version = "0.8", default-features = false }
rand = { version = "0.9", default-features = false }
prost = { version = "0.14", default-features = false }
tonic = { version = "0.14", features = ["transport", "codegen", "server", "channel", "router"], default-features = false }
tonic-types = { version = "0.14", default-features = false }
Expand All @@ -55,7 +55,7 @@ futures = { version = "0.3.31", default-features = false }
serde-aux = { version = "4.7", default-features = false }
serde_html_form = { version = "0.2", default-features = false }
opentelemetry-proto = { version = "0.31.0", features = ["trace", "with-serde", "gen-tonic"] }
opentelemetry-semantic-conventions = { version = "0.30", features = ["semconv_experimental"] }
opentelemetry-semantic-conventions = { version = "0.31", features = ["semconv_experimental"] }
# Pinned to <0.8.3: version 0.8.3 upgraded to openssl-probe 0.2.x which scans all cert
# directories and parses ~200 individual cert files on Lambda instead of loading a single
# bundle file, adding ~45ms to each reqwest::Client::build() call.
Expand Down Expand Up @@ -129,7 +129,19 @@ tikv-jemallocator = "0.5"

[features]
default = [
"reqwest/rustls-tls-native-roots",
# Use webpki-roots (compiled-in Mozilla CA bundle) rather than
# rustls-tls-native-roots for the non-FIPS reqwest clients. native-roots
# calls rustls_native_certs::load_native_certs() — a filesystem cert-bundle
# scan/parse — on *every* reqwest::Client::build(), and bottlecap builds two
# reqwest clients during cold-start init (the register client in
# bin/bottlecap/main.rs and the shared flush client in src/http.rs).
# webpki-roots removes that per-build scan. Trade-off: webpki trusts only the
# public Mozilla roots, so private/internal OS-installed CAs are no longer
# picked up implicitly; users relying on those must point DD_PROXY_HTTPS at a
# trusted intercept or supply the CA via tls_cert_file (which still layers on
# top via add_root_certificate in src/http.rs). FIPS builds keep native-roots
# (see the `fips` feature) and are unaffected.
"reqwest/rustls-tls-webpki-roots",
"datadog-fips/default",
"datadog-agent-config/https",
"libdd-common/https",
Expand Down
97 changes: 97 additions & 0 deletions bottlecap/src/appsec/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,37 @@
use std::env;
use std::sync::Arc;

use tokio::sync::{Mutex, OnceCell};
use tracing::error;

use crate::appsec::processor::{Error as AppSecError, Processor};
use crate::config::Config;

pub mod processor;

/// A [`Processor`] shared across the trace agent and the runtime API proxy.
///
/// The [`Arc`] provides shared ownership, so each consumer can hold its own
/// handle to the same processor. The Tokio [`Mutex`] provides exclusive access,
/// which is needed because the WAF context buffer is mutated from multiple
/// asynchronous tasks.
pub type SharedProcessor = Arc<Mutex<Processor>>;

/// A handle to a [`SharedProcessor`] whose construction is deferred off the
/// initialization critical path.
///
/// Building the WAF (zstd-decompressing and JSON-parsing the ruleset, then
/// compiling it through `libddwaf`) costs tens of milliseconds and is only
/// needed once the first request payload is evaluated — which is strictly after
/// the first `/next`. Rather than block init on that work, consumers hold this
/// awaitable handle and resolve it (via [`resolve`]) at the point where they
/// actually need the WAF.
///
/// Two layers of [`Option`] are involved, and only the inner one is part of
/// this alias:
///
/// * The outer [`Option`] lives in the return type of [`defer_processor`]. It
///   distinguishes the feature-disabled case (no handle at all) from the
///   enabled case.
/// * The inner [`Option`], stored in the [`OnceCell`], distinguishes a
///   successfully built processor (`Some`) from a build that failed (`None`),
///   in which case the feature is treated as a no-op exactly as it would have
///   been with the previous eager construction.
///
/// The cell is written at most once, so a failed build (`None`) is cached and
/// is not retried by later calls to [`resolve`].
pub type DeferredProcessor = Arc<OnceCell<Option<SharedProcessor>>>;

/// Determines whether the Serverless App & API Protection features are enabled.
#[must_use]
pub const fn is_enabled(cfg: &Config) -> bool {
Expand All @@ -16,3 +44,72 @@ pub const fn is_enabled(cfg: &Config) -> bool {
pub fn is_standalone() -> bool {
    // Reports whether `DD_APM_TRACING_ENABLED` is set to "true", compared
    // case-insensitively. An unset or non-Unicode variable yields `false`.
    //
    // `eq_ignore_ascii_case` compares in place, avoiding the temporary `String`
    // that `to_lowercase()` would allocate on every call. The result is the
    // same because the only characters that lowercase to `t`, `r`, `u` or `e`
    // are their ASCII upper-case forms.
    //
    // NOTE(review): "standalone" is presumably meant to describe running
    // without APM tracing; confirm that comparing against "true" (rather than
    // "false") is the intended polarity.
    env::var("DD_APM_TRACING_ENABLED").is_ok_and(|value| value.eq_ignore_ascii_case("true"))
}

/// Creates the [`DeferredProcessor`] handle for App & API Protection and starts
/// building the WAF in the background, so the caller never waits on the
/// CPU-bound build.
///
/// * Feature disabled: returns [`None`] synchronously. Nothing is allocated and
///   no task is spawned.
/// * Feature enabled: spawns a Tokio task that initializes the cell through
///   [`build_processor`], and returns the handle immediately.
///
/// Consumers later call [`resolve`] on the handle. A consumer that arrives
/// while the background build is still running waits for it; one that arrives
/// before the background task has started initializes the cell itself, and the
/// background task then finds the cell already set.
///
/// Because this calls [`tokio::spawn`], it must be invoked from within a Tokio
/// runtime context whenever the feature is enabled.
#[must_use]
pub fn defer_processor(cfg: &Arc<Config>) -> Option<DeferredProcessor> {
    is_enabled(cfg).then(|| {
        let handle: DeferredProcessor = Arc::new(OnceCell::new());

        // The spawned task owns its own clones of the handle and the config,
        // so the build can outlive this call.
        let warm_handle = Arc::clone(&handle);
        let warm_cfg = Arc::clone(cfg);
        tokio::spawn(async move {
            // Only the side effect of populating the cell matters here; the
            // value is read back by consumers through `resolve`.
            let _ = warm_handle
                .get_or_init(|| build_processor(warm_cfg))
                .await;
        });

        handle
    })
}

/// Returns the [`SharedProcessor`] behind a [`DeferredProcessor`], or [`None`]
/// when the WAF build did not produce one.
///
/// If the build started by [`defer_processor`] is still in flight, this waits
/// for it. If the cell has not begun initializing, this call performs the
/// initialization itself through [`build_processor`], which runs the CPU-bound
/// work on the blocking pool. Once the cell is populated, every later call
/// returns the cached outcome without building again.
pub async fn resolve(handle: &DeferredProcessor, cfg: &Arc<Config>) -> Option<SharedProcessor> {
    let built = handle
        .get_or_init(|| build_processor(Arc::clone(cfg)))
        .await;

    // Hand back a new `Arc` handle to the same processor; the cell keeps its own.
    built.as_ref().map(Arc::clone)
}

/// Constructs the App & API Protection [`Processor`] on Tokio's blocking pool
/// and wraps it as a [`SharedProcessor`].
///
/// [`Processor::new`] is run through [`tokio::task::spawn_blocking`] so that
/// the CPU-bound WAF build (ruleset decompression, JSON parsing, and WAF
/// compilation) stays off the async worker threads.
///
/// Returns [`None`] in three cases, so that the feature degrades to a no-op as
/// it did with the previous eager construction:
///
/// * the blocking task could not be joined (logged as an error);
/// * the processor reports [`AppSecError::FeatureDisabled`] (not logged);
/// * the processor fails to build for any other reason (logged as an error).
async fn build_processor(cfg: Arc<Config>) -> Option<SharedProcessor> {
    let joined = tokio::task::spawn_blocking(move || Processor::new(&cfg)).await;

    // First layer: did the blocking task itself run to completion?
    let build_result = match joined {
        Ok(build_result) => build_result,
        Err(e) => {
            error!("AAP | App & API Protection processor build task failed to join: {e}");
            return None;
        }
    };

    // Second layer: did the processor constructor succeed?
    match build_result {
        Ok(processor) => Some(Arc::new(Mutex::new(processor))),
        Err(AppSecError::FeatureDisabled) => None,
        Err(e) => {
            error!(
                "AAP | error creating App & API Protection processor, the feature will be disabled: {e}"
            );
            None
        }
    }
}
Loading
Loading