From 72288a1d7ae86e689c4afcba4138b3b866b20e46 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Wed, 13 May 2026 19:59:32 +0200 Subject: [PATCH 01/49] WIP: trace filters --- libdd-common/src/regex_engine.rs | 4 +- libdd-data-pipeline/src/agent_info/schema.rs | 12 +- .../src/trace_exporter/builder.rs | 15 ++ libdd-data-pipeline/src/trace_exporter/mod.rs | 12 ++ .../src/trace_exporter/trace_filter.rs | 186 ++++++++++++++++++ libdd-trace-utils/src/span/trace_utils.rs | 48 ++++- libdd-trace-utils/src/trace_utils.rs | 5 +- 7 files changed, 271 insertions(+), 11 deletions(-) create mode 100644 libdd-data-pipeline/src/trace_exporter/trace_filter.rs diff --git a/libdd-common/src/regex_engine.rs b/libdd-common/src/regex_engine.rs index f3674f6e12..c5fb7d7973 100644 --- a/libdd-common/src/regex_engine.rs +++ b/libdd-common/src/regex_engine.rs @@ -13,7 +13,7 @@ //! regexes requiring Unicode character class support. #[cfg(all(feature = "regex-lite", not(feature = "require-regex-full")))] -pub use regex_lite::{escape, Captures, Regex, RegexBuilder, Replacer}; +pub use regex_lite::{escape, Captures, Error, Regex, RegexBuilder, Replacer}; #[cfg(not(all(feature = "regex-lite", not(feature = "require-regex-full"))))] -pub use regex::{escape, Captures, Regex, RegexBuilder, Replacer}; +pub use regex::{escape, Captures, Error, Regex, RegexBuilder, Replacer}; diff --git a/libdd-data-pipeline/src/agent_info/schema.rs b/libdd-data-pipeline/src/agent_info/schema.rs index f0eedc97e1..3afb2c7eed 100644 --- a/libdd-data-pipeline/src/agent_info/schema.rs +++ b/libdd-data-pipeline/src/agent_info/schema.rs @@ -40,9 +40,11 @@ pub struct AgentInfoStruct { /// Container tags hash from HTTP response header pub container_tags_hash: Option, /// Exact-match tag filters applied before stats computation (root span only). - pub filter_tags: Option, + #[serde(default)] + pub filter_tags: FilterTagsConfig, /// Regex-match tag filters applied before stats computation (root span only). 
- pub filter_tags_regex: Option, + #[serde(default)] + pub filter_tags_regex: FilterTagsConfig, /// Regex patterns for root-span resource names; matching traces are excluded from stats. pub ignore_resources: Option>, } @@ -51,9 +53,11 @@ pub struct AgentInfoStruct { #[derive(Clone, Serialize, Deserialize, Default, Debug, PartialEq)] pub struct FilterTagsConfig { /// All listed filters must match at least one root-span tag for the trace to be accepted. - pub require: Option>, + #[serde(default)] + pub require: Vec, /// If any listed filter matches a root-span tag the trace is rejected. - pub reject: Option>, + #[serde(default)] + pub reject: Vec, } #[allow(missing_docs)] diff --git a/libdd-data-pipeline/src/trace_exporter/builder.rs b/libdd-data-pipeline/src/trace_exporter/builder.rs index bd157abe8d..6bb91cce85 100644 --- a/libdd-data-pipeline/src/trace_exporter/builder.rs +++ b/libdd-data-pipeline/src/trace_exporter/builder.rs @@ -1,6 +1,7 @@ // Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +use crate::agent_info::schema::FilterTagsConfig; use crate::agent_info::AgentInfoFetcher; use crate::otlp::config::{OtlpProtocol, DEFAULT_OTLP_TIMEOUT}; use crate::otlp::OtlpTraceConfig; @@ -8,6 +9,7 @@ use crate::otlp::OtlpTraceConfig; use crate::telemetry::TelemetryClientBuilder; use crate::trace_exporter::agent_response::AgentResponsePayloadVersion; use crate::trace_exporter::error::BuilderErrorKind; +use crate::trace_exporter::trace_filter::TraceFilterer; #[cfg(all(not(target_arch = "wasm32"), feature = "telemetry"))] use crate::trace_exporter::TelemetryConfig; #[cfg(not(target_arch = "wasm32"))] @@ -65,6 +67,8 @@ pub struct TraceExporterBuilder { connection_timeout: Option, otlp_endpoint: Option, otlp_headers: Vec<(String, String)>, + filter_tags: FilterTagsConfig, + filter_tags_regex: FilterTagsConfig, } impl TraceExporterBuilder { @@ -286,6 +290,16 @@ impl TraceExporterBuilder { self } + pub fn 
set_filter_tags(&mut self, filter_tags: FilterTagsConfig) -> &mut Self { + self.filter_tags = filter_tags; + self + } + + pub fn set_filter_tags_regex(&mut self, filter_tags_regex: FilterTagsConfig) -> &mut Self { + self.filter_tags_regex = filter_tags_regex; + self + } + #[allow(missing_docs)] pub fn build( self, @@ -496,6 +510,7 @@ impl TraceExporterBuilder { .agent_rates_payload_version_enabled .then(AgentResponsePayloadVersion::new), otlp_config, + trace_filterer: TraceFilterer::new(&self.filter_tags, &self.filter_tags_regex), }) } diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 561bc56e88..c43a21e5a8 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -5,6 +5,7 @@ pub mod builder; pub mod error; pub mod metrics; pub mod stats; +mod trace_filter; mod trace_serializer; // Re-export the builder @@ -236,6 +237,7 @@ pub struct TraceExporter, /// When set, traces are exported via OTLP HTTP/JSON instead of the Datadog agent. otlp_config: Option, + trace_filterer: trace_filter::TraceFilterer, } impl TraceExporter { @@ -382,6 +384,11 @@ impl Tra fn check_agent_info(&self) { if let Some(agent_info) = agent_info::get_agent_info() { if self.has_agent_info_state_changed(&agent_info) { + // FIXME: trace_filterer should only be enabled when CSS is on. (why ?) + self.trace_filterer.update_conf( + &agent_info.info.filter_tags, + &agent_info.info.filter_tags_regex, + ); match &**self.client_side_stats.status.load() { StatsComputationStatus::Disabled => {} StatsComputationStatus::DisabledByAgent { .. } => { @@ -610,6 +617,11 @@ impl Tra ) -> Result { let mut header_tags: TracerHeaderTags = self.metadata.borrow().into(); + // FIXME: when client_computed_top_level is true, looking twice for the root span here is + // inefficient and just below in process_traces_for_stats. + // Also, only do it when css is on (why ???) 
+ self.trace_filterer.filter_traces(&mut traces); + // Process stats computation and drop non-sampled (p0) chunks. // This must run before the OTLP path so that unsampled spans are not exported. let dropped_p0_stats = stats::process_traces_for_stats( diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs new file mode 100644 index 0000000000..a08e0e3c8b --- /dev/null +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -0,0 +1,186 @@ +// TODO: +// regex cache ?: https://docs.rs/lru-cache/latest/lru_cache/ + +use std::{str::FromStr, sync::Arc}; + +use libdd_common::regex_engine; +use libdd_trace_stats::span_concentrator::StatSpan; +use libdd_trace_utils::span::trace_utils::get_root_span_index_v4; +use tracing::{debug, error}; + +#[derive(Debug)] +struct TagFilter { + key: String, + value: Option, +} + +#[derive(Debug)] +struct RegexTagFilter { + key: String, + value: Option, +} + +/// Parsed config +#[derive(Debug)] +struct TraceFilteredConf { + reject: Vec, + reject_regex: Vec, + require: Vec, + require_regex: Vec, +} + +#[derive(Debug)] +pub struct TraceFilterer { + conf: arc_swap::ArcSwap, +} + +impl TagFilter { + fn from_str(tag: &str) -> Self { + if let Some((key, value)) = tag.split_once(":") { + TagFilter { + key: key.to_owned(), + value: Some(value.to_owned()), + } + } else { + TagFilter { + key: tag.to_owned(), + value: None, + } + } + } +} + +impl FromStr for RegexTagFilter { + type Err = regex_engine::Error; + + fn from_str(tag: &str) -> Result { + if let Some((key, value)) = tag.split_once(":") { + let regex = match regex_engine::Regex::new(value) { + Ok(regex) => regex, + Err(err) => { + error!("Invalid regex pattern in tag filter, skipping it: {tag}"); + return Err(err); + } + }; + Ok(RegexTagFilter { + key: key.to_owned(), + value: Some(regex), + }) + } else { + Ok(RegexTagFilter { + key: tag.to_owned(), + value: None, + }) + } + } +} + +impl TraceFilteredConf { + fn 
parse( + filter_tags: &crate::agent_info::schema::FilterTagsConfig, + filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, + ) -> Self { + TraceFilteredConf { + reject: filter_tags + .reject + .iter() + .map(|tag| TagFilter::from_str(tag)) + .collect(), + reject_regex: filter_tags_regex + .reject + .iter() + .filter_map(|regex_tag| RegexTagFilter::from_str(regex_tag).ok()) + .collect(), + require: filter_tags + .require + .iter() + .map(|tag| TagFilter::from_str(tag)) + .collect(), + require_regex: filter_tags_regex + .require + .iter() + .filter_map(|regex_tag| RegexTagFilter::from_str(regex_tag).ok()) + .collect(), + } + } +} + +impl TraceFilterer { + pub fn new( + filter_tags: &crate::agent_info::schema::FilterTagsConfig, + filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, + ) -> Self { + let conf = TraceFilteredConf::parse(filter_tags, filter_tags_regex); + Self { + conf: arc_swap::ArcSwap::from_pointee(conf), + } + } + + pub fn update_conf( + &self, + filter_tags: &crate::agent_info::schema::FilterTagsConfig, + filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, + ) { + let new_conf = TraceFilteredConf::parse(filter_tags, filter_tags_regex); + self.conf.swap(Arc::new(new_conf)); + } + + pub fn filter_traces( + &self, + traces: &mut Vec>>, + ) { + traces.retain(|trace| { + let Ok(root_span_index) = get_root_span_index_v4(trace) else { + // FIXME: in this case it's a distributed trace ? Maybe we should remove the debug + // log in get_root_span_index_v4 then + return true; + }; + let root_span = &trace[root_span_index]; + let should_drop = self.should_drop(root_span); + if should_drop { + debug!("Trace rejected as it fails to meet tag requirements. 
root: %v"); + } + !should_drop + }); + } + + fn should_drop( + &self, + root_span: &libdd_trace_utils::span::v04::Span, + ) -> bool { + let conf = self.conf.load(); + if conf.reject.iter().any(|tag| { + root_span + .get_meta(&tag.key) + .is_some_and(|value| tag.value.as_ref().is_none_or(|v| v == value)) + }) { + return true; + } + + if conf.reject_regex.iter().any(|tag| { + root_span + .get_meta(&tag.key) + .is_some_and(|value| tag.value.as_ref().is_none_or(|pat| pat.is_match(value))) + }) { + return true; + } + + if !conf.require.iter().all(|tag| { + root_span + .get_meta(&tag.key) + .is_some_and(|value| tag.value.as_ref().is_none_or(|v| v == value)) + }) { + return true; + } + + if !conf.require_regex.iter().all(|tag| { + root_span + .get_meta(&tag.key) + .is_some_and(|value| tag.value.as_ref().is_none_or(|pat| pat.is_match(value))) + }) { + return true; + } + + false + } +} diff --git a/libdd-trace-utils/src/span/trace_utils.rs b/libdd-trace-utils/src/span/trace_utils.rs index 8dd8a03b05..17910320f9 100644 --- a/libdd-trace-utils/src/span/trace_utils.rs +++ b/libdd-trace-utils/src/span/trace_utils.rs @@ -3,8 +3,10 @@ //! Trace-utils functionalities implementation for tinybytes based spans +use tracing::debug; + use super::{v04::Span, SpanText, TraceData}; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; /// Span metric the mini agent must set for the backend to recognize top level span const TOP_LEVEL_KEY: &str = "_top_level"; @@ -60,6 +62,50 @@ where } } +// FIXME: duplicated with super::get_root_span_index +pub fn get_root_span_index_v4(trace: &[Span]) -> anyhow::Result +where + T: TraceData, +{ + if trace.is_empty() { + anyhow::bail!("Cannot find root span index in an empty trace."); + } + + // Do a first pass to find if we have an obvious root span (starting from the end) since some + // clients put the root span last. 
+ for (i, span) in trace.iter().enumerate().rev() { + if span.parent_id == 0 { + return Ok(i); + } + } + + let span_ids: HashSet<_> = trace.iter().map(|span| span.span_id).collect(); + + let mut root_span_id = None; + for (i, span) in trace.iter().enumerate() { + // If a span's parent is not in the trace, it is a root + if !span_ids.contains(&span.parent_id) { + if root_span_id.is_some() { + debug!( + trace_id = &trace[0].trace_id, + "trace has multiple root spans" + ); + } + root_span_id = Some(i); + } + } + Ok(match root_span_id { + Some(i) => i, + None => { + debug!( + trace_id = &trace[0].trace_id, + "Could not find the root span for trace" + ); + trace.len() - 1 + } + }) +} + /// Return true if the span has a top level key set pub fn has_top_level(span: &Span) -> bool { span.metrics diff --git a/libdd-trace-utils/src/trace_utils.rs b/libdd-trace-utils/src/trace_utils.rs index cd4d3bfb3f..0289b9db61 100644 --- a/libdd-trace-utils/src/trace_utils.rs +++ b/libdd-trace-utils/src/trace_utils.rs @@ -360,10 +360,7 @@ pub fn get_root_span_index(trace: &[pb::Span]) -> anyhow::Result { } } - let mut span_ids: HashSet = HashSet::with_capacity(trace.len()); - for span in trace.iter() { - span_ids.insert(span.span_id); - } + let span_ids: HashSet<_> = trace.iter().map(|span| span.span_id).collect(); let mut root_span_id = None; for (i, span) in trace.iter().enumerate() { From bd2defb88db770eee0dedb9c8c523031920c0524 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Wed, 13 May 2026 20:20:29 +0200 Subject: [PATCH 02/49] feat: ignore_resources in trace filters --- libdd-data-pipeline/src/agent_info/schema.rs | 3 +- .../src/trace_exporter/builder.rs | 7 ++++- libdd-data-pipeline/src/trace_exporter/mod.rs | 1 + .../src/trace_exporter/trace_filter.rs | 31 +++++++++++++++---- 4 files changed, 34 insertions(+), 8 deletions(-) diff --git a/libdd-data-pipeline/src/agent_info/schema.rs b/libdd-data-pipeline/src/agent_info/schema.rs index 3afb2c7eed..d0274594fd 100644 --- 
a/libdd-data-pipeline/src/agent_info/schema.rs +++ b/libdd-data-pipeline/src/agent_info/schema.rs @@ -46,7 +46,8 @@ pub struct AgentInfoStruct { #[serde(default)] pub filter_tags_regex: FilterTagsConfig, /// Regex patterns for root-span resource names; matching traces are excluded from stats. - pub ignore_resources: Option>, + #[serde(default)] + pub ignore_resources: Vec, } /// Require/reject lists for tag-based trace filters exposed by the agent /info endpoint. diff --git a/libdd-data-pipeline/src/trace_exporter/builder.rs b/libdd-data-pipeline/src/trace_exporter/builder.rs index 6bb91cce85..c85069cc4a 100644 --- a/libdd-data-pipeline/src/trace_exporter/builder.rs +++ b/libdd-data-pipeline/src/trace_exporter/builder.rs @@ -69,6 +69,7 @@ pub struct TraceExporterBuilder { otlp_headers: Vec<(String, String)>, filter_tags: FilterTagsConfig, filter_tags_regex: FilterTagsConfig, + ignore_resources: Vec, } impl TraceExporterBuilder { @@ -510,7 +511,11 @@ impl TraceExporterBuilder { .agent_rates_payload_version_enabled .then(AgentResponsePayloadVersion::new), otlp_config, - trace_filterer: TraceFilterer::new(&self.filter_tags, &self.filter_tags_regex), + trace_filterer: TraceFilterer::new( + &self.filter_tags, + &self.filter_tags_regex, + &self.ignore_resources, + ), }) } diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index c43a21e5a8..a45539c976 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -388,6 +388,7 @@ impl Tra self.trace_filterer.update_conf( &agent_info.info.filter_tags, &agent_info.info.filter_tags_regex, + &agent_info.info.ignore_resources, ); match &**self.client_side_stats.status.load() { StatsComputationStatus::Disabled => {} diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index a08e0e3c8b..ea37cc0984 100644 --- 
a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -1,6 +1,3 @@ -// TODO: -// regex cache ?: https://docs.rs/lru-cache/latest/lru_cache/ - use std::{str::FromStr, sync::Arc}; use libdd_common::regex_engine; @@ -27,6 +24,7 @@ struct TraceFilteredConf { reject_regex: Vec, require: Vec, require_regex: Vec, + ignore_resources: Vec, } #[derive(Debug)] @@ -58,7 +56,9 @@ impl FromStr for RegexTagFilter { let regex = match regex_engine::Regex::new(value) { Ok(regex) => regex, Err(err) => { - error!("Invalid regex pattern in tag filter, skipping it: {tag}"); + error!( + "Invalid regex pattern in tag filter, skipping it: tag=`{tag}` err={err}" + ); return Err(err); } }; @@ -79,6 +79,7 @@ impl TraceFilteredConf { fn parse( filter_tags: &crate::agent_info::schema::FilterTagsConfig, filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, + ignore_resources: &[String], ) -> Self { TraceFilteredConf { reject: filter_tags @@ -101,6 +102,14 @@ impl TraceFilteredConf { .iter() .filter_map(|regex_tag| RegexTagFilter::from_str(regex_tag).ok()) .collect(), + ignore_resources: ignore_resources + .iter() + .filter_map(|regex| { + regex_engine::Regex::new(regex).inspect_err(|err| { + error!("Invalid regex pattern in ignore resources filter, skipping it: regex=`{regex}` err={err}") + }).ok() + }) + .collect(), } } } @@ -109,8 +118,9 @@ impl TraceFilterer { pub fn new( filter_tags: &crate::agent_info::schema::FilterTagsConfig, filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, + ignore_resources: &[String], ) -> Self { - let conf = TraceFilteredConf::parse(filter_tags, filter_tags_regex); + let conf = TraceFilteredConf::parse(filter_tags, filter_tags_regex, ignore_resources); Self { conf: arc_swap::ArcSwap::from_pointee(conf), } @@ -120,8 +130,9 @@ impl TraceFilterer { &self, filter_tags: &crate::agent_info::schema::FilterTagsConfig, filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, + 
ignore_resources: &[String], ) { - let new_conf = TraceFilteredConf::parse(filter_tags, filter_tags_regex); + let new_conf = TraceFilteredConf::parse(filter_tags, filter_tags_regex, ignore_resources); self.conf.swap(Arc::new(new_conf)); } @@ -181,6 +192,14 @@ impl TraceFilterer { return true; } + if conf + .ignore_resources + .iter() + .any(|resource_pattern| resource_pattern.is_match(root_span.resource())) + { + return true; + } + false } } From c0724c68a01d2b3496db9e3575718e73da5b2629 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Wed, 13 May 2026 20:22:43 +0200 Subject: [PATCH 03/49] fix: expose ignore_resouces in builder --- libdd-data-pipeline/src/trace_exporter/builder.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libdd-data-pipeline/src/trace_exporter/builder.rs b/libdd-data-pipeline/src/trace_exporter/builder.rs index c85069cc4a..2f7f92add6 100644 --- a/libdd-data-pipeline/src/trace_exporter/builder.rs +++ b/libdd-data-pipeline/src/trace_exporter/builder.rs @@ -291,16 +291,24 @@ impl TraceExporterBuilder { self } + // TODO: doc pub fn set_filter_tags(&mut self, filter_tags: FilterTagsConfig) -> &mut Self { self.filter_tags = filter_tags; self } + // TODO: doc pub fn set_filter_tags_regex(&mut self, filter_tags_regex: FilterTagsConfig) -> &mut Self { self.filter_tags_regex = filter_tags_regex; self } + // TODO: doc + pub fn set_ignore_resources(&mut self, ignore_resources: Vec) -> &mut Self { + self.ignore_resources = ignore_resources; + self + } + #[allow(missing_docs)] pub fn build( self, From 29eb4f0d75ce983fc9a742b945ed593f01d2e832 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 15 May 2026 12:43:39 +0200 Subject: [PATCH 04/49] fix: add missing license header --- libdd-data-pipeline/src/trace_exporter/trace_filter.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index ea37cc0984..2ffe45f3ba 100644 --- 
a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -1,3 +1,5 @@ +// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 use std::{str::FromStr, sync::Arc}; use libdd_common::regex_engine; From 58b5939fa2725a9ab43f4bbe5fd1423aa7b9b4c1 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 15 May 2026 13:55:05 +0200 Subject: [PATCH 05/49] feat: add snapshot test for trace filters --- .gitignore | 1 + Cargo.lock | 82 ++++++++ libdd-data-pipeline/Cargo.toml | 1 + libdd-data-pipeline/src/trace_exporter/mod.rs | 179 ++++++++++++++++++ ..._single_threaded_tests__trace_filters.snap | 150 +++++++++++++++ 5 files changed, 413 insertions(+) create mode 100644 libdd-data-pipeline/src/trace_exporter/snapshots/libdd_data_pipeline__trace_exporter__single_threaded_tests__trace_filters.snap diff --git a/.gitignore b/.gitignore index 5a4edd14ce..0b20ce159e 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,4 @@ examples/cxx/exporter_manager.exe examples/cxx/profiling examples/cxx/profiling.exe profile.pprof +*.snap.new diff --git a/Cargo.lock b/Cargo.lock index 4839ed758a..327a84d9e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -999,6 +999,17 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "console" +version = "0.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87" +dependencies = [ + "encode_unicode", + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "console-api" version = "0.9.0" @@ -1713,6 +1724,12 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -2670,6 +2687,21 @@ dependencies = [ "serde_core", ] +[[package]] +name = "insta" +version = "1.47.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4a6248eb93a4401ed2f37dfe8ea592d3cf05b7cf4f8efa867b6895af7e094e" +dependencies = [ + "console", + "once_cell", + "pest", + "pest_derive", + "serde", + "similar", + "tempfile", +] + [[package]] name = "io-lifetimes" version = "1.0.11" @@ -2975,6 +3007,7 @@ dependencies = [ "http", "http-body-util", "httpmock", + "insta", "libdd-capabilities", "libdd-capabilities-impl", "libdd-common", @@ -4084,6 +4117,49 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "pest" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" +dependencies = [ + "memchr", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "pest_meta" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" +dependencies = [ + "pest", + "sha2", +] + [[package]] name = "petgraph" version = "0.8.3" @@ -6162,6 +6238,12 @@ version = "1.17.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + [[package]] name = "unarray" version = "0.1.4" diff --git a/libdd-data-pipeline/Cargo.toml b/libdd-data-pipeline/Cargo.toml index 192cf04ca5..a6bc2aff54 100644 --- a/libdd-data-pipeline/Cargo.toml +++ b/libdd-data-pipeline/Cargo.toml @@ -80,6 +80,7 @@ tokio = { version = "1.23", features = [ "time", "test-util", ], default-features = false } +insta = { version = "1.47.2", features = ["json", "redactions"] } [features] default = ["https", "telemetry"] diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index a45539c976..2e9fa1a080 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -1901,10 +1901,14 @@ mod tests { #[cfg(test)] mod single_threaded_tests { + use std::collections::HashMap; + use std::sync::Mutex; + use super::*; use crate::agent_info; use httpmock::prelude::*; use libdd_capabilities_impl::NativeCapabilities; + use libdd_trace_protobuf::pb::ClientStatsPayload; use libdd_trace_utils::msgpack_encoder; use libdd_trace_utils::span::v04::SpanBytes; @@ -2203,4 +2207,179 @@ mod single_threaded_tests { "obfuscation must activate when opted in and agent supports" ); } + + #[cfg_attr(miri, ignore)] + #[test] + fn test_trace_filters_snapshot() { + // Clear the agent info cache to ensure test isolation + agent_info::clear_cache_for_test(); + + let server = MockServer::start(); + let captured_stats = Arc::new(Mutex::new(Vec::new())); + + let captured_stats_in = captured_stats.clone(); + + let mock_traces = server.mock(|when, then| { + when.method(POST) + .header("Content-type", "application/msgpack") + 
.path("/v0.4/traces"); + then.status(200).body(""); + }); + + let mock_stats = server.mock(|when, then| { + when.method(POST) + .header("Content-type", "application/msgpack") + .path("/v0.6/stats") + .is_true(move |req| { + captured_stats_in.lock().unwrap().push(req.body_vec()); + true + }); + then.status(200).body(""); + }); + + let _mock_info = server.mock(|when, then| { + when.method(GET).path("/info"); + then.status(200) + .header("content-type", "application/json") + .header("datadog-agent-state", "1") + .body( + r#"{ + "version":"1", + "client_drop_p0s":true, + "endpoints":["/v0.4/traces","/v0.6/stats"], + "filter_tags": {"reject": ["my_ignore_tag"], "require": ["my_require_tag:true"]}, + "filter_tags_regex": {"reject": ["my_regex_ignore_tag:.*true.*"]}, + "ignore_resources": [".*IGNORED.*"] + }"#, + ); + }); + + let runtime = Arc::new(SharedRuntime::new().unwrap()); + + let mut builder = TraceExporter::::builder(); + builder + .set_url(&server.url("/")) + .set_service("test") + .set_env("staging") + .set_tracer_version("v0.1") + .set_language("nodejs") + .set_language_version("1.0") + .set_language_interpreter("v8") + .set_input_format(TraceExporterInputFormat::V04) + .set_output_format(TraceExporterOutputFormat::V04) + .set_shared_runtime(runtime.clone()) + .enable_stats(Duration::from_secs(10)); + let exporter = builder.build::().unwrap(); + + // Wait for the info fetcher to get the config + while agent_info::get_agent_info().is_none() { + std::thread::sleep(Duration::from_millis(100)); + } + + let result = exporter.send( + msgpack_encoder::v04::to_vec(&[ + vec![SpanBytes { + duration: 10, + resource: "test".into(), + meta: HashMap::from_iter([("my_require_tag".into(), "true".into())]), + ..Default::default() + }], + // This one gets filtered out because it matches an ignore_resources pattern + vec![SpanBytes { + duration: 10, + resource: "test IGNORED resource test".into(), + meta: HashMap::from_iter([("my_require_tag".into(), "true".into())]), + 
..Default::default() + }], + // This one gets filtered out because one of its tag matches a reject filter_tag + vec![SpanBytes { + duration: 10, + resource: "test ignored because of reject filter_tag".into(), + meta: HashMap::from_iter([ + ("my_ignore_tag".into(), "".into()), + ("my_require_tag".into(), "true".into()), + ]), + ..Default::default() + }], + // This one gets filtered out because one of its tag matches a reject + // regex_filter_tag + vec![SpanBytes { + duration: 10, + resource: "test ignored because of reject regex_filter_tag".into(), + meta: HashMap::from_iter([ + ( + "my_regex_ignore_tag".into(), + "something-true-something".into(), + ), + ("my_require_tag".into(), "true".into()), + ]), + ..Default::default() + }], + // This one gets filtered out because it doesn't have my_require_tag:true + vec![SpanBytes { + duration: 10, + resource: "test ignored because missing a required filter_tag".into(), + meta: HashMap::from_iter([("a_useless_tag".into(), "true".into())]), + ..Default::default() + }], + // This one gets filtered out because it doesn't have my_require_tag:true + vec![SpanBytes { + duration: 10, + resource: "test ignored because wrong value on filter_tag".into(), + meta: HashMap::from_iter([("my_require_tag".into(), "false".into())]), + ..Default::default() + }], + vec![SpanBytes { + duration: 10, + resource: "test2".into(), + meta: HashMap::from_iter([("my_require_tag".into(), "true".into())]), + ..Default::default() + }], + ]) + .as_ref(), + ); + assert!(result.is_err()); + + // Wait for the stats worker to be active before shutting down to avoid potential flaky + // tests on CI where we shutdown before the stats worker had time to start + let start_time = std::time::Instant::now(); + while !exporter.is_stats_worker_active() { + if start_time.elapsed() > Duration::from_secs(10) { + panic!("Timeout waiting for stats worker to become active"); + } + std::thread::sleep(Duration::from_millis(10)); + } + + runtime.shutdown(None).unwrap(); + + // 
Wait for the mock server to process the stats + for _ in 0..1000 { + if mock_traces.calls() > 0 && mock_stats.calls() > 0 { + break; + } else { + std::thread::sleep(Duration::from_millis(10)); + } + } + + mock_traces.assert(); + mock_stats.assert(); + + // Verify snapshots matches + let captured_stats: Vec = captured_stats + .lock() + .unwrap() + .iter() + .map(|payload| rmp_serde::from_slice(payload).unwrap()) + .collect(); + insta::assert_json_snapshot!( + "trace_filters", + serde_json::to_value(&captured_stats).unwrap(), + { + "[].RuntimeID" => "[id]", + "[].Stats[].Start" => "[timestamp]", + "[].Stats[].Stats[].OkSummary" => "[sketch]", + "[].Stats[].Stats[].ErrorSummary" => "[sketch]", + } + ); + } } diff --git a/libdd-data-pipeline/src/trace_exporter/snapshots/libdd_data_pipeline__trace_exporter__single_threaded_tests__trace_filters.snap b/libdd-data-pipeline/src/trace_exporter/snapshots/libdd_data_pipeline__trace_exporter__single_threaded_tests__trace_filters.snap new file mode 100644 index 0000000000..936e3e4ab8 --- /dev/null +++ b/libdd-data-pipeline/src/trace_exporter/snapshots/libdd_data_pipeline__trace_exporter__single_threaded_tests__trace_filters.snap @@ -0,0 +1,150 @@ +--- +source: libdd-data-pipeline/src/trace_exporter/mod.rs +expression: "serde_json::to_value(&captured_stats).unwrap()" +--- +[ + { + "Hostname": "", + "Env": "staging", + "Version": "", + "Stats": [ + { + "Start": "[timestamp]", + "Duration": 10000000000, + "Stats": [ + { + "Service": "", + "Name": "", + "Resource": "test2", + "HTTPStatusCode": 0, + "Type": "", + "DBType": "", + "Hits": 1, + "Errors": 0, + "Duration": 10, + "OkSummary": "[sketch]", + "ErrorSummary": "[sketch]", + "Synthetics": false, + "TopLevelHits": 1, + "SpanKind": "", + "PeerTags": [], + "IsTraceRoot": 1, + "GRPCStatusCode": "", + "HTTPMethod": "", + "HTTPEndpoint": "", + "srv_src": "", + "SpanDerivedPrimaryTags": [] + }, + { + "Service": "", + "Name": "", + "Resource": "test", + "HTTPStatusCode": 0, + "Type": 
"", + "DBType": "", + "Hits": 1, + "Errors": 0, + "Duration": 10, + "OkSummary": "[sketch]", + "ErrorSummary": "[sketch]", + "Synthetics": false, + "TopLevelHits": 1, + "SpanKind": "", + "PeerTags": [], + "IsTraceRoot": 1, + "GRPCStatusCode": "", + "HTTPMethod": "", + "HTTPEndpoint": "", + "srv_src": "", + "SpanDerivedPrimaryTags": [] + } + ], + "AgentTimeShift": 0 + } + ], + "Lang": "", + "TracerVersion": "", + "RuntimeID": "[id]", + "Sequence": 0, + "AgentAggregation": "", + "Service": "test", + "ContainerID": "", + "Tags": [], + "GitCommitSha": "", + "ImageTag": "", + "ProcessTagsHash": 0, + "ProcessTags": "" + }, + { + "Hostname": "", + "Env": "staging", + "Version": "", + "Stats": [ + { + "Start": "[timestamp]", + "Duration": 10000000000, + "Stats": [ + { + "Service": "", + "Name": "", + "Resource": "test2", + "HTTPStatusCode": 0, + "Type": "", + "DBType": "", + "Hits": 1, + "Errors": 0, + "Duration": 10, + "OkSummary": "[sketch]", + "ErrorSummary": "[sketch]", + "Synthetics": false, + "TopLevelHits": 1, + "SpanKind": "", + "PeerTags": [], + "IsTraceRoot": 1, + "GRPCStatusCode": "", + "HTTPMethod": "", + "HTTPEndpoint": "", + "srv_src": "", + "SpanDerivedPrimaryTags": [] + }, + { + "Service": "", + "Name": "", + "Resource": "test", + "HTTPStatusCode": 0, + "Type": "", + "DBType": "", + "Hits": 1, + "Errors": 0, + "Duration": 10, + "OkSummary": "[sketch]", + "ErrorSummary": "[sketch]", + "Synthetics": false, + "TopLevelHits": 1, + "SpanKind": "", + "PeerTags": [], + "IsTraceRoot": 1, + "GRPCStatusCode": "", + "HTTPMethod": "", + "HTTPEndpoint": "", + "srv_src": "", + "SpanDerivedPrimaryTags": [] + } + ], + "AgentTimeShift": 0 + } + ], + "Lang": "", + "TracerVersion": "", + "RuntimeID": "[id]", + "Sequence": 0, + "AgentAggregation": "", + "Service": "test", + "ContainerID": "", + "Tags": [], + "GitCommitSha": "", + "ImageTag": "", + "ProcessTagsHash": 0, + "ProcessTags": "" + } +] From 859f929f04bfcc2cac33ab03269c78ea0f81c9be Mon Sep 17 00:00:00 2001 From: Oscar 
Le Dauphin Date: Fri, 15 May 2026 14:48:29 +0200 Subject: [PATCH 06/49] fix: update LICENSE-3rdparty.csv and fix snapshot ordering test: sort stats by resource for deterministic snapshot --- LICENSE-3rdparty.csv | 7 +++++++ libdd-data-pipeline/src/trace_exporter/mod.rs | 8 +++++++- ...ce_exporter__single_threaded_tests__trace_filters.snap | 8 ++++---- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 3ec30833d4..33d4dbcf81 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -85,6 +85,7 @@ colorchoice,https://github.com/rust-cli/anstyle,MIT OR Apache-2.0,The colorchoic colored,https://github.com/mackwic/colored,MPL-2.0,Thomas Wickham combine,https://github.com/Marwes/combine,MIT,Markus Westerlind concurrent-queue,https://github.com/smol-rs/concurrent-queue,Apache-2.0 OR MIT,"Stjepan Glavina , Taiki Endo , John Nunley " +console,https://github.com/console-rs/console,MIT,The console Authors console-api,https://github.com/tokio-rs/console,MIT,"Eliza Weisman , Tokio Contributors " console-subscriber,https://github.com/tokio-rs/console,MIT,"Eliza Weisman , Tokio Contributors " const_format,https://github.com/rodrimati1992/const_format_crates,Zlib,rodrimati1992 @@ -125,6 +126,7 @@ dispatch2,https://github.com/madsmtm/objc2,Zlib OR Apache-2.0 OR MIT,"Mads Marqu displaydoc,https://github.com/yaahc/displaydoc,MIT OR Apache-2.0,Jane Lusby dyn-clone,https://github.com/dtolnay/dyn-clone,MIT OR Apache-2.0,David Tolnay either,https://github.com/rayon-rs/either,MIT OR Apache-2.0,bluss +encode_unicode,https://github.com/tormol/encode_unicode,Apache-2.0 OR MIT,Torbjørn Birch Moltu encoding_rs,https://github.com/hsivonen/encoding_rs,(Apache-2.0 OR MIT) AND BSD-3-Clause,Henri Sivonen enum-as-inner,https://github.com/bluejekyll/enum-as-inner,MIT OR Apache-2.0,Benjamin Fry equivalent,https://github.com/cuviper/equivalent,Apache-2.0 OR MIT,The equivalent Authors @@ -282,6 +284,10 @@ 
parking_lot_core,https://github.com/Amanieu/parking_lot,MIT OR Apache-2.0,Amanie paste,https://github.com/dtolnay/paste,MIT OR Apache-2.0,David Tolnay path-tree,https://github.com/viz-rs/path-tree,MIT OR Apache-2.0,Fangdun Tsai percent-encoding,https://github.com/servo/rust-url,MIT OR Apache-2.0,The rust-url developers +pest,https://github.com/pest-parser/pest,MIT OR Apache-2.0,Dragoș Tiselice +pest_derive,https://github.com/pest-parser/pest,MIT OR Apache-2.0,Dragoș Tiselice +pest_generator,https://github.com/pest-parser/pest,MIT OR Apache-2.0,Dragoș Tiselice +pest_meta,https://github.com/pest-parser/pest,MIT OR Apache-2.0,Dragoș Tiselice petgraph,https://github.com/petgraph/petgraph,MIT OR Apache-2.0,"bluss, mitchmindtree" pico-args,https://github.com/RazrFalcon/pico-args,MIT,Yevhenii Reizner pin-project,https://github.com/taiki-e/pin-project,Apache-2.0 OR MIT,The pin-project Authors @@ -464,6 +470,7 @@ try-lock,https://github.com/seanmonstar/try-lock,MIT,Sean McArthur typeid,https://github.com/dtolnay/typeid,MIT OR Apache-2.0,David Tolnay typenum,https://github.com/paholg/typenum,MIT OR Apache-2.0,"Paho Lurie-Gregg , Andre Bogus " +ucd-trie,https://github.com/BurntSushi/ucd-generate,MIT OR Apache-2.0,Andrew Gallant unarray,https://github.com/cameron1024/unarray,MIT OR Apache-2.0,The unarray Authors unicase,https://github.com/seanmonstar/unicase,MIT OR Apache-2.0,Sean McArthur unicode-ident,https://github.com/dtolnay/unicode-ident,(MIT OR Apache-2.0) AND Unicode-DFS-2016,David Tolnay diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 2e9fa1a080..3dd1dc501d 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -2365,12 +2365,18 @@ mod single_threaded_tests { mock_stats.assert(); // Verify snapshots matches - let captured_stats: Vec = captured_stats + let mut captured_stats: Vec = captured_stats .lock() .unwrap() .iter() .map(|payload| 
rmp_serde::from_slice(payload).unwrap()) .collect(); + // Sort for deterministic snapshot output + for payload in &mut captured_stats { + for bucket in &mut payload.stats { + bucket.stats.sort_by(|a, b| a.resource.cmp(&b.resource)); + } + } insta::assert_json_snapshot!( "trace_filters", serde_json::to_value(&captured_stats).unwrap(), diff --git a/libdd-data-pipeline/src/trace_exporter/snapshots/libdd_data_pipeline__trace_exporter__single_threaded_tests__trace_filters.snap b/libdd-data-pipeline/src/trace_exporter/snapshots/libdd_data_pipeline__trace_exporter__single_threaded_tests__trace_filters.snap index 936e3e4ab8..cbe5725103 100644 --- a/libdd-data-pipeline/src/trace_exporter/snapshots/libdd_data_pipeline__trace_exporter__single_threaded_tests__trace_filters.snap +++ b/libdd-data-pipeline/src/trace_exporter/snapshots/libdd_data_pipeline__trace_exporter__single_threaded_tests__trace_filters.snap @@ -15,7 +15,7 @@ expression: "serde_json::to_value(&captured_stats).unwrap()" { "Service": "", "Name": "", - "Resource": "test2", + "Resource": "test", "HTTPStatusCode": 0, "Type": "", "DBType": "", @@ -38,7 +38,7 @@ expression: "serde_json::to_value(&captured_stats).unwrap()" { "Service": "", "Name": "", - "Resource": "test", + "Resource": "test2", "HTTPStatusCode": 0, "Type": "", "DBType": "", @@ -87,7 +87,7 @@ expression: "serde_json::to_value(&captured_stats).unwrap()" { "Service": "", "Name": "", - "Resource": "test2", + "Resource": "test", "HTTPStatusCode": 0, "Type": "", "DBType": "", @@ -110,7 +110,7 @@ expression: "serde_json::to_value(&captured_stats).unwrap()" { "Service": "", "Name": "", - "Resource": "test", + "Resource": "test2", "HTTPStatusCode": 0, "Type": "", "DBType": "", From 38a03ca938f06eae8bc6f8580f652eaafe2687d8 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Mon, 18 May 2026 14:19:17 +0200 Subject: [PATCH 07/49] fix: allow old PascalCase fields in agent redis/memcached obfuscation config --- libdd-data-pipeline/src/agent_info/fetcher.rs | 32 
+++++++++++++++---- libdd-data-pipeline/src/agent_info/schema.rs | 7 ++++ .../src/trace_exporter/trace_filter.rs | 16 +++++++--- 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/libdd-data-pipeline/src/agent_info/fetcher.rs b/libdd-data-pipeline/src/agent_info/fetcher.rs index 43e594caab..131e4c04fc 100644 --- a/libdd-data-pipeline/src/agent_info/fetcher.rs +++ b/libdd-data-pipeline/src/agent_info/fetcher.rs @@ -408,23 +408,43 @@ mod single_threaded_tests { }, "remove_stack_traces": false, "redis": { - "enabled": true, - "remove_all_args": false + "Enabled": true, + "RemoveAllArgs": false }, "memcached": { - "enabled": true, - "keep_command": false + "Enabled": true, + "KeepCommand": false } } }, - "peer_tags": ["db.hostname","http.host","aws.s3.bucket"] + "peer_tags": ["db.hostname","http.host","aws.s3.bucket"], + "obfuscation_version": 1, + "filter_tags": { + "reject": [ + "appsec.events.system_tests_appsec_event.value:tf-reject-exact" + ], + "require": [ + "appsec.events.system_tests_appsec_event.value:tf-require-exact" + ] + }, + "filter_tags_regex": { + "reject": [ + "appsec.events.system_tests_appsec_event.value:tf-reject-regex-.*" + ], + "require": [ + "appsec.events.system_tests_appsec_event.value:tf-require-regex-.*" + ] + }, + "ignore_resources": [ + ".*(stats-unique|StatsUniqueHandler).*" + ] }"#; fn calculate_hash(json: &str) -> String { format!("{:x}", Sha256::digest(json.as_bytes())) } - const TEST_INFO_HASH: &str = "cce54bf6e7d1bf38088a3ec809bfeec160bc52d37f70bd6b581ce3c2f7be5a65"; + const TEST_INFO_HASH: &str = "d0f6dde2c1ef3b7b776a58162d42574346e23f4677c3fafb440f5c7ca83a8a28"; #[cfg_attr(miri, ignore)] #[tokio::test] diff --git a/libdd-data-pipeline/src/agent_info/schema.rs b/libdd-data-pipeline/src/agent_info/schema.rs index d0274594fd..341e7b077e 100644 --- a/libdd-data-pipeline/src/agent_info/schema.rs +++ b/libdd-data-pipeline/src/agent_info/schema.rs @@ -104,14 +104,21 @@ pub struct HttpObfuscationConfig { #[allow(missing_docs)] 
#[derive(Clone, Serialize, Deserialize, Default, Debug, PartialEq)] +#[serde(rename_all = "PascalCase")] pub struct RedisObfuscationConfig { + // Agent sent pascal case fields here in versions <7.79.0 + #[serde(alias = "Enabled")] pub enabled: bool, + #[serde(alias = "RemoveAllArgs")] pub remove_all_args: bool, } #[allow(missing_docs)] #[derive(Clone, Serialize, Deserialize, Default, Debug, PartialEq)] pub struct MemcachedObfuscationConfig { + // Agent sent pascal case fields here in versions <7.79.0 + #[serde(alias = "Enabled")] pub enabled: bool, + #[serde(alias = "KeepCommand")] pub keep_command: bool, } diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index 2ffe45f3ba..4b9ea4b845 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -59,7 +59,9 @@ impl FromStr for RegexTagFilter { Ok(regex) => regex, Err(err) => { error!( - "Invalid regex pattern in tag filter, skipping it: tag=`{tag}` err={err}" + ?tag, + ?err, + "Invalid regex pattern in tag filter, skipping it" ); return Err(err); } @@ -107,9 +109,15 @@ impl TraceFilteredConf { ignore_resources: ignore_resources .iter() .filter_map(|regex| { - regex_engine::Regex::new(regex).inspect_err(|err| { - error!("Invalid regex pattern in ignore resources filter, skipping it: regex=`{regex}` err={err}") - }).ok() + regex_engine::Regex::new(regex) + .inspect_err(|err| { + error!( + ?regex, + ?err, + "Invalid regex pattern in ignore resources filter, skipping it" + ) + }) + .ok() }) .collect(), } From 9bab9f1395fbf0242d94f520778abe66652b94b6 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Thu, 21 May 2026 11:30:30 +0200 Subject: [PATCH 08/49] wip normalization --- Cargo.lock | 1 + libdd-data-pipeline/Cargo.toml | 1 + libdd-data-pipeline/src/trace_exporter/mod.rs | 10 ++++++++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock 
index 327a84d9e9..d17a94e517 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3017,6 +3017,7 @@ dependencies = [ "libdd-shared-runtime", "libdd-telemetry", "libdd-tinybytes", + "libdd-trace-normalization", "libdd-trace-obfuscation", "libdd-trace-protobuf", "libdd-trace-stats", diff --git a/libdd-data-pipeline/Cargo.toml b/libdd-data-pipeline/Cargo.toml index a6bc2aff54..9ef2915dd1 100644 --- a/libdd-data-pipeline/Cargo.toml +++ b/libdd-data-pipeline/Cargo.toml @@ -38,6 +38,7 @@ libdd-telemetry = { version = "5.0.0", path = "../libdd-telemetry", default-feat libdd-trace-protobuf = { version = "3.0.1", path = "../libdd-trace-protobuf" } libdd-trace-stats = { version = "2.0.0", path = "../libdd-trace-stats", default-features = false } libdd-trace-utils = { version = "3.0.1", path = "../libdd-trace-utils", default-features = false } +libdd-trace-normalization = { version = "2.0.0", path = "../libdd-trace-normalization" } libdd-trace-obfuscation = { version = "2.0.0", path = "../libdd-trace-obfuscation", optional = true } libdd-ddsketch = { version = "1.0.1", path = "../libdd-ddsketch" } libdd-dogstatsd-client = { version = "2.0.0", path = "../libdd-dogstatsd-client", default-features = false } diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 3dd1dc501d..bfd199d3cb 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -612,15 +612,21 @@ impl Tra self.handle_send_result(result, chunks, payload_len).await } + fn normalize_traces(&self, traces: &mut [Vec>]) { + for trace in traces.iter_mut() { + libdd_trace_normalization::normalizer::normalize_trace(trace); + } + } + async fn send_trace_chunks_inner( &self, mut traces: Vec>>, ) -> Result { let mut header_tags: TracerHeaderTags = self.metadata.borrow().into(); - + self.normalize_traces(&mut traces); // FIXME: when client_computed_top_level is true, looking twice for the root span here is // inefficient and 
just below in process_traces_for_stats. - // Also, only do it when css is on (why ???) + // Also, only do it when css is on self.trace_filterer.filter_traces(&mut traces); // Process stats computation and drop non-sampled (p0) chunks. From 3056bd25c916c6fdb625d483953ff45c3578e98c Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 22 May 2026 17:29:44 +0200 Subject: [PATCH 09/49] WIP --- libdd-data-pipeline/src/trace_exporter/mod.rs | 7 - .../src/trace_exporter/trace_filter.rs | 129 ++++++++++++------ .../src/normalize_utils.rs | 6 + 3 files changed, 91 insertions(+), 51 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 55733e1283..bf6a30be4b 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -577,18 +577,11 @@ impl Tra self.handle_send_result(result, chunks, payload_len).await } - fn normalize_traces(&self, traces: &mut [Vec>]) { - for trace in traces.iter_mut() { - libdd_trace_normalization::normalizer::normalize_trace(trace); - } - } - async fn send_trace_chunks_inner( &self, mut traces: Vec>>, ) -> Result { let mut header_tags: TracerHeaderTags = self.metadata.borrow().into(); - self.normalize_traces(&mut traces); // FIXME: when client_computed_top_level is true, looking twice for the root span here is // inefficient and just below in process_traces_for_stats. // Also, only do it when css is on diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index 4b9ea4b845..d120e7cd93 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -1,5 +1,7 @@ // Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +//! Trace-level filter logic for client-side stats (filter_tags, filter_tags_regex, +//! 
ignore_resources as published by the agent's /info endpoint). use std::{str::FromStr, sync::Arc}; use libdd_common::regex_engine; @@ -14,18 +16,25 @@ struct TagFilter { } #[derive(Debug)] -struct RegexTagFilter { +struct TagRegexFilter { key: String, value: Option, } +// #[derive(Debug)] +// // Slowest kind of filter where the key field is also a regex +// struct TagRegexKeyFilter { +// key: regex_engine::Regex, +// value: Option, +// } + /// Parsed config #[derive(Debug)] struct TraceFilteredConf { reject: Vec, - reject_regex: Vec, + reject_regex: Vec, require: Vec, - require_regex: Vec, + require_regex: Vec, ignore_resources: Vec, } @@ -50,7 +59,7 @@ impl TagFilter { } } -impl FromStr for RegexTagFilter { +impl FromStr for TagRegexFilter { type Err = regex_engine::Error; fn from_str(tag: &str) -> Result { @@ -66,12 +75,12 @@ impl FromStr for RegexTagFilter { return Err(err); } }; - Ok(RegexTagFilter { + Ok(TagRegexFilter { key: key.to_owned(), value: Some(regex), }) } else { - Ok(RegexTagFilter { + Ok(TagRegexFilter { key: tag.to_owned(), value: None, }) @@ -94,7 +103,7 @@ impl TraceFilteredConf { reject_regex: filter_tags_regex .reject .iter() - .filter_map(|regex_tag| RegexTagFilter::from_str(regex_tag).ok()) + .filter_map(|regex_tag| TagRegexFilter::from_str(regex_tag).ok()) .collect(), require: filter_tags .require @@ -104,7 +113,7 @@ impl TraceFilteredConf { require_regex: filter_tags_regex .require .iter() - .filter_map(|regex_tag| RegexTagFilter::from_str(regex_tag).ok()) + .filter_map(|regex_tag| TagRegexFilter::from_str(regex_tag).ok()) .collect(), ignore_resources: ignore_resources .iter() @@ -150,6 +159,7 @@ impl TraceFilterer { &self, traces: &mut Vec>>, ) { + let conf = self.conf.load(); traces.retain(|trace| { let Ok(root_span_index) = get_root_span_index_v4(trace) else { // FIXME: in this case it's a distributed trace ? 
Maybe we should remove the debug @@ -157,7 +167,7 @@ impl TraceFilterer { return true; }; let root_span = &trace[root_span_index]; - let should_drop = self.should_drop(root_span); + let should_drop = Self::should_drop(&conf, root_span); if should_drop { debug!("Trace rejected as it fails to meet tag requirements. root: %v"); } @@ -166,48 +176,79 @@ impl TraceFilterer { } fn should_drop( - &self, + conf: &TraceFilteredConf, root_span: &libdd_trace_utils::span::v04::Span, ) -> bool { - let conf = self.conf.load(); - if conf.reject.iter().any(|tag| { - root_span - .get_meta(&tag.key) - .is_some_and(|value| tag.value.as_ref().is_none_or(|v| v == value)) - }) { - return true; - } + let has_tag_filters = !conf.reject.is_empty() + || !conf.reject_regex.is_empty() + || !conf.require.is_empty() + || !conf.require_regex.is_empty(); + if has_tag_filters { + // let env_tag = root_span + // .meta + // .get("env") + // .map(|v| libdd_trace_normalization::normalize_utils::normalize_tag(v.borrow())); - if conf.reject_regex.iter().any(|tag| { - root_span - .get_meta(&tag.key) - .is_some_and(|value| tag.value.as_ref().is_none_or(|pat| pat.is_match(value))) - }) { - return true; - } + // if let Some(code) = s.meta.get("http.status_code") { + // if !is_valid_status_code(code) { + // s.meta.remove("http.status_code"); + // } + // }; - if !conf.require.iter().all(|tag| { - root_span - .get_meta(&tag.key) - .is_some_and(|value| tag.value.as_ref().is_none_or(|v| v == value)) - }) { - return true; - } + if conf.reject.iter().any(|tag| { + root_span + .get_meta(&tag.key) + .is_some_and(|value| tag.value.as_ref().is_none_or(|v| v == value)) + }) { + return true; + } + + if conf.reject_regex.iter().any(|tag| { + root_span + .get_meta(&tag.key) + .is_some_and(|value| tag.value.as_ref().is_none_or(|pat| pat.is_match(value))) + }) { + return true; + } + + if !conf.require.iter().all(|tag| { + root_span + .get_meta(&tag.key) + .is_some_and(|value| tag.value.as_ref().is_none_or(|v| v == value)) + 
}) { + return true; + } - if !conf.require_regex.iter().all(|tag| { - root_span - .get_meta(&tag.key) - .is_some_and(|value| tag.value.as_ref().is_none_or(|pat| pat.is_match(value))) - }) { - return true; + if !conf.require_regex.iter().all(|tag| { + root_span + .get_meta(&tag.key) + .is_some_and(|value| tag.value.as_ref().is_none_or(|pat| pat.is_match(value))) + }) { + return true; + } } - if conf - .ignore_resources - .iter() - .any(|resource_pattern| resource_pattern.is_match(root_span.resource())) - { - return true; + if !conf.ignore_resources.is_empty() { + let span_resource = root_span.resource(); + // Normalization + let span_resource = if span_resource.is_empty() { + let span_name = root_span.name(); + debug!( + ?span_name, + "Fixing malformed trace. Resource is empty setting span.resource=span.name" + ); + span_name + } else { + span_resource + }; + + if conf + .ignore_resources + .iter() + .any(|resource_pattern| resource_pattern.is_match(span_resource)) + { + return true; + } } false diff --git a/libdd-trace-normalization/src/normalize_utils.rs b/libdd-trace-normalization/src/normalize_utils.rs index b70093c817..fab5d43cae 100644 --- a/libdd-trace-normalization/src/normalize_utils.rs +++ b/libdd-trace-normalization/src/normalize_utils.rs @@ -83,6 +83,12 @@ pub fn normalize_parent_id(parent_id: &mut u64, trace_id: u64, span_id: u64) { } } +pub fn normalize_tag_cloned(tag: &str) -> String { + let mut tag = tag.to_owned(); + normalize_tag(&mut tag); + tag +} + pub fn normalize_tag(tag: &mut String) { // Since we know that we're only going to write valid utf8 we can work with the Vec directly let bytes = unsafe { tag.as_mut_vec() }; From e75b9bbb4ab4eefd488b5457e70297211c81824f Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Tue, 26 May 2026 16:47:26 +0200 Subject: [PATCH 10/49] feat: finish normalization on trace filters --- .../src/trace_exporter/trace_filter.rs | 152 +++++++++++------- libdd-trace-normalization/src/normalizer.rs | 16 +- 2 files 
changed, 107 insertions(+), 61 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index d120e7cd93..39cee606bc 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -2,15 +2,22 @@ // SPDX-License-Identifier: Apache-2.0 //! Trace-level filter logic for client-side stats (filter_tags, filter_tags_regex, //! ignore_resources as published by the agent's /info endpoint). -use std::{str::FromStr, sync::Arc}; +use std::{borrow::Borrow as _, str::FromStr, sync::Arc}; use libdd_common::regex_engine; use libdd_trace_stats::span_concentrator::StatSpan; use libdd_trace_utils::span::trace_utils::get_root_span_index_v4; use tracing::{debug, error}; +trait TagFilter { + /// Returns true if the given tag value matches the Filterer. + fn matches_tag_value(&self, value: &str) -> bool; + /// Getter to the filter key + fn key(&self) -> &str; +} + #[derive(Debug)] -struct TagFilter { +struct TagStringFilter { key: String, value: Option, } @@ -31,9 +38,9 @@ struct TagRegexFilter { /// Parsed config #[derive(Debug)] struct TraceFilteredConf { - reject: Vec, + reject: Vec, reject_regex: Vec, - require: Vec, + require: Vec, require_regex: Vec, ignore_resources: Vec, } @@ -43,15 +50,15 @@ pub struct TraceFilterer { conf: arc_swap::ArcSwap, } -impl TagFilter { +impl TagStringFilter { fn from_str(tag: &str) -> Self { if let Some((key, value)) = tag.split_once(":") { - TagFilter { + TagStringFilter { key: key.to_owned(), value: Some(value.to_owned()), } } else { - TagFilter { + TagStringFilter { key: tag.to_owned(), value: None, } @@ -59,6 +66,19 @@ impl TagFilter { } } +impl TagFilter for TagStringFilter { + fn matches_tag_value(&self, value: &str) -> bool { + match &self.value { + None => true, // No value requirement => Any value is a match + Some(required_value) => value == required_value, + } + } + + fn key(&self) -> &str { + 
&self.key + } +} + impl FromStr for TagRegexFilter { type Err = regex_engine::Error; @@ -88,6 +108,19 @@ impl FromStr for TagRegexFilter { } } +impl TagFilter for TagRegexFilter { + fn matches_tag_value(&self, value: &str) -> bool { + match &self.value { + None => true, // No value requirement => Any value is a match + Some(pattern) => pattern.is_match(value), + } + } + + fn key(&self) -> &str { + &self.key + } +} + impl TraceFilteredConf { fn parse( filter_tags: &crate::agent_info::schema::FilterTagsConfig, @@ -98,7 +131,7 @@ impl TraceFilteredConf { reject: filter_tags .reject .iter() - .map(|tag| TagFilter::from_str(tag)) + .map(|tag| TagStringFilter::from_str(tag)) .collect(), reject_regex: filter_tags_regex .reject @@ -108,7 +141,7 @@ impl TraceFilteredConf { require: filter_tags .require .iter() - .map(|tag| TagFilter::from_str(tag)) + .map(|tag| TagStringFilter::from_str(tag)) .collect(), require_regex: filter_tags_regex .require @@ -175,57 +208,43 @@ impl TraceFilterer { }); } + /// Checks if the trace with root span `root_span` should be dropped based on filter configuration. + /// + /// Applies a subset of trace normalization logic from `libdd-trace-normalization` before checking. 
fn should_drop( conf: &TraceFilteredConf, root_span: &libdd_trace_utils::span::v04::Span, ) -> bool { - let has_tag_filters = !conf.reject.is_empty() - || !conf.reject_regex.is_empty() - || !conf.require.is_empty() - || !conf.require_regex.is_empty(); - if has_tag_filters { - // let env_tag = root_span - // .meta - // .get("env") - // .map(|v| libdd_trace_normalization::normalize_utils::normalize_tag(v.borrow())); - - // if let Some(code) = s.meta.get("http.status_code") { - // if !is_valid_status_code(code) { - // s.meta.remove("http.status_code"); - // } - // }; - - if conf.reject.iter().any(|tag| { - root_span - .get_meta(&tag.key) - .is_some_and(|value| tag.value.as_ref().is_none_or(|v| v == value)) - }) { - return true; - } + if conf + .reject + .iter() + .any(|filter| check_tag_filter_with_normalization(filter, root_span)) + { + return true; + } - if conf.reject_regex.iter().any(|tag| { - root_span - .get_meta(&tag.key) - .is_some_and(|value| tag.value.as_ref().is_none_or(|pat| pat.is_match(value))) - }) { - return true; - } + if conf + .reject_regex + .iter() + .any(|filter| check_tag_filter_with_normalization(filter, root_span)) + { + return true; + } - if !conf.require.iter().all(|tag| { - root_span - .get_meta(&tag.key) - .is_some_and(|value| tag.value.as_ref().is_none_or(|v| v == value)) - }) { - return true; - } + if !conf + .require + .iter() + .all(|filter| check_tag_filter_with_normalization(filter, root_span)) + { + return true; + } - if !conf.require_regex.iter().all(|tag| { - root_span - .get_meta(&tag.key) - .is_some_and(|value| tag.value.as_ref().is_none_or(|pat| pat.is_match(value))) - }) { - return true; - } + if !conf + .require_regex + .iter() + .all(|filter| check_tag_filter_with_normalization(filter, root_span)) + { + return true; } if !conf.ignore_resources.is_empty() { @@ -235,7 +254,7 @@ impl TraceFilterer { let span_name = root_span.name(); debug!( ?span_name, - "Fixing malformed trace. 
Resource is empty setting span.resource=span.name" + "Trace filter fixing malformed trace. Resource is empty so using name instead" ); span_name } else { @@ -254,3 +273,28 @@ impl TraceFilterer { false } } + +fn check_tag_filter_with_normalization( + filter: &impl TagFilter, + root_span: &libdd_trace_utils::span::v04::Span, +) -> bool { + let Some(value) = root_span.meta.get(filter.key()) else { + return false; + }; + let value = value.borrow(); + match filter.key() { + "env" => { + let normalized_value = + libdd_trace_normalization::normalize_utils::normalize_tag_cloned(value); + filter.matches_tag_value(&normalized_value) + } + "http.status_code" => { + if !libdd_trace_normalization::normalizer::is_valid_http_status_code(value) { + debug!(?value,"trace filter on http.status_code ignored because root span's `http.status_code` is invalid"); + return false; + } + filter.matches_tag_value(value) + } + _ => filter.matches_tag_value(value), + } +} diff --git a/libdd-trace-normalization/src/normalizer.rs b/libdd-trace-normalization/src/normalizer.rs index 7450dad908..ee5790cbc6 100644 --- a/libdd-trace-normalization/src/normalizer.rs +++ b/libdd-trace-normalization/src/normalizer.rs @@ -35,7 +35,7 @@ pub(crate) fn normalize_span(s: &mut pb::Span) -> anyhow::Result<()> { } if let Some(code) = s.meta.get("http.status_code") { - if !is_valid_status_code(code) { + if !is_valid_http_status_code(code) { s.meta.remove("http.status_code"); } }; @@ -43,7 +43,7 @@ pub(crate) fn normalize_span(s: &mut pb::Span) -> anyhow::Result<()> { Ok(()) } -pub(crate) fn is_valid_status_code(sc: &str) -> bool { +pub fn is_valid_http_status_code(sc: &str) -> bool { if let Ok(code) = sc.parse::() { return (100..600).contains(&code); } @@ -476,11 +476,13 @@ mod tests { #[test] fn test_is_valid_status_code() { - assert!(normalizer::is_valid_status_code("100")); - assert!(normalizer::is_valid_status_code("599")); - assert!(!normalizer::is_valid_status_code("99")); - 
assert!(!normalizer::is_valid_status_code("600")); - assert!(!normalizer::is_valid_status_code("Invalid status code")); + assert!(normalizer::is_valid_http_status_code("100")); + assert!(normalizer::is_valid_http_status_code("599")); + assert!(!normalizer::is_valid_http_status_code("99")); + assert!(!normalizer::is_valid_http_status_code("600")); + assert!(!normalizer::is_valid_http_status_code( + "Invalid status code" + )); } #[test] From 9ffd1c86af9d74df0b4bf537cd728b49a8ac94e5 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Tue, 26 May 2026 17:02:45 +0200 Subject: [PATCH 11/49] fix: remove bad change to agent_info's schema --- libdd-data-pipeline/src/agent_info/schema.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/libdd-data-pipeline/src/agent_info/schema.rs b/libdd-data-pipeline/src/agent_info/schema.rs index 117e06ffa8..0b70b77164 100644 --- a/libdd-data-pipeline/src/agent_info/schema.rs +++ b/libdd-data-pipeline/src/agent_info/schema.rs @@ -104,7 +104,6 @@ pub struct HttpObfuscationConfig { #[allow(missing_docs)] #[derive(Clone, Serialize, Deserialize, Default, Debug, PartialEq)] -#[serde(rename_all = "PascalCase")] pub struct RedisObfuscationConfig { // Agent sent pascal case fields here in versions <7.79.0 #[serde(alias = "Enabled")] From c2596f13100cef66de1fdb9f2e9865e74d1d6d5c Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Tue, 26 May 2026 18:29:41 +0200 Subject: [PATCH 12/49] feat: regex key filters --- .../src/trace_exporter/trace_filter.rs | 278 ++++++++++++------ 1 file changed, 180 insertions(+), 98 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index 39cee606bc..7280daebfd 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 //! Trace-level filter logic for client-side stats (filter_tags, filter_tags_regex, //! 
ignore_resources as published by the agent's /info endpoint). -use std::{borrow::Borrow as _, str::FromStr, sync::Arc}; +use std::{borrow::Borrow as _, collections::HashMap, sync::Arc}; use libdd_common::regex_engine; use libdd_trace_stats::span_concentrator::StatSpan; @@ -12,8 +12,10 @@ use tracing::{debug, error}; trait TagFilter { /// Returns true if the given tag value matches the Filterer. fn matches_tag_value(&self, value: &str) -> bool; - /// Getter to the filter key - fn key(&self) -> &str; + fn find_tag<'a, T: libdd_trace_utils::span::SpanText>( + &'a self, + meta: &'a HashMap, + ) -> Option<(&'a str, &'a T)>; } #[derive(Debug)] @@ -28,20 +30,24 @@ struct TagRegexFilter { value: Option, } -// #[derive(Debug)] -// // Slowest kind of filter where the key field is also a regex -// struct TagRegexKeyFilter { -// key: regex_engine::Regex, -// value: Option, -// } +#[derive(Debug)] +// Slowest kind of filter where the key field is also a regex +struct TagRegexKeyFilter { + key: regex_engine::Regex, + value: Option, +} /// Parsed config #[derive(Debug)] struct TraceFilteredConf { reject: Vec, reject_regex: Vec, + reject_key_regex: Vec, + require: Vec, require_regex: Vec, + require_key_regex: Vec, + ignore_resources: Vec, } @@ -74,41 +80,31 @@ impl TagFilter for TagStringFilter { } } - fn key(&self) -> &str { - &self.key + fn find_tag<'a, T: libdd_trace_utils::span::SpanText>( + &'a self, + meta: &'a HashMap, + ) -> std::option::Option<(&'a str, &'a T)> { + Some((self.key.as_ref(), meta.get(&self.key)?)) } } -impl FromStr for TagRegexFilter { - type Err = regex_engine::Error; - - fn from_str(tag: &str) -> Result { - if let Some((key, value)) = tag.split_once(":") { - let regex = match regex_engine::Regex::new(value) { - Ok(regex) => regex, - Err(err) => { - error!( - ?tag, - ?err, - "Invalid regex pattern in tag filter, skipping it" - ); - return Err(err); - } - }; - Ok(TagRegexFilter { - key: key.to_owned(), - value: Some(regex), - }) - } else { - 
Ok(TagRegexFilter { - key: tag.to_owned(), - value: None, - }) +impl TagFilter for TagRegexFilter { + fn matches_tag_value(&self, value: &str) -> bool { + match &self.value { + None => true, // No value requirement => Any value is a match + Some(pattern) => pattern.is_match(value), } } + + fn find_tag<'a, T: libdd_trace_utils::span::SpanText>( + &'a self, + meta: &'a HashMap, + ) -> std::option::Option<(&'a str, &'a T)> { + Some((self.key.as_ref(), meta.get(&self.key)?)) + } } -impl TagFilter for TagRegexFilter { +impl TagFilter for TagRegexKeyFilter { fn matches_tag_value(&self, value: &str) -> bool { match &self.value { None => true, // No value requirement => Any value is a match @@ -116,52 +112,122 @@ impl TagFilter for TagRegexFilter { } } - fn key(&self) -> &str { - &self.key + fn find_tag<'a, T: libdd_trace_utils::span::SpanText>( + &self, + meta: &'a HashMap, + ) -> std::option::Option<(&'a str, &'a T)> { + meta.iter() + .find(|&(key, _)| self.key.is_match(key.borrow())) + .map(|(key, value)| (key.borrow(), value)) } } +/// Compile a regex anchored to the full string. +fn compile_anchored(pattern: &str) -> Result { + regex_engine::Regex::new(&format!("^(?:{pattern})$")) +} + +/// Returns `true` when `key` contains no regex metacharacters and can be used for a direct +/// O(1) lookup. `.` is intentionally treated as a literal (not a wildcard) in key patterns. +fn is_literal_key(key: &str) -> bool { + !key.contains([ + '*', '+', '?', '[', ']', '(', ')', '{', '}', '^', '$', ',', '\\', + ]) +} + impl TraceFilteredConf { + /// Compile all `filter_tags_regex` entries, splitting into literal-key (fast) and + /// regex-key (slow) lists based on whether the key portion contains metacharacters. 
+ fn compile_regex_filters(filters: &[String]) -> (Vec, Vec) { + let mut tag_regex_filters = Vec::new(); + let mut tag_regex_key_filters = Vec::new(); + for filter in filters { + let (key, value) = match filter.split_once(":") { + Some((key, value)) => (key, Some(value)), + None => (filter.as_ref(), None), + }; + + let value = match value { + Some(value) => match compile_anchored(value) { + Ok(regex) => Some(regex), + Err(err) => { + error!( + ?filter, + ?err, + "Invalid regex pattern in tag filter's value, skipping it" + ); + // FIXME: dd-trace-php considers that if the value pattern is bad, we still keep the filter by only matching on the key. I find it more intuitive to drop the filter altogether + continue; + } + }, + None => None, + }; + + if is_literal_key(key) { + tag_regex_filters.push(TagRegexFilter { + key: key.to_owned(), + value, + }); + } else { + match compile_anchored(key) { + Ok(key) => tag_regex_key_filters.push(TagRegexKeyFilter { key, value }), + Err(err) => { + error!( + ?filter, + ?err, + "Invalid regex pattern in tag filter's key, skipping it" + ); + continue; + } + } + } + } + + (tag_regex_filters, tag_regex_key_filters) + } + fn parse( filter_tags: &crate::agent_info::schema::FilterTagsConfig, filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, ignore_resources: &[String], ) -> Self { + let (require_regex, require_key_regex) = + Self::compile_regex_filters(&filter_tags_regex.require); + let (reject_regex, reject_key_regex) = + Self::compile_regex_filters(&filter_tags_regex.reject); + + let reject = filter_tags + .reject + .iter() + .map(|tag| TagStringFilter::from_str(tag)) + .collect(); + let require = filter_tags + .require + .iter() + .map(|tag| TagStringFilter::from_str(tag)) + .collect(); + let ignore_resources = ignore_resources + .iter() + .filter_map(|regex| { + compile_anchored(regex) + .inspect_err(|err| { + error!( + ?regex, + ?err, + "Invalid regex pattern in ignore resources filter, skipping it" + ) + }) + .ok() + 
}) + .collect(); TraceFilteredConf { - reject: filter_tags - .reject - .iter() - .map(|tag| TagStringFilter::from_str(tag)) - .collect(), - reject_regex: filter_tags_regex - .reject - .iter() - .filter_map(|regex_tag| TagRegexFilter::from_str(regex_tag).ok()) - .collect(), - require: filter_tags - .require - .iter() - .map(|tag| TagStringFilter::from_str(tag)) - .collect(), - require_regex: filter_tags_regex - .require - .iter() - .filter_map(|regex_tag| TagRegexFilter::from_str(regex_tag).ok()) - .collect(), - ignore_resources: ignore_resources - .iter() - .filter_map(|regex| { - regex_engine::Regex::new(regex) - .inspect_err(|err| { - error!( - ?regex, - ?err, - "Invalid regex pattern in ignore resources filter, skipping it" - ) - }) - .ok() - }) - .collect(), + reject, + require, + reject_regex, + require_regex, + reject_key_regex, + require_key_regex, + ignore_resources, } } } @@ -218,7 +284,7 @@ impl TraceFilterer { if conf .reject .iter() - .any(|filter| check_tag_filter_with_normalization(filter, root_span)) + .any(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) { return true; } @@ -226,7 +292,15 @@ impl TraceFilterer { if conf .reject_regex .iter() - .any(|filter| check_tag_filter_with_normalization(filter, root_span)) + .any(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) + { + return true; + } + + if conf + .reject_key_regex + .iter() + .any(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) { return true; } @@ -234,7 +308,7 @@ impl TraceFilterer { if !conf .require .iter() - .all(|filter| check_tag_filter_with_normalization(filter, root_span)) + .all(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) { return true; } @@ -242,7 +316,15 @@ impl TraceFilterer { if !conf .require_regex .iter() - .all(|filter| check_tag_filter_with_normalization(filter, root_span)) + .all(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) + { + return true; + } + + if 
!conf + .require_key_regex + .iter() + .all(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) { return true; } @@ -272,29 +354,29 @@ impl TraceFilterer { false } -} -fn check_tag_filter_with_normalization( - filter: &impl TagFilter, - root_span: &libdd_trace_utils::span::v04::Span, -) -> bool { - let Some(value) = root_span.meta.get(filter.key()) else { - return false; - }; - let value = value.borrow(); - match filter.key() { - "env" => { - let normalized_value = - libdd_trace_normalization::normalize_utils::normalize_tag_cloned(value); - filter.matches_tag_value(&normalized_value) - } - "http.status_code" => { - if !libdd_trace_normalization::normalizer::is_valid_http_status_code(value) { - debug!(?value,"trace filter on http.status_code ignored because root span's `http.status_code` is invalid"); - return false; + fn check_tag_filter_with_normalization( + filter: &impl TagFilter, + root_span: &libdd_trace_utils::span::v04::Span, + ) -> bool { + let Some((key, value)) = filter.find_tag(&root_span.meta) else { + return false; + }; + let value = value.borrow(); + match key { + "env" => { + let normalized_value = + libdd_trace_normalization::normalize_utils::normalize_tag_cloned(value); + filter.matches_tag_value(&normalized_value) + } + "http.status_code" => { + if !libdd_trace_normalization::normalizer::is_valid_http_status_code(value) { + debug!(?value,"trace filter on http.status_code ignored because root span's `http.status_code` is invalid"); + return false; + } + filter.matches_tag_value(value) } - filter.matches_tag_value(value) + _ => filter.matches_tag_value(value), } - _ => filter.matches_tag_value(value), } } From b200c2f8f3d7392bc6f45054471e8884a9719197 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Tue, 26 May 2026 18:35:48 +0200 Subject: [PATCH 13/49] fix: fmt --- libdd-data-pipeline/src/trace_exporter/trace_filter.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git 
a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index 7280daebfd..0bee91fe81 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -156,7 +156,9 @@ impl TraceFilteredConf { ?err, "Invalid regex pattern in tag filter's value, skipping it" ); - // FIXME: dd-trace-php considers that if the value pattern is bad, we still keep the filter by only matching on the key. I find it more intuitive to drop the filter altogether + // FIXME: dd-trace-php considers that if the value pattern is bad, we still + // keep the filter by only matching on the key. I find it more intuitive to + // drop the filter altogether continue; } }, @@ -274,9 +276,11 @@ impl TraceFilterer { }); } - /// Checks if the trace with root span `root_span` should be dropped based on filter configuration. + /// Checks if the trace with root span `root_span` should be dropped based on filter + /// configuration. /// - /// Applies a subset of trace normalization logic from `libdd-trace-normalization` before checking. + /// Applies a subset of trace normalization logic from `libdd-trace-normalization` before + /// checking. 
fn should_drop( conf: &TraceFilteredConf, root_span: &libdd_trace_utils::span::v04::Span, From 4e0ff40bf4435cefd02ba2f8046b3dba9be13a57 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Wed, 27 May 2026 11:47:53 +0200 Subject: [PATCH 14/49] add tests --- .../src/trace_exporter/trace_filter.rs | 313 ++++++++++++++++++ 1 file changed, 313 insertions(+) diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index 0bee91fe81..c8aff018cb 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -384,3 +384,316 @@ impl TraceFilterer { } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::agent_info::schema::FilterTagsConfig; + use libdd_trace_utils::span::v04::SpanBytes; + use std::collections::HashMap; + + // ---- helpers ---- + + fn ftc(require: &[&str], reject: &[&str]) -> FilterTagsConfig { + FilterTagsConfig { + require: require.iter().map(|s| s.to_string()).collect(), + reject: reject.iter().map(|s| s.to_string()).collect(), + } + } + + fn no_tags() -> FilterTagsConfig { + FilterTagsConfig::default() + } + + fn span_with(resource: &'static str, meta: &[(&'static str, &'static str)]) -> SpanBytes { + SpanBytes { + service: "svc".into(), + name: "op".into(), + resource: resource.into(), + span_id: 1, + trace_id: 1, + parent_id: 0, + meta: meta + .iter() + .map(|(k, v)| ((*k).into(), (*v).into())) + .collect::>(), + ..Default::default() + } + } + + fn one_trace(s: SpanBytes) -> Vec> { + vec![vec![s]] + } + + fn reject_str(tags: &[&str]) -> TraceFilterer { + TraceFilterer::new(&ftc(&[], tags), &no_tags(), &[]) + } + + fn require_str(tags: &[&str]) -> TraceFilterer { + TraceFilterer::new(&ftc(tags, &[]), &no_tags(), &[]) + } + + fn reject_regex(tags: &[&str]) -> TraceFilterer { + TraceFilterer::new(&no_tags(), &ftc(&[], tags), &[]) + } + + fn require_regex(tags: &[&str]) -> TraceFilterer { + 
TraceFilterer::new(&no_tags(), &ftc(tags, &[]), &[]) + } + + fn ignore_resources(patterns: &[&str]) -> TraceFilterer { + let pats: Vec = patterns.iter().map(|s| s.to_string()).collect(); + TraceFilterer::new(&no_tags(), &no_tags(), &pats) + } + + // ---- reject (TagStringFilter) ---- + + #[test] + fn reject_string_exact_match_drops() { + let mut traces = one_trace(span_with("r", &[("env", "prod")])); + reject_str(&["env:prod"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + #[test] + fn reject_string_wrong_value_keeps() { + let mut traces = one_trace(span_with("r", &[("env", "staging")])); + reject_str(&["env:prod"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn reject_string_missing_tag_keeps() { + let mut traces = one_trace(span_with("r", &[])); + reject_str(&["env:prod"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn reject_string_key_only_matches_any_value() { + // A key-only filter (no `:value` part) matches regardless of the tag's value. + let mut traces = one_trace(span_with("r", &[("env", "anything")])); + reject_str(&["env"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + // ---- reject_regex (TagRegexFilter – literal key, regex value) ---- + + #[test] + fn reject_regex_value_match_drops() { + let mut traces = one_trace(span_with("r", &[("env", "production")])); + reject_regex(&["env:prod.*"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + #[test] + fn reject_regex_value_no_match_keeps() { + let mut traces = one_trace(span_with("r", &[("env", "staging")])); + reject_regex(&["env:prod.*"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + // ---- reject_key_regex (TagRegexKeyFilter – regex key) ---- + // A key pattern containing `*` triggers the key-regex path. + + #[test] + fn reject_key_regex_key_and_value_match_drops() { + // "err.*" contains `*` → key is compiled as a regex; matches "error". 
+ let mut traces = one_trace(span_with("r", &[("error", "timeout")])); + reject_regex(&["err.*:timeout"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + #[test] + fn reject_key_regex_wrong_value_keeps() { + let mut traces = one_trace(span_with("r", &[("error", "network")])); + reject_regex(&["err.*:timeout"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn reject_key_regex_missing_key_keeps() { + let mut traces = one_trace(span_with("r", &[])); + reject_regex(&["err.*:timeout"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + // ---- require (TagStringFilter) ---- + + #[test] + fn require_string_present_and_matching_keeps() { + let mut traces = one_trace(span_with("r", &[("env", "prod")])); + require_str(&["env:prod"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn require_string_missing_tag_drops() { + let mut traces = one_trace(span_with("r", &[])); + require_str(&["env:prod"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + #[test] + fn require_string_wrong_value_drops() { + let mut traces = one_trace(span_with("r", &[("env", "staging")])); + require_str(&["env:prod"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + // ---- require_regex (TagRegexFilter – literal key, regex value) ---- + + #[test] + fn require_regex_value_match_keeps() { + let mut traces = one_trace(span_with("r", &[("env", "production")])); + require_regex(&["env:prod.*"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn require_regex_missing_drops() { + let mut traces = one_trace(span_with("r", &[])); + require_regex(&["env:prod.*"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + // ---- require_key_regex (TagRegexKeyFilter – regex key) ---- + + #[test] + fn require_key_regex_key_exists_keeps() { + // Key-only pattern → value: None → any tag value satisfies the requirement. 
+ let mut traces = one_trace(span_with("r", &[("error", "any")])); + require_regex(&["err.*"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn require_key_regex_missing_key_drops() { + let mut traces = one_trace(span_with("r", &[])); + require_regex(&["err.*"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + // ---- ignore_resources ---- + + #[test] + fn ignore_resources_match_drops() { + let mut traces = one_trace(span_with("GET /health", &[])); + ignore_resources(&["GET /health"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + #[test] + fn ignore_resources_no_match_keeps() { + let mut traces = one_trace(span_with("POST /data", &[])); + ignore_resources(&["GET /health"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn ignore_resources_empty_resource_falls_back_to_name() { + // When resource is empty the span's name field is used for matching. + // The helper sets name = "op", so ignore_resources("op") must drop it. + let mut traces = one_trace(span_with("", &[])); + ignore_resources(&["op"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + // ---- env tag normalization ---- + + #[test] + fn env_normalization_reject_matches_after_lowercase() { + // normalize_tag_cloned("PROD") == "prod"; the reject filter "env:prod" must fire. + let mut traces = one_trace(span_with("r", &[("env", "PROD")])); + reject_str(&["env:prod"]).filter_traces(&mut traces); + assert!( + traces.is_empty(), + "env value should be normalized before matching" + ); + } + + #[test] + fn env_normalization_require_matches_normalized_value() { + // normalize_tag_cloned("Prod Env") == "prod_env" (uppercase + space → underscore). 
+ let mut traces = one_trace(span_with("r", &[("env", "Prod Env")])); + require_str(&["env:prod_env"]).filter_traces(&mut traces); + assert_eq!( + traces.len(), + 1, + "normalized env should satisfy the require filter" + ); + } + + // ---- http.status_code special handling ---- + + #[test] + fn http_status_code_invalid_value_skips_reject_filter() { + // is_valid_http_status_code("abc") == false → check_tag_filter returns false + // → reject never fires → trace kept even though the raw value equals the filter. + let mut traces = one_trace(span_with("r", &[("http.status_code", "abc")])); + reject_str(&["http.status_code:abc"]).filter_traces(&mut traces); + assert_eq!( + traces.len(), + 1, + "invalid status code should not trigger the filter" + ); + } + + #[test] + fn http_status_code_valid_value_triggers_reject_filter() { + let mut traces = one_trace(span_with("r", &[("http.status_code", "500")])); + reject_str(&["http.status_code:500"]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + // ---- update_conf ---- + + #[test] + fn update_conf_takes_effect() { + let f = TraceFilterer::new(&no_tags(), &no_tags(), &[]); + + // No filters: trace is kept. + let mut traces = one_trace(span_with("r", &[("env", "prod")])); + f.filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + + // Swap in a reject filter: same trace is now dropped. 
+ f.update_conf(&ftc(&[], &["env:prod"]), &no_tags(), &[]); + let mut traces = one_trace(span_with("r", &[("env", "prod")])); + f.filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + // ---- edge / misc ---- + + #[test] + fn multiple_traces_partial_rejection() { + let f = reject_str(&["env:prod"]); + let mut traces = vec![ + vec![span_with("r", &[("env", "prod")])], // dropped + vec![span_with("r", &[("env", "staging")])], // kept + ]; + f.filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn no_filters_keeps_all_traces() { + let f = TraceFilterer::new(&no_tags(), &no_tags(), &[]); + let mut traces = vec![ + vec![span_with("r1", &[])], + vec![span_with("r2", &[("env", "prod")])], + ]; + f.filter_traces(&mut traces); + assert_eq!(traces.len(), 2); + } + + #[test] + fn invalid_regex_in_filter_is_skipped_gracefully() { + // A bad regex pattern is silently discarded; no panic, trace is kept. + let f = reject_regex(&["env:[invalid"]); + let mut traces = one_trace(span_with("r", &[("env", "anything")])); + f.filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } +} From d22bab9dc1f996a60b9ef5b73b527f4b6356c88c Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Mon, 1 Jun 2026 15:18:38 +0200 Subject: [PATCH 15/49] revert regex key filters --- .../src/trace_exporter/trace_filter.rs | 149 ++++-------------- 1 file changed, 30 insertions(+), 119 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index c8aff018cb..31369e9d31 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -2,9 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 //! Trace-level filter logic for client-side stats (filter_tags, filter_tags_regex, //! ignore_resources as published by the agent's /info endpoint). 
-use std::{borrow::Borrow as _, collections::HashMap, sync::Arc}; +use std::{borrow::Borrow as _, sync::Arc}; -use libdd_common::regex_engine; +use libdd_common::regex_engine::Regex; use libdd_trace_stats::span_concentrator::StatSpan; use libdd_trace_utils::span::trace_utils::get_root_span_index_v4; use tracing::{debug, error}; @@ -12,10 +12,8 @@ use tracing::{debug, error}; trait TagFilter { /// Returns true if the given tag value matches the Filterer. fn matches_tag_value(&self, value: &str) -> bool; - fn find_tag<'a, T: libdd_trace_utils::span::SpanText>( - &'a self, - meta: &'a HashMap, - ) -> Option<(&'a str, &'a T)>; + // Getter to the key field + fn key(&self) -> &str; } #[derive(Debug)] @@ -27,14 +25,7 @@ struct TagStringFilter { #[derive(Debug)] struct TagRegexFilter { key: String, - value: Option, -} - -#[derive(Debug)] -// Slowest kind of filter where the key field is also a regex -struct TagRegexKeyFilter { - key: regex_engine::Regex, - value: Option, + value: Option, } /// Parsed config @@ -42,13 +33,11 @@ struct TagRegexKeyFilter { struct TraceFilteredConf { reject: Vec, reject_regex: Vec, - reject_key_regex: Vec, require: Vec, require_regex: Vec, - require_key_regex: Vec, - ignore_resources: Vec, + ignore_resources: Vec, } #[derive(Debug)] @@ -80,11 +69,8 @@ impl TagFilter for TagStringFilter { } } - fn find_tag<'a, T: libdd_trace_utils::span::SpanText>( - &'a self, - meta: &'a HashMap, - ) -> std::option::Option<(&'a str, &'a T)> { - Some((self.key.as_ref(), meta.get(&self.key)?)) + fn key(&self) -> &str { + &self.key } } @@ -96,51 +82,14 @@ impl TagFilter for TagRegexFilter { } } - fn find_tag<'a, T: libdd_trace_utils::span::SpanText>( - &'a self, - meta: &'a HashMap, - ) -> std::option::Option<(&'a str, &'a T)> { - Some((self.key.as_ref(), meta.get(&self.key)?)) + fn key(&self) -> &str { + &self.key } } -impl TagFilter for TagRegexKeyFilter { - fn matches_tag_value(&self, value: &str) -> bool { - match &self.value { - None => true, // No value 
requirement => Any value is a match - Some(pattern) => pattern.is_match(value), - } - } - - fn find_tag<'a, T: libdd_trace_utils::span::SpanText>( - &self, - meta: &'a HashMap, - ) -> std::option::Option<(&'a str, &'a T)> { - meta.iter() - .find(|&(key, _)| self.key.is_match(key.borrow())) - .map(|(key, value)| (key.borrow(), value)) - } -} - -/// Compile a regex anchored to the full string. -fn compile_anchored(pattern: &str) -> Result { - regex_engine::Regex::new(&format!("^(?:{pattern})$")) -} - -/// Returns `true` when `key` contains no regex metacharacters and can be used for a direct -/// O(1) lookup. `.` is intentionally treated as a literal (not a wildcard) in key patterns. -fn is_literal_key(key: &str) -> bool { - !key.contains([ - '*', '+', '?', '[', ']', '(', ')', '{', '}', '^', '$', ',', '\\', - ]) -} - impl TraceFilteredConf { - /// Compile all `filter_tags_regex` entries, splitting into literal-key (fast) and - /// regex-key (slow) lists based on whether the key portion contains metacharacters. - fn compile_regex_filters(filters: &[String]) -> (Vec, Vec) { + fn compile_regex_filters(filters: &[String]) -> Vec { let mut tag_regex_filters = Vec::new(); - let mut tag_regex_key_filters = Vec::new(); for filter in filters { let (key, value) = match filter.split_once(":") { Some((key, value)) => (key, Some(value)), @@ -148,7 +97,7 @@ impl TraceFilteredConf { }; let value = match value { - Some(value) => match compile_anchored(value) { + Some(value) => match Regex::new(value) { Ok(regex) => Some(regex), Err(err) => { error!( @@ -156,36 +105,19 @@ impl TraceFilteredConf { ?err, "Invalid regex pattern in tag filter's value, skipping it" ); - // FIXME: dd-trace-php considers that if the value pattern is bad, we still - // keep the filter by only matching on the key. 
I find it more intuitive to - // drop the filter altogether continue; } }, None => None, }; - if is_literal_key(key) { - tag_regex_filters.push(TagRegexFilter { - key: key.to_owned(), - value, - }); - } else { - match compile_anchored(key) { - Ok(key) => tag_regex_key_filters.push(TagRegexKeyFilter { key, value }), - Err(err) => { - error!( - ?filter, - ?err, - "Invalid regex pattern in tag filter's key, skipping it" - ); - continue; - } - } - } + tag_regex_filters.push(TagRegexFilter { + key: key.to_owned(), + value, + }); } - (tag_regex_filters, tag_regex_key_filters) + tag_regex_filters } fn parse( @@ -193,10 +125,8 @@ impl TraceFilteredConf { filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, ignore_resources: &[String], ) -> Self { - let (require_regex, require_key_regex) = - Self::compile_regex_filters(&filter_tags_regex.require); - let (reject_regex, reject_key_regex) = - Self::compile_regex_filters(&filter_tags_regex.reject); + let require_regex = Self::compile_regex_filters(&filter_tags_regex.require); + let reject_regex = Self::compile_regex_filters(&filter_tags_regex.reject); let reject = filter_tags .reject @@ -211,7 +141,7 @@ impl TraceFilteredConf { let ignore_resources = ignore_resources .iter() .filter_map(|regex| { - compile_anchored(regex) + Regex::new(regex) .inspect_err(|err| { error!( ?regex, @@ -227,8 +157,6 @@ impl TraceFilteredConf { require, reject_regex, require_regex, - reject_key_regex, - require_key_regex, ignore_resources, } } @@ -263,8 +191,6 @@ impl TraceFilterer { let conf = self.conf.load(); traces.retain(|trace| { let Ok(root_span_index) = get_root_span_index_v4(trace) else { - // FIXME: in this case it's a distributed trace ? 
Maybe we should remove the debug - // log in get_root_span_index_v4 then return true; }; let root_span = &trace[root_span_index]; @@ -301,14 +227,6 @@ impl TraceFilterer { return true; } - if conf - .reject_key_regex - .iter() - .any(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) - { - return true; - } - if !conf .require .iter() @@ -325,14 +243,6 @@ impl TraceFilterer { return true; } - if !conf - .require_key_regex - .iter() - .all(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) - { - return true; - } - if !conf.ignore_resources.is_empty() { let span_resource = root_span.resource(); // Normalization @@ -363,11 +273,11 @@ impl TraceFilterer { filter: &impl TagFilter, root_span: &libdd_trace_utils::span::v04::Span, ) -> bool { - let Some((key, value)) = filter.find_tag(&root_span.meta) else { + let Some(value) = root_span.meta.get(filter.key()) else { return false; }; let value = value.borrow(); - match key { + match filter.key() { "env" => { let normalized_value = libdd_trace_normalization::normalize_utils::normalize_tag_cloned(value); @@ -493,15 +403,15 @@ mod tests { assert_eq!(traces.len(), 1); } - // ---- reject_key_regex (TagRegexKeyFilter – regex key) ---- - // A key pattern containing `*` triggers the key-regex path. + // ---- reject_key_regex ---- + // Checks that it's not implemented #[test] fn reject_key_regex_key_and_value_match_drops() { - // "err.*" contains `*` → key is compiled as a regex; matches "error". 
let mut traces = one_trace(span_with("r", &[("error", "timeout")])); reject_regex(&["err.*:timeout"]).filter_traces(&mut traces); - assert!(traces.is_empty()); + // Regex keys are not implemented so it doesn't match + assert!(!traces.is_empty()); } #[test] @@ -557,14 +467,15 @@ mod tests { assert!(traces.is_empty()); } - // ---- require_key_regex (TagRegexKeyFilter – regex key) ---- + // ---- require_key_regex ---- + // (Checks that it's not implemented) #[test] fn require_key_regex_key_exists_keeps() { - // Key-only pattern → value: None → any tag value satisfies the requirement. let mut traces = one_trace(span_with("r", &[("error", "any")])); require_regex(&["err.*"]).filter_traces(&mut traces); - assert_eq!(traces.len(), 1); + // Regex keys are not implemented so it doesn't match + assert!(traces.is_empty()); } #[test] From f8c4e39c099633707573511895d7e12ac99f3f99 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Mon, 1 Jun 2026 15:23:32 +0200 Subject: [PATCH 16/49] fix: check ignore_resources before the others --- .../src/trace_exporter/trace_filter.rs | 49 ++++++++++--------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index 31369e9d31..9a113a38b7 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -207,10 +207,36 @@ impl TraceFilterer { /// /// Applies a subset of trace normalization logic from `libdd-trace-normalization` before /// checking. + // 1. Resource filtering: If the root span's resource name matches any pattern in ignore_resources, reject the trace. + // 2. Reject filtering: If any tag on the root span matches filters in filter_tags.reject or filter_tags_regex.reject, reject the trace. + // 3. Require filtering: If filter_tags.require or filter_tags_regex.require contain any filters, all of them must match tags on the root span. 
If any required filter doesn't match, reject the trace. fn should_drop( conf: &TraceFilteredConf, root_span: &libdd_trace_utils::span::v04::Span, ) -> bool { + if !conf.ignore_resources.is_empty() { + let span_resource = root_span.resource(); + // Normalization + let span_resource = if span_resource.is_empty() { + let span_name = root_span.name(); + debug!( + ?span_name, + "Trace filter fixing malformed trace. Resource is empty so using name instead" + ); + span_name + } else { + span_resource + }; + + if conf + .ignore_resources + .iter() + .any(|resource_pattern| resource_pattern.is_match(span_resource)) + { + return true; + } + } + if conf .reject .iter() @@ -243,29 +269,6 @@ impl TraceFilterer { return true; } - if !conf.ignore_resources.is_empty() { - let span_resource = root_span.resource(); - // Normalization - let span_resource = if span_resource.is_empty() { - let span_name = root_span.name(); - debug!( - ?span_name, - "Trace filter fixing malformed trace. Resource is empty so using name instead" - ); - span_name - } else { - span_resource - }; - - if conf - .ignore_resources - .iter() - .any(|resource_pattern| resource_pattern.is_match(span_resource)) - { - return true; - } - } - false } From e235455d5d79b195ffef3c28132cc2e03751ca08 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Mon, 1 Jun 2026 15:29:23 +0200 Subject: [PATCH 17/49] fix: rename get_root_span_index --- libdd-data-pipeline/src/trace_exporter/trace_filter.rs | 4 ++-- libdd-trace-utils/src/span/trace_utils.rs | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index 9a113a38b7..e037e11237 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -6,7 +6,7 @@ use std::{borrow::Borrow as _, sync::Arc}; use libdd_common::regex_engine::Regex; use 
libdd_trace_stats::span_concentrator::StatSpan; -use libdd_trace_utils::span::trace_utils::get_root_span_index_v4; +use libdd_trace_utils::span::trace_utils::get_root_span_index; use tracing::{debug, error}; trait TagFilter { @@ -190,7 +190,7 @@ impl TraceFilterer { ) { let conf = self.conf.load(); traces.retain(|trace| { - let Ok(root_span_index) = get_root_span_index_v4(trace) else { + let Ok(root_span_index) = get_root_span_index(trace) else { return true; }; let root_span = &trace[root_span_index]; diff --git a/libdd-trace-utils/src/span/trace_utils.rs b/libdd-trace-utils/src/span/trace_utils.rs index 17910320f9..d5bed60cff 100644 --- a/libdd-trace-utils/src/span/trace_utils.rs +++ b/libdd-trace-utils/src/span/trace_utils.rs @@ -62,8 +62,7 @@ where } } -// FIXME: duplicated with super::get_root_span_index -pub fn get_root_span_index_v4(trace: &[Span]) -> anyhow::Result +pub fn get_root_span_index(trace: &[Span]) -> anyhow::Result where T: TraceData, { From e7a8695efd9fa43f382b0225f7640b9b2876e034 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Mon, 1 Jun 2026 15:39:30 +0200 Subject: [PATCH 18/49] fix: remove ability to configure trace filters from the trace exporter builder --- .../src/trace_exporter/builder.rs | 28 +------------------ .../src/trace_exporter/trace_filter.rs | 27 +++++++++++------- 2 files changed, 18 insertions(+), 37 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/builder.rs b/libdd-data-pipeline/src/trace_exporter/builder.rs index 1f8e100fb8..6fd23c33b4 100644 --- a/libdd-data-pipeline/src/trace_exporter/builder.rs +++ b/libdd-data-pipeline/src/trace_exporter/builder.rs @@ -1,7 +1,6 @@ // Copyright 2024-Present Datadog, Inc. 
https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use crate::agent_info::schema::FilterTagsConfig; use crate::agent_info::AgentInfoFetcher; use crate::otlp::config::{OtlpProtocol, DEFAULT_OTLP_TIMEOUT}; use crate::otlp::OtlpTraceConfig; @@ -67,9 +66,6 @@ pub struct TraceExporterBuilder { connection_timeout: Option, otlp_endpoint: Option, otlp_headers: Vec<(String, String)>, - filter_tags: FilterTagsConfig, - filter_tags_regex: FilterTagsConfig, - ignore_resources: Vec, } impl TraceExporterBuilder { @@ -291,24 +287,6 @@ impl TraceExporterBuilder { self } - // TODO: doc - pub fn set_filter_tags(&mut self, filter_tags: FilterTagsConfig) -> &mut Self { - self.filter_tags = filter_tags; - self - } - - // TODO: doc - pub fn set_filter_tags_regex(&mut self, filter_tags_regex: FilterTagsConfig) -> &mut Self { - self.filter_tags_regex = filter_tags_regex; - self - } - - // TODO: doc - pub fn set_ignore_resources(&mut self, ignore_resources: Vec) -> &mut Self { - self.ignore_resources = ignore_resources; - self - } - #[allow(missing_docs)] pub fn build( self, @@ -518,11 +496,7 @@ impl TraceExporterBuilder { .agent_rates_payload_version_enabled .then(AgentResponsePayloadVersion::new), otlp_config, - trace_filterer: TraceFilterer::new( - &self.filter_tags, - &self.filter_tags_regex, - &self.ignore_resources, - ), + trace_filterer: TraceFilterer::with_empty_conf(), }) } diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index e037e11237..4a5e46c692 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -4,6 +4,7 @@ //! ignore_resources as published by the agent's /info endpoint). 
use std::{borrow::Borrow as _, sync::Arc}; +use arc_swap::ArcSwap; use libdd_common::regex_engine::Regex; use libdd_trace_stats::span_concentrator::StatSpan; use libdd_trace_utils::span::trace_utils::get_root_span_index; @@ -29,8 +30,8 @@ struct TagRegexFilter { } /// Parsed config -#[derive(Debug)] -struct TraceFilteredConf { +#[derive(Debug, Default)] +struct TraceFiltererConf { reject: Vec, reject_regex: Vec, @@ -42,7 +43,7 @@ struct TraceFilteredConf { #[derive(Debug)] pub struct TraceFilterer { - conf: arc_swap::ArcSwap, + conf: ArcSwap, } impl TagStringFilter { @@ -87,7 +88,7 @@ impl TagFilter for TagRegexFilter { } } -impl TraceFilteredConf { +impl TraceFiltererConf { fn compile_regex_filters(filters: &[String]) -> Vec { let mut tag_regex_filters = Vec::new(); for filter in filters { @@ -152,7 +153,7 @@ impl TraceFilteredConf { .ok() }) .collect(); - TraceFilteredConf { + TraceFiltererConf { reject, require, reject_regex, @@ -163,14 +164,20 @@ impl TraceFilteredConf { } impl TraceFilterer { - pub fn new( + #[cfg(test)] + fn new( filter_tags: &crate::agent_info::schema::FilterTagsConfig, filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, ignore_resources: &[String], ) -> Self { - let conf = TraceFilteredConf::parse(filter_tags, filter_tags_regex, ignore_resources); + let conf = TraceFiltererConf::parse(filter_tags, filter_tags_regex, ignore_resources); + Self { + conf: ArcSwap::from_pointee(conf), + } + } + pub fn with_empty_conf() -> Self { Self { - conf: arc_swap::ArcSwap::from_pointee(conf), + conf: ArcSwap::from_pointee(TraceFiltererConf::default()), } } @@ -180,7 +187,7 @@ impl TraceFilterer { filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, ignore_resources: &[String], ) { - let new_conf = TraceFilteredConf::parse(filter_tags, filter_tags_regex, ignore_resources); + let new_conf = TraceFiltererConf::parse(filter_tags, filter_tags_regex, ignore_resources); self.conf.swap(Arc::new(new_conf)); } @@ -211,7 +218,7 @@ impl 
TraceFilterer { // 2. Reject filtering: If any tag on the root span matches filters in filter_tags.reject or filter_tags_regex.reject, reject the trace. // 3. Require filtering: If filter_tags.require or filter_tags_regex.require contain any filters, all of them must match tags on the root span. If any required filter doesn't match, reject the trace. fn should_drop( - conf: &TraceFilteredConf, + conf: &TraceFiltererConf, root_span: &libdd_trace_utils::span::v04::Span, ) -> bool { if !conf.ignore_resources.is_empty() { From 4b26fba3acdcb002266923091c8cb28d494428fb Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Mon, 1 Jun 2026 15:40:19 +0200 Subject: [PATCH 19/49] fix: fmt --- libdd-data-pipeline/src/trace_exporter/trace_filter.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index 4a5e46c692..07c733f319 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -214,9 +214,13 @@ impl TraceFilterer { /// /// Applies a subset of trace normalization logic from `libdd-trace-normalization` before /// checking. - // 1. Resource filtering: If the root span's resource name matches any pattern in ignore_resources, reject the trace. - // 2. Reject filtering: If any tag on the root span matches filters in filter_tags.reject or filter_tags_regex.reject, reject the trace. - // 3. Require filtering: If filter_tags.require or filter_tags_regex.require contain any filters, all of them must match tags on the root span. If any required filter doesn't match, reject the trace. + // 1. Resource filtering: If the root span's resource name matches any pattern in + // ignore_resources, reject the trace. + // 2. Reject filtering: If any tag on the root span matches filters in filter_tags.reject or + // filter_tags_regex.reject, reject the trace. + // 3. 
Require filtering: If filter_tags.require or filter_tags_regex.require contain any + // filters, all of them must match tags on the root span. If any required filter doesn't + // match, reject the trace. fn should_drop( conf: &TraceFiltererConf, root_span: &libdd_trace_utils::span::v04::Span, From 580ca8686cc6237bc73127484294d4dbcc706932 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Tue, 2 Jun 2026 16:50:09 +0200 Subject: [PATCH 20/49] fix: filter traces on the client only when css is enabled --- libdd-data-pipeline/src/trace_exporter/mod.rs | 6 +----- libdd-data-pipeline/src/trace_exporter/stats.rs | 8 ++++++++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index bf6a30be4b..51a7397bef 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -349,7 +349,6 @@ impl Tra fn check_agent_info(&self) { if let Some(agent_info) = agent_info::get_agent_info() { if self.has_agent_info_state_changed(&agent_info) { - // FIXME: trace_filterer should only be enabled when CSS is on. (why ?) self.trace_filterer.update_conf( &agent_info.info.filter_tags, &agent_info.info.filter_tags_regex, @@ -582,10 +581,6 @@ impl Tra mut traces: Vec>>, ) -> Result { let mut header_tags: TracerHeaderTags = self.metadata.borrow().into(); - // FIXME: when client_computed_top_level is true, looking twice for the root span here is - // inefficient and just below in process_traces_for_stats. - // Also, only do it when css is on - self.trace_filterer.filter_traces(&mut traces); // Process stats computation and drop non-sampled (p0) chunks. // This must run before the OTLP path so that unsampled spans are not exported. @@ -594,6 +589,7 @@ impl Tra &mut header_tags, &self.client_side_stats.status, self.client_computed_top_level, + &self.trace_filterer, ); // OTLP path: send sampled traces via OTLP when an OTLP endpoint is configured. 
diff --git a/libdd-data-pipeline/src/trace_exporter/stats.rs b/libdd-data-pipeline/src/trace_exporter/stats.rs index 7f1aaea36a..cf1415e46a 100644 --- a/libdd-data-pipeline/src/trace_exporter/stats.rs +++ b/libdd-data-pipeline/src/trace_exporter/stats.rs @@ -29,6 +29,7 @@ use tracing::{debug, error}; #[cfg(not(target_arch = "wasm32"))] use super::add_path; +use super::trace_filter::TraceFilterer; use super::TracerMetadata; #[cfg(not(target_arch = "wasm32"))] @@ -297,12 +298,19 @@ pub(crate) fn process_traces_for_stats( header_tags: &mut libdd_trace_utils::trace_utils::TracerHeaderTags, client_side_stats: &ArcSwap, client_computed_top_level: bool, + trace_filterer: &TraceFilterer, ) -> libdd_trace_utils::span::trace_utils::DroppedP0Stats { let status = client_side_stats.load(); if let StatsComputationStatus::Enabled { stats_concentrator, .. } = &**status { + // FIXME: when client_computed_top_level is true, looking twice for the root span here and + // just below in compute_top_level_span is inefficient + // + // FIXME: add dropped trace count to dropped_p0_stats ? + trace_filterer.filter_traces(traces); + if !client_computed_top_level { for chunk in traces.iter_mut() { libdd_trace_utils::span::trace_utils::compute_top_level_span(chunk); From db48e841581ee9ad567e14f44b5e9656923d5317 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Tue, 2 Jun 2026 18:50:24 +0200 Subject: [PATCH 21/49] fix: bad merge --- libdd-data-pipeline/src/trace_exporter/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 0161a8381e..b7f252d9cb 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -351,6 +351,7 @@ impl Tra /// Reconcile in-process stats state with the latest agent info. /// Async so the `Enabled` arm can await a stats-worker shutdown without `block_on`. 
+ #[cfg(not(target_arch = "wasm32"))] async fn check_agent_info(&self) { let Some(agent_info) = agent_info::get_agent_info() else { return; From c88ad067b76ba60849f29f7dae42ebfe71ceb872 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Wed, 3 Jun 2026 15:06:47 +0200 Subject: [PATCH 22/49] feat: trim tag filters --- .../src/trace_exporter/trace_filter.rs | 161 +++++++++++++----- 1 file changed, 119 insertions(+), 42 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index 07c733f319..114cbb1a36 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -18,7 +18,7 @@ trait TagFilter { } #[derive(Debug)] -struct TagStringFilter { +struct TagLiteralFilter { key: String, value: Option, } @@ -32,10 +32,10 @@ struct TagRegexFilter { /// Parsed config #[derive(Debug, Default)] struct TraceFiltererConf { - reject: Vec, + reject: Vec, reject_regex: Vec, - require: Vec, + require: Vec, require_regex: Vec, ignore_resources: Vec, @@ -46,23 +46,7 @@ pub struct TraceFilterer { conf: ArcSwap, } -impl TagStringFilter { - fn from_str(tag: &str) -> Self { - if let Some((key, value)) = tag.split_once(":") { - TagStringFilter { - key: key.to_owned(), - value: Some(value.to_owned()), - } - } else { - TagStringFilter { - key: tag.to_owned(), - value: None, - } - } - } -} - -impl TagFilter for TagStringFilter { +impl TagFilter for TagLiteralFilter { fn matches_tag_value(&self, value: &str) -> bool { match &self.value { None => true, // No value requirement => Any value is a match @@ -89,13 +73,46 @@ impl TagFilter for TagRegexFilter { } impl TraceFiltererConf { + fn compile_literal_filters(filters: &[String]) -> Vec { + let mut tag_regex_filters = Vec::new(); + for filter in filters { + let (key, value) = match filter.split_once(":") { + Some((key, value)) if !value.trim().is_empty() => { + (key.trim(), 
Some(value.trim().to_owned())) + } + _ => (filter.trim(), None), + }; + if key.is_empty() { + error!( + ?filter, + "Invalid tag filter with empty key value, skipping it" + ); + continue; + } + + tag_regex_filters.push(TagLiteralFilter { + key: key.to_owned(), + value, + }); + } + + tag_regex_filters + } + fn compile_regex_filters(filters: &[String]) -> Vec { let mut tag_regex_filters = Vec::new(); for filter in filters { let (key, value) = match filter.split_once(":") { - Some((key, value)) => (key, Some(value)), - None => (filter.as_ref(), None), + Some((key, value)) if !value.trim().is_empty() => (key.trim(), Some(value.trim())), + _ => (filter.trim(), None), }; + if key.is_empty() { + error!( + ?filter, + "Invalid tag filter with empty key value, skipping it" + ); + continue; + } let value = match value { Some(value) => match Regex::new(value) { @@ -121,25 +138,8 @@ impl TraceFiltererConf { tag_regex_filters } - fn parse( - filter_tags: &crate::agent_info::schema::FilterTagsConfig, - filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, - ignore_resources: &[String], - ) -> Self { - let require_regex = Self::compile_regex_filters(&filter_tags_regex.require); - let reject_regex = Self::compile_regex_filters(&filter_tags_regex.reject); - - let reject = filter_tags - .reject - .iter() - .map(|tag| TagStringFilter::from_str(tag)) - .collect(); - let require = filter_tags - .require - .iter() - .map(|tag| TagStringFilter::from_str(tag)) - .collect(); - let ignore_resources = ignore_resources + fn compile_resource_filters(ignore_resources: &[String]) -> Vec { + ignore_resources .iter() .filter_map(|regex| { Regex::new(regex) @@ -152,7 +152,20 @@ impl TraceFiltererConf { }) .ok() }) - .collect(); + .collect() + } + + fn parse( + filter_tags: &crate::agent_info::schema::FilterTagsConfig, + filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, + ignore_resources: &[String], + ) -> Self { + let require_regex = 
Self::compile_regex_filters(&filter_tags_regex.require); + let reject_regex = Self::compile_regex_filters(&filter_tags_regex.reject); + let require = Self::compile_literal_filters(&filter_tags.require); + let reject = Self::compile_literal_filters(&filter_tags.reject); + let ignore_resources = Self::compile_resource_filters(ignore_resources); + TraceFiltererConf { reject, require, @@ -621,4 +634,68 @@ mod tests { f.filter_traces(&mut traces); assert_eq!(traces.len(), 1); } + + // ---- key/value trimming ---- + + #[test] + fn literal_reject_spaces_around_colon_drops() { + // " env : prod " → key="env", value="prod" + let mut traces = one_trace(span_with("r", &[("env", "prod")])); + reject_str(&[" env : prod "]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + #[test] + fn literal_require_spaces_around_colon_keeps() { + let mut traces = one_trace(span_with("r", &[("env", "prod")])); + require_str(&[" env : prod "]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn literal_reject_key_only_with_spaces_drops_any_value() { + // " env " (no colon) → key="env", no value requirement + let mut traces = one_trace(span_with("r", &[("env", "anything")])); + reject_str(&[" env "]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + #[test] + fn literal_reject_empty_key_is_skipped_keeps() { + // ":prod" → key="" → filter skipped → trace kept + let mut traces = one_trace(span_with("r", &[("env", "prod")])); + reject_str(&[":prod"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn literal_require_empty_key_is_skipped_keeps() { + // ":prod" → filter skipped → require list empty → vacuous all() → trace kept + let mut traces = one_trace(span_with("r", &[("env", "prod")])); + require_str(&[":prod"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn regex_reject_spaces_around_colon_drops() { + // " env : prod.* " → key="env", regex="prod.*" + let mut traces = 
one_trace(span_with("r", &[("env", "production")])); + reject_regex(&[" env : prod.* "]).filter_traces(&mut traces); + assert!(traces.is_empty()); + } + + #[test] + fn regex_require_spaces_around_colon_keeps() { + let mut traces = one_trace(span_with("r", &[("env", "production")])); + require_regex(&[" env : prod.* "]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } + + #[test] + fn regex_reject_empty_key_is_skipped_keeps() { + // ":prod.*" → key="" → filter skipped → trace kept + let mut traces = one_trace(span_with("r", &[("env", "prod")])); + reject_regex(&[":prod.*"]).filter_traces(&mut traces); + assert_eq!(traces.len(), 1); + } } From 72653689d32d376aedd054ac9949a19d42ae374d Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Wed, 3 Jun 2026 17:51:18 +0200 Subject: [PATCH 23/49] feat: count trace filter dropped traces in telemetry --- libdd-data-pipeline/src/telemetry/metrics.rs | 11 +++++++++++ libdd-data-pipeline/src/telemetry/mod.rs | 8 ++++++++ .../src/trace_exporter/stats.rs | 18 +++++++++--------- .../src/trace_exporter/trace_filter.rs | 6 +++++- libdd-trace-utils/src/span/trace_utils.rs | 8 +++++--- 5 files changed, 38 insertions(+), 13 deletions(-) diff --git a/libdd-data-pipeline/src/telemetry/metrics.rs b/libdd-data-pipeline/src/telemetry/metrics.rs index feeb74e492..6404e3415c 100644 --- a/libdd-data-pipeline/src/telemetry/metrics.rs +++ b/libdd-data-pipeline/src/telemetry/metrics.rs @@ -27,6 +27,8 @@ pub enum MetricKind { ChunksSent, /// trace_chunks_dropped metric (reason: p0_drop) ChunksDroppedP0, + /// trace_chunks_dropped metric (reason: trace_filter) + ChunksDroppedByTraceFilter, /// trace_chunks_dropped metric (reason: serialization_error) ChunksDroppedSerializationError, /// trace_chunks_dropped metric (reason: send_failure) @@ -102,6 +104,15 @@ const METRICS: &[Metric] = &[ namespace: MetricNamespace::Tracers, tags: &[tag!["src_library", "libdatadog"], tag!["reason", "p0_drop"]], }, + Metric { + name: CHUNKS_DROPPED_STR, + 
metric_type: MetricType::Count, + namespace: MetricNamespace::Tracers, + tags: &[ + tag!["src_library", "libdatadog"], + tag!["reason", "trace_filters"], + ], + }, Metric { name: CHUNKS_DROPPED_STR, metric_type: MetricType::Count, diff --git a/libdd-data-pipeline/src/telemetry/mod.rs b/libdd-data-pipeline/src/telemetry/mod.rs index 9bf4177b5a..0b98cf71db 100644 --- a/libdd-data-pipeline/src/telemetry/mod.rs +++ b/libdd-data-pipeline/src/telemetry/mod.rs @@ -165,6 +165,7 @@ pub struct SendPayloadTelemetry { bytes_sent: u64, chunks_sent: u64, chunks_dropped_p0: u64, + chunks_dropped_by_trace_filter: u64, chunks_dropped_serialization_error: u64, chunks_dropped_send_failure: u64, responses_count_per_code: HashMap, @@ -288,6 +289,13 @@ impl TelemetryClient { self.worker .add_point(data.chunks_dropped_p0 as f64, key, vec![])?; } + if data.chunks_dropped_by_trace_filter > 0 { + let key = self + .metrics + .get(metrics::MetricKind::ChunksDroppedByTraceFilter); + self.worker + .add_point(data.chunks_dropped_by_trace_filter as f64, key, vec![])?; + } if data.chunks_dropped_serialization_error > 0 { let key = self .metrics diff --git a/libdd-data-pipeline/src/trace_exporter/stats.rs b/libdd-data-pipeline/src/trace_exporter/stats.rs index 4eb520611f..92c53c51d2 100644 --- a/libdd-data-pipeline/src/trace_exporter/stats.rs +++ b/libdd-data-pipeline/src/trace_exporter/stats.rs @@ -292,7 +292,7 @@ pub(crate) fn process_traces_for_stats( client_side_stats: &ArcSwap, client_computed_top_level: bool, trace_filterer: &TraceFilterer, -) -> libdd_trace_utils::span::trace_utils::DroppedP0Stats { +) -> libdd_trace_utils::span::trace_utils::DroppedStats { let status = client_side_stats.load(); if let StatsComputationStatus::Enabled { stats_concentrator, .. 
@@ -300,9 +300,7 @@ pub(crate) fn process_traces_for_stats( { // FIXME: when client_computed_top_level is true, looking twice for the root span here and // just below in compute_top_level_span is inefficient - // - // FIXME: add dropped trace count to dropped_p0_stats ? - trace_filterer.filter_traces(traces); + let dropped_by_trace_filter = trace_filterer.filter_traces(traces); if !client_computed_top_level { for chunk in traces.iter_mut() { @@ -312,20 +310,22 @@ pub(crate) fn process_traces_for_stats( add_spans_to_stats(stats_concentrator, traces); // Once stats have been computed we can drop all chunks that are not going to be // sampled by the agent - let dropped_p0_stats = libdd_trace_utils::span::trace_utils::drop_chunks(traces); + let mut dropped_stats = libdd_trace_utils::span::trace_utils::drop_chunks(traces); + dropped_stats.dropped_by_trace_filter = dropped_by_trace_filter; // Update the headers to indicate that stats have been computed and forward dropped // traces counts header_tags.client_computed_top_level = true; header_tags.client_computed_stats = true; - header_tags.dropped_p0_traces = dropped_p0_stats.dropped_p0_traces; - header_tags.dropped_p0_spans = dropped_p0_stats.dropped_p0_spans; + header_tags.dropped_p0_traces = dropped_stats.dropped_p0_traces; + header_tags.dropped_p0_spans = dropped_stats.dropped_p0_spans; - dropped_p0_stats + dropped_stats } else { - libdd_trace_utils::span::trace_utils::DroppedP0Stats { + libdd_trace_utils::span::trace_utils::DroppedStats { dropped_p0_traces: 0, dropped_p0_spans: 0, + dropped_by_trace_filter: 0, } } } diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index 114cbb1a36..c0440f70bb 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -207,8 +207,9 @@ impl TraceFilterer { pub fn filter_traces( &self, traces: &mut Vec>>, - ) { + ) -> usize { let conf = 
self.conf.load(); + let traces_count_before = traces.len(); traces.retain(|trace| { let Ok(root_span_index) = get_root_span_index(trace) else { return true; @@ -220,6 +221,9 @@ impl TraceFilterer { } !should_drop }); + let traces_count_after = traces.len(); + let dropped_by_trace_filter = traces_count_before - traces_count_after; + dropped_by_trace_filter } /// Checks if the trace with root span `root_span` should be dropped based on filter diff --git a/libdd-trace-utils/src/span/trace_utils.rs b/libdd-trace-utils/src/span/trace_utils.rs index d5bed60cff..c330ff63e4 100644 --- a/libdd-trace-utils/src/span/trace_utils.rs +++ b/libdd-trace-utils/src/span/trace_utils.rs @@ -129,9 +129,10 @@ pub fn is_partial_snapshot(span: &Span) -> bool { .is_some_and(|v| *v >= 0.0) } -pub struct DroppedP0Stats { +pub struct DroppedStats { pub dropped_p0_traces: usize, pub dropped_p0_spans: usize, + pub dropped_by_trace_filter: usize, } // Keys used for sampling @@ -149,7 +150,7 @@ const SAMPLING_ANALYTICS_RATE_KEY: &str = "_dd1.sr.eausr"; /// /// # Trace-level attributes /// Some attributes related to the whole trace are stored in the root span of the chunk. 
-pub fn drop_chunks(traces: &mut Vec>>) -> DroppedP0Stats +pub fn drop_chunks(traces: &mut Vec>>) -> DroppedStats where T: TraceData, { @@ -200,9 +201,10 @@ where true }); - DroppedP0Stats { + DroppedStats { dropped_p0_traces, dropped_p0_spans, + dropped_by_trace_filter: 0, } } From c0cc28fe773df508de089d278dea6d7ef97b8cb3 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Thu, 4 Jun 2026 12:46:51 +0200 Subject: [PATCH 24/49] fix: clippy lint --- libdd-data-pipeline/src/trace_exporter/trace_filter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index c0440f70bb..48f9cde040 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -222,8 +222,8 @@ impl TraceFilterer { !should_drop }); let traces_count_after = traces.len(); - let dropped_by_trace_filter = traces_count_before - traces_count_after; - dropped_by_trace_filter + + traces_count_before - traces_count_after } /// Checks if the trace with root span `root_span` should be dropped based on filter From e197714ac888b0c5d581f34136be7e362852e61f Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Thu, 4 Jun 2026 13:06:57 +0200 Subject: [PATCH 25/49] fix: ChunksDroppedByTraceFilter reason comment typo --- libdd-data-pipeline/src/telemetry/metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdd-data-pipeline/src/telemetry/metrics.rs b/libdd-data-pipeline/src/telemetry/metrics.rs index 6404e3415c..f610c7e619 100644 --- a/libdd-data-pipeline/src/telemetry/metrics.rs +++ b/libdd-data-pipeline/src/telemetry/metrics.rs @@ -27,7 +27,7 @@ pub enum MetricKind { ChunksSent, /// trace_chunks_dropped metric (reason: p0_drop) ChunksDroppedP0, - /// trace_chunks_dropped metric (reason: trace_filter) + /// trace_chunks_dropped metric (reason: trace_filters) ChunksDroppedByTraceFilter, /// 
trace_chunks_dropped metric (reason: serialization_error) ChunksDroppedSerializationError, From 977d4a4c7578dfe9ac8972800e00250517615567 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Thu, 4 Jun 2026 13:35:45 +0200 Subject: [PATCH 26/49] fix: pass dropped chunks by trace filters all the way --- libdd-data-pipeline/src/telemetry/mod.rs | 74 ++++++++++++++++--- libdd-data-pipeline/src/trace_exporter/mod.rs | 10 ++- 2 files changed, 71 insertions(+), 13 deletions(-) diff --git a/libdd-data-pipeline/src/telemetry/mod.rs b/libdd-data-pipeline/src/telemetry/mod.rs index 0b98cf71db..cd196e3556 100644 --- a/libdd-data-pipeline/src/telemetry/mod.rs +++ b/libdd-data-pipeline/src/telemetry/mod.rs @@ -13,6 +13,7 @@ use libdd_telemetry::worker::{ }; use libdd_trace_utils::{ send_with_retry::{SendWithRetryError, SendWithRetryResult}, + span::trace_utils::DroppedStats, trace_utils::SendDataResult, }; use std::{collections::HashMap, time::Duration}; @@ -194,15 +195,16 @@ impl SendPayloadTelemetry { /// * `value` - The result of sending traces with retry /// * `bytes_sent` - The number of bytes in the payload /// * `chunks` - The number of trace chunks in the payload - /// * `chunks_dropped_p0` - The number of P0 trace chunks dropped due to sampling + /// * `dropped_stats` - Trace dropped stats from `stats::process_traces_for_stats` pub fn from_retry_result( value: &SendWithRetryResult, bytes_sent: u64, chunks: u64, - chunks_dropped_p0: u64, + dropped_stats: DroppedStats, ) -> Self { let mut telemetry = Self { - chunks_dropped_p0, + chunks_dropped_p0: dropped_stats.dropped_p0_traces as u64, + chunks_dropped_by_trace_filter: dropped_stats.dropped_by_trace_filter as u64, ..Default::default() }; match value { @@ -712,7 +714,16 @@ mod tests { .unwrap(), 3, )); - let telemetry = SendPayloadTelemetry::from_retry_result(&result, 4, 5, 0); + let telemetry = SendPayloadTelemetry::from_retry_result( + &result, + 4, + 5, + DroppedStats { + dropped_p0_traces: 0, + dropped_p0_spans: 0, + 
dropped_by_trace_filter: 0, + }, + ); assert_eq!( telemetry, SendPayloadTelemetry { @@ -734,7 +745,16 @@ mod tests { .unwrap(), 3, )); - let telemetry = SendPayloadTelemetry::from_retry_result(&result, 4, 5, 10); + let telemetry = SendPayloadTelemetry::from_retry_result( + &result, + 4, + 5, + DroppedStats { + dropped_p0_traces: 10, + dropped_p0_spans: 0, + dropped_by_trace_filter: 0, + }, + ); assert_eq!( telemetry, SendPayloadTelemetry { @@ -755,7 +775,16 @@ mod tests { .body(Bytes::new()) .unwrap(); let result = Err(SendWithRetryError::Http(error_response, 5)); - let telemetry = SendPayloadTelemetry::from_retry_result(&result, 1, 2, 0); + let telemetry = SendPayloadTelemetry::from_retry_result( + &result, + 1, + 2, + DroppedStats { + dropped_p0_traces: 0, + dropped_p0_spans: 0, + dropped_by_trace_filter: 0, + }, + ); assert_eq!( telemetry, SendPayloadTelemetry { @@ -774,7 +803,16 @@ mod tests { HttpError::Network(anyhow::anyhow!("connection refused")), 5, )); - let telemetry = SendPayloadTelemetry::from_retry_result(&result, 1, 2, 0); + let telemetry = SendPayloadTelemetry::from_retry_result( + &result, + 1, + 2, + DroppedStats { + dropped_p0_traces: 0, + dropped_p0_spans: 0, + dropped_by_trace_filter: 0, + }, + ); assert_eq!( telemetry, SendPayloadTelemetry { @@ -789,7 +827,16 @@ mod tests { #[test] fn telemetry_from_timeout_error_test() { let result = Err(SendWithRetryError::Timeout(5)); - let telemetry = SendPayloadTelemetry::from_retry_result(&result, 1, 2, 0); + let telemetry = SendPayloadTelemetry::from_retry_result( + &result, + 1, + 2, + DroppedStats { + dropped_p0_traces: 0, + dropped_p0_spans: 0, + dropped_by_trace_filter: 0, + }, + ); assert_eq!( telemetry, SendPayloadTelemetry { @@ -805,7 +852,16 @@ mod tests { #[test] fn telemetry_from_build_error_test() { let result = Err(SendWithRetryError::Build(5)); - let telemetry = SendPayloadTelemetry::from_retry_result(&result, 1, 2, 0); + let telemetry = SendPayloadTelemetry::from_retry_result( + &result, + 
1, + 2, + DroppedStats { + dropped_p0_traces: 0, + dropped_p0_spans: 0, + dropped_by_trace_filter: 0, + }, + ); assert_eq!( telemetry, SendPayloadTelemetry { diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index b7f252d9cb..c776e69cf3 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -10,6 +10,7 @@ mod trace_serializer; // Re-export the builder pub use builder::TraceExporterBuilder; +use libdd_trace_utils::span::trace_utils::DroppedStats; use self::agent_response::AgentResponse; use self::metrics::MetricsEmitter; @@ -549,7 +550,8 @@ impl Tra mp_payload: Vec, headers: HeaderMap, chunks: usize, - #[cfg_attr(not(feature = "telemetry"), allow(unused_variables))] chunks_dropped_p0: usize, + #[cfg_attr(not(feature = "telemetry"), allow(unused_variables))] + dropped_stats: DroppedStats, ) -> Result { let strategy = RetryStrategy::default(); let payload_len = mp_payload.len(); @@ -570,7 +572,7 @@ impl Tra &result, payload_len as u64, chunks as u64, - chunks_dropped_p0 as u64, + dropped_stats, )) { error!(?e, "Error sending telemetry"); } @@ -587,7 +589,7 @@ impl Tra // Process stats computation and drop non-sampled (p0) chunks. // This must run before the OTLP path so that unsampled spans are not exported. 
- let dropped_p0_stats = stats::process_traces_for_stats( + let dropped_stats = stats::process_traces_for_stats( &mut traces, &mut header_tags, &self.client_side_stats.status, @@ -639,7 +641,7 @@ impl Tra prepared.data, prepared.headers, prepared.chunk_count, - dropped_p0_stats.dropped_p0_traces, + dropped_stats, ) .await; From 36c65f457b3bdd8e1324e3370a250785fb6e2b3e Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 5 Jun 2026 18:09:45 +0200 Subject: [PATCH 27/49] remove unimportant fixme --- libdd-data-pipeline/src/trace_exporter/stats.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/stats.rs b/libdd-data-pipeline/src/trace_exporter/stats.rs index 92c53c51d2..d811f1fbf1 100644 --- a/libdd-data-pipeline/src/trace_exporter/stats.rs +++ b/libdd-data-pipeline/src/trace_exporter/stats.rs @@ -298,8 +298,6 @@ pub(crate) fn process_traces_for_stats( stats_concentrator, .. } = &**status { - // FIXME: when client_computed_top_level is true, looking twice for the root span here and - // just below in compute_top_level_span is inefficient let dropped_by_trace_filter = trace_filterer.filter_traces(traces); if !client_computed_top_level { From c0cce0df7b91802cd36b48731e7eb3f53188d203 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 5 Jun 2026 18:10:13 +0200 Subject: [PATCH 28/49] test: apply suggestion of not waiting for nothing --- libdd-data-pipeline/src/trace_exporter/mod.rs | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index c776e69cf3..394d5e8b9b 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -2428,6 +2428,7 @@ mod single_threaded_tests { while agent_info::get_agent_info().is_none() { std::thread::sleep(Duration::from_millis(100)); } + assert!(exporter.is_stats_worker_active()); let result = exporter.send( 
msgpack_encoder::v04::to_vec(&[ @@ -2493,16 +2494,6 @@ mod single_threaded_tests { ); assert!(result.is_err()); - // Wait for the stats worker to be active before shutting down to avoid potential flaky - // tests on CI where we shutdown before the stats worker had time to start - let start_time = std::time::Instant::now(); - while !exporter.is_stats_worker_active() { - if start_time.elapsed() > Duration::from_secs(10) { - panic!("Timeout waiting for stats worker to become active"); - } - std::thread::sleep(Duration::from_millis(10)); - } - runtime.shutdown(None).unwrap(); // Wait for the mock server to process the stats From 4ea70a8f9bc3292fafb41342226796433459ff34 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 5 Jun 2026 18:33:13 +0200 Subject: [PATCH 29/49] Revert "test: apply suggestion of not waiting for nothing" This reverts commit c0cce0df7b91802cd36b48731e7eb3f53188d203. --- libdd-data-pipeline/src/trace_exporter/mod.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 394d5e8b9b..c776e69cf3 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -2428,7 +2428,6 @@ mod single_threaded_tests { while agent_info::get_agent_info().is_none() { std::thread::sleep(Duration::from_millis(100)); } - assert!(exporter.is_stats_worker_active()); let result = exporter.send( msgpack_encoder::v04::to_vec(&[ @@ -2494,6 +2493,16 @@ mod single_threaded_tests { ); assert!(result.is_err()); + // Wait for the stats worker to be active before shutting down to avoid potential flaky + // tests on CI where we shutdown before the stats worker had time to start + let start_time = std::time::Instant::now(); + while !exporter.is_stats_worker_active() { + if start_time.elapsed() > Duration::from_secs(10) { + panic!("Timeout waiting for stats worker to become active"); + } + 
std::thread::sleep(Duration::from_millis(10)); + } + runtime.shutdown(None).unwrap(); // Wait for the mock server to process the stats From 32f108521e80f8811fec7c01332e77ffed97739c Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Tue, 9 Jun 2026 16:56:59 +0200 Subject: [PATCH 30/49] feat: add snapshot test using test agent --- .../trace_filters_snapshot_test.json | 36 ++++ .../tests/test_trace_filters.rs | 160 ++++++++++++++++++ 2 files changed, 196 insertions(+) create mode 100644 libdd-data-pipeline/tests/snapshots/trace_filters_snapshot_test.json create mode 100644 libdd-data-pipeline/tests/test_trace_filters.rs diff --git a/libdd-data-pipeline/tests/snapshots/trace_filters_snapshot_test.json b/libdd-data-pipeline/tests/snapshots/trace_filters_snapshot_test.json new file mode 100644 index 0000000000..97cc5c70e8 --- /dev/null +++ b/libdd-data-pipeline/tests/snapshots/trace_filters_snapshot_test.json @@ -0,0 +1,36 @@ +[ + [ + { + "service": "test-service", + "name": "passes_filters_first", + "resource": "test", + "trace_id": 1, + "span_id": 1, + "start": 0, + "duration": 5, + "meta": { + "my_require_tag": "true" + }, + "metrics": { + "_top_level": 1.0 + } + } + ], + [ + { + "service": "test-service", + "name": "passes_filters_last", + "resource": "test2", + "trace_id": 1, + "span_id": 1, + "start": 0, + "duration": 5, + "meta": { + "my_require_tag": "true" + }, + "metrics": { + "_top_level": 1.0 + } + } + ] +] diff --git a/libdd-data-pipeline/tests/test_trace_filters.rs b/libdd-data-pipeline/tests/test_trace_filters.rs new file mode 100644 index 0000000000..6ce0752db2 --- /dev/null +++ b/libdd-data-pipeline/tests/test_trace_filters.rs @@ -0,0 +1,160 @@ +use std::{ + collections::HashMap, + time::{Duration, Instant}, +}; + +use libdd_capabilities_impl::NativeCapabilities; +use libdd_data_pipeline::{ + agent_info, + trace_exporter::{TraceExporter, TraceExporterInputFormat, TraceExporterOutputFormat}, +}; +use 
libdd_trace_utils::test_utils::datadog_test_agent::DatadogTestAgent; +use rand::Rng; +use serde_json::json; + +#[cfg_attr(miri, ignore)] +#[tokio::test] +async fn trace_filters_snapshot_test() { + const EXTRA_INFO: &str = r#"{ + "version":"1", + "filter_tags": {"reject": ["my_ignore_tag"], "require": ["my_require_tag:true"]}, + "filter_tags_regex": {"reject": ["my_regex_ignore_tag:.*true.*"]}, + "ignore_resources": [".*IGNORED.*"] + }"#; + let relative_snapshot_path = "libdd-data-pipeline/tests/snapshots/"; + let snapshot_name = "trace_filters_snapshot_test"; + let test_agent = DatadogTestAgent::new( + Some(relative_snapshot_path), + None, + &[("DD_AGENT_EXTRA_INFO", EXTRA_INFO)], + ) + .await; + let url = test_agent.get_base_uri().await; + test_agent.start_session(snapshot_name, None).await; + + let mut builder = TraceExporter::::builder(); + builder + .enable_stats(Duration::from_secs(10)) + .set_env("staging") + .set_language("nodejs") + .set_language_interpreter("v8") + .set_language_version("1.0") + .set_service("test") + .set_test_session_token(snapshot_name) + .set_tracer_version("1.0") + .set_input_format(TraceExporterInputFormat::V04) + .set_output_format(TraceExporterOutputFormat::V04) + .set_url(url.to_string().as_ref()); + + let trace_exporter = builder + .build_async::() + .await + .expect("Unable to build TraceExporter"); + let data = get_v04_trace_snapshot_test_payload(); + let timeout = Duration::from_secs(2); + let start = Instant::now(); + loop { + if std::time::Instant::now().duration_since(start) > timeout { + panic!("Timeout waiting for agent info to be ready"); + } + if agent_info::get_agent_info().is_some() { + break; + } + std::thread::sleep(Duration::from_millis(10)); + } + + let response = trace_exporter.send_async(data.as_ref()).await; + assert!(response.is_ok()); + + tokio::task::spawn_blocking(move || drop(trace_exporter)) + .await + .unwrap(); + + let received_traces = test_agent.get_sent_traces().await; + + println!( + "{}", + 
serde_json::to_string_pretty(&received_traces).unwrap() + ); + + test_agent.assert_snapshot(snapshot_name).await; +} + +fn get_v04_trace_snapshot_test_payload() -> Vec { + let traces = vec![ + trace_1_span( + "passes_filters_first", + "test", + &[("my_require_tag", "true")], + ), + // This one gets filtered out because it matches an ignore_resources pattern + trace_1_span( + "ignored_resource", + "test IGNORED resource test", + &[("my_require_tag", "true")], + ), + // This one gets filtered out because one of its tag matches a reject filter_tag + trace_1_span( + "reject_filter_tag", + "test ignored because of reject filter_tag", + &[("my_ignore_tag", ""), ("my_require_tag", "true")], + ), + // This one gets filtered out because one of its tag matches a reject + // regex_filter_tag + trace_1_span( + "reject_rejex_filter_tag", + "test ignored because of reject regex_filter_tag", + &[ + ("my_regex_ignore_tag", "something-true-something"), + ("my_require_tag", "true"), + ], + ), + // This one gets filtered out because it doesn't have my_require_tag:true + trace_1_span( + "missing_required_filter_tag", + "test ignored because missing a required filter_tag", + &[("a_useless_tag", "true")], + ), + // This one gets filtered out because it doesn't have my_require_tag:true + trace_1_span( + "missing_required_filter_tag_value", + "test ignored because wrong value on filter_tag", + &[("my_require_tag", "false")], + ), + trace_1_span( + "passes_filters_last", + "test2", + &[("my_require_tag", "true")], + ), + ]; + rmp_serde::to_vec_named(&traces).unwrap() +} + +pub fn trace_1_span(name: &str, resource: &str, meta: &[(&str, &str)]) -> Vec { + vec![span(name, resource, meta)] +} + +pub fn span(name: &str, resource: &str, meta: &[(&str, &str)]) -> serde_json::Value { + let trace_id: u32 = rand::thread_rng().gen(); + let span_id: u32 = rand::thread_rng().gen(); + let meta: HashMap<&str, &str> = HashMap::from_iter(meta.iter().copied()); + + json!( + { + "name": name, + "resource": 
resource, + "meta": meta, + "trace_id": trace_id, + "span_id": span_id, + "parent_id": 0, + "service": "test-service", + "start": 0, + "duration": 5, + "error": 0, + "metrics": {}, + "meta_struct": {}, + "span_links": [], + "span_events": [], + } + ) +} From 2c0ea25203f5106769d997801e06079e03f826cc Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Tue, 9 Jun 2026 17:00:27 +0200 Subject: [PATCH 31/49] remove old no test agent snapshot test --- Cargo.lock | 82 -------- libdd-data-pipeline/Cargo.toml | 1 - libdd-data-pipeline/src/trace_exporter/mod.rs | 184 ------------------ ..._single_threaded_tests__trace_filters.snap | 150 -------------- 4 files changed, 417 deletions(-) delete mode 100644 libdd-data-pipeline/src/trace_exporter/snapshots/libdd_data_pipeline__trace_exporter__single_threaded_tests__trace_filters.snap diff --git a/Cargo.lock b/Cargo.lock index e2411bf362..00b66d635f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -999,17 +999,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "console" -version = "0.16.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87" -dependencies = [ - "encode_unicode", - "libc", - "windows-sys 0.61.2", -] - [[package]] name = "console-api" version = "0.9.0" @@ -1748,12 +1737,6 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" -[[package]] -name = "encode_unicode" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" - [[package]] name = "encoding_rs" version = "0.8.35" @@ -2711,21 +2694,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "insta" -version = "1.47.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7b4a6248eb93a4401ed2f37dfe8ea592d3cf05b7cf4f8efa867b6895af7e094e" -dependencies = [ - "console", - "once_cell", - "pest", - "pest_derive", - "serde", - "similar", - "tempfile", -] - [[package]] name = "io-lifetimes" version = "1.0.11" @@ -3032,7 +3000,6 @@ dependencies = [ "http", "http-body-util", "httpmock", - "insta", "libdd-capabilities", "libdd-capabilities-impl", "libdd-common", @@ -4144,49 +4111,6 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" -[[package]] -name = "pest" -version = "2.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" -dependencies = [ - "memchr", - "ucd-trie", -] - -[[package]] -name = "pest_derive" -version = "2.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" -dependencies = [ - "pest", - "pest_generator", -] - -[[package]] -name = "pest_generator" -version = "2.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" -dependencies = [ - "pest", - "pest_meta", - "proc-macro2", - "quote", - "syn 2.0.87", -] - -[[package]] -name = "pest_meta" -version = "2.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" -dependencies = [ - "pest", - "sha2", -] - [[package]] name = "petgraph" version = "0.8.3" @@ -6277,12 +6201,6 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" -[[package]] -name = "ucd-trie" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" - [[package]] name = "unarray" version = "0.1.4" diff --git a/libdd-data-pipeline/Cargo.toml b/libdd-data-pipeline/Cargo.toml index 3b13f3d7e1..7dc51de8a0 100644 --- a/libdd-data-pipeline/Cargo.toml +++ b/libdd-data-pipeline/Cargo.toml @@ -81,7 +81,6 @@ tokio = { version = "1.23", features = [ "time", "test-util", ], default-features = false } -insta = { version = "1.47.2", features = ["json", "redactions"] } duplicate = "2.0.1" [features] diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index c776e69cf3..942eed4e8a 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -2053,11 +2053,8 @@ mod single_threaded_tests { use crate::agent_info; use httpmock::prelude::*; use libdd_capabilities_impl::NativeCapabilities; - use libdd_trace_protobuf::pb::ClientStatsPayload; use libdd_trace_utils::msgpack_encoder; use libdd_trace_utils::span::v04::SpanBytes; - use std::collections::HashMap; - use std::sync::Mutex; #[cfg_attr(miri, ignore)] #[test] @@ -2361,187 +2358,6 @@ mod single_threaded_tests { ); } - #[cfg_attr(miri, ignore)] - #[test] - fn test_trace_filters_snapshot() { - // Clear the agent info cache to ensure test isolation - agent_info::clear_cache_for_test(); - - let server = MockServer::start(); - let captured_stats = Arc::new(Mutex::new(Vec::new())); - - let captured_stats_in = captured_stats.clone(); - - let mock_traces = server.mock(|when, then| { - when.method(POST) - .header("Content-type", "application/msgpack") - .path("/v0.4/traces"); - then.status(200).body(""); - }); - - let mock_stats = server.mock(|when, then| { - when.method(POST) - .header("Content-type", "application/msgpack") - .path("/v0.6/stats") - .is_true(move |req| { - captured_stats_in.lock().unwrap().push(req.body_vec()); - true - }); - then.status(200).body(""); - }); - - let _mock_info = server.mock(|when, 
then| { - when.method(GET).path("/info"); - then.status(200) - .header("content-type", "application/json") - .header("datadog-agent-state", "1") - .body( - r#"{ - "version":"1", - "client_drop_p0s":true, - "endpoints":["/v0.4/traces","/v0.6/stats"], - "filter_tags": {"reject": ["my_ignore_tag"], "require": ["my_require_tag:true"]}, - "filter_tags_regex": {"reject": ["my_regex_ignore_tag:.*true.*"]}, - "ignore_resources": [".*IGNORED.*"] - }"#, - ); - }); - - let runtime = Arc::new(SharedRuntime::new().unwrap()); - - let mut builder = TraceExporter::::builder(); - builder - .set_url(&server.url("/")) - .set_service("test") - .set_env("staging") - .set_tracer_version("v0.1") - .set_language("nodejs") - .set_language_version("1.0") - .set_language_interpreter("v8") - .set_input_format(TraceExporterInputFormat::V04) - .set_output_format(TraceExporterOutputFormat::V04) - .set_shared_runtime(runtime.clone()) - .enable_stats(Duration::from_secs(10)); - let exporter = builder.build::().unwrap(); - - // Wait for the info fetcher to get the config - while agent_info::get_agent_info().is_none() { - std::thread::sleep(Duration::from_millis(100)); - } - - let result = exporter.send( - msgpack_encoder::v04::to_vec(&[ - vec![SpanBytes { - duration: 10, - resource: "test".into(), - meta: HashMap::from_iter([("my_require_tag".into(), "true".into())]), - ..Default::default() - }], - // This one gets filtered out because it matches an ignore_resources pattern - vec![SpanBytes { - duration: 10, - resource: "test IGNORED resource test".into(), - meta: HashMap::from_iter([("my_require_tag".into(), "true".into())]), - ..Default::default() - }], - // This one gets filtered out because one of its tag matches a reject filter_tag - vec![SpanBytes { - duration: 10, - resource: "test ignored because of reject filter_tag".into(), - meta: HashMap::from_iter([ - ("my_ignore_tag".into(), "".into()), - ("my_require_tag".into(), "true".into()), - ]), - ..Default::default() - }], - // This one gets 
filtered out because one of its tag matches a reject - // regex_filter_tag - vec![SpanBytes { - duration: 10, - resource: "test ignored because of reject regex_filter_tag".into(), - meta: HashMap::from_iter([ - ( - "my_regex_ignore_tag".into(), - "something-true-something".into(), - ), - ("my_require_tag".into(), "true".into()), - ]), - ..Default::default() - }], - // This one gets filtered out because it doesn't have my_require_tag:true - vec![SpanBytes { - duration: 10, - resource: "test ignored because missing a required filter_tag".into(), - meta: HashMap::from_iter([("a_useless_tag".into(), "true".into())]), - ..Default::default() - }], - // This one gets filtered out because it doesn't have my_require_tag:true - vec![SpanBytes { - duration: 10, - resource: "test ignored because wrong value on filter_tag".into(), - meta: HashMap::from_iter([("my_require_tag".into(), "false".into())]), - ..Default::default() - }], - vec![SpanBytes { - duration: 10, - resource: "test2".into(), - meta: HashMap::from_iter([("my_require_tag".into(), "true".into())]), - ..Default::default() - }], - ]) - .as_ref(), - ); - assert!(result.is_err()); - - // Wait for the stats worker to be active before shutting down to avoid potential flaky - // tests on CI where we shutdown before the stats worker had time to start - let start_time = std::time::Instant::now(); - while !exporter.is_stats_worker_active() { - if start_time.elapsed() > Duration::from_secs(10) { - panic!("Timeout waiting for stats worker to become active"); - } - std::thread::sleep(Duration::from_millis(10)); - } - - runtime.shutdown(None).unwrap(); - - // Wait for the mock server to process the stats - for _ in 0..1000 { - if mock_traces.calls() > 0 && mock_stats.calls() > 0 { - break; - } else { - std::thread::sleep(Duration::from_millis(10)); - } - } - - mock_traces.assert(); - mock_stats.assert(); - - // Verify snapshots matches - let mut captured_stats: Vec = captured_stats - .lock() - .unwrap() - .iter() - 
.map(|payload| rmp_serde::from_slice(payload).unwrap()) - .collect(); - // Sort for deterministic snapshot output - for payload in &mut captured_stats { - for bucket in &mut payload.stats { - bucket.stats.sort_by(|a, b| a.resource.cmp(&b.resource)); - } - } - insta::assert_json_snapshot!( - "trace_filters", - serde_json::to_value(&captured_stats).unwrap(), - { - "[].RuntimeID" => "[id]", - "[].Stats[].Start" => "[timestamp]", - "[].Stats[].Stats[].OkSummary" => "[sketch]", - "[].Stats[].Stats[].ErrorSummary" => "[sketch]", - } - ); - } - /// Agent rollback / partial-V1 scenario: `/info` advertises `/v1.0/traces` but the actual /// endpoint returns 404 (e.g. customer rolled back the agent without `/info` reflecting it). /// The fail-closed hook must flip `v1_active` to false on the first 404 so the next send diff --git a/libdd-data-pipeline/src/trace_exporter/snapshots/libdd_data_pipeline__trace_exporter__single_threaded_tests__trace_filters.snap b/libdd-data-pipeline/src/trace_exporter/snapshots/libdd_data_pipeline__trace_exporter__single_threaded_tests__trace_filters.snap deleted file mode 100644 index cbe5725103..0000000000 --- a/libdd-data-pipeline/src/trace_exporter/snapshots/libdd_data_pipeline__trace_exporter__single_threaded_tests__trace_filters.snap +++ /dev/null @@ -1,150 +0,0 @@ ---- -source: libdd-data-pipeline/src/trace_exporter/mod.rs -expression: "serde_json::to_value(&captured_stats).unwrap()" ---- -[ - { - "Hostname": "", - "Env": "staging", - "Version": "", - "Stats": [ - { - "Start": "[timestamp]", - "Duration": 10000000000, - "Stats": [ - { - "Service": "", - "Name": "", - "Resource": "test", - "HTTPStatusCode": 0, - "Type": "", - "DBType": "", - "Hits": 1, - "Errors": 0, - "Duration": 10, - "OkSummary": "[sketch]", - "ErrorSummary": "[sketch]", - "Synthetics": false, - "TopLevelHits": 1, - "SpanKind": "", - "PeerTags": [], - "IsTraceRoot": 1, - "GRPCStatusCode": "", - "HTTPMethod": "", - "HTTPEndpoint": "", - "srv_src": "", - 
"SpanDerivedPrimaryTags": [] - }, - { - "Service": "", - "Name": "", - "Resource": "test2", - "HTTPStatusCode": 0, - "Type": "", - "DBType": "", - "Hits": 1, - "Errors": 0, - "Duration": 10, - "OkSummary": "[sketch]", - "ErrorSummary": "[sketch]", - "Synthetics": false, - "TopLevelHits": 1, - "SpanKind": "", - "PeerTags": [], - "IsTraceRoot": 1, - "GRPCStatusCode": "", - "HTTPMethod": "", - "HTTPEndpoint": "", - "srv_src": "", - "SpanDerivedPrimaryTags": [] - } - ], - "AgentTimeShift": 0 - } - ], - "Lang": "", - "TracerVersion": "", - "RuntimeID": "[id]", - "Sequence": 0, - "AgentAggregation": "", - "Service": "test", - "ContainerID": "", - "Tags": [], - "GitCommitSha": "", - "ImageTag": "", - "ProcessTagsHash": 0, - "ProcessTags": "" - }, - { - "Hostname": "", - "Env": "staging", - "Version": "", - "Stats": [ - { - "Start": "[timestamp]", - "Duration": 10000000000, - "Stats": [ - { - "Service": "", - "Name": "", - "Resource": "test", - "HTTPStatusCode": 0, - "Type": "", - "DBType": "", - "Hits": 1, - "Errors": 0, - "Duration": 10, - "OkSummary": "[sketch]", - "ErrorSummary": "[sketch]", - "Synthetics": false, - "TopLevelHits": 1, - "SpanKind": "", - "PeerTags": [], - "IsTraceRoot": 1, - "GRPCStatusCode": "", - "HTTPMethod": "", - "HTTPEndpoint": "", - "srv_src": "", - "SpanDerivedPrimaryTags": [] - }, - { - "Service": "", - "Name": "", - "Resource": "test2", - "HTTPStatusCode": 0, - "Type": "", - "DBType": "", - "Hits": 1, - "Errors": 0, - "Duration": 10, - "OkSummary": "[sketch]", - "ErrorSummary": "[sketch]", - "Synthetics": false, - "TopLevelHits": 1, - "SpanKind": "", - "PeerTags": [], - "IsTraceRoot": 1, - "GRPCStatusCode": "", - "HTTPMethod": "", - "HTTPEndpoint": "", - "srv_src": "", - "SpanDerivedPrimaryTags": [] - } - ], - "AgentTimeShift": 0 - } - ], - "Lang": "", - "TracerVersion": "", - "RuntimeID": "[id]", - "Sequence": 0, - "AgentAggregation": "", - "Service": "test", - "ContainerID": "", - "Tags": [], - "GitCommitSha": "", - "ImageTag": "", - 
"ProcessTagsHash": 0, - "ProcessTags": "" - } -] From d50be09cd316f3d3c39caea0f286b9f422b39513 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Tue, 9 Jun 2026 17:14:09 +0200 Subject: [PATCH 32/49] feat: move tracefilter's conf arcswap to the trace exporter --- .../src/trace_exporter/builder.rs | 2 +- libdd-data-pipeline/src/trace_exporter/mod.rs | 13 +-- .../src/trace_exporter/trace_filter.rs | 86 ++++--------------- 3 files changed, 26 insertions(+), 75 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/builder.rs b/libdd-data-pipeline/src/trace_exporter/builder.rs index aef41633ef..3fd418efc0 100644 --- a/libdd-data-pipeline/src/trace_exporter/builder.rs +++ b/libdd-data-pipeline/src/trace_exporter/builder.rs @@ -515,7 +515,7 @@ impl TraceExporterBuilder { .agent_rates_payload_version_enabled .then(AgentResponsePayloadVersion::new), otlp_config, - trace_filterer: TraceFilterer::with_empty_conf(), + trace_filterer: ArcSwap::from_pointee(TraceFilterer::with_empty_conf()), }) } diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 942eed4e8a..2772c54ab8 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -11,6 +11,7 @@ mod trace_serializer; // Re-export the builder pub use builder::TraceExporterBuilder; use libdd_trace_utils::span::trace_utils::DroppedStats; +use trace_filter::TraceFilterer; use self::agent_response::AgentResponse; use self::metrics::MetricsEmitter; @@ -32,7 +33,7 @@ use crate::{ health_metrics, health_metrics::{HealthMetric, SendResult, TransportErrorType}, }; -use arc_swap::ArcSwapOption; +use arc_swap::{ArcSwap, ArcSwapOption}; use bytes::Bytes; use http::header::HeaderMap; use http::uri::PathAndQuery; @@ -214,7 +215,7 @@ pub struct TraceExporter, /// When set, traces are exported via OTLP HTTP/JSON instead of the Datadog agent. 
otlp_config: Option, - trace_filterer: trace_filter::TraceFilterer, + trace_filterer: ArcSwap, } impl TraceExporter { @@ -354,6 +355,8 @@ impl Tra /// Async so the `Enabled` arm can await a stats-worker shutdown without `block_on`. #[cfg(not(target_arch = "wasm32"))] async fn check_agent_info(&self) { + use trace_filter::TraceFilterer; + let Some(agent_info) = agent_info::get_agent_info() else { return; }; @@ -365,11 +368,11 @@ impl Tra self.refresh_v1_active(&agent_info); } - self.trace_filterer.update_conf( + self.trace_filterer.store(Arc::new(TraceFilterer::new( &agent_info.info.filter_tags, &agent_info.info.filter_tags_regex, &agent_info.info.ignore_resources, - ); + ))); // load_full() avoids holding an ArcSwap Guard (!Send) across .await. let status = self.client_side_stats.status.load_full(); @@ -594,7 +597,7 @@ impl Tra &mut header_tags, &self.client_side_stats.status, self.client_computed_top_level, - &self.trace_filterer, + &self.trace_filterer.load(), ); // OTLP path: send sampled traces via OTLP when an OTLP endpoint is configured. diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index 48f9cde040..75604cc8f1 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -2,9 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 //! Trace-level filter logic for client-side stats (filter_tags, filter_tags_regex, //! ignore_resources as published by the agent's /info endpoint). 
-use std::{borrow::Borrow as _, sync::Arc}; +use std::borrow::Borrow as _; -use arc_swap::ArcSwap; +use crate::agent_info::schema::FilterTagsConfig; use libdd_common::regex_engine::Regex; use libdd_trace_stats::span_concentrator::StatSpan; use libdd_trace_utils::span::trace_utils::get_root_span_index; @@ -29,9 +29,8 @@ struct TagRegexFilter { value: Option, } -/// Parsed config #[derive(Debug, Default)] -struct TraceFiltererConf { +pub struct TraceFilterer { reject: Vec, reject_regex: Vec, @@ -41,11 +40,6 @@ struct TraceFiltererConf { ignore_resources: Vec, } -#[derive(Debug)] -pub struct TraceFilterer { - conf: ArcSwap, -} - impl TagFilter for TagLiteralFilter { fn matches_tag_value(&self, value: &str) -> bool { match &self.value { @@ -72,7 +66,7 @@ impl TagFilter for TagRegexFilter { } } -impl TraceFiltererConf { +impl TraceFilterer { fn compile_literal_filters(filters: &[String]) -> Vec { let mut tag_regex_filters = Vec::new(); for filter in filters { @@ -155,9 +149,9 @@ impl TraceFiltererConf { .collect() } - fn parse( - filter_tags: &crate::agent_info::schema::FilterTagsConfig, - filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, + pub fn new( + filter_tags: &FilterTagsConfig, + filter_tags_regex: &FilterTagsConfig, ignore_resources: &[String], ) -> Self { let require_regex = Self::compile_regex_filters(&filter_tags_regex.require); @@ -166,7 +160,7 @@ impl TraceFiltererConf { let reject = Self::compile_literal_filters(&filter_tags.reject); let ignore_resources = Self::compile_resource_filters(ignore_resources); - TraceFiltererConf { + Self { reject, require, reject_regex, @@ -174,48 +168,21 @@ impl TraceFiltererConf { ignore_resources, } } -} - -impl TraceFilterer { - #[cfg(test)] - fn new( - filter_tags: &crate::agent_info::schema::FilterTagsConfig, - filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, - ignore_resources: &[String], - ) -> Self { - let conf = TraceFiltererConf::parse(filter_tags, filter_tags_regex, 
ignore_resources); - Self { - conf: ArcSwap::from_pointee(conf), - } - } pub fn with_empty_conf() -> Self { - Self { - conf: ArcSwap::from_pointee(TraceFiltererConf::default()), - } - } - - pub fn update_conf( - &self, - filter_tags: &crate::agent_info::schema::FilterTagsConfig, - filter_tags_regex: &crate::agent_info::schema::FilterTagsConfig, - ignore_resources: &[String], - ) { - let new_conf = TraceFiltererConf::parse(filter_tags, filter_tags_regex, ignore_resources); - self.conf.swap(Arc::new(new_conf)); + Self::default() } pub fn filter_traces( &self, traces: &mut Vec>>, ) -> usize { - let conf = self.conf.load(); let traces_count_before = traces.len(); traces.retain(|trace| { let Ok(root_span_index) = get_root_span_index(trace) else { return true; }; let root_span = &trace[root_span_index]; - let should_drop = Self::should_drop(&conf, root_span); + let should_drop = self.should_drop(root_span); if should_drop { debug!("Trace rejected as it fails to meet tag requirements. root: %v"); } @@ -239,10 +206,10 @@ impl TraceFilterer { // filters, all of them must match tags on the root span. If any required filter doesn't // match, reject the trace. 
fn should_drop( - conf: &TraceFiltererConf, + &self, root_span: &libdd_trace_utils::span::v04::Span, ) -> bool { - if !conf.ignore_resources.is_empty() { + if !self.ignore_resources.is_empty() { let span_resource = root_span.resource(); // Normalization let span_resource = if span_resource.is_empty() { @@ -256,7 +223,7 @@ impl TraceFilterer { span_resource }; - if conf + if self .ignore_resources .iter() .any(|resource_pattern| resource_pattern.is_match(span_resource)) @@ -265,7 +232,7 @@ impl TraceFilterer { } } - if conf + if self .reject .iter() .any(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) @@ -273,7 +240,7 @@ impl TraceFilterer { return true; } - if conf + if self .reject_regex .iter() .any(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) @@ -281,7 +248,7 @@ impl TraceFilterer { return true; } - if !conf + if !self .require .iter() .all(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) @@ -289,7 +256,7 @@ impl TraceFilterer { return true; } - if !conf + if !self .require_regex .iter() .all(|filter| Self::check_tag_filter_with_normalization(filter, root_span)) @@ -329,7 +296,6 @@ impl TraceFilterer { #[cfg(test)] mod tests { use super::*; - use crate::agent_info::schema::FilterTagsConfig; use libdd_trace_utils::span::v04::SpanBytes; use std::collections::HashMap; @@ -588,24 +554,6 @@ mod tests { assert!(traces.is_empty()); } - // ---- update_conf ---- - - #[test] - fn update_conf_takes_effect() { - let f = TraceFilterer::new(&no_tags(), &no_tags(), &[]); - - // No filters: trace is kept. - let mut traces = one_trace(span_with("r", &[("env", "prod")])); - f.filter_traces(&mut traces); - assert_eq!(traces.len(), 1); - - // Swap in a reject filter: same trace is now dropped. 
- f.update_conf(&ftc(&[], &["env:prod"]), &no_tags(), &[]); - let mut traces = one_trace(span_with("r", &[("env", "prod")])); - f.filter_traces(&mut traces); - assert!(traces.is_empty()); - } - // ---- edge / misc ---- #[test] From 128d7b2a40370473c4d16457338e451648818a12 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Wed, 10 Jun 2026 17:09:37 +0200 Subject: [PATCH 33/49] feat: make implementation generic using a minimal span trait --- .../src/trace_exporter/trace_filter.rs | 105 +++++++++++++++--- .../src/span_concentrator/stat_span.rs | 30 +++++ libdd-trace-utils/src/span/trace_utils.rs | 47 +------- 3 files changed, 118 insertions(+), 64 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index 75604cc8f1..15478d1abe 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -2,12 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 //! Trace-level filter logic for client-side stats (filter_tags, filter_tags_regex, //! ignore_resources as published by the agent's /info endpoint). 
-use std::borrow::Borrow as _; +use std::{borrow::Borrow as _, collections::HashSet}; use crate::agent_info::schema::FilterTagsConfig; use libdd_common::regex_engine::Regex; use libdd_trace_stats::span_concentrator::StatSpan; -use libdd_trace_utils::span::trace_utils::get_root_span_index; use tracing::{debug, error}; trait TagFilter { @@ -66,6 +65,81 @@ impl TagFilter for TagRegexFilter { } } +pub trait Span<'a> { + fn resource(&'a self) -> &'a str; + fn name(&'a self) -> &'a str; + fn span_id(&'a self) -> u64; + fn parent_id(&'a self) -> u64; + fn trace_id(&'a self) -> u128; + fn get_meta(&'a self, key: &str) -> Option<&'a str>; +} + +impl<'a, T: StatSpan<'a>> Span<'a> for T { + fn resource(&'a self) -> &'a str { + StatSpan::resource(self) + } + + fn name(&'a self) -> &'a str { + StatSpan::name(self) + } + + fn span_id(&'a self) -> u64 { + StatSpan::span_id(self) + } + + fn parent_id(&'a self) -> u64 { + StatSpan::parent_id(self) + } + + fn trace_id(&'a self) -> u128 { + StatSpan::trace_id(self) + } + + fn get_meta(&'a self, key: &str) -> Option<&'a str> { + StatSpan::get_meta(self, key) + } +} + +fn get_root_span_index<'a>(trace: &'a [impl Span<'a>]) -> anyhow::Result { + if trace.is_empty() { + anyhow::bail!("Cannot find root span index in an empty trace."); + } + + // Do a first pass to find if we have an obvious root span (starting from the end) since some + // clients put the root span last. 
+ for (i, span) in trace.iter().enumerate().rev() { + if span.parent_id() == 0 { + return Ok(i); + } + } + + let span_ids: HashSet<_> = trace.iter().map(|span| span.span_id()).collect(); + + let mut root_span_id = None; + for (i, span) in trace.iter().enumerate() { + // If a span's parent is not in the trace, it is a root + if !span_ids.contains(&span.parent_id()) { + if root_span_id.is_some() { + debug!( + trace_id = &trace[0].trace_id(), + "trace has multiple root spans" + ); + } + root_span_id = Some(i); + } + } + Ok(match root_span_id { + Some(i) => i, + None => { + debug!( + trace_id = &trace[0].trace_id(), + "Could not find the root span for trace" + ); + trace.len() - 1 + } + }) +} + impl TraceFilterer { fn compile_literal_filters(filters: &[String]) -> Vec { let mut tag_regex_filters = Vec::new(); @@ -172,17 +246,16 @@ impl TraceFilterer { Self::default() } - pub fn filter_traces( - &self, - traces: &mut Vec>>, - ) -> usize { + pub fn filter_traces(&self, traces: &mut Vec>) -> usize + where + for<'a> T: Span<'a>, + { let traces_count_before = traces.len(); - traces.retain(|trace| { + traces.retain(|trace: &Vec| { let Ok(root_span_index) = get_root_span_index(trace) else { return true; }; - let root_span = &trace[root_span_index]; - let should_drop = self.should_drop(root_span); + let should_drop = self.should_drop(&trace[root_span_index]); if should_drop { debug!("Trace rejected as it fails to meet tag requirements. root: %v"); } @@ -205,12 +278,9 @@ impl TraceFilterer { // 3. Require filtering: If filter_tags.require or filter_tags_regex.require contain any // filters, all of them must match tags on the root span. If any required filter doesn't // match, reject the trace. 
- fn should_drop( - &self, - root_span: &libdd_trace_utils::span::v04::Span, - ) -> bool { + fn should_drop<'a>(&self, root_span: &'a impl Span<'a>) -> bool { if !self.ignore_resources.is_empty() { - let span_resource = root_span.resource(); + let span_resource = Span::resource(root_span); // Normalization let span_resource = if span_resource.is_empty() { let span_name = root_span.name(); @@ -267,14 +337,13 @@ impl TraceFilterer { false } - fn check_tag_filter_with_normalization( + fn check_tag_filter_with_normalization<'a>( filter: &impl TagFilter, - root_span: &libdd_trace_utils::span::v04::Span, + root_span: &'a impl Span<'a>, ) -> bool { - let Some(value) = root_span.meta.get(filter.key()) else { + let Some(value) = root_span.get_meta(filter.key()) else { return false; }; - let value = value.borrow(); match filter.key() { "env" => { let normalized_value = diff --git a/libdd-trace-stats/src/span_concentrator/stat_span.rs b/libdd-trace-stats/src/span_concentrator/stat_span.rs index 4696d50581..9841921ea4 100644 --- a/libdd-trace-stats/src/span_concentrator/stat_span.rs +++ b/libdd-trace-stats/src/span_concentrator/stat_span.rs @@ -37,6 +37,12 @@ pub trait StatSpan<'a> { fn get_meta(&'a self, key: &str) -> Option<&'a str>; /// Returns the value of a metrics field fn get_metrics(&'a self, key: &str) -> Option; + /// Returns the value of a parent id + fn parent_id(&'a self) -> u64; + /// Returns the value of a span id + fn span_id(&'a self) -> u64; + /// Returns the value of a trace id + fn trace_id(&'a self) -> u128; } impl<'a, T: TraceData> StatSpan<'a> for Span { @@ -91,6 +97,18 @@ impl<'a, T: TraceData> StatSpan<'a> for Span { fn get_metrics(&'a self, key: &str) -> Option { self.metrics.get(key).copied() } + + fn parent_id(&'a self) -> u64 { + self.parent_id + } + + fn span_id(&'a self) -> u64 { + self.span_id + } + + fn trace_id(&'a self) -> u128 { + self.trace_id + } } impl<'a> StatSpan<'a> for pb::Span { @@ -145,4 +163,16 @@ impl<'a> StatSpan<'a> for pb::Span 
{ fn get_metrics(&'a self, key: &str) -> Option { self.metrics.get(key).copied() } + + fn parent_id(&'a self) -> u64 { + self.parent_id + } + + fn span_id(&'a self) -> u64 { + self.span_id + } + + fn trace_id(&'a self) -> u128 { + self.trace_id as u128 + } } diff --git a/libdd-trace-utils/src/span/trace_utils.rs b/libdd-trace-utils/src/span/trace_utils.rs index c330ff63e4..c54b575610 100644 --- a/libdd-trace-utils/src/span/trace_utils.rs +++ b/libdd-trace-utils/src/span/trace_utils.rs @@ -3,10 +3,8 @@ //! Trace-utils functionalities implementation for tinybytes based spans -use tracing::debug; - use super::{v04::Span, SpanText, TraceData}; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; /// Span metric the mini agent must set for the backend to recognize top level span const TOP_LEVEL_KEY: &str = "_top_level"; @@ -62,49 +60,6 @@ where } } -pub fn get_root_span_index(trace: &[Span]) -> anyhow::Result -where - T: TraceData, -{ - if trace.is_empty() { - anyhow::bail!("Cannot find root span index in an empty trace."); - } - - // Do a first pass to find if we have an obvious root span (starting from the end) since some - // clients put the root span last. 
- for (i, span) in trace.iter().enumerate().rev() { - if span.parent_id == 0 { - return Ok(i); - } - } - - let span_ids: HashSet<_> = trace.iter().map(|span| span.span_id).collect(); - - let mut root_span_id = None; - for (i, span) in trace.iter().enumerate() { - // If a span's parent is not in the trace, it is a root - if !span_ids.contains(&span.parent_id) { - if root_span_id.is_some() { - debug!( - trace_id = &trace[0].trace_id, - "trace has multiple root spans" - ); - } - root_span_id = Some(i); - } - } - Ok(match root_span_id { - Some(i) => i, - None => { - debug!( - trace_id = &trace[0].trace_id, - "Could not find the root span for trace" - ); - trace.len() - 1 - } - }) -} - /// Return true if the span has a top level key set pub fn has_top_level(span: &Span) -> bool { span.metrics From dde028f439a4d56fed5946cad350ed43305d1960 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Wed, 10 Jun 2026 17:10:16 +0200 Subject: [PATCH 34/49] chore: bump test agent version v1.56.0 -> v1.61.1 --- libdd-trace-utils/src/test_utils/datadog_test_agent.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdd-trace-utils/src/test_utils/datadog_test_agent.rs b/libdd-trace-utils/src/test_utils/datadog_test_agent.rs index 7de186baec..528db31723 100644 --- a/libdd-trace-utils/src/test_utils/datadog_test_agent.rs +++ b/libdd-trace-utils/src/test_utils/datadog_test_agent.rs @@ -14,7 +14,7 @@ use std::str::FromStr; use std::time::Duration; const TEST_AGENT_IMAGE_NAME: &str = "ghcr.io/datadog/dd-apm-test-agent/ddapm-test-agent"; -const TEST_AGENT_IMAGE_TAG: &str = "v1.56.0"; +const TEST_AGENT_IMAGE_TAG: &str = "v1.61.1"; const TEST_AGENT_READY_MSG: &str = "INFO:ddapm_test_agent.agent:Trace request stall seconds setting set to 0.0."; From 4633ef8c1beaeafebb747fa2df924bd026325b41 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Wed, 10 Jun 2026 17:14:19 +0200 Subject: [PATCH 35/49] fix: clippy --- libdd-data-pipeline/src/trace_exporter/trace_filter.rs | 2 +- 1
file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs index 15478d1abe..1ece66ddff 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_filter.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 //! Trace-level filter logic for client-side stats (filter_tags, filter_tags_regex, //! ignore_resources as published by the agent's /info endpoint). -use std::{borrow::Borrow as _, collections::HashSet}; +use std::collections::HashSet; use crate::agent_info::schema::FilterTagsConfig; use libdd_common::regex_engine::Regex; From daae8db711ba10abc2e9b4888765bfe60d627a4b Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Wed, 10 Jun 2026 17:55:38 +0200 Subject: [PATCH 36/49] feat: move trace_filter component to trace-utils decouple from data-pipeline reverts changes to StatSpan --- .../src/trace_exporter/builder.rs | 2 +- libdd-data-pipeline/src/trace_exporter/mod.rs | 12 ++- .../src/trace_exporter/stats.rs | 9 +-- .../src/span_concentrator/stat_span.rs | 30 -------- libdd-trace-utils/src/lib.rs | 1 + .../src}/trace_filter.rs | 76 +++++++++---------- 6 files changed, 46 insertions(+), 84 deletions(-) rename {libdd-data-pipeline/src/trace_exporter => libdd-trace-utils/src}/trace_filter.rs (92%) diff --git a/libdd-data-pipeline/src/trace_exporter/builder.rs b/libdd-data-pipeline/src/trace_exporter/builder.rs index 3fd418efc0..577280550a 100644 --- a/libdd-data-pipeline/src/trace_exporter/builder.rs +++ b/libdd-data-pipeline/src/trace_exporter/builder.rs @@ -8,7 +8,6 @@ use crate::otlp::OtlpTraceConfig; use crate::telemetry::TelemetryClientBuilder; use crate::trace_exporter::agent_response::AgentResponsePayloadVersion; use crate::trace_exporter::error::BuilderErrorKind; -use crate::trace_exporter::trace_filter::TraceFilterer; #[cfg(all(not(target_arch = "wasm32"), feature = "telemetry"))] use 
crate::trace_exporter::TelemetryConfig; #[cfg(not(target_arch = "wasm32"))] @@ -23,6 +22,7 @@ use libdd_capabilities::{HttpClientCapability, MaybeSend, SleepCapability}; use libdd_common::{parse_uri, tag, Endpoint}; use libdd_dogstatsd_client::new; use libdd_shared_runtime::SharedRuntime; +use libdd_trace_utils::trace_filter::TraceFilterer; use std::sync::Arc; use std::time::Duration; diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 2772c54ab8..e45cfb9eb9 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -5,13 +5,11 @@ pub mod builder; pub mod error; pub mod metrics; pub mod stats; -mod trace_filter; mod trace_serializer; // Re-export the builder pub use builder::TraceExporterBuilder; -use libdd_trace_utils::span::trace_utils::DroppedStats; -use trace_filter::TraceFilterer; +use libdd_trace_utils::{span::trace_utils::DroppedStats, trace_filter::TraceFilterer}; use self::agent_response::AgentResponse; use self::metrics::MetricsEmitter; @@ -355,8 +353,6 @@ impl Tra /// Async so the `Enabled` arm can await a stats-worker shutdown without `block_on`. 
#[cfg(not(target_arch = "wasm32"))] async fn check_agent_info(&self) { - use trace_filter::TraceFilterer; - let Some(agent_info) = agent_info::get_agent_info() else { return; }; @@ -369,8 +365,10 @@ impl Tra } self.trace_filterer.store(Arc::new(TraceFilterer::new( - &agent_info.info.filter_tags, - &agent_info.info.filter_tags_regex, + &agent_info.info.filter_tags.require, + &agent_info.info.filter_tags.reject, + &agent_info.info.filter_tags_regex.require, + &agent_info.info.filter_tags_regex.reject, &agent_info.info.ignore_resources, ))); diff --git a/libdd-data-pipeline/src/trace_exporter/stats.rs b/libdd-data-pipeline/src/trace_exporter/stats.rs index d811f1fbf1..8dd9f584f1 100644 --- a/libdd-data-pipeline/src/trace_exporter/stats.rs +++ b/libdd-data-pipeline/src/trace_exporter/stats.rs @@ -7,6 +7,9 @@ //! including starting/stopping stats workers, managing the span concentrator, //! and processing traces for stats collection. +#[cfg(not(target_arch = "wasm32"))] +use super::add_path; +use super::TracerMetadata; #[cfg(not(target_arch = "wasm32"))] use crate::agent_info::schema::AgentInfo; use arc_swap::ArcSwap; @@ -22,16 +25,12 @@ use libdd_trace_stats::span_concentrator::{ }; #[cfg(not(target_arch = "wasm32"))] use libdd_trace_stats::stats_exporter::{StatsExporter, StatsMetadata}; +use libdd_trace_utils::trace_filter::TraceFilterer; use std::sync::{Arc, Mutex}; use std::time::Duration; #[cfg(not(target_arch = "wasm32"))] use tracing::{debug, error}; -#[cfg(not(target_arch = "wasm32"))] -use super::add_path; -use super::trace_filter::TraceFilterer; -use super::TracerMetadata; - #[cfg(not(target_arch = "wasm32"))] pub(crate) const DEFAULT_STATS_ELIGIBLE_SPAN_KINDS: [&str; 4] = ["client", "server", "producer", "consumer"]; diff --git a/libdd-trace-stats/src/span_concentrator/stat_span.rs b/libdd-trace-stats/src/span_concentrator/stat_span.rs index 9841921ea4..4696d50581 100644 --- a/libdd-trace-stats/src/span_concentrator/stat_span.rs +++ 
b/libdd-trace-stats/src/span_concentrator/stat_span.rs @@ -37,12 +37,6 @@ pub trait StatSpan<'a> { fn get_meta(&'a self, key: &str) -> Option<&'a str>; /// Returns the value of a metrics field fn get_metrics(&'a self, key: &str) -> Option; - /// Returns the value of a parent id - fn parent_id(&'a self) -> u64; - /// Returns the value of a span id - fn span_id(&'a self) -> u64; - /// Returns the value of a trace id - fn trace_id(&'a self) -> u128; } impl<'a, T: TraceData> StatSpan<'a> for Span { @@ -97,18 +91,6 @@ impl<'a, T: TraceData> StatSpan<'a> for Span { fn get_metrics(&'a self, key: &str) -> Option { self.metrics.get(key).copied() } - - fn parent_id(&'a self) -> u64 { - self.parent_id - } - - fn span_id(&'a self) -> u64 { - self.span_id - } - - fn trace_id(&'a self) -> u128 { - self.trace_id - } } impl<'a> StatSpan<'a> for pb::Span { @@ -163,16 +145,4 @@ impl<'a> StatSpan<'a> for pb::Span { fn get_metrics(&'a self, key: &str) -> Option { self.metrics.get(key).copied() } - - fn parent_id(&'a self) -> u64 { - self.parent_id - } - - fn span_id(&'a self) -> u64 { - self.span_id - } - - fn trace_id(&'a self) -> u128 { - self.trace_id as u128 - } } diff --git a/libdd-trace-utils/src/lib.rs b/libdd-trace-utils/src/lib.rs index aa8d93c887..e43df05bd6 100644 --- a/libdd-trace-utils/src/lib.rs +++ b/libdd-trace-utils/src/lib.rs @@ -16,6 +16,7 @@ pub mod send_with_retry; pub mod stats_utils; #[cfg(any(test, feature = "test-utils"))] pub mod test_utils; +pub mod trace_filter; pub mod trace_utils; pub mod tracer_header_tags; pub mod tracer_metadata; diff --git a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs b/libdd-trace-utils/src/trace_filter.rs similarity index 92% rename from libdd-data-pipeline/src/trace_exporter/trace_filter.rs rename to libdd-trace-utils/src/trace_filter.rs index 1ece66ddff..f115442f58 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_filter.rs +++ b/libdd-trace-utils/src/trace_filter.rs @@ -2,13 +2,14 @@ // SPDX-License-Identifier: 
Apache-2.0 //! Trace-level filter logic for client-side stats (filter_tags, filter_tags_regex, //! ignore_resources as published by the agent's /info endpoint). -use std::collections::HashSet; +use std::{borrow::Borrow as _, collections::HashSet}; -use crate::agent_info::schema::FilterTagsConfig; use libdd_common::regex_engine::Regex; -use libdd_trace_stats::span_concentrator::StatSpan; +use libdd_trace_normalization::{normalize_utils, normalizer}; use tracing::{debug, error}; +use crate::span::{self, TraceData}; + trait TagFilter { /// Returns true if the given tag value matches the Filterer. fn matches_tag_value(&self, value: &str) -> bool; @@ -74,29 +75,29 @@ pub trait Span<'a> { fn get_meta(&'a self, key: &str) -> Option<&'a str>; } -impl<'a, T: StatSpan<'a>> Span<'a> for T { +impl<'a, T: TraceData> Span<'a> for span::v04::Span { fn resource(&'a self) -> &'a str { - StatSpan::resource(self) + self.resource.borrow() } fn name(&'a self) -> &'a str { - StatSpan::name(self) + self.name.borrow() } fn span_id(&'a self) -> u64 { - StatSpan::span_id(self) + self.span_id } fn parent_id(&'a self) -> u64 { - StatSpan::parent_id(self) + self.parent_id } fn trace_id(&'a self) -> u128 { - StatSpan::trace_id(self) + self.trace_id } fn get_meta(&'a self, key: &str) -> Option<&'a str> { - StatSpan::get_meta(self, key) + self.meta.get(key).map(|v| v.borrow()) } } @@ -224,14 +225,16 @@ impl TraceFilterer { } pub fn new( - filter_tags: &FilterTagsConfig, - filter_tags_regex: &FilterTagsConfig, + filter_tags_require: &[String], + filter_tags_reject: &[String], + filter_tags_regex_require: &[String], + filter_tags_regex_reject: &[String], ignore_resources: &[String], ) -> Self { - let require_regex = Self::compile_regex_filters(&filter_tags_regex.require); - let reject_regex = Self::compile_regex_filters(&filter_tags_regex.reject); - let require = Self::compile_literal_filters(&filter_tags.require); - let reject = Self::compile_literal_filters(&filter_tags.reject); + let 
require_regex = Self::compile_regex_filters(filter_tags_regex_require); + let reject_regex = Self::compile_regex_filters(filter_tags_regex_reject); + let require = Self::compile_literal_filters(filter_tags_require); + let reject = Self::compile_literal_filters(filter_tags_reject); let ignore_resources = Self::compile_resource_filters(ignore_resources); Self { @@ -346,12 +349,11 @@ impl TraceFilterer { }; match filter.key() { "env" => { - let normalized_value = - libdd_trace_normalization::normalize_utils::normalize_tag_cloned(value); + let normalized_value = normalize_utils::normalize_tag_cloned(value); filter.matches_tag_value(&normalized_value) } "http.status_code" => { - if !libdd_trace_normalization::normalizer::is_valid_http_status_code(value) { + if !normalizer::is_valid_http_status_code(value) { debug!(?value,"trace filter on http.status_code ignored because root span's `http.status_code` is invalid"); return false; } @@ -364,23 +366,12 @@ impl TraceFilterer { #[cfg(test)] mod tests { - use super::*; - use libdd_trace_utils::span::v04::SpanBytes; + use super::TraceFilterer; + use crate::span::v04::SpanBytes; use std::collections::HashMap; // ---- helpers ---- - fn ftc(require: &[&str], reject: &[&str]) -> FilterTagsConfig { - FilterTagsConfig { - require: require.iter().map(|s| s.to_string()).collect(), - reject: reject.iter().map(|s| s.to_string()).collect(), - } - } - - fn no_tags() -> FilterTagsConfig { - FilterTagsConfig::default() - } - fn span_with(resource: &'static str, meta: &[(&'static str, &'static str)]) -> SpanBytes { SpanBytes { service: "svc".into(), @@ -401,25 +392,28 @@ mod tests { vec![vec![s]] } - fn reject_str(tags: &[&str]) -> TraceFilterer { - TraceFilterer::new(&ftc(&[], tags), &no_tags(), &[]) + fn map_to_owned(values: &[&str]) -> Vec { + values.iter().map(|&s| s.to_owned()).collect() } fn require_str(tags: &[&str]) -> TraceFilterer { - TraceFilterer::new(&ftc(tags, &[]), &no_tags(), &[]) + TraceFilterer::new(&map_to_owned(tags), &[], 
&[], &[], &[]) } - fn reject_regex(tags: &[&str]) -> TraceFilterer { - TraceFilterer::new(&no_tags(), &ftc(&[], tags), &[]) + fn reject_str(tags: &[&str]) -> TraceFilterer { + TraceFilterer::new(&[], &map_to_owned(tags), &[], &[], &[]) } fn require_regex(tags: &[&str]) -> TraceFilterer { - TraceFilterer::new(&no_tags(), &ftc(tags, &[]), &[]) + TraceFilterer::new(&[], &[], &map_to_owned(tags), &[], &[]) + } + + fn reject_regex(tags: &[&str]) -> TraceFilterer { + TraceFilterer::new(&[], &[], &[], &map_to_owned(tags), &[]) } fn ignore_resources(patterns: &[&str]) -> TraceFilterer { - let pats: Vec = patterns.iter().map(|s| s.to_string()).collect(); - TraceFilterer::new(&no_tags(), &no_tags(), &pats) + TraceFilterer::new(&[], &[], &[], &[], &map_to_owned(patterns)) } // ---- reject (TagStringFilter) ---- @@ -638,7 +632,7 @@ mod tests { #[test] fn no_filters_keeps_all_traces() { - let f = TraceFilterer::new(&no_tags(), &no_tags(), &[]); + let f = TraceFilterer::new(&[], &[], &[], &[], &[]); let mut traces = vec![ vec![span_with("r1", &[])], vec![span_with("r2", &[("env", "prod")])], From ab63da6cdefe3d56516dec9500ead53c38c107bc Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Thu, 11 Jun 2026 11:54:25 +0200 Subject: [PATCH 37/49] fix: apply suggestions --- libdd-trace-utils/src/trace_filter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libdd-trace-utils/src/trace_filter.rs b/libdd-trace-utils/src/trace_filter.rs index f115442f58..4b625e8575 100644 --- a/libdd-trace-utils/src/trace_filter.rs +++ b/libdd-trace-utils/src/trace_filter.rs @@ -281,7 +281,7 @@ impl TraceFilterer { // 3. Require filtering: If filter_tags.require or filter_tags_regex.require contain any // filters, all of them must match tags on the root span. If any required filter doesn't // match, reject the trace. 
- fn should_drop<'a>(&self, root_span: &'a impl Span<'a>) -> bool { + pub fn should_drop<'a>(&self, root_span: &'a impl Span<'a>) -> bool { if !self.ignore_resources.is_empty() { let span_resource = Span::resource(root_span); // Normalization @@ -289,7 +289,7 @@ impl TraceFilterer { let span_name = root_span.name(); debug!( ?span_name, - "Trace filter fixing malformed trace. Resource is empty so using name instead" + "Trace filter: filtering on name because resource is empty" ); span_name } else { From 2fd1d046226c9c73f7c18beb0132a719fb6431b9 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Thu, 11 Jun 2026 12:00:29 +0200 Subject: [PATCH 38/49] feat: add documentation to public items --- libdd-trace-utils/src/trace_filter.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/libdd-trace-utils/src/trace_filter.rs b/libdd-trace-utils/src/trace_filter.rs index 4b625e8575..d482adee79 100644 --- a/libdd-trace-utils/src/trace_filter.rs +++ b/libdd-trace-utils/src/trace_filter.rs @@ -29,6 +29,10 @@ struct TagRegexFilter { value: Option, } +/// Applies trace-level filters derived from the agent's `/info` endpoint configuration: +/// `filter_tags`, `filter_tags_regex`, and `ignore_resources`. +/// +/// Filtering is evaluated on the root span of each trace. #[derive(Debug, Default)] pub struct TraceFilterer { reject: Vec, @@ -66,12 +70,14 @@ impl TagFilter for TagRegexFilter { } } +/// Minimal span interface required by [`TraceFilterer`]. pub trait Span<'a> { fn resource(&'a self) -> &'a str; fn name(&'a self) -> &'a str; fn span_id(&'a self) -> u64; fn parent_id(&'a self) -> u64; fn trace_id(&'a self) -> u128; + /// Returns the value of the given meta tag, if present. fn get_meta(&'a self, key: &str) -> Option<&'a str>; } @@ -224,6 +230,9 @@ impl TraceFilterer { .collect() } + /// Creates a new filterer from the agent's `/info` configuration fields. + /// + /// Invalid regex patterns are logged and skipped rather than causing an error. 
pub fn new( filter_tags_require: &[String], filter_tags_reject: &[String], @@ -245,10 +254,12 @@ impl TraceFilterer { ignore_resources, } } + /// Creates a no-op filterer that keeps all traces. pub fn with_empty_conf() -> Self { Self::default() } + /// Removes traces that fail filter checks in-place. Returns the number of traces dropped. pub fn filter_traces(&self, traces: &mut Vec>) -> usize where for<'a> T: Span<'a>, From 71705f3556d8c7284ec08284c04e1d2ee929a927 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Thu, 11 Jun 2026 14:57:10 +0200 Subject: [PATCH 39/49] fix: remove unrelated changes --- .gitignore | 1 - LICENSE-3rdparty.csv | 7 ------- 2 files changed, 8 deletions(-) diff --git a/.gitignore b/.gitignore index 0b20ce159e..5a4edd14ce 100644 --- a/.gitignore +++ b/.gitignore @@ -32,4 +32,3 @@ examples/cxx/exporter_manager.exe examples/cxx/profiling examples/cxx/profiling.exe profile.pprof -*.snap.new diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index bf70bdcc80..16d8a34b9e 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -85,7 +85,6 @@ colorchoice,https://github.com/rust-cli/anstyle,MIT OR Apache-2.0,The colorchoic colored,https://github.com/mackwic/colored,MPL-2.0,Thomas Wickham combine,https://github.com/Marwes/combine,MIT,Markus Westerlind concurrent-queue,https://github.com/smol-rs/concurrent-queue,Apache-2.0 OR MIT,"Stjepan Glavina , Taiki Endo , John Nunley " -console,https://github.com/console-rs/console,MIT,The console Authors console-api,https://github.com/tokio-rs/console,MIT,"Eliza Weisman , Tokio Contributors " console-subscriber,https://github.com/tokio-rs/console,MIT,"Eliza Weisman , Tokio Contributors " const_format,https://github.com/rodrimati1992/const_format_crates,Zlib,rodrimati1992 @@ -126,7 +125,6 @@ dispatch2,https://github.com/madsmtm/objc2,Zlib OR Apache-2.0 OR MIT,"Mads Marqu displaydoc,https://github.com/yaahc/displaydoc,MIT OR Apache-2.0,Jane Lusby 
dyn-clone,https://github.com/dtolnay/dyn-clone,MIT OR Apache-2.0,David Tolnay either,https://github.com/rayon-rs/either,MIT OR Apache-2.0,bluss -encode_unicode,https://github.com/tormol/encode_unicode,Apache-2.0 OR MIT,Torbjørn Birch Moltu encoding_rs,https://github.com/hsivonen/encoding_rs,(Apache-2.0 OR MIT) AND BSD-3-Clause,Henri Sivonen enum-as-inner,https://github.com/bluejekyll/enum-as-inner,MIT OR Apache-2.0,Benjamin Fry equivalent,https://github.com/cuviper/equivalent,Apache-2.0 OR MIT,The equivalent Authors @@ -284,10 +282,6 @@ parking_lot_core,https://github.com/Amanieu/parking_lot,MIT OR Apache-2.0,Amanie paste,https://github.com/dtolnay/paste,MIT OR Apache-2.0,David Tolnay path-tree,https://github.com/viz-rs/path-tree,MIT OR Apache-2.0,Fangdun Tsai percent-encoding,https://github.com/servo/rust-url,MIT OR Apache-2.0,The rust-url developers -pest,https://github.com/pest-parser/pest,MIT OR Apache-2.0,Dragoș Tiselice -pest_derive,https://github.com/pest-parser/pest,MIT OR Apache-2.0,Dragoș Tiselice -pest_generator,https://github.com/pest-parser/pest,MIT OR Apache-2.0,Dragoș Tiselice -pest_meta,https://github.com/pest-parser/pest,MIT OR Apache-2.0,Dragoș Tiselice petgraph,https://github.com/petgraph/petgraph,MIT OR Apache-2.0,"bluss, mitchmindtree" pico-args,https://github.com/RazrFalcon/pico-args,MIT,Yevhenii Reizner pin-project,https://github.com/taiki-e/pin-project,Apache-2.0 OR MIT,The pin-project Authors @@ -471,7 +465,6 @@ try-lock,https://github.com/seanmonstar/try-lock,MIT,Sean McArthur typeid,https://github.com/dtolnay/typeid,MIT OR Apache-2.0,David Tolnay typenum,https://github.com/paholg/typenum,MIT OR Apache-2.0,"Paho Lurie-Gregg , Andre Bogus " -ucd-trie,https://github.com/BurntSushi/ucd-generate,MIT OR Apache-2.0,Andrew Gallant unarray,https://github.com/cameron1024/unarray,MIT OR Apache-2.0,The unarray Authors unicase,https://github.com/seanmonstar/unicase,MIT OR Apache-2.0,Sean McArthur 
unicode-ident,https://github.com/dtolnay/unicode-ident,(MIT OR Apache-2.0) AND Unicode-DFS-2016,David Tolnay From 00e7913007717dcbd0251ebd50a5af34904f08eb Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Thu, 11 Jun 2026 15:01:36 +0200 Subject: [PATCH 40/49] fix: put new test in integration test group --- .../tests/test_trace_filters.rs | 110 +++++++++--------- 1 file changed, 57 insertions(+), 53 deletions(-) diff --git a/libdd-data-pipeline/tests/test_trace_filters.rs b/libdd-data-pipeline/tests/test_trace_filters.rs index 6ce0752db2..c3705f13e2 100644 --- a/libdd-data-pipeline/tests/test_trace_filters.rs +++ b/libdd-data-pipeline/tests/test_trace_filters.rs @@ -12,72 +12,76 @@ use libdd_trace_utils::test_utils::datadog_test_agent::DatadogTestAgent; use rand::Rng; use serde_json::json; -#[cfg_attr(miri, ignore)] -#[tokio::test] -async fn trace_filters_snapshot_test() { - const EXTRA_INFO: &str = r#"{ +mod tracing_integration_tests { + use super::*; + + #[cfg_attr(miri, ignore)] + #[tokio::test] + async fn trace_filters_snapshot_test() { + const EXTRA_INFO: &str = r#"{ "version":"1", "filter_tags": {"reject": ["my_ignore_tag"], "require": ["my_require_tag:true"]}, "filter_tags_regex": {"reject": ["my_regex_ignore_tag:.*true.*"]}, "ignore_resources": [".*IGNORED.*"] }"#; - let relative_snapshot_path = "libdd-data-pipeline/tests/snapshots/"; - let snapshot_name = "trace_filters_snapshot_test"; - let test_agent = DatadogTestAgent::new( - Some(relative_snapshot_path), - None, - &[("DD_AGENT_EXTRA_INFO", EXTRA_INFO)], - ) - .await; - let url = test_agent.get_base_uri().await; - test_agent.start_session(snapshot_name, None).await; + let relative_snapshot_path = "libdd-data-pipeline/tests/snapshots/"; + let snapshot_name = "trace_filters_snapshot_test"; + let test_agent = DatadogTestAgent::new( + Some(relative_snapshot_path), + None, + &[("DD_AGENT_EXTRA_INFO", EXTRA_INFO)], + ) + .await; + let url = test_agent.get_base_uri().await; + 
test_agent.start_session(snapshot_name, None).await; - let mut builder = TraceExporter::::builder(); - builder - .enable_stats(Duration::from_secs(10)) - .set_env("staging") - .set_language("nodejs") - .set_language_interpreter("v8") - .set_language_version("1.0") - .set_service("test") - .set_test_session_token(snapshot_name) - .set_tracer_version("1.0") - .set_input_format(TraceExporterInputFormat::V04) - .set_output_format(TraceExporterOutputFormat::V04) - .set_url(url.to_string().as_ref()); + let mut builder = TraceExporter::::builder(); + builder + .enable_stats(Duration::from_secs(10)) + .set_env("staging") + .set_language("nodejs") + .set_language_interpreter("v8") + .set_language_version("1.0") + .set_service("test") + .set_test_session_token(snapshot_name) + .set_tracer_version("1.0") + .set_input_format(TraceExporterInputFormat::V04) + .set_output_format(TraceExporterOutputFormat::V04) + .set_url(url.to_string().as_ref()); - let trace_exporter = builder - .build_async::() - .await - .expect("Unable to build TraceExporter"); - let data = get_v04_trace_snapshot_test_payload(); - let timeout = Duration::from_secs(2); - let start = Instant::now(); - loop { - if std::time::Instant::now().duration_since(start) > timeout { - panic!("Timeout waiting for agent info to be ready"); - } - if agent_info::get_agent_info().is_some() { - break; + let trace_exporter = builder + .build_async::() + .await + .expect("Unable to build TraceExporter"); + let data = get_v04_trace_snapshot_test_payload(); + let timeout = Duration::from_secs(2); + let start = Instant::now(); + loop { + if std::time::Instant::now().duration_since(start) > timeout { + panic!("Timeout waiting for agent info to be ready"); + } + if agent_info::get_agent_info().is_some() { + break; + } + std::thread::sleep(Duration::from_millis(10)); } - std::thread::sleep(Duration::from_millis(10)); - } - let response = trace_exporter.send_async(data.as_ref()).await; - assert!(response.is_ok()); + let response = 
trace_exporter.send_async(data.as_ref()).await; + assert!(response.is_ok()); - tokio::task::spawn_blocking(move || drop(trace_exporter)) - .await - .unwrap(); + tokio::task::spawn_blocking(move || drop(trace_exporter)) + .await + .unwrap(); - let received_traces = test_agent.get_sent_traces().await; + let received_traces = test_agent.get_sent_traces().await; - println!( - "{}", - serde_json::to_string_pretty(&received_traces).unwrap() - ); + println!( + "{}", + serde_json::to_string_pretty(&received_traces).unwrap() + ); - test_agent.assert_snapshot(snapshot_name).await; + test_agent.assert_snapshot(snapshot_name).await; + } } fn get_v04_trace_snapshot_test_payload() -> Vec { From 5a054148809b83a87d44b96fad6def10949bfd39 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Thu, 11 Jun 2026 16:11:56 +0200 Subject: [PATCH 41/49] fix: test span meta HashMap -> VecMap --- libdd-trace-utils/src/trace_filter.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/libdd-trace-utils/src/trace_filter.rs b/libdd-trace-utils/src/trace_filter.rs index d482adee79..b68572599a 100644 --- a/libdd-trace-utils/src/trace_filter.rs +++ b/libdd-trace-utils/src/trace_filter.rs @@ -378,9 +378,7 @@ impl TraceFilterer { #[cfg(test)] mod tests { use super::TraceFilterer; - use crate::span::v04::SpanBytes; - use std::collections::HashMap; - + use crate::span::v04::{SpanBytes, VecMap}; // ---- helpers ---- fn span_with(resource: &'static str, meta: &[(&'static str, &'static str)]) -> SpanBytes { @@ -394,7 +392,7 @@ mod tests { meta: meta .iter() .map(|(k, v)| ((*k).into(), (*v).into())) - .collect::>(), + .collect::>(), ..Default::default() } } From 6e5f6bf53c9338215312beae3bdcf5ea1bc53465 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Thu, 11 Jun 2026 16:22:56 +0200 Subject: [PATCH 42/49] fix: licensecheck ? 
--- libdd-trace-utils/src/trace_filter.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libdd-trace-utils/src/trace_filter.rs b/libdd-trace-utils/src/trace_filter.rs index b68572599a..3572922ac4 100644 --- a/libdd-trace-utils/src/trace_filter.rs +++ b/libdd-trace-utils/src/trace_filter.rs @@ -1,5 +1,6 @@ -// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 + //! Trace-level filter logic for client-side stats (filter_tags, filter_tags_regex, //! ignore_resources as published by the agent's /info endpoint). use std::{borrow::Borrow as _, collections::HashSet}; From 676e0b9572712f6f74638084dd02c5af1a8eaff8 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Thu, 11 Jun 2026 17:35:13 +0200 Subject: [PATCH 43/49] Revert "fix: licensecheck ?" This reverts commit 6e5f6bf53c9338215312beae3bdcf5ea1bc53465. --- libdd-trace-utils/src/trace_filter.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libdd-trace-utils/src/trace_filter.rs b/libdd-trace-utils/src/trace_filter.rs index 3572922ac4..b68572599a 100644 --- a/libdd-trace-utils/src/trace_filter.rs +++ b/libdd-trace-utils/src/trace_filter.rs @@ -1,6 +1,5 @@ -// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 - //! Trace-level filter logic for client-side stats (filter_tags, filter_tags_regex, //! ignore_resources as published by the agent's /info endpoint). 
use std::{borrow::Borrow as _, collections::HashSet}; From 428a8197ee8aaf35850c4384f593584e667dd25b Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Thu, 11 Jun 2026 18:08:30 +0200 Subject: [PATCH 44/49] fix: remove redundant span dedup call that broke V1 snapshot test and add missing license header The dedup() call added to send_trace_chunks_inner was reversing VecMap key order unconditionally, changing the attribute ordering in V1 msgpack output and causing snapshot mismatch. The V1 encoder already calls defensive_dedup() internally; the extra dedup is unnecessary. Also adds missing Apache 2.0 license header to test_trace_filters.rs. --- libdd-data-pipeline/src/trace_exporter/mod.rs | 6 ------ libdd-data-pipeline/tests/test_trace_filters.rs | 3 +++ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 0486a32678..52ae9ea970 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -624,12 +624,6 @@ impl Tra &self.trace_filterer.load(), ); - for chunk in &mut traces { - for span in chunk.iter_mut() { - span.dedup(); - } - } - // OTLP path: send sampled traces via OTLP when an OTLP endpoint is configured. // Unlike the agent path, there is no downstream agent to drop unsampled traces, // so drop_chunks is always called here regardless of whether stats are enabled. diff --git a/libdd-data-pipeline/tests/test_trace_filters.rs b/libdd-data-pipeline/tests/test_trace_filters.rs index c3705f13e2..2929144ac1 100644 --- a/libdd-data-pipeline/tests/test_trace_filters.rs +++ b/libdd-data-pipeline/tests/test_trace_filters.rs @@ -1,3 +1,6 @@ +// Copyright 2024-Present Datadog, Inc. 
https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + use std::{ collections::HashMap, time::{Duration, Instant}, From 1dec1b0eab2e333a6825e8ad8d324b8194fa2fbe Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Thu, 11 Jun 2026 18:12:38 +0200 Subject: [PATCH 45/49] fix: new copyright dates --- libdd-data-pipeline/tests/test_trace_filters.rs | 2 +- libdd-trace-utils/src/trace_filter.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libdd-data-pipeline/tests/test_trace_filters.rs b/libdd-data-pipeline/tests/test_trace_filters.rs index 2929144ac1..12c07e9be1 100644 --- a/libdd-data-pipeline/tests/test_trace_filters.rs +++ b/libdd-data-pipeline/tests/test_trace_filters.rs @@ -1,4 +1,4 @@ -// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 use std::{ diff --git a/libdd-trace-utils/src/trace_filter.rs b/libdd-trace-utils/src/trace_filter.rs index b68572599a..01987969ea 100644 --- a/libdd-trace-utils/src/trace_filter.rs +++ b/libdd-trace-utils/src/trace_filter.rs @@ -1,4 +1,4 @@ -// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 //! Trace-level filter logic for client-side stats (filter_tags, filter_tags_regex, //! ignore_resources as published by the agent's /info endpoint). 
From 8996a2a24427150a27ce807faac7916a0cdcc5dc Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Thu, 11 Jun 2026 18:18:58 +0200 Subject: [PATCH 46/49] re add span deduping --- libdd-data-pipeline/src/trace_exporter/mod.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 52ae9ea970..0486a32678 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -624,6 +624,12 @@ impl Tra &self.trace_filterer.load(), ); + for chunk in &mut traces { + for span in chunk.iter_mut() { + span.dedup(); + } + } + // OTLP path: send sampled traces via OTLP when an OTLP endpoint is configured. // Unlike the agent path, there is no downstream agent to drop unsampled traces, // so drop_chunks is always called here regardless of whether stats are enabled. From b74e92e2776354a289d4d90d9be0c4212f24769a Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Thu, 11 Jun 2026 18:44:44 +0200 Subject: [PATCH 47/49] fix(tests): update V1 snapshot to match test agent v1.61.1 behavior New test agent no longer propagates _dd.p.tid and _dd.p.dm from chunk level to child spans. 
--- .../compare_exporter_v04_to_v1_trace_snapshot_test.json | 3 --- 1 file changed, 3 deletions(-) diff --git a/libdd-data-pipeline/tests/snapshots/compare_exporter_v04_to_v1_trace_snapshot_test.json b/libdd-data-pipeline/tests/snapshots/compare_exporter_v04_to_v1_trace_snapshot_test.json index 039057d503..1c7db3b29a 100644 --- a/libdd-data-pipeline/tests/snapshots/compare_exporter_v04_to_v1_trace_snapshot_test.json +++ b/libdd-data-pipeline/tests/snapshots/compare_exporter_v04_to_v1_trace_snapshot_test.json @@ -11,7 +11,6 @@ "_dd.hostname": "my-host", "_dd.origin": "lambda", "_dd.p.dm": "-4", - "_dd.p.tid": "0x0", "component": "http", "env": "test-env", "runtime-id": "test-runtime-id-value", @@ -35,8 +34,6 @@ "parent_id": 1, "meta": { "_dd.origin": "lambda", - "_dd.p.dm": "-4", - "_dd.p.tid": "0x0", "env": "test-env", "runtime-id": "test-runtime-id-value", "service": "test-service", From c26d7edf7c4dfaa9751a34f85d767f63515ff5b3 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Mon, 15 Jun 2026 17:58:44 +0200 Subject: [PATCH 48/49] fix: make span trait minimal again and filter_trace only works on v04 spans --- libdd-trace-utils/src/span/trace_utils.rs | 47 ++++++++++++- libdd-trace-utils/src/trace_filter.rs | 82 ++++------------------- 2 files changed, 58 insertions(+), 71 deletions(-) diff --git a/libdd-trace-utils/src/span/trace_utils.rs b/libdd-trace-utils/src/span/trace_utils.rs index edd9ed3839..54699a484e 100644 --- a/libdd-trace-utils/src/span/trace_utils.rs +++ b/libdd-trace-utils/src/span/trace_utils.rs @@ -3,8 +3,10 @@ //! 
Trace-utils functionalities implementation for tinybytes based spans +use tracing::debug; + use super::{v04::Span, SpanText, TraceData}; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; /// Span metric the mini agent must set for the backend to recognize top level span const TOP_LEVEL_KEY: &str = "_top_level"; @@ -56,6 +58,49 @@ where } } +pub fn get_root_span_index(trace: &[Span]) -> anyhow::Result +where + T: TraceData, +{ + if trace.is_empty() { + anyhow::bail!("Cannot find root span index in an empty trace."); + } + + // Do a first pass to find if we have an obvious root span (starting from the end) since some + // clients put the root span last. + for (i, span) in trace.iter().enumerate().rev() { + if span.parent_id == 0 { + return Ok(i); + } + } + + let span_ids: HashSet<_> = trace.iter().map(|span| span.span_id).collect(); + + let mut root_span_id = None; + for (i, span) in trace.iter().enumerate() { + // If a span's parent is not in the trace, it is a root + if !span_ids.contains(&span.parent_id) { + if root_span_id.is_some() { + debug!( + trace_id = &trace[0].trace_id, + "trace has multiple root spans" + ); + } + root_span_id = Some(i); + } + } + Ok(match root_span_id { + Some(i) => i, + None => { + debug!( + trace_id = &trace[0].trace_id, + "Could not find the root span for trace" + ); + trace.len() - 1 + } + }) +} + /// Return true if the span has a top level key set pub fn has_top_level(span: &Span) -> bool { span.metrics diff --git a/libdd-trace-utils/src/trace_filter.rs b/libdd-trace-utils/src/trace_filter.rs index 01987969ea..4585faa008 100644 --- a/libdd-trace-utils/src/trace_filter.rs +++ b/libdd-trace-utils/src/trace_filter.rs @@ -2,13 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 //! Trace-level filter logic for client-side stats (filter_tags, filter_tags_regex, //! ignore_resources as published by the agent's /info endpoint). 
-use std::{borrow::Borrow as _, collections::HashSet}; +use std::borrow::Borrow as _; use libdd_common::regex_engine::Regex; use libdd_trace_normalization::{normalize_utils, normalizer}; use tracing::{debug, error}; -use crate::span::{self, TraceData}; +use crate::span::{self, trace_utils::get_root_span_index, TraceData}; trait TagFilter { /// Returns true if the given tag value matches the Filterer. @@ -44,6 +44,14 @@ pub struct TraceFilterer { ignore_resources: Vec, } +/// Minimal span interface required by [`TraceFilterer`]. +pub trait Span<'a> { + fn resource(&'a self) -> &'a str; + fn name(&'a self) -> &'a str; + /// Returns the value of the given meta tag, if present. + fn get_meta(&'a self, key: &str) -> Option<&'a str>; +} + impl TagFilter for TagLiteralFilter { fn matches_tag_value(&self, value: &str) -> bool { match &self.value { @@ -70,17 +78,6 @@ impl TagFilter for TagRegexFilter { } } -/// Minimal span interface required by [`TraceFilterer`]. -pub trait Span<'a> { - fn resource(&'a self) -> &'a str; - fn name(&'a self) -> &'a str; - fn span_id(&'a self) -> u64; - fn parent_id(&'a self) -> u64; - fn trace_id(&'a self) -> u128; - /// Returns the value of the given meta tag, if present. 
- fn get_meta(&'a self, key: &str) -> Option<&'a str>; -} - impl<'a, T: TraceData> Span<'a> for span::v04::Span { fn resource(&'a self) -> &'a str { self.resource.borrow() @@ -90,63 +87,11 @@ impl<'a, T: TraceData> Span<'a> for span::v04::Span { self.name.borrow() } - fn span_id(&'a self) -> u64 { - self.span_id - } - - fn parent_id(&'a self) -> u64 { - self.parent_id - } - - fn trace_id(&'a self) -> u128 { - self.trace_id - } - fn get_meta(&'a self, key: &str) -> Option<&'a str> { self.meta.get(key).map(|v| v.borrow()) } } -fn get_root_span_index<'a>(trace: &'a [impl Span<'a>]) -> anyhow::Result { - if trace.is_empty() { - anyhow::bail!("Cannot find root span index in an empty trace."); - } - - // Do a first pass to find if we have an obvious root span (starting from the end) since some - // clients put the root span last. - for (i, span) in trace.iter().enumerate().rev() { - if span.parent_id() == 0 { - return Ok(i); - } - } - - let span_ids: HashSet<_> = trace.iter().map(|span| span.span_id()).collect(); - - let mut root_span_id = None; - for (i, span) in trace.iter().enumerate() { - // If a span's parent is not in the trace, it is a root - if !span_ids.contains(&span.parent_id()) { - if root_span_id.is_some() { - debug!( - trace_id = &trace[0].trace_id(), - "trace has multiple root spans" - ); - } - root_span_id = Some(i); - } - } - Ok(match root_span_id { - Some(i) => i, - None => { - debug!( - trace_id = &trace[0].trace_id(), - "Could not find the root span for trace" - ); - trace.len() - 1 - } - }) -} - impl TraceFilterer { fn compile_literal_filters(filters: &[String]) -> Vec { let mut tag_regex_filters = Vec::new(); @@ -260,12 +205,9 @@ impl TraceFilterer { } /// Removes traces that fail filter checks in-place. Returns the number of traces dropped. 
- pub fn filter_traces(&self, traces: &mut Vec>) -> usize - where - for<'a> T: Span<'a>, - { + pub fn filter_traces(&self, traces: &mut Vec>>) -> usize { let traces_count_before = traces.len(); - traces.retain(|trace: &Vec| { + traces.retain(|trace| { let Ok(root_span_index) = get_root_span_index(trace) else { return true; }; From 4efe15f62bfd998d42d041c453c32c31a0908f2a Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Mon, 15 Jun 2026 21:08:31 +0200 Subject: [PATCH 49/49] feat: make Span trait even more minimal by moving normalization to the caller --- libdd-trace-utils/src/trace_filter.rs | 36 ++++++++++++--------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/libdd-trace-utils/src/trace_filter.rs b/libdd-trace-utils/src/trace_filter.rs index 4585faa008..235c4e3903 100644 --- a/libdd-trace-utils/src/trace_filter.rs +++ b/libdd-trace-utils/src/trace_filter.rs @@ -46,8 +46,8 @@ pub struct TraceFilterer { /// Minimal span interface required by [`TraceFilterer`]. pub trait Span<'a> { - fn resource(&'a self) -> &'a str; - fn name(&'a self) -> &'a str; + /// Returns the normalized resource value + fn resource_normalized(&'a self) -> &'a str; /// Returns the value of the given meta tag, if present. fn get_meta(&'a self, key: &str) -> Option<&'a str>; } @@ -79,12 +79,19 @@ impl TagFilter for TagRegexFilter { } impl<'a, T: TraceData> Span<'a> for span::v04::Span { - fn resource(&'a self) -> &'a str { - self.resource.borrow() - } - - fn name(&'a self) -> &'a str { - self.name.borrow() + fn resource_normalized(&'a self) -> &'a str { + // Normalization + let span_resource = self.resource.borrow(); + if span_resource.is_empty() { + let span_name = self.name.borrow(); + debug!( + ?span_name, + "Trace filter: filtering on name because resource is empty" + ); + span_name + } else { + span_resource + } } fn get_meta(&'a self, key: &str) -> Option<&'a str> { @@ -236,18 +243,7 @@ impl TraceFilterer { // match, reject the trace. 
pub fn should_drop<'a>(&self, root_span: &'a impl Span<'a>) -> bool { if !self.ignore_resources.is_empty() { - let span_resource = Span::resource(root_span); - // Normalization - let span_resource = if span_resource.is_empty() { - let span_name = root_span.name(); - debug!( - ?span_name, - "Trace filter: filtering on name because resource is empty" - ); - span_name - } else { - span_resource - }; + let span_resource = root_span.resource_normalized(); if self .ignore_resources