From 4fef562a0de8ca57cd34c15df46c75a957515f41 Mon Sep 17 00:00:00 2001 From: Ayushi Ahjolia Date: Tue, 9 Jun 2026 17:32:14 -0700 Subject: [PATCH] feat(otel): Add X-Ray e2e integration tests for span validation --- examples/pom.xml | 17 + .../examples/otel/OtelXRayStepExample.java | 55 ++++ .../examples/otel/OtelXRayWaitExample.java | 74 +++++ examples/src/main/resources/collector.yaml | 16 + .../examples/OtelXRayIntegrationTest.java | 297 ++++++++++++++++++ examples/template.yaml | 42 +++ otel-plugin/pom.xml | 2 +- 7 files changed, 502 insertions(+), 1 deletion(-) create mode 100644 examples/src/main/java/software/amazon/lambda/durable/examples/otel/OtelXRayStepExample.java create mode 100644 examples/src/main/java/software/amazon/lambda/durable/examples/otel/OtelXRayWaitExample.java create mode 100644 examples/src/main/resources/collector.yaml create mode 100644 examples/src/test/java/software/amazon/lambda/durable/examples/OtelXRayIntegrationTest.java diff --git a/examples/pom.xml b/examples/pom.xml index 4d711a89d..cae5ec504 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -50,6 +50,18 @@ 1.63.0 + + + io.opentelemetry + opentelemetry-exporter-otlp + 1.63.0 + + + io.grpc + grpc-netty-shaded + 1.72.0 + + com.amazonaws @@ -91,6 +103,11 @@ sts test + + software.amazon.awssdk + xray + test + org.junit.jupiter junit-jupiter diff --git a/examples/src/main/java/software/amazon/lambda/durable/examples/otel/OtelXRayStepExample.java b/examples/src/main/java/software/amazon/lambda/durable/examples/otel/OtelXRayStepExample.java new file mode 100644 index 000000000..f9978b8b4 --- /dev/null +++ b/examples/src/main/java/software/amazon/lambda/durable/examples/otel/OtelXRayStepExample.java @@ -0,0 +1,55 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package software.amazon.lambda.durable.examples.otel; + +import io.opentelemetry.exporter.otlp.trace.OtlpGrpcSpanExporter; +import io.opentelemetry.sdk.trace.SdkTracerProvider; +import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor; +import software.amazon.lambda.durable.DurableConfig; +import software.amazon.lambda.durable.DurableContext; +import software.amazon.lambda.durable.DurableHandler; +import software.amazon.lambda.durable.examples.types.GreetingRequest; +import software.amazon.lambda.durable.otel.OpenTelemetryDurablePlugin; + +/** + * OTel + X-Ray example: simple steps in a single invocation. + * + *

Exports spans via OTLP gRPC to the ADOT collector extension (Lambda layer), which forwards to X-Ray. Used by + * {@code OtelXRayIntegrationTest} to verify spans appear correctly in X-Ray. + * + *

Expected trace structure in X-Ray: + * + *

+ * durable.invocation
+ * ├── durable.step:create-greeting
+ * │   └── durable.step:create-greeting [attempt 1]
+ * └── durable.step:transform
+ *     └── durable.step:transform [attempt 1]
+ * 
+ */ +public class OtelXRayStepExample extends DurableHandler { + + @Override + protected DurableConfig createConfiguration() { + var otlpExporter = OtlpGrpcSpanExporter.builder() + .setEndpoint("http://localhost:4317") + .build(); + + var otelPlugin = new OpenTelemetryDurablePlugin( + SdkTracerProvider.builder().addSpanProcessor(SimpleSpanProcessor.create(otlpExporter))); + + return DurableConfig.builder().withPlugins(otelPlugin).build(); + } + + @Override + public String handleRequest(GreetingRequest input, DurableContext context) { + context.getLogger().info("Starting OTel X-Ray step example for {}", input.getName()); + + var greeting = context.step("create-greeting", String.class, stepCtx -> "Hello, " + input.getName()); + + var result = context.step("transform", String.class, stepCtx -> greeting.toUpperCase() + "!"); + + context.getLogger().info("OTel X-Ray step example complete: {}", result); + return result; + } +} diff --git a/examples/src/main/java/software/amazon/lambda/durable/examples/otel/OtelXRayWaitExample.java b/examples/src/main/java/software/amazon/lambda/durable/examples/otel/OtelXRayWaitExample.java new file mode 100644 index 000000000..d55027ab9 --- /dev/null +++ b/examples/src/main/java/software/amazon/lambda/durable/examples/otel/OtelXRayWaitExample.java @@ -0,0 +1,74 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package software.amazon.lambda.durable.examples.otel; + +import io.opentelemetry.exporter.otlp.trace.OtlpGrpcSpanExporter; +import io.opentelemetry.sdk.trace.SdkTracerProvider; +import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor; +import java.time.Duration; +import software.amazon.lambda.durable.DurableConfig; +import software.amazon.lambda.durable.DurableContext; +import software.amazon.lambda.durable.DurableHandler; +import software.amazon.lambda.durable.examples.types.GreetingRequest; +import software.amazon.lambda.durable.otel.OpenTelemetryDurablePlugin; + +/** + * OTel + X-Ray example: step → wait → step pattern that forces multiple Lambda invocations. + * + *

This handler exercises the critical multi-invocation tracing scenario: + * + *

    + *
  1. Invocation 1: "before-wait" step completes → wait suspends execution + *
  2. Invocation 2: replays "before-wait" (no-op) → wait completes → "after-wait" step runs + *
+ * + *

Exports spans via OTLP gRPC to the ADOT collector extension (Lambda layer), which forwards to X-Ray. + * + *

Used by {@code OtelXRayIntegrationTest} to verify that deterministic trace IDs correctly stitch spans from + * multiple invocations into a single X-Ray trace. + * + *

Expected trace structure in X-Ray: + * + *

+ * Trace (single trace ID across both invocations)
+ * ├── durable.invocation (invocation 1)
+ * │   ├── durable.step:before-wait
+ * │   │   └── durable.step:before-wait [attempt 1]
+ * │   └── durable.wait:pause (ended as PENDING)
+ * └── durable.invocation (invocation 2)
+ *     ├── durable.wait:pause (completed)
+ *     └── durable.step:after-wait
+ *         └── durable.step:after-wait [attempt 1]
+ * 
+ * + *

All spans share the same deterministic trace ID derived from the execution ARN. + */ +public class OtelXRayWaitExample extends DurableHandler { + + @Override + protected DurableConfig createConfiguration() { + var otlpExporter = OtlpGrpcSpanExporter.builder() + .setEndpoint("http://localhost:4317") + .build(); + + var otelPlugin = new OpenTelemetryDurablePlugin( + SdkTracerProvider.builder().addSpanProcessor(SimpleSpanProcessor.create(otlpExporter))); + + return DurableConfig.builder().withPlugins(otelPlugin).build(); + } + + @Override + public String handleRequest(GreetingRequest input, DurableContext context) { + context.getLogger().info("Starting OTel X-Ray wait example for {}", input.getName()); + + var before = context.step("before-wait", String.class, stepCtx -> "Prepared: " + input.getName()); + + // This wait forces Lambda to suspend and re-invoke after the duration + context.wait("pause", Duration.ofSeconds(5)); + + var after = context.step("after-wait", String.class, stepCtx -> before + " | Resumed and completed"); + + context.getLogger().info("OTel X-Ray wait example complete: {}", after); + return after; + } +} diff --git a/examples/src/main/resources/collector.yaml b/examples/src/main/resources/collector.yaml new file mode 100644 index 000000000..2c2511f13 --- /dev/null +++ b/examples/src/main/resources/collector.yaml @@ -0,0 +1,16 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: "localhost:4317" + http: + endpoint: "localhost:4318" + +exporters: + awsxray: + +service: + pipelines: + traces: + receivers: [otlp] + exporters: [awsxray] diff --git a/examples/src/test/java/software/amazon/lambda/durable/examples/OtelXRayIntegrationTest.java b/examples/src/test/java/software/amazon/lambda/durable/examples/OtelXRayIntegrationTest.java new file mode 100644 index 000000000..d45d9e531 --- /dev/null +++ b/examples/src/test/java/software/amazon/lambda/durable/examples/OtelXRayIntegrationTest.java @@ -0,0 +1,297 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package software.amazon.lambda.durable.examples; + +import static org.junit.jupiter.api.Assertions.*; + +import java.time.Duration; +import java.time.Instant; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIf; +import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.lambda.LambdaClient; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.xray.XRayClient; +import software.amazon.awssdk.services.xray.model.BatchGetTracesRequest; +import software.amazon.awssdk.services.xray.model.GetTraceSummariesRequest; +import software.amazon.awssdk.services.xray.model.Segment; +import software.amazon.awssdk.services.xray.model.TimeRangeType; +import software.amazon.awssdk.services.xray.model.TraceSummary; +import software.amazon.lambda.durable.examples.types.GreetingRequest; +import software.amazon.lambda.durable.model.ExecutionStatus; +import software.amazon.lambda.durable.testing.CloudDurableTestRunner; + +/** + * Integration tests that verify OTel spans exported via ADOT appear correctly in AWS X-Ray. + * + *

These tests deploy Lambda functions configured with: + * + *

    + *
  • OpenTelemetry Durable Plugin with OTLP gRPC exporter + *
  • ADOT collector layer (OTLP receiver → X-Ray exporter) + *
  • Active X-Ray tracing + *
+ * + *

After invoking the function, the test queries the X-Ray API to verify: + * + *

    + *
  • A single trace exists for the execution (deterministic trace ID works) + *
  • Expected span/segment names are present + *
  • Parent-child nesting is correct + *
  • Multi-invocation scenarios produce one unified trace + *
+ * + *

Enable with: {@code -Dtest.cloud.enabled=true} + */ +@EnabledIf("isEnabled") +class OtelXRayIntegrationTest { + + private static final Duration XRAY_INGESTION_DELAY = Duration.ofSeconds(15); + private static final int XRAY_QUERY_RETRIES = 3; + private static final Duration XRAY_RETRY_DELAY = Duration.ofSeconds(10); + + private static String account; + private static String region; + private static String functionNameSuffix; + private static LambdaClient lambdaClient; + private static XRayClient xrayClient; + + static boolean isEnabled() { + var enabled = "true".equals(System.getProperty("test.cloud.enabled")); + if (!enabled) { + System.out.println("⚠️ OTel X-Ray integration tests disabled. Enable with -Dtest.cloud.enabled=true"); + } + return enabled; + } + + @BeforeAll + static void setup() { + try { + DefaultCredentialsProvider.builder().build().resolveCredentials(); + } catch (Exception e) { + throw new IllegalStateException("AWS credentials not available"); + } + + account = System.getProperty("test.aws.account"); + region = System.getProperty("test.aws.region"); + functionNameSuffix = System.getProperty("test.function.name.suffix", "-java17-runtime"); + + if (account == null || region == null) { + try (var sts = StsClient.create()) { + if (account == null) account = sts.getCallerIdentity().account(); + if (region == null) + region = sts.serviceClientConfiguration().region().id(); + } + } + + lambdaClient = LambdaClient.builder() + .credentialsProvider(DefaultCredentialsProvider.builder().build()) + .region(Region.of(region)) + .build(); + + xrayClient = XRayClient.builder() + .credentialsProvider(DefaultCredentialsProvider.builder().build()) + .region(Region.of(region)) + .build(); + + System.out.println("☁️ Running OTel X-Ray integration tests against account " + account + " in " + region); + } + + private static String arn(String functionName) { + return "arn:aws:lambda:" + region + ":" + account + ":function:" + functionName + functionNameSuffix + + ":$LATEST"; + } + + // ─── Test: Simple Steps (Single Invocation) ────────────────────────── + + @Test + void simpleSteps_producesUnifiedTraceInXRay() throws Exception { + var startTime = Instant.now(); + + // 1. Invoke the function (use unique input to avoid stale executions) + var runner = CloudDurableTestRunner.create( + arn("otel-xray-step-example"), GreetingRequest.class, String.class, lambdaClient); + var uniqueInput = "XRay-" + System.currentTimeMillis(); + var result = runner.run(new GreetingRequest(uniqueInput)); + + assertEquals(ExecutionStatus.SUCCEEDED, result.getStatus(), "Execution failed: " + result); + assertEquals("HELLO, " + uniqueInput.toUpperCase() + "!", result.getResult()); + + // 2. Wait for X-Ray ingestion + Thread.sleep(XRAY_INGESTION_DELAY.toMillis()); + + // 3. Query X-Ray for the trace + var traces = queryTracesWithRetry(startTime, Instant.now()); + + // 4. Find our trace (filter by annotation or just take the one matching our time window) + assertFalse(traces.isEmpty(), "Expected at least one trace in X-Ray after execution"); + + // Get full trace details + var traceIds = traces.stream().map(TraceSummary::id).toList(); + var fullTraces = xrayClient.batchGetTraces( + BatchGetTracesRequest.builder().traceIds(traceIds).build()); + + // Find the trace that contains our durable spans + var durableTrace = fullTraces.traces().stream() + .filter(trace -> + trace.segments().stream().anyMatch(seg -> segmentContains(seg, "durable.step:create-greeting"))) + .findFirst() + .orElse(null); + + assertNotNull( + durableTrace, + "Expected to find a trace with durable.step:create-greeting segment. " + "Found " + traces.size() + + " traces in the time window."); + + // 5. Verify span structure + var segmentDocuments = + durableTrace.segments().stream().map(Segment::document).toList(); + var allSegmentText = String.join("\n", segmentDocuments); + + // Verify expected span names appear in the trace + assertTrue( + allSegmentText.contains("durable.invocation"), + "Expected durable.invocation span in trace. Segments: " + summarizeSegments(segmentDocuments)); + assertTrue( + allSegmentText.contains("durable.step:create-greeting"), + "Expected durable.step:create-greeting span in trace"); + assertTrue(allSegmentText.contains("durable.step:transform"), "Expected durable.step:transform span in trace"); + + // Verify all segments share the same trace ID (single unified trace) + var uniqueTraceIds = + durableTrace.segments().stream().map(seg -> durableTrace.id()).collect(Collectors.toSet()); + assertEquals(1, uniqueTraceIds.size(), "All segments should belong to a single trace"); + + System.out.println("✅ Simple steps test passed — " + + durableTrace.segments().size() + " segments in trace " + durableTrace.id()); + } + + // ─── Test: Wait + Resume (Multi-Invocation) ───────────────────────── + + @Test + void waitAndResume_producesUnifiedTraceAcrossInvocations() throws Exception { + var startTime = Instant.now(); + + // 1. Invoke the function — will suspend on wait, then resume automatically + var runner = CloudDurableTestRunner.create( + arn("otel-xray-wait-example"), GreetingRequest.class, String.class, lambdaClient); + var uniqueInput = "Wait-" + System.currentTimeMillis(); + var result = runner.run(new GreetingRequest(uniqueInput)); + + assertEquals(ExecutionStatus.SUCCEEDED, result.getStatus(), "Execution failed: " + result); + assertTrue(result.getResult().contains("Resumed and completed"), + "Expected result to contain 'Resumed and completed', got: " + result.getResult()); + + // 2. Wait for X-Ray ingestion (extra time since multi-invocation takes longer) + Thread.sleep(XRAY_INGESTION_DELAY.plus(Duration.ofSeconds(5)).toMillis()); + + // 3. Query X-Ray for the trace + var traces = queryTracesWithRetry(startTime, Instant.now()); + + assertFalse(traces.isEmpty(), "Expected at least one trace in X-Ray after multi-invocation execution"); + + // Get full trace details + var traceIds = traces.stream().map(TraceSummary::id).toList(); + var fullTraces = xrayClient.batchGetTraces( + BatchGetTracesRequest.builder().traceIds(traceIds).build()); + + // Find the trace containing our durable spans + var durableTrace = fullTraces.traces().stream() + .filter(trace -> + trace.segments().stream().anyMatch(seg -> segmentContains(seg, "durable.step:before-wait"))) + .findFirst() + .orElse(null); + + assertNotNull( + durableTrace, + "Expected to find a trace with durable.step:before-wait segment. " + "Found " + traces.size() + + " traces in the time window."); + + // 4. Verify multi-invocation trace structure + var segmentDocuments = + durableTrace.segments().stream().map(Segment::document).toList(); + var allSegmentText = String.join("\n", segmentDocuments); + + // Verify spans from BOTH invocations appear in the same trace + assertTrue( + allSegmentText.contains("durable.step:before-wait"), "Expected before-wait span from first invocation"); + assertTrue( + allSegmentText.contains("durable.step:after-wait"), "Expected after-wait span from second invocation"); + assertTrue(allSegmentText.contains("durable.wait:pause"), "Expected wait:pause span in trace"); + + // Verify multiple invocation spans (one per Lambda invocation) + var invocationCount = countOccurrences(allSegmentText, "durable.invocation"); + assertTrue( + invocationCount >= 2, + "Expected at least 2 invocation spans (multi-invocation), got " + invocationCount); + + // Critical assertion: all segments under ONE trace (deterministic ID worked) + assertEquals( + 1, + Set.of(durableTrace.id()).size(), + "All segments should belong to a single trace — deterministic trace ID must work across invocations"); + + System.out.println( + "✅ Wait + resume test passed — " + durableTrace.segments().size() + " segments across " + + invocationCount + " invocations in trace " + durableTrace.id()); + } + + // ─── Helpers ───────────────────────────────────────────────────────── + + /** Queries X-Ray for traces with retry logic to handle eventual consistency. */ + private List queryTracesWithRetry(Instant startTime, Instant endTime) throws InterruptedException { + for (int attempt = 0; attempt < XRAY_QUERY_RETRIES; attempt++) { + var response = xrayClient.getTraceSummaries(GetTraceSummariesRequest.builder() + .startTime(startTime) + .endTime(endTime) + .timeRangeType(TimeRangeType.EVENT) + .sampling(false) + .build()); + + var traces = response.traceSummaries(); + if (!traces.isEmpty()) { + return traces; + } + + System.out.println("⏳ X-Ray query returned 0 traces, retrying in " + XRAY_RETRY_DELAY.toSeconds() + "s " + + "(attempt " + (attempt + 1) + "/" + XRAY_QUERY_RETRIES + ")"); + Thread.sleep(XRAY_RETRY_DELAY.toMillis()); + } + return List.of(); + } + + /** Checks if a segment's document JSON contains the given text. */ + private static boolean segmentContains(Segment segment, String text) { + return segment.document() != null && segment.document().contains(text); + } + + /** Counts occurrences of a substring in text. */ + private static int countOccurrences(String text, String substring) { + int count = 0; + int index = 0; + while ((index = text.indexOf(substring, index)) != -1) { + count++; + index += substring.length(); + } + return count; + } + + /** Creates a brief summary of segment names for assertion error messages. */ + private static String summarizeSegments(List segmentDocuments) { + return segmentDocuments.stream() + .map(doc -> { + // Extract "name" field from the segment JSON for readability + var nameStart = doc.indexOf("\"name\":\""); + if (nameStart == -1) return "(unknown)"; + nameStart += 8; + var nameEnd = doc.indexOf("\"", nameStart); + return nameEnd > nameStart ? doc.substring(nameStart, nameEnd) : "(parse-error)"; + }) + .collect(Collectors.joining(", ", "[", "]")); + } +} diff --git a/examples/template.yaml b/examples/template.yaml index 10a27c46a..cdd719f70 100644 --- a/examples/template.yaml +++ b/examples/template.yaml @@ -355,6 +355,40 @@ Resources: Handler: "software.amazon.lambda.durable.examples.general.OtelExample" Role: !Ref RoleArn + OtelXRayStepExampleFunction: + Type: AWS::Serverless::Function + Properties: + FunctionName: !Join + - '-' + - - 'otel-xray-step-example' + - !Ref JavaVersion + - runtime + Handler: "software.amazon.lambda.durable.examples.otel.OtelXRayStepExample" + Role: !Ref RoleArn + Tracing: Active + Layers: + - !Sub "arn:aws:lambda:${AWS::Region}:901920570463:layer:aws-otel-java-wrapper-amd64-ver-1-32-0:6" + Environment: + Variables: + OPENTELEMETRY_COLLECTOR_CONFIG_FILE: "/var/task/collector.yaml" + + OtelXRayWaitExampleFunction: + Type: AWS::Serverless::Function + Properties: + FunctionName: !Join + - '-' + - - 'otel-xray-wait-example' + - !Ref JavaVersion + - runtime + Handler: "software.amazon.lambda.durable.examples.otel.OtelXRayWaitExample" + Role: !Ref RoleArn + Tracing: Active + Layers: + - !Sub "arn:aws:lambda:${AWS::Region}:901920570463:layer:aws-otel-java-wrapper-amd64-ver-1-32-0:6" + Environment: + Variables: + OPENTELEMETRY_COLLECTOR_CONFIG_FILE: "/var/task/collector.yaml" + RetryInvokeExampleFunction: Type: AWS::Serverless::Function Properties: @@ -561,6 +595,14 @@ Outputs: Description: OTel Example Function ARN Value: !GetAtt OtelExampleFunction.Arn + OtelXRayStepExampleFunction: + Description: OTel X-Ray Step Example Function ARN + Value: !GetAtt OtelXRayStepExampleFunction.Arn + + OtelXRayWaitExampleFunction: + Description: OTel X-Ray Wait Example Function ARN + Value: !GetAtt OtelXRayWaitExampleFunction.Arn + RetryInvokeExampleFunction: Description: Retry Invoke Example Function ARN Value: !GetAtt RetryInvokeExampleFunction.Arn diff --git a/otel-plugin/pom.xml b/otel-plugin/pom.xml index bb6c2be00..8df6d1251 100644 --- a/otel-plugin/pom.xml +++ b/otel-plugin/pom.xml @@ -11,7 +11,7 @@ aws-durable-execution-sdk-java-otel - AWS Lambda Durable Execution SDK - OpenTelemetry Plugin + AWS Lambda Durable Execution SDK OpenTelemetry Plugin OpenTelemetry instrumentation plugin for AWS Lambda Durable Execution SDK