diff --git a/integration-tests/jest.config.js b/integration-tests/jest.config.js index 2d10a3e21..7bac95655 100644 --- a/integration-tests/jest.config.js +++ b/integration-tests/jest.config.js @@ -10,7 +10,7 @@ module.exports = { '!tests/**/*.d.ts', ], // Increase timeout for integration tests that involve Lambda invocations and waiting for Datadog - testTimeout: 900000, // 15 minutes + testTimeout: 1800000, // 30 minutes verbose: true, // Reporters for test results reporters: [ diff --git a/integration-tests/tests/auth.test.ts b/integration-tests/tests/auth.test.ts index 7f4879a33..8416b0939 100644 --- a/integration-tests/tests/auth.test.ts +++ b/integration-tests/tests/auth.test.ts @@ -27,7 +27,7 @@ describe('Auth Integration Tests', () => { telemetry = await invokeAndCollectTelemetry(functions, 1); console.log('All invocations and data fetching completed'); - }, 600000); + }, 1800000); describe('on-demand (node)', () => { it('should invoke Lambda successfully', () => { diff --git a/integration-tests/tests/custom-metrics.test.ts b/integration-tests/tests/custom-metrics.test.ts index d00755bbf..38b1824d8 100644 --- a/integration-tests/tests/custom-metrics.test.ts +++ b/integration-tests/tests/custom-metrics.test.ts @@ -34,7 +34,7 @@ describe("Customer Metrics Exclude Tags Integration Tests", () => { metricsEndTime = Date.now(); console.log("Lambdas invoked and indexing wait complete"); - }, 900000); + }, 1800000); describe("unfiltered function (no DD_LAMBDA_CUSTOMER_METRICS_EXCLUDE_TAGS)", () => { it.each(EXCLUDED_TAGS)( diff --git a/integration-tests/tests/lmi.test.ts b/integration-tests/tests/lmi.test.ts index 36b4f4c55..abe1b3610 100644 --- a/integration-tests/tests/lmi.test.ts +++ b/integration-tests/tests/lmi.test.ts @@ -22,7 +22,7 @@ describe('LMI Integration Tests', () => { telemetry = await invokeAndCollectTelemetry(functions, 1); console.log('LMI invocation and data fetching completed'); - }, 600000); + }, 1800000); describe.each(runtimes)('%s Runtime with LMI', (runtime) => { const getResult = () => telemetry[runtime]?.threads[0]?.[0]; diff --git a/integration-tests/tests/on-demand.test.ts b/integration-tests/tests/on-demand.test.ts index ff88f6108..cb59e5cf7 100644 --- a/integration-tests/tests/on-demand.test.ts +++ b/integration-tests/tests/on-demand.test.ts @@ -22,7 +22,7 @@ describe('On-Demand Integration Tests', () => { telemetry = await invokeAndCollectTelemetry(functions, 2, 1, 5000); console.log('All invocations and data fetching completed'); - }, 600000); + }, 1800000); describe.each(runtimes)('%s runtime', (runtime) => { const getTelemetry = () => telemetry[runtime]; diff --git a/integration-tests/tests/otlp.test.ts b/integration-tests/tests/otlp.test.ts index fd836af16..c16128f34 100644 --- a/integration-tests/tests/otlp.test.ts +++ b/integration-tests/tests/otlp.test.ts @@ -33,7 +33,7 @@ describe('OTLP Integration Tests', () => { telemetry = await invokeAndCollectTelemetry(functions, 1, 1, 0, {}, DATADOG_INDEXING_WAIT_5_MIN_MS); console.log('All OTLP Lambda invocations and data fetching completed'); - }, 700000); + }, 1800000); describe.each(runtimes)('%s Runtime', (runtime) => { const getResult = () => telemetry[runtime]?.threads[0]?.[0]; diff --git a/integration-tests/tests/payload-size.test.ts b/integration-tests/tests/payload-size.test.ts index af81c6b97..b660e24ca 100644 --- a/integration-tests/tests/payload-size.test.ts +++ b/integration-tests/tests/payload-size.test.ts @@ -70,7 +70,7 @@ describe('Payload Size Integration Tests', () => { console.log(`Extension send-error log lines: ${sendErrorMessages.length}`); console.log('Invocation and telemetry collection complete'); - }, 900000); + }, 1800000); // Assert on the FIRST request's trace. Its flush is deferred to a later // invocation (cold-start race), which is why we invoke a few times — but the diff --git a/integration-tests/tests/snapstart.test.ts b/integration-tests/tests/snapstart.test.ts index 0f9aa1e61..f4b70e833 100644 --- a/integration-tests/tests/snapstart.test.ts +++ b/integration-tests/tests/snapstart.test.ts @@ -45,7 +45,7 @@ describe('Snapstart Integration Tests', () => { telemetry = await invokeAndCollectTelemetry(functions, 2, 2, 5000); console.log('All Snapstart Lambda invocations and data fetching completed'); - }, 900000); + }, 1800000); describe.each(runtimes)('%s Runtime with SnapStart', (runtime) => { // With concurrency=2, invocations=2: diff --git a/integration-tests/tests/utils/datadog.ts b/integration-tests/tests/utils/datadog.ts index 3e69dab78..f8d2bcb31 100644 --- a/integration-tests/tests/utils/datadog.ts +++ b/integration-tests/tests/utils/datadog.ts @@ -54,6 +54,49 @@ function formatDatadogError(error: unknown, query: string): string { return `Error (query: '${query}'): ${String(error)}`; } +const MAX_RETRY_WAIT_MS = 5 * 60 * 1000; +const DEFAULT_RETRY_AFTER_MS = 5000; +const MAX_SINGLE_WAIT_MS = 60 * 1000; + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function parseRetryAfterMs(error: AxiosError): number { + const headers = error.response?.headers ?? {}; + const raw = headers['x-ratelimit-reset'] ?? headers['retry-after']; + const seconds = raw !== undefined ? Number(raw) : NaN; + const ms = Number.isFinite(seconds) && seconds > 0 ? seconds * 1000 : DEFAULT_RETRY_AFTER_MS; + return Math.min(ms, MAX_SINGLE_WAIT_MS); +} + +async function requestWithRetry(fn: () => Promise, query: string): Promise { + let waited = 0; + let attempt = 0; + // eslint-disable-next-line no-constant-condition + while (true) { + try { + return await fn(); + } catch (error: unknown) { + const is429 = error instanceof AxiosError && error.response?.status === 429; + if (!is429) { + throw error; + } + const jitter = Math.floor(Math.random() * 1000); + const wait = parseRetryAfterMs(error as AxiosError) + jitter; + if (waited + wait > MAX_RETRY_WAIT_MS) { + throw error; + } + attempt += 1; + waited += wait; + console.warn( + `Datadog API 429 for '${query}'; retrying in ${wait}ms (attempt ${attempt}, total waited ${waited}ms)`, + ); + await sleep(wait); + } + } +} + export interface DatadogTelemetry { threads: InvocationTracesLogs[][]; // [thread][invocation] metrics: EnhancedMetrics; @@ -137,7 +180,7 @@ export async function getTraces( try { console.log(`Searching for traces: ${query}`); - const initialResponse = await datadogClient.post('/api/v2/spans/events/search', { + const initialResponse = await requestWithRetry(() => datadogClient.post('/api/v2/spans/events/search', { data: { type: 'search_request', attributes: { @@ -152,7 +195,7 @@ export async function getTraces( sort: '-timestamp', }, }, - }); + }), query); const initialSpans = initialResponse.data.data || []; console.log(`Found ${initialSpans.length} initial span(s)`); @@ -169,12 +212,13 @@ export async function getTraces( const allSpans: any[] = []; for (const traceId of traceIds) { - const traceResponse = await datadogClient.post('/api/v2/spans/events/search', { + const traceQuery = `trace_id:${traceId}`; + const traceResponse = await requestWithRetry(() => datadogClient.post('/api/v2/spans/events/search', { data: { type: 'search_request', attributes: { filter: { - query: `trace_id:${traceId}`, + query: traceQuery, from: new Date(fromTime).toISOString(), to: new Date(toTime).toISOString(), }, @@ -183,7 +227,7 @@ export async function getTraces( }, }, }, - }); + }), traceQuery); const traceSpans = traceResponse.data.data || []; console.log(`Trace ${traceId}: ${traceSpans.length} spans`); allSpans.push(...traceSpans); @@ -239,7 +283,7 @@ export async function getLogs( try { console.log(`Searching for logs: ${query}`); - const response = await datadogClient.post('/api/v2/logs/events/search', { + const response = await requestWithRetry(() => datadogClient.post('/api/v2/logs/events/search', { filter: { query: query, from: new Date(fromTime).toISOString(), @@ -248,7 +292,7 @@ export async function getLogs( page: { limit: 1000, }, - }); + }), query); const rawLogs = response.data.data || []; console.log(`Found ${rawLogs.length} log(s)`); @@ -309,13 +353,13 @@ export async function getMetricCount( console.log(`Querying metric count: ${query}`); - const response = await datadogClient.get('/api/v1/query', { + const response = await requestWithRetry(() => datadogClient.get('/api/v1/query', { params: { query, from: Math.floor(fromTime / 1000), to: Math.floor(toTime / 1000), }, - }); + }), query); const series = response.data.series || []; if (series.length === 0) { @@ -337,13 +381,13 @@ async function getMetrics( console.log(`Querying metrics: ${query}`); - const response = await datadogClient.get('/api/v1/query', { + const response = await requestWithRetry(() => datadogClient.get('/api/v1/query', { params: { query, from: Math.floor(fromTime / 1000), to: Math.floor(toTime / 1000), }, - }); + }), query); const series = response.data.series || []; console.log(`Found ${series.length} series for ${metricName}`); @@ -375,13 +419,13 @@ export async function hasMetricWithTag( console.log(`Querying metric with tag filter: ${query}`); - const response = await datadogClient.get('/api/v1/query', { + const response = await requestWithRetry(() => datadogClient.get('/api/v1/query', { params: { query, from: Math.floor(fromTime / 1000), to: Math.floor(toTime / 1000), }, - }); + }), query); const series = response.data.series || []; const hasData = series.some((s: any) => Array.isArray(s.pointlist) && s.pointlist.length > 0);