From 88c9f06ddfc07e939b5428d61cf4ed7291180fd7 Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Tue, 26 May 2026 23:30:24 -0700 Subject: [PATCH 1/2] fix(webapp): recover from ClickHouse JSON parse failures on out-of-range integers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second class of poisoned-row failure in the runs replication path. PR #3708 handled lone UTF-16 surrogates; this one handles bare JSON integer literals that exceed ClickHouse's Int64/UInt64 range. ClickHouse's `JSON(max_dynamic_paths=...)` column fits each bare integer token into Int64 (signed) or UInt64 (unsigned). Bare integers strictly outside `[-2^63, 2^64 - 1]` are rejected with `INCORRECT_DATA` (no silent fallback to Float64). JS Numbers that are integer-valued but above `Number.MAX_SAFE_INTEGER` still serialise via JSON.stringify as bare integer tokens (no exponent) while `|value| < 1e21`, so any such Number lands on the wire as a token CH cannot accept. Customer-facing symptom: `scan-social-profiles` runs continued to be stranded in `EXECUTING` on the Tasks page even after the surrogate fix landed. CloudWatch showed `Dropped batch — ClickHouse JSON parse error but sanitizer found nothing to fix` firing 8/8 times since the previous deploy. Root cause: upstream JS Number precision loss on a 21-digit Google Plus ID (`117039831458782873093` → `117039831458782870000`) — the precision-lossy value still serialises as a bare integer that exceeds UInt64.MAX, which CH rejects. Reproduced end-to-end against ClickHouse 25.12.11.4 in Docker with the exact `Cannot parse JSON object here` error from prod. `apps/webapp/app/v3/eventRepository/sanitizeRowsOnParseError.server.ts`: - New private `isUnsafeJsonInteger(value)` helper — true iff value is a finite, integer-valued JS Number where `|value| < 1e21` (i.e. JSON.stringify emits integer form, not exponent) AND `value` falls outside `[Int64.MIN, UInt64.MAX]`. 
- `sanitizeUnknownInPlace` gains a number-branch: when the predicate holds, replace the Number with its string form. CH's dynamic JSON column accepts a `String` subtype on the same path, so the row inserts cleanly on retry. The numeric value was already precision-lossy upstream (JS Number can't represent integers above 2^53 faithfully), so type-flipping to string is information-preserving relative to what arrived. - Float-valued numbers and large floats (>= 1e21, NaN, Infinity) are left alone — JSON.stringify emits them with exponents or as `null`, both of which CH accepts. Recovery stays purely reactive — no extra cost on the hot replication path. The sanitizer only runs after a ClickHouse parse-error rejection, so healthy rows pay nothing. `apps/webapp/test/sanitizeRowsOnParseError.test.ts`: five new unit tests covering positive/negative out-of-range integers, in-range boundary values (MAX_SAFE_INTEGER, 2^63), non-integer numbers, and the actual `scan-social-profiles` nested shape with `gp_id: 117039831458782870000`. Plus an extension to `sanitizeRows` that verifies surrogate and integer fixes are counted together across rows. `.server-changes/runs-replication-bigint-recovery.md` — release notes. Refs TRI-9755. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 --- .../runs-replication-bigint-recovery.md | 21 ++++ .../sanitizeRowsOnParseError.server.ts | 36 ++++++ .../test/sanitizeRowsOnParseError.test.ts | 119 ++++++++++++++++++ 3 files changed, 176 insertions(+) create mode 100644 .server-changes/runs-replication-bigint-recovery.md diff --git a/.server-changes/runs-replication-bigint-recovery.md b/.server-changes/runs-replication-bigint-recovery.md new file mode 100644 index 00000000000..9bd1afc734f --- /dev/null +++ b/.server-changes/runs-replication-bigint-recovery.md @@ -0,0 +1,21 @@ +--- +area: webapp +type: fix +--- + +Extend the runs-replication sanitizer (`sanitizeUnknownInPlace`) to detect +JS Numbers that JSON-serialise as bare integer tokens outside the +Int64..UInt64 range and replace them with their string form, so a +following retry insert no longer trips ClickHouse's +`INCORRECT_DATA` parser failure on `JSON(max_dynamic_paths)` columns. + +This is the second class of poisoned-row failure that was stranding +`scan-social-profiles` runs in `EXECUTING` on the Tasks page even after +the UTF-16 surrogate fix (#3708 / TRI-9755). Root cause: upstream JS +Number precision loss on a 21-digit Google Plus ID +(`117039831458782873093` → `117039831458782870000`) — the precision-lossy +value still serialises as a bare integer that exceeds UInt64.MAX, +which CH's JSON column rejects with `Cannot parse JSON object here`. + +Recovery stays purely reactive (no extra cost on the hot replication +path); the sanitizer only runs after a ClickHouse parse-error rejection. 
diff --git a/apps/webapp/app/v3/eventRepository/sanitizeRowsOnParseError.server.ts b/apps/webapp/app/v3/eventRepository/sanitizeRowsOnParseError.server.ts index f04f9c02023..4a03a763941 100644 --- a/apps/webapp/app/v3/eventRepository/sanitizeRowsOnParseError.server.ts +++ b/apps/webapp/app/v3/eventRepository/sanitizeRowsOnParseError.server.ts @@ -7,6 +7,38 @@ import { detectBadJsonStrings } from "~/utils/detectBadJsonStrings"; */ export const INVALID_UTF16_SENTINEL = "[invalid-utf16]"; +/** + * ClickHouse's `JSON(max_dynamic_paths)` column fits each bare-integer + * JSON token into Int64 (signed) or UInt64 (unsigned). Bare integers + * outside `[-2^63, 2^64 - 1]` are rejected with `INCORRECT_DATA` (no + * silent fallback to Float64). `JSON.stringify` emits any integer-valued + * Number with `|value| < 1e21` as a bare integer (no exponent), so any + * such Number above 2^64 - 1 (~1.8e19) or below -2^63 (~-9.2e18) lands on + * the wire as a token CH cannot accept. + * + * The fix: replace such Numbers with their string form. CH's dynamic + * JSON column accepts a `String` subtype on the same path, so the row + * inserts cleanly on retry. The numeric value was already + * precision-lossy upstream (JS Number can't represent integers above + * 2^53 faithfully), so type-flipping to string is information-preserving + * relative to what arrived. + * + * Float-valued numbers (including very large ones like `1e25`) serialise + * with an exponent and are accepted by CH at any magnitude, so they're + * left alone. + */ +function isUnsafeJsonInteger(value: number): boolean { + if (!Number.isFinite(value)) return false; + if (!Number.isInteger(value)) return false; + // JSON.stringify emits integer-valued Numbers as bare integer tokens + // (no exponent) only while `|value| < 1e21`; at or above that + // threshold `Number.prototype.toString` switches to exponential form, + // which CH accepts as Float64 at any magnitude. 
So the dangerous band + // is strictly between the Int64/UInt64 boundary and 1e21. + if (Math.abs(value) >= 1e21) return false; + return value > 18446744073709551615 || value < -9223372036854775808; +} + export type SanitizeResult = { /** How many rows had at least one string field replaced. */ rowsTouched: number; @@ -62,6 +94,10 @@ export function sanitizeUnknownInPlace(value: unknown): { value: unknown; fixed: return { value, fixed: 0 }; } + if (typeof value === "number" && isUnsafeJsonInteger(value)) { + return { value: String(value), fixed: 1 }; + } + if (Array.isArray(value)) { let fixed = 0; for (let i = 0; i < value.length; i++) { diff --git a/apps/webapp/test/sanitizeRowsOnParseError.test.ts b/apps/webapp/test/sanitizeRowsOnParseError.test.ts index fafa6ca4790..fd402201ffa 100644 --- a/apps/webapp/test/sanitizeRowsOnParseError.test.ts +++ b/apps/webapp/test/sanitizeRowsOnParseError.test.ts @@ -105,6 +105,93 @@ describe("sanitizeUnknownInPlace", () => { expect(sanitizeUnknownInPlace(null)).toEqual({ value: null, fixed: 0 }); expect(sanitizeUnknownInPlace(undefined)).toEqual({ value: undefined, fixed: 0 }); }); + + // ─── Out-of-range integers (TRI-9755) ────────────────────────────────────── + // ClickHouse's JSON(max_dynamic_paths) column rejects bare integer tokens + // outside [Int64.MIN, UInt64.MAX]. Such Numbers serialise as bare integer + // form via JSON.stringify (no exponent, since |value| < 1e21) so they reach + // ClickHouse as unquoted oversized ints. Sanitizer replaces them with the + // string form, which ClickHouse's dynamic JSON column accepts as a String + // subtype on that path. + + it("replaces an integer-valued Number above UInt64.MAX with its string form", () => { + // 117039831458782870000 is the actual prod value (Google Plus ID after + // upstream JS-Number precision loss from 117039831458782873093). 
+ const result = sanitizeUnknownInPlace(117039831458782870000); + expect(result.value).toBe("117039831458782870000"); + expect(result.fixed).toBe(1); + }); + + it("replaces an integer-valued Number below Int64.MIN with its string form", () => { + // -9223372036854775809 is the first failing negative; in float64 it + // rounds to the same representation as Int64.MIN (-9223372036854775808), + // but for completeness we check a clearly-out-of-range negative. + const result = sanitizeUnknownInPlace(-1e20); + expect(result.value).toBe("-100000000000000000000"); + expect(result.fixed).toBe(1); + }); + + it("leaves safe integers and boundary values untouched", () => { + // 42 — safe integer + expect(sanitizeUnknownInPlace(42)).toEqual({ value: 42, fixed: 0 }); + // Number.MAX_SAFE_INTEGER (2^53 - 1) — JSON.stringify still emits as integer + expect(sanitizeUnknownInPlace(Number.MAX_SAFE_INTEGER)).toEqual({ + value: Number.MAX_SAFE_INTEGER, + fixed: 0, + }); + // 2^63 (Int64.MAX + 1) — still fits in UInt64, CH accepts it + expect(sanitizeUnknownInPlace(2 ** 63)).toEqual({ value: 2 ** 63, fixed: 0 }); + }); + + it("leaves non-integer numbers untouched (floats, NaN, Infinity)", () => { + // Numbers with a fractional part — emitted with `.` in JSON + expect(sanitizeUnknownInPlace(3.14)).toEqual({ value: 3.14, fixed: 0 }); + // Very large float-form (>= 1e21) — JSON.stringify uses exponent form, + // CH parses as Float64 successfully + expect(sanitizeUnknownInPlace(1e25)).toEqual({ value: 1e25, fixed: 0 }); + // NaN / Infinity — JSON.stringify emits `null`, so harmless on the wire + expect(sanitizeUnknownInPlace(Number.NaN)).toEqual({ value: Number.NaN, fixed: 0 }); + expect(sanitizeUnknownInPlace(Number.POSITIVE_INFINITY)).toEqual({ + value: Number.POSITIVE_INFINITY, + fixed: 0, + }); + }); + + it("finds an oversized integer nested deep inside the actual scan-social-profiles shape", () => { + const row = { + output: { + data: { + profiles: [ + { module: "linktree", query: 
"x@example.com" }, + { + module: "poshmark", + spec_format: [ + { + platform_variables: [ + { + key: "gp_id", + proper_key: "Gp Id", + // The actual prod value — bare JSON integer > UInt64.MAX + value: 117039831458782870000, + type: "int", + }, + ], + }, + ], + }, + ], + }, + }, + }; + const result = sanitizeUnknownInPlace(row); + expect(result.fixed).toBe(1); + expect( + (row.output.data.profiles[1].spec_format![0].platform_variables[0] as any).value + ).toBe("117039831458782870000"); + // Untouched neighbours + expect(row.output.data.profiles[0].module).toBe("linktree"); + expect(row.output.data.profiles[1].spec_format![0].platform_variables[0].type).toBe("int"); + }); }); describe("sanitizeRows", () => { @@ -158,4 +245,36 @@ describe("sanitizeRows", () => { expect(result.rowsTouched).toBe(1); expect(result.fieldsSanitized).toBe(2); }); + + it("counts surrogate fixes and out-of-range integer fixes together (TRI-9755)", () => { + const rows = [ + { + id: "r0", + attributes: { + surrogate: `bad ${HIGH_SURROGATE}`, + bigint: 117039831458782870000, + clean: "fine", + safe: 42, + }, + }, + { + id: "r1", + attributes: { + bigint: -1e20, + clean: "still fine", + }, + }, + { + id: "r2", + attributes: { clean: "no fixes needed" }, + }, + ]; + const result = sanitizeRows(rows); + expect(result.rowsTouched).toBe(2); + expect(result.fieldsSanitized).toBe(3); + expect(rows[0].attributes.surrogate).toBe(INVALID_UTF16_SENTINEL); + expect(rows[0].attributes.bigint).toBe("117039831458782870000"); + expect(rows[0].attributes.safe).toBe(42); + expect(rows[1].attributes.bigint).toBe("-100000000000000000000"); + }); }); From b2cc5623503103ecafb171f53211ce2ee462caab Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Tue, 26 May 2026 23:51:58 -0700 Subject: [PATCH 2/2] fix(webapp): compare bigint sanitizer boundaries via BigInt for exactness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review (CodeRabbit + Devin convergent finding): 
the literal `18446744073709551615` in JS source rounds to 2^64 in float64 (the spacing around 2^64 is 2048), so `value > 18446744073709551615` was effectively `value > 2**64` and missed the case where a JS Number's float64 value is *exactly* 2^64. `JSON.stringify(2**64)` emits "18446744073709552000" — a bare integer above UInt64.MAX that ClickHouse rejects — so the sanitizer would have let that row through unchanged and the batch would still drop. Switch to BigInt comparison against named `UINT64_MAX` / `INT64_MIN` constants. `BigInt(value)` is safe because we already gate on `Number.isInteger(value)`. The negative side is unaffected (Int64.MIN = -2^63 is exactly representable in float64) but the BigInt form keeps both sides symmetric and self-documenting. Regression test added: `sanitizeUnknownInPlace(2 ** 64)` must produce "18446744073709552000" with fixed=1. A naïve `>` literal comparison would not catch this. 23/23 sanitizer tests green; webapp typecheck clean. --- .../sanitizeRowsOnParseError.server.ts | 12 +++++++++++- apps/webapp/test/sanitizeRowsOnParseError.test.ts | 11 +++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/apps/webapp/app/v3/eventRepository/sanitizeRowsOnParseError.server.ts b/apps/webapp/app/v3/eventRepository/sanitizeRowsOnParseError.server.ts index 4a03a763941..0374442200a 100644 --- a/apps/webapp/app/v3/eventRepository/sanitizeRowsOnParseError.server.ts +++ b/apps/webapp/app/v3/eventRepository/sanitizeRowsOnParseError.server.ts @@ -27,6 +27,9 @@ export const INVALID_UTF16_SENTINEL = "[invalid-utf16]"; * with an exponent and are accepted by CH at any magnitude, so they're * left alone. 
*/ +const UINT64_MAX = 18446744073709551615n; +const INT64_MIN = -9223372036854775808n; + function isUnsafeJsonInteger(value: number): boolean { if (!Number.isFinite(value)) return false; if (!Number.isInteger(value)) return false; @@ -36,7 +39,14 @@ function isUnsafeJsonInteger(value: number): boolean { // which CH accepts as Float64 at any magnitude. So the dangerous band // is strictly between the Int64/UInt64 boundary and 1e21. if (Math.abs(value) >= 1e21) return false; - return value > 18446744073709551615 || value < -9223372036854775808; + // Compare via BigInt for exactness. The Number literal 18446744073709551615 + // is rounded to 2**64 in float64 (the float spacing near 2^64 is 2048), so a + // direct `value > 18446744073709551615` would miss a Number whose float64 + // value is exactly 2**64 — `JSON.stringify` of that emits + // "18446744073709552000", which exceeds UInt64.MAX and ClickHouse rejects. + // `BigInt(value)` is safe here because we already gated on Number.isInteger. + const asBigInt = BigInt(value); + return asBigInt > UINT64_MAX || asBigInt < INT64_MIN; } export type SanitizeResult = { diff --git a/apps/webapp/test/sanitizeRowsOnParseError.test.ts b/apps/webapp/test/sanitizeRowsOnParseError.test.ts index fd402201ffa..6e0de52aa66 100644 --- a/apps/webapp/test/sanitizeRowsOnParseError.test.ts +++ b/apps/webapp/test/sanitizeRowsOnParseError.test.ts @@ -122,6 +122,17 @@ describe("sanitizeUnknownInPlace", () => { expect(result.fixed).toBe(1); }); + it("catches the float64 boundary at exactly 2**64 (UInt64.MAX + 1)", () => { + // float64 cannot represent UInt64.MAX (2^64 - 1) exactly — the literal + // 18446744073709551615 in JS source rounds to 2^64. JSON.stringify + // emits this Number as "18446744073709552000", which exceeds UInt64.MAX + // and trips ClickHouse. Regression for the BigInt-based comparison; + // a naïve `value > 18446744073709551615` would let this pass. 
+ const result = sanitizeUnknownInPlace(2 ** 64); + expect(result.value).toBe("18446744073709552000"); + expect(result.fixed).toBe(1); + }); + it("replaces an integer-valued Number below Int64.MIN with its string form", () => { // -9223372036854775809 is the first failing negative; in float64 it // rounds to the same representation as Int64.MIN (-9223372036854775808),