From f74b1f4c92511df8f05c3733b3b974b55c71d01b Mon Sep 17 00:00:00 2001 From: William Zujkowski Date: Tue, 23 Jun 2026 00:48:46 -0400 Subject: [PATCH] fix(types): constrain rendered/fetched URLs to http(s) scheme z.url() accepts dangerous schemes (javascript:, data:, ftp:). uslmUrl is used to download content and sourceUrl is rendered directly as an href in the precedent UI (PrecedentDrawer.svelte, statute page), so a malformed/compromised value could yield a clickable javascript: link (XSS on click) or an off-protocol download. Add a shared HttpUrlSchema (z.url().refine(http(s))) and apply it to ReleasePointSchema.uslmUrl and CaseAnnotationSchema.sourceUrl. Tighten the web content collection's sourceUrl (was z.string()) to the same http(s)-or-empty constraint so bad data fails the content build loudly. Low real-world likelihood today (URLs are constructed from uscode.house.gov / courtlistener.com), but the scheme was previously unenforced at the schema/render boundary. New tests assert that javascript: and ftp: are rejected for uslmUrl and that javascript: is rejected for sourceUrl. Closes #210 Co-Authored-By: Claude Opus 4.8 (1M context) --- apps/web/src/content.config.ts | 9 ++++++++- packages/types/src/__tests__/schemas.test.ts | 9 +++++++++ packages/types/src/index.ts | 16 ++++++++++++++-- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/apps/web/src/content.config.ts b/apps/web/src/content.config.ts index 7f05c70..08be8f4 100644 --- a/apps/web/src/content.config.ts +++ b/apps/web/src/content.config.ts @@ -27,7 +27,14 @@ const annotations = defineCollection({ court: z.string(), date: z.string(), holdingSummary: z.string().optional(), - sourceUrl: z.string().optional(), + // Constrain to http(s) (or empty) so a malformed annotation can't inject + // a `javascript:`/`data:` URL that the precedent UI renders as an href (#210).
+ sourceUrl: z + .string() + .refine((u) => u === '' || /^https?:\/\//i.test(u), { + message: 'sourceUrl must use the http(s) scheme', + }) + .optional(), impact: z.string().optional(), })).default([]), }), diff --git a/packages/types/src/__tests__/schemas.test.ts b/packages/types/src/__tests__/schemas.test.ts index 168a877..4f44ecd 100644 --- a/packages/types/src/__tests__/schemas.test.ts +++ b/packages/types/src/__tests__/schemas.test.ts @@ -43,6 +43,11 @@ describe('ReleasePointSchema', () => { expect(() => ReleasePointSchema.parse({ ...valid, uslmUrl: 'not-a-url' })).toThrow(); }); + it('rejects non-http(s) uslmUrl schemes', () => { + expect(() => ReleasePointSchema.parse({ ...valid, uslmUrl: 'javascript:alert(1)' })).toThrow(); + expect(() => ReleasePointSchema.parse({ ...valid, uslmUrl: 'ftp://uscode.house.gov/x.zip' })).toThrow(); + }); + it('rejects wrong-length sha256Hash', () => { expect(() => ReleasePointSchema.parse({ ...valid, sha256Hash: 'abc' })).toThrow(); }); @@ -92,6 +97,10 @@ describe('CaseAnnotationSchema', () => { expect(() => CaseAnnotationSchema.parse({ ...valid, court: 'State' })).toThrow(); }); + it('rejects a javascript: sourceUrl (would render as an href)', () => { + expect(() => CaseAnnotationSchema.parse({ ...valid, sourceUrl: 'javascript:alert(document.cookie)' })).toThrow(); + }); + it('rejects holdingSummary over 500 chars', () => { expect(() => CaseAnnotationSchema.parse({ ...valid, holdingSummary: 'x'.repeat(501) })).toThrow(); }); diff --git a/packages/types/src/index.ts b/packages/types/src/index.ts index a53904a..4fe77bb 100644 --- a/packages/types/src/index.ts +++ b/packages/types/src/index.ts @@ -14,6 +14,18 @@ export function err(error: E): Result { return { ok: false, error }; } +// --- Shared field schemas --- + +/** + * A URL constrained to the http(s) scheme. 
Plain `z.url()` accepts dangerous + * schemes such as `javascript:` and `data:`; several of these values are later + * rendered as `href`s or used to download content, so we reject anything that + * is not http/https at the validation boundary (defense-in-depth — see #210). + */ +export const HttpUrlSchema = z + .url() + .refine((u) => /^https?:\/\//i.test(u), { message: 'URL must use the http(s) scheme' }); + // --- Release Point schema --- export const ReleasePointSchema = z.object({ @@ -24,7 +36,7 @@ export const ReleasePointSchema = z.object({ /** Release date in ISO 8601 datetime format (ET timezone) */ dateET: z.string().datetime(), /** URL to the USLM XML download for this release */ - uslmUrl: z.url(), + uslmUrl: HttpUrlSchema, /** SHA-256 hex digest for integrity verification (64 hex characters) */ sha256Hash: z.string().length(64), }); @@ -49,7 +61,7 @@ export const CaseAnnotationSchema = z.object({ court: z.enum(["SCOTUS", "Appellate", "District"]), date: z.string(), holdingSummary: z.string().max(500), - sourceUrl: z.url(), + sourceUrl: HttpUrlSchema, impact: PrecedentImpactSchema, /** Public Law the statute was current through when this case was decided */ statuteVersionRef: z.string().optional(),