civic-source · williamzujkowski · Jun 23, 2026 · Jun 23, 2026
@@ -100,4 +100,49 @@ describe('deduplicateCases', () => {
     const result = deduplicateCases(cases);
     expect(result).toHaveLength(1);
   });
+
+  // Two uncited cases that differ in name, date and URL must both survive.
+  it('retains distinct cases that both have empty citations', () => {
+    const alpha = makeCase({
+      caseName: 'Alpha v. United States',
+      citation: '',
+      date: '2024-01-15',
+      sourceUrl: 'https://www.courtlistener.com/opinion/111/',
+    });
+    const beta = makeCase({
+      caseName: 'Beta v. United States',
+      citation: '',
+      date: '2023-06-30',
+      sourceUrl: 'https://www.courtlistener.com/opinion/222/',
+    });
+
+    const deduped = deduplicateCases([alpha, beta]);
+    const names = deduped.map((entry) => entry.caseName);
+
+    expect(deduped).toHaveLength(2);
+    // Input order is expected to be preserved.
+    expect(names).toEqual(['Alpha v. United States', 'Beta v. United States']);
+  });
+
+  it('still collapses truly identical uncited cases', () => {
+    // Builds the same uncited case each time: identical name, date and URL.
+    const buildSame = () =>
+      makeCase({ caseName: 'Same', citation: '', date: '2024-01-15', sourceUrl: 'https://www.courtlistener.com/opinion/999/' });
+
+    const deduped = deduplicateCases([buildSame(), buildSame()]);
+    expect(deduped).toHaveLength(1);
+  });
+
+  it('collapses cases with the same non-empty citation while retaining distinct uncited ones', () => {
+    const deduped = deduplicateCases([
+      makeCase({ caseName: 'CitedFirst', citation: '18 USC 111' }),
+      makeCase({ caseName: 'CitedSecond', citation: '18 U.S.C. 111' }),
+      makeCase({ caseName: 'UncitedA', citation: '', sourceUrl: 'https://www.courtlistener.com/opinion/aaa/' }),
+      makeCase({ caseName: 'UncitedB', citation: '', sourceUrl: 'https://www.courtlistener.com/opinion/bbb/' }),
+    ]);
+    const survivors = deduped.map((entry) => entry.caseName);
+    // Expect 3 entries: the first cited case plus both distinct uncited ones.
+    expect(deduped).toHaveLength(3);
+    expect(survivors).toEqual(['CitedFirst', 'UncitedA', 'UncitedB']);
+  });
 });
@@ -13,11 +13,26 @@ export function normalizeCitation(citation: string): string {
     .toLowerCase();
 }
 
+/**
+ * Build a deduplication key for a case.
+ *
+ * Cases with a non-empty normalized citation are keyed on that citation so
+ * genuine duplicate citations collapse. Cases without one fall back to a
+ * composite of caseName + date + sourceUrl, JSON-encoded so a delimiter
+ * character inside a field can never make two distinct cases share a key.
+ * The "cite:" / "composite:" prefixes keep the two key spaces disjoint.
+ */
+function dedupeKey(c: CaseAnnotation): string {
+  const normalized = normalizeCitation(c.citation);
+  if (normalized !== '') return `cite:${normalized}`;
+  return `composite:${JSON.stringify([c.caseName, c.date, c.sourceUrl])}`;
+}
+
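-/** Deduplicate cases by normalized citation, preserving first occurrence */
+/** Deduplicate cases by normalized citation (or a composite key when the citation is empty), preserving first occurrence */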
 export function deduplicateCases(cases: CaseAnnotation[]): CaseAnnotation[] {
   const seen = new Set<string>();
   return cases.filter((c) => {
-    const key = normalizeCitation(c.citation);
+    const key = dedupeKey(c);
     if (seen.has(key)) return false;
     seen.add(key);
     return true;

@@ -381,6 +381,41 @@ describe('XmlToMarkdownAdapter', () => {
     expect(s111?.path).toBe('statutes/title-10/chapter-2/section-111.md');
   });
 
+  it('extracts appendix title number "18a" from identifier (no collision with title 18)', () => {
+    const appendixXml = `
+<lawDoc>
+  <title identifier="/us/usc/t18a">
+    <num>Title 18 Appendix</num>
+    <chapter identifier="/us/usc/t18a/ch1">
+      <num>Chapter 1</num>
+      <section identifier="/us/usc/t18a/s1">
+        <num>1</num>
+        <heading>Appendix Rule</heading>
+        <content>Some appendix content.</content>
+      </section>
+    </chapter>
+  </title>
+</lawDoc>`;
+    // Parser level: the "t18a" identifier is expected to produce title number "18a".
+    const parseResult = parseUslmXml(appendixXml);
+    expect(parseResult.ok).toBe(true);
+    if (!parseResult.ok) return;
+    expect(parseResult.value.titleNumber).toBe('18a');
+
+    // Adapter level: the section file must land under title-18a, not title-18.
+    const files = new XmlToMarkdownAdapter('PL 119-1').transformToFiles(appendixXml);
+    expect(files.ok).toBe(true);
+    if (!files.ok) return;
+
+    const sectionFile = files.value.find((file) => file.path.includes('section-1'));
+    expect(sectionFile).toBeDefined();
+    expect(sectionFile?.path).toBe('statutes/title-18a/chapter-1/section-1.md');
+    expect(sectionFile?.path).not.toContain('title-18/');
+    // Frontmatter keeps the numeric title (18) even though the path uses "18a".
+    expect(sectionFile?.content).toContain('usc_title: 18');
+    expect(sectionFile?.content).not.toContain('usc_title: 18a');
+  });
+
   it('preserves inline element text in mixed content (cross-references)', () => {
     const xml = `
 <lawDoc>

@@ -74,15 +74,19 @@ function findTitleNumber(root: unknown[]): string | undefined {
   const titleAttrs = firstTitle.attrs;
   const identifier = titleAttrs['@_identifier'];
   if (identifier) {
-    const match = /\/t(\d+)$/.exec(identifier);
+    // Capture an optional appendix suffix letter so e.g. "/us/usc/t18a"
+    // yields "18a" and does NOT collide with main Title 18 ("18").
+    const match = /\/t(\d+[a-zA-Z]?)$/.exec(identifier);
     if (match?.[1]) return match[1];
   }
 
-  // Fallback: look for num element
+  // Fallback: look for num element (e.g. "Title 18 Appendix")
   const firstNum = findElements(firstTitle.children, USLM_ELEMENTS.num)[0];
   if (firstNum) {
     const text = extractTextFromNodes(firstNum.children);
-    const numMatch = /\d+/.exec(text);
+    // Match a number plus an optional directly attached appendix letter (e.g.
+    // "18a"). NOTE(review): "Title 18 Appendix" yields "18"; the `??` branch is redundant.
+    const numMatch = /\d+[a-zA-Z]?/.exec(text) ?? /\d+/.exec(text);
     return numMatch?.[0];
   }