diff --git a/.github/workflows/sync-law.yml b/.github/workflows/sync-law.yml
index 6305708..42e1864 100644
--- a/.github/workflows/sync-law.yml
+++ b/.github/workflows/sync-law.yml
@@ -88,7 +88,7 @@ jobs:
run: |
mkdir -p workspace/statutes
node --input-type=module <<'SCRIPT'
- import { OlrcFetcher, HashStore } from '@civic-source/fetcher';
+ import { OlrcFetcher, HashStore, extractXmlFromZip } from '@civic-source/fetcher';
import { XmlToMarkdownAdapter } from '@civic-source/transformer';
import { writeFile, mkdir } from 'node:fs/promises';
import { join, dirname } from 'node:path';
@@ -114,8 +114,15 @@ jobs:
continue;
}
+ const xml = extractXmlFromZip(Buffer.from(xmlResult.value, 'base64'));
+ if (xml === null) {
+ console.error(`Title ${rp.title}: no XML in archive`);
+ failedTitles++;
+ continue;
+ }
+
const transformer = new XmlToMarkdownAdapter(rp.publicLaw);
- const transformResult = transformer.transformToFiles(xmlResult.value);
+ const transformResult = transformer.transformToFiles(xml);
if (!transformResult.ok) {
console.error(`Title ${rp.title}: transform failed — ${transformResult.error.message}`);
failedTitles++;
diff --git a/packages/fetcher/src/__tests__/zip.test.ts b/packages/fetcher/src/__tests__/zip.test.ts
new file mode 100644
index 0000000..0594756
--- /dev/null
+++ b/packages/fetcher/src/__tests__/zip.test.ts
@@ -0,0 +1,40 @@
+import { deflateRawSync } from 'node:zlib';
+import { describe, expect, it } from 'vitest';
+
+import { extractXmlFromZip } from '../zip.js';
+
+/**
+ * Assemble the smallest archive the parser in zip.ts can walk: one local file
+ * header immediately followed by the entry name and then the payload bytes.
+ * Fields: sig@0, method@8, compressedSize@18, fileNameLen@26, extraLen@28, name@30.
+ */
+function makeZip(fileName: string, compressionMethod: number, data: Buffer): Buffer {
+  const nameBytes = Buffer.from(fileName, 'utf-8');
+  const localHeader = Buffer.alloc(30);
+  localHeader.writeUInt32LE(0x04034b50, 0); // "PK\x03\x04" local file header magic
+  localHeader.writeUInt16LE(compressionMethod, 8);
+  localHeader.writeUInt16LE(nameBytes.length, 26);
+  localHeader.writeUInt16LE(0, 28); // no extra field
+  localHeader.writeUInt32LE(data.length, 18); // compressed size: payload is written as-is
+  localHeader.writeUInt32LE(data.length, 22); // uncompressed size (unused by parser)
+  return Buffer.concat([localHeader, nameBytes, data]);
+}
+
+describe('extractXmlFromZip', () => {
+  it('returns the contents of a stored (method 0) .xml entry', () => {
+    const expected = 'hello';
+    const archive = makeZip('doc.xml', 0, Buffer.from(expected, 'utf-8'));
+    expect(extractXmlFromZip(archive)).toBe(expected);
+  });
+
+  it('inflates a deflate (method 8) .xml entry', () => {
+    const expected = 'a'.repeat(500) + 'some longer content for round-tripping';
+    const deflated = deflateRawSync(Buffer.from(expected, 'utf-8'));
+    expect(extractXmlFromZip(makeZip('doc.xml', 8, deflated))).toBe(expected);
+  });
+
+  it('returns null when no .xml entry is present', () => {
+    const archive = makeZip('readme.txt', 0, Buffer.from('not xml', 'utf-8'));
+    expect(extractXmlFromZip(archive)).toBeNull();
+  });
+});
diff --git a/packages/fetcher/src/index.ts b/packages/fetcher/src/index.ts
index f501758..0a50895 100644
--- a/packages/fetcher/src/index.ts
+++ b/packages/fetcher/src/index.ts
@@ -20,6 +20,7 @@ export {
parseCurrentRelease,
type CurrentReleaseInfo,
} from './fetcher.js';
+export { extractXmlFromZip } from './zip.js';
export { HashStore } from './hash-store.js';
export { FetcherMetrics, type FetcherMetricsSnapshot, type DownloadErrorType } from './metrics.js';
export { createLogger, type Logger, type LogLevel } from '@civic-source/shared';
diff --git a/packages/fetcher/src/zip.ts b/packages/fetcher/src/zip.ts
new file mode 100644
index 0000000..b6fe933
--- /dev/null
+++ b/packages/fetcher/src/zip.ts
@@ -0,0 +1,147 @@
+import { inflateRawSync } from 'node:zlib';
+
+/** Record signatures defined by the ZIP format (stored little-endian). */
+const LOCAL_HEADER_SIG = 0x04034b50;
+const CENTRAL_HEADER_SIG = 0x02014b50;
+const END_OF_CENTRAL_DIR_SIG = 0x06054b50;
+
+/** Size in bytes of the fixed part of each record, before variable-length fields. */
+const LOCAL_HEADER_LEN = 30;
+const CENTRAL_HEADER_LEN = 46;
+const END_OF_CENTRAL_DIR_LEN = 22;
+
+/** Value a 32-bit size/offset field holds when the real value is in a ZIP64 record. */
+const ZIP64_SENTINEL = 0xffffffff;
+
+/** Whether an entry name carries an `.xml` extension (compared case-insensitively). */
+function isXmlEntry(fileName: string): boolean {
+  return fileName.toLowerCase().endsWith('.xml');
+}
+
+/**
+ * Decode one entry's payload as UTF-8 text.
+ *
+ * @returns The text, or `null` when the method is neither stored (0) nor deflate (8).
+ */
+function decodeEntry(
+  zip: Buffer,
+  compressionMethod: number,
+  dataStart: number,
+  compressedSize: number,
+): string | null {
+  const payload = zip.subarray(dataStart, dataStart + compressedSize);
+  if (compressionMethod === 0) return payload.toString('utf-8');
+  if (compressionMethod === 8) return inflateRawSync(payload).toString('utf-8');
+  return null;
+}
+
+/**
+ * Find the end-of-central-directory record by scanning backwards from the tail.
+ * The record is 22 bytes plus a trailing comment of at most 65535 bytes.
+ *
+ * @returns The central directory offset and entry count, or `null` if absent.
+ */
+function locateCentralDirectory(zip: Buffer): { offset: number; entryCount: number } | null {
+  const lowest = Math.max(0, zip.length - END_OF_CENTRAL_DIR_LEN - 0xffff);
+  for (let pos = zip.length - END_OF_CENTRAL_DIR_LEN; pos >= lowest; pos--) {
+    if (zip.readUInt32LE(pos) !== END_OF_CENTRAL_DIR_SIG) continue;
+    // Reject signature look-alikes whose declared comment would overrun the buffer.
+    if (pos + END_OF_CENTRAL_DIR_LEN + zip.readUInt16LE(pos + 20) > zip.length) continue;
+    return { offset: zip.readUInt32LE(pos + 16), entryCount: zip.readUInt16LE(pos + 10) };
+  }
+  return null;
+}
+
+/**
+ * Look up the first supported `.xml` entry through the central directory, whose
+ * sizes stay usable even when local headers were written with zeroed sizes
+ * (general-purpose flag bit 3, "data descriptor").
+ *
+ * @returns The XML text, or `null` when the directory is missing, malformed,
+ *   ZIP64, or lists no usable `.xml` entry.
+ */
+function extractViaCentralDirectory(zip: Buffer): string | null {
+  const directory = locateCentralDirectory(zip);
+  if (directory === null) return null;
+
+  let pos = directory.offset;
+  for (let i = 0; i < directory.entryCount; i++) {
+    if (pos + CENTRAL_HEADER_LEN > zip.length) return null;
+    if (zip.readUInt32LE(pos) !== CENTRAL_HEADER_SIG) return null;
+
+    const compressionMethod = zip.readUInt16LE(pos + 10);
+    const compressedSize = zip.readUInt32LE(pos + 20);
+    const fileNameLen = zip.readUInt16LE(pos + 28);
+    const extraLen = zip.readUInt16LE(pos + 30);
+    const commentLen = zip.readUInt16LE(pos + 32);
+    const localOffset = zip.readUInt32LE(pos + 42);
+    const nameStart = pos + CENTRAL_HEADER_LEN;
+    const fileName = zip.toString('utf-8', nameStart, nameStart + fileNameLen);
+
+    if (isXmlEntry(fileName) && (compressionMethod === 0 || compressionMethod === 8)) {
+      if (compressedSize === ZIP64_SENTINEL || localOffset === ZIP64_SENTINEL) return null;
+      if (localOffset + LOCAL_HEADER_LEN > zip.length) return null;
+      if (zip.readUInt32LE(localOffset) !== LOCAL_HEADER_SIG) return null;
+      // The local header's own name/extra lengths decide where the payload begins;
+      // its extra field may differ in length from the central directory's copy.
+      const dataStart =
+        localOffset +
+        LOCAL_HEADER_LEN +
+        zip.readUInt16LE(localOffset + 26) +
+        zip.readUInt16LE(localOffset + 28);
+      return decodeEntry(zip, compressionMethod, dataStart, compressedSize);
+    }
+
+    pos = nameStart + fileNameLen + extraLen + commentLen;
+  }
+  return null;
+}
+
+/**
+ * Walk local file headers from offset 0 and decode the first supported `.xml`
+ * entry. Used for archives that carry no central directory; it relies on the
+ * compressed size stored in each local header to step to the next entry.
+ */
+function extractViaLocalHeaders(zip: Buffer): string | null {
+  let offset = 0;
+  while (offset + LOCAL_HEADER_LEN <= zip.length) {
+    if (zip.readUInt32LE(offset) !== LOCAL_HEADER_SIG) break;
+
+    const compressionMethod = zip.readUInt16LE(offset + 8);
+    const compressedSize = zip.readUInt32LE(offset + 18);
+    const fileNameLen = zip.readUInt16LE(offset + 26);
+    const extraLen = zip.readUInt16LE(offset + 28);
+    const nameStart = offset + LOCAL_HEADER_LEN;
+    const fileName = zip.toString('utf-8', nameStart, nameStart + fileNameLen);
+    const dataStart = nameStart + fileNameLen + extraLen;
+
+    if (isXmlEntry(fileName)) {
+      const text = decodeEntry(zip, compressionMethod, dataStart, compressedSize);
+      if (text !== null) return text;
+    }
+
+    offset = dataStart + compressedSize;
+  }
+  return null;
+}
+
+/**
+ * Extract the first `.xml` entry from a ZIP archive buffer.
+ *
+ * The central directory is consulted first because archives written in
+ * streaming mode leave the compressed size zero in the local header and record
+ * it only in the directory. If no usable directory entry is found, the function
+ * falls back to walking local file headers from the start of the buffer.
+ *
+ * Stored entries (method 0) are read directly and deflated entries (method 8)
+ * are inflated synchronously via `inflateRawSync`; `.xml` entries using any
+ * other method are skipped. The extension match ignores case.
+ *
+ * The caller is responsible for decoding any base64 before passing the buffer.
+ *
+ * @param zip Raw ZIP archive bytes.
+ * @returns The XML string, or `null` if the archive contains no readable `.xml` entry.
+ */
+export function extractXmlFromZip(zip: Buffer): string | null {
+  return extractViaCentralDirectory(zip) ?? extractViaLocalHeaders(zip);
+}