diff --git a/apps/api/scripts/import-laddr/importer.ts b/apps/api/scripts/import-laddr/importer.ts index 7fcbedc..57a30af 100644 --- a/apps/api/scripts/import-laddr/importer.ts +++ b/apps/api/scripts/import-laddr/importer.ts @@ -388,10 +388,13 @@ export async function importLaddrFromJson(opts: ImportOptions): Promise( '/blog', RawBlogPostSchema, - {}, + { include: '*' }, fetchOpts, )) { const bp = translateBlogPost(row, ctx); diff --git a/apps/api/scripts/import-laddr/json-fetcher.ts b/apps/api/scripts/import-laddr/json-fetcher.ts index dcbb16b..5515835 100644 --- a/apps/api/scripts/import-laddr/json-fetcher.ts +++ b/apps/api/scripts/import-laddr/json-fetcher.ts @@ -150,13 +150,37 @@ export const RawProjectBuzzSchema = z .passthrough(); export type RawProjectBuzz = z.infer; +/** + * One item in a blog post's body. Laddr's `Emergence\CMS\AbstractContent` + * stores body as an ordered list of typed items rather than a single + * markdown string. Three item classes appear in production: Markdown + * (raw markdown), Media (image reference), Embed (raw HTML — iframes etc.). + * + * Surfaced only when the request asks `?include=*`. + */ +export const RawBlogPostItemSchema = z + .object({ + ID: z.number().int().positive(), + Class: z.string(), + Order: z.number().int().optional(), + // Markdown items: Data is a string. Media items: Data is an object + // ({ MediaID, Caption }). Embed items: Data is a string (raw HTML). + Data: z.unknown().optional(), + }) + .passthrough(); +export type RawBlogPostItem = z.infer; + /** * Blog post — laddr's `BlogPost` class. The field set is best-effort * against laddr's `BlogRequestHandler` template output; unknown fields * pass through. * - * ID, Class, Handle (slug), Title, Body, Summary, + * ID, Class, Handle (slug), Title, Summary, * AuthorID, Published (epoch), Modified (epoch), Created (epoch) + * + * Body is *not* a top-level field in laddr's JSON. The body content + * lives in `items` (only surfaced when the request uses `?include=*`) + * as an ordered list of typed content blocks. */ export const RawBlogPostSchema = z .object({ @@ -164,12 +188,13 @@ export const RawBlogPostSchema = z Class: z.string(), Handle: z.string().nullable().optional(), Title: z.string().nullable().optional(), - Body: z.string().nullable().optional(), Summary: z.string().nullable().optional(), AuthorID: z.number().int().nullable().optional(), Published: z.number().int().nullable().optional(), Created: z.number().int().nullable().optional(), Modified: z.number().int().nullable().optional(), + /** Present when the request asks `?include=*`. */ + items: z.array(RawBlogPostItemSchema).optional(), }) .passthrough(); export type RawBlogPost = z.infer; diff --git a/apps/api/scripts/import-laddr/translators.ts b/apps/api/scripts/import-laddr/translators.ts index 8f20af3..f1c3bee 100644 --- a/apps/api/scripts/import-laddr/translators.ts +++ b/apps/api/scripts/import-laddr/translators.ts @@ -38,6 +38,7 @@ import type { import type { RawBlogPost, + RawBlogPostItem, RawMembership, RawPerson, RawProject, @@ -652,15 +653,80 @@ export function translateBuzz( }; } +/** + * Source host used for legacy media URLs in blog bodies. Items of class + * `Emergence\CMS\Item\Media` reference a numeric `MediaID` resolved + * against laddr's `/thumbnail//` endpoint; we render + * those as `![Caption](https:///thumbnail//1920x1920)` so the + * markdown body stays viewable on its own. Eventually those images + * should migrate into the data repo as attachments, but that's a + * separate concern from this importer pass. + */ +const LADDR_MEDIA_HOST = 'codeforphilly.org'; +const LADDR_MEDIA_DIMENSIONS = '1920x1920'; + +/** + * Assemble a blog post's markdown body from laddr's typed `items` array. + * Items are sorted by `Order` (defensive — laddr's JSON tends to come + * pre-sorted, but the contract isn't documented). + * + * Three item classes appear in production: + * - `Emergence\CMS\Item\Markdown` — `Data` is the raw markdown string; + * append verbatim. + * - `Emergence\CMS\Item\Media` — `Data` is `{ MediaID, Caption }`; + * render as a markdown image with the laddr media URL. + * - `Emergence\CMS\Item\Embed` — `Data` is raw HTML (iframes, divs); + * append as a raw HTML block (legal in CommonMark). + */ +function assembleBlogBody( + items: readonly RawBlogPostItem[] | undefined, + warnings: Warnings, + legacyId: number, +): string { + if (!items || items.length === 0) return ''; + const sorted = [...items].sort((a, b) => (a.Order ?? 0) - (b.Order ?? 0)); + const blocks: string[] = []; + for (const item of sorted) { + if (item.Class.endsWith('Item\\Markdown')) { + if (typeof item.Data === 'string') { + blocks.push(item.Data); + } + } else if (item.Class.endsWith('Item\\Media')) { + const data = item.Data; + if (data && typeof data === 'object' && 'MediaID' in data) { + const mediaId = (data as { MediaID?: unknown }).MediaID; + const caption = (data as { Caption?: unknown }).Caption; + const captionText = + typeof caption === 'string' && caption.trim().length > 0 ? caption.trim() : ''; + if (typeof mediaId === 'number') { + const url = `https://${LADDR_MEDIA_HOST}/thumbnail/${mediaId}/${LADDR_MEDIA_DIMENSIONS}`; + blocks.push(`![${captionText}](${url})`); + } + } + } else if (item.Class.endsWith('Item\\Embed')) { + if (typeof item.Data === 'string' && item.Data.trim().length > 0) { + blocks.push(item.Data); + } + } else { + warnings.push( + `[blog-posts] legacyId=${legacyId} item=${item.ID} unknown Class ${JSON.stringify(item.Class)}; skipped`, + ); + } + } + // Markdown blocks separate cleanly with a blank line. markdownlint + // (run on gitsheets serialize) will normalize any drift. + return blocks.join('\n\n'); +} + /** * Translate a laddr `BlogPost` row into a v1 `BlogPost` record. * * Slug source priority: `Handle` (laddr's URL-safe identifier) → - * slugified `Title` → `legacy-`. Bodies are kept verbatim; the - * gitsheets markdown format will normalize them via markdownlint on - * serialize. `AuthorID` resolves via the people-by-legacy map; an - * unresolved author is recorded as a warning but doesn't block the - * post (the runtime treats `authorId === null` as anonymous). + * slugified `Title` → `legacy-`. Bodies are assembled from the + * row's `items` array (see assembleBlogBody). `AuthorID` resolves via + * the people-by-legacy map; an unresolved author is recorded as a + * warning but doesn't block the post (the runtime treats + * `authorId === null` as anonymous). */ export function translateBlogPost( row: RawBlogPost, @@ -702,7 +768,7 @@ export function translateBlogPost( ? epochToIsoOr(row.Modified, createdAt) : undefined; - const body = nonEmptyStr(row.Body) ?? ''; + const body = assembleBlogBody(row.items, ctx.warnings, legacyId); const summary = nonEmptyStr(row.Summary); // The schema caps summary at 500 chars; truncate longer laddr summaries // rather than failing validation on import. diff --git a/apps/api/tests/import-laddr.test.ts b/apps/api/tests/import-laddr.test.ts index 8943a68..f082e3e 100644 --- a/apps/api/tests/import-laddr.test.ts +++ b/apps/api/tests/import-laddr.test.ts @@ -389,12 +389,19 @@ describe('translateBlogPost', () => { Class: 'BlogPost', Handle: 'civic-tech-roundup-2026', Title: 'Civic Tech Roundup, May 2026', - Body: '# Heading\n\nA blog body.', Summary: 'A short blurb.', AuthorID: 12, Published: 1746028800, // 2025-04-30 Created: 1746028800, Modified: 1746028800, + items: [ + { + ID: 100, + Class: 'Emergence\\CMS\\Item\\Markdown', + Order: 1, + Data: '# Heading\n\nA blog body.', + }, + ], }; const bp = translateBlogPost(row, c); expect(bp).not.toBeNull(); @@ -409,13 +416,110 @@ describe('translateBlogPost', () => { expect(bp!.editedAt).toBeUndefined(); }); - it('falls back through Title → legacy- when Handle is missing', () => { + it('assembles a body from interleaved Markdown / Media / Embed items', () => { + const c = ctx(); + const row: RawBlogPost = { + ID: 7, + Class: 'BlogPost', + Handle: 'multi-item', + Title: 'Multi-item Post', + Published: 1746028800, + items: [ + { + ID: 200, + Class: 'Emergence\\CMS\\Item\\Media', + Order: 1, + Data: { MediaID: 3349, Caption: 'A photo' }, + }, + { + ID: 201, + Class: 'Emergence\\CMS\\Item\\Markdown', + Order: 2, + Data: 'Some intro markdown.', + }, + { + ID: 202, + Class: 'Emergence\\CMS\\Item\\Embed', + Order: 3, + Data: '', + }, + ], + }; + const bp = translateBlogPost(row, c); + expect(bp).not.toBeNull(); + expect(bp!.body).toBe( + [ + '![A photo](https://codeforphilly.org/thumbnail/3349/1920x1920)', + 'Some intro markdown.', + '', + ].join('\n\n'), + ); + }); + + it('sorts items by Order before assembling', () => { + const c = ctx(); + const row: RawBlogPost = { + ID: 8, + Class: 'BlogPost', + Handle: 'unordered', + Title: 'Unordered', + Published: 1746028800, + items: [ + { ID: 300, Class: 'Emergence\\CMS\\Item\\Markdown', Order: 2, Data: 'second' }, + { ID: 301, Class: 'Emergence\\CMS\\Item\\Markdown', Order: 1, Data: 'first' }, + ], + }; + const bp = translateBlogPost(row, c); + expect(bp!.body).toBe('first\n\nsecond'); + }); + + it('returns an empty body when items is absent', () => { const c = ctx(); const row: RawBlogPost = { ID: 9, Class: 'BlogPost', + Handle: 'bodiless', + Title: 'Bodiless', + Published: 1746028800, + }; + const bp = translateBlogPost(row, c); + expect(bp!.body).toBe(''); + }); + + it('warns on unknown Item class but keeps the post', () => { + const c = ctx(); + const row: RawBlogPost = { + ID: 10, + Class: 'BlogPost', + Handle: 'unknown-item', + Title: 'Unknown Item', + Published: 1746028800, + items: [ + { + ID: 400, + Class: 'Emergence\\CMS\\Item\\NewType', + Order: 1, + Data: 'whatever', + }, + { + ID: 401, + Class: 'Emergence\\CMS\\Item\\Markdown', + Order: 2, + Data: 'still here', + }, + ], + }; + const bp = translateBlogPost(row, c); + expect(bp!.body).toBe('still here'); + expect(c.warnings.items.some((w) => w.includes('item=400'))).toBe(true); + }); + + it('falls back through Title → legacy- when Handle is missing', () => { + const c = ctx(); + const row: RawBlogPost = { + ID: 11, + Class: 'BlogPost', Title: 'A Hello Post', - Body: 'body', Published: 1746028800, }; const bp = translateBlogPost(row, c); @@ -426,18 +530,17 @@ describe('translateBlogPost', () => { it('warns and posts anonymously when AuthorID does not resolve', () => { const c = ctx(); const row: RawBlogPost = { - ID: 11, + ID: 12, Class: 'BlogPost', Handle: 'orphan', Title: 'Orphan', - Body: 'orphan', AuthorID: 999, Published: 1746028800, }; const bp = translateBlogPost(row, c); expect(bp).not.toBeNull(); expect(bp!.authorId).toBeUndefined(); - expect(c.warnings.items.some((w) => w.includes('legacyId=11'))).toBe(true); + expect(c.warnings.items.some((w) => w.includes('legacyId=12'))).toBe(true); }); it('sets editedAt when Modified is >60s after Published', () => { @@ -447,7 +550,6 @@ describe('translateBlogPost', () => { Class: 'BlogPost', Handle: 'edited', Title: 'Edited', - Body: 'edited body', Published: 1746028800, Modified: 1746028800 + 3600, // +1 hour }; @@ -463,7 +565,6 @@ describe('translateBlogPost', () => { Class: 'BlogPost', Handle: 'long-summary', Title: 'Long Summary', - Body: 'body', Summary: overlong, Published: 1746028800, }; @@ -653,7 +754,11 @@ function mockRoutes(): MockRoutes { ], ], [ - '/blog?format=json&limit=200&offset=0', + // Importer fetches /blog with `?include=*` so it can read the + // structured body items (laddr doesn't expose Body via the flat + // JSON fields). `*` is a sub-delim per RFC 3986 and stays + // unencoded through URLSearchParams. + '/blog?format=json&include=*&limit=200&offset=0', [ envelope( [ @@ -662,12 +767,19 @@ function mockRoutes(): MockRoutes { Class: 'BlogPost', Handle: 'hello-philly', Title: 'Hello Philly', - Body: '# Hello\n\nFirst blog post.', Summary: 'A short hello.', AuthorID: 10, Published: 1377126953, Created: 1377126953, Modified: 1377126953, + items: [ + { + ID: 1000, + Class: 'Emergence\\CMS\\Item\\Markdown', + Order: 1, + Data: '# Hello\n\nFirst blog post.', + }, + ], }, ], 1,