CodeForPhilly · themightychris · May 30, 2026 · May 30, 2026
diff --git a/apps/api/scripts/import-laddr/importer.ts b/apps/api/scripts/import-laddr/importer.ts
@@ -388,10 +388,13 @@ export async function importLaddrFromJson(opts: ImportOptions): Promise<ImportRe
 
   log(`[import] fetching blog from ${opts.sourceHost}`);
   const blogPosts: BlogPost[] = [];
+  // `?include=*` is the only way to get the body content — laddr stores
+  // it as a typed `items` array on `AbstractContent`, not as a flat Body
+  // field. translateBlogPost assembles markdown from those items.
   for await (const row of fetchAllPages<RawBlogPost>(
     '/blog',
     RawBlogPostSchema,
-    {},
+    { include: '*' },
     fetchOpts,
   )) {
     const bp = translateBlogPost(row, ctx);

diff --git a/apps/api/scripts/import-laddr/json-fetcher.ts b/apps/api/scripts/import-laddr/json-fetcher.ts
@@ -150,26 +150,51 @@ export const RawProjectBuzzSchema = z
   .passthrough();
 export type RawProjectBuzz = z.infer<typeof RawProjectBuzzSchema>;
 
+/**
+ * One item in a blog post's body. Laddr's `Emergence\CMS\AbstractContent`
+ * stores body as an ordered list of typed items rather than a single
+ * markdown string. Three item classes appear in production: Markdown
+ * (raw markdown), Media (image reference), Embed (raw HTML — iframes etc.).
+ * Surfaced only when the request asks `?include=*`. `Order` tolerates
+ * null, matching the `.nullable().optional()` fields on RawBlogPostSchema.
+ */
+export const RawBlogPostItemSchema = z
+  .object({
+    ID: z.number().int().positive(),
+    Class: z.string(),
+    Order: z.number().int().nullable().optional(),
+    // Markdown items: Data is a string. Media items: Data is an object
+    // ({ MediaID, Caption }). Embed items: Data is a string (raw HTML).
+    Data: z.unknown().optional(),
+  })
+  .passthrough();
+export type RawBlogPostItem = z.infer<typeof RawBlogPostItemSchema>;
+
 /**
  * Blog post — laddr's `BlogPost` class. The field set is best-effort
  * against laddr's `BlogRequestHandler` template output; unknown fields
  * pass through.
  *
- *   ID, Class, Handle (slug), Title, Body, Summary,
+ *   ID, Class, Handle (slug), Title, Summary,
  *   AuthorID, Published (epoch), Modified (epoch), Created (epoch)
+ *
+ * Body is *not* a top-level field in laddr's JSON. The body content
+ * lives in `items` (only surfaced when the request uses `?include=*`)
+ * as an ordered list of typed content blocks.
  */
 export const RawBlogPostSchema = z
   .object({
     ID: z.number().int().positive(),
     Class: z.string(),
     Handle: z.string().nullable().optional(),
     Title: z.string().nullable().optional(),
-    Body: z.string().nullable().optional(),
     Summary: z.string().nullable().optional(),
     AuthorID: z.number().int().nullable().optional(),
     Published: z.number().int().nullable().optional(),
     Created: z.number().int().nullable().optional(),
     Modified: z.number().int().nullable().optional(),
+    /** Present when the request asks `?include=*`. */
+    items: z.array(RawBlogPostItemSchema).optional(),
   })
   .passthrough();
 export type RawBlogPost = z.infer<typeof RawBlogPostSchema>;

diff --git a/apps/api/scripts/import-laddr/translators.ts b/apps/api/scripts/import-laddr/translators.ts
@@ -38,6 +38,7 @@ import type {
 
 import type {
   RawBlogPost,
+  RawBlogPostItem,
   RawMembership,
   RawPerson,
   RawProject,
@@ -652,15 +653,80 @@ export function translateBuzz(
   };
 }
 
+/**
+ * Source host used for legacy media URLs in blog bodies. Items of class
+ * `Emergence\CMS\Item\Media` reference a numeric `MediaID` resolved
+ * against laddr's `/thumbnail/<id>/<dimensions>` endpoint; we render
+ * those as `![Caption](https://<host>/thumbnail/<id>/1920x1920)` so the
+ * markdown body stays viewable on its own. Eventually those images
+ * should migrate into the data repo as attachments, but that's a
+ * separate concern from this importer pass.
+ */
+const LADDR_MEDIA_HOST = 'codeforphilly.org';
+const LADDR_MEDIA_DIMENSIONS = '1920x1920';
+
+/**
+ * Assemble a blog post's markdown body from laddr's typed `items` array.
+ * Items are sorted by `Order` (defensive — laddr's JSON tends to come
+ * pre-sorted, but the contract isn't documented) and joined by blank
+ * lines. Unknown classes and malformed `Data` are skipped with a
+ * warning pushed to `warnings`, so content is never dropped silently.
+ *
+ *   - `Emergence\CMS\Item\Markdown` / `Emergence\CMS\Item\Embed` — `Data`
+ *     is a string (raw markdown / raw HTML, the latter legal in
+ *     CommonMark); appended verbatim, blank strings skipped quietly.
+ *   - `Emergence\CMS\Item\Media` — `Data` is `{ MediaID, Caption }`;
+ *     rendered as a markdown image; `[`, `]`, `\` in the caption escaped.
+ */
+function assembleBlogBody(
+  items: readonly RawBlogPostItem[] | undefined,
+  warnings: Warnings,
+  legacyId: number,
+): string {
+  if (!items || items.length === 0) return '';
+  const sorted = [...items].sort((a, b) => (a.Order ?? 0) - (b.Order ?? 0));
+  const blocks: string[] = [];
+  for (const item of sorted) {
+    const skip = (why: string): void => {
+      warnings.push(`[blog-posts] legacyId=${legacyId} item=${item.ID} ${why}; skipped`);
+    };
+    const data = item.Data;
+    if (item.Class.endsWith('Item\\Markdown') || item.Class.endsWith('Item\\Embed')) {
+      // Both classes carry a plain string payload (markdown or raw HTML).
+      if (typeof data !== 'string') {
+        skip(`non-string Data on ${JSON.stringify(item.Class)}`);
+      } else if (data.trim().length > 0) {
+        blocks.push(data);
+      }
+    } else if (item.Class.endsWith('Item\\Media')) {
+      const fields = (data && typeof data === 'object' ? data : {}) as Record<string, unknown>;
+      const { MediaID: mediaId, Caption: caption } = fields;
+      if (typeof mediaId !== 'number') {
+        skip('Media item without a numeric MediaID');
+        continue;
+      }
+      // Escape characters that would otherwise end the image alt text early.
+      const alt = typeof caption === 'string' ? caption.trim().replace(/[\\[\]]/g, '\\$&') : '';
+      const url = `https://${LADDR_MEDIA_HOST}/thumbnail/${mediaId}/${LADDR_MEDIA_DIMENSIONS}`;
+      blocks.push(`![${alt}](${url})`);
+    } else {
+      skip(`unknown Class ${JSON.stringify(item.Class)}`);
+    }
+  }
+  // Markdown blocks separate cleanly with a blank line. markdownlint
+  // (run on gitsheets serialize) will normalize any drift.
+  return blocks.join('\n\n');
+}
+
 /**
  * Translate a laddr `BlogPost` row into a v1 `BlogPost` record.
  *
  * Slug source priority: `Handle` (laddr's URL-safe identifier) →
- * slugified `Title` → `legacy-<ID>`. Bodies are kept verbatim; the
- * gitsheets markdown format will normalize them via markdownlint on
- * serialize. `AuthorID` resolves via the people-by-legacy map; an
- * unresolved author is recorded as a warning but doesn't block the
- * post (the runtime treats `authorId === null` as anonymous).
+ * slugified `Title` → `legacy-<ID>`. Bodies are assembled from the
+ * row's `items` array (see assembleBlogBody). `AuthorID` resolves via
+ * the people-by-legacy map; an unresolved author is recorded as a
+ * warning but doesn't block the post (the runtime treats
+ * `authorId === null` as anonymous).
  */
 export function translateBlogPost(
   row: RawBlogPost,
@@ -702,7 +768,7 @@ export function translateBlogPost(
       ? epochToIsoOr(row.Modified, createdAt)
       : undefined;
 
-  const body = nonEmptyStr(row.Body) ?? '';
+  const body = assembleBlogBody(row.items, ctx.warnings, legacyId);
   const summary = nonEmptyStr(row.Summary);
   // The schema caps summary at 500 chars; truncate longer laddr summaries
   // rather than failing validation on import.

diff --git a/apps/api/tests/import-laddr.test.ts b/apps/api/tests/import-laddr.test.ts
@@ -389,12 +389,19 @@ describe('translateBlogPost', () => {
       Class: 'BlogPost',
       Handle: 'civic-tech-roundup-2026',
       Title: 'Civic Tech Roundup, May 2026',
-      Body: '# Heading\n\nA blog body.',
       Summary: 'A short blurb.',
       AuthorID: 12,
       Published: 1746028800, // 2025-04-30
       Created: 1746028800,
       Modified: 1746028800,
+      items: [
+        {
+          ID: 100,
+          Class: 'Emergence\\CMS\\Item\\Markdown',
+          Order: 1,
+          Data: '# Heading\n\nA blog body.',
+        },
+      ],
     };
     const bp = translateBlogPost(row, c);
     expect(bp).not.toBeNull();
@@ -409,13 +416,110 @@ describe('translateBlogPost', () => {
     expect(bp!.editedAt).toBeUndefined();
   });
 
-  it('falls back through Title → legacy-<id> when Handle is missing', () => {
+  it('assembles a body from interleaved Markdown / Media / Embed items', () => {
+    const c = ctx();
+    const row: RawBlogPost = {
+      ID: 7,
+      Class: 'BlogPost',
+      Handle: 'multi-item',
+      Title: 'Multi-item Post',
+      Published: 1746028800,
+      items: [
+        {
+          ID: 200,
+          Class: 'Emergence\\CMS\\Item\\Media',
+          Order: 1,
+          Data: { MediaID: 3349, Caption: 'A photo' },
+        },
+        {
+          ID: 201,
+          Class: 'Emergence\\CMS\\Item\\Markdown',
+          Order: 2,
+          Data: 'Some intro markdown.',
+        },
+        {
+          ID: 202,
+          Class: 'Emergence\\CMS\\Item\\Embed',
+          Order: 3,
+          Data: '<iframe src="https://www.youtube.com/embed/abc"></iframe>',
+        },
+      ],
+    };
+    const bp = translateBlogPost(row, c);
+    expect(bp).not.toBeNull();
+    expect(bp!.body).toBe(
+      [
+        '![A photo](https://codeforphilly.org/thumbnail/3349/1920x1920)',
+        'Some intro markdown.',
+        '<iframe src="https://www.youtube.com/embed/abc"></iframe>',
+      ].join('\n\n'),
+    );
+  });
+
+  it('sorts items by Order before assembling', () => {
+    const c = ctx();
+    const row: RawBlogPost = {
+      ID: 8,
+      Class: 'BlogPost',
+      Handle: 'unordered',
+      Title: 'Unordered',
+      Published: 1746028800,
+      items: [
+        { ID: 300, Class: 'Emergence\\CMS\\Item\\Markdown', Order: 2, Data: 'second' },
+        { ID: 301, Class: 'Emergence\\CMS\\Item\\Markdown', Order: 1, Data: 'first' },
+      ],
+    };
+    const bp = translateBlogPost(row, c);
+    expect(bp!.body).toBe('first\n\nsecond');
+  });
+
+  it('returns an empty body when items is absent', () => {
     const c = ctx();
     const row: RawBlogPost = {
       ID: 9,
       Class: 'BlogPost',
+      Handle: 'bodiless',
+      Title: 'Bodiless',
+      Published: 1746028800,
+    };
+    const bp = translateBlogPost(row, c);
+    expect(bp!.body).toBe('');
+  });
+
+  it('warns on unknown Item class but keeps the post', () => {
+    const c = ctx();
+    const row: RawBlogPost = {
+      ID: 10,
+      Class: 'BlogPost',
+      Handle: 'unknown-item',
+      Title: 'Unknown Item',
+      Published: 1746028800,
+      items: [
+        {
+          ID: 400,
+          Class: 'Emergence\\CMS\\Item\\NewType',
+          Order: 1,
+          Data: 'whatever',
+        },
+        {
+          ID: 401,
+          Class: 'Emergence\\CMS\\Item\\Markdown',
+          Order: 2,
+          Data: 'still here',
+        },
+      ],
+    };
+    const bp = translateBlogPost(row, c);
+    expect(bp!.body).toBe('still here');
+    expect(c.warnings.items.some((w) => w.includes('item=400'))).toBe(true);
+  });
+
+  it('falls back through Title → legacy-<id> when Handle is missing', () => {
+    const c = ctx();
+    const row: RawBlogPost = {
+      ID: 11,
+      Class: 'BlogPost',
       Title: 'A Hello Post',
-      Body: 'body',
       Published: 1746028800,
     };
     const bp = translateBlogPost(row, c);
@@ -426,18 +530,17 @@ describe('translateBlogPost', () => {
   it('warns and posts anonymously when AuthorID does not resolve', () => {
     const c = ctx();
     const row: RawBlogPost = {
-      ID: 11,
+      ID: 12,
       Class: 'BlogPost',
       Handle: 'orphan',
       Title: 'Orphan',
-      Body: 'orphan',
       AuthorID: 999,
       Published: 1746028800,
     };
     const bp = translateBlogPost(row, c);
     expect(bp).not.toBeNull();
     expect(bp!.authorId).toBeUndefined();
-    expect(c.warnings.items.some((w) => w.includes('legacyId=11'))).toBe(true);
+    expect(c.warnings.items.some((w) => w.includes('legacyId=12'))).toBe(true);
   });
 
   it('sets editedAt when Modified is >60s after Published', () => {
@@ -447,7 +550,6 @@ describe('translateBlogPost', () => {
       Class: 'BlogPost',
       Handle: 'edited',
       Title: 'Edited',
-      Body: 'edited body',
       Published: 1746028800,
       Modified: 1746028800 + 3600, // +1 hour
     };
@@ -463,7 +565,6 @@ describe('translateBlogPost', () => {
       Class: 'BlogPost',
       Handle: 'long-summary',
       Title: 'Long Summary',
-      Body: 'body',
       Summary: overlong,
       Published: 1746028800,
     };
@@ -653,7 +754,11 @@ function mockRoutes(): MockRoutes {
         ],
       ],
       [
-        '/blog?format=json&limit=200&offset=0',
+        // Importer fetches /blog with `?include=*` so it can read the
+        // structured body items (laddr doesn't expose Body via the flat
+        // JSON fields). `*` is a sub-delim per RFC 3986 and stays
+        // unencoded through URLSearchParams.
+        '/blog?format=json&include=*&limit=200&offset=0',
         [
           envelope(
             [
@@ -662,12 +767,19 @@ function mockRoutes(): MockRoutes {
                 Class: 'BlogPost',
                 Handle: 'hello-philly',
                 Title: 'Hello Philly',
-                Body: '# Hello\n\nFirst blog post.',
                 Summary: 'A short hello.',
                 AuthorID: 10,
                 Published: 1377126953,
                 Created: 1377126953,
                 Modified: 1377126953,
+                items: [
+                  {
+                    ID: 1000,
+                    Class: 'Emergence\\CMS\\Item\\Markdown',
+                    Order: 1,
+                    Data: '# Hello\n\nFirst blog post.',
+                  },
+                ],
               },
             ],
             1,