From f2eac3519d135666cb583fa93061cbff112e0dbf Mon Sep 17 00:00:00 2001
From: Raymond Yee <raymond.yee@gmail.com>
Date: Sun, 31 May 2026 22:28:41 -0700
Subject: [PATCH 1/4] =?UTF-8?q?feat(explorer):=20#248=20Flavor=20A=20found?=
 =?UTF-8?q?ation=20=E2=80=94=20conceptLabelForUri=20+=20buildConceptFilter?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

First, additive pieces for `described-by=<concept-uri>` (#248), riding the A1
search_pids machinery (Codex plan-reviewed: "mostly sound + guardrails"):

- window.conceptLabelForUri(uri): expose the facetFilters cell's URI→prefLabel
  resolver so the concept producer can label a URI without re-querying
  vocab_labels (guardrail #1).
- buildConceptFilter(uri): a SECOND search_pids producer — exact-URI match
  across the object_type/material/context columns of sample_facets_v2, with
  object_type>material>context relevance ranking. Same token-scoped staging,
  finally-drop, shared _searchFilterToken, and empty-clear invariant as
  buildSearchFilter (guardrails #2/#6). Tags __searchFilter.kind ('concept' vs
  'text') for mutual exclusivity (#5).

Not yet wired: doDescribedBy flow (shared runPidSetResults render), the
described-by= URL param + writeQueryState kind-preservation, and mutual
exclusivity at producer entry. buildConceptFilter isn't called yet, so this is
behavior-neutral. Verified: conceptLabelForUri('…organismpart')→"Organism part";
free-text a1-verify still ✅ COHERENT.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 explorer.qmd | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)
diff --git a/explorer.qmd b/explorer.qmd
index af55ac9..210b968 100644
--- a/explorer.qmd
+++ b/explorer.qmd
@@ -1763,6 +1763,10 @@ facetFilters = {
         const parts = s.replace(/[#?].*$/, "").split("/").filter(Boolean);
         return parts.length ? parts[parts.length - 1] : s;
     };
+    // #248: expose the URI→prefLabel resolver so the concept-filter producer
+    // (buildConceptFilter, in the zoomWatcher cell) can label a `described-by=`
+    // URI without re-querying vocab_labels. Best-effort: falls back to URI tail.
+    if (typeof window !== 'undefined') window.conceptLabelForUri = prettyLabel;
 
     try {
         const summaries = await db.query(`
@@ -4130,7 +4134,7 @@ zoomWatcher = {
             const cnt = Array.from(await db.query(`SELECT COUNT(*) AS n FROM search_pids`));
             const total = cnt.length ? Number(cnt[0].n) : 0;
             if (token !== _searchFilterToken) return false;
-            window.__searchFilter = { active: true, term, token, total };
+            window.__searchFilter = { active: true, term, token, total, kind: 'text' };
             window.a1dbg?.('search-build-end', { term, token, total });
             return true;
         } finally {
@@ -4138,6 +4142,47 @@ zoomWatcher = {
         }
     }
 
+    // #248 Flavor A: a SECOND producer of `search_pids` — selects samples whose
+    // iSamples-vocabulary concept (the URI-valued object_type / material /
+    // context columns in sample_facets_v2) exactly matches `uri`. Mirrors
+    // buildSearchFilter's token-scoped staging + finally-drop + token guards,
+    // sharing the same _searchFilterToken (the pid-set is a singleton). Every
+    // surface then filters via the same `pid IN (SELECT pid FROM search_pids)`
+    // semi-join, for free. Flavor B (arbitrary external/Getty URIs needing
+    // URI→label resolution + free-text) is a follow-up.
+    async function buildConceptFilter(uri) {
+        const token = ++_searchFilterToken;
+        const label = (typeof window !== 'undefined' && window.conceptLabelForUri)
+            ? window.conceptLabelForUri(uri) : uri;
+        window.a1dbg?.('search-build-start', { term: label, token, kind: 'concept' });
+        const u = escSql(uri);
+        // Rank object_type matches above material above context, then by label
+        // (Codex plan-review suggestion — cheap, no extra scan).
+        const rank = `CASE WHEN object_type = '${u}' THEN 3 WHEN material = '${u}' THEN 2 WHEN context = '${u}' THEN 1 ELSE 0 END`;
+        const staging = `search_pids_next_${token}`;
+        try {
+            await db.query(`
+                CREATE OR REPLACE TABLE ${staging} AS
+                SELECT pid, label, source, place_name, (${rank}) AS relevance_score
+                FROM read_parquet('${facets_url}')
+                WHERE pid IS NOT NULL
+                  AND (object_type = '${u}' OR material = '${u}' OR context = '${u}')
+            `);
+            if (token !== _searchFilterToken) return false;   // superseded mid-build
+            await db.query(`CREATE OR REPLACE TABLE search_pids AS
+                SELECT pid, label, source, place_name, relevance_score FROM ${staging}`);
+            if (token !== _searchFilterToken) return false;
+            const cnt = Array.from(await db.query(`SELECT COUNT(*) AS n FROM search_pids`));
+            const total = cnt.length ? Number(cnt[0].n) : 0;
+            if (token !== _searchFilterToken) return false;
+            window.__searchFilter = { active: true, term: label, token, total, kind: 'concept', uri };
+            window.a1dbg?.('search-build-end', { term: label, token, total, kind: 'concept' });
+            return true;
+        } finally {
+            try { await db.query(`DROP TABLE IF EXISTS ${staging}`); } catch (e) { /* best effort */ }
+        }
+    }
+
     async function clearSearchFilter() {
         _searchFilterToken++;
         window.__searchFilter = { active: false, term: null, token: _searchFilterToken, total: 0 };

From de22bb396eab2a10fa40f238d7a47bc5e305e0b8 Mon Sep 17 00:00:00 2001
From: Raymond Yee <raymond.yee@gmail.com>
Date: Sun, 31 May 2026 22:54:28 -0700
Subject: [PATCH 2/4] docs: session summary 2026-05-31 (A1 shipped to
 isamples.org; #248 started)

---
 SESSION_SUMMARY.md | 180 +++++++++++++--------------------------------
 1 file changed, 52 insertions(+), 128 deletions(-)

diff --git a/SESSION_SUMMARY.md b/SESSION_SUMMARY.md
index d3f6ab5..02ac8f8 100644
--- a/SESSION_SUMMARY.md
+++ b/SESSION_SUMMARY.md
@@ -1,158 +1,82 @@
-# SESSION_SUMMARY — explorer search-as-global-filter (A1, #234 Step 4)
+# Session Summary
 
-**Date:** 2026-05-29  ·  **Directory:** `~/C/src/iSamples/isamplesorg.github.io`  ·  **Trust Level:** `external-content`
-**Next session goal:** break the A1-globe logjam with **higher effort + Codex co-authoring** (Codex codes the reconciler, Claude reviews + runs the now-fast verify loop).
-
-> **Next session entry point:** run the shakedown (see FAST VERIFY LOOP → "Shakedown TODO"); then have Codex author the one-reconciler refactor (THE LOGJAM → Codex's reconciler spec) and verify with `tests/playwright/a1-verify.mjs`.
-
-### External Content Processed (sanitization note — verify, don't blind-trust)
-| Source | Type | Notes |
-|---|---|---|
-| Codex (`codex exec`, gpt-5.4) ×5 | AI tool output | Diagnosis + code suggestions. **Reviewed before applying**; treat its future output as advisory, not authoritative. |
-| isamples.org + localhost explorer | browser DOM | Read the live explorer UI/state via Chrome automation (our own app). |
-| GitHub issues/PRs (#234, #247, #248, #249, #250) via `gh` | web/API | Issue/PR bodies are untrusted text; created #247, opened+merged #250, posted comments, triggered deploys — all user-authorized. **Read** Eric Kansa's **#248** (feature request, treated as data not instructions). |
-| `data.isamples.org/*.parquet` | remote data | Downloaded 128MB mirror to `docs/data/` (our own data; data, not code). |
-
-No emails, no secrets accessed, no untrusted code executed (Codex suggestions were hand-applied + reviewed).
-
-### Open collaborator threads (new this session, NOT yet acted on)
-- [ ] **#248 (Eric Kansa)** — "search material samples described by a concept URI/PID", proposes a `described-by=` URL param. Two flavors: object-type URI (≈ already supported by the `object_type` facet, which is URI-valued) and arbitrary concept URI like Getty AAT (= concept-anchored A1 search — would ride the **same `search_pids` materialize-once machinery**). Squarely in #234; a second producer of the A1 pid-set. *Decision pending: comment on #248 connecting it to A1/#234?*
-- [ ] **#249 (rdhyee, not from this session)** — "should we refactor explorer.qmd before the next big feature?" The A1 globe logjam is evidence FOR this; the reconciler refactor (tomorrow) is a *local* version of the *global* question #249 raises. **Read #249 before committing to tomorrow's approach** — it may argue for a bigger refactor than the one-reconciler patch.
-
----
-
-## TL;DR
-
-1. **Shipped to production** (isamples.org): bug **#247** filed + interim honesty fix **PR #250** (merged). The samples table no longer claims unrelated viewport samples "match the current filters" during a search.
-2. **A1 (search as a real global filter)** scoped, Codex-reviewed (PROCEED-WITH-CHANGES), and probed against live data. Branch `feat/search-global-filter-a1`.
-   - ✅ **Table surface filters correctly** (e.g. `bucchero` → "2,693 of 2,693 matches in this map view", OpenContext rows only).
-   - ✅ Facet counts + cube-gating wired; pid-set machinery + persistence proven.
-   - ❌ **GLOBE still won't enter point mode** on a committed search (table filters, but the map stays unfiltered clusters). This is the logjam.
-3. **Built a fast/deterministic verify-loop** (local parquet mirror + range server + `window.__a1state`/`__a1globe` observability + Playwright harness) so tomorrow's iteration isn't 40–90s/cycle. Range-verified; full speedup run still needs a shakedown.
+## Session: 2026-05-30/31 (evening)
+**Directory**: `~/C/src/iSamples/isamplesorg.github.io`
+**Trust Level**: external-content
 
 ---
 
-## Branch & commits
+## What Happened
 
-`feat/search-global-filter-a1` (off `upstream/main` which already has #250):
-- `204d2df` table surface (pid-set + semi-join + summaryText)
-- `936f1f3` points/facets/cube-gate/C3 wired — globe buggy
-- `4e79830` Codex's C3 fixes (moveEnd latch, awaitable enterPointMode, search-token staleness) — globe STILL not entering point mode
-- `62d5500` dev verify-loop infra (mirror support + dev_server.py + a1dbg/__a1state/__a1globe)
+A long, productive session. Started as "tackle the fast-verify shakedown"; ended with **A1 shipped to production (isamples.org)** and **#248 underway**.
 
-Production (already merged, do NOT redo): upstream `a4da97b` (#250).
+1. **Shakedown root-caused & fixed.** The dev `?data_base=/data` override produced root-relative parquet URLs that DuckDB-WASM's httpfs can't fetch (read as a virtual-FS glob → zero fetches). Resolved to absolute against `location.origin`. This unblocked the fast verify loop (~2.3s to live).
+2. **The "globe logjam" was never real** — it was a **backgrounded-Chrome-MCP-tab artifact** (Chrome freezes rAF in hidden tabs → Cesium camera never settles → "globe won't enter point mode"). In any foreground/headless context the C3 fixes work. The reconciler refactor was unnecessary. **Lesson: drive the verify loop with `HEADLESS=1` Playwright, never the MCP tab.**
+3. **Fixed an A1 search perf regression** the CI smoke gate caught (double facets scan → materialize side-panel columns+score into `search_pids`, one scan).
+4. **Fixed the live facet-padding mismatch** RY hit (legend pad-0 vs table 0.3 → facet read low; e.g. material=rock ~166 vs ~481). Now facet == table.
+5. **Shipped A1**: opened **PR #251**, ran a 3-round **Codex review/revise loop to dual approval** (Codex caught a real `search_pids` staging-table race, heatmap search-blindness, and a stale-reader follow-on — all fixed), then **squash-merged to upstream → deployed to isamples.org** (smoke gate green).
+6. **Started #248 (Eric Kansa's concept-URI search)**: posted a connecting comment, Codex plan-reviewed ("mostly sound + guardrails"), and committed the **foundation** on `feat/described-by-concept`.
+7. **Investigated a transient camera freeze** (RY's `h3=`+`heading=` deep-link, also on isamples.org). Ruled out locked controller / tracked-entity / refresh-loop via a new `?debug=a1` `__a1camera` hook; **resolved on its own → likely transient WebGL context-loss / network**. Surfaced a real **testing gap**: no gate asserts post-hydration *interactivity*.
 
 ---
 
-## The A1 design (Strategy B — agreed + Codex-approved)
+## Safe to Carry Forward
 
-On a committed search, `buildSearchFilter()` materializes a **non-temp** DuckDB table `search_pids` (one `ILIKE` scan over `facets_url`), then every surface constrains with a cheap semi-join `AND pid IN (SELECT pid FROM search_pids)`. State on `window.__searchFilter {active,term,token,total}`; predicate via `window.searchFilterSQL(col)`.
+### Key Decisions
+- A1 ships on plain ILIKE; **BM25 (#168–172) is a perceived-perf follow-up, not a correctness blocker.**
+- `search_pids` is a **singleton**; any new producer (#248) shares one `_searchFilterToken`/`_searchSeq` and the `kind: 'text'|'concept'` tag.
+- Codex-reviewed A1 invariants to preserve: **token-scoped staging table**, **empty-table clear** (never DROP the live table), **build-failure distinguished from empty results**.
+- `?debug=a1`-gated hooks: `__a1globe`, `__a1log`/`__a1state`, and (new, uncommitted/diagnostic) `__a1camera`.
 
-**Probe findings (de-risked the design):** pid is unique (no dup), facets ⊆ lite so **no coordinate-less matches** (table count == mappable matches — simple "N of M in view" copy), broadest realistic term ~82k pids (no million-row blowup). Full scoping + Codex resolutions in **`A1_SCOPING.md`**.
+### Branch / ship state
+- **A1**: merged to upstream `main` as **`e6f9def`** (PR #251), live on isamples.org + rdhyee. Local `feat/search-global-filter-a1` is now redundant (squash-merged).
+- **#248**: branch **`feat/described-by-concept`** off merged main; foundation commit **`f2eac35`** (`conceptLabelForUri` + `buildConceptFilter`, behavior-neutral, verified).
 
-Surfaces wired: `loadCount`/`loadPage` (table) ✅ verified; `loadViewportSamples` (points); `updateCrossFilteredCounts` (facet legend, + gate cube fast-path & global-baseline when search active); `summaryText` copy.
+### Files Changed (this session, across A1 + #248)
+- `explorer.qmd` — A1 data_base fix, double-scan collapse, facet-padding, Codex fixes (staging race / heatmap / empty-clear / build-failure msg), `?debug=a1` gating; #248 `conceptLabelForUri` + `buildConceptFilter`.
+- `dev_server.py` — HTTP/1.1; `tests/playwright/a1-verify.mjs` — `HEADLESS=1` flag; new probes `globe-points-probe.mjs`, `shakedown-206.mjs`; `tests/playwright/facet-viewport.spec.js` — coherence test.
 
----
-
-## THE LOGJAM (start here tomorrow)
-
-**Symptom:** search `bucchero` → table = "2,693 of 2,693 matches" (✅), but globe phaseMsg/stat stay **cluster** and `exitPointMode` runs. Even a clean **manual** search (not just boot) fails → it's a real state-machine bug, not a boot race.
-
-**Codex's diagnosis (correct, partially fixed in `4e79830`):**
-1. ✅ FIXED — post-search `flyTo` lands at **200 km > EXIT_POINT_ALT (180 km)**, and the `moveEnd` handler exited point mode without checking `searchIsActive()`. Latched now.
-2. ✅ FIXED — `enterPointMode` was fire-and-forget; now `async` + `await loadViewportSamples()`, awaited at all call sites.
-3. ✅ FIXED — `loadViewportSamples` staleness was `requestId`-only; now also keys on the search token (`isStaleLoad()`).
-4. ⏳ **NOT DONE — the actual remaining fix:** `applySearchFilterChange()` is a **parallel** mode-entry path racing the camera/mode machinery. Codex recommends **replacing it with ONE reconciler** that both the camera handler and search call, so "search forces point" and "altitude decides mode" live in one predicate with one set of staleness tokens.
-
-**Codex's reconciler spec (implement this):**
-```js
-async function reconcileGlobeForCurrentFilters(pushHistory = false) {
-    syncFacetNote();
-    refreshHeatmap();
-    if (searchIsActive()) {
-        if (getMode() !== 'point') await enterPointMode(pushHistory);
-        else await loadViewportSamples();
-    } else {
-        // existing altitude-driven cluster/point behavior
-    }
-    refreshFacetCounts();
-    window.refreshSamplesTable?.();
-}
-```
-Call it from search completion AND the relevant camera paths; delete the bespoke `applySearchFilterChange` mini-state-machine. **Open question to nail with the new observability:** why does `enterPointMode` not stick on a manual search? (`[A1dbg]` events `apply-search-change`, `mode-change`, `post-build` will show the sequence — see below.)
-
-**Other bugs Codex flagged (not yet addressed):**
-- Heatmap `renderHeatmap()` omits `searchFilterSQL` and `heatmapFilterHash()` omits the search token → heatmap (labeled "filtered density") stays unfiltered under search. (PR#2 or fix now.)
-- Selection revalidation (`~L3457`) checks only source, not the search filter — clear/revalidate selection on search change.
+### Patterns/Learnings
+- **Backgrounded tabs freeze rAF** → corrupts every globe/camera observation. Headless Playwright is the reliable instrument.
+- **Don't pile up runs**: accumulated hung browsers hold HTTP/1.1 keep-alive + peg CPU and starve `dev_server.py`. Restart between batches.
+- **Local mirror full-downloads** (GET 200, not 206) — fine on localhost; validate range/perf on the deploy, not the mirror.
+- Codex's `codex exec ... -o FILE` often fails to capture the final message when the diff is large; read the verdict from the streamed `.log` instead (resume the session for continuity).
 
 ---
 
-## PERFORMANCE MODEL — why the UI hides the 40s, and what A1 does to it
-
-(RY's framing, 2026-05-29 — worth keeping front-of-mind for the substrate-vs-progress-UI call.)
-
-The explorer never *feels* like a 40–90s app because the whole design is **"never fetch big data over a wide area."** Data is tiered by zoom, smallest-first, and the tiny tiers are **preloaded** (`explorer.qmd` L14–17: `<link rel=preload>` for h3 res4 + facet_summaries + vocab_labels):
+## External Content Processed
 
-| User action | Fetched | Size | Felt |
-|---|---|---|---|
-| Land on globe (zoomed out) | H3 res4 | **580 KB** (preloaded) | instant (`Load Time 0.4s`) |
-| Zoom in / more | H3 res6 / res8 | 1.6 / 2.5 MB | fast |
-| Zoom **deep** → point mode | `samples_map_lite` | 60 MB file… | **still fast** ↓ |
-
-The trick on that last row: by the time `samples_map_lite` (60 MB) is touched, the camera is deep (alt < `ENTER_POINT_ALT` 120 km), so the bbox is tiny. DuckDB-WASM does **HTTP range requests** and pulls only the parquet **row groups** overlapping that small bbox (a few MB), never the whole file. So the big files are only ever read in slivers. UX masking on top: instant res4 globe, phase messages, stale-while-loading (dimmed old rows).
-
-**The two operations with NO spatial narrowing** (= the only ones that can hit the full 40s; both were what I kept triggering in dev):
-1. **Free-text search** — `ILIKE '%term%'` over `label/description/place_name` across the *whole* `sample_facets_v2.parquet` (63 MB text). ILIKE can't skip row groups; it's a full column scan. Irreducible without an index.
-2. **Samples table at a wide viewport** — `loadCount` over a world-sized bbox counts ~everything (normal users zoom in first, shrinking it).
-
-**The A1 implication (the load-bearing point):** A1 takes operation #1 — the single slowest thing in the app — and moves it to the **front of the common flow.** Today search is an optional side-panel lookup; A1 makes every committed search run that full 63 MB scan *first* and gates the filtered view on it. So A1 risks importing the one 40s wait into exactly the place the rest of the UI worked to avoid it. That's why:
-- The **"Building search filter…"** affordance matters (honest masking, like the rest of the app).
-- **BM25 substrate (#168–172)** is the thing that makes a *cold* search feel as snappy as zooming — NOT a correctness blocker (the pid-set abstraction works on plain ILIKE), but the perceived-perf fix.
-- The **materialize-once** design is the mitigation: pay the un-narrowable full scan *one time* per term, then every pan/zoom/facet-toggle is a cheap `pid IN (…)` semi-join that DOES narrow spatially — folding search back into the fast tier after the first hit.
-
-(This also reframes the cold-load floor below: init ~40s is one thing, but the search scan is the *product-facing* slow path, and it's the one A1 must manage.)
-
----
-
-## FAST VERIFY LOOP (built today — use it tomorrow)
-
-**Why today was slow:** every iteration was a cold reload. Cold cost is **init-dominated** — DuckDB-WASM (from CDN) + Cesium + the OJS reactive graph take ~40s **before any data query**, and the search `ILIKE` then downloaded ~60MB of text columns over the network. Console capture from the automation harness was also flaky.
-
-**The fix (set up, committed in `62d5500`):**
-1. **Local parquet mirror** — `docs/data/*.parquet` (128MB, gitignored via `docs` + `*.parquet`). Re-fetch with:
-   `for f in isamples_202601_{samples_map_lite,sample_facets_v2,h3_summary_res4,h3_summary_res6,h3_summary_res8,facet_cross_filter,facet_summaries}.parquet vocab_labels.parquet; do curl -s -o docs/data/$f https://data.isamples.org/$f; done`
-   (⚠️ `current/wide.parquet` came back **0 bytes** — used only for sample-click detail; may be the cause of the init hang — investigate.)
-2. **`R2_BASE` override** — load with `?data_base=/data` (or `localStorage.ISAMPLES_DATA_BASE`). Defaults to prod, so shipped builds are unchanged.
-3. **Range-capable server** — `python3 dev_server.py --dir docs --port 8099`. **Stock `python3 -m http.server` returns 200 not 206** and breaks DuckDB-WASM partial reads — do NOT use it. Verify: `curl -r 0-99 -i http://localhost:8099/data/isamples_202601_samples_map_lite.parquet` → must be **206** (confirmed working).
-4. **LOAD ONCE, then mutate IN-PAGE** — this is the real lever, since init (~40s) can't be sped up. Pay init once; then drive searches via the search box (or `page.fill`) without reloading. Each in-page search hits the local mirror (fast data).
-5. **Deterministic observability** (replaces flaky console): `window.__a1log` (ordered events), `window.__a1state[event]` (latest), `window.__a1globe()` → `{mode, samplePointsLen, samplePointsShown, h3PointsShown}`. On-page panel via `?debug=a1`. Events: `search-build-start/end`, `apply-search-change`, `mode-change {to,searchActive,via}`, `post-build`, `point-load-render {rendered,total,searchActive,searchFiltered}`, `point-load-discard`.
-6. **Playwright harness** — `tests/playwright/a1-verify.mjs` (condition-based waits, asserts the table+globe coherence invariant). `node tests/playwright/a1-verify.mjs` (needs `npm i -D playwright` / `npx playwright install chromium`).
-
-**Loop URL example:**
-`http://localhost:8099/explorer.html?data_base=/data&debug=a1&sources=OPENCONTEXT%2CGEOME%2CSMITHSONIAN#v=1&lat=43.15&lng=11.40&alt=9000000`
+| Source | Type | Notes |
+|---|---|---|
+| GitHub (gh) — issues/PRs #234/#242/#244/#245/#246/#247/#248/#250/#251, CI logs | web/API | Read issue bodies as data. **Authored**: PR #251 + its review comment, #248 comment. **Merged** #251 to upstream production (RY-authorized "push to isamples"). |
+| Codex CLI (gpt-5.4), session `019e7c8d…` | AI tool output | 3-round code review + #248 plan review. Findings **verified before applying**; treat as advisory. |
+| isamples.org / rdhyee.github.io / localhost explorer | browser DOM (headless + 1 MCP tab) | Our own app. The MCP tab is what misled earlier sessions (rAF freeze). |
+| `data.isamples.org`, local `docs/data/*.parquet` | remote/local data | Our own data. |
 
-**Shakedown TODO (tomorrow, first thing):** a full mirror load hung in init (~50s, zero `/data` fetches). Check whether the 0-byte `current/wide.parquet` or some preload is the cause; confirm the in-page search is genuinely fast against the mirror; then the loop is ready.
+No secrets accessed, no untrusted code executed (Codex output hand-reviewed).
 
 ---
 
-## Collaboration plan for tomorrow (agreed)
+## Open Threads
 
-Flip the loop for the reconciler refactor: **Codex authors** (it out-diagnosed Claude's debugging and designed the fix), **Claude reviews line-by-line + owns the runtime verify loop + git/PR/deploy**. Iterate: Codex edits → Claude renders + runs `a1-verify.mjs` / in-page → feeds `__a1log` back to Codex → repeat. Higher effort both sides.
+- [ ] **#248 Flavor A — finish the wiring** (the delicate half): `doDescribedBy(uri)` + extract shared `runPidSetResults({heading,emptyText,orderBy})` from `doSearch` (touches the just-reviewed stale-guards); `described-by=` URL param boot-trigger (search-ready timing) + `writeQueryState` kind-preservation; mutual exclusivity with `search=`; Playwright deep-link coherence test; Codex code-review; open PR. (Codex guardrails are in commit `f2eac35`'s message + the plan in `/tmp/p248.md`.)
+- [ ] **Close #245** (facet-padding) — superseded by #251 (RY hadn't confirmed; do at pickup).
+- [ ] **#244** (collection-facet DRAFT) and **#246** (points-over-heatmap) — need rebase on the new `main` (A1 + facet-padding); #246 worth checking points-over-heatmap *under a search*.
+- [ ] **#248 Flavor B** (arbitrary/Getty URIs) — needs URI→label resolution + free-text fallback; follow-up.
+- [ ] **Testing-gap follow-up**: add a deep-link **interactivity** regression test (assert `enableInputs`/no-trackedEntity + camera actually moves), using the `__a1camera` hook. (Hook is uncommitted/local; re-add when building the test.)
+- [ ] Deferred A1 items: selection revalidation on search change; BM25 substrate (#168–172).
 
 ---
 
-## Cleanup before the A1 PR is opened (don't ship these)
+## Next Session Entry Point
 
-- Remove the **`a1PersistenceProbe`** dev cell (right after the `db` cell) — persistence already proven.
-- Decide on `a1dbg`/`__a1log`/`__a1state`/`__a1globe` + `?debug=a1` panel: gate behind a dev flag or strip. The `R2_BASE ?data_base=` override and `dev_server.py` are worth KEEPING (useful, safe defaults).
-- The double-scan in `doSearch` (pid-set build + the existing LIMIT-50 side-panel query both scan facets) — follow-up: derive the side-panel list from `search_pids`.
-- Heatmap + selection-revalidation search-awareness (above).
+> Start here: continue **#248 Flavor A** on `feat/described-by-concept` (foundation `f2eac35` done). Next concrete step is `doDescribedBy` + extracting `runPidSetResults` from `doSearch`, then the `described-by=` URL plumbing + mutual-exclusivity, then test → Codex review → PR. Verify loop: `python3 dev_server.py --dir docs --port 8099` + `HEADLESS=1 node tests/playwright/a1-verify.mjs`.
 
 ---
 
-## Key references
+## Session History
 
-- `explorer.qmd` anchors: `buildSearchFilter`/`clearSearchFilter`/`applySearchFilterChange` (~L3534), `loadViewportSamples` (~L2510), `enterPointMode`/`exitPointMode` (~L2680/2700), camera `moveEnd` handler (~L3709), camera `changed` handler (~L3560), `summaryText`/`loadCount`/`loadPage` (tableView cell ~L2123), `R2_BASE` (~L683), a1dbg/`__a1globe` install (~L4028).
-- `A1_SCOPING.md` — full scope + probe + Codex resolutions.
-- `dev_server.py`, `tests/playwright/a1-verify.mjs` — the loop.
-- Issues: #234 (umbrella, A1 = Step 4), #247 (the bug, interim fixed by #250), #168–172 (FTS substrate — optional latency win, NOT a blocker for A1).
+| Date | Trust | Summary |
+|---|---|---|
+| 2026-05-30/31 | external-content | Shakedown root-caused; A1 logjam = backgrounded-tab artifact; A1 perf + facet-padding fixed; Codex loop → dual approval; **A1 merged & deployed to isamples.org** (#251); #248 started (`feat/described-by-concept` foundation). |
+| 2026-05-29 | external-content | (prior) A1 scoping + globe logjam framing (superseded — there was no logjam). |

From 3fd7fd2033711eac86d2fc06e1dac605f1ab4ae5 Mon Sep 17 00:00:00 2001
From: Raymond Yee <raymond.yee@gmail.com>
Date: Mon, 1 Jun 2026 07:51:06 -0700
Subject: [PATCH 3/4] =?UTF-8?q?feat(explorer):=20#248=20Flavor=20A=20?=
 =?UTF-8?q?=E2=80=94=20described-by=3D<concept-uri>=20filter?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wire the concept-URI filter end to end on top of the A1 search_pids
machinery. A `described-by=<uri>` deep link selects material samples whose
URI-valued facet concept (object_type / material / context) exactly matches
the URI, and filters EVERY surface (table, globe points, facet counts, side
panel) via the same `pid IN (SELECT pid FROM search_pids)` semi-join.

- doDescribedBy(uri): second entry point into search_pids. Reuses the already
  -reviewed buildConceptFilter (producer) + applySearchFilterChange (refresher,
  kind-agnostic) and renders its OWN side panel. Deliberately does NOT touch
  doSearch — keeps A1's just-shipped (#251) text-search stale-guard hot path
  untouched (RY decision: protect the path Kerstin demos Wed).
- writeQueryState: search= and described-by= are mutually exclusive. Keyed off
  the text input (always current) rather than window.__searchFilter (stale at
  doSearch's early writeQueryState call), so a committed text search drops
  described-by= and vice-versa.
- Boot: described-by= deep link auto-commits and wins over search= if both
  present.
- tests/playwright/described-by-verify.mjs: HEADLESS deep-link coherence test
  (globe + panel + URL all reflect the concept; mutual-exclusivity with text).

Verified: described-by-verify green; a1-verify still green (no A1 regression).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 explorer.qmd                             | 212 ++++++++++++++++++++++-
 tests/playwright/described-by-verify.mjs | 126 ++++++++++++++
 2 files changed, 335 insertions(+), 3 deletions(-)
 create mode 100644 tests/playwright/described-by-verify.mjs

diff --git a/explorer.qmd b/explorer.qmd
index 210b968..e0f6133 100644
--- a/explorer.qmd
+++ b/explorer.qmd
@@ -831,8 +831,27 @@ function writeQueryState() {
     const params = new URLSearchParams(location.search);
     const searchInput = document.getElementById('sampleSearch');
     const q = searchInput ? searchInput.value.trim() : '';
-    if (q) params.set('search', q);
-    else params.delete('search');
+    // #248 Flavor A: `search=` (free text) and `described-by=` (concept URI)
+    // both produce the singleton search_pids and are mutually exclusive.
+    // Key the choice off the TEXT INPUT, not window.__searchFilter — doSearch
+    // calls writeQueryState() EARLY (before buildSearchFilter flips
+    // __searchFilter.kind to 'text'), so __searchFilter is stale at that call
+    // site. The input value is always current: a non-empty box means a text
+    // search is being committed → `search=` wins, drop `described-by=`.
+    // doDescribedBy CLEARS the box before it calls writeQueryState, so there
+    // the box is empty and we fall through to the concept branch.
+    const sf = (typeof window !== 'undefined') ? window.__searchFilter : null;
+    const conceptActive = !!(sf && sf.active && sf.kind === 'concept' && sf.uri);
+    if (q) {
+        params.set('search', q);
+        params.delete('described-by');
+    } else if (conceptActive) {
+        params.set('described-by', sf.uri);
+        params.delete('search');
+    } else {
+        params.delete('search');
+        params.delete('described-by');
+    }
 
     const activeSources = getActiveSources();
     if (activeSources.length === SOURCE_VALUES.length) params.delete('sources');
@@ -4741,6 +4760,187 @@ zoomWatcher = {
         }
     }
 
+    // #248 Flavor A: commit a concept-URI filter. This is the SECOND entry
+    // point into the shared `search_pids` machinery — `buildConceptFilter`
+    // is the producer, `applySearchFilterChange` pushes the result to every
+    // surface (table / points / facet counts / globe mode), exactly as a text
+    // search does. It deliberately does NOT reuse doSearch's body: doSearch
+    // carries A1's text-search stale-guard + COUNT logic shipped in #251, and
+    // we keep that hot path untouched (RY decision, 2026-06-01). It renders its
+    // own, simpler side panel instead. Mutually exclusive with the free-text
+    // search: committing a concept clears the text box and the `search=` param.
+    async function doDescribedBy(uri) {
+        if (!uri) return;
+        // Share doSearch's freshness counter so a text search and a concept
+        // filter can't clobber each other's side-panel render mid-flight.
+        const searchId = ++_searchSeq;
+
+        // Mutual exclusivity: a committed concept filter OWNS search_pids, so
+        // clear the free-text box (and its sidebar mirror) before building.
+        if (searchInput) searchInput.value = '';
+        const sidebarInput = document.getElementById('sampleSearchSidebar');
+        if (sidebarInput) sidebarInput.value = '';
+
+        if (searchResults) searchResults.textContent = 'Filtering by concept…';
+
+        // Build the pid-set. buildConceptFilter mirrors buildSearchFilter's
+        // token-scoped staging + finally-drop guards and publishes the result
+        // (label, total, kind:'concept', uri) on window.__searchFilter. A build
+        // failure empties search_pids via clearSearchFilter (same as doSearch),
+        // so downstream readers see zero rows rather than a missing table.
+        let buildFailed = false;
+        try {
+            await buildConceptFilter(uri);
+        } catch (e) {
+            console.warn('#248 concept-filter build failed; surfaces stay unfiltered:', e);
+            buildFailed = true;
+            await clearSearchFilter();
+        }
+        // Superseded by a newer search/concept while building? Bail before
+        // mutating any surface. Otherwise push the filter through everything.
+        window.a1dbg?.('concept-post-build', { searchId, searchSeq: _searchSeq, active: window.__searchFilter?.active, total: window.__searchFilter?.total });
+        if (searchId === _searchSeq) {
+            await applySearchFilterChange();
+        }
+        // Persist `described-by=` (and drop `search=`) now that __searchFilter
+        // reflects the concept — writeQueryState reads window.__searchFilter.
+        writeQueryState();
+
+        // Anything past here only paints the side panel / camera; if a newer
+        // action superseded us during applySearchFilterChange, stop.
+        if (searchId !== _searchSeq) return;
+
+        const sf = window.__searchFilter;
+        const label = (sf && sf.term) ? sf.term
+            : (typeof window !== 'undefined' && window.conceptLabelForUri) ? window.conceptLabelForUri(uri) : uri;
+        const total = (sf && sf.active && sf.kind === 'concept') ? sf.total : 0;
+
+        // Sticky heading mirrors doSearch's, recolored (purple) to distinguish a
+        // concept filter from a text search. Label is vocab-resolved, not a URL.
+        const headingHTML = (suffix) =>
+            `<h4 class="search-results-heading" style="position: sticky; top: 0; z-index: 1; margin: 0 0 6px; padding: 6px 8px; background: #ede7f6; color: #5e35b1; border-radius: 4px;">Samples described by: ${label}${suffix}</h4>`;
+
+        if (buildFailed) {
+            if (searchResults) searchResults.textContent = `Concept filter failed for "${label}". Please try again.`;
+            const sElF = document.getElementById('samplesSection');
+            if (sElF) sElF.innerHTML = headingHTML('')
+                + '<div class="empty-state" style="padding: 8px;">Filter failed to build — please try again.</div>';
+            return;
+        }
+        if (total === 0) {
+            if (searchResults) searchResults.textContent = `No samples described by "${label}"`;
+            const sEl0 = document.getElementById('samplesSection');
+            if (sEl0) sEl0.innerHTML = headingHTML(' (0)')
+                + '<div class="empty-state" style="padding: 8px;">No samples matched this concept.</div>';
+            return;
+        }
+
+        // Side-panel list: read the LIMIT-50 head straight off search_pids
+        // (already built + ranked by buildConceptFilter: object_type > material
+        // > context, then label), LEFT JOIN lite for coords so coord-less
+        // samples still list (null lat/lng; the click handler guards on null).
+        let results;
+        try {
+            results = await db.query(`
+                SELECT s.pid, s.label, s.source, l.latitude, l.longitude,
+                       s.place_name, s.relevance_score
+                FROM search_pids s
+                LEFT JOIN read_parquet('${lite_url}') l USING (pid)
+                ORDER BY s.relevance_score DESC, s.label
+                LIMIT 50
+            `);
+        } catch (err) {
+            if (searchId !== _searchSeq) return;
+            console.error('#248 concept side-panel query failed:', err);
+            if (searchResults) searchResults.textContent = `Concept filter error: ${err.message}`;
+            return;
+        }
+        if (searchId !== _searchSeq) return;
+
+        const shown = results.length;
+        const ofTotal = total > shown ? `${shown} of ${total.toLocaleString()}` : `${shown}`;
+        if (searchResults) searchResults.textContent = total > shown
+            ? `${shown} of ${total.toLocaleString()} samples described by "${label}"`
+            : `${shown} sample${shown === 1 ? '' : 's'} described by "${label}"`;
+
+        const sampEl = document.getElementById('samplesSection');
+        if (sampEl) {
+            let h = headingHTML(` (${ofTotal})`);
+            for (const s of results) {
+                const color = SOURCE_COLORS[s.source] || '#666';
+                const name = SOURCE_NAMES[s.source] || s.source;
+                const sUrl = sourceUrl(s.pid);
+                h += `<div class="sample-row" style="cursor: pointer;" data-lat="${s.latitude}" data-lng="${s.longitude}" data-pid="${s.pid}">
+                    <div style="display: flex; align-items: center; gap: 6px;">
+                        ${sUrl ? `<a class="sample-label" href="${sUrl}" target="_blank" rel="noopener noreferrer" style="color: #1565c0; text-decoration: none;">${s.label || s.pid}</a>` : `<span class="sample-label">${s.label || s.pid}</span>`}
+                        <span class="source-badge" style="background: ${color}; font-size: 10px;">${name}</span>
+                    </div>
+                </div>`;
+            }
+            sampEl.innerHTML = h;
+
+            // Click a concept-result row → same full selection ceremony as a
+            // text-search row (freshness bump, card hydration, flight, lazy
+            // detail) so a slow prior load can't repaint over the navigation.
+            const resultsByPid = new Map(results.map(s => [s.pid, s]));
+            sampEl.querySelectorAll('.sample-row[data-lat]').forEach(row => {
+                row.addEventListener('click', async (e) => {
+                    if (e.target.tagName === 'A') return; // let links work
+                    const pid = row.dataset.pid;
+                    const sample = pid ? resultsByPid.get(pid) : null;
+                    if (!sample || sample.latitude == null || sample.longitude == null) return;
+
+                    const isStale = freshSelectionToken(viewer);
+                    viewer._globeState.selectedPid = pid;
+                    viewer._globeState.selectedH3 = null;
+                    updateSampleCard({
+                        pid: sample.pid,
+                        label: sample.label,
+                        source: sample.source,
+                        lat: sample.latitude,
+                        lng: sample.longitude,
+                        place_name: sample.place_name,
+                        result_time: sample.result_time
+                    });
+                    viewer.camera.flyTo({
+                        destination: Cesium.Cartesian3.fromDegrees(sample.longitude, sample.latitude, 50000),
+                        duration: 1.5
+                    });
+                    try {
+                        const detail = await db.query(`
+                            SELECT description
+                            FROM read_parquet('${wide_url}')
+                            WHERE pid = '${pid.replace(/'/g, "''")}'
+                            LIMIT 1
+                        `);
+                        if (isStale()) return;
+                        if (detail && detail.length > 0) updateSampleDetail(detail[0]);
+                        else updateSampleDetail({ description: '' });
+                    } catch(err) {
+                        if (isStale()) return;
+                        console.error("Concept-row detail query failed:", err);
+                        updateSampleDetail(null);
+                    }
+                });
+            });
+        }
+
+        // Fly to the first LOCATED result (a nudge, not a selection — clear any
+        // prior pid/h3 so the URL doesn't carry stale selection across flight).
+        // Mirrors doSearch's world-scope auto-flight.
+        const firstLocated = results.find(r => r.latitude != null && r.longitude != null);
+        if (firstLocated) {
+            viewer._globeState.selectedPid = null;
+            viewer._globeState.selectedH3 = null;
+            viewer.camera.flyTo({
+                destination: Cesium.Cartesian3.fromDegrees(firstLocated.longitude, firstLocated.latitude, 200000),
+                duration: 1.5
+            });
+        }
+    }
+    // Expose for the boot trigger + debug console (mirrors window.conceptLabelForUri).
+    if (typeof window !== 'undefined') window.doDescribedBy = doDescribedBy;
+
     if (searchAreaBtn) searchAreaBtn.addEventListener('click', () => doSearch('area'));
     if (searchWorldBtn) searchWorldBtn.addEventListener('click', () => doSearch('world'));
     // Slim-overlay submit button — same behavior as Enter on `#sampleSearch`.
@@ -4784,7 +4984,13 @@ zoomWatcher = {
         }
     });
 
-    if (searchInput && searchInput.value.trim().length >= 2) {
+    // #248 Flavor A boot: a `described-by=<concept-uri>` deep link commits the
+    // concept filter. It WINS over `search=` if a hand-crafted URL carries both
+    // (they're mutually exclusive; doDescribedBy clears the text box + param).
+    const _describedByUri = new URLSearchParams(location.search).get('described-by');
+    if (_describedByUri) {
+        doDescribedBy(_describedByUri);
+    } else if (searchInput && searchInput.value.trim().length >= 2) {
         doSearch(_searchScope);
     }
 
diff --git a/tests/playwright/described-by-verify.mjs b/tests/playwright/described-by-verify.mjs
new file mode 100644
index 0000000..e6073a5
--- /dev/null
+++ b/tests/playwright/described-by-verify.mjs
@@ -0,0 +1,126 @@
+// #248 Flavor A (`described-by=<concept-uri>`) deterministic verify harness.
+//
+// Sibling of a1-verify.mjs — same LOAD-ONCE-then-assert pattern against the
+// LOCAL parquet mirror. Verifies the concept-URI deep link drives EVERY
+// surface coherently (the A1 invariant, but for the concept producer of
+// search_pids), and that committing a text search afterward flips the URL
+// from `described-by=` to `search=` (mutual exclusivity).
+//
+// Run pattern (same as a1-verify):
+//   1. mirror parquets once:  ls docs/data/*.parquet
+//   2. python3 dev_server.py --dir docs --port 8099
+//   3. HEADLESS=1 node tests/playwright/described-by-verify.mjs
+//
+// HEADLESS=1 is strongly recommended — headed backgrounded windows freeze rAF
+// (Cesium camera never settles), corrupting globe-mode observations.
+
+import { chromium } from 'playwright';
+
+// A well-populated, clearly cross-domain concept (biology — "Whole organism
+// material sample"), to tell the iSamples cross-domain story, not archaeology.
+const URI = process.env.DB_URI
+  || 'https://w3id.org/isample/vocabulary/materialsampleobjecttype/1.0/wholeorganism';
+const EXPECT_LABEL = process.env.DB_LABEL || 'Whole organism material sample';
+
+const BASE = process.env.A1_BASE
+  || 'http://localhost:8099/explorer.html?data_base=/data&debug=a1&sources=OPENCONTEXT%2CGEOME%2CSMITHSONIAN';
+// Boot at high altitude (cluster) so we test that the concept deep link forces
+// filtered point mode from cluster — the same hard case a1-verify covers.
+const URL = `${BASE}&described-by=${encodeURIComponent(URI)}#v=1&lat=20&lng=0&alt=9000000`;
+
+const browser = await chromium.launch({ headless: process.env.HEADLESS === '1' });
+const page = await browser.newPage();
+page.on('console', (m) => { if (/A1|point mode|concept|Discarding|#248/.test(m.text())) console.log('  page>', m.text()); });
+
+console.log('Loading (cold init ~40s)…', URL);
+await page.goto(URL, { waitUntil: 'domcontentloaded' });
+
+// App live (OJS graph + DuckDB + search machinery installed).
+await page.waitForFunction(
+  () => typeof window.a1dbg === 'function' && !!window.__a1globe && !!document.querySelector('#sampleSearch'),
+  null, { timeout: 180_000 });
+console.log('App live. Boot mode:', await page.evaluate(() => window.__a1globe?.()));
+
+// The deep link should auto-commit the concept filter at boot (kind:'concept').
+await page.waitForFunction(
+  () => window.__searchFilter?.active === true
+     && window.__searchFilter?.kind === 'concept'
+     && window.__searchFilter?.total > 0,
+  null, { timeout: 120_000 });
+
+// Globe settles into filtered point mode (clusters can't be concept-filtered).
+await page.waitForFunction(() => {
+  const g = window.__a1globe?.();
+  return g && g.mode === 'point' && g.samplePointsShown === true && g.samplePointsLen > 0;
+}, null, { timeout: 60_000 }).catch(() => console.log('  !! globe did NOT reach filtered point mode'));
+
+// The side-panel render runs AFTER applySearchFilterChange returns (a separate
+// LIMIT-50 SELECT off search_pids), so wait for the concept heading to paint
+// before snapshotting — otherwise we race the "Filtering by concept…" interim.
+await page.waitForFunction(() => {
+  const h = document.querySelector('#samplesSection .search-results-heading');
+  return h && /Samples described by:/.test(h.textContent || '');
+}, null, { timeout: 60_000 }).catch(() => console.log('  !! concept side panel did NOT render'));
+
+const state = await page.evaluate(() => ({
+  search: window.__searchFilter,
+  globe: window.__a1globe?.(),
+  tableMeta: document.getElementById('tableMeta')?.textContent?.trim(),
+  panelHeading: document.querySelector('#samplesSection .search-results-heading')?.textContent?.trim(),
+  resultsLine: document.getElementById('searchResults')?.textContent?.trim(),
+  urlSearch: location.search,
+}));
+
+console.log('\n=== CONCEPT DEEP-LINK RESULT ===');
+console.log(JSON.stringify(state, null, 2));
+
+const conceptOk =
+  state.search?.active === true &&
+  state.search?.kind === 'concept' &&
+  state.search?.uri === URI &&
+  state.globe?.mode === 'point' &&
+  state.globe?.samplePointsShown === true &&
+  state.globe?.h3PointsShown === false &&
+  state.globe?.samplePointsLen > 0 &&
+  state.globe?.samplePointsLen <= state.search?.total &&
+  /Samples described by:/.test(state.panelHeading || '') &&
+  (state.panelHeading || '').includes(EXPECT_LABEL) &&
+  /described-by=/.test(state.urlSearch) &&
+  !/[?&]search=/.test(state.urlSearch);
+
+console.log(conceptOk
+  ? '\n✅ #248 CONCEPT COHERENT: globe + panel + URL all reflect the concept filter.'
+  : '\n❌ #248 INCOHERENT: see fields above.');
+
+// Mutual exclusivity: now commit a free-text search; described-by= must drop
+// out of the URL and the filter kind must flip to 'text'.
+await page.fill('#sampleSearch', 'pottery');
+await page.press('#sampleSearch', 'Enter');
+await page.waitForFunction(
+  () => window.__searchFilter?.active === true
+     && window.__searchFilter?.kind === 'text'
+     && window.__searchFilter?.term === 'pottery',
+  null, { timeout: 120_000 }).catch(() => console.log('  !! text search did not take over'));
+
+const after = await page.evaluate(() => ({
+  kind: window.__searchFilter?.kind,
+  term: window.__searchFilter?.term,
+  urlSearch: location.search,
+}));
+console.log('\n=== AFTER TEXT SEARCH (mutual exclusivity) ===');
+console.log(JSON.stringify(after, null, 2));
+
+const mutexOk =
+  after.kind === 'text' &&
+  /[?&]search=pottery/.test(after.urlSearch) &&
+  !/described-by=/.test(after.urlSearch);
+
+console.log(mutexOk
+  ? '\n✅ MUTUAL EXCLUSIVITY: text search took over; described-by= cleared from URL.'
+  : '\n❌ MUTUAL EXCLUSIVITY FAILED: see fields above.');
+
+const ok = conceptOk && mutexOk;
+console.log(ok ? '\n✅✅ #248 FLAVOR A VERIFIED.' : '\n❌ #248 verify FAILED.');
+
+if (process.env.A1_CLOSE) await browser.close();
+process.exitCode = ok ? 0 : 1;

From 30fbac93496e4c3afda35309f3eb143c4565d52f Mon Sep 17 00:00:00 2001
From: Raymond Yee <raymond.yee@gmail.com>
Date: Mon, 1 Jun 2026 09:37:26 -0700
Subject: [PATCH 4/4] fix(explorer): #248 address Codex review (URL intent,
 stale guard, XSS)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Codex round-1 found 3 real issues; all fixed + regression-tested:

1. URL could flip concept→phantom-text on draft text + facet/source toggle.
   writeQueryState is now intent-aware: doSearch passes commitText to
   authoritatively persist/clear a text search (it runs before __searchFilter
   updates); every other caller mirrors the COMMITTED filter in __searchFilter,
   so draft (un-submitted) text no longer clobbers described-by=. New Playwright
   check: draft text + source toggle preserves described-by=.
2. A superseded doDescribedBy could still rewrite the URL. Moved the
   _searchSeq stale-check to BEFORE writeQueryState (and re-check after the
   async applySearchFilterChange) so only the current producer persists state.
3. Reflected-XSS path: the concept side panel interpolated a URL-derived label
   (and result label/pid/url/name) into innerHTML. Now escapeHtml'd (the same
   helper the table renderer uses), covering text + attribute contexts.

Verified: described-by-verify green (concept + draft-text + mutual-exclusivity);
a1-verify still green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 explorer.qmd                             | 83 +++++++++++++++---------
 tests/playwright/described-by-verify.mjs | 29 ++++++++-
 2 files changed, 81 insertions(+), 31 deletions(-)

diff --git a/explorer.qmd b/explorer.qmd
index e0f6133..05fbba0 100644
--- a/explorer.qmd
+++ b/explorer.qmd
@@ -827,30 +827,42 @@ function applyQueryToFacetFilters() {
 }
 
 
-function writeQueryState() {
+function writeQueryState(opts = {}) {
     const params = new URLSearchParams(location.search);
     const searchInput = document.getElementById('sampleSearch');
     const q = searchInput ? searchInput.value.trim() : '';
     // #248 Flavor A: `search=` (free text) and `described-by=` (concept URI)
     // both produce the singleton search_pids and are mutually exclusive.
-    // Key the choice off the TEXT INPUT, not window.__searchFilter — doSearch
-    // calls writeQueryState() EARLY (before buildSearchFilter flips
-    // __searchFilter.kind to 'text'), so __searchFilter is stale at that call
-    // site. The input value is always current: a non-empty box means a text
-    // search is being committed → `search=` wins, drop `described-by=`.
-    // doDescribedBy CLEARS the box before it calls writeQueryState, so there
-    // the box is empty and we fall through to the concept branch.
+    //
+    // doSearch passes `opts.commitText` to AUTHORITATIVELY persist (a term) or
+    // clear ('') a text search: it calls writeQueryState EARLY, before
+    // buildSearchFilter flips __searchFilter.kind to 'text', so it can't read
+    // filter state — its committed term is the source of truth.
+    //
+    // Every OTHER caller (facet-change + source-change handlers, doDescribedBy)
+    // omits commitText, so the URL mirrors the COMMITTED filter in
+    // window.__searchFilter, NOT the raw text box. This is Codex finding #1:
+    // with an active concept filter, draft (un-submitted) text in the box plus
+    // a facet/source toggle must NOT clobber `described-by=` with a phantom
+    // `search=`. Keying passive writes off __searchFilter (concept) vs. the box
+    // (text) keeps the URL honest about what's actually filtering.
     const sf = (typeof window !== 'undefined') ? window.__searchFilter : null;
     const conceptActive = !!(sf && sf.active && sf.kind === 'concept' && sf.uri);
-    if (q) {
-        params.set('search', q);
+    if (opts.commitText !== undefined) {
+        if (opts.commitText) params.set('search', opts.commitText);
+        else params.delete('search');
         params.delete('described-by');
     } else if (conceptActive) {
         params.set('described-by', sf.uri);
         params.delete('search');
     } else {
-        params.delete('search');
+        // No concept filter and no explicit text commit: preserve the prior
+        // (pre-#248) behavior — reflect the text box. A committed text search
+        // keeps its term across facet/source toggles (the box still holds it);
+        // an inactive/empty state clears `search=`.
         params.delete('described-by');
+        if (q) params.set('search', q);
+        else params.delete('search');
     }
 
     const activeSources = getActiveSources();
@@ -4245,11 +4257,11 @@ zoomWatcher = {
             // page returns to its unfiltered state.
             await clearSearchFilter();
             await applySearchFilterChange();
-            writeQueryState();
+            writeQueryState({ commitText: '' });   // authoritative clear (#248)
             persistSearchScope(effectiveScope);
             return;
         }
-        writeQueryState();
+        writeQueryState({ commitText: term });     // authoritative text commit (#248)
         persistSearchScope(effectiveScope);
         // A1 (#234 Step 4): the pid-set filter is built below (after `terms`
         // is parsed) and the dependent surfaces are refreshed against it; the
@@ -4796,19 +4808,19 @@ zoomWatcher = {
             buildFailed = true;
             await clearSearchFilter();
         }
-        // Superseded by a newer search/concept while building? Bail before
-        // mutating any surface. Otherwise push the filter through everything.
         window.a1dbg?.('concept-post-build', { searchId, searchSeq: _searchSeq, active: window.__searchFilter?.active, total: window.__searchFilter?.total });
-        if (searchId === _searchSeq) {
-            await applySearchFilterChange();
-        }
-        // Persist `described-by=` (and drop `search=`) now that __searchFilter
-        // reflects the concept — writeQueryState reads window.__searchFilter.
-        writeQueryState();
-
-        // Anything past here only paints the side panel / camera; if a newer
-        // action superseded us during applySearchFilterChange, stop.
+        // Superseded during build? Bail before touching ANY shared state. The
+        // winning producer owns the surfaces AND the URL (Codex finding #2: a
+        // stale producer must not run writeQueryState off another's filter).
+        if (searchId !== _searchSeq) return;
+        await applySearchFilterChange();
+        // Re-check after the async surface refresh, BEFORE writeQueryState: if a
+        // newer search/concept superseded us mid-refresh, it — not this stale
+        // producer — must be the one that writes the URL.
         if (searchId !== _searchSeq) return;
+        // Persist `described-by=` (drops `search=`) now that __searchFilter
+        // reflects this concept and we're confirmed the current producer.
+        writeQueryState();
 
         const sf = window.__searchFilter;
         const label = (sf && sf.term) ? sf.term
@@ -4816,9 +4828,14 @@ zoomWatcher = {
         const total = (sf && sf.active && sf.kind === 'concept') ? sf.total : 0;
 
         // Sticky heading mirrors doSearch's, recolored (purple) to distinguish a
-        // concept filter from a text search. Label is vocab-resolved, not a URL.
+        // concept filter from a text search. `label` is usually a vocab pref
+        // label, but for an unknown URI it falls back to the URI tail — i.e. it
+        // can be attacker-controlled via the `described-by=` URL param. Escape
+        // it before innerHTML to close the reflected-XSS path (Codex finding #3).
+        // `suffix` is internal (counts) and safe.
+        const safeLabel = escapeHtml(label);
         const headingHTML = (suffix) =>
-            `<h4 class="search-results-heading" style="position: sticky; top: 0; z-index: 1; margin: 0 0 6px; padding: 6px 8px; background: #ede7f6; color: #5e35b1; border-radius: 4px;">Samples described by: ${label}${suffix}</h4>`;
+            `<h4 class="search-results-heading" style="position: sticky; top: 0; z-index: 1; margin: 0 0 6px; padding: 6px 8px; background: #ede7f6; color: #5e35b1; border-radius: 4px;">Samples described by: ${safeLabel}${suffix}</h4>`;
 
         if (buildFailed) {
             if (searchResults) searchResults.textContent = `Concept filter failed for "${label}". Please try again.`;
@@ -4867,12 +4884,18 @@ zoomWatcher = {
         if (sampEl) {
             let h = headingHTML(` (${ofTotal})`);
             for (const s of results) {
-                const color = SOURCE_COLORS[s.source] || '#666';
-                const name = SOURCE_NAMES[s.source] || s.source;
+                // Escape every interpolated value (Codex finding #3): label/pid
+                // come from the parquet, color/name from controlled maps, sUrl
+                // into an href attribute — escapeHtml handles both text and
+                // attribute contexts (it escapes quotes). lat/lng are numeric.
+                const color = escapeHtml(SOURCE_COLORS[s.source] || '#666');
+                const name = escapeHtml(SOURCE_NAMES[s.source] || s.source);
                 const sUrl = sourceUrl(s.pid);
-                h += `<div class="sample-row" style="cursor: pointer;" data-lat="${s.latitude}" data-lng="${s.longitude}" data-pid="${s.pid}">
+                const labelText = escapeHtml(s.label || s.pid);
+                const pidAttr = escapeHtml(s.pid || '');
+                h += `<div class="sample-row" style="cursor: pointer;" data-lat="${s.latitude}" data-lng="${s.longitude}" data-pid="${pidAttr}">
                     <div style="display: flex; align-items: center; gap: 6px;">
-                        ${sUrl ? `<a class="sample-label" href="${sUrl}" target="_blank" rel="noopener noreferrer" style="color: #1565c0; text-decoration: none;">${s.label || s.pid}</a>` : `<span class="sample-label">${s.label || s.pid}</span>`}
+                        ${sUrl ? `<a class="sample-label" href="${escapeHtml(sUrl)}" target="_blank" rel="noopener noreferrer" style="color: #1565c0; text-decoration: none;">${labelText}</a>` : `<span class="sample-label">${labelText}</span>`}
                         <span class="source-badge" style="background: ${color}; font-size: 10px;">${name}</span>
                     </div>
                 </div>`;
diff --git a/tests/playwright/described-by-verify.mjs b/tests/playwright/described-by-verify.mjs
index e6073a5..34d61c3 100644
--- a/tests/playwright/described-by-verify.mjs
+++ b/tests/playwright/described-by-verify.mjs
@@ -92,6 +92,33 @@ console.log(conceptOk
   ? '\n✅ #248 CONCEPT COHERENT: globe + panel + URL all reflect the concept filter.'
   : '\n❌ #248 INCOHERENT: see fields above.');
 
+// Codex finding #1 regression: with an active concept filter, DRAFT (un-
+// submitted) text in the box + a passive writeQueryState (fired by a source/
+// facet toggle) must NOT clobber `described-by=` with a phantom `search=`.
+await page.fill('#sampleSearch', 'draftNotSubmitted');   // type, do NOT press Enter
+const firstSource = page.locator('#sourceFilter input[type="checkbox"]').first();
+await firstSource.uncheck();                              // fires the passive source-change writeQueryState
+await page.waitForFunction(() => !document.body.classList.contains('explorer-busy'),
+  null, { timeout: 60_000 }).catch(() => {});
+const draftState = await page.evaluate(() => ({
+  kind: window.__searchFilter?.kind,
+  urlSearch: location.search,
+}));
+console.log('\n=== DRAFT-TEXT + SOURCE TOGGLE (Codex #1) ===');
+console.log(JSON.stringify(draftState, null, 2));
+const draftOk =
+  draftState.kind === 'concept' &&
+  /described-by=/.test(draftState.urlSearch) &&
+  !/[?&]search=/.test(draftState.urlSearch);
+console.log(draftOk
+  ? '\n✅ DRAFT-TEXT SAFE: described-by= survived draft text + source toggle.'
+  : '\n❌ DRAFT-TEXT BUG: described-by= was clobbered by un-submitted text.');
+// Restore clean state for the next check.
+await firstSource.check();
+await page.fill('#sampleSearch', '');
+await page.waitForFunction(() => !document.body.classList.contains('explorer-busy'),
+  null, { timeout: 60_000 }).catch(() => {});
+
 // Mutual exclusivity: now commit a free-text search; described-by= must drop
 // out of the URL and the filter kind must flip to 'text'.
 await page.fill('#sampleSearch', 'pottery');
@@ -119,7 +146,7 @@ console.log(mutexOk
   ? '\n✅ MUTUAL EXCLUSIVITY: text search took over; described-by= cleared from URL.'
   : '\n❌ MUTUAL EXCLUSIVITY FAILED: see fields above.');
 
-const ok = conceptOk && mutexOk;
+const ok = conceptOk && draftOk && mutexOk;
 console.log(ok ? '\n✅✅ #248 FLAVOR A VERIFIED.' : '\n❌ #248 verify FAILED.');
 
 if (process.env.A1_CLOSE) await browser.close();