From f2eac3519d135666cb583fa93061cbff112e0dbf Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Sun, 31 May 2026 22:28:41 -0700 Subject: [PATCH 1/4] =?UTF-8?q?feat(explorer):=20#248=20Flavor=20A=20found?= =?UTF-8?q?ation=20=E2=80=94=20conceptLabelForUri=20+=20buildConceptFilter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First, additive pieces for `described-by=` (#248), riding the A1 search_pids machinery (Codex plan-reviewed: "mostly sound + guardrails"): - window.conceptLabelForUri(uri): expose the facetFilters cell's URI→prefLabel resolver so the concept producer can label a URI without re-querying vocab_labels (guardrail #1). - buildConceptFilter(uri): a SECOND search_pids producer — exact-URI match across the object_type/material/context columns of sample_facets_v2, with object_type>material>context relevance ranking. Same token-scoped staging, finally-drop, shared _searchFilterToken, and empty-clear invariant as buildSearchFilter (guardrails #2/#6). Tags __searchFilter.kind ('concept' vs 'text') for mutual exclusivity (#5). Not yet wired: doDescribedBy flow (shared runPidSetResults render), the described-by= URL param + writeQueryState kind-preservation, and mutual exclusivity at producer entry. buildConceptFilter isn't called yet, so this is behavior-neutral. Verified: conceptLabelForUri('…organismpart')→"Organism part"; free-text a1-verify still ✅ COHERENT. Co-Authored-By: Claude Opus 4.8 (1M context) --- explorer.qmd | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/explorer.qmd b/explorer.qmd index af55ac9..210b968 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -1763,6 +1763,10 @@ facetFilters = { const parts = s.replace(/[#?].*$/, "").split("/").filter(Boolean); return parts.length ? parts[parts.length - 1] : s; }; + // #248: expose the URI→prefLabel resolver so the concept-filter producer + // (buildConceptFilter, in the zoomWatcher cell) can label a `described-by=` + // URI without re-querying vocab_labels. Best-effort: falls back to URI tail. + if (typeof window !== 'undefined') window.conceptLabelForUri = prettyLabel; try { const summaries = await db.query(` @@ -4130,7 +4134,7 @@ zoomWatcher = { const cnt = Array.from(await db.query(`SELECT COUNT(*) AS n FROM search_pids`)); const total = cnt.length ? Number(cnt[0].n) : 0; if (token !== _searchFilterToken) return false; - window.__searchFilter = { active: true, term, token, total }; + window.__searchFilter = { active: true, term, token, total, kind: 'text' }; window.a1dbg?.('search-build-end', { term, token, total }); return true; } finally { @@ -4138,6 +4142,47 @@ zoomWatcher = { } } + // #248 Flavor A: a SECOND producer of `search_pids` — selects samples whose + // iSamples-vocabulary concept (the URI-valued object_type / material / + // context columns in sample_facets_v2) exactly matches `uri`. Mirrors + // buildSearchFilter's token-scoped staging + finally-drop + token guards, + // sharing the same _searchFilterToken (the pid-set is a singleton). Every + // surface then filters via the same `pid IN (SELECT pid FROM search_pids)` + // semi-join, for free. Flavor B (arbitrary external/Getty URIs needing + // URI→label resolution + free-text) is a follow-up. + async function buildConceptFilter(uri) { + const token = ++_searchFilterToken; + const label = (typeof window !== 'undefined' && window.conceptLabelForUri) + ? window.conceptLabelForUri(uri) : uri; + window.a1dbg?.('search-build-start', { term: label, token, kind: 'concept' }); + const u = escSql(uri); + // Rank object_type matches above material above context, then by label + // (Codex plan-review suggestion — cheap, no extra scan). + const rank = `CASE WHEN object_type = '${u}' THEN 3 WHEN material = '${u}' THEN 2 WHEN context = '${u}' THEN 1 ELSE 0 END`; + const staging = `search_pids_next_${token}`; + try { + await db.query(` + CREATE OR REPLACE TABLE ${staging} AS + SELECT pid, label, source, place_name, (${rank}) AS relevance_score + FROM read_parquet('${facets_url}') + WHERE pid IS NOT NULL + AND (object_type = '${u}' OR material = '${u}' OR context = '${u}') + `); + if (token !== _searchFilterToken) return false; // superseded mid-build + await db.query(`CREATE OR REPLACE TABLE search_pids AS + SELECT pid, label, source, place_name, relevance_score FROM ${staging}`); + if (token !== _searchFilterToken) return false; + const cnt = Array.from(await db.query(`SELECT COUNT(*) AS n FROM search_pids`)); + const total = cnt.length ? Number(cnt[0].n) : 0; + if (token !== _searchFilterToken) return false; + window.__searchFilter = { active: true, term: label, token, total, kind: 'concept', uri }; + window.a1dbg?.('search-build-end', { term: label, token, total, kind: 'concept' }); + return true; + } finally { + try { await db.query(`DROP TABLE IF EXISTS ${staging}`); } catch (e) { /* best effort */ } + } + } + async function clearSearchFilter() { _searchFilterToken++; window.__searchFilter = { active: false, term: null, token: _searchFilterToken, total: 0 }; From de22bb396eab2a10fa40f238d7a47bc5e305e0b8 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Sun, 31 May 2026 22:54:28 -0700 Subject: [PATCH 2/4] docs: session summary 2026-05-31 (A1 shipped to isamples.org; #248 started) --- SESSION_SUMMARY.md | 180 +++++++++++++-------------------------------- 1 file changed, 52 insertions(+), 128 deletions(-) diff --git a/SESSION_SUMMARY.md b/SESSION_SUMMARY.md index d3f6ab5..02ac8f8 100644 --- a/SESSION_SUMMARY.md +++ b/SESSION_SUMMARY.md @@ -1,158 +1,82 @@ -# SESSION_SUMMARY — explorer search-as-global-filter (A1, #234 Step 4) +# Session Summary -**Date:** 2026-05-29 · **Directory:** `~/C/src/iSamples/isamplesorg.github.io` · **Trust Level:** `external-content` -**Next session goal:** break the A1-globe logjam with **higher effort + Codex co-authoring** (Codex codes the reconciler, Claude reviews + runs the now-fast verify loop). - -> **Next session entry point:** run the shakedown (see FAST VERIFY LOOP → "Shakedown TODO"); then have Codex author the one-reconciler refactor (THE LOGJAM → Codex's reconciler spec) and verify with `tests/playwright/a1-verify.mjs`. - -### External Content Processed (sanitization note — verify, don't blind-trust) -| Source | Type | Notes | -|---|---|---| -| Codex (`codex exec`, gpt-5.4) ×5 | AI tool output | Diagnosis + code suggestions. **Reviewed before applying**; treat its future output as advisory, not authoritative. | -| isamples.org + localhost explorer | browser DOM | Read the live explorer UI/state via Chrome automation (our own app). | -| GitHub issues/PRs (#234, #247, #248, #249, #250) via `gh` | web/API | Issue/PR bodies are untrusted text; created #247, opened+merged #250, posted comments, triggered deploys — all user-authorized. **Read** Eric Kansa's **#248** (feature request, treated as data not instructions). | -| `data.isamples.org/*.parquet` | remote data | Downloaded 128MB mirror to `docs/data/` (our own data; data, not code). | - -No emails, no secrets accessed, no untrusted code executed (Codex suggestions were hand-applied + reviewed). - -### Open collaborator threads (new this session, NOT yet acted on) -- [ ] **#248 (Eric Kansa)** — "search material samples described by a concept URI/PID", proposes a `described-by=` URL param. Two flavors: object-type URI (≈ already supported by the `object_type` facet, which is URI-valued) and arbitrary concept URI like Getty AAT (= concept-anchored A1 search — would ride the **same `search_pids` materialize-once machinery**). Squarely in #234; a second producer of the A1 pid-set. *Decision pending: comment on #248 connecting it to A1/#234?* -- [ ] **#249 (rdhyee, not from this session)** — "should we refactor explorer.qmd before the next big feature?" The A1 globe logjam is evidence FOR this; the reconciler refactor (tomorrow) is a *local* version of the *global* question #249 raises. **Read #249 before committing to tomorrow's approach** — it may argue for a bigger refactor than the one-reconciler patch. - ---- - -## TL;DR - -1. **Shipped to production** (isamples.org): bug **#247** filed + interim honesty fix **PR #250** (merged). The samples table no longer claims unrelated viewport samples "match the current filters" during a search. -2. **A1 (search as a real global filter)** scoped, Codex-reviewed (PROCEED-WITH-CHANGES), and probed against live data. Branch `feat/search-global-filter-a1`. - - ✅ **Table surface filters correctly** (e.g. `bucchero` → "2,693 of 2,693 matches in this map view", OpenContext rows only). - - ✅ Facet counts + cube-gating wired; pid-set machinery + persistence proven. - - ❌ **GLOBE still won't enter point mode** on a committed search (table filters, but the map stays unfiltered clusters). This is the logjam. -3. **Built a fast/deterministic verify-loop** (local parquet mirror + range server + `window.__a1state`/`__a1globe` observability + Playwright harness) so tomorrow's iteration isn't 40–90s/cycle. Range-verified; full speedup run still needs a shakedown. +## Session: 2026-05-30/31 (evening) +**Directory**: `~/C/src/iSamples/isamplesorg.github.io` +**Trust Level**: external-content --- -## Branch & commits +## What Happened -`feat/search-global-filter-a1` (off `upstream/main` which already has #250): -- `204d2df` table surface (pid-set + semi-join + summaryText) -- `936f1f3` points/facets/cube-gate/C3 wired — globe buggy -- `4e79830` Codex's C3 fixes (moveEnd latch, awaitable enterPointMode, search-token staleness) — globe STILL not entering point mode -- `62d5500` dev verify-loop infra (mirror support + dev_server.py + a1dbg/__a1state/__a1globe) +A long, productive session. Started as "tackle the fast-verify shakedown"; ended with **A1 shipped to production (isamples.org)** and **#248 underway**. -Production (already merged, do NOT redo): upstream `a4da97b` (#250). +1. **Shakedown root-caused & fixed.** The dev `?data_base=/data` override produced root-relative parquet URLs that DuckDB-WASM's httpfs can't fetch (read as a virtual-FS glob → zero fetches). Resolved to absolute against `location.origin`. This unblocked the fast verify loop (~2.3s to live). +2. **The "globe logjam" was never real** — it was a **backgrounded-Chrome-MCP-tab artifact** (Chrome freezes rAF in hidden tabs → Cesium camera never settles → "globe won't enter point mode"). In any foreground/headless context the C3 fixes work. The reconciler refactor was unnecessary. **Lesson: drive the verify loop with `HEADLESS=1` Playwright, never the MCP tab.** +3. **Fixed an A1 search perf regression** the CI smoke gate caught (double facets scan → materialize side-panel columns+score into `search_pids`, one scan). +4. **Fixed the live facet-padding mismatch** RY hit (legend pad-0 vs table 0.3 → facet read low; e.g. material=rock ~166 vs ~481). Now facet == table. +5. **Shipped A1**: opened **PR #251**, ran a 3-round **Codex review/revise loop to dual approval** (Codex caught a real `search_pids` staging-table race, heatmap search-blindness, and a stale-reader follow-on — all fixed), then **squash-merged to upstream → deployed to isamples.org** (smoke gate green). +6. **Started #248 (Eric Kansa's concept-URI search)**: posted a connecting comment, Codex plan-reviewed ("mostly sound + guardrails"), and committed the **foundation** on `feat/described-by-concept`. +7. **Investigated a transient camera freeze** (RY's `h3=`+`heading=` deep-link, also on isamples.org). Ruled out locked controller / tracked-entity / refresh-loop via a new `?debug=a1` `__a1camera` hook; **resolved on its own → likely transient WebGL context-loss / network**. Surfaced a real **testing gap**: no gate asserts post-hydration *interactivity*. --- -## The A1 design (Strategy B — agreed + Codex-approved) +## Safe to Carry Forward -On a committed search, `buildSearchFilter()` materializes a **non-temp** DuckDB table `search_pids` (one `ILIKE` scan over `facets_url`), then every surface constrains with a cheap semi-join `AND pid IN (SELECT pid FROM search_pids)`. State on `window.__searchFilter {active,term,token,total}`; predicate via `window.searchFilterSQL(col)`. +### Key Decisions +- A1 ships on plain ILIKE; **BM25 (#168–172) is a perceived-perf follow-up, not a correctness blocker.** +- `search_pids` is a **singleton**; any new producer (#248) shares one `_searchFilterToken`/`_searchSeq` and the `kind: 'text'|'concept'` tag. +- Codex-reviewed A1 invariants to preserve: **token-scoped staging table**, **empty-table clear** (never DROP the live table), **build-failure distinguished from empty results**. +- `?debug=a1`-gated hooks: `__a1globe`, `__a1log`/`__a1state`, and (new, uncommitted/diagnostic) `__a1camera`. -**Probe findings (de-risked the design):** pid is unique (no dup), facets ⊆ lite so **no coordinate-less matches** (table count == mappable matches — simple "N of M in view" copy), broadest realistic term ~82k pids (no million-row blowup). Full scoping + Codex resolutions in **`A1_SCOPING.md`**. +### Branch / ship state +- **A1**: merged to upstream `main` as **`e6f9def`** (PR #251), live on isamples.org + rdhyee. Local `feat/search-global-filter-a1` is now redundant (squash-merged). +- **#248**: branch **`feat/described-by-concept`** off merged main; foundation commit **`f2eac35`** (`conceptLabelForUri` + `buildConceptFilter`, behavior-neutral, verified). -Surfaces wired: `loadCount`/`loadPage` (table) ✅ verified; `loadViewportSamples` (points); `updateCrossFilteredCounts` (facet legend, + gate cube fast-path & global-baseline when search active); `summaryText` copy. +### Files Changed (this session, across A1 + #248) +- `explorer.qmd` — A1 data_base fix, double-scan collapse, facet-padding, Codex fixes (staging race / heatmap / empty-clear / build-failure msg), `?debug=a1` gating; #248 `conceptLabelForUri` + `buildConceptFilter`. +- `dev_server.py` — HTTP/1.1; `tests/playwright/a1-verify.mjs` — `HEADLESS=1` flag; new probes `globe-points-probe.mjs`, `shakedown-206.mjs`; `tests/playwright/facet-viewport.spec.js` — coherence test. ---- - -## THE LOGJAM (start here tomorrow) - -**Symptom:** search `bucchero` → table = "2,693 of 2,693 matches" (✅), but globe phaseMsg/stat stay **cluster** and `exitPointMode` runs. Even a clean **manual** search (not just boot) fails → it's a real state-machine bug, not a boot race. - -**Codex's diagnosis (correct, partially fixed in `4e79830`):** -1. ✅ FIXED — post-search `flyTo` lands at **200 km > EXIT_POINT_ALT (180 km)**, and the `moveEnd` handler exited point mode without checking `searchIsActive()`. Latched now. -2. ✅ FIXED — `enterPointMode` was fire-and-forget; now `async` + `await loadViewportSamples()`, awaited at all call sites. -3. ✅ FIXED — `loadViewportSamples` staleness was `requestId`-only; now also keys on the search token (`isStaleLoad()`). -4. ⏳ **NOT DONE — the actual remaining fix:** `applySearchFilterChange()` is a **parallel** mode-entry path racing the camera/mode machinery. Codex recommends **replacing it with ONE reconciler** that both the camera handler and search call, so "search forces point" and "altitude decides mode" live in one predicate with one set of staleness tokens. - -**Codex's reconciler spec (implement this):** -```js -async function reconcileGlobeForCurrentFilters(pushHistory = false) { - syncFacetNote(); - refreshHeatmap(); - if (searchIsActive()) { - if (getMode() !== 'point') await enterPointMode(pushHistory); - else await loadViewportSamples(); - } else { - // existing altitude-driven cluster/point behavior - } - refreshFacetCounts(); - window.refreshSamplesTable?.(); -} -``` -Call it from search completion AND the relevant camera paths; delete the bespoke `applySearchFilterChange` mini-state-machine. **Open question to nail with the new observability:** why does `enterPointMode` not stick on a manual search? (`[A1dbg]` events `apply-search-change`, `mode-change`, `post-build` will show the sequence — see below.) - -**Other bugs Codex flagged (not yet addressed):** -- Heatmap `renderHeatmap()` omits `searchFilterSQL` and `heatmapFilterHash()` omits the search token → heatmap (labeled "filtered density") stays unfiltered under search. (PR#2 or fix now.) -- Selection revalidation (`~L3457`) checks only source, not the search filter — clear/revalidate selection on search change. +### Patterns/Learnings +- **Backgrounded tabs freeze rAF** → corrupts every globe/camera observation. Headless Playwright is the reliable instrument. +- **Don't pile up runs**: accumulated hung browsers hold HTTP/1.1 keep-alive + peg CPU and starve `dev_server.py`. Restart between batches. +- **Local mirror full-downloads** (GET 200, not 206) — fine on localhost; validate range/perf on the deploy, not the mirror. +- Codex's `codex exec ... -o FILE` often fails to capture the final message when the diff is large; read the verdict from the streamed `.log` instead (resume the session for continuity). --- -## PERFORMANCE MODEL — why the UI hides the 40s, and what A1 does to it - -(RY's framing, 2026-05-29 — worth keeping front-of-mind for the substrate-vs-progress-UI call.) - -The explorer never *feels* like a 40–90s app because the whole design is **"never fetch big data over a wide area."** Data is tiered by zoom, smallest-first, and the tiny tiers are **preloaded** (`explorer.qmd` L14–17: `` for h3 res4 + facet_summaries + vocab_labels): +## External Content Processed -| User action | Fetched | Size | Felt | -|---|---|---|---| -| Land on globe (zoomed out) | H3 res4 | **580 KB** (preloaded) | instant (`Load Time 0.4s`) | -| Zoom in / more | H3 res6 / res8 | 1.6 / 2.5 MB | fast | -| Zoom **deep** → point mode | `samples_map_lite` | 60 MB file… | **still fast** ↓ | - -The trick on that last row: by the time `samples_map_lite` (60 MB) is touched, the camera is deep (alt < `ENTER_POINT_ALT` 120 km), so the bbox is tiny. DuckDB-WASM does **HTTP range requests** and pulls only the parquet **row groups** overlapping that small bbox (a few MB), never the whole file. So the big files are only ever read in slivers. UX masking on top: instant res4 globe, phase messages, stale-while-loading (dimmed old rows). - -**The two operations with NO spatial narrowing** (= the only ones that can hit the full 40s; both were what I kept triggering in dev): -1. **Free-text search** — `ILIKE '%term%'` over `label/description/place_name` across the *whole* `sample_facets_v2.parquet` (63 MB text). ILIKE can't skip row groups; it's a full column scan. Irreducible without an index. -2. **Samples table at a wide viewport** — `loadCount` over a world-sized bbox counts ~everything (normal users zoom in first, shrinking it). - -**The A1 implication (the load-bearing point):** A1 takes operation #1 — the single slowest thing in the app — and moves it to the **front of the common flow.** Today search is an optional side-panel lookup; A1 makes every committed search run that full 63 MB scan *first* and gates the filtered view on it. So A1 risks importing the one 40s wait into exactly the place the rest of the UI worked to avoid it. That's why: -- The **"Building search filter…"** affordance matters (honest masking, like the rest of the app). -- **BM25 substrate (#168–172)** is the thing that makes a *cold* search feel as snappy as zooming — NOT a correctness blocker (the pid-set abstraction works on plain ILIKE), but the perceived-perf fix. -- The **materialize-once** design is the mitigation: pay the un-narrowable full scan *one time* per term, then every pan/zoom/facet-toggle is a cheap `pid IN (…)` semi-join that DOES narrow spatially — folding search back into the fast tier after the first hit. - -(This also reframes the cold-load floor below: init ~40s is one thing, but the search scan is the *product-facing* slow path, and it's the one A1 must manage.) - ---- - -## FAST VERIFY LOOP (built today — use it tomorrow) - -**Why today was slow:** every iteration was a cold reload. Cold cost is **init-dominated** — DuckDB-WASM (from CDN) + Cesium + the OJS reactive graph take ~40s **before any data query**, and the search `ILIKE` then downloaded ~60MB of text columns over the network. Console capture from the automation harness was also flaky. - -**The fix (set up, committed in `62d5500`):** -1. **Local parquet mirror** — `docs/data/*.parquet` (128MB, gitignored via `docs` + `*.parquet`). Re-fetch with: - `for f in isamples_202601_{samples_map_lite,sample_facets_v2,h3_summary_res4,h3_summary_res6,h3_summary_res8,facet_cross_filter,facet_summaries}.parquet vocab_labels.parquet; do curl -s -o docs/data/$f https://data.isamples.org/$f; done` - (⚠️ `current/wide.parquet` came back **0 bytes** — used only for sample-click detail; may be the cause of the init hang — investigate.) -2. **`R2_BASE` override** — load with `?data_base=/data` (or `localStorage.ISAMPLES_DATA_BASE`). Defaults to prod, so shipped builds are unchanged. -3. **Range-capable server** — `python3 dev_server.py --dir docs --port 8099`. **Stock `python3 -m http.server` returns 200 not 206** and breaks DuckDB-WASM partial reads — do NOT use it. Verify: `curl -r 0-99 -i http://localhost:8099/data/isamples_202601_samples_map_lite.parquet` → must be **206** (confirmed working). -4. **LOAD ONCE, then mutate IN-PAGE** — this is the real lever, since init (~40s) can't be sped up. Pay init once; then drive searches via the search box (or `page.fill`) without reloading. Each in-page search hits the local mirror (fast data). -5. **Deterministic observability** (replaces flaky console): `window.__a1log` (ordered events), `window.__a1state[event]` (latest), `window.__a1globe()` → `{mode, samplePointsLen, samplePointsShown, h3PointsShown}`. On-page panel via `?debug=a1`. Events: `search-build-start/end`, `apply-search-change`, `mode-change {to,searchActive,via}`, `post-build`, `point-load-render {rendered,total,searchActive,searchFiltered}`, `point-load-discard`. -6. **Playwright harness** — `tests/playwright/a1-verify.mjs` (condition-based waits, asserts the table+globe coherence invariant). `node tests/playwright/a1-verify.mjs` (needs `npm i -D playwright` / `npx playwright install chromium`). - -**Loop URL example:** -`http://localhost:8099/explorer.html?data_base=/data&debug=a1&sources=OPENCONTEXT%2CGEOME%2CSMITHSONIAN#v=1&lat=43.15&lng=11.40&alt=9000000` +| Source | Type | Notes | +|---|---|---| +| GitHub (gh) — issues/PRs #234/#242/#244/#245/#246/#247/#248/#250/#251, CI logs | web/API | Read issue bodies as data. **Authored**: PR #251 + its review comment, #248 comment. **Merged** #251 to upstream production (RY-authorized "push to isamples"). | +| Codex CLI (gpt-5.4), session `019e7c8d…` | AI tool output | 3-round code review + #248 plan review. Findings **verified before applying**; treat as advisory. | +| isamples.org / rdhyee.github.io / localhost explorer | browser DOM (headless + 1 MCP tab) | Our own app. The MCP tab is what misled earlier sessions (rAF freeze). | +| `data.isamples.org`, local `docs/data/*.parquet` | remote/local data | Our own data. | -**Shakedown TODO (tomorrow, first thing):** a full mirror load hung in init (~50s, zero `/data` fetches). Check whether the 0-byte `current/wide.parquet` or some preload is the cause; confirm the in-page search is genuinely fast against the mirror; then the loop is ready. +No secrets accessed, no untrusted code executed (Codex output hand-reviewed). --- -## Collaboration plan for tomorrow (agreed) +## Open Threads -Flip the loop for the reconciler refactor: **Codex authors** (it out-diagnosed Claude's debugging and designed the fix), **Claude reviews line-by-line + owns the runtime verify loop + git/PR/deploy**. Iterate: Codex edits → Claude renders + runs `a1-verify.mjs` / in-page → feeds `__a1log` back to Codex → repeat. Higher effort both sides. +- [ ] **#248 Flavor A — finish the wiring** (the delicate half): `doDescribedBy(uri)` + extract shared `runPidSetResults({heading,emptyText,orderBy})` from `doSearch` (touches the just-reviewed stale-guards); `described-by=` URL param boot-trigger (search-ready timing) + `writeQueryState` kind-preservation; mutual exclusivity with `search=`; Playwright deep-link coherence test; Codex code-review; open PR. (Codex guardrails are in commit `f2eac35`'s message + the plan in `/tmp/p248.md`.) +- [ ] **Close #245** (facet-padding) — superseded by #251 (RY hadn't confirmed; do at pickup). +- [ ] **#244** (collection-facet DRAFT) and **#246** (points-over-heatmap) — need rebase on the new `main` (A1 + facet-padding); #246 worth checking points-over-heatmap *under a search*. +- [ ] **#248 Flavor B** (arbitrary/Getty URIs) — needs URI→label resolution + free-text fallback; follow-up. +- [ ] **Testing-gap follow-up**: add a deep-link **interactivity** regression test (assert `enableInputs`/no-trackedEntity + camera actually moves), using the `__a1camera` hook. (Hook is uncommitted/local; re-add when building the test.) +- [ ] Deferred A1 items: selection revalidation on search change; BM25 substrate (#168–172). --- -## Cleanup before the A1 PR is opened (don't ship these) +## Next Session Entry Point -- Remove the **`a1PersistenceProbe`** dev cell (right after the `db` cell) — persistence already proven. -- Decide on `a1dbg`/`__a1log`/`__a1state`/`__a1globe` + `?debug=a1` panel: gate behind a dev flag or strip. The `R2_BASE ?data_base=` override and `dev_server.py` are worth KEEPING (useful, safe defaults). -- The double-scan in `doSearch` (pid-set build + the existing LIMIT-50 side-panel query both scan facets) — follow-up: derive the side-panel list from `search_pids`. -- Heatmap + selection-revalidation search-awareness (above). +> Start here: continue **#248 Flavor A** on `feat/described-by-concept` (foundation `f2eac35` done). Next concrete step is `doDescribedBy` + extracting `runPidSetResults` from `doSearch`, then the `described-by=` URL plumbing + mutual-exclusivity, then test → Codex review → PR. Verify loop: `python3 dev_server.py --dir docs --port 8099` + `HEADLESS=1 node tests/playwright/a1-verify.mjs`. --- -## Key references +## Session History -- `explorer.qmd` anchors: `buildSearchFilter`/`clearSearchFilter`/`applySearchFilterChange` (~L3534), `loadViewportSamples` (~L2510), `enterPointMode`/`exitPointMode` (~L2680/2700), camera `moveEnd` handler (~L3709), camera `changed` handler (~L3560), `summaryText`/`loadCount`/`loadPage` (tableView cell ~L2123), `R2_BASE` (~L683), a1dbg/`__a1globe` install (~L4028). -- `A1_SCOPING.md` — full scope + probe + Codex resolutions. -- `dev_server.py`, `tests/playwright/a1-verify.mjs` — the loop. -- Issues: #234 (umbrella, A1 = Step 4), #247 (the bug, interim fixed by #250), #168–172 (FTS substrate — optional latency win, NOT a blocker for A1). +| Date | Trust | Summary | +|---|---|---| +| 2026-05-30/31 | external-content | Shakedown root-caused; A1 logjam = backgrounded-tab artifact; A1 perf + facet-padding fixed; Codex loop → dual approval; **A1 merged & deployed to isamples.org** (#251); #248 started (`feat/described-by-concept` foundation). | +| 2026-05-29 | external-content | (prior) A1 scoping + globe logjam framing (superseded — there was no logjam). | From 3fd7fd2033711eac86d2fc06e1dac605f1ab4ae5 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Mon, 1 Jun 2026 07:51:06 -0700 Subject: [PATCH 3/4] =?UTF-8?q?feat(explorer):=20#248=20Flavor=20A=20?= =?UTF-8?q?=E2=80=94=20described-by=3D=20filter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire the concept-URI filter end to end on top of the A1 search_pids machinery. A `described-by=` deep link selects material samples whose URI-valued facet concept (object_type / material / context) exactly matches the URI, and filters EVERY surface (table, globe points, facet counts, side panel) via the same `pid IN (SELECT pid FROM search_pids)` semi-join. - doDescribedBy(uri): second entry point into search_pids. Reuses the already -reviewed buildConceptFilter (producer) + applySearchFilterChange (refresher, kind-agnostic) and renders its OWN side panel. Deliberately does NOT touch doSearch — keeps A1's just-shipped (#251) text-search stale-guard hot path untouched (RY decision: protect the path Kerstin demos Wed). - writeQueryState: search= and described-by= are mutually exclusive. Keyed off the text input (always current) rather than window.__searchFilter (stale at doSearch's early writeQueryState call), so a committed text search drops described-by= and vice-versa. - Boot: described-by= deep link auto-commits and wins over search= if both present. - tests/playwright/described-by-verify.mjs: HEADLESS deep-link coherence test (globe + panel + URL all reflect the concept; mutual-exclusivity with text). Verified: described-by-verify green; a1-verify still green (no A1 regression). Co-Authored-By: Claude Opus 4.8 (1M context) --- explorer.qmd | 212 ++++++++++++++++++++++- tests/playwright/described-by-verify.mjs | 126 ++++++++++++++ 2 files changed, 335 insertions(+), 3 deletions(-) create mode 100644 tests/playwright/described-by-verify.mjs diff --git a/explorer.qmd b/explorer.qmd index 210b968..e0f6133 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -831,8 +831,27 @@ function writeQueryState() { const params = new URLSearchParams(location.search); const searchInput = document.getElementById('sampleSearch'); const q = searchInput ? searchInput.value.trim() : ''; - if (q) params.set('search', q); - else params.delete('search'); + // #248 Flavor A: `search=` (free text) and `described-by=` (concept URI) + // both produce the singleton search_pids and are mutually exclusive. + // Key the choice off the TEXT INPUT, not window.__searchFilter — doSearch + // calls writeQueryState() EARLY (before buildSearchFilter flips + // __searchFilter.kind to 'text'), so __searchFilter is stale at that call + // site. The input value is always current: a non-empty box means a text + // search is being committed → `search=` wins, drop `described-by=`. + // doDescribedBy CLEARS the box before it calls writeQueryState, so there + // the box is empty and we fall through to the concept branch. + const sf = (typeof window !== 'undefined') ? window.__searchFilter : null; + const conceptActive = !!(sf && sf.active && sf.kind === 'concept' && sf.uri); + if (q) { + params.set('search', q); + params.delete('described-by'); + } else if (conceptActive) { + params.set('described-by', sf.uri); + params.delete('search'); + } else { + params.delete('search'); + params.delete('described-by'); + } const activeSources = getActiveSources(); if (activeSources.length === SOURCE_VALUES.length) params.delete('sources'); @@ -4741,6 +4760,187 @@ zoomWatcher = { } } + // #248 Flavor A: commit a concept-URI filter. This is the SECOND entry + // point into the shared `search_pids` machinery — `buildConceptFilter` + // is the producer, `applySearchFilterChange` pushes the result to every + // surface (table / points / facet counts / globe mode), exactly as a text + // search does. It deliberately does NOT reuse doSearch's body: doSearch + // carries A1's text-search stale-guard + COUNT logic shipped in #251, and + // we keep that hot path untouched (RY decision, 2026-06-01). It renders its + // own, simpler side panel instead. Mutually exclusive with the free-text + // search: committing a concept clears the text box and the `search=` param. + async function doDescribedBy(uri) { + if (!uri) return; + // Share doSearch's freshness counter so a text search and a concept + // filter can't clobber each other's side-panel render mid-flight. + const searchId = ++_searchSeq; + + // Mutual exclusivity: a committed concept filter OWNS search_pids, so + // clear the free-text box (and its sidebar mirror) before building. + if (searchInput) searchInput.value = ''; + const sidebarInput = document.getElementById('sampleSearchSidebar'); + if (sidebarInput) sidebarInput.value = ''; + + if (searchResults) searchResults.textContent = 'Filtering by concept…'; + + // Build the pid-set. buildConceptFilter mirrors buildSearchFilter's + // token-scoped staging + finally-drop guards and publishes the result + // (label, total, kind:'concept', uri) on window.__searchFilter. A build + // failure empties search_pids via clearSearchFilter (same as doSearch), + // so downstream readers see zero rows rather than a missing table. + let buildFailed = false; + try { + await buildConceptFilter(uri); + } catch (e) { + console.warn('#248 concept-filter build failed; surfaces stay unfiltered:', e); + buildFailed = true; + await clearSearchFilter(); + } + // Superseded by a newer search/concept while building? Bail before + // mutating any surface. Otherwise push the filter through everything. + window.a1dbg?.('concept-post-build', { searchId, searchSeq: _searchSeq, active: window.__searchFilter?.active, total: window.__searchFilter?.total }); + if (searchId === _searchSeq) { + await applySearchFilterChange(); + } + // Persist `described-by=` (and drop `search=`) now that __searchFilter + // reflects the concept — writeQueryState reads window.__searchFilter. + writeQueryState(); + + // Anything past here only paints the side panel / camera; if a newer + // action superseded us during applySearchFilterChange, stop. + if (searchId !== _searchSeq) return; + + const sf = window.__searchFilter; + const label = (sf && sf.term) ? sf.term + : (typeof window !== 'undefined' && window.conceptLabelForUri) ? window.conceptLabelForUri(uri) : uri; + const total = (sf && sf.active && sf.kind === 'concept') ? sf.total : 0; + + // Sticky heading mirrors doSearch's, recolored (purple) to distinguish a + // concept filter from a text search. Label is vocab-resolved, not a URL. + const headingHTML = (suffix) => + `

Samples described by: ${label}${suffix}

`; + + if (buildFailed) { + if (searchResults) searchResults.textContent = `Concept filter failed for "${label}". Please try again.`; + const sElF = document.getElementById('samplesSection'); + if (sElF) sElF.innerHTML = headingHTML('') + + '
Filter failed to build — please try again.
'; + return; + } + if (total === 0) { + if (searchResults) searchResults.textContent = `No samples described by "${label}"`; + const sEl0 = document.getElementById('samplesSection'); + if (sEl0) sEl0.innerHTML = headingHTML(' (0)') + + '
No samples matched this concept.
'; + return; + } + + // Side-panel list: read the LIMIT-50 head straight off search_pids + // (already built + ranked by buildConceptFilter: object_type > material + // > context, then label), LEFT JOIN lite for coords so coord-less + // samples still list (null lat/lng; the click handler guards on null). + let results; + try { + results = await db.query(` + SELECT s.pid, s.label, s.source, l.latitude, l.longitude, + s.place_name, s.relevance_score + FROM search_pids s + LEFT JOIN read_parquet('${lite_url}') l USING (pid) + ORDER BY s.relevance_score DESC, s.label + LIMIT 50 + `); + } catch (err) { + if (searchId !== _searchSeq) return; + console.error('#248 concept side-panel query failed:', err); + if (searchResults) searchResults.textContent = `Concept filter error: ${err.message}`; + return; + } + if (searchId !== _searchSeq) return; + + const shown = results.length; + const ofTotal = total > shown ? `${shown} of ${total.toLocaleString()}` : `${shown}`; + if (searchResults) searchResults.textContent = total > shown + ? `${shown} of ${total.toLocaleString()} samples described by "${label}"` + : `${shown} sample${shown === 1 ? '' : 's'} described by "${label}"`; + + const sampEl = document.getElementById('samplesSection'); + if (sampEl) { + let h = headingHTML(` (${ofTotal})`); + for (const s of results) { + const color = SOURCE_COLORS[s.source] || '#666'; + const name = SOURCE_NAMES[s.source] || s.source; + const sUrl = sourceUrl(s.pid); + h += `
+
+ ${sUrl ? `${s.label || s.pid}` : `${s.label || s.pid}`} + ${name} +
+
`; + } + sampEl.innerHTML = h; + + // Click a concept-result row → same full selection ceremony as a + // text-search row (freshness bump, card hydration, flight, lazy + // detail) so a slow prior load can't repaint over the navigation. + const resultsByPid = new Map(results.map(s => [s.pid, s])); + sampEl.querySelectorAll('.sample-row[data-lat]').forEach(row => { + row.addEventListener('click', async (e) => { + if (e.target.tagName === 'A') return; // let links work + const pid = row.dataset.pid; + const sample = pid ? resultsByPid.get(pid) : null; + if (!sample || sample.latitude == null || sample.longitude == null) return; + + const isStale = freshSelectionToken(viewer); + viewer._globeState.selectedPid = pid; + viewer._globeState.selectedH3 = null; + updateSampleCard({ + pid: sample.pid, + label: sample.label, + source: sample.source, + lat: sample.latitude, + lng: sample.longitude, + place_name: sample.place_name, + result_time: sample.result_time + }); + viewer.camera.flyTo({ + destination: Cesium.Cartesian3.fromDegrees(sample.longitude, sample.latitude, 50000), + duration: 1.5 + }); + try { + const detail = await db.query(` + SELECT description + FROM read_parquet('${wide_url}') + WHERE pid = '${pid.replace(/'/g, "''")}' + LIMIT 1 + `); + if (isStale()) return; + if (detail && detail.length > 0) updateSampleDetail(detail[0]); + else updateSampleDetail({ description: '' }); + } catch(err) { + if (isStale()) return; + console.error("Concept-row detail query failed:", err); + updateSampleDetail(null); + } + }); + }); + } + + // Fly to the first LOCATED result (a nudge, not a selection — clear any + // prior pid/h3 so the URL doesn't carry stale selection across flight). + // Mirrors doSearch's world-scope auto-flight. + const firstLocated = results.find(r => r.latitude != null && r.longitude != null); + if (firstLocated) { + viewer._globeState.selectedPid = null; + viewer._globeState.selectedH3 = null; + viewer.camera.flyTo({ + destination: Cesium.Cartesian3.fromDegrees(firstLocated.longitude, firstLocated.latitude, 200000), + duration: 1.5 + }); + } + } + // Expose for the boot trigger + debug console (mirrors window.conceptLabelForUri). + if (typeof window !== 'undefined') window.doDescribedBy = doDescribedBy; + if (searchAreaBtn) searchAreaBtn.addEventListener('click', () => doSearch('area')); if (searchWorldBtn) searchWorldBtn.addEventListener('click', () => doSearch('world')); // Slim-overlay submit button — same behavior as Enter on `#sampleSearch`. @@ -4784,7 +4984,13 @@ zoomWatcher = { } }); - if (searchInput && searchInput.value.trim().length >= 2) { + // #248 Flavor A boot: a `described-by=` deep link commits the + // concept filter. It WINS over `search=` if a hand-crafted URL carries both + // (they're mutually exclusive; doDescribedBy clears the text box + param). + const _describedByUri = new URLSearchParams(location.search).get('described-by'); + if (_describedByUri) { + doDescribedBy(_describedByUri); + } else if (searchInput && searchInput.value.trim().length >= 2) { doSearch(_searchScope); } diff --git a/tests/playwright/described-by-verify.mjs b/tests/playwright/described-by-verify.mjs new file mode 100644 index 0000000..e6073a5 --- /dev/null +++ b/tests/playwright/described-by-verify.mjs @@ -0,0 +1,126 @@ +// #248 Flavor A (`described-by=`) deterministic verify harness. +// +// Sibling of a1-verify.mjs — same LOAD-ONCE-then-assert pattern against the +// LOCAL parquet mirror. Verifies the concept-URI deep link drives EVERY +// surface coherently (the A1 invariant, but for the concept producer of +// search_pids), and that committing a text search afterward flips the URL +// from `described-by=` to `search=` (mutual exclusivity). +// +// Run pattern (same as a1-verify): +// 1. mirror parquets once: ls docs/data/*.parquet +// 2. python3 dev_server.py --dir docs --port 8099 +// 3. HEADLESS=1 node tests/playwright/described-by-verify.mjs +// +// HEADLESS=1 is strongly recommended — headed backgrounded windows freeze rAF +// (Cesium camera never settles), corrupting globe-mode observations. + +import { chromium } from 'playwright'; + +// A well-populated, clearly cross-domain concept (biology — "Whole organism +// material sample"), to tell the iSamples cross-domain story, not archaeology. +const URI = process.env.DB_URI + || 'https://w3id.org/isample/vocabulary/materialsampleobjecttype/1.0/wholeorganism'; +const EXPECT_LABEL = process.env.DB_LABEL || 'Whole organism material sample'; + +const BASE = process.env.A1_BASE + || 'http://localhost:8099/explorer.html?data_base=/data&debug=a1&sources=OPENCONTEXT%2CGEOME%2CSMITHSONIAN'; +// Boot at high altitude (cluster) so we test that the concept deep link forces +// filtered point mode from cluster — the same hard case a1-verify covers. +const URL = `${BASE}&described-by=${encodeURIComponent(URI)}#v=1&lat=20&lng=0&alt=9000000`; + +const browser = await chromium.launch({ headless: process.env.HEADLESS === '1' }); +const page = await browser.newPage(); +page.on('console', (m) => { if (/A1|point mode|concept|Discarding|#248/.test(m.text())) console.log(' page>', m.text()); }); + +console.log('Loading (cold init ~40s)…', URL); +await page.goto(URL, { waitUntil: 'domcontentloaded' }); + +// App live (OJS graph + DuckDB + search machinery installed). +await page.waitForFunction( + () => typeof window.a1dbg === 'function' && !!window.__a1globe && !!document.querySelector('#sampleSearch'), + null, { timeout: 180_000 }); +console.log('App live. Boot mode:', await page.evaluate(() => window.__a1globe?.())); + +// The deep link should auto-commit the concept filter at boot (kind:'concept'). +await page.waitForFunction( + () => window.__searchFilter?.active === true + && window.__searchFilter?.kind === 'concept' + && window.__searchFilter?.total > 0, + null, { timeout: 120_000 }); + +// Globe settles into filtered point mode (clusters can't be concept-filtered). +await page.waitForFunction(() => { + const g = window.__a1globe?.(); + return g && g.mode === 'point' && g.samplePointsShown === true && g.samplePointsLen > 0; +}, null, { timeout: 60_000 }).catch(() => console.log(' !! globe did NOT reach filtered point mode')); + +// The side-panel render runs AFTER applySearchFilterChange returns (a separate +// LIMIT-50 SELECT off search_pids), so wait for the concept heading to paint +// before snapshotting — otherwise we race the "Filtering by concept…" interim. +await page.waitForFunction(() => { + const h = document.querySelector('#samplesSection .search-results-heading'); + return h && /Samples described by:/.test(h.textContent || ''); +}, null, { timeout: 60_000 }).catch(() => console.log(' !! concept side panel did NOT render')); + +const state = await page.evaluate(() => ({ + search: window.__searchFilter, + globe: window.__a1globe?.(), + tableMeta: document.getElementById('tableMeta')?.textContent?.trim(), + panelHeading: document.querySelector('#samplesSection .search-results-heading')?.textContent?.trim(), + resultsLine: document.getElementById('searchResults')?.textContent?.trim(), + urlSearch: location.search, +})); + +console.log('\n=== CONCEPT DEEP-LINK RESULT ==='); +console.log(JSON.stringify(state, null, 2)); + +const conceptOk = + state.search?.active === true && + state.search?.kind === 'concept' && + state.search?.uri === URI && + state.globe?.mode === 'point' && + state.globe?.samplePointsShown === true && + state.globe?.h3PointsShown === false && + state.globe?.samplePointsLen > 0 && + state.globe?.samplePointsLen <= state.search?.total && + /Samples described by:/.test(state.panelHeading || '') && + (state.panelHeading || '').includes(EXPECT_LABEL) && + /described-by=/.test(state.urlSearch) && + !/[?&]search=/.test(state.urlSearch); + +console.log(conceptOk + ? '\n✅ #248 CONCEPT COHERENT: globe + panel + URL all reflect the concept filter.' + : '\n❌ #248 INCOHERENT: see fields above.'); + +// Mutual exclusivity: now commit a free-text search; described-by= must drop +// out of the URL and the filter kind must flip to 'text'. +await page.fill('#sampleSearch', 'pottery'); +await page.press('#sampleSearch', 'Enter'); +await page.waitForFunction( + () => window.__searchFilter?.active === true + && window.__searchFilter?.kind === 'text' + && window.__searchFilter?.term === 'pottery', + null, { timeout: 120_000 }).catch(() => console.log(' !! text search did not take over')); + +const after = await page.evaluate(() => ({ + kind: window.__searchFilter?.kind, + term: window.__searchFilter?.term, + urlSearch: location.search, +})); +console.log('\n=== AFTER TEXT SEARCH (mutual exclusivity) ==='); +console.log(JSON.stringify(after, null, 2)); + +const mutexOk = + after.kind === 'text' && + /[?&]search=pottery/.test(after.urlSearch) && + !/described-by=/.test(after.urlSearch); + +console.log(mutexOk + ? '\n✅ MUTUAL EXCLUSIVITY: text search took over; described-by= cleared from URL.' + : '\n❌ MUTUAL EXCLUSIVITY FAILED: see fields above.'); + +const ok = conceptOk && mutexOk; +console.log(ok ? '\n✅✅ #248 FLAVOR A VERIFIED.' : '\n❌ #248 verify FAILED.'); + +if (process.env.A1_CLOSE) await browser.close(); +process.exitCode = ok ? 0 : 1; From 30fbac93496e4c3afda35309f3eb143c4565d52f Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Mon, 1 Jun 2026 09:37:26 -0700 Subject: [PATCH 4/4] fix(explorer): #248 address Codex review (URL intent, stale guard, XSS) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex round-1 found 3 real issues; all fixed + regression-tested: 1. URL could flip concept→phantom-text on draft text + facet/source toggle. writeQueryState is now intent-aware: doSearch passes commitText to authoritatively persist/clear a text search (it runs before __searchFilter updates); every other caller mirrors the COMMITTED filter in __searchFilter, so draft (un-submitted) text no longer clobbers described-by=. New Playwright check: draft text + source toggle preserves described-by=. 2. A superseded doDescribedBy could still rewrite the URL. Moved the _searchSeq stale-check to BEFORE writeQueryState (and re-check after the async applySearchFilterChange) so only the current producer persists state. 3. Reflected-XSS path: the concept side panel interpolated a URL-derived label (and result label/pid/url/name) into innerHTML. Now escapeHtml'd (the same helper the table renderer uses), covering text + attribute contexts. Verified: described-by-verify green (concept + draft-text + mutual-exclusivity); a1-verify still green. Co-Authored-By: Claude Opus 4.8 (1M context) --- explorer.qmd | 83 +++++++++++++++--------- tests/playwright/described-by-verify.mjs | 29 ++++++++- 2 files changed, 81 insertions(+), 31 deletions(-) diff --git a/explorer.qmd b/explorer.qmd index e0f6133..05fbba0 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -827,30 +827,42 @@ function applyQueryToFacetFilters() { } -function writeQueryState() { +function writeQueryState(opts = {}) { const params = new URLSearchParams(location.search); const searchInput = document.getElementById('sampleSearch'); const q = searchInput ? searchInput.value.trim() : ''; // #248 Flavor A: `search=` (free text) and `described-by=` (concept URI) // both produce the singleton search_pids and are mutually exclusive. - // Key the choice off the TEXT INPUT, not window.__searchFilter — doSearch - // calls writeQueryState() EARLY (before buildSearchFilter flips - // __searchFilter.kind to 'text'), so __searchFilter is stale at that call - // site. The input value is always current: a non-empty box means a text - // search is being committed → `search=` wins, drop `described-by=`. - // doDescribedBy CLEARS the box before it calls writeQueryState, so there - // the box is empty and we fall through to the concept branch. + // + // doSearch passes `opts.commitText` to AUTHORITATIVELY persist (a term) or + // clear ('') a text search: it calls writeQueryState EARLY, before + // buildSearchFilter flips __searchFilter.kind to 'text', so it can't read + // filter state — its committed term is the source of truth. + // + // Every OTHER caller (facet-change + source-change handlers, doDescribedBy) + // omits commitText, so the URL mirrors the COMMITTED filter in + // window.__searchFilter, NOT the raw text box. This is Codex finding #1: + // with an active concept filter, draft (un-submitted) text in the box plus + // a facet/source toggle must NOT clobber `described-by=` with a phantom + // `search=`. Keying passive writes off __searchFilter (concept) vs. the box + // (text) keeps the URL honest about what's actually filtering. const sf = (typeof window !== 'undefined') ? window.__searchFilter : null; const conceptActive = !!(sf && sf.active && sf.kind === 'concept' && sf.uri); - if (q) { - params.set('search', q); + if (opts.commitText !== undefined) { + if (opts.commitText) params.set('search', opts.commitText); + else params.delete('search'); params.delete('described-by'); } else if (conceptActive) { params.set('described-by', sf.uri); params.delete('search'); } else { - params.delete('search'); + // No concept filter and no explicit text commit: preserve the prior + // (pre-#248) behavior — reflect the text box. A committed text search + // keeps its term across facet/source toggles (the box still holds it); + // an inactive/empty state clears `search=`. params.delete('described-by'); + if (q) params.set('search', q); + else params.delete('search'); } const activeSources = getActiveSources(); @@ -4245,11 +4257,11 @@ zoomWatcher = { // page returns to its unfiltered state. await clearSearchFilter(); await applySearchFilterChange(); - writeQueryState(); + writeQueryState({ commitText: '' }); // authoritative clear (#248) persistSearchScope(effectiveScope); return; } - writeQueryState(); + writeQueryState({ commitText: term }); // authoritative text commit (#248) persistSearchScope(effectiveScope); // A1 (#234 Step 4): the pid-set filter is built below (after `terms` // is parsed) and the dependent surfaces are refreshed against it; the @@ -4796,19 +4808,19 @@ zoomWatcher = { buildFailed = true; await clearSearchFilter(); } - // Superseded by a newer search/concept while building? Bail before - // mutating any surface. Otherwise push the filter through everything. window.a1dbg?.('concept-post-build', { searchId, searchSeq: _searchSeq, active: window.__searchFilter?.active, total: window.__searchFilter?.total }); - if (searchId === _searchSeq) { - await applySearchFilterChange(); - } - // Persist `described-by=` (and drop `search=`) now that __searchFilter - // reflects the concept — writeQueryState reads window.__searchFilter. - writeQueryState(); - - // Anything past here only paints the side panel / camera; if a newer - // action superseded us during applySearchFilterChange, stop. + // Superseded during build? Bail before touching ANY shared state. The + // winning producer owns the surfaces AND the URL (Codex finding #2: a + // stale producer must not run writeQueryState off another's filter). + if (searchId !== _searchSeq) return; + await applySearchFilterChange(); + // Re-check after the async surface refresh, BEFORE writeQueryState: if a + // newer search/concept superseded us mid-refresh, it — not this stale + // producer — must be the one that writes the URL. if (searchId !== _searchSeq) return; + // Persist `described-by=` (drops `search=`) now that __searchFilter + // reflects this concept and we're confirmed the current producer. + writeQueryState(); const sf = window.__searchFilter; const label = (sf && sf.term) ? sf.term @@ -4816,9 +4828,14 @@ zoomWatcher = { const total = (sf && sf.active && sf.kind === 'concept') ? sf.total : 0; // Sticky heading mirrors doSearch's, recolored (purple) to distinguish a - // concept filter from a text search. Label is vocab-resolved, not a URL. + // concept filter from a text search. `label` is usually a vocab pref + // label, but for an unknown URI it falls back to the URI tail — i.e. it + // can be attacker-controlled via the `described-by=` URL param. Escape + // it before innerHTML to close the reflected-XSS path (Codex finding #3). + // `suffix` is internal (counts) and safe. + const safeLabel = escapeHtml(label); const headingHTML = (suffix) => - `

Samples described by: ${label}${suffix}

`; + `

Samples described by: ${safeLabel}${suffix}

`; if (buildFailed) { if (searchResults) searchResults.textContent = `Concept filter failed for "${label}". Please try again.`; @@ -4867,12 +4884,18 @@ zoomWatcher = { if (sampEl) { let h = headingHTML(` (${ofTotal})`); for (const s of results) { - const color = SOURCE_COLORS[s.source] || '#666'; - const name = SOURCE_NAMES[s.source] || s.source; + // Escape every interpolated value (Codex finding #3): label/pid + // come from the parquet, color/name from controlled maps, sUrl + // into an href attribute — escapeHtml handles both text and + // attribute contexts (it escapes quotes). lat/lng are numeric. + const color = escapeHtml(SOURCE_COLORS[s.source] || '#666'); + const name = escapeHtml(SOURCE_NAMES[s.source] || s.source); const sUrl = sourceUrl(s.pid); - h += `
+ const labelText = escapeHtml(s.label || s.pid); + const pidAttr = escapeHtml(s.pid || ''); + h += `
- ${sUrl ? `${s.label || s.pid}` : `${s.label || s.pid}`} + ${sUrl ? `${labelText}` : `${labelText}`} ${name}
`; diff --git a/tests/playwright/described-by-verify.mjs b/tests/playwright/described-by-verify.mjs index e6073a5..34d61c3 100644 --- a/tests/playwright/described-by-verify.mjs +++ b/tests/playwright/described-by-verify.mjs @@ -92,6 +92,33 @@ console.log(conceptOk ? '\n✅ #248 CONCEPT COHERENT: globe + panel + URL all reflect the concept filter.' : '\n❌ #248 INCOHERENT: see fields above.'); +// Codex finding #1 regression: with an active concept filter, DRAFT (un- +// submitted) text in the box + a passive writeQueryState (fired by a source/ +// facet toggle) must NOT clobber `described-by=` with a phantom `search=`. +await page.fill('#sampleSearch', 'draftNotSubmitted'); // type, do NOT press Enter +const firstSource = page.locator('#sourceFilter input[type="checkbox"]').first(); +await firstSource.uncheck(); // fires the passive source-change writeQueryState +await page.waitForFunction(() => !document.body.classList.contains('explorer-busy'), + null, { timeout: 60_000 }).catch(() => {}); +const draftState = await page.evaluate(() => ({ + kind: window.__searchFilter?.kind, + urlSearch: location.search, +})); +console.log('\n=== DRAFT-TEXT + SOURCE TOGGLE (Codex #1) ==='); +console.log(JSON.stringify(draftState, null, 2)); +const draftOk = + draftState.kind === 'concept' && + /described-by=/.test(draftState.urlSearch) && + !/[?&]search=/.test(draftState.urlSearch); +console.log(draftOk + ? '\n✅ DRAFT-TEXT SAFE: described-by= survived draft text + source toggle.' + : '\n❌ DRAFT-TEXT BUG: described-by= was clobbered by un-submitted text.'); +// Restore clean state for the next check. +await firstSource.check(); +await page.fill('#sampleSearch', ''); +await page.waitForFunction(() => !document.body.classList.contains('explorer-busy'), + null, { timeout: 60_000 }).catch(() => {}); + // Mutual exclusivity: now commit a free-text search; described-by= must drop // out of the URL and the filter kind must flip to 'text'. await page.fill('#sampleSearch', 'pottery'); @@ -119,7 +146,7 @@ console.log(mutexOk ? '\n✅ MUTUAL EXCLUSIVITY: text search took over; described-by= cleared from URL.' : '\n❌ MUTUAL EXCLUSIVITY FAILED: see fields above.'); -const ok = conceptOk && mutexOk; +const ok = conceptOk && draftOk && mutexOk; console.log(ok ? '\n✅✅ #248 FLAVOR A VERIFIED.' : '\n❌ #248 verify FAILED.'); if (process.env.A1_CLOSE) await browser.close();