diff --git a/docs/playground.css b/docs/playground.css index 6b9050e..e158b0b 100644 --- a/docs/playground.css +++ b/docs/playground.css @@ -138,6 +138,17 @@ button:disabled { min-height: 1.25rem; } +.real-status { + color: var(--ink-soft, #5a6b67); + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + font-size: 0.82rem; + min-height: 1.1rem; +} + +.real-status.real-error { + color: #b5482f; +} + .status-strip { display: grid; gap: 10px; diff --git a/docs/playground.html b/docs/playground.html index 5096c1f..be5ab46 100644 --- a/docs/playground.html +++ b/docs/playground.html @@ -65,6 +65,10 @@

Playground

Compare +
@@ -74,6 +78,7 @@

Playground

+
@@ -129,6 +134,7 @@

Trace

+ diff --git a/docs/playground.js b/docs/playground.js index e47cb9a..e4da4f4 100644 --- a/docs/playground.js +++ b/docs/playground.js @@ -45,6 +45,8 @@ answer: document.getElementById("answerSelect"), failureFilter: document.getElementById("failureFilter"), compare: document.getElementById("compareToggle"), + realPython: document.getElementById("realPythonToggle"), + realStatus: document.getElementById("realStatus"), reset: document.getElementById("resetButton"), step: document.getElementById("stepButton"), run: document.getElementById("runButton"), @@ -72,16 +74,13 @@ let copyStatusTimer = null; elements.scenario.addEventListener("change", () => { - stopRun(); - state = buildState(); - updateLocation(false); - render(); + if (elements.scenario.value !== "clarifying") { + elements.realPython.checked = false; + } + rebuild(); }); elements.answer.addEventListener("change", () => { - stopRun(); - state = buildState(); - updateLocation(false); - render(); + rebuild(); }); elements.failureFilter.addEventListener("change", () => { updateLocation(false); @@ -91,15 +90,15 @@ updateLocation(false); render(); }); + elements.realPython.addEventListener("change", () => { + rebuild(); + }); elements.replay.addEventListener("input", () => { state.replayIndex = clampReplayIndex(elements.replay.value); render(); }); elements.reset.addEventListener("click", () => { - stopRun(); - state = buildState(); - updateLocation(false); - render(); + rebuild(); }); elements.step.addEventListener("click", () => { stepOnce(); @@ -123,23 +122,127 @@ window.setTimeout(startRun, 180); } - function buildState() { + function buildState(realConfig) { const scenario = elements.scenario.value; const answer = elements.answer.value; - const config = - scenario === "household" - ? buildHouseholdScenario(answer) - : buildClarifyingScenario(answer); + let config; + let source; + if (realConfig) { + config = realConfig; + source = "python"; + } else { + config = + scenario === "household" + ? buildHouseholdScenario(answer) + : buildClarifyingScenario(answer); + source = "js"; + } return { scenario, answer, config, + source, index: 0, trace: [], replayIndex: null, }; } + // Rebuild the playground state, honoring the "Run real Python" toggle. When the + // toggle is on (clarifying only), we lazily boot Pyodide and run the actual + // examples/embodied_ai/35_clarifying_question.py loop, then draw its real trace. + // Otherwise we use the instant JS preview so first paint never waits on Pyodide. + async function rebuild() { + stopRun(); + const useReal = + elements.realPython.checked && elements.scenario.value === "clarifying"; + if (useReal) { + setRealStatus("booting Pyodide + running real Python…"); + try { + const config = await fetchRealClarifyingConfig(elements.answer.value); + state = buildState(config); + setRealStatus( + "running real Python: examples/embodied_ai/35_clarifying_question.py" + ); + } catch (error) { + elements.realPython.checked = false; + state = buildState(); + setRealStatus("Pyodide failed (" + error + ") — showing JS preview", true); + } + } else { + state = buildState(); + setRealStatus(""); + } + updateLocation(false); + render(); + } + + function setRealStatus(message, isError) { + if (!elements.realStatus) { + return; + } + elements.realStatus.textContent = message || ""; + elements.realStatus.classList.toggle("real-error", Boolean(isError)); + } + + // --- Real Python in the browser (Pyodide), lazy-loaded on first use --------- + let pyodideReadyPromise = null; + const realConfigCache = {}; + + async function ensurePyodide() { + if (pyodideReadyPromise) { + return pyodideReadyPromise; + } + pyodideReadyPromise = (async () => { + // eslint-disable-next-line no-undef + const pyodide = await loadPyodide({ + indexURL: "https://cdn.jsdelivr.net/pyodide/v0.27.2/full/", + }); + await pyodide.loadPackage("numpy"); + const buffer = await (await fetch("./pyodide/pir_bundle.zip")).arrayBuffer(); + await pyodide.unpackArchive(buffer, "zip"); + return pyodide; + })(); + return pyodideReadyPromise; + } + + // Runs the unmodified example headless and serializes its Trace with the same + // pir.viz.playground_trace helper that tests/test_playground_trace.py pins. + // ANSWER is replaced with a JSON string literal before execution. + const CLARIFYING_DRIVER = [ + "import json, os, sys, importlib.util", + "cwd = os.getcwd()", + "if cwd not in sys.path:", + " sys.path.insert(0, cwd)", + "class _NoMatplotlib:", + " def find_spec(self, name, path=None, target=None):", + " if name == 'matplotlib' or name.startswith('matplotlib.'):", + " raise ImportError('matplotlib is intentionally unavailable on the headless browser path')", + " return None", + "sys.meta_path.insert(0, _NoMatplotlib())", + "path = os.path.join(cwd, 'examples', 'embodied_ai', '35_clarifying_question.py')", + "spec = importlib.util.spec_from_file_location('clarifying_question', path)", + "mod = importlib.util.module_from_spec(spec)", + "spec.loader.exec_module(mod)", + "from pir.viz.playground_trace import clarifying_trace_to_playground", + "answer = ANSWER", + "trace = mod.run(command='pick the block', answer=answer, render=False)", + "json.dumps(clarifying_trace_to_playground(trace, command='pick the block', answer=answer))", + ].join("\n"); + + async function fetchRealClarifyingConfig(answer) { + const cacheKey = "clarifying:" + answer; + if (realConfigCache[cacheKey]) { + return realConfigCache[cacheKey]; + } + const pyodide = await ensurePyodide(); + const code = CLARIFYING_DRIVER.replace("ANSWER", JSON.stringify(answer)); + const json = await pyodide.runPythonAsync(code); + const config = JSON.parse(json); + realConfigCache[cacheKey] = config; + return config; + } + function stepOnce() { if (state.index >= state.config.steps.length) { render(); @@ -626,6 +729,7 @@ elements.run.disabled = state.index >= state.config.steps.length && !timer; elements.copyTrace.disabled = state.trace.length === 0; elements.compare.disabled = state.scenario !== "household"; + elements.realPython.disabled = state.scenario !== "clarifying"; renderReplay(replayIndex); renderCompare(); diff --git a/docs/pyodide/pir_bundle.zip b/docs/pyodide/pir_bundle.zip index 72db0f8..6dcbcb1 100644 Binary files a/docs/pyodide/pir_bundle.zip and b/docs/pyodide/pir_bundle.zip differ diff --git a/docs/pyodide_playground_strategy.md b/docs/pyodide_playground_strategy.md index 600ac48..42888a2 100644 --- a/docs/pyodide_playground_strategy.md +++ b/docs/pyodide_playground_strategy.md @@ -101,10 +101,33 @@ that `loadPyodide`/`unpackArchive`/`fetch` behave as expected. Open `docs/pyodide/poc.html` via a local server (e.g. `python3 -m http.server` from `docs/`) or on GitHub Pages and click “Run the real loop”. -**Phase 1 — one real loop on the page (1–2 days).** Add a "Run real Python" -toggle to the existing playground for `clarifying_question` (its renderer already -exists). Python produces the trace; JS draws it; delete the JS dynamics for that -scenario. This is the first honest "real Python in your browser" claim. +**Phase 1 — one real loop on the page. ✅ built (Python path verified; needs a +browser check).** The playground ([`docs/playground.html`](playground.html)) +has a **"Run real Python"** toggle for `clarifying_question`. When on, it lazily +boots Pyodide, unpacks the bundle, and runs the **unmodified** +`examples/embodied_ai/35_clarifying_question.py` `run(...)` headless; the real +`Trace` is serialized by [`pir/viz/playground_trace.py`](../pir/viz/playground_trace.py) +into the exact config the existing JS renderer consumes, and the page draws that. +The JS preview stays the default first paint (Pyodide is loaded only on toggle), +so the instant experience is preserved. + +The trace→render JSON is now a **pinned contract**: +[`tests/test_playground_trace.py`](../tests/test_playground_trace.py) runs the +real loop, serializes it, and asserts every field the renderer reads is present +and plain-JSON (no numpy leaks) — exactly the drift guard the risk list calls for. + +Verified locally (CPython simulating Pyodide's unpack-into-cwd, the exact driver +from `playground.js`): for `answer=red` the serializer returns the real +`ask → look → pick` trace (`ambiguous_goal` then resolved belief, pick at +`[32, 56]`), identical to the CLI loop. **Still to confirm in a real browser:** +toggling "Run real Python" boots Pyodide, runs the loop, and the scene/belief/ +timeline redraw from the real trace. + +Deliberately **deferred** (not yet done): deleting the JS `buildClarifyingScenario` +dynamics. It is kept as the no-Pyodide instant fallback so first paint never waits +on a multi-MB download. Full deletion waits until the real path is the verified +default — at which point the JS mock can be dropped and the contract test becomes +the single source of truth. **Phase 2 — tabletop renderer + hero loop (1–2 days).** Add the continuous tabletop renderer and wire `pick_and_retry`. Now the README hero GIF has a diff --git a/pir/viz/playground_trace.py b/pir/viz/playground_trace.py new file mode 100644 index 0000000..73bcbc3 --- /dev/null +++ b/pir/viz/playground_trace.py @@ -0,0 +1,154 @@ +"""Serialize a clarifying-question `Trace` into the playground's render shape. + +The browser playground (docs/playground.js) draws scenes from a plain config +object: an ``initial`` snapshot plus a list of per-step events. Historically that +config was hand-written in JavaScript, a reimplementation of the Python loop that +could silently drift from the tested example. Pyodide lets the browser run the +*real* ``examples/embodied_ai/35_clarifying_question.py`` loop; this module turns +the resulting `Trace` into the exact JSON the JS renderer already consumes, so +the browser draws real Python output instead of a JS mock. + +Everything here is pure Python + JSON-friendly scalars (no numpy, no matplotlib), +so it runs unchanged in CPython, in tests, and in Pyodide. The shape it produces +is pinned by tests/test_playground_trace.py — that test is the drift guard the +design memo (docs/pyodide_playground_strategy.md) calls "the contract". +""" + +from __future__ import annotations + +from math import log2 +from typing import Any + +from pir.core.types import Failure + +# The clarifying tabletop is fixed: red block at (0.32, 0.56), blue at +# (0.68, 0.56). The JS renderer draws on a 0-100 SVG canvas, so positions are +# scaled by 100. This mirrors ClarifyingQuestionWorld.reset(). +_SVG_SCALE = 100.0 + + +def _entropy(distribution: dict[str, float]) -> float: + total = sum(p * log2(p) for p in distribution.values() if p > 0.0) + return -total if total else 0.0 # avoid -0.0 for a point-mass belief + + +def _belief(resolved_color: str | None) -> dict[str, Any]: + """Two-block belief: uniform until clarified, then a point mass.""" + + if resolved_color is None: + distribution = {"red": 0.5, "blue": 0.5} + entropy = _entropy(distribution) + return { + "red": distribution["red"], + "blue": distribution["blue"], + "entropy": entropy, + "askGain": entropy, + "policy": "ask", + } + distribution = { + "red": 1.0 if resolved_color == "red" else 0.0, + "blue": 1.0 if resolved_color == "blue" else 0.0, + } + return { + "red": distribution["red"], + "blue": distribution["blue"], + "entropy": _entropy(distribution), + "askGain": 0.0, + "policy": "act", + } + + +def _failure_kind(info: dict[str, Any]) -> str: + failure = info.get("failure") + return failure.kind if isinstance(failure, Failure) else "" + + +def _action_label(action: dict[str, Any]) -> str: + action_type = action.get("type", "noop") + if action_type == "ask": + return "ask(which_block)" + color = action.get("color") + if color: + return f"{action_type}({color})" + return action_type + + +def _pick_at(info: dict[str, Any]) -> list[float] | None: + position = info.get("pick_position") + if position is None: + return None + return [ + round(float(position[0]) * _SVG_SCALE, 4), + round(float(position[1]) * _SVG_SCALE, 4), + ] + + +def _initial_snapshot(command: str) -> dict[str, Any]: + return { + "type": "tabletop", + "command": command, + "target": "unresolved", + "agentState": "parse_command", + "failure": "none", + "belief": _belief(None), + "picked": None, + "pickAt": None, + "focus": None, + "question": None, + "answer": None, + } + + +def _step_snapshot(command: str, info: dict[str, Any], obs: dict[str, Any]) -> dict[str, Any]: + resolved = info.get("resolved_goal") or {} + color = resolved.get("color") + return { + "type": "tabletop", + "command": command, + "target": color or "unresolved", + "agentState": info.get("agent_state", ""), + "failure": _failure_kind(info) or "none", + "belief": _belief(color), + "picked": obs.get("picked_color"), + "pickAt": _pick_at(info), + "focus": obs.get("focus_color"), + "question": obs.get("last_question") or info.get("question"), + "answer": obs.get("last_answer"), + } + + +def clarifying_trace_to_playground( + trace: Any, + *, + command: str = "pick the block", + answer: str = "red", +) -> dict[str, Any]: + """Convert a clarifying-question `Trace` into the playground config object. + + The returned dict matches what docs/playground.js builds in + ``buildClarifyingScenario`` — ``{command, totalSteps, initial, steps}`` where + each step is ``{action, reward, failure, agentState, snapshot}`` — so the JS + renderer can draw it with no changes. Unlike the JS mock, every field here is + derived from the real loop's observations and info dicts. + """ + + _ = answer # answer is encoded in the trace; kept for a self-describing call site + steps: list[dict[str, Any]] = [] + for action, reward, info, obs in zip( + trace.actions, trace.rewards, trace.infos, trace.observations + ): + steps.append( + { + "action": _action_label(action), + "reward": round(float(reward), 4), + "failure": _failure_kind(info), + "agentState": info.get("agent_state", ""), + "snapshot": _step_snapshot(command, info, obs), + } + ) + return { + "command": command, + "totalSteps": len(steps), + "initial": _initial_snapshot(command), + "steps": steps, + } diff --git a/scripts/build_pyodide_bundle.py b/scripts/build_pyodide_bundle.py index 723f72a..5522639 100644 --- a/scripts/build_pyodide_bundle.py +++ b/scripts/build_pyodide_bundle.py @@ -22,6 +22,7 @@ # get a JS renderer (see docs/pyodide_playground_strategy.md). BUNDLED_EXAMPLES = [ "examples/manipulation/01_pick_and_retry.py", + "examples/embodied_ai/35_clarifying_question.py", ] diff --git a/tests/test_playground_trace.py b/tests/test_playground_trace.py new file mode 100644 index 0000000..0c3fcea --- /dev/null +++ b/tests/test_playground_trace.py @@ -0,0 +1,100 @@ +"""Pin the trace-to-playground JSON contract shared by Python and JS. + +docs/playground.js reads a fixed set of fields off each snapshot. If the Python +serializer (pir/viz/playground_trace.py) stops emitting one of them, the browser +would silently render a blank/garbled scene. These tests run the real clarifying +loop, serialize it, and assert the renderer's contract holds — including that the +output is plain JSON (no numpy leaks that would break json.dumps in Pyodide). +""" + +from __future__ import annotations + +import importlib.util +import json +from pathlib import Path + +from pir.viz.playground_trace import clarifying_trace_to_playground + +ROOT = Path(__file__).resolve().parents[1] + +# Fields docs/playground.js reads off a tabletop snapshot (renderTabletop + +# renderBelief + the status strip). Keep in sync with the renderer. +SNAPSHOT_FIELDS = { + "type", + "command", + "target", + "agentState", + "failure", + "belief", + "picked", + "pickAt", + "focus", + "question", + "answer", +} +BELIEF_FIELDS = {"red", "blue", "entropy", "askGain", "policy"} +STEP_FIELDS = {"action", "reward", "failure", "agentState", "snapshot"} + + +def _run(answer: str): + path = ROOT / "examples" / "embodied_ai" / "35_clarifying_question.py" + spec = importlib.util.spec_from_file_location("clarifying_question_contract", path) + assert spec and spec.loader + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module.run(command="pick the block", answer=answer, render=False) + + +def test_config_shape_matches_renderer_contract() -> None: + config = clarifying_trace_to_playground(_run("red"), answer="red") + + assert config["command"] == "pick the block" + assert config["totalSteps"] == len(config["steps"]) == 3 + + initial = config["initial"] + assert set(initial) == SNAPSHOT_FIELDS + assert initial["type"] == "tabletop" + assert initial["target"] == "unresolved" + assert initial["belief"]["policy"] == "ask" + assert abs(initial["belief"]["entropy"] - 1.0) < 1e-9 + + for step in config["steps"]: + assert set(step) == STEP_FIELDS + snapshot = step["snapshot"] + assert set(snapshot) == SNAPSHOT_FIELDS + assert set(snapshot["belief"]) == BELIEF_FIELDS + + +def test_real_loop_resolves_and_picks_red() -> None: + config = clarifying_trace_to_playground(_run("red"), answer="red") + steps = config["steps"] + + # ask -> look -> pick, the real loop's three steps. + assert [step["action"] for step in steps] == [ + "ask(which_block)", + "look(red)", + "pick(red)", + ] + assert steps[0]["failure"] == "ambiguous_goal" + assert steps[0]["snapshot"]["target"] == "red" + assert steps[0]["snapshot"]["belief"]["policy"] == "act" + + final = steps[-1] + assert final["agentState"] == "done" + assert final["snapshot"]["picked"] == "red" + assert final["snapshot"]["pickAt"] == [32.0, 56.0] + + +def test_answer_blue_resolves_blue() -> None: + config = clarifying_trace_to_playground(_run("blue"), answer="blue") + final = config["steps"][-1] + assert final["action"] == "pick(blue)" + assert final["snapshot"]["picked"] == "blue" + assert final["snapshot"]["pickAt"] == [68.0, 56.0] + + +def test_config_is_plain_json() -> None: + # Pyodide returns this via json.dumps; a stray numpy array would raise here. + config = clarifying_trace_to_playground(_run("red"), answer="red") + reparsed = json.loads(json.dumps(config)) + assert reparsed == config