diff --git a/docs/playground.html b/docs/playground.html index be5ab46..f4027b4 100644 --- a/docs/playground.html +++ b/docs/playground.html @@ -41,6 +41,7 @@

Playground

Scenario diff --git a/docs/playground.js b/docs/playground.js index e4da4f4..fc64a51 100644 --- a/docs/playground.js +++ b/docs/playground.js @@ -16,10 +16,12 @@ const sourceLinks = { clarifying: "https://github.com/rsasaki0109/PythonInteractiveRobotics/blob/main/examples/embodied_ai/35_clarifying_question.py", + pickretry: + "https://github.com/rsasaki0109/PythonInteractiveRobotics/blob/main/examples/manipulation/01_pick_and_retry.py", household: "https://github.com/rsasaki0109/PythonInteractiveRobotics/blob/main/examples/embodied_ai/36_household_task_agent.py", }; - const scenarioValues = new Set(["clarifying", "household"]); + const scenarioValues = new Set(["clarifying", "pickretry", "household"]); const answerValues = new Set(["red", "blue"]); const failureValues = new Set([ "all", @@ -74,7 +76,7 @@ let copyStatusTimer = null; elements.scenario.addEventListener("change", () => { - if (elements.scenario.value !== "clarifying") { + if (elements.scenario.value === "household") { elements.realPython.checked = false; } rebuild(); @@ -118,7 +120,15 @@ }); render(); - if (readAutoplayParam()) { + if (elements.scenario.value === "pickretry") { + // Real-Python-only scenario: boot Pyodide and run it on load so shared + // ?scenario=pickretry links show the real loop, not the static placeholder. + rebuild().then(() => { + if (readAutoplayParam()) { + startRun(); + } + }); + } else if (readAutoplayParam()) { window.setTimeout(startRun, 180); } @@ -130,11 +140,16 @@ if (realConfig) { config = realConfig; source = "python"; + } else if (scenario === "household") { + config = buildHouseholdScenario(answer); + source = "js"; + } else if (scenario === "pickretry") { + // No JS dynamics for pick_and_retry; show the static tabletop until the + // real Python config (from Pyodide) is available. + config = buildPickRetryPlaceholder(); + source = "pending"; } else { - config = - scenario === "household" - ? buildHouseholdScenario(answer) - : buildClarifyingScenario(answer); + config = buildClarifyingScenario(answer); source = "js"; } return { @@ -154,20 +169,31 @@ // Otherwise we use the instant JS preview so first paint never waits on Pyodide. async function rebuild() { stopRun(); + const scenario = elements.scenario.value; + // pick_and_retry is real-Python-only: its dynamics are stochastic and + // belief-driven, so a hand-written JS mock would be exactly the drift we are + // removing. clarifying keeps a JS preview and opts into real Python. const useReal = - elements.realPython.checked && elements.scenario.value === "clarifying"; + scenario === "pickretry" || + (elements.realPython.checked && scenario === "clarifying"); if (useReal) { setRealStatus("booting Pyodide + running real Python…"); try { - const config = await fetchRealClarifyingConfig(elements.answer.value); + const config = + scenario === "pickretry" + ? await fetchRealPickRetryConfig() + : await fetchRealClarifyingConfig(elements.answer.value); state = buildState(config); - setRealStatus( - "running real Python: examples/embodied_ai/35_clarifying_question.py" - ); + setRealStatus("running real Python: " + sourcePath(scenario)); } catch (error) { - elements.realPython.checked = false; - state = buildState(); - setRealStatus("Pyodide failed (" + error + ") — showing JS preview", true); + if (scenario === "pickretry") { + state = buildState(); + setRealStatus("Pyodide failed (" + error + ") — pick_and_retry needs it", true); + } else { + elements.realPython.checked = false; + state = buildState(); + setRealStatus("Pyodide failed (" + error + ") — showing JS preview", true); + } } } else { state = buildState(); @@ -177,6 +203,12 @@ render(); } + function sourcePath(scenario) { + return scenario === "pickretry" + ? "examples/manipulation/01_pick_and_retry.py" + : "examples/embodied_ai/35_clarifying_question.py"; + } + function setRealStatus(message, isError) { if (!elements.realStatus) { return; @@ -243,6 +275,66 @@ return config; } + // Reads the real Tabletop2D geometry (true object, occluder, camera) and runs + // the unmodified pick_and_retry loop, then serializes with the same helper + // pinned by tests/test_playground_trace.py. + const PICKRETRY_DRIVER = [ + "import json, os, sys, importlib.util", + "cwd = os.getcwd()", + "if cwd not in sys.path:", + " sys.path.insert(0, cwd)", + "class _NoMatplotlib:", + " def find_spec(self, name, path=None, target=None):", + " if name == 'matplotlib' or name.startswith('matplotlib.'):", + " raise ImportError('matplotlib is intentionally unavailable on the headless browser path')", + " return None", + "sys.meta_path.insert(0, _NoMatplotlib())", + "path = os.path.join(cwd, 'examples', 'manipulation', '01_pick_and_retry.py')", + "spec = importlib.util.spec_from_file_location('pick_and_retry', path)", + "mod = importlib.util.module_from_spec(spec)", + "spec.loader.exec_module(mod)", + "from pir.viz.playground_trace import pick_and_retry_trace_to_playground", + "from pir.worlds.tabletop_2d import Tabletop2D", + "geom = Tabletop2D(seed=3)", + "trace = mod.run(seed=3, render=False)", + "json.dumps(pick_and_retry_trace_to_playground(", + " trace,", + " object_xy=[float(geom.obj.position[0]), float(geom.obj.position[1])],", + " occluder=[float(v) for v in geom.occluder],", + " camera=[float(geom.camera_pos[0]), float(geom.camera_pos[1])],", + "))", + ].join("\n"); + + async function fetchRealPickRetryConfig() { + const cacheKey = "pickretry:3"; + if (realConfigCache[cacheKey]) { + return realConfigCache[cacheKey]; + } + const pyodide = await ensurePyodide(); + const json = await pyodide.runPythonAsync(PICKRETRY_DRIVER); + const config = JSON.parse(json); + realConfigCache[cacheKey] = config; + return config; + } + + function buildPickRetryPlaceholder() { + const initial = { + type: "tabletop2d", + command: "pick the block", + target: "block", + agentState: "scan_for_object", + failure: "none", + object: [64, 54], + occluder: [43, 42, 57, 68], + camera: [16, 50], + detection: null, + pickAt: null, + holding: false, + belief: { meanXY: null, radius: null, attempts: 0, retries: 0, policy: "scan" }, + }; + return { command: "pick the block", totalSteps: 0, initial, steps: [] }; + } + function stepOnce() { if (state.index >= state.config.steps.length) { render(); @@ -729,7 +821,14 @@ elements.run.disabled = state.index >= state.config.steps.length && !timer; elements.copyTrace.disabled = state.trace.length === 0; elements.compare.disabled = state.scenario !== "household"; + // clarifying opts in; pick_and_retry is always real Python; household is JS. elements.realPython.disabled = state.scenario !== "clarifying"; + if (state.scenario === "pickretry") { + elements.realPython.checked = true; + } else if (state.scenario === "household") { + elements.realPython.checked = false; + } + elements.answer.disabled = state.scenario === "pickretry"; renderReplay(replayIndex); renderCompare(); @@ -819,6 +918,12 @@ } elements.beliefPanel.hidden = false; + // Spatial belief (pick_and_retry): position estimate + shrinking uncertainty. + if (typeof belief.red !== "number") { + renderSpatialBelief(belief); + return; + } + const bars = document.createElement("div"); bars.className = "belief-bars"; [ @@ -847,6 +952,51 @@ elements.beliefPanel.appendChild(metrics); } + function renderSpatialBelief(belief) { + // One "uncertainty" bar (belief radius, smaller = more confident) plus the + // attempt/retry counters and the current policy. + const bars = document.createElement("div"); + bars.className = "belief-bars"; + const hasRadius = typeof belief.radius === "number"; + // radius is on the 0..100 canvas; ~14 is the agent's initial uncertainty. + const fraction = hasRadius ? Math.min(1, belief.radius / 14) : 0; + const row = document.createElement("div"); + row.className = "belief-row"; + const name = document.createElement("span"); + name.textContent = "uncertainty"; + row.appendChild(name); + const track = document.createElement("div"); + track.className = "belief-track"; + const fill = document.createElement("div"); + fill.className = "belief-fill belief-red"; + fill.style.width = Math.round(fraction * 100) + "%"; + track.appendChild(fill); + row.appendChild(track); + const value = document.createElement("strong"); + value.className = "belief-value"; + value.textContent = hasRadius ? belief.radius.toFixed(1) : "—"; + row.appendChild(value); + bars.appendChild(row); + + const metrics = document.createElement("div"); + metrics.className = "belief-metrics"; + [ + ["attempts", String(belief.attempts || 0)], + ["retries", String(belief.retries || 0)], + ["policy", belief.policy || "scan"], + ].forEach(([label, value]) => { + const metric = document.createElement("span"); + metric.textContent = label; + const strong = document.createElement("strong"); + strong.textContent = value; + metric.appendChild(strong); + metrics.appendChild(metric); + }); + + elements.beliefPanel.appendChild(bars); + elements.beliefPanel.appendChild(metrics); + } + function renderBeliefRow(label, probability) { const row = document.createElement("div"); row.className = "belief-row"; @@ -928,11 +1078,117 @@ elements.scene.textContent = ""; if (snapshot.type === "household") { renderHousehold(snapshot); + } else if (snapshot.type === "tabletop2d") { + renderTabletop2D(snapshot); } else { renderTabletop(snapshot); } } + // Continuous tabletop for pick_and_retry: true object vs. the agent's spatial + // belief (mean + uncertainty radius), the occluder, camera, last detection, + // and the current pick attempt. Mirrors the matplotlib render in tabletop_2d.py. + function renderTabletop2D(snapshot) { + const svg = createSvg("svg", { + class: "tabletop-svg", + viewBox: "0 0 100 100", + "aria-label": "Pick and retry tabletop", + }); + svg.appendChild(createSvg("rect", { x: 4, y: 4, width: 92, height: 92, rx: 2, fill: "#fbfaf7" })); + for (let i = 10; i < 100; i += 10) { + svg.appendChild(createSvg("line", { x1: i, y1: 5, x2: i, y2: 95, class: "tabletop-grid" })); + svg.appendChild(createSvg("line", { x1: 5, y1: i, x2: 95, y2: i, class: "tabletop-grid" })); + } + + const occ = snapshot.occluder; + if (occ) { + svg.appendChild( + createSvg("rect", { + x: occ[0], + y: occ[1], + width: occ[2] - occ[0], + height: occ[3] - occ[1], + fill: "#2a2a2a", + opacity: 0.16, + }) + ); + } + + // true object (ground truth the agent never sees directly) + if (snapshot.object && !snapshot.holding) { + svg.appendChild( + createSvg("circle", { cx: snapshot.object[0], cy: snapshot.object[1], r: 4.5, fill: "#d94b3d", opacity: 0.85 }) + ); + } + + // camera + if (snapshot.camera) { + svg.appendChild( + createSvg("rect", { + x: snapshot.camera[0] - 2, + y: snapshot.camera[1] - 2, + width: 4, + height: 4, + fill: "#2b6cb0", + }) + ); + } + + // last detection (orange x) + if (snapshot.detection) { + svg.appendChild(drawCross(snapshot.detection, 3.5, "#dd7711", 1.6)); + } + + // agent belief: mean + uncertainty radius (green dashed circle) + const belief = snapshot.belief || {}; + if (belief.meanXY) { + const r = Math.max(2, belief.radius || 0); + svg.appendChild( + createSvg("circle", { + cx: belief.meanXY[0], + cy: belief.meanXY[1], + r, + fill: "none", + stroke: "#47743a", + "stroke-width": 1.6, + "stroke-dasharray": "3 2", + }) + ); + svg.appendChild( + createSvg("circle", { cx: belief.meanXY[0], cy: belief.meanXY[1], r: 1.1, fill: "#47743a" }) + ); + } + + // pick attempt (black +) + if (snapshot.pickAt) { + svg.appendChild(drawCross(snapshot.pickAt, 4, "#17201f", 1.8)); + } + + const caption = createSvg("text", { x: 7, y: 11, class: "svg-small" }); + const bits = ["attempts: " + (belief.attempts || 0)]; + if (snapshot.holding) { + bits.push("holding block"); + } else if (snapshot.failure && snapshot.failure !== "none") { + bits.push(snapshot.failure); + } + caption.textContent = bits.join(" "); + svg.appendChild(caption); + elements.scene.appendChild(svg); + } + + function drawCross(point, size, color, width) { + return createSvg("path", { + d: + "M " + (point[0] - size) + " " + point[1] + + " L " + (point[0] + size) + " " + point[1] + + " M " + point[0] + " " + (point[1] - size) + + " L " + point[0] + " " + (point[1] + size), + stroke: color, + "stroke-width": width, + "stroke-linecap": "round", + }); + } + function renderTabletop(snapshot) { const svg = createSvg("svg", { class: "tabletop-svg", diff --git a/docs/pyodide/pir_bundle.zip b/docs/pyodide/pir_bundle.zip index 6dcbcb1..93752d9 100644 Binary files a/docs/pyodide/pir_bundle.zip and b/docs/pyodide/pir_bundle.zip differ diff --git a/docs/pyodide_playground_strategy.md b/docs/pyodide_playground_strategy.md index 42888a2..ef7d298 100644 --- a/docs/pyodide_playground_strategy.md +++ b/docs/pyodide_playground_strategy.md @@ -129,9 +129,33 @@ on a multi-MB download. Full deletion waits until the real path is the verified default — at which point the JS mock can be dropped and the contract test becomes the single source of truth. -**Phase 2 — tabletop renderer + hero loop (1–2 days).** Add the continuous -tabletop renderer and wire `pick_and_retry`. Now the README hero GIF has a -"run it yourself" twin. +**Phase 2 — tabletop renderer + hero loop. ✅ built (Python path verified; needs +a browser check).** The playground has a **"Pick and retry (real Python)"** +scenario. It is real-Python-only: selecting it boots Pyodide and runs the +**unmodified** `examples/manipulation/01_pick_and_retry.py` `run(seed=3)` loop — +there is deliberately no JS mock, because the dynamics are stochastic and +belief-driven and a hand-faked version would reintroduce the exact drift Pyodide +removes. The README hero GIF now has a "run it yourself" twin. + +A continuous `tabletop2d` renderer draws the real scene: the true object vs. the +agent's **spatial belief** (mean + a shrinking uncertainty radius), the occluder, +the camera, the last detection, and each pick attempt — mirroring the matplotlib +render in `pir/worlds/tabletop_2d.py`. The belief panel switches to a spatial +layout (uncertainty bar + attempts/retries/policy). + +To make the loop's belief inspectable without the live agent object, +`01_pick_and_retry.py` now records `belief_mean`/`belief_radius`/`retry_count` +into the trace `info` each step (belief becomes first-class in the `Trace`). +Scene geometry (true object, occluder, camera) is ground truth the agent never +sees, so the driver reads it from a real `Tabletop2D` and passes it to +`pick_and_retry_trace_to_playground` rather than hard-coding world constants. +`tests/test_playground_trace.py` pins this second contract too. + +Verified locally (unpacked-bundle sim, the exact `playground.js` driver): +`seed=3` yields the hero story — `scan → pick(miss) → pick(miss) → pick(done)`, +belief radius shrinking 10 → 9.8 → 9.5 → 2.5, `holding=True`, `retries=2`. +**Still to confirm in a real browser:** selecting the scenario boots Pyodide and +the tabletop/belief/timeline redraw from the real trace. **Phase 3 — editable code cell (1–2 days).** Expose the agent's `act()` in a small editor so visitors can tweak the retry/belief logic and re-run. This is diff --git a/examples/manipulation/01_pick_and_retry.py b/examples/manipulation/01_pick_and_retry.py index 074df49..a1f4750 100644 --- a/examples/manipulation/01_pick_and_retry.py +++ b/examples/manipulation/01_pick_and_retry.py @@ -105,6 +105,13 @@ def run(seed: int = 3, render: bool = True, max_steps: int = 40) -> Trace: result = env.step(action) obs, reward, done, info = result.as_tuple() agent.update(obs, reward, info) + # Record the agent's belief in the trace so it is inspectable without the + # live agent object (used by the browser playground and trace tooling). + info["belief_mean"] = ( + None if agent.belief_mean is None else agent.belief_mean.copy() + ) + info["belief_radius"] = float(agent.belief_radius) + info["retry_count"] = agent.retry_count trace.append(obs, action, reward, info) if render: diff --git a/pir/viz/playground_trace.py b/pir/viz/playground_trace.py index 73bcbc3..9dd8d5e 100644 --- a/pir/viz/playground_trace.py +++ b/pir/viz/playground_trace.py @@ -152,3 +152,137 @@ def clarifying_trace_to_playground( "initial": _initial_snapshot(command), "steps": steps, } + + +# --- pick_and_retry (continuous tabletop) ----------------------------------- +# +# This loop has no red/blue distribution; its "belief" is a 2D position estimate +# (mean + shrinking radius) that the JS renderer draws spatially. Scene geometry +# (true object, occluder, initial camera) is ground truth the agent never sees, +# so the caller passes it in from the real Tabletop2D rather than this module +# hard-coding world constants. Everything emitted here is plain JSON. + + +def _xy(point: Any) -> list[float]: + return [round(float(point[0]) * _SVG_SCALE, 4), round(float(point[1]) * _SVG_SCALE, 4)] + + +def _pick_and_retry_state(info: dict[str, Any], has_belief: bool) -> str: + if info.get("success"): + return "done" + if _failure_kind(info) == "grasp_miss": + return "update_belief_and_retry" + if (info.get("action_type") or "") == "look": + return "scan_for_object" + return info.get("action_type") or "noop" + + +def _pick_and_retry_policy(info: dict[str, Any], has_belief: bool) -> str: + if info.get("success"): + return "done" + if not has_belief: + return "scan" + if _failure_kind(info) == "grasp_miss": + return "retry" + return "act" + + +def pick_and_retry_trace_to_playground( + trace: Any, + *, + object_xy: Any, + occluder: Any, + camera: Any, + command: str = "pick the block", +) -> dict[str, Any]: + """Convert a pick_and_retry `Trace` into the playground's tabletop2d config. + + ``object_xy`` / ``occluder`` / ``camera`` are the real Tabletop2D geometry in + table coordinates (0..1); they are scaled to the renderer's 0..100 canvas. + """ + + obj = _xy(object_xy) + occ = [round(float(v) * _SVG_SCALE, 4) for v in occluder] + cam0 = _xy(camera) + + initial = { + "type": "tabletop2d", + "command": command, + "target": "block", + "agentState": "scan_for_object", + "failure": "none", + "object": obj, + "occluder": occ, + "camera": cam0, + "detection": None, + "pickAt": None, + "holding": False, + "belief": {"meanXY": None, "radius": None, "attempts": 0, "retries": 0, "policy": "scan"}, + } + + steps: list[dict[str, Any]] = [] + last_detection: list[float] | None = None + for action, reward, info, obs in zip( + trace.actions, trace.rewards, trace.infos, trace.observations + ): + detections = obs.get("detections") or [] + if detections: + last_detection = _xy(detections[0]["position"]) + + belief_mean = info.get("belief_mean") + mean_xy = None if belief_mean is None else _xy(belief_mean) + belief_radius = info.get("belief_radius") + radius_svg = ( + None if belief_radius is None else round(float(belief_radius) * _SVG_SCALE, 3) + ) + pick_position = info.get("pick_position") + pick_at = None if pick_position is None else _xy(pick_position) + holding = bool((obs.get("gripper") or {}).get("holding")) or bool(info.get("success")) + attempts = int(info.get("attempts", 0)) + retries = int(info.get("retry_count", 0)) + failure = _failure_kind(info) + action_type = info.get("action_type") or action.get("type", "noop") + + if action_type == "look": + label = "look(scan)" + elif action_type == "pick": + label = f"pick(attempt {attempts})" + else: + label = action_type + + snapshot = { + "type": "tabletop2d", + "command": command, + "target": "held" if holding else "block", + "agentState": _pick_and_retry_state(info, mean_xy is not None), + "failure": failure or "none", + "object": obj, + "occluder": occ, + "camera": _xy(obs["camera"]) if obs.get("camera") is not None else cam0, + "detection": None if holding else last_detection, + "pickAt": pick_at, + "holding": holding, + "belief": { + "meanXY": mean_xy, + "radius": radius_svg, + "attempts": attempts, + "retries": retries, + "policy": _pick_and_retry_policy(info, mean_xy is not None), + }, + } + steps.append( + { + "action": label, + "reward": round(float(reward), 4), + "failure": failure, + "agentState": snapshot["agentState"], + "snapshot": snapshot, + } + ) + + return { + "command": command, + "totalSteps": len(steps), + "initial": initial, + "steps": steps, + } diff --git a/tests/test_playground_trace.py b/tests/test_playground_trace.py index 0c3fcea..fabeae4 100644 --- a/tests/test_playground_trace.py +++ b/tests/test_playground_trace.py @@ -13,7 +13,11 @@ import json from pathlib import Path -from pir.viz.playground_trace import clarifying_trace_to_playground +from pir.viz.playground_trace import ( + clarifying_trace_to_playground, + pick_and_retry_trace_to_playground, +) +from pir.worlds.tabletop_2d import Tabletop2D ROOT = Path(__file__).resolve().parents[1] @@ -98,3 +102,80 @@ def test_config_is_plain_json() -> None: config = clarifying_trace_to_playground(_run("red"), answer="red") reparsed = json.loads(json.dumps(config)) assert reparsed == config + + +# --- pick_and_retry (continuous tabletop) ----------------------------------- + +TABLETOP2D_FIELDS = { + "type", + "command", + "target", + "agentState", + "failure", + "object", + "occluder", + "camera", + "detection", + "pickAt", + "holding", + "belief", +} +SPATIAL_BELIEF_FIELDS = {"meanXY", "radius", "attempts", "retries", "policy"} + + +def _run_pick_and_retry(seed: int = 3): + path = ROOT / "examples" / "manipulation" / "01_pick_and_retry.py" + spec = importlib.util.spec_from_file_location("pick_and_retry_contract", path) + assert spec and spec.loader + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module.run(seed=seed, render=False) + + +def _pick_and_retry_config(seed: int = 3): + geom = Tabletop2D(seed=seed) + return pick_and_retry_trace_to_playground( + _run_pick_and_retry(seed), + object_xy=list(map(float, geom.obj.position)), + occluder=[float(v) for v in geom.occluder], + camera=list(map(float, geom.camera_pos)), + ) + + +def test_pick_and_retry_shape_matches_tabletop2d_contract() -> None: + config = _pick_and_retry_config() + + assert config["command"] == "pick the block" + assert config["totalSteps"] == len(config["steps"]) >= 1 + assert set(config["initial"]) == TABLETOP2D_FIELDS + assert config["initial"]["type"] == "tabletop2d" + # occluder is the real Tabletop2D rectangle scaled to the 0..100 canvas. + assert config["initial"]["occluder"] == [43.0, 42.0, 57.0, 68.0] + + for step in config["steps"]: + assert set(step) == STEP_FIELDS + snapshot = step["snapshot"] + assert set(snapshot) == TABLETOP2D_FIELDS + assert set(snapshot["belief"]) == SPATIAL_BELIEF_FIELDS + + +def test_pick_and_retry_misses_then_picks_and_belief_appears() -> None: + config = _pick_and_retry_config(seed=3) + steps = config["steps"] + + # seed=3 is the hero seed: at least one grasp_miss before the pick succeeds. + assert any(step["failure"] == "grasp_miss" for step in steps) + + final = steps[-1] + assert final["agentState"] == "done" + assert final["snapshot"]["holding"] is True + + # Belief becomes a concrete spatial estimate once the object is detected. + assert any(step["snapshot"]["belief"]["meanXY"] is not None for step in steps) + # Retry count is non-decreasing and ends at >=1 (it missed at least once). + assert final["snapshot"]["belief"]["retries"] >= 1 + + +def test_pick_and_retry_config_is_plain_json() -> None: + config = _pick_and_retry_config() + assert json.loads(json.dumps(config)) == config