rsasaki0109 · rsasaki0109 · Jun 4, 2026 · Jun 4, 2026
diff --git a/docs/playground.css b/docs/playground.css
@@ -149,6 +149,55 @@ button:disabled {
   color: #b5482f;
 }
 
+/* Editable-agent panel: bordered, rounded container. overflow: hidden clips the
+   header bar and the textarea to the rounded corners. */
+.code-panel {
+  border: 1px solid #d7dcd6;
+  border-radius: 10px;
+  margin-top: 14px;
+  overflow: hidden;
+}
+
+/* Header bar: description on one side, action buttons on the other; the two
+   wrap onto separate rows when the panel is too narrow. */
+.code-head {
+  align-items: center;
+  background: #eef1ec;
+  display: flex;
+  flex-wrap: wrap;
+  gap: 8px;
+  justify-content: space-between;
+  padding: 8px 12px;
+}
+
+/* Description text; falls back to a fixed colour when --ink-soft is not defined. */
+.code-head span {
+  color: var(--ink-soft, #5a6b67);
+  font-size: 0.82rem;
+}
+
+/* Inline identifiers inside the description. */
+.code-head code {
+  background: #dfe4dd;
+  border-radius: 3px;
+  padding: 0 3px;
+}
+
+/* Button group in the header bar. */
+.code-actions {
+  display: flex;
+  gap: 8px;
+}
+
+/* The source editor (a textarea): dark theme, monospace, full panel width.
+   border-box keeps the padding inside the 100% width, white-space: pre stops
+   long lines from wrapping, and resizing is limited to the vertical axis. */
+.code-cell {
+  background: #0f1419;
+  border: 0;
+  box-sizing: border-box;
+  color: #d6deeb;
+  display: block;
+  font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
+  font-size: 0.8rem;
+  line-height: 1.45;
+  padding: 12px;
+  resize: vertical;
+  tab-size: 4;
+  white-space: pre;
+  width: 100%;
+}
+
 .status-strip {
   display: grid;
   gap: 10px;

diff --git a/docs/playground.html b/docs/playground.html
@@ -113,6 +113,17 @@ <h1 id="playground-title">Playground</h1>
           <div id="comparePanel" class="compare-panel" aria-label="Policy comparison" hidden></div>
 
           <div id="scene" class="scene" role="img" aria-label="Current playground scene"></div>
+
+          <!-- Editable-agent panel. Starts hidden; playground.js un-hides it only for
+               the pickretry scenario and fills the textarea with the agent source. -->
+          <section id="codePanel" class="code-panel" aria-label="Editable agent" hidden>
+            <div class="code-head">
+              <span>Agent brain — edit <code>act()</code> / <code>offset_schedule</code>, then re-run the real loop in your browser</span>
+              <div class="code-actions">
+                <button id="resetCodeButton" type="button">Reset code</button>
+                <button id="runCodeButton" type="button">Run edited agent</button>
+              </div>
+            </div>
+            <textarea id="codeCell" class="code-cell" spellcheck="false" rows="20" aria-label="Agent source code"></textarea>
+          </section>
         </div>
 
         <aside class="trace-panel" aria-labelledby="trace-title">

diff --git a/docs/playground.js b/docs/playground.js
@@ -49,6 +49,10 @@
     compare: document.getElementById("compareToggle"),
     realPython: document.getElementById("realPythonToggle"),
     realStatus: document.getElementById("realStatus"),
+    codePanel: document.getElementById("codePanel"),
+    codeCell: document.getElementById("codeCell"),
+    runCode: document.getElementById("runCodeButton"),
+    resetCode: document.getElementById("resetCodeButton"),
     reset: document.getElementById("resetButton"),
     step: document.getElementById("stepButton"),
     run: document.getElementById("runButton"),
@@ -95,6 +99,12 @@
   elements.realPython.addEventListener("change", () => {
     rebuild();
   });
+  // Editable-agent buttons. Both handlers are async and report failures from
+  // their own try/catch through setRealStatus, so the promises are not awaited.
+  elements.runCode.addEventListener("click", () => {
+    runEditedAgent();
+  });
+  elements.resetCode.addEventListener("click", () => {
+    resetEditedAgent();
+  });
   elements.replay.addEventListener("input", () => {
     state.replayIndex = clampReplayIndex(elements.replay.value);
     render();
@@ -185,6 +195,9 @@
             : await fetchRealClarifyingConfig(elements.answer.value);
         state = buildState(config);
         setRealStatus("running real Python: " + sourcePath(scenario));
+        if (scenario === "pickretry") {
+          populateDefaultAgentSource();
+        }
       } catch (error) {
         if (scenario === "pickretry") {
           state = buildState();
@@ -335,6 +348,118 @@
     return { command: "pick the block", totalSteps: 0, initial, steps: [] };
   }
 
+  // --- Editable agent "brain" (Phase 3) ---------------------------------------
+  // Visitors edit the real PickAndRetryAgent and re-run it against the real
+  // Tabletop2D, entirely in their own browser (Pyodide is a client-side WASM
+  // sandbox — exec'ing the edited code is no more privileged than a local REPL).
+  let defaultAgentSource = null;
+
+  // Python script run through pyodide.runPythonAsync to obtain the editor's
+  // starting text. It loads examples/manipulation/01_pick_and_retry.py as a module
+  // (path resolved against the interpreter's current working directory) and
+  // evaluates to a short import header followed by
+  // inspect.getsource(PickAndRetryAgent). The header supplies imports for the class
+  // source when it is later exec'd on its own.
+  // NOTE(review): confirm the header covers every name the class body uses.
+  const AGENT_SOURCE_DRIVER = [
+    "import os, sys, importlib.util, inspect",
+    "cwd = os.getcwd()",
+    "if cwd not in sys.path:",
+    "    sys.path.insert(0, cwd)",
+    "path = os.path.join(cwd, 'examples', 'manipulation', '01_pick_and_retry.py')",
+    "spec = importlib.util.spec_from_file_location('pick_and_retry_src', path)",
+    "mod = importlib.util.module_from_spec(spec)",
+    "sys.modules[spec.name] = mod  # inspect.getsource needs the module registered",
+    "spec.loader.exec_module(mod)",
+    "header = 'from __future__ import annotations\\nimport numpy as np\\nfrom typing import Any\\nfrom pir.core.types import Failure\\n\\n\\n'",
+    "header + inspect.getsource(mod.PickAndRetryAgent)",
+  ].join("\n");
+
+  // Execs the user's source (which must define PickAndRetryAgent) and runs it via
+  // the example's own run_agent(), so the loop is identical to the CLI's run().
+  // USER_SRC is injected through pyodide.globals to avoid string-escaping issues.
+  const PICKRETRY_EDIT_DRIVER = [
+    "import json, os, sys, importlib.util",
+    "cwd = os.getcwd()",
+    "if cwd not in sys.path:",
+    "    sys.path.insert(0, cwd)",
+    "class _NoMatplotlib:",
+    "    def find_spec(self, name, path=None, target=None):",
+    "        if name == 'matplotlib' or name.startswith('matplotlib.'):",
+    "            raise ImportError('matplotlib is intentionally unavailable on the headless browser path')",
+    "        return None",
+    "sys.meta_path.insert(0, _NoMatplotlib())",
+    "path = os.path.join(cwd, 'examples', 'manipulation', '01_pick_and_retry.py')",
+    "spec = importlib.util.spec_from_file_location('pick_and_retry', path)",
+    "mod = importlib.util.module_from_spec(spec)",
+    "spec.loader.exec_module(mod)",
+    "from pir.viz.playground_trace import pick_and_retry_trace_to_playground",
+    "from pir.worlds.tabletop_2d import Tabletop2D",
+    "ns = {}",
+    "exec(USER_SRC, ns)",
+    "Agent = ns.get('PickAndRetryAgent')",
+    "if Agent is None:",
+    "    raise ValueError('Your code must define a class named PickAndRetryAgent')",
+    "trace = mod.run_agent(Agent(), seed=3, render=False)",
+    "geom = Tabletop2D(seed=3)",
+    "json.dumps(pick_and_retry_trace_to_playground(",
+    "    trace,",
+    "    object_xy=[float(geom.obj.position[0]), float(geom.obj.position[1])],",
+    "    occluder=[float(v) for v in geom.occluder],",
+    "    camera=[float(geom.camera_pos[0]), float(geom.camera_pos[1])],",
+    "))",
+  ].join("\n");
+
+  async function getDefaultAgentSource() {
+    if (defaultAgentSource !== null) {
+      return defaultAgentSource;
+    }
+    const pyodide = await ensurePyodide();
+    defaultAgentSource = await pyodide.runPythonAsync(AGENT_SOURCE_DRIVER);
+    return defaultAgentSource;
+  }
+
+  async function populateDefaultAgentSource() {
+    if (elements.codeCell.value.trim()) {
+      return;
+    }
+    try {
+      elements.codeCell.value = await getDefaultAgentSource();
+    } catch (error) {
+      // Non-fatal: the run already worked; leave the editor empty.
+    }
+  }
+
+  async function runEditedAgent() {
+    const code = elements.codeCell.value;
+    if (!code.trim()) {
+      setRealStatus("nothing to run — the editor is empty", true);
+      return;
+    }
+    stopRun();
+    elements.runCode.disabled = true;
+    setRealStatus("running your edited agent…");
+    try {
+      const pyodide = await ensurePyodide();
+      pyodide.globals.set("USER_SRC", code);
+      const json = await pyodide.runPythonAsync(PICKRETRY_EDIT_DRIVER);
+      state = buildState(JSON.parse(json));
+      setRealStatus("ran your edited agent — " + state.config.totalSteps + " steps");
+      render();
+    } catch (error) {
+      setRealStatus("your agent raised: " + error, true);
+    } finally {
+      elements.runCode.disabled = false;
+    }
+  }
+
+  async function resetEditedAgent() {
+    elements.codeCell.value = "";
+    setRealStatus("restoring the original agent…");
+    try {
+      elements.codeCell.value = await getDefaultAgentSource();
+      const config = await fetchRealPickRetryConfig();
+      state = buildState(config);
+      setRealStatus("running real Python: " + sourcePath("pickretry"));
+      render();
+    } catch (error) {
+      setRealStatus("reset failed: " + error, true);
+    }
+  }
+
   function stepOnce() {
     if (state.index >= state.config.steps.length) {
       render();
@@ -829,6 +954,7 @@
       elements.realPython.checked = false;
     }
     elements.answer.disabled = state.scenario === "pickretry";
+    elements.codePanel.hidden = state.scenario !== "pickretry";
 
     renderReplay(replayIndex);
     renderCompare();

diff --git a/docs/pyodide/pir_bundle.zip b/docs/pyodide/pir_bundle.zip
diff --git a/docs/pyodide_playground_strategy.md b/docs/pyodide_playground_strategy.md
@@ -157,13 +157,31 @@ belief radius shrinking 10 → 9.8 → 9.5 → 2.5, `holding=True`, `retries=2`.
 **Still to confirm in a real browser:** selecting the scenario boots Pyodide and
 the tabletop/belief/timeline redraw from the real trace.
 
-**Phase 3 — editable code cell (1–2 days).** Expose the agent's `act()` in a
-small editor so visitors can tweak the retry/belief logic and re-run. This is
-the "wow, I can edit the robot's brain in the browser" moment that converts to
-stars.
-
-Ship Phase 0–1 behind the existing playground before any Hacker News launch;
-Phases 2–3 can follow the launch.
+**Phase 3 — editable code cell. ✅ built (Python path verified; needs a browser
+check).** The pick_and_retry scenario now shows an **"Agent brain"** editor
+pre-filled with the *real* `PickAndRetryAgent` source (fetched via
+`inspect.getsource` so it can never drift from the file). Editing it and clicking
+**Run edited agent** execs the user's class in Pyodide and runs it against the
+real `Tabletop2D` via the example's own `run_agent(...)` — the same loop the CLI
+uses, so there is no second loop to drift. Syntax/runtime errors surface inline.
+
+This is the "edit the robot's brain in the browser" moment. The edited code runs
+entirely client-side (Pyodide is a WASM sandbox — exec'ing it is no more
+privileged than a local REPL), `USER_SRC` is passed via `pyodide.globals` to
+avoid string-escaping, and a custom agent that drops the belief attributes still
+runs (`run_agent` reads them defensively).
+
+Verified locally (unpacked-bundle sim, the exact drivers): the default source
+reproduces `seed=3` (`4 steps, retries=2`); removing the deliberate first offset
+makes it grab the belief mean immediately (`2 steps, retries=0`) — a live lesson
+in *why* the retry schedule exists; malformed code raises a caught error.
+`tests/test_playground_trace.py` exercises `run_agent` with a custom agent.
+**Still to confirm in a real browser:** the editor populates with the real
+source, edits re-run, and errors render inline.
+
+Phases 0–3 are all built and Python-verified; the remaining work is a single
+browser pass over them and (optionally) deleting the JS `clarifying` preview
+once the real path is the trusted default.
 
 ## Risks / watch-list
 

diff --git a/examples/manipulation/01_pick_and_retry.py b/examples/manipulation/01_pick_and_retry.py
@@ -93,9 +93,19 @@ def _integrate_observation(self, obs: dict[str, Any]) -> None:
         self.belief_radius = max(0.035, self.belief_radius * 0.72)
 
 
-def run(seed: int = 3, render: bool = True, max_steps: int = 40) -> Trace:
+def run_agent(
+    agent: Any, *, seed: int = 3, render: bool = False, max_steps: int = 40
+) -> Trace:
+    """Run any pick-and-retry-style agent against the real Tabletop2D world.
+
+    The agent only needs ``reset()`` / ``act(obs)`` / ``update(obs, reward, info)``.
+    Belief attributes are read defensively, so a custom agent (for example one
+    edited in the browser playground) that changes or drops them still runs and
+    serializes. The agent's belief is recorded into the trace ``info`` each step
+    so it is inspectable without the live agent object.
+    """
+
     env = Tabletop2D(seed=seed)
-    agent = PickAndRetryAgent()
     obs = env.reset(seed=seed)
     agent.reset()
     trace = Trace()
@@ -105,13 +115,14 @@ def run(seed: int = 3, render: bool = True, max_steps: int = 40) -> Trace:
         result = env.step(action)
         obs, reward, done, info = result.as_tuple()
         agent.update(obs, reward, info)
-        # Record the agent's belief in the trace so it is inspectable without the
-        # live agent object (used by the browser playground and trace tooling).
+
+        belief_mean = getattr(agent, "belief_mean", None)
+        belief_radius = getattr(agent, "belief_radius", None)
         info["belief_mean"] = (
-            None if agent.belief_mean is None else agent.belief_mean.copy()
+            None if belief_mean is None else np.asarray(belief_mean, dtype=float).copy()
         )
-        info["belief_radius"] = float(agent.belief_radius)
-        info["retry_count"] = agent.retry_count
+        info["belief_radius"] = None if belief_radius is None else float(belief_radius)
+        info["retry_count"] = int(getattr(agent, "retry_count", 0) or 0)
         trace.append(obs, action, reward, info)
 
         if render:
@@ -123,6 +134,10 @@ def run(seed: int = 3, render: bool = True, max_steps: int = 40) -> Trace:
     return trace
 
 
+def run(seed: int = 3, render: bool = True, max_steps: int = 40) -> Trace:
+    return run_agent(PickAndRetryAgent(), seed=seed, render=render, max_steps=max_steps)
+
+
 def main() -> None:
     parser = argparse.ArgumentParser()
     parser.add_argument("--seed", type=int, default=3)