Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions docs/playground.css
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,55 @@ button:disabled {
color: #b5482f;
}

.code-panel {
border: 1px solid #d7dcd6;
border-radius: 10px;
margin-top: 14px;
overflow: hidden;
}

.code-head {
align-items: center;
background: #eef1ec;
display: flex;
flex-wrap: wrap;
gap: 8px;
justify-content: space-between;
padding: 8px 12px;
}

.code-head span {
color: var(--ink-soft, #5a6b67);
font-size: 0.82rem;
}

.code-head code {
background: #dfe4dd;
border-radius: 3px;
padding: 0 3px;
}

.code-actions {
display: flex;
gap: 8px;
}

.code-cell {
background: #0f1419;
border: 0;
box-sizing: border-box;
color: #d6deeb;
display: block;
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
font-size: 0.8rem;
line-height: 1.45;
padding: 12px;
resize: vertical;
tab-size: 4;
white-space: pre;
width: 100%;
}

.status-strip {
display: grid;
gap: 10px;
Expand Down
11 changes: 11 additions & 0 deletions docs/playground.html
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,17 @@ <h1 id="playground-title">Playground</h1>
<div id="comparePanel" class="compare-panel" aria-label="Policy comparison" hidden></div>

<div id="scene" class="scene" role="img" aria-label="Current playground scene"></div>

<section id="codePanel" class="code-panel" aria-label="Editable agent" hidden>
<div class="code-head">
<span>Agent brain — edit <code>act()</code> / <code>offset_schedule</code>, then re-run the real loop in your browser</span>
<div class="code-actions">
<button id="resetCodeButton" type="button">Reset code</button>
<button id="runCodeButton" type="button">Run edited agent</button>
</div>
</div>
<textarea id="codeCell" class="code-cell" spellcheck="false" rows="20" aria-label="Agent source code"></textarea>
</section>
</div>

<aside class="trace-panel" aria-labelledby="trace-title">
Expand Down
126 changes: 126 additions & 0 deletions docs/playground.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@
compare: document.getElementById("compareToggle"),
realPython: document.getElementById("realPythonToggle"),
realStatus: document.getElementById("realStatus"),
codePanel: document.getElementById("codePanel"),
codeCell: document.getElementById("codeCell"),
runCode: document.getElementById("runCodeButton"),
resetCode: document.getElementById("resetCodeButton"),
reset: document.getElementById("resetButton"),
step: document.getElementById("stepButton"),
run: document.getElementById("runButton"),
Expand Down Expand Up @@ -95,6 +99,12 @@
elements.realPython.addEventListener("change", () => {
rebuild();
});
elements.runCode.addEventListener("click", () => {
runEditedAgent();
});
elements.resetCode.addEventListener("click", () => {
resetEditedAgent();
});
elements.replay.addEventListener("input", () => {
state.replayIndex = clampReplayIndex(elements.replay.value);
render();
Expand Down Expand Up @@ -185,6 +195,9 @@
: await fetchRealClarifyingConfig(elements.answer.value);
state = buildState(config);
setRealStatus("running real Python: " + sourcePath(scenario));
if (scenario === "pickretry") {
populateDefaultAgentSource();
}
} catch (error) {
if (scenario === "pickretry") {
state = buildState();
Expand Down Expand Up @@ -335,6 +348,118 @@
return { command: "pick the block", totalSteps: 0, initial, steps: [] };
}

// --- Editable agent "brain" (Phase 3) ---------------------------------------
// Visitors edit the real PickAndRetryAgent and re-run it against the real
// Tabletop2D, entirely in their own browser (Pyodide is a client-side WASM
// sandbox — exec'ing the edited code is no more privileged than a local REPL).
// Cache for the pristine agent source. Stays null until getDefaultAgentSource()
// has successfully fetched it once; afterwards that function returns it directly.
let defaultAgentSource = null;

// Python program (one string per line, joined with newlines) handed to
// pyodide.runPythonAsync by getDefaultAgentSource(). It loads
// examples/manipulation/01_pick_and_retry.py under the module name
// 'pick_and_retry_src' and ends with an expression that concatenates a fixed
// import header with inspect.getsource(mod.PickAndRetryAgent).
// NOTE(review): the header string is hand-written here rather than read from the
// example file, so it has to be kept in step with whatever names the
// PickAndRetryAgent class body actually uses — confirm when that file changes.
const AGENT_SOURCE_DRIVER = [
"import os, sys, importlib.util, inspect",
"cwd = os.getcwd()",
"if cwd not in sys.path:",
" sys.path.insert(0, cwd)",
"path = os.path.join(cwd, 'examples', 'manipulation', '01_pick_and_retry.py')",
"spec = importlib.util.spec_from_file_location('pick_and_retry_src', path)",
"mod = importlib.util.module_from_spec(spec)",
"sys.modules[spec.name] = mod # inspect.getsource needs the module registered",
"spec.loader.exec_module(mod)",
"header = 'from __future__ import annotations\\nimport numpy as np\\nfrom typing import Any\\nfrom pir.core.types import Failure\\n\\n\\n'",
"header + inspect.getsource(mod.PickAndRetryAgent)",
].join("\n");

// Execs the user's source (which must define PickAndRetryAgent) and runs it via
// the example's own run_agent(), so the loop is identical to the CLI's run().
// USER_SRC is injected through pyodide.globals to avoid string-escaping issues.
// One string per Python line, joined with newlines. Indentation inside the
// strings is significant: it is the block structure of the embedded program.
// The driver's final expression (the json.dumps call) is the value the caller
// parses as the playground config.
const PICKRETRY_EDIT_DRIVER = [
  "import json, os, sys, importlib.util",
  "cwd = os.getcwd()",
  "if cwd not in sys.path:",
  "    sys.path.insert(0, cwd)",
  "class _NoMatplotlib:",
  "    def find_spec(self, name, path=None, target=None):",
  "        if name == 'matplotlib' or name.startswith('matplotlib.'):",
  "            raise ImportError('matplotlib is intentionally unavailable on the headless browser path')",
  "        return None",
  // Install the import blocker only once. This driver runs on every click, and
  // the class object is re-created each time, so an isinstance check would never
  // match an earlier instance; compare by class name instead. Without the guard
  // sys.meta_path gains one more finder per run.
  "if not any(type(finder).__name__ == '_NoMatplotlib' for finder in sys.meta_path):",
  "    sys.meta_path.insert(0, _NoMatplotlib())",
  "path = os.path.join(cwd, 'examples', 'manipulation', '01_pick_and_retry.py')",
  "spec = importlib.util.spec_from_file_location('pick_and_retry', path)",
  "mod = importlib.util.module_from_spec(spec)",
  "spec.loader.exec_module(mod)",
  "from pir.viz.playground_trace import pick_and_retry_trace_to_playground",
  "from pir.worlds.tabletop_2d import Tabletop2D",
  // The edited source runs in its own namespace so it cannot shadow the driver's names.
  "ns = {}",
  "exec(USER_SRC, ns)",
  "Agent = ns.get('PickAndRetryAgent')",
  "if Agent is None:",
  "    raise ValueError('Your code must define a class named PickAndRetryAgent')",
  "trace = mod.run_agent(Agent(), seed=3, render=False)",
  // A second world with the same seed is built only to read the scene geometry.
  "geom = Tabletop2D(seed=3)",
  "json.dumps(pick_and_retry_trace_to_playground(",
  "    trace,",
  "    object_xy=[float(geom.obj.position[0]), float(geom.obj.position[1])],",
  "    occluder=[float(v) for v in geom.occluder],",
  "    camera=[float(geom.camera_pos[0]), float(geom.camera_pos[1])],",
  "))",
].join("\n");

/**
 * Resolve the unmodified PickAndRetryAgent source text.
 *
 * The Python driver is only executed while the module-level cache is still
 * null; once a fetch has succeeded the cached string is returned as-is.
 *
 * @returns {Promise<*>} whatever AGENT_SOURCE_DRIVER evaluated to (the cached value on later calls).
 */
async function getDefaultAgentSource() {
  if (defaultAgentSource === null) {
    const runtime = await ensurePyodide();
    defaultAgentSource = await runtime.runPythonAsync(AGENT_SOURCE_DRIVER);
  }
  return defaultAgentSource;
}

/**
 * Pre-fill the code editor with the original agent source, but only when the
 * editor holds nothing except whitespace.
 *
 * Best-effort: if the source cannot be fetched the editor is left untouched and
 * the failure is logged instead of being thrown.
 *
 * @returns {Promise<void>}
 */
async function populateDefaultAgentSource() {
  const editor = elements.codeCell;
  if (editor.value.trim()) {
    return;
  }
  try {
    const source = await getDefaultAgentSource();
    // Re-check after the await: the visitor may have typed or pasted while the
    // source was loading, and that text must not be overwritten.
    if (!editor.value.trim()) {
      editor.value = source;
    }
  } catch (error) {
    // Non-fatal: the run already worked; leave the editor as it is, but keep
    // the cause discoverable in the console.
    console.warn("could not load the default agent source", error);
  }
}

/**
 * Execute the editor's current contents through PICKRETRY_EDIT_DRIVER and show
 * the resulting trace.
 *
 * An editor containing only whitespace is reported as an error status and
 * nothing is run. Otherwise any in-progress run is stopped, the run button is
 * disabled for the duration, and the source is handed to Python through the
 * USER_SRC global. Failures (from Python or from parsing the result) are
 * reported through the status line rather than thrown.
 *
 * @returns {Promise<void>}
 */
async function runEditedAgent() {
  const source = elements.codeCell.value;
  if (source.trim() === "") {
    setRealStatus("nothing to run — the editor is empty", true);
    return;
  }

  stopRun();
  elements.runCode.disabled = true;
  setRealStatus("running your edited agent…");

  try {
    const runtime = await ensurePyodide();
    runtime.globals.set("USER_SRC", source);
    const payload = await runtime.runPythonAsync(PICKRETRY_EDIT_DRIVER);
    const config = JSON.parse(payload);
    state = buildState(config);
    const summary = `ran your edited agent — ${state.config.totalSteps} steps`;
    setRealStatus(summary);
    render();
  } catch (failure) {
    setRealStatus("your agent raised: " + failure, true);
  } finally {
    // Re-enable the button whether the run succeeded or failed.
    elements.runCode.disabled = false;
  }
}

/**
 * Restore the editor to the original agent source and re-run the stock
 * pick-and-retry scenario.
 *
 * The editor and the scene are only replaced after BOTH the source and the
 * fresh config have been obtained, so a failed reset leaves the visitor's code
 * and the current scene exactly as they were. Failures are reported through the
 * status line rather than thrown.
 *
 * @returns {Promise<void>}
 */
async function resetEditedAgent() {
  // Same as runEditedAgent(): stop any in-progress run before state is swapped.
  stopRun();
  setRealStatus("restoring the original agent…");
  try {
    // Sequential on purpose: both calls go through the same Python runtime.
    const source = await getDefaultAgentSource();
    const config = await fetchRealPickRetryConfig();
    elements.codeCell.value = source;
    state = buildState(config);
    setRealStatus("running real Python: " + sourcePath("pickretry"));
    render();
  } catch (error) {
    setRealStatus("reset failed: " + error, true);
  }
}

function stepOnce() {
if (state.index >= state.config.steps.length) {
render();
Expand Down Expand Up @@ -829,6 +954,7 @@
elements.realPython.checked = false;
}
elements.answer.disabled = state.scenario === "pickretry";
elements.codePanel.hidden = state.scenario !== "pickretry";

renderReplay(replayIndex);
renderCompare();
Expand Down
Binary file modified docs/pyodide/pir_bundle.zip
Binary file not shown.
32 changes: 25 additions & 7 deletions docs/pyodide_playground_strategy.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,13 +157,31 @@ belief radius shrinking 10 → 9.8 → 9.5 → 2.5, `holding=True`, `retries=2`.
**Still to confirm in a real browser:** selecting the scenario boots Pyodide and
the tabletop/belief/timeline redraw from the real trace.

**Phase 3 — editable code cell (1–2 days).** Expose the agent's `act()` in a
small editor so visitors can tweak the retry/belief logic and re-run. This is
the "wow, I can edit the robot's brain in the browser" moment that converts to
stars.

Ship Phase 0–1 behind the existing playground before any Hacker News launch;
Phases 2–3 can follow the launch.
**Phase 3 — editable code cell. ✅ built (Python path verified; needs a browser
check).** The pick_and_retry scenario now shows an **"Agent brain"** editor
pre-filled with the *real* `PickAndRetryAgent` source (fetched via
`inspect.getsource` so it can never drift from the file). Editing it and clicking
**Run edited agent** execs the user's class in Pyodide and runs it against the
real `Tabletop2D` via the example's own `run_agent(...)` — the same loop the CLI
uses, so there is no second loop to drift. Syntax/runtime errors surface inline.

This is the "edit the robot's brain in the browser" moment. The edited code runs
entirely client-side (Pyodide is a WASM sandbox — exec'ing it is no more
privileged than a local REPL), `USER_SRC` is passed via `pyodide.globals` to
avoid string-escaping, and a custom agent that drops the belief attributes still
runs (`run_agent` reads them defensively).

Verified locally (unpacked-bundle sim, the exact drivers): the default source
reproduces `seed=3` (`4 steps, retries=2`); removing the deliberate first offset
makes it grab the belief mean immediately (`2 steps, retries=0`) — a live lesson
in *why* the retry schedule exists; malformed code raises a caught error.
`tests/test_playground_trace.py` exercises `run_agent` with a custom agent.
**Still to confirm in a real browser:** the editor populates with the real
source, edits re-run, and errors render inline.

Phases 0–3 are all built and Python-verified; the remaining work is a single
browser pass over them and (optionally) deleting the JS `clarifying`
preview once the real path is the trusted default.

## Risks / watch-list

Expand Down
29 changes: 22 additions & 7 deletions examples/manipulation/01_pick_and_retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,19 @@ def _integrate_observation(self, obs: dict[str, Any]) -> None:
self.belief_radius = max(0.035, self.belief_radius * 0.72)


def run(seed: int = 3, render: bool = True, max_steps: int = 40) -> Trace:
def run_agent(
agent: Any, *, seed: int = 3, render: bool = False, max_steps: int = 40
) -> Trace:
"""Run any pick-and-retry-style agent against the real Tabletop2D world.

The agent only needs ``reset()`` / ``act(obs)`` / ``update(obs, reward, info)``.
Belief attributes are read defensively, so a custom agent (for example one
edited in the browser playground) that changes or drops them still runs and
serializes. The agent's belief is recorded into the trace ``info`` each step
so it is inspectable without the live agent object.
"""

env = Tabletop2D(seed=seed)
agent = PickAndRetryAgent()
obs = env.reset(seed=seed)
agent.reset()
trace = Trace()
Expand All @@ -105,13 +115,14 @@ def run(seed: int = 3, render: bool = True, max_steps: int = 40) -> Trace:
result = env.step(action)
obs, reward, done, info = result.as_tuple()
agent.update(obs, reward, info)
# Record the agent's belief in the trace so it is inspectable without the
# live agent object (used by the browser playground and trace tooling).

belief_mean = getattr(agent, "belief_mean", None)
belief_radius = getattr(agent, "belief_radius", None)
info["belief_mean"] = (
None if agent.belief_mean is None else agent.belief_mean.copy()
None if belief_mean is None else np.asarray(belief_mean, dtype=float).copy()
)
info["belief_radius"] = float(agent.belief_radius)
info["retry_count"] = agent.retry_count
info["belief_radius"] = None if belief_radius is None else float(belief_radius)
info["retry_count"] = int(getattr(agent, "retry_count", 0) or 0)
trace.append(obs, action, reward, info)

if render:
Expand All @@ -123,6 +134,10 @@ def run(seed: int = 3, render: bool = True, max_steps: int = 40) -> Trace:
return trace


def run(seed: int = 3, render: bool = True, max_steps: int = 40) -> Trace:
    """Run the built-in ``PickAndRetryAgent`` through ``run_agent``.

    Constructs a fresh ``PickAndRetryAgent`` and forwards ``seed``, ``render``
    and ``max_steps`` unchanged. Note that ``render`` defaults to ``True`` here,
    whereas ``run_agent`` itself defaults it to ``False``.

    Returns:
        The ``Trace`` returned by ``run_agent``.
    """
    default_agent = PickAndRetryAgent()
    return run_agent(default_agent, seed=seed, render=render, max_steps=max_steps)


def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--seed", type=int, default=3)
Expand Down
Loading
Loading