From 260c540667a4eced8eed6580686abfc3ab665f81 Mon Sep 17 00:00:00 2001 From: rsasaki0109 Date: Thu, 4 Jun 2026 23:22:21 +0900 Subject: [PATCH] Pyodide Phase 1: run the real clarifying loop in the playground MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a "Run real Python" toggle to the playground for the clarifying-question scenario. When enabled, the page lazily boots Pyodide, unpacks the pir bundle, and runs the unmodified examples/embodied_ai/35_clarifying_question.py loop headless (numpy only, matplotlib blocked). The real Trace is serialized into the exact config the existing JS renderer consumes, so the browser draws real Python output instead of the JS reimplementation. The JS preview stays the default first paint — Pyodide loads only on toggle — so the instant experience is preserved. Deleting the JS clarifying dynamics is deliberately deferred until the real path is the verified default (noted in the design memo). - pir/viz/playground_trace.py: pure-Python, JSON-friendly Trace -> playground serializer (no numpy/matplotlib), runnable in CPython and Pyodide alike. - tests/test_playground_trace.py: pins the trace->render JSON contract (every field the renderer reads, plain-JSON, real ask->look->pick outcome). - build_pyodide_bundle.py: bundle 35_clarifying_question.py too; rebuilt zip. - Verified the exact browser driver's Python path in CPython (unpack-into-cwd sim): answer=red yields the real ambiguous_goal->resolved->pick at [32,56]. Browser check still pending (boot Pyodide + redraw from the real trace). Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/playground.css | 11 ++ docs/playground.html | 6 ++ docs/playground.js | 138 ++++++++++++++++++++++--- docs/pyodide/pir_bundle.zip | Bin 24057 -> 30562 bytes docs/pyodide_playground_strategy.md | 31 +++++- pir/viz/playground_trace.py | 154 ++++++++++++++++++++++++++++ scripts/build_pyodide_bundle.py | 1 + tests/test_playground_trace.py | 100 ++++++++++++++++++ 8 files changed, 420 insertions(+), 21 deletions(-) create mode 100644 pir/viz/playground_trace.py create mode 100644 tests/test_playground_trace.py diff --git a/docs/playground.css b/docs/playground.css index 6b9050e..e158b0b 100644 --- a/docs/playground.css +++ b/docs/playground.css @@ -138,6 +138,17 @@ button:disabled { min-height: 1.25rem; } +.real-status { + color: var(--ink-soft, #5a6b67); + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + font-size: 0.82rem; + min-height: 1.1rem; +} + +.real-status.real-error { + color: #b5482f; +} + .status-strip { display: grid; gap: 10px; diff --git a/docs/playground.html b/docs/playground.html index 5096c1f..be5ab46 100644 --- a/docs/playground.html +++ b/docs/playground.html @@ -65,6 +65,10 @@

Playground

Compare +
@@ -74,6 +78,7 @@

Playground

+
@@ -129,6 +134,7 @@

Trace

+ diff --git a/docs/playground.js b/docs/playground.js index e47cb9a..e4da4f4 100644 --- a/docs/playground.js +++ b/docs/playground.js @@ -45,6 +45,8 @@ answer: document.getElementById("answerSelect"), failureFilter: document.getElementById("failureFilter"), compare: document.getElementById("compareToggle"), + realPython: document.getElementById("realPythonToggle"), + realStatus: document.getElementById("realStatus"), reset: document.getElementById("resetButton"), step: document.getElementById("stepButton"), run: document.getElementById("runButton"), @@ -72,16 +74,13 @@ let copyStatusTimer = null; elements.scenario.addEventListener("change", () => { - stopRun(); - state = buildState(); - updateLocation(false); - render(); + if (elements.scenario.value !== "clarifying") { + elements.realPython.checked = false; + } + rebuild(); }); elements.answer.addEventListener("change", () => { - stopRun(); - state = buildState(); - updateLocation(false); - render(); + rebuild(); }); elements.failureFilter.addEventListener("change", () => { updateLocation(false); @@ -91,15 +90,15 @@ updateLocation(false); render(); }); + elements.realPython.addEventListener("change", () => { + rebuild(); + }); elements.replay.addEventListener("input", () => { state.replayIndex = clampReplayIndex(elements.replay.value); render(); }); elements.reset.addEventListener("click", () => { - stopRun(); - state = buildState(); - updateLocation(false); - render(); + rebuild(); }); elements.step.addEventListener("click", () => { stepOnce(); @@ -123,23 +122,127 @@ window.setTimeout(startRun, 180); } - function buildState() { + function buildState(realConfig) { const scenario = elements.scenario.value; const answer = elements.answer.value; - const config = - scenario === "household" - ? buildHouseholdScenario(answer) - : buildClarifyingScenario(answer); + let config; + let source; + if (realConfig) { + config = realConfig; + source = "python"; + } else { + config = + scenario === "household" + ? buildHouseholdScenario(answer) + : buildClarifyingScenario(answer); + source = "js"; + } return { scenario, answer, config, + source, index: 0, trace: [], replayIndex: null, }; } + // Rebuild the playground state, honoring the "Run real Python" toggle. When the + // toggle is on (clarifying only), we lazily boot Pyodide and run the actual + // examples/embodied_ai/35_clarifying_question.py loop, then draw its real trace. + // Otherwise we use the instant JS preview so first paint never waits on Pyodide. + async function rebuild() { + stopRun(); + const useReal = + elements.realPython.checked && elements.scenario.value === "clarifying"; + if (useReal) { + setRealStatus("booting Pyodide + running real Python…"); + try { + const config = await fetchRealClarifyingConfig(elements.answer.value); + state = buildState(config); + setRealStatus( + "running real Python: examples/embodied_ai/35_clarifying_question.py" + ); + } catch (error) { + elements.realPython.checked = false; + state = buildState(); + setRealStatus("Pyodide failed (" + error + ") — showing JS preview", true); + } + } else { + state = buildState(); + setRealStatus(""); + } + updateLocation(false); + render(); + } + + function setRealStatus(message, isError) { + if (!elements.realStatus) { + return; + } + elements.realStatus.textContent = message || ""; + elements.realStatus.classList.toggle("real-error", Boolean(isError)); + } + + // --- Real Python in the browser (Pyodide), lazy-loaded on first use --------- + let pyodideReadyPromise = null; + const realConfigCache = {}; + + async function ensurePyodide() { + if (pyodideReadyPromise) { + return pyodideReadyPromise; + } + pyodideReadyPromise = (async () => { + // eslint-disable-next-line no-undef + const pyodide = await loadPyodide({ + indexURL: "https://cdn.jsdelivr.net/pyodide/v0.27.2/full/", + }); + await pyodide.loadPackage("numpy"); + const buffer = await (await fetch("./pyodide/pir_bundle.zip")).arrayBuffer(); + await pyodide.unpackArchive(buffer, "zip"); + return pyodide; + })(); + return pyodideReadyPromise; + } + + // Runs the unmodified example headless and serializes its Trace with the same + // pir.viz.playground_trace helper that tests/test_playground_trace.py pins. + // ANSWER is replaced with a JSON string literal before execution. + const CLARIFYING_DRIVER = [ + "import json, os, sys, importlib.util", + "cwd = os.getcwd()", + "if cwd not in sys.path:", + " sys.path.insert(0, cwd)", + "class _NoMatplotlib:", + " def find_spec(self, name, path=None, target=None):", + " if name == 'matplotlib' or name.startswith('matplotlib.'):", + " raise ImportError('matplotlib is intentionally unavailable on the headless browser path')", + " return None", + "sys.meta_path.insert(0, _NoMatplotlib())", + "path = os.path.join(cwd, 'examples', 'embodied_ai', '35_clarifying_question.py')", + "spec = importlib.util.spec_from_file_location('clarifying_question', path)", + "mod = importlib.util.module_from_spec(spec)", + "spec.loader.exec_module(mod)", + "from pir.viz.playground_trace import clarifying_trace_to_playground", + "answer = ANSWER", + "trace = mod.run(command='pick the block', answer=answer, render=False)", + "json.dumps(clarifying_trace_to_playground(trace, command='pick the block', answer=answer))", + ].join("\n"); + + async function fetchRealClarifyingConfig(answer) { + const cacheKey = "clarifying:" + answer; + if (realConfigCache[cacheKey]) { + return realConfigCache[cacheKey]; + } + const pyodide = await ensurePyodide(); + const code = CLARIFYING_DRIVER.replace("ANSWER", JSON.stringify(answer)); + const json = await pyodide.runPythonAsync(code); + const config = JSON.parse(json); + realConfigCache[cacheKey] = config; + return config; + } + function stepOnce() { if (state.index >= state.config.steps.length) { render(); @@ -626,6 +729,7 @@ elements.run.disabled = state.index >= state.config.steps.length && !timer; elements.copyTrace.disabled = state.trace.length === 0; elements.compare.disabled = state.scenario !== "household"; + elements.realPython.disabled = state.scenario !== "clarifying"; renderReplay(replayIndex); renderCompare(); diff --git a/docs/pyodide/pir_bundle.zip b/docs/pyodide/pir_bundle.zip index 72db0f82a9ba8ef124b5be352600eebb9baaeea9..6dcbcb189bdc9ceabebfb0ae13ce0f8d1971fcd5 100644 GIT binary patch delta 6641 zcmb7pWmMFGx9w1ZG@^jCG)M_Zr*uh&APg~dBOU(e6c7fG?(XhR>1M#889Jp0P_FmA z5C65^diTTK=gT=~?RCDMefB}Nqimg`)Fd$jMXdv$o8=P|x-dZ?_)8Fo0t5m%+qiOh z*!XZc+nah@xjMN!LcngWrsfu$&fbms@L2k0B7d0POCOq*v<4q5e`#8CJa&@UVA+j< z6x9oEvMcq~L>aa6Y;oXwLdt8XoSB3A0*#QTMW+nh6X7vn+r{ zIhf%&o1N+WeNNiM7ENZu$Ahp_33825U(J{~L{q{q14@25Pu1&GM*4Im@;bwrQ9u5N z2xQWXFV1p_yiK9-pv1EPHx`_67XW%)vB+Px3yhm3#os5k&bygMV`ON?ye}Ur7Uo>BYIiPFB=e)9^~Q zafxCl!`_+yo|bx0)kgb>XNgbqd>SYDZPW9NQDpq(+w+?e+%-G;OpHcliW-3Nb*T- z%-FCKk6BB|wf&(R7On5fcAf~#1Ao!QUgMp0km{)$66ET4%Yt{c6vyE~+w#l95_Ijk z7Bl#Z#S6sFfy(@?uwiE2tsS`ePSoYm*VyRMXfRQqx==tFY*lKf6yk=~Ks0{a3mICc zM7xW1zGRW3WwtD{;xM$vqkUVLS*u)TV`AZs^%i{j6dm6dYK|T#^k)zJbVGUW<3~2c zL})ViTnibXJ6onIEz&yYPl8TR#Kd7Q+$w}vmb%@VlPP22CCOz+%G@|SP!N({t%XkV_f8Ufk zVeDNYw-rzPmXPdq0B`leRGmQ(x@3i1%5$#*7?L#G$}JPPz7`E`k+yN+Mnl57<1PpT zSXWWgd(fm{(E>1-8*tfth9Y;#B6?Y}po`>92+*q*75k8pGrpUG93{gW?V!o{^Nwj` zy|J{MGLqIvc*+Xn8>ztA`kw<4pCcG}$}(ah4I5EAbMmHAUG$rQdxw`|65A>V`s+$N zj+NC6lJ&5uop0xRgaPD5-&t&w^{B5JGUp8P@p;aQb}!^nQ~-Nn=Fl?Pb=3HyRsnMc zW)6-Zq46++Plh8(GSv#AwmLIG^7fRBTJ));BBmUbkV-_Ggd$m?V0{&hhNO~_T|L8{ z)KdYC8~vX*L(o7%&rLpB_;9CjNWe2*Wrmc#Fp1zaL60T~x{2|1Y zPE(1SmHpz60szJ{kT_$GW*sB}lF04Zx8bc%h=#rzU7JP_MKoS4wGl@$uBNx*vuxy1 zLE0>?d)22kv5%3s4IAmGAz0tm^>yi5Xk_yivSZqY2wt#?_q&OXP!MiX?lwF1KT2lK z*aLMxNPR+~NmE*1vxI&t3IbG##AIBv=K7Mu2fdrPc&h2+pi@8~^2+B$|7b>nV zLLS7axUpBh9R_5$U*a9gy|5V3sFO3D^mwgqfxIVHWt|gDx^8r%mfShU67N-|tSrGu zV8;tq^L<6?oEv6k`qIx_BPh}496tiTFO~jJbD-(i9ImNN+j^O+;ay@jvoZaPEo)LP ziiz+9SHRXOQ&jcF|6_K?uP=NQNlfHo#rxEm z9jV)j7qY|pg6>xy+8blS*AK;=sc^d%57}-1gqa`2WyYT`b_W?f#a{kyG`w(+0cnwzUfMNWg4p%)1<(3Ds(REQZ@5cgm*Qpe;?lClBsPD z1^{t8C2}K=bQF`(wGwAfOp&*TbjkrwDYt*zlQYVEwkThvAu+J=nmRUuN}rcLTQ$g0 zBV&5>meZF*GNMw$Besz57@1?T&m^&nnK@+0jdMk7A$g&VdY=$e3gGFVXhdylRX0Ru zHAgw-ehm`WAN;B*uD+Z$KiLw(eX&pcsR83C8vpcBb~%Id*p>c7!v=a^$J$9iB1fnA z5PMZ=8@9gw@cZt(o7E!NDP}Q+qVeIOIO}**!#^0eIb*zQI{Hsjg*{Jb}NmjR)WPbD!|QA04{2_iE=96HbX4MzI`h*>QK?l6FSbM`~A+_lKWpc*TPbU^$`oDOSZri-p zRr8$dz8()5w*lhb!??)s0|WljxV{FSp$f>9vCnhW1&4;6r<1W=07YFHtv;4HpBq3! z|F`);SRgUKfzK!h-0OjaAW)(h2*mlXYvE<;;B0U4h0DUh%n4#+0RfxZaPhteo7S+%3Mi**H1=(+8atc23*;7wrIs8ki1D^=5gEv+mivS#3BfCZ!|%1oh8% z@*uN0eu__;*9!cqf7YEW?+94|O^>D%qnf29gtK7AEp94H)LP>$yVVh$)4+{lqR5Ti z6#Sz$WBWF|Y?B@ADTs?E^eD0tX#~FJFw_f|Hg-%{KAy`ua$+wVyw7sycGc59dWeWj zzTOEFaF~FZy}f%jU3J;DgFe6GiE&nzc5<1#sGfA-_}LRa3~OE;9B9 z6N}k27)&(=e<0a_R~^8N2&^sdS#TuVcDDlOIs~s|*Lpc0{szdi!R6Q3 z^}$Xi4UBCbI>zi9>N~o8nB-~h-u2s5zJw0hq=R!oCBcyioAEn!VRNzfpq_M^himeE z!`LF2j>h}L#GW?;pnL$?v>o#zRDVywo9BU(6ob-Is774+P#;&mjM^)h67nMijFT%I zQKduQ5|`)3Bviaq;LRHj{ow$H>7XUDu1F3`C3>Sk^Zb6|)Q%vKRyR%D?RFPUllXZK z%hpB2n8w$cp>I=VHTcpU(hqy;?BV&}>b^yyIpWC#@g0nx*aJnwX-1g>9I$nGu`Xxz zr6V{(r1Qr2j*Ypkm^?`*ru%I+ZdIMcyqdF@?qdD&FaeEtZQ|mfWE4xlckkKrCN^p> zQ8g1i6CK|N+xNtV^Ce_@v56$9SmS2Cn7TZe*KHveWz~tm>B~B)T40eV;bhX~h3x6^PI25?Q-9K!}zmUPy2v zSkfY>i>9KNiMZ|Wl8K&pv};sKU+7V~q$;6S{PE zS{BW7jx0q3+@FF>##}GI?DY?E#(~2_E|Qkn1Fsoge~a6d_L9Gr zx|%ug1WeX;2R*%KTuJ5XhVp0fD!I9Z!P{KBXx#w$2 z_g6;dk#8E!Kk!*2n|^5%Z+tB8FS3QzI<)mzehCoBHX=MPXlE2AB4pA;`+jGKo(PEJ zpk<{5Ymw=5U`BL;dIb_t@WT}K!X#dUHibPYzG-l~?8G{jzeoSXL&_RX95VXz&j%Cr zSx#WC?25jqazJN^=+eIHBM<(NV-r z_ad_A?uHF{zYt`^SiN3BVjnEUvr3|~Hi3qf=12e2(1=m58XWFHo2$~ods~j52^{9I zA-b*#oE-nIGmFLMGevf_1-oI)W%iC=5dxFh4aHS$dyFl)9~xFGaT-u9+KgT+5NneP zuX8v|D9a2JIEZ>m(+%jSbuBP|=IHE~_P|e7oU+RAtz0^HRrSd1=#ap@!zaThcU{@ko7Nxhm977Fs@PcZjLlDnc}1lz^UA*_Ax~Js)b5 zSbbR2u-rzh)8hH0iFKgc6n+aEGY6b^F_cJakej`1MzOs}nn_?DHQ(gp%;8(Cd+9n! zeI9KP?$#AYAR9-lGZqw9S9WMk2;{IdFL-IK-)0k)G`^Mmv@YjdS>fNmc7TJ(7gYEs zV|b;*(0A++!kBYC3Z?k<%Vy}f6-XPdMPHk$b-BydBe|O-&@U8e7!1SHxkN}3MJKv_ zT#vjbN8+Po8s+%t&(B~i9%aavt--FJnuwZvas~giLh(!VjPe6>Mo-CRpstLub&fgv z7^QM<9t&Lq4QX6h7v5ob_C`a(#HkFv$Jjp~Hh@){o77~UkG8@67co1>#C&cz5&iOQ zA2+0@{v%Q;GVyW8WTuwFi}d&fod|>Ytfa#HCG?lE_JaPZj@zZ1kpT&dFiZ2~u|HKq zzG+$c`ic58xFVgbGIJ;cz!tb~I4n?|LSy>`0#|hhaB0&#$@<0HC>=srTM|gxa)i}- znWzhw+TOadxoF^O=?0Ap+c3w&1JkEB9j>y3t;-^kCzN!OOHTU-MlaJOZsm^oUPN_i zHaw8YdhjyxJkBWMP%e#H+s~vlSoU_vSe;r5Gjmttuozzx7X8u#a6B?UtG3z}+oR&% z=Z_X<=ycd96i0-S-LTcvw<_1XfiyvWfWOzq&_$daXVbF|jjNIt)>-L(70e@}g%~E6 zgnHlmU-9Wt?@~afC$eF)xx}KmRx`IHscrPXQ<~YsSIp2#jR%{SGz$JY7M5+lQ1Sg#CN8PZ-X^+z{(v}eNU~IN}9P0SIqzey7s3C6F~#4Xq|UrWu*=n@5Jc zC)D{z@Cznm?3_)ugYl%;w`86&OS38*}J; z)D^MN$1tQ;okz4=#V83s&SPU8015N)6w+}(nZ8Nl(r=m#Nr)`Xh$&lccnvZ8;=bG5 z;5Qfx|G-hXcXi_La5ZlFO`}Ly6*s=mRZ_#J zgX|)z)n%@fm5WJn+iN{qNj37VyT5fMj5RH0GVqJ#puQTTn4-F1T4#1ReW>&jj?7us z=U0lFh=yvET@32p40Y0_ORcZuI1D1do14!A2jj!9nECT{LRefP3;H`B4Qk*?0F@H- zMMf3L-m%kxK6KtZ2N`{)6wYm*+x(C(4yJ5&QLcHLX-=Hb>f_~6BuO`Z$QV&!=8de$ z@ady}qs=I+ai@^7f%gO2%xOvAn^CT;=)9Cs_hz4@PZZ(HMb1KK6^6q0Hh?Dm2NwkM(kbi5~DC()^}-rr*bfkkUGmEt-7{MvO4nk{yKv89IcBOhLXEz!A+|9EdS%TO}Ow`sQ;Mo{)O;n!kab+I4}PQ2{J^TXKSo)lS1^?W zEz9&Aft+p*C}!DVH*UT%+kaiE1CE=)kTPJ*6^ee{kE5l@?j)FyJI|12SxN4qhaYT9HKbiP5_kXs zB{XDLvUyTyJgR0h0>byMeA>-EE0uw9<8m7e4wkaCC!(V|L$)fukPhd`(6=G-0jFBP^k6e=69YI+2phsLis$imBSU*cXt*mr% zaw4G@?Z2%Gte5T(EQom4-SJ>A!RH0ahUqjGP7nR=q#8N}X!!~gt$Mr>3j-2p14eT~`<();a0%RvOFf&z-yB0}k-dLnOGFUU}83UTw1NFk~A0ve4^1}w`S2R*bJ<(9}@*n7h z`hC-WpDbKi&cmf%1S4&kg$@16LP)g=a!OQAP8jF&^jYww>lAYxxpZWD`qzYG#O&Rc zg+pH{jl964n%X&fYD1ZXmv&)GeNU%jO$;+Gsl}m^idD(lZr!k`u#ZGcPp&?$xtQZC zS?!`G+w`euuB2svi%G=4JoU6=AW==*saDsT9}eT|dkg a$>e|Qz^E(#vvmJ~^k01Xmt8vlclBRj&EGo! delta 112 zcmaF#j`8Pi#tl=|Cp(miY+j(>VX*mY0TT<5C&$#SJ~^S(iK$a#@*D%1$p=ccn06aZ zK4&R4nXOEVN!EU{fxG-lFVN4+B2 diff --git a/docs/pyodide_playground_strategy.md b/docs/pyodide_playground_strategy.md index 600ac48..42888a2 100644 --- a/docs/pyodide_playground_strategy.md +++ b/docs/pyodide_playground_strategy.md @@ -101,10 +101,33 @@ that `loadPyodide`/`unpackArchive`/`fetch` behave as expected. Open `docs/pyodide/poc.html` via a local server (e.g. `python3 -m http.server` from `docs/`) or on GitHub Pages and click “Run the real loop”. -**Phase 1 — one real loop on the page (1–2 days).** Add a "Run real Python" -toggle to the existing playground for `clarifying_question` (its renderer already -exists). Python produces the trace; JS draws it; delete the JS dynamics for that -scenario. This is the first honest "real Python in your browser" claim. +**Phase 1 — one real loop on the page. ✅ built (Python path verified; needs a +browser check).** The playground ([`docs/playground.html`](playground.html)) +has a **"Run real Python"** toggle for `clarifying_question`. When on, it lazily +boots Pyodide, unpacks the bundle, and runs the **unmodified** +`examples/embodied_ai/35_clarifying_question.py` `run(...)` headless; the real +`Trace` is serialized by [`pir/viz/playground_trace.py`](../pir/viz/playground_trace.py) +into the exact config the existing JS renderer consumes, and the page draws that. +The JS preview stays the default first paint (Pyodide is loaded only on toggle), +so the instant experience is preserved. + +The trace→render JSON is now a **pinned contract**: +[`tests/test_playground_trace.py`](../tests/test_playground_trace.py) runs the +real loop, serializes it, and asserts every field the renderer reads is present +and plain-JSON (no numpy leaks) — exactly the drift guard the risk list calls for. + +Verified locally (CPython simulating Pyodide's unpack-into-cwd, the exact driver +from `playground.js`): for `answer=red` the serializer returns the real +`ask → look → pick` trace (`ambiguous_goal` then resolved belief, pick at +`[32, 56]`), identical to the CLI loop. **Still to confirm in a real browser:** +toggling "Run real Python" boots Pyodide, runs the loop, and the scene/belief/ +timeline redraw from the real trace. + +Deliberately **deferred** (not yet done): deleting the JS `buildClarifyingScenario` +dynamics. It is kept as the no-Pyodide instant fallback so first paint never waits +on a multi-MB download. Full deletion waits until the real path is the verified +default — at which point the JS mock can be dropped and the contract test becomes +the single source of truth. **Phase 2 — tabletop renderer + hero loop (1–2 days).** Add the continuous tabletop renderer and wire `pick_and_retry`. Now the README hero GIF has a diff --git a/pir/viz/playground_trace.py b/pir/viz/playground_trace.py new file mode 100644 index 0000000..73bcbc3 --- /dev/null +++ b/pir/viz/playground_trace.py @@ -0,0 +1,154 @@ +"""Serialize a clarifying-question `Trace` into the playground's render shape. + +The browser playground (docs/playground.js) draws scenes from a plain config +object: an ``initial`` snapshot plus a list of per-step events. Historically that +config was hand-written in JavaScript, a reimplementation of the Python loop that +could silently drift from the tested example. Pyodide lets the browser run the +*real* ``examples/embodied_ai/35_clarifying_question.py`` loop; this module turns +the resulting `Trace` into the exact JSON the JS renderer already consumes, so +the browser draws real Python output instead of a JS mock. + +Everything here is pure Python + JSON-friendly scalars (no numpy, no matplotlib), +so it runs unchanged in CPython, in tests, and in Pyodide. The shape it produces +is pinned by tests/test_playground_trace.py — that test is the drift guard the +design memo (docs/pyodide_playground_strategy.md) calls "the contract". +""" + +from __future__ import annotations + +from math import log2 +from typing import Any + +from pir.core.types import Failure + +# The clarifying tabletop is fixed: red block at (0.32, 0.56), blue at +# (0.68, 0.56). The JS renderer draws on a 0-100 SVG canvas, so positions are +# scaled by 100. This mirrors ClarifyingQuestionWorld.reset(). +_SVG_SCALE = 100.0 + + +def _entropy(distribution: dict[str, float]) -> float: + total = sum(p * log2(p) for p in distribution.values() if p > 0.0) + return -total if total else 0.0 # avoid -0.0 for a point-mass belief + + +def _belief(resolved_color: str | None) -> dict[str, Any]: + """Two-block belief: uniform until clarified, then a point mass.""" + + if resolved_color is None: + distribution = {"red": 0.5, "blue": 0.5} + entropy = _entropy(distribution) + return { + "red": distribution["red"], + "blue": distribution["blue"], + "entropy": entropy, + "askGain": entropy, + "policy": "ask", + } + distribution = { + "red": 1.0 if resolved_color == "red" else 0.0, + "blue": 1.0 if resolved_color == "blue" else 0.0, + } + return { + "red": distribution["red"], + "blue": distribution["blue"], + "entropy": _entropy(distribution), + "askGain": 0.0, + "policy": "act", + } + + +def _failure_kind(info: dict[str, Any]) -> str: + failure = info.get("failure") + return failure.kind if isinstance(failure, Failure) else "" + + +def _action_label(action: dict[str, Any]) -> str: + action_type = action.get("type", "noop") + if action_type == "ask": + return "ask(which_block)" + color = action.get("color") + if color: + return f"{action_type}({color})" + return action_type + + +def _pick_at(info: dict[str, Any]) -> list[float] | None: + position = info.get("pick_position") + if position is None: + return None + return [ + round(float(position[0]) * _SVG_SCALE, 4), + round(float(position[1]) * _SVG_SCALE, 4), + ] + + +def _initial_snapshot(command: str) -> dict[str, Any]: + return { + "type": "tabletop", + "command": command, + "target": "unresolved", + "agentState": "parse_command", + "failure": "none", + "belief": _belief(None), + "picked": None, + "pickAt": None, + "focus": None, + "question": None, + "answer": None, + } + + +def _step_snapshot(command: str, info: dict[str, Any], obs: dict[str, Any]) -> dict[str, Any]: + resolved = info.get("resolved_goal") or {} + color = resolved.get("color") + return { + "type": "tabletop", + "command": command, + "target": color or "unresolved", + "agentState": info.get("agent_state", ""), + "failure": _failure_kind(info) or "none", + "belief": _belief(color), + "picked": obs.get("picked_color"), + "pickAt": _pick_at(info), + "focus": obs.get("focus_color"), + "question": obs.get("last_question") or info.get("question"), + "answer": obs.get("last_answer"), + } + + +def clarifying_trace_to_playground( + trace: Any, + *, + command: str = "pick the block", + answer: str = "red", +) -> dict[str, Any]: + """Convert a clarifying-question `Trace` into the playground config object. + + The returned dict matches what docs/playground.js builds in + ``buildClarifyingScenario`` — ``{command, totalSteps, initial, steps}`` where + each step is ``{action, reward, failure, agentState, snapshot}`` — so the JS + renderer can draw it with no changes. Unlike the JS mock, every field here is + derived from the real loop's observations and info dicts. + """ + + _ = answer # answer is encoded in the trace; kept for a self-describing call site + steps: list[dict[str, Any]] = [] + for action, reward, info, obs in zip( + trace.actions, trace.rewards, trace.infos, trace.observations + ): + steps.append( + { + "action": _action_label(action), + "reward": round(float(reward), 4), + "failure": _failure_kind(info), + "agentState": info.get("agent_state", ""), + "snapshot": _step_snapshot(command, info, obs), + } + ) + return { + "command": command, + "totalSteps": len(steps), + "initial": _initial_snapshot(command), + "steps": steps, + } diff --git a/scripts/build_pyodide_bundle.py b/scripts/build_pyodide_bundle.py index 723f72a..5522639 100644 --- a/scripts/build_pyodide_bundle.py +++ b/scripts/build_pyodide_bundle.py @@ -22,6 +22,7 @@ # get a JS renderer (see docs/pyodide_playground_strategy.md). BUNDLED_EXAMPLES = [ "examples/manipulation/01_pick_and_retry.py", + "examples/embodied_ai/35_clarifying_question.py", ] diff --git a/tests/test_playground_trace.py b/tests/test_playground_trace.py new file mode 100644 index 0000000..0c3fcea --- /dev/null +++ b/tests/test_playground_trace.py @@ -0,0 +1,100 @@ +"""Pin the trace-to-playground JSON contract shared by Python and JS. + +docs/playground.js reads a fixed set of fields off each snapshot. If the Python +serializer (pir/viz/playground_trace.py) stops emitting one of them, the browser +would silently render a blank/garbled scene. These tests run the real clarifying +loop, serialize it, and assert the renderer's contract holds — including that the +output is plain JSON (no numpy leaks that would break json.dumps in Pyodide). +""" + +from __future__ import annotations + +import importlib.util +import json +from pathlib import Path + +from pir.viz.playground_trace import clarifying_trace_to_playground + +ROOT = Path(__file__).resolve().parents[1] + +# Fields docs/playground.js reads off a tabletop snapshot (renderTabletop + +# renderBelief + the status strip). Keep in sync with the renderer. +SNAPSHOT_FIELDS = { + "type", + "command", + "target", + "agentState", + "failure", + "belief", + "picked", + "pickAt", + "focus", + "question", + "answer", +} +BELIEF_FIELDS = {"red", "blue", "entropy", "askGain", "policy"} +STEP_FIELDS = {"action", "reward", "failure", "agentState", "snapshot"} + + +def _run(answer: str): + path = ROOT / "examples" / "embodied_ai" / "35_clarifying_question.py" + spec = importlib.util.spec_from_file_location("clarifying_question_contract", path) + assert spec and spec.loader + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module.run(command="pick the block", answer=answer, render=False) + + +def test_config_shape_matches_renderer_contract() -> None: + config = clarifying_trace_to_playground(_run("red"), answer="red") + + assert config["command"] == "pick the block" + assert config["totalSteps"] == len(config["steps"]) == 3 + + initial = config["initial"] + assert set(initial) == SNAPSHOT_FIELDS + assert initial["type"] == "tabletop" + assert initial["target"] == "unresolved" + assert initial["belief"]["policy"] == "ask" + assert abs(initial["belief"]["entropy"] - 1.0) < 1e-9 + + for step in config["steps"]: + assert set(step) == STEP_FIELDS + snapshot = step["snapshot"] + assert set(snapshot) == SNAPSHOT_FIELDS + assert set(snapshot["belief"]) == BELIEF_FIELDS + + +def test_real_loop_resolves_and_picks_red() -> None: + config = clarifying_trace_to_playground(_run("red"), answer="red") + steps = config["steps"] + + # ask -> look -> pick, the real loop's three steps. + assert [step["action"] for step in steps] == [ + "ask(which_block)", + "look(red)", + "pick(red)", + ] + assert steps[0]["failure"] == "ambiguous_goal" + assert steps[0]["snapshot"]["target"] == "red" + assert steps[0]["snapshot"]["belief"]["policy"] == "act" + + final = steps[-1] + assert final["agentState"] == "done" + assert final["snapshot"]["picked"] == "red" + assert final["snapshot"]["pickAt"] == [32.0, 56.0] + + +def test_answer_blue_resolves_blue() -> None: + config = clarifying_trace_to_playground(_run("blue"), answer="blue") + final = config["steps"][-1] + assert final["action"] == "pick(blue)" + assert final["snapshot"]["picked"] == "blue" + assert final["snapshot"]["pickAt"] == [68.0, 56.0] + + +def test_config_is_plain_json() -> None: + # Pyodide returns this via json.dumps; a stray numpy array would raise here. + config = clarifying_trace_to_playground(_run("red"), answer="red") + reparsed = json.loads(json.dumps(config)) + assert reparsed == config