diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml index a2a1823c..94683156 100644 --- a/.github/workflows/bench-pr.yml +++ b/.github/workflows/bench-pr.yml @@ -1,4 +1,13 @@ # Creates a PR benchmark comment with a comparison to main +# +# SECURITY: this workflow builds and EXECUTES PR-branch code (which may come +# from a fork). The job topology is load-bearing: +# - `benchmark` runs the untrusted code and must never hold credentials — +# no secrets, no token minting. Its only output is an uploaded artifact. +# - `comment` is the only privileged job (mints the GitHub App token). It +# runs on a fresh runner and executes no checked-out code, so untrusted +# build scripts can never observe the token. +# Do not merge these jobs or move the token step into `benchmark`. name: Benchmark pull requests on: @@ -30,9 +39,13 @@ jobs: - uses: actions/checkout@v6 - name: Parse PR comment body id: bench-params + env: + # Passed via env (not inline interpolation) so comment text can + # never be evaluated as shell syntax. + COMMENT_BODY: ${{ github.event.comment.body }} run: | # Parse `issue_comment` body - printf '${{ github.event.comment.body }}' > comment.txt + printf '%s' "$COMMENT_BODY" > comment.txt BENCH_COMMAND=$(head -n 1 comment.txt | tr -d '\r') echo "$BENCH_COMMAND" @@ -60,15 +73,17 @@ jobs: bench: ${{ fromJSON(needs.setup.outputs.benches) }} steps: - name: Set env vars + env: + ENV_VARS: ${{ needs.setup.outputs.env-vars }} run: | # Overrides default env vars with those specified in the `issue_comment` input if identically named - for var in ${{ needs.setup.outputs.env-vars }} + for var in $ENV_VARS do echo "$var" | tee -a $GITHUB_ENV done - uses: actions/checkout@v6 # Get base branch of the PR - - uses: xt0rted/pull-request-comment-branch@v3 + - uses: xt0rted/pull-request-comment-branch@e8b8daa837e8ea7331c0003c9c316a64c6d8b0b1 # v3 id: comment-branch - name: Checkout base branch uses: actions/checkout@v6 @@ -76,7 +91,7 @@ jobs: ref: ${{ steps.comment-branch.outputs.base_sha }} path : ${{ github.workspace }}/base - name: Run `lake build` on base branch - uses: leanprover/lean-action@v1 + uses: leanprover/lean-action@38fbc41a8c28c4cbaec22d7f7de508ec2e7c0dd9 # v1 with: lake-package-directory: ${{ github.workspace }}/base test: false @@ -94,7 +109,7 @@ jobs: path: ${{ github.workspace }}/pr ref: ${{ steps.comment-branch.outputs.head_sha }} - name: Run `lake build` on PR branch - uses: leanprover/lean-action@v1 + uses: leanprover/lean-action@38fbc41a8c28c4cbaec22d7f7de508ec2e7c0dd9 # v1 with: lake-package-directory: ${{ github.workspace }}/pr test: false @@ -109,43 +124,74 @@ jobs: run: | BENCH_REPORT=1 lake exe ${{ matrix.bench }} working-directory: ${{ github.workspace }}/pr - - name: Get env for PR body - if: always() - run: | - SHORT_SHA_PR=$(git rev-parse --short HEAD) - REPO_URL=${{ github.server_url }}/${{ github.repository }} - echo "COMMIT_LINK=[\`$SHORT_SHA_PR\`]($REPO_URL/commit/${{ steps.comment-branch.outputs.head_sha }})" | tee -a $GITHUB_ENV - echo "WORKFLOW_LINK=[Workflow logs]($REPO_URL/actions/runs/${{ github.run_id }})" | tee -a $GITHUB_ENV - working-directory: ${{ github.workspace }}/pr - - name: Generate token to write PR comment - uses: actions/create-github-app-token@v3 - if: always() - id: app-token - with: - app-id: ${{ secrets.TOKEN_APP_ID }} - private-key: ${{ secrets.TOKEN_APP_PRIVATE_KEY }} - name: Build benchmark comment body if: success() run: | { - echo '## Benchmark for `${{ matrix.bench }}` at ${{ env.COMMIT_LINK }}'; + echo '## Benchmark for `${{ matrix.bench }}`'; echo ""; for file in .lake/benches/*/report.md; do [ -f "$file" ] && cat "$file" && echo "" done - echo "${{ env.WORKFLOW_LINK }}"; } > ${{ github.workspace }}/comment-body.md working-directory: ${{ github.workspace }}/pr - - name: Comment on successful run + - name: Upload benchmark comment body if: success() - uses: peter-evans/create-or-update-comment@v5 + uses: actions/upload-artifact@v4 + with: + name: comment-body-${{ matrix.bench }} + path: ${{ github.workspace }}/comment-body.md + + comment: + # Privileged job: mints the App token. Runs no checked-out code (see the + # header comment). `always()` so failures still produce a PR comment. + needs: [ setup, benchmark ] + if: always() && needs.setup.result == 'success' + runs-on: ubuntu-latest + strategy: + matrix: + bench: ${{ fromJSON(needs.setup.outputs.benches) }} + steps: + - uses: xt0rted/pull-request-comment-branch@e8b8daa837e8ea7331c0003c9c316a64c6d8b0b1 # v3 + id: comment-branch + - name: Get env for PR body + run: | + REPO_URL=${{ github.server_url }}/${{ github.repository }} + HEAD_SHA=${{ steps.comment-branch.outputs.head_sha }} + SHORT_SHA_PR=$(printf '%s' "$HEAD_SHA" | cut -c1-7) + echo "COMMIT_LINK=[\`$SHORT_SHA_PR\`]($REPO_URL/commit/$HEAD_SHA)" | tee -a $GITHUB_ENV + echo "WORKFLOW_LINK=[Workflow logs]($REPO_URL/actions/runs/${{ github.run_id }})" | tee -a $GITHUB_ENV + - name: Download benchmark comment body + id: download + continue-on-error: true + uses: actions/download-artifact@v4 + with: + name: comment-body-${{ matrix.bench }} + - name: Assemble comment body + if: steps.download.outcome == 'success' + run: | + { + echo '## Benchmark for `${{ matrix.bench }}` at ${{ env.COMMIT_LINK }}'; + echo ""; + cat comment-body.md; + echo "${{ env.WORKFLOW_LINK }}"; + } > final-comment-body.md + - name: Generate token to write PR comment + uses: actions/create-github-app-token@v3 + id: app-token + with: + app-id: ${{ secrets.TOKEN_APP_ID }} + private-key: ${{ secrets.TOKEN_APP_PRIVATE_KEY }} + - name: Comment on successful run + if: steps.download.outcome == 'success' + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: token: ${{ steps.app-token.outputs.token }} issue-number: ${{ github.event.issue.number }} - body-path: 'comment-body.md' + body-path: 'final-comment-body.md' - name: Comment on failing run - if: failure() - uses: peter-evans/create-or-update-comment@v5 + if: steps.download.outcome != 'success' + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: token: ${{ steps.app-token.outputs.token }} issue-number: ${{ github.event.issue.number }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 09c67279..7f80e309 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,8 +18,8 @@ jobs: runs-on: warp-ubuntu-latest-x64-16x steps: - uses: actions/checkout@v6 - - uses: actions-rust-lang/setup-rust-toolchain@v1 - - uses: leanprover/lean-action@v1 + - uses: actions-rust-lang/setup-rust-toolchain@46268bd060767258de96ed93c1251119784f2ab6 # v1 + - uses: leanprover/lean-action@38fbc41a8c28c4cbaec22d7f7de508ec2e7c0dd9 # v1 with: build-args: "--wfail -v" - name: Test Ix CLI @@ -33,15 +33,17 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - - uses: actions-rust-lang/setup-rust-toolchain@v1 - - uses: taiki-e/install-action@nextest + - uses: actions-rust-lang/setup-rust-toolchain@46268bd060767258de96ed93c1251119784f2ab6 # v1 + - uses: taiki-e/install-action@0631aa6515c7d545823c67cfae7ef4fc7f490154 # v2 + with: + tool: nextest # Install Lean for rust-bindgen step - - uses: leanprover/lean-action@v1 + - uses: leanprover/lean-action@38fbc41a8c28c4cbaec22d7f7de508ec2e7c0dd9 # v1 with: auto-config: false use-github-cache: false - name: Check Rustfmt code style - uses: actions-rust-lang/rustfmt@v1 + uses: actions-rust-lang/rustfmt@4066006ec54a31931b9b1fddfd38f2fdf2d27143 # v1 - name: Check clippy warnings run: cargo clippy --workspace --all-targets --all-features -- -D warnings - name: Check *everything* compiles @@ -52,6 +54,6 @@ jobs: run: | echo "RUST_VERSION=$(awk -F '"' '/^channel/ {print $2}' rust-toolchain.toml)" | tee -a $GITHUB_ENV - name: Cargo-deny - uses: EmbarkStudios/cargo-deny-action@v2 + uses: EmbarkStudios/cargo-deny-action@8f84122a46a358a27cb0625d85ad60ab436a1b87 # v2 with: rust-version: ${{ env.RUST_VERSION }} diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml index e039d5cb..0d5d4f06 100644 --- a/.github/workflows/compile.yml +++ b/.github/workflows/compile.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - - uses: leanprover/lean-action@v1 + - uses: leanprover/lean-action@38fbc41a8c28c4cbaec22d7f7de508ec2e7c0dd9 # v1 with: auto-config: false build: true @@ -56,7 +56,7 @@ jobs: - if: matrix.bench == 'FC' run: echo "COMPILE_DIR=${{env.COMPILE_DIR}}FC" | tee -a $GITHUB_ENV # Download elan and fetch mathlib cache - - uses: leanprover/lean-action@v1 + - uses: leanprover/lean-action@38fbc41a8c28c4cbaec22d7f7de508ec2e7c0dd9 # v1 with: lake-package-directory: ${{ env.COMPILE_DIR }} auto-config: false @@ -102,7 +102,9 @@ jobs: [ "$sample" -gt 64 ] && sample=64 echo "sample=$sample" | tee -a "$GITHUB_OUTPUT" # Deploy benchmark results to https://bencher.dev/console/projects/ix/plots - - uses: bencherdev/bencher@main + # Pinned by SHA (v0.6.6): this step's environment later holds + # BENCHER_API_TOKEN, so it must not track a moving branch. + - uses: bencherdev/bencher@50fb1e138651a46d2fb704fab1adab38c181552e # v0.6.6 - name: Track benchmarks run: | # On a lean-toolchain bump (sample=0), omit file-size and constants diff --git a/.github/workflows/ignored.yml b/.github/workflows/ignored.yml index 5e92186a..9b70e114 100644 --- a/.github/workflows/ignored.yml +++ b/.github/workflows/ignored.yml @@ -17,14 +17,14 @@ jobs: runs-on: warp-ubuntu-latest-x64-32x steps: - uses: actions/checkout@v6 - - uses: actions-rust-lang/setup-rust-toolchain@v1 + - uses: actions-rust-lang/setup-rust-toolchain@46268bd060767258de96ed93c1251119784f2ab6 # v1 # Only restore the cache, since `ci.yml` will save the test binary to the cache first - uses: actions/cache/restore@v5 with: path: .lake key: lake-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('lean-toolchain') }}-${{ hashFiles('lake-manifest.json') }}-${{ github.sha }} restore-keys: lake-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('lean-toolchain') }}-${{ hashFiles('lake-manifest.json') }} - - uses: leanprover/lean-action@v1 + - uses: leanprover/lean-action@38fbc41a8c28c4cbaec22d7f7de508ec2e7c0dd9 # v1 with: auto-config: false # `test-args` is currently broken, so we run `lake test` separately @@ -39,14 +39,14 @@ jobs: runs-on: warp-ubuntu-latest-x64-8x steps: - uses: actions/checkout@v6 - - uses: actions-rust-lang/setup-rust-toolchain@v1 + - uses: actions-rust-lang/setup-rust-toolchain@46268bd060767258de96ed93c1251119784f2ab6 # v1 # Only restore the cache, since `ci.yml` will save the test binary to the cache first - uses: actions/cache/restore@v5 with: path: .lake key: lake-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('lean-toolchain') }}-${{ hashFiles('lake-manifest.json') }}-${{ github.sha }} restore-keys: lake-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('lean-toolchain') }}-${{ hashFiles('lake-manifest.json') }} - - uses: leanprover/lean-action@v1 + - uses: leanprover/lean-action@38fbc41a8c28c4cbaec22d7f7de508ec2e7c0dd9 # v1 with: auto-config: false build: true diff --git a/.github/workflows/nix.yml b/.github/workflows/nix.yml index 265380c3..49bacaf9 100644 --- a/.github/workflows/nix.yml +++ b/.github/workflows/nix.yml @@ -20,7 +20,7 @@ jobs: runs-on: warp-ubuntu-latest-x64-8x steps: - uses: actions/checkout@v6 - - uses: cachix/install-nix-action@v31 + - uses: cachix/install-nix-action@b97f05dcb019ddea06450a50ef6203d2fdc19fee # v31 with: nix_path: nixpkgs=channel:nixos-unstable github_access_token: ${{ secrets.GITHUB_TOKEN }} @@ -36,7 +36,7 @@ jobs: runs-on: warp-ubuntu-latest-x64-8x steps: - uses: actions/checkout@v6 - - uses: cachix/install-nix-action@v31 + - uses: cachix/install-nix-action@b97f05dcb019ddea06450a50ef6203d2fdc19fee # v31 with: nix_path: nixpkgs=channel:nixos-unstable github_access_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/Apps/ZKVoting/Verifier.lean b/Apps/ZKVoting/Verifier.lean index d018db86..ac7c002c 100644 --- a/Apps/ZKVoting/Verifier.lean +++ b/Apps/ZKVoting/Verifier.lean @@ -1,5 +1,10 @@ import Ix.Claim +/-- NOT IMPLEMENTED. A verifier stub that exits 0 unconditionally is worse + than none at all — anything scripted against it silently treats every + proof as valid — so it fails loudly until real verification (parse + commitments, deserialize the proof, check every vote is in the claim, + run the STARK verifier) lands. -/ def main (_args : List String) : IO UInt32 := do -- TODO -- let mut votes := #[] @@ -14,4 +19,5 @@ def main (_args : List String) : IO UInt32 := do -- -- TODO: print the resulting vote counts in the claim -- -- TODO: assert that every vote in `votes` is in the proof claim -- -- TODO: verify proof - return 0 + IO.eprintln "error: ZKVoting verifier is not implemented; refusing to report success" + return 1 diff --git a/DebugIxVM.lean b/DebugIxVM.lean new file mode 100644 index 00000000..d3a8cd14 --- /dev/null +++ b/DebugIxVM.lean @@ -0,0 +1,36 @@ +import Ix.IxVM +import Tests.Ix.IxVM +import Tests.Aiur.Common + +def buildIxVMEnv : IO AiurTestEnv := do + match AiurTestEnv.build IxVM.ixVM with + | .error e => throw <| IO.userError s!"Aiur setup failed: {e}" + | .ok env => pure env + +def mkCase (caseName : String) (leanEnv : Lean.Environment) : IO AiurTestCase := do + match caseName with + | "kernel_unit_tests" => pure <| AiurTestCase.exec `kernel_unit_tests + | "serde" => serdeNatAddComm leanEnv + | "Nat.add_comm" => kernelCheck ``Nat.add_comm leanEnv + | "Nat.sub_le_of_le_add" => kernelCheck ``Nat.sub_le_of_le_add leanEnv + | "String.Internal.append" => kernelCheck ``String.Internal.append leanEnv + | "extractMainModule" => + kernelCheck (`_private.Init.Prelude |>.mkNum 0 |>.mkStr "Lean.extractMainModule._unsafe_rec") leanEnv + | other => throw <| IO.userError s!"unknown case: {other}" + +def runCase (env : AiurTestEnv) (tc : AiurTestCase) : IO UInt32 := do + IO.println s!"running {tc.label}" + let some funIdx := env.compiled.getFuncIdx tc.functionName + | throw <| IO.userError s!"missing function: {tc.functionName}" + let (execOutput, execIOBuffer, _queryCounts) := + env.compiled.bytecode.execute funIdx tc.input tc.inputIOBuffer + IO.println s!"output ok: {execOutput == tc.expectedOutput}" + IO.println s!"io ok: {execIOBuffer == tc.expectedIOBuffer}" + pure <| if execOutput == tc.expectedOutput && execIOBuffer == tc.expectedIOBuffer then 0 else 1 + +def main (args : List String) : IO UInt32 := do + let caseName := args.headD "kernel_unit_tests" + let leanEnv ← get_env! + let aiurEnv ← buildIxVMEnv + let tc ← mkCase caseName leanEnv + runCase aiurEnv tc diff --git a/Ix/Aiur/Compiler/Check.lean b/Ix/Aiur/Compiler/Check.lean index 4475024d..90bae3d8 100644 --- a/Ix/Aiur/Compiler/Check.lean +++ b/Ix/Aiur/Compiler/Check.lean @@ -584,12 +584,14 @@ def inferTerm (t : Term) : CheckM Typed.Term := match t with | .unit => pure (.unit .unit false) | .var x => do let ctx ← read - match (ctx.varTypes[x]?, x) with - | (some t, _) => pure (.var t false x) - | (none, Local.str localName) => - let (typ, _) ← refLookup (Global.init localName) - pure (.var typ false x) - | (none, _) => throw $ .unboundLocal x + match ctx.varTypes[x]? with + | some t => pure (.var t false x) + -- No global-function fallback here: neither `SourceEval` nor `Lower` + -- implements one, so a program that typechecked via the fallback + -- would evaluate/compile to something else entirely (silent + -- miscompile). Unbound locals are errors; reference globals with + -- `.ref`. + | none => throw $ .unboundLocal x | .ref x => do let (typ, tArgs) ← refLookup x pure (.ref typ false x tArgs) diff --git a/Ix/Aiur/Compiler/Lower.lean b/Ix/Aiur/Compiler/Lower.lean index 01b3d71f..de49a4ef 100644 --- a/Ix/Aiur/Compiler/Lower.lean +++ b/Ix/Aiur/Compiler/Lower.lean @@ -122,6 +122,17 @@ def pushOp (op : Bytecode.Op) (size : Nat := 1) : CompileM (Array Bytecode.ValId def extractOps : CompileM (Array Bytecode.Op) := modifyGet fun s => (s.ops, {s with ops := #[]}) +/-- Look up a local's value indices. A missing binding here is a compiler + bug (`Check` guarantees scoping); the old silent defaults (`#[0]`, + `#[]`) compiled such bugs into reads of arbitrary value slot 0 or + empty values — a miscompile of the *proven program* — instead of + failing loudly. -/ +def lookupBinding (bindings : Std.HashMap Local (Array Bytecode.ValIdx)) + (x : Local) (who : String) : CompileM (Array Bytecode.ValIdx) := + match bindings[x]? with + | some idxs => pure idxs + | none => throw s!"{who}: unbound local `{repr x}`" + open Concrete in mutual @@ -141,19 +152,22 @@ def toIndex let size ← match typSize layoutMap dstTyp with | .error e => throw e | .ok n => pure n - let ptrIdxs := bindings[src]?.getD #[0] + let ptrIdxs ← lookupBinding bindings src "letLoad" let loaded ← pushOp (.load size ptrIdxs[0]!) size toIndex layoutMap (bindings.insert dst loaded) bod - | .var _ _ name => pure (bindings[name]?.getD #[]) + | .var _ _ name => lookupBinding bindings name "var" | .ref _ _ name => match layoutMap[name]? with | some (.function layout) => do pushOp (.const (.ofNat layout.index)) | some (.constructor layout) => do let size := layout.size - let paddingOp := Bytecode.Op.const (.ofNat layout.index) - let index ← pushOp paddingOp + let index ← pushOp (.const (.ofNat layout.index)) if index.size < size then - let padding := (← pushOp paddingOp)[0]! + -- Pad with zeros, matching the `.app` lowering and + -- `flattenValue`: padding with the tag value gives the same + -- logical value two flat representations, and `assertEq`/`store` + -- compare full arrays. + let padding := (← pushOp (.const (.ofNat 0)))[0]! pure $ index ++ Array.replicate (size - index.size) padding else pure index @@ -381,7 +395,7 @@ def Concrete.Term.compile (Concrete.Term.match matchTyp valEscapes scrut cases defaultOpt).compile returnTyp layoutMap bindings yieldCtrl else - let idxs := bindings[scrut]?.getD #[0] + let idxs ← lookupBinding bindings scrut "letVar-match scrutinee" let ops ← extractOps let (matchCases, defaultBlock) ← cases.foldlM (init := (#[], .none)) (Concrete.addCase layoutMap bindings returnTyp idxs (yieldCtrl := true)) @@ -410,7 +424,7 @@ def Concrete.Term.compile (Concrete.Term.match matchTyp valEscapes scrut cases defaultOpt).compile returnTyp layoutMap bindings yieldCtrl else - let idxs := bindings[scrut]?.getD #[0] + let idxs ← lookupBinding bindings scrut "letWild-match scrutinee" let ops ← extractOps let (matchCases, defaultBlock) ← cases.foldlM (init := (#[], .none)) (Concrete.addCase layoutMap bindings returnTyp idxs (yieldCtrl := true)) @@ -441,7 +455,7 @@ def Concrete.Term.compile let size ← match typSize layoutMap dstTyp with | .error e => throw e | .ok n => pure n - let ptrIdxs := bindings[src]?.getD #[0] + let ptrIdxs ← lookupBinding bindings src "letLoad" let loaded ← pushOp (.load size ptrIdxs[0]!) size bod.compile returnTyp layoutMap (bindings.insert dst loaded) yieldCtrl | .debug _ _ label term ret => do @@ -466,7 +480,7 @@ def Concrete.Term.compile modify fun stt => { stt with ops := stt.ops.push (.ioWrite channel[0]! data) } ret.compile returnTyp layoutMap bindings yieldCtrl | .match _ _ scrut cases defaultOpt => do - let idxs := bindings[scrut]?.getD #[0] + let idxs ← lookupBinding bindings scrut "match scrutinee" let ops ← extractOps let (bcCases, defaultBlock) ← cases.foldlM (init := (#[], .none)) (Concrete.addCase layoutMap bindings returnTyp idxs yieldCtrl) diff --git a/Ix/Aiur/Protocol.lean b/Ix/Aiur/Protocol.lean index df320811..6d9dfccd 100644 --- a/Ix/Aiur/Protocol.lean +++ b/Ix/Aiur/Protocol.lean @@ -22,8 +22,12 @@ namespace Proof @[extern "rs_aiur_proof_to_bytes"] opaque toBytes : @& Proof → ByteArray +/-- Deserialize proof bytes. The bytes come from an untrusted, + prover-supplied wrapper, so failure is an `Except.error` (the old + signature panicked inside the FFI, killing the process under + `panic = "abort"`). -/ @[extern "rs_aiur_proof_of_bytes"] -opaque ofBytes : @& ByteArray → Proof +opaque ofBytes : @& ByteArray → Except String Proof end Proof diff --git a/Ix/Aiur/Semantics/BytecodeEval.lean b/Ix/Aiur/Semantics/BytecodeEval.lean index 66215f1b..acb700ce 100644 --- a/Ix/Aiur/Semantics/BytecodeEval.lean +++ b/Ix/Aiur/Semantics/BytecodeEval.lean @@ -98,6 +98,17 @@ def appendMap (st : EvalState) (gs : Array G) : EvalState := def setIoBuffer (st : EvalState) (ioBuffer : IOBuffer) : EvalState := { st with ioBuffer } +/-- How a block terminated. Mirrors Rust `execute.rs`: a `Ctrl.return` + is a *function-level* return that propagates out of any enclosing + `matchContinue` (its continuation is skipped), while a `Ctrl.yield` + feeds the enclosing `matchContinue`'s merged outputs and resumes its + continuation. Conflating the two (the old evaluator treated a branch + `return` as a yield) silently diverges from the proven semantics. -/ +inductive BlockExit where + | returned + | yielded + deriving BEq + /-! ## Termination helpers Lemmas proving that a `Block`'s sub-parts have strictly smaller `sizeOf`. @@ -181,7 +192,10 @@ def evalOp (t : Bytecode.Toplevel) (fuel : Nat) (op : Op) (st : EvalState) : | fuel+1 => match evalBlock t fuel f.body innerSt with | .error e => .error e - | .ok (outs, innerSt') => + -- The function boundary absorbs the exit kind: a body that ends + -- in a (top-level) return and one whose outer matchContinue + -- continuation returns both just produce outputs here. + | .ok (_, outs, innerSt') => if outs.size != outputSize then .error .callOutputSizeMismatch else @@ -310,7 +324,8 @@ decreasing_by all_goals first | decreasing_tactic | omega /-- Evaluate a `Block`: run all ops, then dispatch on the control. -/ def evalBlock (t : Bytecode.Toplevel) (fuel : Nat) - (b : Block) (st : EvalState) : Except BytecodeError (Array G × EvalState) := + (b : Block) (st : EvalState) : + Except BytecodeError (BlockExit × Array G × EvalState) := match runOps t fuel b.ops st 0 with | .error e => .error e | .ok st' => evalCtrl t fuel b.ctrl st' @@ -323,16 +338,17 @@ decreasing_by | omega def evalCtrl (t : Bytecode.Toplevel) (fuel : Nat) - (ctrl : Ctrl) (st : EvalState) : Except BytecodeError (Array G × EvalState) := + (ctrl : Ctrl) (st : EvalState) : + Except BytecodeError (BlockExit × Array G × EvalState) := match ctrl with | .return _ outs => match readIdxs st outs with | .error e => .error e - | .ok gs => .ok (gs, st) + | .ok gs => .ok (.returned, gs, st) | .yield _ outs => match readIdxs st outs with | .error e => .error e - | .ok gs => .ok (gs, st) + | .ok gs => .ok (.yielded, gs, st) | .match scrutIdx cases defaultBlock => match readIdx st scrutIdx with | .error e => .error e @@ -344,7 +360,12 @@ def evalCtrl (t : Bytecode.Toplevel) (fuel : Nat) | .ok scrut => match evalMatchArm t fuel cases defaultBlock scrut st with | .error e => .error e - | .ok (gs, st') => + | .ok (.returned, gs, st') => + -- A branch `return` is a function-level return: the continuation + -- is skipped entirely (Rust `execute.rs` truncates here). Keep the + -- branch's state — the function is done with it. + .ok (.returned, gs, st') + | .ok (.yielded, gs, st') => -- The match acts as an atomic operation from the continuation's -- perspective: any values a case pushed onto `map` are local to the -- case and must not leak into the continuation's namespace. Restore @@ -362,7 +383,7 @@ which lets `Array.sizeOf_get` discharge the termination goal when we call def evalMatchArm (t : Bytecode.Toplevel) (fuel : Nat) (cases : Array (G × Block)) (defaultBlock : Option Block) (scrut : G) (st : EvalState) (i : Nat := 0) : - Except BytecodeError (Array G × EvalState) := + Except BytecodeError (BlockExit × Array G × EvalState) := if h : i < cases.size then if cases[i].fst == scrut then evalBlock t fuel cases[i].snd st else evalMatchArm t fuel cases defaultBlock scrut st (i + 1) @@ -383,7 +404,7 @@ decreasing_by /-- Dispatch on the optional default block at the end of a `.match` arm. -/ def evalDefaultBlock (t : Bytecode.Toplevel) (fuel : Nat) (defaultBlock : Option Block) (st : EvalState) : - Except BytecodeError (Array G × EvalState) := + Except BytecodeError (BlockExit × Array G × EvalState) := match defaultBlock with | some block => evalBlock t fuel block st | none => .error .unreachableAfterLayout @@ -411,7 +432,7 @@ def runFunction (t : Bytecode.Toplevel) (funIdx : FunIdx) (args : Array G) let st : EvalState := { map := args, ioBuffer } match evalBlock t fuel f.body st with | .error e => .error e - | .ok (outs, st') => .ok (outs, st'.ioBuffer) + | .ok (_, outs, st') => .ok (outs, st'.ioBuffer) else .error (.invalidFunIdx funIdx) end Bytecode.Eval diff --git a/Ix/Aiur/Semantics/SourceEval.lean b/Ix/Aiur/Semantics/SourceEval.lean index 4147f45a..7aafa219 100644 --- a/Ix/Aiur/Semantics/SourceEval.lean +++ b/Ix/Aiur/Semantics/SourceEval.lean @@ -175,9 +175,9 @@ def applyLocal (decls : Decls) (fuel : Nat) (v : Value) (args : List Value) termination_by (fuel, 1, 0) /-- Big-step evaluator on `Source.Term`. `fuel` is only consumed at function-call -boundaries; the intra-body recursion here is structural. On a `.ret`, the return -value escapes via a bespoke path that the caller unwraps — implemented here as -a sentinel pattern in the `Except` result. -/ +boundaries; the intra-body recursion here is structural. NOTE: `.ret` does +not yet escape non-tail contexts (no sentinel path is implemented) — see the +known-divergence note on the `.ret` arm below. -/ def interp (decls : Decls) (fuel : Nat) (bindings : Bindings) (t : Term) (st : EvalState) : EvalResult := match t with @@ -205,9 +205,14 @@ def interp (decls : Decls) (fuel : Nat) (bindings : Bindings) | .ok (vs, st') => .ok (.array vs, st') | .ann _ t => interp decls fuel bindings t st | .ret sub => - -- Explicit returns only appear inside function bodies; the body recursion - -- here returns normally (the surrounding caller treats the full body value - -- as the return value). + -- KNOWN DIVERGENCE: a `.ret` in *non-tail* position (e.g. inside a + -- let-bound match branch) should escape the whole function — Rust + -- `execute.rs` and `BytecodeEval` (`BlockExit.returned`) truncate the + -- continuation — but this evaluator returns the value into the + -- enclosing continuation instead. Fixing it requires threading an + -- escape marker through every `interp` consumer. Until then, + -- differential tests against `BytecodeEval` will (correctly) flag + -- programs that early-return inside a non-tail match. interp decls fuel bindings sub st | .let p t1 t2 => match interp decls fuel bindings t1 st with diff --git a/Ix/Cli/ProveCmd.lean b/Ix/Cli/ProveCmd.lean index 5b1fb25a..1828e1b4 100644 --- a/Ix/Cli/ProveCmd.lean +++ b/Ix/Cli/ProveCmd.lean @@ -83,6 +83,11 @@ def proveOne (aiurSystem : Aiur.AiurSystem) def runProveCmd (p : Cli.Parsed) : IO UInt32 := do Std.Internal.UV.System.osSetenv "IX_QUIET" "1" + -- The pinned multi-stark/Plonky3 backend has no witness blinding/masking + -- yet, so proofs are succinct but NOT hiding — a published proof can + -- reveal witness rows (private constants, commitment secrets, program + -- bytes). Warn until ZK masking lands. + IO.eprintln "warning: proofs are NOT zero-knowledge yet (no witness blinding/masking); do not publish proofs over private programs or data" let keepGoing := p.hasFlag "keep-going" let ixePath : Option String := (p.flag? "ixe").map (·.as! String) let claimHex : Option String := (p.flag? "claim").map (·.as! String) diff --git a/Ix/Cli/VerifyCmd.lean b/Ix/Cli/VerifyCmd.lean index e66b5548..bc7a74a3 100644 --- a/Ix/Cli/VerifyCmd.lean +++ b/Ix/Cli/VerifyCmd.lean @@ -47,13 +47,53 @@ private def friParameters : Aiur.FriParameters := { queryProofOfWorkBits := 0 } +/-- The assumption set a claim is conditional on, if any. -/ +private def claimAssumptions : Ix.Claim → Option Address + | .eval _ _ asm | .check _ asm | .checkEnv _ asm => asm + | .reveal .. | .contains .. => none + +/-- Human-readable rendering of a decoded claim. Shown at verify time so the + user sees *what* was proven (variant + subject addresses) and whether it + is conditional — a bare "ok" hides that a `Check(X, asm ∋ X)` claim is + vacuous: everything in the assumption tree, including possibly X itself, + was skipped rather than checked. -/ +private def describeClaim : Ix.Claim → String + | .eval input output asm => + s!"Eval input {input} ⇓ output {output}{describeAssumptions asm}" + | .check c asm => s!"Check {c}{describeAssumptions asm}" + | .checkEnv root asm => s!"CheckEnv root {root}{describeAssumptions asm}" + | .reveal comm _ => s!"Reveal commitment {comm}" + | .contains tree c => s!"Contains tree {tree} ∋ {c}" +where + describeAssumptions : Option Address → String + | none => " (unconditional)" + | some root => s!" (CONDITIONAL on unchecked assumption tree {root})" + /-- Verify one persisted `Ixon.Proof` wrapper (by store address) against its - bundled claim, using an already-built Aiur backend. -/ + bundled claim, using an already-built Aiur backend. + + `allowConditional` gates claims with a nonempty assumption set: such a + claim says "X holds *if* every constant in the tree does" — the + assumptions are skipped, not checked, so accepting one as if it were + unconditional is unsound for any caller keying on exit code 0. Shard + composition passes `true` because it separately binds each proof's + digest to a reconstructed shard claim whose frontier it validates. -/ def verifyOneProof (aiurSystem : Aiur.AiurSystem) (compiled : Aiur.CompiledToplevel) - (proofAddr : Address) : IO UInt32 := do - let bytes ← StoreIO.toIO (Store.read proofAddr) + (proofAddr : Address) (allowConditional : Bool := false) : IO UInt32 := do + let bytes ← StoreIO.toIO (Store.readVerified proofAddr) let wrapper ← IO.ofExcept (Ixon.Proof.de bytes) - let proof := Aiur.Proof.ofBytes wrapper.proof + let proof ← match Aiur.Proof.ofBytes wrapper.proof with + | .ok p => pure p + | .error e => + IO.eprintln s!"error: proof {proofAddr}: {e}" + return 1 + if !allowConditional then + if let some root := claimAssumptions wrapper.claim then + IO.eprintln s!"error: proof {proofAddr} carries a conditional claim:" + IO.eprintln s!" {describeClaim wrapper.claim}" + IO.eprintln s!" the assumption tree {root} was NOT checked by this proof;" + IO.eprintln s!" pass --allow-conditional to accept it anyway" + return 1 -- `verify_claim` takes the 32-G blake3 digest of the serialized claim. let claimDigest := Address.blake3 (Ix.Claim.ser wrapper.claim) let funIdx ← match compiled.getFuncIdx `verify_claim with @@ -66,6 +106,7 @@ def verifyOneProof (aiurSystem : Aiur.AiurSystem) (compiled : Aiur.CompiledTople match aiurSystem.verify friParameters aiurClaim proof with | .ok () => IO.println s!"ok: proof {proofAddr} verifies claim {claimDigest}" + IO.println s!" claim: {describeClaim wrapper.claim}" return 0 | .error e => IO.eprintln s!"error: verification failed: {e}" @@ -102,7 +143,7 @@ def verifyShardComposition (ixePath manifestPath : String) (shardK? : Option Nat | .ok d => pure (some d) let claimDigestOfProof (hex : String) : IO (Address × Address) := do let proofAddr ← addrOfHex! "proof" hex - let wrapper ← IO.ofExcept (Ixon.Proof.de (← StoreIO.toIO (Store.read proofAddr))) + let wrapper ← IO.ofExcept (Ixon.Proof.de (← StoreIO.toIO (Store.readVerified proofAddr))) pure (proofAddr, Address.blake3 (Ix.Claim.ser wrapper.claim)) match shardK? with | some k => @@ -119,7 +160,9 @@ def verifyShardComposition (ixePath manifestPath : String) (shardK? : Option Nat if d != expected then IO.eprintln s!"[verify] FAIL: proof {proofAddr} (claim {d}) is not shard {k} (claim {expected})" rc := 1 - else if (← verifyOneProof aiurSystem compiled proofAddr) != 0 then rc := 1 + -- Shard claims are conditional by construction (frontier assumptions); + -- the digest binding above already pins them to the manifest. + else if (← verifyOneProof aiurSystem compiled proofAddr (allowConditional := true)) != 0 then rc := 1 return rc | none => if !(← Ix.Cli.CheckCmd.shardsCover ixonEnv shards) then return 1 @@ -138,7 +181,7 @@ def verifyShardComposition (ixePath manifestPath : String) (shardK? : Option Nat match digestToShard.get? d with | none => IO.eprintln s!"[verify] FAIL: proof {proofAddr} (claim {d}) matches no shard"; rc := 1 | some k => - if (← verifyOneProof aiurSystem compiled proofAddr) != 0 then rc := 1 + if (← verifyOneProof aiurSystem compiled proofAddr (allowConditional := true)) != 0 then rc := 1 else covered := covered.insert k let missing := (List.range shards.size).filter (fun k => !covered.contains k) if !missing.isEmpty then @@ -157,13 +200,14 @@ def runVerifyCmd (p : Cli.Parsed) : IO UInt32 := do if proofs.isEmpty then p.printError "error: must specify ... (or --ixe + --ixes for a shard partition)" return 1 + let allowConditional := p.hasFlag "allow-conditional" let (aiurSystem, compiled) ← match (← buildBackend) with | .error e => IO.eprintln e; return 1 | .ok b => pure b let mut rc : UInt32 := 0 for hex in proofs do let proofAddr ← addrOfHex! "proof" hex - if (← verifyOneProof aiurSystem compiled proofAddr) != 0 then rc := 1 + if (← verifyOneProof aiurSystem compiled proofAddr allowConditional) != 0 then rc := 1 return rc end Ix.Cli.VerifyCmd @@ -177,6 +221,7 @@ def verifyCmd : Cli.Cmd := `[Cli| "ixe" : String; "Path to a serialized `.ixe` env (with --ixes). With no proof args and no --shard: verify the partition off-circuit (every constant owned by exactly one shard)." "ixes" : String; "Path to a `.ixes` shard manifest (with --ixe)." "shard" : Nat; "0-based shard index K (with --ixe + --ixes). No proof: print shard K's reconstructed CheckEnv claim digest. With proof(s): bind each to shard K and verify." + "allow-conditional"; "Accept proofs whose claim is conditional on a nonempty assumption set. The assumptions are NOT checked by the proof — without this flag such proofs are rejected." ARGS: ...proofs : String; "32-byte hex address(es) of persisted `Ixon.Proof` wrappers in `~/.ix/store/`. Omit when using --ixe + --ixes." diff --git a/Ix/Iroh/Connect.lean b/Ix/Iroh/Connect.lean index 5dea06da..b8b392bd 100644 --- a/Ix/Iroh/Connect.lean +++ b/Ix/Iroh/Connect.lean @@ -29,12 +29,16 @@ private opaque getBytes' : @& String → @& Array String → @& String → @& St def getBytes (nodeId : @& String) (addrs : @& Array String) (relayUrl : @& String) (hash : @& String) (writeToDisk : Bool): IO Unit := do match getBytes' nodeId addrs relayUrl hash with | .ok response => - let string := String.fromUTF8! response.bytes if writeToDisk then + -- Write raw bytes: env/proof payloads are binary, and a String + -- round-trip would panic (`fromUTF8!`) or corrupt them. IO.println s!"Writing bytes to ./{response.hash}" - IO.FS.writeFile response.hash string + IO.FS.writeBinFile response.hash response.bytes else - IO.print string + match String.fromUTF8? response.bytes with + | some string => IO.print string + | none => + IO.println s!"(binary response: {response.bytes.size} bytes — pass --write-to-disk to save it)" | .error e => throw (IO.userError e) end Iroh.Connect diff --git a/Ix/IxVM/ClaimHarness.lean b/Ix/IxVM/ClaimHarness.lean index d3d92d10..e2b7edcd 100644 --- a/Ix/IxVM/ClaimHarness.lean +++ b/Ix/IxVM/ClaimHarness.lean @@ -85,13 +85,17 @@ def loadSharedIxonEnv (names : Array Lean.Name) (leanEnv : Lean.Environment) : let rawEnv ← Ix.CompileM.rsCompileEnvFFI deduped.toList pure rawEnv.toEnv -/-- Walk the Constant ref-graph from `target` to compute the set of - addresses needed to type-check it. Mirrors Aiur's `load_with_deps`: - follow `Constant.refs` plus the projection's `block_addr` (the parent - Muts wrapper) for IPrj/CPrj/RPrj/DPrj. -/ -partial def closureFrom (env : Ixon.Env) (target : Address) : Std.HashSet Address := Id.run do +/-- Walk the Constant ref-graph from every target to compute the set of + addresses needed to type-check them, with one shared visited set. + Mirrors Aiur's `load_with_deps`: follow `Constant.refs` plus the + projection's `block_addr` (the parent Muts wrapper) for + IPrj/CPrj/RPrj/DPrj. Unioning per-root closures instead would + re-traverse dependencies shared between roots once per root — + quadratic when the roots are a shard's owned constants over a + heavily-shared prelude. -/ +partial def closureFromAll (env : Ixon.Env) (targets : Array Address) : Std.HashSet Address := Id.run do let mut visited : Std.HashSet Address := {} - let mut worklist : Array Address := #[target] + let mut worklist : Array Address := targets while !worklist.isEmpty do let addr := worklist.back! worklist := worklist.pop @@ -107,6 +111,9 @@ partial def closureFrom (env : Ixon.Env) (target : Address) : Std.HashSet Addres | _ => pure () return visited +@[inline] def closureFrom (env : Ixon.Env) (target : Address) : Std.HashSet Address := + closureFromAll env #[target] + /-- Build the `ixon_serde_test` / `ixon_serde_blake3_bench` IOBuffer: one entry per const, keyed by its index. Returns the buffer and the count `n` (which the Aiur entrypoint receives as input). -/ @@ -250,12 +257,9 @@ def buildClaimWitness (env : Ixon.Env) (claim : Ix.Claim) def shardCheckEnvClaim (env : Ixon.Env) (owned : Array Address) : Except String (Ix.Claim × Std.HashSet Address × Std.HashMap Address Ix.AssumptionTree) := do let ownedSet : Std.HashSet Address := owned.foldl (·.insert ·) {} - let closure : Std.HashSet Address := Id.run do - let mut s : Std.HashSet Address := {} - for a in owned do - for x in (closureFrom env a).toArray do - s := s.insert x - return s + -- One multi-root walk; per-owner closures re-traverse the shared + -- prelude once per owned constant (quadratic at shard scale). + let closure : Std.HashSet Address := closureFromAll env owned let frontier : Array Address := closure.toArray.filter (fun a => !ownedSet.contains a) let some envTree := Ix.AssumptionTree.canonical closure.toArray diff --git a/Ix/IxVM/Kernel/Claim.lean b/Ix/IxVM/Kernel/Claim.lean index 27c9a0ca..2efe3095 100644 --- a/Ix/IxVM/Kernel/Claim.lean +++ b/Ix/IxVM/Kernel/Claim.lean @@ -808,29 +808,52 @@ def claim := ⟦ -- check_all variant that skips positions whose addr is in the -- supplied assumption-leaf list. -- ============================================================================ + -- Membership map over the assumption leaves, keyed by ptr_val. Sound by + -- the same invariant as `build_addr_pos_map` (Ingress.lean): one pointer + -- maps to one content, so a ptr hit ⟹ the address IS a leaf (no false + -- skip); a de-interned pointer reads as absent, which here only means + -- the constant gets CHECKED instead of skipped — conservative. Honest + -- provers intern, so honest traces never miss. Replaces a per-constant + -- linear `addr_in_list` scan that cost O(closure × frontier) rows at + -- shard scale; this is O(F log F) to build + O(log F) per constant. + fn build_asm_leaf_map(leaves: List‹Addr›) -> RBTreeMap‹G› { + match load(leaves) { + ListNode.Nil => RBTreeMap.Nil, + ListNode.Cons(a, rest) => + rbtree_map_insert(ptr_val(a), 1, build_asm_leaf_map(rest)), + } + } + fn check_all_skipping(consts: List‹&KConstantInfo›, top: List‹&KConstantInfo›, addrs: List‹Addr›, asm_leaves: List‹Addr›) { let _ = check_canonical_block_sort(top); - check_all_skipping_iter(consts, top, addrs, asm_leaves, 0) + let asm_map = store(build_asm_leaf_map(asm_leaves)); + check_all_skipping_iter(consts, top, addrs, addrs, asm_map, 0) } + -- `cur_addrs` is the suffix of `addrs` aligned with `consts` — walked in + -- lockstep instead of `list_lookup(addrs, pos)`, which re-walks the list + -- prefix every iteration (quadratic in closure size on its own). fn check_all_skipping_iter(consts: List‹&KConstantInfo›, top: List‹&KConstantInfo›, addrs: List‹Addr›, - asm_leaves: List‹Addr›, + cur_addrs: List‹Addr›, + asm_map: &RBTreeMap‹G›, pos: G) { match load(consts) { ListNode.Nil => (), ListNode.Cons(&ci, rest) => - let addr = list_lookup(addrs, pos); - match addr_in_list(addr, asm_leaves) { - 1 => - check_all_skipping_iter(rest, top, addrs, asm_leaves, pos + 1), - _ => - let _ = check_const(ci, pos, top, addrs); - check_all_skipping_iter(rest, top, addrs, asm_leaves, pos + 1), + match load(cur_addrs) { + ListNode.Cons(addr, rest_addrs) => + match rbtree_map_lookup_or_default(ptr_val(addr), load(asm_map), 0) { + 0 => + let _ = check_const(ci, pos, top, addrs); + check_all_skipping_iter(rest, top, addrs, rest_addrs, asm_map, pos + 1), + _ => + check_all_skipping_iter(rest, top, addrs, rest_addrs, asm_map, pos + 1), + }, }, } } diff --git a/Ix/IxVM/Kernel/DefEq.lean b/Ix/IxVM/Kernel/DefEq.lean index c5dde3b6..13f19258 100644 --- a/Ix/IxVM/Kernel/DefEq.lean +++ b/Ix/IxVM/Kernel/DefEq.lean @@ -144,7 +144,11 @@ def defEq := ⟦ } } - -- 1 iff ty is `Const(I, _) args` for non-rec 1-ctor 0-field inductive. + -- 1 iff ty is `Const(I, _) args` for non-rec 0-index 1-ctor 0-field + -- inductive. The `num_indices == 0` check is required for soundness: + -- an *indexed* singleton (e.g. `MyId : A -> Type`) is not unit-like — + -- treating it as such yields definitional UIP on Type-valued families + -- (mirror def_eq.rs:881, lean4lean TypeChecker.lean:648). fn is_unit_like_type(ty: KExpr, top: List‹&KConstantInfo›) -> G { match collect_spine_simple(ty) { (head, _) => @@ -152,18 +156,22 @@ def defEq := ⟦ KExprNode.Const(idx, _) => let ci = load(list_lookup(top, idx)); match ci { - KConstantInfo.Induct(_, _, _, _, ctor_indices, is_rec, _, _, _, _) => + KConstantInfo.Induct(_, _, _, num_indices, ctor_indices, is_rec, _, _, _, _) => match is_rec { 1 => 0, 0 => - match list_length(ctor_indices) { - 1 => - let ctor_idx = list_lookup(ctor_indices, 0); - let ctor_ci = load(list_lookup(top, ctor_idx)); - match ctor_ci { - KConstantInfo.Ctor(_, _, _, _, _, n_fields, _) => - match n_fields { - 0 => 1, + match num_indices { + 0 => + match list_length(ctor_indices) { + 1 => + let ctor_idx = list_lookup(ctor_indices, 0); + let ctor_ci = load(list_lookup(top, ctor_idx)); + match ctor_ci { + KConstantInfo.Ctor(_, _, _, _, _, n_fields, _) => + match n_fields { + 0 => 1, + _ => 0, + }, _ => 0, }, _ => 0, @@ -290,9 +298,19 @@ def defEq := ⟦ match struct_like { 0 => 0, 1 => - compare_struct_fields(induct_idx, num_params, - num_fields, t, s_args, 0, - types, top, addrs), + -- Types must be def-eq before the field loop + -- (mirror def_eq.rs:1180-1197, lean4lean:515); + -- otherwise S1.mk a ≡ S2.mk a can be accepted + -- across distinct structures sharing field types. + let s_ty = k_infer(s, types, top, addrs); + let t_ty = k_infer(t, types, top, addrs); + match k_is_def_eq(t_ty, s_ty, types, top, addrs) { + 0 => 0, + 1 => + compare_struct_fields(induct_idx, num_params, + num_fields, t, s_args, 0, + types, top, addrs), + }, }, _ => 0, }, diff --git a/Ix/IxVM/Kernel/Whnf.lean b/Ix/IxVM/Kernel/Whnf.lean index a80c5ddb..b4e4bc3f 100644 --- a/Ix/IxVM/Kernel/Whnf.lean +++ b/Ix/IxVM/Kernel/Whnf.lean @@ -193,8 +193,21 @@ def whnf := ⟦ let cci = load(list_lookup(top, cidx)); match cci { KConstantInfo.Ctor(_, _, _, _, nparams, _, _) => - let field = list_lookup_or_nil(inner_args, nparams + fidx); - whnf_with_spine(field, spine, types, top, addrs), + -- Out-of-range projection (bad field index or + -- under-applied ctor) must stay STUCK: the old + -- `list_lookup_or_nil` default reduced it to + -- BVar(0), inventing a term that exists nowhere + -- in the input (mirror whnf.rs spine `.get`). + let target = nparams + fidx; + let len = list_length(inner_args); + match u32_less_than(target, len) { + 1 => + let field = list_lookup(inner_args, target); + whnf_with_spine(field, spine, types, top, addrs), + _ => + let stuck = store(KExprNode.Proj(tidx, fidx, inner_whnf)); + apply_spine(stuck, spine), + }, _ => let stuck = store(KExprNode.Proj(tidx, fidx, inner_whnf)); apply_spine(stuck, spine), diff --git a/Ix/Store.lean b/Ix/Store.lean index 08c323be..1c77a452 100644 --- a/Ix/Store.lean +++ b/Ix/Store.lean @@ -11,12 +11,16 @@ inductive StoreError | ioError (e: IO.Error) | ixonError (e: String) | noHome +| corrupt (expected actual : Address) def storeErrorToIOError : StoreError -> IO.Error | .unknownAddress a => IO.Error.userError s!"unknown address {repr a}" | .ioError e => e | .ixonError e => IO.Error.userError s!"ixon error {e}" | .noHome => IO.Error.userError s!"no HOME environment variable" +| .corrupt e a => + IO.Error.userError + s!"store corruption: content stored under {repr e} hashes to {repr a}" abbrev StoreIO := EIO StoreError @@ -65,10 +69,25 @@ def writeAt (addr: Address) (bytes: ByteArray) : StoreIO Unit := do let path <- storePath addr IO.toEIO .ioError (IO.FS.writeBinFile path bytes) +/-- Raw read with NO integrity check. Only appropriate for `writeAt`-keyed + content whose key is not `blake3(bytes)` (e.g. assumption-tree blobs + keyed by merkle root) — callers of those must verify against their + domain-specific key. For `Store.write`-keyed content use + `readVerified`. -/ def read (a: Address) : StoreIO ByteArray := do let path <- storePath a IO.toEIO .ioError (IO.FS.readBinFile path) +/-- Read a blob written by `Store.write` (keyed by `blake3(bytes)`), + re-verifying the content hash. `~/.ix/store` is a plain directory with + no integrity protection of its own, so an unverified read lets on-disk + tampering feed the caller arbitrary bytes under a trusted address. -/ +def readVerified (a: Address) : StoreIO ByteArray := do + let bytes ← read a + let actual := Address.blake3 bytes + if actual == a then return bytes + else throw (.corrupt a actual) + end Store end diff --git a/README.md b/README.md index 0f7af7e6..1c3d020d 100644 --- a/README.md +++ b/README.md @@ -7,14 +7,15 @@ ----------- The Ix platform enables the compilation of [Lean -4](https://github.com/leanprover/lean4) programs into zero-knowledge succinct -non-interactive arguments of knowledge (zk-SNARKs). This allows the execution -and typechecking of any Lean program to be verified by performing a +4](https://github.com/leanprover/lean4) programs into succinct non-interactive +arguments of knowledge (zk-SNARKs). The design goal is for the execution and +typechecking of any Lean program to be verifiable by performing a sub-100-millisecond operation against an approximately 1 kilobyte certificate, -regardless of the size of the original Lean program. In fact, the correctness of -the entire [mathlib](https://github.com/leanprover-community/mathlib4) library -of formal mathematics, containing around 2 million lines of code, may be -compiled in this way into a single kilobyte sized cryptographic certificate. +regardless of the size of the original Lean program — up to and including the +correctness of the entire +[mathlib](https://github.com/leanprover-community/mathlib4) library of formal +mathematics, containing around 2 million lines of code. These figures are +design targets, not yet measured properties of the current implementation. We call this technique zero-knowledge proof-carrying code or **zkPCC**, as an extension of the well-known [proof-carrying @@ -32,6 +33,13 @@ artifact that demonstrates some formal property to the user, in **zkPCC** this proof artifact may be kept private, which opens up new possibilities for economic transactions over proofs. +> :warning: **The current proving backend is NOT zero-knowledge.** The pinned +> multi-stark/Plonky3 configuration applies no blinding or masking, and FRI +> query openings reveal witness rows — which can include private program +> bytes, commitment secrets, and I/O buffer contents. Until ZK masking lands, +> Ix produces *succinct* arguments without the *hiding* property: do not +> publish proofs over programs or data you intend to keep private. + > :warning: **This repository is a pre-alpha work in progress and should not be used for any purpose.** ## Use Cases diff --git a/Tests/Aiur/Common.lean b/Tests/Aiur/Common.lean index 5535d89c..3bfff978 100644 --- a/Tests/Aiur/Common.lean +++ b/Tests/Aiur/Common.lean @@ -78,7 +78,9 @@ def AiurTestEnv.interpTest (env : AiurTestEnv) (testCase : AiurTestCase) def AiurTestEnv.runTestCase (env : AiurTestEnv) (testCase : AiurTestCase) : TestSeq := let label := testCase.label - let funIdx := env.compiled.getFuncIdx testCase.functionName |>.get! + match env.compiled.getFuncIdx testCase.functionName with + | none => test s!"Function `{testCase.functionName}` exists for {label}" false + | some funIdx => match env.compiled.bytecode.execute funIdx testCase.input testCase.inputIOBuffer with | .error e => test s!"Execute succeeds for {label}: {e}" false | .ok (execOutput, execIOBuffer, _queryCounts) => @@ -98,10 +100,34 @@ def AiurTestEnv.runTestCase (env : AiurTestEnv) (testCase : AiurTestCase) : Test (claim == Aiur.buildClaim funIdx testCase.input testCase.expectedOutput) let ioTest := test s!"IOBuffer matches for {label}" (ioBuffer == testCase.expectedIOBuffer) - let proof := .ofBytes proof.toBytes - let pvTest := withExceptOk s!"Prove/verify works for {label}" - (env.aiurSystem.verify friParameters claim proof) fun _ => .done + let pvTest := withExceptOk s!"Proof byte roundtrip decodes for {label}" + (Aiur.Proof.ofBytes proof.toBytes) fun proof => + withExceptOk s!"Prove/verify works for {label}" + (env.aiurSystem.verify friParameters claim proof) fun _ => .done + -- Negative coverage: a verifier that accepts a mismatched claim or + -- tampered proof bytes is the classic zkVM soundness regression — + -- the honest-path checks above would ship it silently. + let wrongClaim := claim.set! 0 (claim.getD 0 (.ofNat 0) + (1 : Aiur.G)) + let claimRejected : Bool := + match env.aiurSystem.verify friParameters wrongClaim proof with + | .ok _ => false + | .error _ => true + let negClaimTest := + test s!"Verify rejects mismatched claim for {label}" claimRejected + let tamperedBytes := + let bs := proof.toBytes + bs.set! 0 (bs.get! 0 ^^^ 0x01) + let proofRejected : Bool := + match Aiur.Proof.ofBytes tamperedBytes with + | .error _ => true -- refusing to decode is also a rejection + | .ok tampered => + match env.aiurSystem.verify friParameters claim tampered with + | .ok _ => false + | .error _ => true + let negProofTest := + test s!"Verify rejects tampered proof for {label}" proofRejected execTest ++ interpTest ++ claimTest ++ ioTest ++ pvTest + ++ negClaimTest ++ negProofTest def mkAiurTests (toplevelFn : Except Aiur.Global Aiur.Source.Toplevel) (cases : List AiurTestCase) : TestSeq := diff --git a/Tests/Ix/IxVM.lean b/Tests/Ix/IxVM.lean index 2826e798..f43de5c2 100644 --- a/Tests/Ix/IxVM.lean +++ b/Tests/Ix/IxVM.lean @@ -141,6 +141,18 @@ private def nameOfString (str : String) : Lean.Name := public def kernelChecks (env : Lean.Environment) : IO (List AiurTestCase) := kernelCheckNames.map nameOfString |>.mapM (kernelCheck · env) +/-- One end-to-end **prove → verify** of a real kernel `Check` claim — the + product's core path. Every other case in this suite is + `executionOnly`, so without this one the kernel program is never + *proven* and a certificate is never cryptographically *verified* + anywhere in the test suite: an unsoundness or breakage in + kernel-program proving would ship undetected. `HEq` keeps the closure + (and proving time) small. -/ +public def kernelProveCheck (env : Lean.Environment) : IO AiurTestCase := do + let tc ← kernelCheck `HEq env + pure { tc with label := s!"{tc.label} (prove+verify)" + executionOnly := false } + /-! ## Claim variant smoke tests Each builds an `AiurTestCase` exercising one of the non-`Check-None` diff --git a/Tests/Main.lean b/Tests/Main.lean index 441a3446..acabe9f3 100644 --- a/Tests/Main.lean +++ b/Tests/Main.lean @@ -95,9 +95,10 @@ def ignoredRunners (env : Lean.Environment) : List (String × IO UInt32) := [ let kernelUnitTests := .exec `kernel_unit_tests let serdeNatAddCommTest ← serdeNatAddComm env let kernelChecks ← kernelChecks env + let kernelPv ← kernelProveCheck env let claimSmokes ← claimSmokeTests env let aiurTests := [kernelUnitTests, serdeNatAddCommTest] - ++ kernelChecks ++ claimSmokes + ++ kernelChecks ++ [kernelPv] ++ claimSmokes -- The arena suite shares the compiled toplevel with the AiurTestCase -- runs above; build it once here and weave the resulting TestSeq in -- alongside `mkAiurTests`'s output. diff --git a/deny.toml b/deny.toml index 672a07e4..7d054daf 100644 --- a/deny.toml +++ b/deny.toml @@ -73,8 +73,8 @@ ignore = [ "RUSTSEC-2024-0436", # `paste` crate is unmaintained "RUSTSEC-2023-0089", # `atomic-polyfill` crate is unmaintained "RUSTSEC-2025-0141", # `bincode` crate is unmaintained - "RUSTSEC-2026-0118", # `hickory-proto` Iroh vulnerability - "RUSTSEC-2026-0119", # `hickory-proto` Iroh vultnerability + { id = "RUSTSEC-2026-0118", reason = "hickory-proto vuln reachable only through the optional `net` (iroh) feature; the iroh stack is demo-grade and off the proving trust path. Re-check on every iroh bump and drop this once iroh ships a fixed hickory-proto." }, + { id = "RUSTSEC-2026-0119", reason = "same exposure and exit condition as RUSTSEC-2026-0118" }, #{ id = "RUSTSEC-0000-0000", reason = "you can specify a reason the advisory is ignored" }, #"a-crate-that-is-yanked@0.1.1", # you can also ignore yanked crate versions if you wish #{ crate = "a-crate-that-is-yanked@0.1.1", reason = "you can specify why you are ignoring the yanked crate" }, @@ -232,16 +232,26 @@ skip-tree = [ # https://embarkstudios.github.io/cargo-deny/checks/sources/cfg.html [sources] # Lint level for what to happen when a crate from a crate registry that is not -# in the allow list is encountered -unknown-registry = "warn" +# in the allow list is encountered. +# "deny" so an unreviewed registry or git source fails CI instead of warning — +# the git deps below sit on the proving trust path (multi-stark is the STARK +# backend), so silently picking up a new source is exactly what this check +# exists to stop. +unknown-registry = "deny" # Lint level for what to happen when a crate from a git repository that is not # in the allow list is encountered -unknown-git = "warn" +unknown-git = "deny" # List of URLs for allowed crate registries. Defaults to the crates.io index # if not specified. If it is specified but empty, no registries are allowed. allow-registry = ["https://github.com/rust-lang/crates.io-index"] -# List of URLs for allowed Git repositories -allow-git = [] +# List of URLs for allowed Git repositories. Plonky3 is a transitive git +# dependency of multi-stark (the STARK backend). +allow-git = [ + "https://github.com/argumentcomputer/lean-ffi", + "https://github.com/argumentcomputer/multi-stark.git", + "https://github.com/argumentcomputer/tracing-texray", + "https://github.com/Plonky3/Plonky3", +] [sources.allow-org] # github.com organizations to allow git sources for diff --git a/src/ffi/aiur/protocol.rs b/src/ffi/aiur/protocol.rs index ed2ec7b4..83715874 100644 --- a/src/ffi/aiur/protocol.rs +++ b/src/ffi/aiur/protocol.rs @@ -47,14 +47,21 @@ extern "C" fn rs_aiur_proof_to_bytes( LeanByteArray::from_bytes(&bytes) } -/// `Aiur.Proof.ofBytes : @& ByteArray → Proof` +/// `Aiur.Proof.ofBytes : @& ByteArray → Except String Proof` #[unsafe(no_mangle)] extern "C" fn rs_aiur_proof_of_bytes( byte_array: LeanByteArray>, -) -> LeanExternal { - let proof = - Proof::from_bytes(byte_array.as_bytes()).expect("Deserialization error"); - LeanExternal::alloc(&AIUR_PROOF_CLASS, proof) +) -> LeanExcept { + // Proof bytes arrive inside a prover-supplied wrapper on the `ix verify` + // path: malformed bytes must surface as `Except.error`, never a panic — + // under `panic = "abort"` a panicking decode kills the whole process, + // and unwinding across `extern "C"` would be UB anyway. + match Proof::from_bytes(byte_array.as_bytes()) { + Ok(proof) => LeanExcept::ok(LeanExternal::alloc(&AIUR_PROOF_CLASS, proof)), + Err(err) => { + LeanExcept::error_string(&format!("malformed proof bytes: {err:?}")) + }, + } } /// `AiurSystem.build : @&Bytecode.Toplevel → @&CommitmentParameters → AiurSystem` diff --git a/src/ffi/ixon/expr.rs b/src/ffi/ixon/expr.rs index 4fb71091..c9d47777 100644 --- a/src/ffi/ixon/expr.rs +++ b/src/ffi/ixon/expr.rs @@ -22,97 +22,133 @@ fn decode_u64_array(obj: LeanArray>) -> Vec { impl LeanIxonExpr { /// Build Ixon.Expr (12 constructors). + /// + /// Iterative post-order construction: expression trees can nest + /// arbitrarily deep (`.ixe`-derived data), and FFI entry points run on + /// default-size stacks where structural recursion overflows — a hard + /// process abort under `panic = "abort"`. pub fn build(expr: &IxonExpr) -> Self { - match expr { - IxonExpr::Sort(idx) => { - let ctor = LeanIxonExpr::alloc(0); - ctor.set_num_64(0, *idx); - ctor - }, - IxonExpr::Var(idx) => { - let ctor = LeanIxonExpr::alloc(1); - ctor.set_num_64(0, *idx); - ctor - }, - IxonExpr::Ref(ref_idx, univ_idxs) => { - let arr = LeanArray::alloc(univ_idxs.len()); - for (i, idx) in univ_idxs.iter().enumerate() { - arr.set(i, LeanOwned::box_u64(*idx)); - } - let ctor = LeanIxonExpr::alloc(2); - ctor.set_obj(0, arr); - ctor.set_num_64(0, *ref_idx); - ctor - }, - IxonExpr::Rec(rec_idx, univ_idxs) => { - let arr = LeanArray::alloc(univ_idxs.len()); - for (i, idx) in univ_idxs.iter().enumerate() { - arr.set(i, LeanOwned::box_u64(*idx)); - } - let ctor = LeanIxonExpr::alloc(3); - ctor.set_obj(0, arr); - ctor.set_num_64(0, *rec_idx); - ctor - }, - IxonExpr::Prj(type_ref_idx, field_idx, val) => { - let val_obj = Self::build(val); - let ctor = LeanIxonExpr::alloc(4); - ctor.set_obj(0, val_obj); - ctor.set_num_64(0, *type_ref_idx); - ctor.set_num_64(1, *field_idx); - ctor - }, - IxonExpr::Str(ref_idx) => { - let ctor = LeanIxonExpr::alloc(5); - ctor.set_num_64(0, *ref_idx); - ctor - }, - IxonExpr::Nat(ref_idx) => { - let ctor = LeanIxonExpr::alloc(6); - ctor.set_num_64(0, *ref_idx); - ctor - }, - IxonExpr::App(fun, arg) => { - let fun_obj = Self::build(fun); - let arg_obj = Self::build(arg); - let ctor = LeanIxonExpr::alloc(7); - ctor.set_obj(0, fun_obj); - ctor.set_obj(1, arg_obj); - ctor - }, - IxonExpr::Lam(ty, body) => { - let ty_obj = Self::build(ty); - let body_obj = Self::build(body); - let ctor = LeanIxonExpr::alloc(8); - ctor.set_obj(0, ty_obj); - ctor.set_obj(1, body_obj); - ctor - }, - IxonExpr::All(ty, body) => { - let ty_obj = Self::build(ty); - let body_obj = Self::build(body); - let ctor = LeanIxonExpr::alloc(9); - ctor.set_obj(0, ty_obj); - ctor.set_obj(1, body_obj); - ctor - }, - IxonExpr::Let(non_dep, ty, val, body) => { - let ty_obj = Self::build(ty); - let val_obj = Self::build(val); - let body_obj = Self::build(body); - let ctor = LeanIxonExpr::alloc(10); - ctor.set_obj(0, ty_obj); - ctor.set_obj(1, val_obj); - ctor.set_obj(2, body_obj); - ctor.set_num_8(0, u8::from(*non_dep)); - ctor - }, - IxonExpr::Share(idx) => { - let ctor = LeanIxonExpr::alloc(11); - ctor.set_num_64(0, *idx); - ctor - }, + enum Walk<'a> { + Visit(&'a IxonExpr), + Assemble(&'a IxonExpr), + } + let mut work: Vec> = vec![Walk::Visit(expr)]; + let mut out: Vec> = Vec::new(); + while let Some(w) = work.pop() { + match w { + Walk::Visit(e) => match e { + // Leaves: assemble immediately. + IxonExpr::Sort(idx) => { + let ctor = LeanIxonExpr::alloc(0); + ctor.set_num_64(0, *idx); + out.push(ctor); + }, + IxonExpr::Var(idx) => { + let ctor = LeanIxonExpr::alloc(1); + ctor.set_num_64(0, *idx); + out.push(ctor); + }, + IxonExpr::Ref(ref_idx, univ_idxs) => { + let arr = LeanArray::alloc(univ_idxs.len()); + for (i, idx) in univ_idxs.iter().enumerate() { + arr.set(i, LeanOwned::box_u64(*idx)); + } + let ctor = LeanIxonExpr::alloc(2); + ctor.set_obj(0, arr); + ctor.set_num_64(0, *ref_idx); + out.push(ctor); + }, + IxonExpr::Rec(rec_idx, univ_idxs) => { + let arr = LeanArray::alloc(univ_idxs.len()); + for (i, idx) in univ_idxs.iter().enumerate() { + arr.set(i, LeanOwned::box_u64(*idx)); + } + let ctor = LeanIxonExpr::alloc(3); + ctor.set_obj(0, arr); + ctor.set_num_64(0, *rec_idx); + out.push(ctor); + }, + IxonExpr::Str(ref_idx) => { + let ctor = LeanIxonExpr::alloc(5); + ctor.set_num_64(0, *ref_idx); + out.push(ctor); + }, + IxonExpr::Nat(ref_idx) => { + let ctor = LeanIxonExpr::alloc(6); + ctor.set_num_64(0, *ref_idx); + out.push(ctor); + }, + IxonExpr::Share(idx) => { + let ctor = LeanIxonExpr::alloc(11); + ctor.set_num_64(0, *idx); + out.push(ctor); + }, + // Interior nodes: assemble after children. Children are pushed + // in reverse so they complete left-to-right; `Assemble` then + // pops them right-to-left. + IxonExpr::Prj(_, _, val) => { + work.push(Walk::Assemble(e)); + work.push(Walk::Visit(val)); + }, + IxonExpr::App(fun, arg) => { + work.push(Walk::Assemble(e)); + work.push(Walk::Visit(arg)); + work.push(Walk::Visit(fun)); + }, + IxonExpr::Lam(ty, body) | IxonExpr::All(ty, body) => { + work.push(Walk::Assemble(e)); + work.push(Walk::Visit(body)); + work.push(Walk::Visit(ty)); + }, + IxonExpr::Let(_, ty, val, body) => { + work.push(Walk::Assemble(e)); + work.push(Walk::Visit(body)); + work.push(Walk::Visit(val)); + work.push(Walk::Visit(ty)); + }, + }, + Walk::Assemble(e) => match e { + IxonExpr::Prj(type_ref_idx, field_idx, _) => { + let val_obj = out.pop().expect("Prj child"); + let ctor = LeanIxonExpr::alloc(4); + ctor.set_obj(0, val_obj); + ctor.set_num_64(0, *type_ref_idx); + ctor.set_num_64(1, *field_idx); + out.push(ctor); + }, + IxonExpr::App(..) => { + let arg_obj = out.pop().expect("App arg"); + let fun_obj = out.pop().expect("App fun"); + let ctor = LeanIxonExpr::alloc(7); + ctor.set_obj(0, fun_obj); + ctor.set_obj(1, arg_obj); + out.push(ctor); + }, + IxonExpr::Lam(..) | IxonExpr::All(..) => { + let body_obj = out.pop().expect("binder body"); + let ty_obj = out.pop().expect("binder type"); + let tag = if matches!(e, IxonExpr::Lam(..)) { 8 } else { 9 }; + let ctor = LeanIxonExpr::alloc(tag); + ctor.set_obj(0, ty_obj); + ctor.set_obj(1, body_obj); + out.push(ctor); + }, + IxonExpr::Let(non_dep, ..) => { + let body_obj = out.pop().expect("Let body"); + let val_obj = out.pop().expect("Let val"); + let ty_obj = out.pop().expect("Let type"); + let ctor = LeanIxonExpr::alloc(10); + ctor.set_obj(0, ty_obj); + ctor.set_obj(1, val_obj); + ctor.set_obj(2, body_obj); + ctor.set_num_8(0, u8::from(*non_dep)); + out.push(ctor); + }, + _ => unreachable!("leaves are assembled at Visit"), + }, + } } + out.pop().expect("build: exactly one root") } /// Build an Array of Ixon.Expr. diff --git a/src/ffi/lean_env.rs b/src/ffi/lean_env.rs index 13296fa9..9e635588 100644 --- a/src/ffi/lean_env.rs +++ b/src/ffi/lean_env.rs @@ -1711,6 +1711,13 @@ extern "C" fn rs_compile_validate_aux( // `stt` is `mut` so Phase 7 can `std::mem::take(&mut stt.env)` to extract // the Ixon env for serialization while freeing the rest of the state // (kctx, name_to_addr, etc.) before serialize allocates a 3 GB buffer. + // + // NOTE: both workspace profiles set `panic = "abort"` (unwinding across + // the `extern "C"` boundary is UB), so in normal builds a panic inside + // compile aborts the whole process *before* this `catch_unwind` can + // observe it — the panic arm below fires only in a custom + // `panic = "unwind"` debugging build. Do not rely on the validator + // surviving a compile panic in production. let mut stt = match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { compile_env_with_options(&env, CompileOptions::default()) diff --git a/src/ffi/primitives.rs b/src/ffi/primitives.rs index c73e1448..f6c1b43f 100644 --- a/src/ffi/primitives.rs +++ b/src/ffi/primitives.rs @@ -66,7 +66,7 @@ pub extern "C" fn rs_roundtrip_dhashmap_raw_nat_nat( } // Rebuild buckets - let new_buckets = LeanArray::alloc(num_buckets); + let mut new_buckets = LeanArray::alloc(num_buckets); let nil = LeanOwned::box_usize(0); for i in 0..num_buckets { new_buckets.set(i, nil.clone()); @@ -88,7 +88,10 @@ pub extern "C" fn rs_roundtrip_dhashmap_raw_nat_nat( new_bucket.set(0, Nat::to_lean(k)); new_bucket.set(1, Nat::to_lean(v)); new_bucket.set(2, old_bucket); - new_buckets.set(bucket_idx, new_bucket); + // `uset`, not `set`: `set` is a raw slot write that never releases the + // reference the slot held to the previous bucket list, leaking one + // cons cell per hash collision; `lean_array_uset` decrements it. + new_buckets = new_buckets.uset(bucket_idx, new_bucket); } // Build Raw diff --git a/src/iroh.rs b/src/iroh.rs index 2e32a2c7..b3602269 100644 --- a/src/iroh.rs +++ b/src/iroh.rs @@ -5,18 +5,40 @@ pub mod common { use bincode::{Decode, Encode}; use serde::{Deserialize, Serialize}; + /// ALPN for the ix iroh protocol. Versioned and ix-specific (this code + /// was derived from the iroh examples, which use + /// `n0/iroh/examples/magic/0` — keeping that would collide with anything + /// else built from the same example). + pub const ALPN: &[u8] = b"ix/iroh/0"; + #[derive(Debug, Serialize, Deserialize, Encode, Decode)] pub enum Request { Put(PutRequest), Get(GetRequest), } + /// Decode a length-framed message from untrusted network bytes. + /// + /// Never panics: any malformed/truncated/trailing-garbage input is an + /// `Err`. With `panic = "abort"` a panicking decode would let a single + /// bad frame kill the whole process. + fn decode_untrusted>(bytes: &[u8]) -> Result { + match bincode::decode_from_slice(bytes, bincode::config::standard()) { + Ok((v, consumed)) if consumed == bytes.len() => Ok(v), + Ok((_, consumed)) => Err(format!( + "trailing garbage: consumed {consumed} of {} bytes", + bytes.len() + )), + Err(e) => Err(format!("malformed message: {e}")), + } + } + impl Request { pub fn to_bytes(&self) -> Vec { bincode::encode_to_vec(self, bincode::config::standard()).unwrap() } - pub fn from_bytes(bytes: &[u8]) -> Self { - bincode::decode_from_slice(bytes, bincode::config::standard()).unwrap().0 + pub fn from_bytes(bytes: &[u8]) -> Result { + decode_untrusted(bytes) } } @@ -40,8 +62,8 @@ pub mod common { pub fn to_bytes(&self) -> Vec { bincode::encode_to_vec(self, bincode::config::standard()).unwrap() } - pub fn from_bytes(bytes: &[u8]) -> Self { - bincode::decode_from_slice(bytes, bincode::config::standard()).unwrap().0 + pub fn from_bytes(bytes: &[u8]) -> Result { + decode_untrusted(bytes) } } diff --git a/src/iroh/client.rs b/src/iroh/client.rs index fe8afbf8..b53a49f6 100644 --- a/src/iroh/client.rs +++ b/src/iroh/client.rs @@ -9,10 +9,8 @@ use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; use tracing_subscriber::{EnvFilter, fmt}; -use crate::iroh::common::{Request, Response}; +use crate::iroh::common::{ALPN, Request, Response}; -// An example ALPN that we are using to communicate over the `Endpoint` -const EXAMPLE_ALPN: &[u8] = b"n0/iroh/examples/magic/0"; // Maximum number of characters to read from the server. Connection automatically closed if this is exceeded const READ_SIZE_LIMIT: usize = 100_000_000; @@ -40,7 +38,7 @@ pub async fn connect( // The secret key is used to authenticate with other nodes. The PublicKey portion of this secret key is how we identify nodes, often referred to as the `node_id` in our codebase. .secret_key(secret_key) // Set the ALPN protocols this endpoint will accept on incoming connections - .alpns(vec![EXAMPLE_ALPN.to_vec()]) + .alpns(vec![ALPN.to_vec()]) // `RelayMode::Default` means that we will use the default relay servers to holepunch and relay. // Use `RelayMode::Custom` to pass in a `RelayMap` with custom relay urls. // Use `RelayMode::Disable` to disable holepunching and relaying over HTTPS @@ -78,7 +76,7 @@ pub async fn connect( // Attempt to connect, over the given ALPN. // Returns a Quinn connection. - let conn = endpoint.connect(addr, EXAMPLE_ALPN).await?; + let conn = endpoint.connect(addr, ALPN).await?; info!("connected"); // Use the Quinn API to send and recv content. @@ -89,7 +87,29 @@ pub async fn connect( // Call `finish` to close the send side of the connection gracefully. send.finish().anyerr()?; let message = recv.read_to_end(READ_SIZE_LIMIT).await.anyerr()?; - let response = Response::from_bytes(&message); + // Untrusted bytes: a malformed response must surface as an error, not a + // panic (with `panic = "abort"` a panicking decode kills the process). + let response = + Response::from_bytes(&message).map_err(std::io::Error::other).anyerr()?; + + // Content addressing is the only integrity this protocol has: a GET + // response is only valid if the fetched bytes actually hash to the + // address we asked for. The server is untrusted. + if let (Request::Get(get_request), Response::Get(get_response)) = + (&request, &response) + && !get_response.is_err + { + let got = blake3::hash(&get_response.bytes).to_hex().as_str().to_owned(); + if got != get_request.hash { + conn.close(1u32.into(), b"content hash mismatch"); + endpoint.close().await; + return Err(std::io::Error::other(format!( + "content hash mismatch: requested {} but bytes hash to {got}", + get_request.hash + ))) + .anyerr(); + } + } // Close the connection, then wait for the endpoint to flush. conn.close(0u32.into(), b"done"); diff --git a/src/iroh/server.rs b/src/iroh/server.rs index 4042308e..6d813fd3 100644 --- a/src/iroh/server.rs +++ b/src/iroh/server.rs @@ -12,12 +12,19 @@ use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; use tracing_subscriber::{EnvFilter, fmt}; -use crate::iroh::common::{GetResponse, PutResponse, Request, Response}; +use crate::iroh::common::{ALPN, GetResponse, PutResponse, Request, Response}; -// An example ALPN that we are using to communicate over the `Endpoint` -const EXAMPLE_ALPN: &[u8] = b"n0/iroh/examples/magic/0"; // Maximum number of characters to read from the client. Connection automatically closed if this is exceeded const READ_SIZE_LIMIT: usize = 100_000_000; +// Maximum number of concurrently-served connections. Connections beyond +// this are dropped at accept time so one set of slow/hostile peers cannot +// grow server memory without bound (each stream may buffer up to +// `READ_SIZE_LIMIT`). +const MAX_CONNECTIONS: usize = 64; +// Cap on total bytes pinned in the in-memory store. PUTs past this are +// refused (fail-closed) rather than growing memory unboundedly; eviction +// policy is left for the future file-backed store. +const MAX_STORE_BYTES: usize = 1 << 30; // 1 GiB // Largely taken from https://github.com/n0-computer/iroh/blob/main/iroh/examples/listen.rs pub async fn serve() -> n0_error::Result<()> { @@ -38,7 +45,7 @@ pub async fn serve() -> n0_error::Result<()> { // The secret key is used to authenticate with other nodes. The PublicKey portion of this secret key is how we identify nodes, often referred to as the `node_id` in our codebase. .secret_key(secret_key) // set the ALPN protocols this endpoint will accept on incoming connections - .alpns(vec![EXAMPLE_ALPN.to_vec()]) + .alpns(vec![ALPN.to_vec()]) // `RelayMode::Default` means that we will use the default relay servers to holepunch and relay. // Use `RelayMode::Custom` to pass in a `RelayMap` with custom relay urls. // Use `RelayMode::Disable` to disable holepunching and relaying over HTTPS @@ -79,8 +86,13 @@ pub async fn serve() -> n0_error::Result<()> { // TODO: Switch to dashmap for performance or elsa/frozen map for safe append-only multithreading // TODO: Add a backing local file store rather than keeping data in-memory, and enable garbage // collection - let store: Arc>>> = - Arc::new(Mutex::new(BTreeMap::new())); + // + // The map holds pinned blobs; the usize tracks total resident bytes so + // PUTs can be refused once `MAX_STORE_BYTES` is reached. + let store: Arc>, usize)>> = + Arc::new(Mutex::new((BTreeMap::new(), 0))); + // Bounds concurrently-served connections; see `MAX_CONNECTIONS`. + let conn_permits = Arc::new(tokio::sync::Semaphore::new(MAX_CONNECTIONS)); // accept incoming connections, returns a normal QUIC connection while let Some(incoming) = endpoint.accept().await { let mut accepting = match incoming.accept() { @@ -92,31 +104,66 @@ pub async fn serve() -> n0_error::Result<()> { continue; }, }; - let alpn = accepting.alpn().await?; - let conn = accepting.await?; - let endpoint_id = conn.remote_id(); - info!( - "new connection from {endpoint_id} with ALPN {}", - String::from_utf8_lossy(&alpn), - ); + let Ok(permit) = conn_permits.clone().try_acquire_owned() else { + warn!( + "connection limit ({MAX_CONNECTIONS}) reached, dropping incoming connection" + ); + continue; + }; let store_clone = store.clone(); - // spawn a task to handle reading and writing off of the connection + // Spawn a task to handle the handshake plus reading and writing off of + // the connection. Everything per-peer lives inside the task: an + // `await?` in the accept loop lets a single failed or slow peer + // handshake shut down — or head-of-line-block — the whole server. tokio::spawn(async move { + let _permit = permit; + let alpn = accepting.alpn().await?; + let conn = accepting.await?; + let endpoint_id = conn.remote_id(); + info!( + "new connection from {endpoint_id} with ALPN {}", + String::from_utf8_lossy(&alpn), + ); // accept a bi-directional QUIC connection // use the `quinn` APIs to send and recv content let (mut send, mut recv) = conn.accept_bi().await.anyerr()?; debug!("accepted bi stream, waiting for data..."); let message = recv.read_to_end(READ_SIZE_LIMIT).await.anyerr()?; - let request = Request::from_bytes(&message); + // Untrusted bytes: a malformed frame must only cost this connection, + // never panic (with `panic = "abort"` that would kill the node). + let request = match Request::from_bytes(&message) { + Ok(request) => request, + Err(err) => { + warn!("dropping connection from {endpoint_id}: {err}"); + conn.close(1u32.into(), b"malformed request"); + return n0_error::Ok(()); + }, + }; let response: Response = match request { Request::Put(put_request) => { let hash = blake3::hash(&put_request.bytes).to_hex().as_str().to_owned(); - store_clone.lock().unwrap().insert(hash.clone(), put_request.bytes); - println!("received PUT request for bytes with hash {hash}"); - let message = format!("pinned hash {hash}\nat endpoint ID {me}"); - Response::Put(PutResponse { is_err: false, message, hash }) + let len = put_request.bytes.len(); + let mut guard = store_clone.lock().unwrap(); + let already_pinned = guard.0.contains_key(&hash); + if !already_pinned && guard.1.saturating_add(len) > MAX_STORE_BYTES { + let message = format!( + "error: store full ({} bytes pinned, cap {MAX_STORE_BYTES})", + guard.1 + ); + drop(guard); + Response::Put(PutResponse { is_err: true, message, hash }) + } else { + if !already_pinned { + guard.1 += len; + guard.0.insert(hash.clone(), put_request.bytes); + } + drop(guard); + println!("received PUT request for bytes with hash {hash}"); + let message = format!("pinned hash {hash}\nat endpoint ID {me}"); + Response::Put(PutResponse { is_err: false, message, hash }) + } }, Request::Get(get_request) => { println!( @@ -124,7 +171,7 @@ pub async fn serve() -> n0_error::Result<()> { get_request.hash ); if let Some(bytes) = - store_clone.lock().unwrap().get(&get_request.hash) + store_clone.lock().unwrap().0.get(&get_request.hash) { let message = format!("retrieved bytes for hash {}", get_request.hash); diff --git a/src/ix/compile.rs b/src/ix/compile.rs index a16b2bc1..35cfa66a 100644 --- a/src/ix/compile.rs +++ b/src/ix/compile.rs @@ -183,8 +183,17 @@ pub struct CachedExpr { pub struct BlockCache { /// Cache for compiled expressions (keyed by Lean hash address) pub exprs: FxHashMap, - /// Cache for compiled universes (Level -> Univ conversion) + /// Cache for compiled universes (Level -> Univ conversion). + /// + /// `Param` levels compile to *positional* `Univ::var(idx)` against the + /// current constant's `univ_params` list, so entries are only valid for + /// the param list in `univ_cache_params` — `compile_univ` clears the + /// cache whenever the list changes. Without that, reusing one + /// `BlockCache` across constants with different level-param orders + /// silently rewrites universes (wrong `var` indices). pub univ_cache: FxHashMap>, + /// The `univ_params` list `univ_cache` entries were computed against. + pub univ_cache_params: Vec, /// Cache for expression comparisons pub cmps: FxHashMap<(Name, Name), Ordering>, /// Arena for expression metadata (append-only within a constant) @@ -379,6 +388,13 @@ pub fn compile_univ( univ_params: &[Name], cache: &mut BlockCache, ) -> Result, CompileError> { + // Cached `Param → var(idx)` translations are positional in + // `univ_params`; invalidate on any param-list change (see the field doc + // on `univ_cache`). Param lists are tiny, so the compare is cheap. + if cache.univ_cache_params.as_slice() != univ_params { + cache.univ_cache.clear(); + cache.univ_cache_params = univ_params.to_vec(); + } if let Some(cached) = cache.univ_cache.get(level) { return Ok(cached.clone()); } @@ -654,10 +670,19 @@ pub fn compile_expr( let mut stack: Vec = vec![Frame::Compile(expr.clone())]; let mut results: Vec> = Vec::new(); + // Set (to 1) by the surgery branches right after they push their head + // Const frame. LIFO guarantees that frame is the very next `Compile` + // popped, so this marks exactly the surgered heads — which are exempt + // from the bare-reference check in the Const arm below. + let mut surgery_heads_pending: usize = 0; while let Some(frame) = stack.pop() { match frame { Frame::Compile(e) => { + let is_surgery_head = surgery_heads_pending > 0; + if is_surgery_head { + surgery_heads_pending -= 1; + } let e_key = Address::from_blake3_hash(*e.get_hash()); if let Some(cached) = cache.exprs.get(&e_key).cloned() { // O(1) cache hit: arena root already valid @@ -682,6 +707,46 @@ pub fn compile_expr( }, ExprData::Const(name, levels, _) => { + // Bare reference to a rewritten auxiliary (i.e. not the head + // of a surgered application): there are no arguments to + // permute, so the reference cannot be represented against the + // canonical layout — the same silent meaning change as an + // under-applied call. Aux-regen bodies are exempt (their + // references are canonical by construction), as are surgery + // heads (their args were just permuted by the App branch). + if !is_surgery_head + && (stt + .call_site_plans + .get(name) + .is_some_and(|p| !p.is_identity()) + || stt + .below_call_site_plans + .get(name) + .is_some_and(|p| !p.is_identity()) + || stt + .brec_on_call_site_plans + .get(name) + .is_some_and(|p| !p.is_identity())) + { + let compiling_is_aux_regen = + cache.compiling.as_ref().is_some_and(|n| { + crate::ix::decompile::classify_aux_gen(n).is_some_and( + |(_, root)| { + stt.env.named.get(&root).is_some_and(|named| { + matches!(named.meta.info, ConstantMetaInfo::Indc { .. }) + }) + }, + ) + }); + if !compiling_is_aux_regen { + return Err(CompileError::UnsupportedExpr { + desc: format!( + "bare reference to '{}', whose canonical layout permutes arguments; eta-expand the reference", + name.pretty() + ), + }); + } + } let univ_indices = compile_univ_indices(levels, univ_params, cache)?; let name_addr = compile_name(name, stt); @@ -771,10 +836,21 @@ pub fn compile_expr( // original's Ixon still lives in `stt.env.consts` and its // arena must be decompile-safe (decompile iterates all // constants). - let compiling_is_aux_regen = cache - .compiling - .as_ref() - .is_some_and(crate::ix::decompile::is_aux_gen_suffix); + // Tightening: the suffix alone misfires on *user* constants + // that merely look aux-like (e.g. a def in a non-inductive + // namespace named `rec_1`), silently disabling required + // surgery for them. A genuine AuxRegen name classifies to a + // root that is a compiled inductive — require that too. + let compiling_is_aux_regen = + cache.compiling.as_ref().is_some_and(|n| { + crate::ix::decompile::classify_aux_gen(n).is_some_and( + |(_, root)| { + stt.env.named.get(&root).is_some_and(|named| { + matches!(named.meta.info, ConstantMetaInfo::Indc { .. }) + }) + }, + ) + }); if !compiling_is_aux_regen { if let Some(plan) = stt.call_site_plans.get(name) && !plan.is_identity() @@ -923,8 +999,23 @@ pub fn compile_expr( stack.push(Frame::Compile(arg.clone())); } stack.push(Frame::Compile(head_expr.clone())); + surgery_heads_pending += 1; continue; } + // Under-applied (or bare) reference to an auxiliary + // whose canonical layout permutes arguments: surgery + // cannot reorder arguments that are not present, and + // compiling the reference un-permuted silently changes + // its meaning (the canonical constant expects canonical + // argument order). Fail loudly instead. + return Err(CompileError::UnsupportedExpr { + desc: format!( + "under-applied reference to '{}' ({} of {} required args) whose canonical layout permutes arguments; eta-expand the call site", + name.pretty(), + args.len(), + expected_total + ), + }); } if let Some(plan) = stt.below_call_site_plans.get(name) && !plan.is_identity() @@ -1024,8 +1115,20 @@ pub fn compile_expr( stack.push(Frame::Compile(arg.clone())); } stack.push(Frame::Compile(head_expr.clone())); + surgery_heads_pending += 1; continue; } + // See the call_site_plans branch above: an un-permuted + // compile of an under-applied reference silently changes + // meaning. Fail loudly. + return Err(CompileError::UnsupportedExpr { + desc: format!( + "under-applied reference to '{}' ({} of {} required args) whose canonical below-layout permutes arguments; eta-expand the call site", + name.pretty(), + args.len(), + expected_total + ), + }); } if let Some(plan) = stt.brec_on_call_site_plans.get(name) && !plan.is_identity() @@ -1155,8 +1258,20 @@ pub fn compile_expr( stack.push(Frame::Compile(arg.clone())); } stack.push(Frame::Compile(head_expr.clone())); + surgery_heads_pending += 1; continue; } + // See the call_site_plans branch above: an un-permuted + // compile of an under-applied reference silently changes + // meaning. Fail loudly. + return Err(CompileError::UnsupportedExpr { + desc: format!( + "under-applied reference to '{}' ({} of {} required args) whose canonical brecOn-layout permutes arguments; eta-expand the call site", + name.pretty(), + args.len(), + expected_total + ), + }); } } } diff --git a/src/ix/compile/aux_gen/brecon.rs b/src/ix/compile/aux_gen/brecon.rs index dff154d8..e330bf0d 100644 --- a/src/ix/compile/aux_gen/brecon.rs +++ b/src/ix/compile/aux_gen/brecon.rs @@ -235,6 +235,21 @@ fn build_prop_brecon( let n_indices = try_nat_to_usize(&ind.num_indices)?; let ind_level_params = &ind.cnst.level_params; + // Prop-level *nested* blocks are not supported here: the recursor + // carries one motive per flat member (primaries + nested auxiliaries), + // but this Prop path derives `below` names for the `n_classes` primary + // classes only (aux `below_N` generation is skipped for Prop blocks) — + // indexing `below_names[j]` past `n_classes` would panic. Fail closed + // until Prop aux-below generation lands. + if n_motives > n_classes { + return Err(CompileError::UnsupportedExpr { + desc: format!( + "prop brecOn for '{}': nested Prop-level block has {n_motives} motives but only {n_classes} primary below classes; aux below_N generation for Prop blocks is not implemented", + ind.cnst.name.pretty() + ), + }); + } + // For Prop brecOn with large elimination (drec), substitute u -> Level::zero(). // Invariant: generate_canonical_recursors always prepends the elimination level // as level_params[0] for large recursors (recursor.rs:192-194), so [0] is correct. diff --git a/src/ix/compile/aux_gen/recursor.rs b/src/ix/compile/aux_gen/recursor.rs index c5c51539..39644406 100644 --- a/src/ix/compile/aux_gen/recursor.rs +++ b/src/ix/compile/aux_gen/recursor.rs @@ -2485,13 +2485,14 @@ fn compute_is_large_and_k( })?; // Spec-level override: non-Prop inductives always get large elimination - // (Lean C++ `inductive.cpp:539-548`). Our kernel's `is_large_eliminator` - // only early-returns when the result level is *provably* non-zero; a - // Param universe that happens to be non-zero syntactically (e.g., u+1) - // falls through to the single-ctor check and can come back "small". - // Correct that here using the WHNF-reduced result level. + // (Lean C++ `inductive.cpp:539-548`) — where "non-Prop" means the result + // level `is_not_zero`, i.e. *provably* non-zero under every parameter + // assignment (`u+1`, `max(u,1)`, …). A bare `Param`/`Max`/`IMax` level + // that merely *might* be non-zero must NOT force large elimination: such + // an inductive can be instantiated at Prop, where unrestricted large + // elimination is unsound. `is_never_zero` is exactly Lean's `is_not_zero`. let is_large = - if !is_large && !result_kuniv.is_zero() { true } else { is_large }; + if !is_large && result_kuniv.is_never_zero() { true } else { is_large }; // Prop determination: use the WHNF-reduced kernel-derived level, not the // raw LeanExpr-syntactic path. For reducible-alias targets the syntactic diff --git a/src/ix/condense.rs b/src/ix/condense.rs index ab0a6b27..a4df2016 100644 --- a/src/ix/condense.rs +++ b/src/ix/condense.rs @@ -25,7 +25,11 @@ pub struct CondensedBlocks { /// Returns a map from each node to the set of nodes in its SCC. pub fn compute_sccs(refs: &RefMap) -> CondensedBlocks { fn neighbors(refs: &RefMap, n: &Name) -> Vec { - refs.get(n).unwrap().iter().cloned().collect() + // A name referenced but absent from the map (a dangling edge — two FFI + // entry points can hand us such graphs) is a node with no out-edges: + // it condenses to its own singleton SCC instead of panicking the + // process (`panic = "abort"`). + refs.get(n).map(|s| s.iter().cloned().collect()).unwrap_or_default() } struct Frame { @@ -109,7 +113,9 @@ pub fn compute_sccs(refs: &RefMap) -> CondensedBlocks { let mut all_refs = NameSet::default(); for node in &component { block_low_links.insert(node.clone(), v.clone()); - for r in refs.get(node).unwrap() { + // Dangling nodes (in `component` via the tolerant `neighbors` + // above) have no entry in `refs` — and no out-edges. + for r in refs.get(node).into_iter().flatten() { if !component.contains(r) && refs.contains_key(r) { all_refs.insert(r.clone()); } diff --git a/src/ix/decompile.rs b/src/ix/decompile.rs index 3a954a7b..2d39ffdf 100644 --- a/src/ix/decompile.rs +++ b/src/ix/decompile.rs @@ -559,7 +559,23 @@ pub fn decompile_expr( let mut args: Vec> = Vec::new(); let mut cur = expr.clone(); loop { + // Fuel bounds one contiguous run of Share→Share hops (fresh per App + // layer, so DAG-style re-sharing of the same slot from many sites is + // unaffected). Within a run the next slot is fully determined by the + // current one, so revisiting any slot means looping forever; by + // pigeonhole an acyclic run makes at most `len` hops. + let mut share_fuel = cache.sharing.len().saturating_add(1); while let Expr::Share(share_idx) = cur.as_ref() { + if share_fuel == 0 { + return Err(DecompileError::BadConstantFormat { + msg: format!( + "cyclic sharing vector (chain exceeds {} entries) in '{}'", + cache.sharing.len(), + cache.current_const + ), + }); + } + share_fuel -= 1; cur = cache .sharing .get(*share_idx as usize) @@ -603,17 +619,37 @@ pub fn decompile_expr( while let Some(frame) = stack.pop() { match frame { Frame::Decompile(e, idx) => { - // Expand Share transparently with the SAME arena_idx - if let Expr::Share(share_idx) = e.as_ref() { - let shared_expr = cache - .sharing - .get(*share_idx as usize) - .ok_or_else(|| DecompileError::InvalidShareIndex { - idx: *share_idx, - max: cache.sharing.len(), - constant: cache.current_const.clone(), - })? - .clone(); + // Expand Share transparently with the SAME arena_idx. Expansion is + // fuel-bounded inline (instead of re-pushing one frame per hop) so + // a cyclic sharing vector errors instead of looping forever. The + // fuel is per expansion run: each hop's successor is determined by + // the current slot, so a revisit within one run ⟺ a true cycle, + // while repeated references to the same slot from different + // expression positions each get fresh fuel. + if matches!(e.as_ref(), Expr::Share(_)) { + let mut shared_expr = e.clone(); + let mut share_fuel = cache.sharing.len().saturating_add(1); + while let Expr::Share(share_idx) = shared_expr.as_ref() { + if share_fuel == 0 { + return Err(DecompileError::BadConstantFormat { + msg: format!( + "cyclic sharing vector (chain exceeds {} entries) in '{}'", + cache.sharing.len(), + cache.current_const + ), + }); + } + share_fuel -= 1; + shared_expr = cache + .sharing + .get(*share_idx as usize) + .ok_or_else(|| DecompileError::InvalidShareIndex { + idx: *share_idx, + max: cache.sharing.len(), + constant: cache.current_const.clone(), + })? + .clone(); + } stack.push(Frame::Decompile(shared_expr, idx)); continue; } @@ -625,12 +661,25 @@ pub fn decompile_expr( continue; } - // Follow Mdata chain in arena, collecting mdata layers + // Follow Mdata chain in arena, collecting mdata layers. + // Fuel-bounded: an acyclic chain visits each arena node at most + // once, so a longer walk means the arena has a child-index cycle. let mut current_idx = idx; let mut mdata_layers: LeanMdata = Vec::new(); + let mut mdata_fuel = arena.nodes.len().saturating_add(1); while let ExprMetaData::Mdata { mdata, child } = arena_lookup(arena, current_idx, &cache.current_const)? { + if mdata_fuel == 0 { + return Err(DecompileError::BadConstantFormat { + msg: format!( + "cyclic Mdata arena chain (exceeds {} nodes) in '{}'", + arena.nodes.len(), + cache.current_const + ), + }); + } + mdata_fuel -= 1; for kvm in mdata { mdata_layers.push(decompile_kvmap(kvm, stt)?); } @@ -1894,7 +1943,7 @@ fn decompile_const( /// Recognized aux_gen suffix kinds, ordered by dependency. #[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum AuxKind { +pub(crate) enum AuxKind { Rec, RecOn, CasesOn, @@ -1919,7 +1968,7 @@ pub(crate) fn is_aux_gen_suffix(name: &Name) -> bool { /// Classify an aux_gen constant by suffix, returning (kind, root_inductive). /// The root inductive is the base inductive the auxiliary is derived from. -fn classify_aux_gen(name: &Name) -> Option<(AuxKind, Name)> { +pub(crate) fn classify_aux_gen(name: &Name) -> Option<(AuxKind, Name)> { use crate::ix::env::NameData; let s1 = name.last_str()?; let p1 = match name.as_data() { @@ -3313,13 +3362,16 @@ fn indc_source_all(name: &Name, stt: &CompileState) -> Option> { } } -fn stored_plan_blocks_for_original_all( - original_all: &[Name], - stt: &CompileState, -) -> Vec { - let original_set: FxHashSet = original_all.iter().cloned().collect(); - let mut candidates = Vec::new(); - let mut seen: FxHashSet> = FxHashSet::default(); +/// Stored `Muts` call-site plans bucketed by their members' source block +/// (`Indc.all`). Built once per decompile by [`build_muts_plan_index`] +/// (one pass over `env.named`) and consulted per aux block — the per-block +/// full-map rescan it replaces was O(blocks × named) at mathlib scale. +type MutsPlanIndex = FxHashMap, Vec>; + +fn build_muts_plan_index(stt: &CompileState) -> MutsPlanIndex { + let mut index: MutsPlanIndex = FxHashMap::default(); + // Per-bucket dedup by flat member list, mirroring the old per-call `seen`. + let mut seen: FxHashSet<(Vec, Vec)> = FxHashSet::default(); for muts_entry in stt.env.named.iter() { let ConstantMetaInfo::Muts { all, aux_layout } = @@ -3346,27 +3398,44 @@ fn stored_plan_blocks_for_original_all( if !valid || flat_names.is_empty() { continue; } - if !flat_names.iter().all(|name| original_set.contains(name)) { - continue; - } - let same_source_all = flat_names.iter().any(|name| { - indc_source_all(name, stt) - .is_some_and(|source_all| source_all.as_slice() == original_all) - }); - if !same_source_all { - continue; + // Bucket under each distinct member source block: the old per-call + // filter kept a candidate iff *any* member's `Indc.all` equalled the + // queried block, which is exactly bucket membership. + let mut source_alls: Vec> = Vec::new(); + for name in &flat_names { + if let Some(sa) = indc_source_all(name, stt) + && !source_alls.contains(&sa) + { + source_alls.push(sa); + } } - - if !seen.insert(flat_names.clone()) { - continue; + for sa in source_alls { + if !seen.insert((sa.clone(), flat_names.clone())) { + continue; + } + index.entry(sa).or_default().push(StoredPlanBlock { + class_names: class_names.clone(), + aux_layout: aux_layout.clone(), + flat_names: flat_names.clone(), + }); } - candidates.push(StoredPlanBlock { - class_names, - aux_layout: aux_layout.clone(), - flat_names, - }); } + index +} + +fn stored_plan_blocks_for_original_all( + original_all: &[Name], + plan_index: &MutsPlanIndex, +) -> Vec { + let Some(bucket) = plan_index.get(original_all) else { + return Vec::new(); + }; + let original_set: FxHashSet = original_all.iter().cloned().collect(); + let candidates: Vec<&StoredPlanBlock> = bucket + .iter() + .filter(|c| c.flat_names.iter().all(|name| original_set.contains(name))) + .collect(); // Prefer persisted minimal SCCs. If a stale/full source block is present, // it is a strict superset of the minimal candidates and would recreate an @@ -3381,7 +3450,7 @@ fn stored_plan_blocks_for_original_all( && other.flat_names.iter().all(|name| candidate_set.contains(name)) }) }) - .cloned() + .map(|c| (*c).clone()) .collect() } @@ -3421,6 +3490,7 @@ fn install_decompile_call_site_plans( aux_members: &[(AuxKind, Name)], env: &LeanEnv, stt: &CompileState, + plan_index: &MutsPlanIndex, ) -> Result<(), DecompileError> { use crate::ix::compile::{aux_gen, surgery}; @@ -3429,7 +3499,8 @@ fn install_decompile_call_site_plans( } let original_all: Vec = all_names.to_vec(); - let mut plan_blocks = stored_plan_blocks_for_original_all(&original_all, stt); + let mut plan_blocks = + stored_plan_blocks_for_original_all(&original_all, plan_index); if plan_blocks.is_empty() { plan_blocks = fallback_plan_blocks_from_sort(all_names, env, stt)?; } @@ -3505,6 +3576,7 @@ fn decompile_block_aux_gen( kctx: &mut crate::ix::compile::KernelCtx, stt: &CompileState, dstt: &DecompileState, + plan_index: &MutsPlanIndex, ) -> Vec<(Name, DecompileError)> { use crate::ix::compile::aux_gen::{ below::{BelowConstant, generate_below_constants}, @@ -3681,9 +3753,13 @@ fn decompile_block_aux_gen( dstt.env.entry(n.clone()).or_insert_with(|| ci.clone()); } - if let Err(e) = - install_decompile_call_site_plans(all_names, aux_members, env, stt) - { + if let Err(e) = install_decompile_call_site_plans( + all_names, + aux_members, + env, + stt, + plan_index, + ) { aux_gen_errors.push((all_names[0].clone(), e)); } @@ -4398,6 +4474,10 @@ pub fn decompile_env( // for every block (still O(n) across all blocks combined). let mut ingressed: FxHashSet = FxHashSet::default(); + // Stored call-site plan index: one O(named) pass here instead of a full + // `env.named` rescan per aux block inside the loop below. + let muts_plan_index = build_muts_plan_index(stt); + // Progress tracking. Per-block progress logs (every `log_stride` blocks or // every 5 s) are opt-in via `IX_DECOMPILE_PROGRESS`; slow-block warnings // (any single block exceeding `slow_threshold`) are always emitted. @@ -4448,6 +4528,7 @@ pub fn decompile_env( &mut kctx, stt, &dstt, + &muts_plan_index, ); aux_gen_errors.extend(errors); diff --git a/src/ix/graph.rs b/src/ix/graph.rs index f90c7a95..5ffdcea5 100644 --- a/src/ix/graph.rs +++ b/src/ix/graph.rs @@ -140,40 +140,85 @@ fn get_expr_references<'a>( expr: &'a Expr, cache: &mut FxHashMap<&'a Expr, NameSet>, ) -> NameSet { - if let Some(cached) = cache.get(expr) { - return cached.clone(); + // Iterative post-order walk: this runs on default-size rayon worker + // stacks, where structural recursion over mathlib-scale (or untrusted) + // terms can overflow — a hard process abort under `panic = "abort"`. + enum Walk<'a> { + Visit(&'a Expr), + Finish(&'a Expr), } - let name_set = match expr.as_data() { - ExprData::Const(name, ..) => NameSet::from_iter([name.clone()]), - ExprData::App(f, a, _) => { - let f_name_set = get_expr_references(f, cache); - let a_name_set = get_expr_references(a, cache); - merge_name_sets(f_name_set, a_name_set) - }, - ExprData::Lam(_, typ, body, ..) | ExprData::ForallE(_, typ, body, ..) => { - let typ_name_set = get_expr_references(typ, cache); - let body_name_set = get_expr_references(body, cache); - merge_name_sets(typ_name_set, body_name_set) - }, - ExprData::LetE(_, typ, value, body, ..) => { - let typ_name_set = get_expr_references(typ, cache); - let value_name_set = get_expr_references(value, cache); - let body_name_set = get_expr_references(body, cache); - merge_name_sets( - typ_name_set, - merge_name_sets(value_name_set, body_name_set), - ) - }, - ExprData::Mdata(_, expr, _) => get_expr_references(expr, cache), - ExprData::Proj(type_name, _, expr, _) => { - let mut name_set = get_expr_references(expr, cache); - name_set.insert(type_name.clone()); - name_set - }, - _ => NameSet::default(), - }; - cache.insert(expr, name_set.clone()); - name_set + let mut work: Vec> = vec![Walk::Visit(expr)]; + let mut out: Vec = Vec::new(); + while let Some(w) = work.pop() { + match w { + Walk::Visit(e) => { + if let Some(cached) = cache.get(e) { + out.push(cached.clone()); + continue; + } + // Children are pushed in reverse so they complete left-to-right; + // `Finish` pops their sets right-to-left. + match e.as_data() { + ExprData::App(f, a, _) => { + work.push(Walk::Finish(e)); + work.push(Walk::Visit(a)); + work.push(Walk::Visit(f)); + }, + ExprData::Lam(_, typ, body, ..) + | ExprData::ForallE(_, typ, body, ..) => { + work.push(Walk::Finish(e)); + work.push(Walk::Visit(body)); + work.push(Walk::Visit(typ)); + }, + ExprData::LetE(_, typ, value, body, ..) => { + work.push(Walk::Finish(e)); + work.push(Walk::Visit(body)); + work.push(Walk::Visit(value)); + work.push(Walk::Visit(typ)); + }, + ExprData::Mdata(_, inner, _) | ExprData::Proj(_, _, inner, _) => { + work.push(Walk::Finish(e)); + work.push(Walk::Visit(inner)); + }, + ExprData::Const(name, ..) => { + let name_set = NameSet::from_iter([name.clone()]); + cache.insert(e, name_set.clone()); + out.push(name_set); + }, + _ => { + let name_set = NameSet::default(); + cache.insert(e, name_set.clone()); + out.push(name_set); + }, + } + }, + Walk::Finish(e) => { + let name_set = match e.as_data() { + ExprData::App(..) | ExprData::Lam(..) | ExprData::ForallE(..) => { + let right = out.pop().expect("walk: right child"); + let left = out.pop().expect("walk: left child"); + merge_name_sets(left, right) + }, + ExprData::LetE(..) => { + let body = out.pop().expect("walk: let body"); + let value = out.pop().expect("walk: let value"); + let typ = out.pop().expect("walk: let type"); + merge_name_sets(typ, merge_name_sets(value, body)) + }, + ExprData::Mdata(..) => out.pop().expect("walk: mdata inner"), + ExprData::Proj(type_name, ..) => { + let mut name_set = out.pop().expect("walk: proj inner"); + name_set.insert(type_name.clone()); + name_set + }, + _ => unreachable!("leaves are finished at Visit"), + }; + cache.insert(e, name_set.clone()); + out.push(name_set); + }, + } + } + out.pop().unwrap_or_default() } #[cfg(test)] diff --git a/src/ix/ixon/metadata.rs b/src/ix/ixon/metadata.rs index e6f48db1..c90eabed 100644 --- a/src/ix/ixon/metadata.rs +++ b/src/ix/ixon/metadata.rs @@ -277,17 +277,17 @@ impl ConstantMeta { // Extension tables: always present (put_indexed always writes them, // even when empty — three zero-length vectors). let sharing_len = get_vec_len(buf)?; - let mut meta_sharing = Vec::with_capacity(sharing_len); + let mut meta_sharing = Vec::with_capacity(capped(sharing_len, buf)); for _ in 0..sharing_len { meta_sharing.push(get_expr(buf)?); } let refs_len = get_vec_len(buf)?; - let mut meta_refs = Vec::with_capacity(refs_len); + let mut meta_refs = Vec::with_capacity(capped(refs_len, buf)); for _ in 0..refs_len { meta_refs.push(get_address_raw(buf)?); } let univs_len = get_vec_len(buf)?; - let mut meta_univs = Vec::with_capacity(univs_len); + let mut meta_univs = Vec::with_capacity(capped(univs_len, buf)); for _ in 0..univs_len { meta_univs.push(get_univ(buf)?); } @@ -344,7 +344,7 @@ pub fn resolve_kvmap( // rather than in `decompile.rs` (which depends on CompileState). let bytes = ixon_env.get_blob(a)?; let mut buf = bytes.as_slice(); - let syn = deser_syntax(&mut buf, ixon_env)?; + let syn = deser_syntax(&mut buf, ixon_env, 0)?; env::DataValue::OfSyntax(Box::new(syn)) }, }; @@ -446,7 +446,7 @@ fn deser_preresolved( 1 => { let name = ixon_env.get_name(&deser_addr(buf)?)?; let count = deser_tag0(buf)? as usize; - let mut fields = Vec::with_capacity(count); + let mut fields = Vec::with_capacity(capped(count, buf)); for _ in 0..count { let addr = deser_addr(buf)?; fields.push(String::from_utf8(ixon_env.get_blob(&addr)?).ok()?); @@ -457,19 +457,28 @@ fn deser_preresolved( } } +/// Recursion guard for `deser_syntax`: syntax trees come from untrusted +/// metadata bytes; without a bound a crafted blob of nested Node tags +/// overflows the stack (a hard process abort under `panic = "abort"`). +const MAX_SYNTAX_DEPTH: usize = 512; + fn deser_syntax( buf: &mut &[u8], ixon_env: &super::env::Env, + depth: usize, ) -> Option { + if depth > MAX_SYNTAX_DEPTH { + return None; + } match deser_u8(buf)? { 0 => Some(env::Syntax::Missing), 1 => { let info = deser_source_info(buf, ixon_env)?; let kind = ixon_env.get_name(&deser_addr(buf)?)?; let arg_count = deser_tag0(buf)? as usize; - let mut args = Vec::with_capacity(arg_count); + let mut args = Vec::with_capacity(capped(arg_count, buf)); for _ in 0..arg_count { - args.push(deser_syntax(buf, ixon_env)?); + args.push(deser_syntax(buf, ixon_env, depth + 1)?); } Some(env::Syntax::Node(info, kind, args)) }, @@ -484,7 +493,7 @@ fn deser_syntax( let raw_val = deser_substring(buf, ixon_env)?; let val = ixon_env.get_name(&deser_addr(buf)?)?; let pr_count = deser_tag0(buf)? as usize; - let mut preresolved = Vec::with_capacity(pr_count); + let mut preresolved = Vec::with_capacity(capped(pr_count, buf)); for _ in 0..pr_count { preresolved.push(deser_preresolved(buf, ixon_env)?); } @@ -556,6 +565,15 @@ pub(super) fn get_vec_len(buf: &mut &[u8]) -> Result { Ok(Tag0::get(buf)?.size as usize) } +/// Cap capacity for Vec allocation during deserialization. Counts come +/// straight from untrusted bytes; each element consumes at least one byte, +/// so a count larger than the remaining buffer is necessarily malformed — +/// allocate at most `buf.len()` up front and let the element reads fail. +#[inline] +fn capped(len: usize, buf: &[u8]) -> usize { + len.min(buf.len()) +} + // =========================================================================== // BinderInfo and ReducibilityHints serialization // =========================================================================== @@ -660,7 +678,7 @@ fn get_idx_vec( rev: &NameReverseIndex, ) -> Result, String> { let len = get_vec_len(buf)?; - let mut v = Vec::with_capacity(len); + let mut v = Vec::with_capacity(capped(len, buf)); for _ in 0..len { v.push(get_idx(buf, rev)?); } @@ -746,7 +764,7 @@ fn get_kvmap_indexed( rev: &NameReverseIndex, ) -> Result { let len = get_vec_len(buf)?; - let mut kvmap = Vec::with_capacity(len); + let mut kvmap = Vec::with_capacity(capped(len, buf)); for _ in 0..len { kvmap.push((get_idx(buf, rev)?, DataValue::get_indexed(buf, rev)?)); } @@ -770,7 +788,7 @@ fn get_mdata_stack_indexed( rev: &NameReverseIndex, ) -> Result, String> { let len = get_vec_len(buf)?; - let mut mdata = Vec::with_capacity(len); + let mut mdata = Vec::with_capacity(capped(len, buf)); for _ in 0..len { mdata.push(get_kvmap_indexed(buf, rev)?); } @@ -908,7 +926,7 @@ impl ExprMetaData { 10 => { let name = get_idx(buf, rev)?; let n_entries = get_vec_len(buf)?; - let mut entries = Vec::with_capacity(n_entries); + let mut entries = Vec::with_capacity(capped(n_entries, buf)); for _ in 0..n_entries { let entry = match get_u8(buf)? { 0 => { @@ -955,7 +973,7 @@ impl ExprMeta { rev: &NameReverseIndex, ) -> Result { let len = get_vec_len(buf)?; - let mut nodes = Vec::with_capacity(len); + let mut nodes = Vec::with_capacity(capped(len, buf)); for _ in 0..len { nodes.push(ExprMetaData::get_indexed(buf, rev)?); } @@ -972,7 +990,7 @@ fn put_u64_vec(v: &[u64], buf: &mut Vec) { fn get_u64_vec(buf: &mut &[u8]) -> Result, String> { let len = get_vec_len(buf)?; - let mut v = Vec::with_capacity(len); + let mut v = Vec::with_capacity(capped(len, buf)); for _ in 0..len { v.push(get_u64(buf)?); } @@ -1147,7 +1165,7 @@ impl ConstantMetaInfo { }), 6 => { let n = get_u64(buf)? as usize; - let mut all = Vec::with_capacity(n); + let mut all = Vec::with_capacity(capped(n, buf)); for _ in 0..n { all.push(get_idx_vec(buf, rev)?); } @@ -1155,12 +1173,13 @@ impl ConstantMetaInfo { 0 => None, 1 => { let n_perm = get_u64(buf)? as usize; - let mut perm = Vec::with_capacity(n_perm); + let mut perm = Vec::with_capacity(capped(n_perm, buf)); for _ in 0..n_perm { perm.push(get_u64(buf)? as usize); } let n_counts = get_u64(buf)? as usize; - let mut source_ctor_counts = Vec::with_capacity(n_counts); + let mut source_ctor_counts = + Vec::with_capacity(capped(n_counts, buf)); for _ in 0..n_counts { source_ctor_counts.push(get_u64(buf)? as usize); } diff --git a/src/ix/ixon/proof.rs b/src/ix/ixon/proof.rs index 805f0017..8e248de4 100644 --- a/src/ix/ixon/proof.rs +++ b/src/ix/ixon/proof.rs @@ -408,7 +408,9 @@ fn put_rules(rules: &[RevealRecursorRule], buf: &mut Vec) { fn get_rules(buf: &mut &[u8]) -> Result, String> { let count = usize::try_from(Tag0::get(buf)?.size).map_err(|e| e.to_string())?; - let mut rules = Vec::with_capacity(count); + // Cap against the remaining buffer: counts are untrusted and each + // element consumes at least one byte. + let mut rules = Vec::with_capacity(count.min(buf.len())); for _ in 0..count { rules.push(RevealRecursorRule::get(buf)?); } @@ -428,7 +430,7 @@ fn get_ctors( ) -> Result, String> { let count = usize::try_from(Tag0::get(buf)?.size).map_err(|e| e.to_string())?; - let mut ctors = Vec::with_capacity(count); + let mut ctors = Vec::with_capacity(count.min(buf.len())); for _ in 0..count { let idx = Tag0::get(buf)?.size; let info = RevealConstructorInfo::get(buf)?; @@ -924,7 +926,7 @@ impl RevealConstantInfo { let components = if mask & 1 != 0 { let count = usize::try_from(Tag0::get(buf)?.size).map_err(|e| e.to_string())?; - let mut comps = Vec::with_capacity(count); + let mut comps = Vec::with_capacity(count.min(buf.len())); for _ in 0..count { let idx = Tag0::get(buf)?.size; let info = RevealMutConstInfo::get(buf)?; diff --git a/src/ix/ixon/serialize.rs b/src/ix/ixon/serialize.rs index ecbed0bf..b4eef6c2 100644 --- a/src/ix/ixon/serialize.rs +++ b/src/ix/ixon/serialize.rs @@ -1031,13 +1031,14 @@ pub fn put_aux_layout(layout: &AuxLayout, buf: &mut Vec) { /// Deserialize an `AuxLayout` side-table entry. pub fn get_aux_layout(buf: &mut &[u8]) -> Result { - let n_perm = get_u64(buf)? as usize; - let mut perm = Vec::with_capacity(n_perm); + let n_perm = get_u64(buf)?; + let mut perm = Vec::with_capacity(capped_capacity(n_perm, buf)); for _ in 0..n_perm { perm.push(get_u64(buf)? as usize); } - let n_counts = get_u64(buf)? as usize; - let mut source_ctor_counts = Vec::with_capacity(n_counts); + let n_counts = get_u64(buf)?; + let mut source_ctor_counts = + Vec::with_capacity(capped_capacity(n_counts, buf)); for _ in 0..n_counts { source_ctor_counts.push(get_u64(buf)? as usize); } @@ -1359,6 +1360,18 @@ impl Env { } let (bytes, rest) = buf.split_at(len); *buf = rest; + // Per-entry integrity, mirroring the const check below. The kernel + // reads literal *values* out of blob bytes while hashing only the + // blob address, so an unverified blob lets tampered bytes change a + // literal inside a checked type without disturbing the merkle root. + let computed = Address::hash(bytes); + if computed != addr { + return Err(format!( + "Env::get: blob bytes hash to {} but stored under {}", + computed.hex(), + addr.hex() + )); + } env.blobs.insert(addr, bytes.to_vec()); } @@ -1397,7 +1410,7 @@ impl Env { let num_names = get_u64(buf)?; let mut names_lookup: FxHashMap = FxHashMap::default(); let mut name_reverse_index: NameReverseIndex = - Vec::with_capacity(num_names as usize + 1); + Vec::with_capacity(capped_capacity(num_names, buf) + 1); // Anonymous name is serialized first (index 0) — read it from the stream // along with all other names below. But pre-seed the lookup so name // reconstruction works for names whose parent is anonymous. @@ -1503,6 +1516,14 @@ impl Env { } let (bytes, rest) = buf.split_at(len); *buf = rest; + let computed = Address::hash(bytes); + if computed != addr { + return Err(format!( + "Env::get_anon: blob bytes hash to {} but stored under {}", + computed.hex(), + addr.hex() + )); + } env.blobs.insert(addr, bytes.to_vec()); } @@ -1538,7 +1559,7 @@ impl Env { let num_names = get_u64(buf)?; let mut names_lookup: FxHashMap = FxHashMap::default(); let mut name_reverse_index: NameReverseIndex = - Vec::with_capacity(num_names as usize + 1); + Vec::with_capacity(capped_capacity(num_names, buf) + 1); let anon_addr = Address::from_blake3_hash(*Name::anon().get_hash()); names_lookup.insert(anon_addr, Name::anon()); for _ in 0..num_names { @@ -1696,6 +1717,14 @@ impl Env { } let (bytes, rest) = buf.split_at(len); buf = rest; + let computed = Address::hash(bytes); + if computed != addr { + return Err(format!( + "Env::get_anon_mmap: blob bytes hash to {} but stored under {}", + computed.hex(), + addr.hex() + )); + } env.blobs.insert(addr, bytes.to_vec()); } @@ -1732,7 +1761,7 @@ impl Env { let num_names = get_u64(&mut buf)?; let mut names_lookup: FxHashMap = FxHashMap::default(); let mut name_reverse_index: NameReverseIndex = - Vec::with_capacity(num_names as usize + 1); + Vec::with_capacity(capped_capacity(num_names, buf) + 1); let anon_addr = Address::from_blake3_hash(*Name::anon().get_hash()); names_lookup.insert(anon_addr, Name::anon()); for _ in 0..num_names { diff --git a/src/ix/kernel/env.rs b/src/ix/kernel/env.rs index 1fdda019..ae74ab02 100644 --- a/src/ix/kernel/env.rs +++ b/src/ix/kernel/env.rs @@ -124,6 +124,15 @@ pub struct GeneratedRecursor { pub ind_addr: Address, pub ty: KExpr, pub rules: Vec>, + /// Canonical arity split (params, motives, minors, indices). A def-eq + /// check on `ty` only constrains the *sum* of leading binders; the iota + /// reducer (`whnf` major-index slicing) trusts the individual scalars, + /// so they must be validated against this canonical split, not just the + /// total. See `check_recursor_member`. + pub params: u64, + pub motives: u64, + pub minors: u64, + pub indices: u64, } /// Which nested-auxiliary order generated recursor validation should use. diff --git a/src/ix/kernel/inductive.rs b/src/ix/kernel/inductive.rs index 4cba8c1c..80d10cc2 100644 --- a/src/ix/kernel/inductive.rs +++ b/src/ix/kernel/inductive.rs @@ -2351,6 +2351,8 @@ peers={} flat={} rec_ids={} failed_gi={failed_gi}", // Generate recursor type for each ORIGINAL inductive (not auxiliaries). // The recursor type spans all flat block members (motives, minors). + let gen_motives = flat.len() as u64; + let gen_minors: u64 = flat.iter().map(|m| m.ctors.len() as u64).sum(); let mut generated = Vec::new(); for di in 0..n_originals { let rec_type = self.build_rec_type( @@ -2368,6 +2370,10 @@ peers={} flat={} rec_ids={} failed_gi={failed_gi}", // Rules are populated later from the recursor block by // `populate_recursor_rules_from_block`. rules: vec![], + params: n_params, + motives: gen_motives, + minors: gen_minors, + indices: flat[di].n_indices, }); } @@ -2388,6 +2394,10 @@ peers={} flat={} rec_ids={} failed_gi={failed_gi}", // Rules are populated later from the recursor block by // `populate_recursor_rules_from_block`. rules: vec![], + params: n_params, + motives: gen_motives, + minors: gen_minors, + indices: flat[di].n_indices, }); } @@ -4243,77 +4253,111 @@ re-run with `IX_RECURSOR_DUMP={}` for the full breakdown.", } } - let gen_rec = selected_idx.map(|i| &generated[i]); - match gen_rec { - Some(g) => { - if !self.is_def_eq(&g.ty, &ty)? { - let selected_by_signature = - selected_idx.is_some_and(|idx| signature_matches.contains(&idx)); - if self.env.recursor_aux_order == RecursorAuxOrder::Canonical - && motives > 1 - && selected_by_signature - { - return self.check_recursor_coherence(id); - } + // Resolve the matching canonical recursor by FULL type def-eq: try the + // selected candidate first, then every other signature match. Nested + // auxiliaries can contain several recursors with the same external + // major signature, and the positional/first-match selection above can + // pick the wrong peer. (This replaces a former `Canonical && motives>1` + // fallback to `check_recursor_coherence`, which skipped the type and + // rule comparison entirely — accepting a stored recursor whose type + // matched no canonical form. Acceptance now always requires a full + // type + rules match against a generated recursor.) + let mut candidate_order: Vec = Vec::new(); + if let Some(i) = selected_idx { + candidate_order.push(i); + } + for &gi in &signature_matches { + if Some(gi) != selected_idx { + candidate_order.push(gi); + } + } + let mut matched_idx: Option = None; + for gi in candidate_order { + if self.is_def_eq(&generated[gi].ty, &ty)? { + matched_idx = Some(gi); + break; + } + } - // When `IX_TYPE_DIFF` is set, walk the binder chain to find the - // first divergent binder and print a readable gen/sto diff. Off - // by default: in alpha-collapse regimes or for mutual blocks - // with near-identical peers, every such mismatch ends up in - // `stt.ungrounded` (non-fatal), and printing them all drowns - // stderr under tens of thousands of lines. The walk only runs - // when the env var is set to keep the common path cheap. - // - // Uses `KExpr::Display` (Name.Pretty@shorthex for consts, - // `#idx` / `name` for vars, `(f a b …)` for spines, etc.) — - // the same formatter `TcError::AppTypeMismatch` uses — so the - // output format matches the rest of the kernel's diagnostic - // surface. - if *IX_TYPE_DIFF { - let mut gc = g.ty.clone(); - let mut sc = ty.clone(); - let mut bi = 0u64; - loop { - match (gc.data(), sc.data()) { - ( - ExprData::All(_, _, gd, gb, _), - ExprData::All(_, _, sd, sb, _), - ) => { - if !self.is_def_eq(gd, sd).unwrap_or(false) { - let label = if bi < params { - "param" - } else if bi < params + motives { - "motive" - } else if bi < params + motives + minors { - "minor" - } else { - "idx/major" - }; - eprintln!( - "[type diff] binder {bi} ({label}) DIFFERS (p={params} m={motives} min={minors})" - ); - eprintln!(" gen: {gd}"); - eprintln!(" sto: {sd}"); - break; - } - let _ = self.push_fvar_decl_anon(gd.clone()); - gc = gb.clone(); - sc = sb.clone(); - bi += 1; - }, - _ => { - eprintln!("[type diff] return differs at {bi}"); - eprintln!(" gen: {gc}"); - eprintln!(" sto: {sc}"); - break; - }, + if matched_idx.is_none() + && let Some(diff_idx) = selected_idx + { + // When `IX_TYPE_DIFF` is set, walk the binder chain to find the + // first divergent binder and print a readable gen/sto diff (against + // the originally selected candidate). Off by default: in + // alpha-collapse regimes or for mutual blocks with near-identical + // peers, every such mismatch ends up in `stt.ungrounded` + // (non-fatal), and printing them all drowns stderr under tens of + // thousands of lines. The walk only runs when the env var is set to + // keep the common path cheap. + // + // Uses `KExpr::Display` (Name.Pretty@shorthex for consts, + // `#idx` / `name` for vars, `(f a b …)` for spines, etc.) — + // the same formatter `TcError::AppTypeMismatch` uses — so the + // output format matches the rest of the kernel's diagnostic + // surface. + if *IX_TYPE_DIFF { + let mut gc = generated[diff_idx].ty.clone(); + let mut sc = ty.clone(); + let mut bi = 0u64; + loop { + match (gc.data(), sc.data()) { + ( + ExprData::All(_, _, gd, gb, _), + ExprData::All(_, _, sd, sb, _), + ) => { + if !self.is_def_eq(gd, sd).unwrap_or(false) { + let label = if bi < params { + "param" + } else if bi < params + motives { + "motive" + } else if bi < params + motives + minors { + "minor" + } else { + "idx/major" + }; + eprintln!( + "[type diff] binder {bi} ({label}) DIFFERS (p={params} m={motives} min={minors})" + ); + eprintln!(" gen: {gd}"); + eprintln!(" sto: {sd}"); + break; } - } - for _ in 0..bi { - self.lctx.truncate(self.lctx.len() - 1); - } + let _ = self.push_fvar_decl_anon(gd.clone()); + gc = gb.clone(); + sc = sb.clone(); + bi += 1; + }, + _ => { + eprintln!("[type diff] return differs at {bi}"); + eprintln!(" gen: {gc}"); + eprintln!(" sto: {sc}"); + break; + }, } - return Err(TcError::Other("check_recursor: type mismatch".into())); + } + for _ in 0..bi { + self.lctx.truncate(self.lctx.len() - 1); + } + } + return Err(TcError::Other("check_recursor: type mismatch".into())); + } + + let gen_rec = matched_idx.map(|i| &generated[i]); + match gen_rec { + Some(g) => { + // The iota reducer slices the application spine using the stored + // scalar split (params/motives/minors/indices); the type def-eq + // above constrains only their *sum*, so a shifted split would + // still pass and make `whnf` apply the canonical RHS to the wrong + // argument list. Pin the stored scalars to the canonical split. + if (params, motives, minors, indices) + != (g.params, g.motives, g.minors, g.indices) + { + return Err(TcError::Other(format!( + "check_recursor: arity split mismatch: stored (p={params} m={motives} min={minors} i={indices}) vs canonical (p={} m={} min={} i={})", + g.params, g.motives, g.minors, g.indices + ))); } let gen_rules = g.rules.clone(); diff --git a/src/ix/kernel/infer.rs b/src/ix/kernel/infer.rs index 95f0232b..6599241a 100644 --- a/src/ix/kernel/infer.rs +++ b/src/ix/kernel/infer.rs @@ -381,6 +381,20 @@ impl TypeChecker<'_, M> { )); } + // Lean's kernel requires the struct type to be *fully* applied + // (`args.size == nparams + nindices`, type_checker.cpp infer_proj); + // an under-/over-applied spine makes the parameter substitution below + // walk the wrong arguments. + if args.len() != num_params + num_indices { + return Err(TcError::Other(format!( + "projection: struct type expects {} args ({} params + {} indices), got {}", + num_params + num_indices, + num_params, + num_indices, + args.len() + ))); + } + // Check if the structure lives in Prop. Do this from the inductive // declaration's result sort instead of inferring the full applied value // type: projection-heavy proof terms otherwise re-infer every parameter diff --git a/src/ix/kernel/ingress.rs b/src/ix/kernel/ingress.rs index 38bedb6e..a0f32abb 100644 --- a/src/ix/kernel/ingress.rs +++ b/src/ix/kernel/ingress.rs @@ -615,7 +615,23 @@ fn ingress_expr( // `Share` is transparent and keeps the same arena root. Expand it // before cache/mdata work; the old path walked metadata for the Share // frame, discarded it, then reprocessed the shared expression. + // + // Fuel bound: the sharing vector comes from untrusted bytes, so a + // crafted cycle (`sharing[a] = Share(b)`, `sharing[b] = Share(a)`) + // would otherwise spin forever. The fuel covers one contiguous run + // of Share→Share hops (fresh per frame — re-sharing the same slot + // from many expression positions is fine); within a run each hop's + // successor is determined by the current slot, so a revisit ⟺ a + // true cycle, and by pigeonhole an acyclic run makes ≤ len hops. + let mut share_fuel = ctx.sharing.len().saturating_add(1); while let IxonExpr::Share(share_idx) = expr.as_ref() { + if share_fuel == 0 { + return Err(format!( + "Share chain exceeds sharing table length ({}) — cycle in sharing vector", + ctx.sharing.len() + )); + } + share_fuel -= 1; bump_convert_stat!(stats, share_expansions); expr = ctx @@ -650,10 +666,17 @@ fn ingress_expr( bump_convert_stat!(stats, expr_cache_misses); } - // Walk mdata chain in arena + // Walk mdata chain in arena. + // + // Fuel bound: the arena lives in the Named section, which is not + // content-addressed, so a self/back-referential `Mdata { child }` + // would otherwise loop forever while growing `mdata_layers` + // unboundedly. An acyclic chain visits each arena node at most + // once, so `len + 1` steps suffice. let arena_t0 = if stats.enabled { Some(Instant::now()) } else { None }; let mut current_idx = arena_idx; let mut mdata_layers: Vec = Vec::new(); + let mut mdata_fuel = ctx.arena.nodes.len().saturating_add(1); while let Some(ExprMetaData::Mdata { mdata, child }) = ctx.arena.nodes.get( usize::try_from(current_idx).map_err(|_e| { @@ -661,6 +684,13 @@ fn ingress_expr( })?, ) { + if mdata_fuel == 0 { + return Err(format!( + "Mdata chain exceeds arena length ({}) — cycle in metadata arena", + ctx.arena.nodes.len() + )); + } + mdata_fuel -= 1; bump_convert_stat!(stats, mdata_nodes); bump_convert_stat!(stats, mdata_kv_maps, mdata.len()); let kv_t0 = if stats.enabled { Some(Instant::now()) } else { None }; @@ -857,7 +887,17 @@ fn ingress_expr( let mut canonical_args: Vec> = Vec::new(); let mut cur = expr.clone(); loop { + // Fuel-bounded like the main Share expansion above: the + // sharing vector is untrusted and may contain cycles. + let mut share_fuel = ctx.sharing.len().saturating_add(1); while let IxonExpr::Share(share_idx) = cur.as_ref() { + if share_fuel == 0 { + return Err(format!( + "Share chain exceeds sharing table length ({}) — cycle in sharing vector", + ctx.sharing.len() + )); + } + share_fuel -= 1; cur = ctx .sharing .get(usize::try_from(*share_idx).map_err(|_e| { @@ -876,7 +916,15 @@ fn ingress_expr( } canonical_args.reverse(); let mut head_ixon = cur; + let mut share_fuel = ctx.sharing.len().saturating_add(1); while let IxonExpr::Share(share_idx) = head_ixon.as_ref() { + if share_fuel == 0 { + return Err(format!( + "Share chain exceeds sharing table length ({}) — cycle in sharing vector", + ctx.sharing.len() + )); + } + share_fuel -= 1; head_ixon = ctx .sharing .get(usize::try_from(*share_idx).map_err(|_e| { @@ -3183,11 +3231,25 @@ fn insert_addr_aliases( let Some(names) = lookups.names_for_addr(addr) else { return; }; - let Some(template) = kenv - .consts - .iter() - .find_map(|(id, c)| if &id.addr == addr { Some(c.clone()) } else { None }) - else { + // The const was almost always ingressed under one of this addr's names — + // probe those KIds directly (O(#names) hash lookups) instead of scanning + // the entire consts map, which made alias insertion O(env) per + // projection alias (quadratic over a full ingress). + let mut template: Option> = None; + for name in names { + let id = KId::new(addr.clone(), M::meta_field(name.clone())); + if let Some(c) = kenv.get(&id) { + template = Some(c); + break; + } + } + // Fallback for a const ingressed under a name outside the alias list. + let Some(template) = template.or_else(|| { + kenv + .consts + .iter() + .find_map(|(id, c)| if &id.addr == addr { Some(c.clone()) } else { None }) + }) else { return; }; for name in names { @@ -4424,6 +4486,34 @@ pub fn ingress_anon_block( } } + // Canonicity validation for Indc-only blocks, mirroring + // `ingress_muts_block` (docs/ix_canonicity.md §6.0). The proven IxVM + // kernel and the Meta ingress both enforce canonical block order; + // without the same check here the anon re-checker (`rs_kernel_check_anon`) + // accepts envs the proven kernel rejects — a silent divergence between + // the two checkers. + let mut indcs: Vec<(KId, &KConst)> = Vec::new(); + for (id, c) in &all_entries { + if matches!(c, KConst::Indc { .. }) { + indcs.push((id.clone(), c)); + } + } + let all_primary_indc = !indcs.is_empty() + && indcs.len() + == members.iter().filter(|m| matches!(m, IxonMutConst::Indc(_))).count(); + if all_primary_indc + && members.iter().all(|m| matches!(m, IxonMutConst::Indc(_))) + { + let entries_ref: &Vec<(KId, KConst)> = &all_entries; + let resolve_ctor = |cid: &KId| -> Option> { + entries_ref.iter().find(|(rid, _)| rid == cid).map(|(_, c)| c.clone()) + }; + crate::ix::kernel::canonical_check::validate_canonical_block_single_pass::< + Anon, + >(block_addr, &indcs, &resolve_ctor) + .map_err(|e| format!("{e}"))?; + } + insert_muts_entries(kenv, all_entries); Ok(member_kids) } diff --git a/src/ix/kernel/whnf.rs b/src/ix/kernel/whnf.rs index 16d74f4d..87d73ebb 100644 --- a/src/ix/kernel/whnf.rs +++ b/src/ix/kernel/whnf.rs @@ -885,7 +885,9 @@ impl TypeChecker<'_, M> { minors: u64_to_usize::(minors)?, indices: u64_to_usize::(indices)?, major_idx, - rules: rules.clone(), + // `rules` is already owned here (moved out of the KConst clone + // `try_get_const` returned) — do not clone it again. + rules, lvls, } }, @@ -1065,23 +1067,42 @@ impl TypeChecker<'_, M> { /// Nat literal iota can create a long chain of distinct predecessor terms. /// These terms are useful only while the current WHNF is executing; keeping /// each one in the global WHNF caches makes RSS linear in the literal. + /// Allocation-free spine probe: head expression and arg count without + /// materializing the spine. The transient-nat probes below run on + /// *every* whnf call **before** the cache lookup, so they must not + /// heap-allocate on the (overwhelmingly common) non-Nat-recursor path — + /// the previous implementation paid two `collect_app_spine` Vec + /// allocations plus a `KConst::Recr` clone per call, defeating the + /// cache on the hottest path in a full check. + fn spine_head_and_len(e: &KExpr) -> (&KExpr, usize) { + let mut cur = e; + let mut n = 0usize; + while let ExprData::App(f, _, _) = cur.data() { + n += 1; + cur = f; + } + (cur, n) + } + fn is_transient_nat_literal_work( &mut self, e: &KExpr, ) -> Result> { - if self.is_nat_literal_recursor_app(e)? { - return Ok(true); - } - - let (head, args) = collect_app_spine(e); + let (head, nargs) = Self::spine_head_and_len(e); let ExprData::Const(id, _, _) = head.data() else { return Ok(false); }; - - if id.addr == self.prims.nat_succ.addr && args.len() == 1 { - return self.is_nat_literal_recursor_app(&args[0]); + if id.addr == self.prims.nat_rec.addr + || id.addr == self.prims.nat_cases_on.addr + { + return self.is_nat_literal_recursor_app(e); + } + if id.addr == self.prims.nat_succ.addr + && nargs == 1 + && let ExprData::App(_, arg, _) = e.data() + { + return self.is_nat_literal_recursor_app(arg); } - Ok(false) } @@ -1089,7 +1110,9 @@ impl TypeChecker<'_, M> { &mut self, e: &KExpr, ) -> Result> { - let (head, spine) = collect_app_spine(e); + // Cheap pre-filter first; only fall through to the allocating spine + // collection + recursor lookup when the head can actually match. + let (head, _) = Self::spine_head_and_len(e); let ExprData::Const(id, _, _) = head.data() else { return Ok(false); }; @@ -1104,6 +1127,7 @@ impl TypeChecker<'_, M> { else { return Ok(false); }; + let (_, spine) = collect_app_spine(e); let major_idx = u64_to_usize::(params + motives + minors + indices)?; Ok( spine @@ -2885,6 +2909,15 @@ fn compute_nat_bin( b: &Nat, ) -> Option { use num_bigint::BigUint; + // Output-size cap for natively-computed results. `shiftLeft` and `pow` + // amplify a tiny term into a result exponentially larger than the input + // bytes (`Nat.shiftLeft 1 (1 << 40)` is a ~12-byte term demanding a + // ~128 GiB allocation). Refuse to materialize anything wider and leave + // the term stuck, exactly like the `pow` exponent guard below. 2^26 bits + // (8 MiB) keeps every `2^e, e ≤ ReducePowMaxExp` reduction the C++ + // kernel performs while bounding the allocation an adversarial term can + // force. + const MAX_NAT_REDUCE_BITS: u64 = 1 << 26; let zero = BigUint::ZERO; let r = if *addr == p.nat_add.addr { &a.0 + &b.0 @@ -2901,7 +2934,14 @@ fn compute_nat_bin( const REDUCE_POW_MAX_EXP: u64 = 1 << 24; // 16_777_216 match b.to_u64() { #[allow(clippy::cast_possible_truncation)] // guarded: exp <= 2^24 - Some(exp) if exp <= REDUCE_POW_MAX_EXP => a.0.pow(exp as u32), + Some(exp) if exp <= REDUCE_POW_MAX_EXP => { + // The exponent cap alone does not bound the *result*: a wide base + // with an allowed exponent still explodes (bits(a) * exp). + if a.0.bits().saturating_mul(exp) > MAX_NAT_REDUCE_BITS { + return None; + } + a.0.pow(exp as u32) + }, _ => return None, // too large to compute } } else if *addr == p.nat_gcd.addr { @@ -2913,8 +2953,15 @@ fn compute_nat_bin( } else if *addr == p.nat_xor.addr { &a.0 ^ &b.0 } else if *addr == p.nat_shift_left.addr { - let shift = usize::try_from(b.to_u64()?).ok()?; - &a.0 << shift + let shift = b.to_u64()?; + if a.0 == zero { + zero + } else if a.0.bits().saturating_add(shift) > MAX_NAT_REDUCE_BITS { + // Result has exactly bits(a) + shift bits — refuse to materialize. + return None; + } else { + &a.0 << usize::try_from(shift).ok()? + } } else if *addr == p.nat_shift_right.addr { let shift = usize::try_from(b.to_u64()?).ok()?; &a.0 >> shift diff --git a/src/ix/profile.rs b/src/ix/profile.rs index ed05c539..09075e08 100644 --- a/src/ix/profile.rs +++ b/src/ix/profile.rs @@ -159,7 +159,8 @@ impl BlockProfile { return Err(ProfileError::BadVersion(version)); } let n = r.u32()? as usize; - let mut blocks = Vec::with_capacity(n); + // Each block entry consumes exactly 48 bytes (32 addr + 8 + 4 + 4). + let mut blocks = Vec::with_capacity(n.min(r.remaining() / 48)); for _ in 0..n { let addr = Address::from_slice(r.take(32)?) .map_err(|_| ProfileError::Truncated)?; @@ -174,11 +175,11 @@ impl BlockProfile { }); } let num_edges = r.u64()? as usize; - let mut delta_row = Vec::with_capacity(n + 1); + let mut delta_row = Vec::with_capacity((n + 1).min(r.remaining() / 8 + 1)); for _ in 0..n + 1 { delta_row.push(r.u64()? as usize); } - let mut delta_col = Vec::with_capacity(num_edges); + let mut delta_col = Vec::with_capacity(num_edges.min(r.remaining() / 4)); for _ in 0..num_edges { delta_col.push(r.u32()?); } @@ -246,6 +247,13 @@ impl<'a> Reader<'a> { self.pos = end; Ok(s) } + /// Bytes left in the buffer. Used to cap `Vec` pre-allocations against + /// untrusted counts: a count whose elements cannot fit in the remaining + /// bytes is necessarily malformed, so allocating beyond it only serves + /// an attacker. + fn remaining(&self) -> usize { + self.buf.len() - self.pos + } fn u32(&mut self) -> Result { Ok(u32::from_le_bytes(self.take(4)?.try_into().unwrap())) } diff --git a/src/ix/shard.rs b/src/ix/shard.rs index 0fd9682e..49135307 100644 --- a/src/ix/shard.rs +++ b/src/ix/shard.rs @@ -188,6 +188,16 @@ impl Hypergraph { if num_shards <= 1 { return vec![0u32; n]; // everything in shard 0 } + // Infeasible request: more shards than blocks. `rec_bisect`'s leaf + // allocation (`ideal.clamp(lo, hi)`) requires `k ≤ |sub|` and panics + // otherwise (`lo > hi`); shards cannot be smaller than one block, so + // cap the request at one-block-per-shard instead of aborting. + if num_shards > n { + eprintln!( + "[shard] requested {num_shards} shards for {n} blocks; producing {n} single-block shards" + ); + return (0..n as u32).collect(); + } // Cap each block's balance weight at the ideal per-shard heartbeats // (total / num_shards). This keeps balancing heartbeat-aware while a balanced // split is achievable (few shards), but stops a single oversized *atomic* @@ -1066,9 +1076,15 @@ pub struct ShardInfo { /// Sum of `serialized_size` over `foreign_blocks` (this shard's share of the /// cross-shard ingress objective). pub cross_ingress: u64, - /// Sound assumption-tree root for this shard's conditional proof: the Merkle - /// root over the foreign part of the shard's static reference closure. `None` - /// until populated by the env-aware layer (the pure partitioner has no `Env`). + /// Assumption-tree root *candidate* for this shard's conditional proof. + /// + /// As populated by `shard_esp`, this is the Merkle root over + /// `foreign_blocks` — the **delta-tight** set the cost model observed — + /// NOT the foreign part of the shard's full static reference closure. + /// The sound static root required for proof generation needs the `.ixe` + /// env and is reconstructed by the env-aware layer (`plans/sharding.md` + /// §4); nothing on the proving path may consume this field as if it were + /// that root. `None` until populated. pub assumption_root: Option
, } @@ -1304,8 +1320,13 @@ pub fn shard_esp( let profile = BlockProfile::from_bytes(&bytes) .map_err(|e| format!("parse {esp_path}: {e}"))?; let h = Hypergraph::from_profile(&profile); - let shard_of = h.partition(num_shards, balance); - let mut manifest = ShardManifest::build(&profile, &shard_of, num_shards); + // Shards cannot be smaller than one block; an over-asked partition is + // capped (see `Hypergraph::partition`), so size the manifest by the + // effective count to avoid trailing empty shards. + let effective_shards = num_shards.min(profile.num_blocks()).max(1); + let shard_of = h.partition(effective_shards, balance); + let mut manifest = + ShardManifest::build(&profile, &shard_of, effective_shards); for shard in &mut manifest.shards { shard.assumption_root = crate::ix::ixon::merkle::merkle_root_canonical(&shard.foreign_blocks);