From ec0a6a62fb4a904e322c5fb97e82a12ccd1cac9b Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Thu, 4 Jun 2026 21:26:49 +0900
Subject: [PATCH 01/47] Scaffold the fedify bench subcommand

Add the skeleton for a new `fedify bench` subcommand in @fedify/cli that
will run ActivityPub-specific load benchmarks against a cooperative
Fedify target running in benchmark mode.

This first step wires the command into the CLI without the engine:

 -  Define the Optique `benchCommand` with the suite-file argument, the
    --target, --format, --output, --dry-run, and --allow-unsafe-target
    options, and the existing user-agent option (-u), plus a stub
    `runBench` that is fleshed out in later steps.
 -  Register the command in the runner and dispatcher, and add a `bench`
    section to the configuration schema.
 -  Add the `@cfworker/json-schema` (draft 2020-12 validator) and `yaml`
    dependencies used by the scenario format, to both deno.json and
    package.json.
 -  Cover argument parsing with tests.

https://github.com/fedify-dev/fedify/issues/783
https://github.com/fedify-dev/fedify/issues/744

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 deno.lock                              |  8 ++-
 packages/cli/deno.json                 |  2 +
 packages/cli/package.json              |  4 +-
 packages/cli/src/bench/action.ts       | 14 ++++
 packages/cli/src/bench/command.test.ts | 65 +++++++++++++++++
 packages/cli/src/bench/command.ts      | 97 ++++++++++++++++++++++++++
 packages/cli/src/bench/mod.ts          |  2 +
 packages/cli/src/config.ts             |  9 +++
 packages/cli/src/mod.ts                |  3 +
 packages/cli/src/runner.ts             |  2 +
 pnpm-lock.yaml                         | 72 ++++++++++---------
 11 files changed, 242 insertions(+), 36 deletions(-)
 create mode 100644 packages/cli/src/bench/action.ts
 create mode 100644 packages/cli/src/bench/command.test.ts
 create mode 100644 packages/cli/src/bench/command.ts
 create mode 100644 packages/cli/src/bench/mod.ts

diff --git a/deno.lock b/deno.lock
index bef73340b..884f61e52 100644
--- a/deno.lock
+++ b/deno.lock
@@ -9402,6 +9402,7 @@
           "jsr:@hongminhee/localtunnel@0.3",
           "jsr:@hono/hono@^4.8.3",
           "jsr:@valibot/valibot@^1.4.0",
+          "npm:@cfworker/json-schema@^4.1.1",
           "npm:@inquirer/prompts@^7.8.4",
           "npm:@jimp/core@^1.6.1",
           "npm:@jimp/wasm-webp@^1.6.1",
@@ -9414,10 +9415,12 @@
           "npm:ora@^8.2.0",
           "npm:shiki@^1.6.4",
           "npm:smol-toml@^1.6.1",
-          "npm:srvx@~0.8.7"
+          "npm:srvx@~0.8.7",
+          "npm:yaml@^2.9.0"
         ],
         "packageJson": {
           "dependencies": [
+            "npm:@cfworker/json-schema@^4.1.1",
             "npm:@hongminhee/localtunnel@0.3",
             "npm:@inquirer/prompts@^7.8.4",
             "npm:@jimp/core@^1.6.1",
@@ -9436,7 +9439,8 @@
             "npm:shiki@^1.6.4",
             "npm:smol-toml@^1.6.1",
             "npm:srvx@~0.8.7",
-            "npm:valibot@^1.4.0"
+            "npm:valibot@^1.4.0",
+            "npm:yaml@^2.9.0"
           ]
         }
       },
diff --git a/packages/cli/deno.json b/packages/cli/deno.json
index 08001fcba..0ba8d066e 100644
--- a/packages/cli/deno.json
+++ b/packages/cli/deno.json
@@ -4,6 +4,7 @@
   "license": "MIT",
   "exports": "./src/mod.ts",
   "imports": {
+    "@cfworker/json-schema": "npm:@cfworker/json-schema@^4.1.1",
     "@hongminhee/localtunnel": "jsr:@hongminhee/localtunnel@^0.3.0",
     "@inquirer/prompts": "npm:@inquirer/prompts@^7.8.4",
     "@jimp/core": "npm:@jimp/core@^1.6.1",
@@ -20,6 +21,7 @@
     "smol-toml": "npm:smol-toml@^1.6.1",
     "srvx": "npm:srvx@^0.8.7",
     "valibot": "jsr:@valibot/valibot@^1.4.0",
+    "yaml": "npm:yaml@^2.9.0",
     "#kv": "./src/kv.node.ts"
   },
   "exclude": [
diff --git a/packages/cli/package.json b/packages/cli/package.json
index 39a42ecbe..510693518 100644
--- a/packages/cli/package.json
+++ b/packages/cli/package.json
@@ -72,6 +72,7 @@
     }
   },
   "dependencies": {
+    "@cfworker/json-schema": "^4.1.1",
     "@fedify/fedify": "workspace:*",
     "@fedify/init": "workspace:*",
     "@fedify/relay": "workspace:*",
@@ -109,7 +110,8 @@
     "shiki": "^1.6.4",
     "smol-toml": "^1.6.1",
     "srvx": "^0.8.7",
-    "valibot": "^1.4.0"
+    "valibot": "^1.4.0",
+    "yaml": "^2.9.0"
   },
   "devDependencies": {
     "@types/bun": "catalog:",
diff --git a/packages/cli/src/bench/action.ts b/packages/cli/src/bench/action.ts
new file mode 100644
index 000000000..daa3b4985
--- /dev/null
+++ b/packages/cli/src/bench/action.ts
@@ -0,0 +1,14 @@
+import type { BenchCommand } from "./command.ts";
+
+/**
+ * Runs the `fedify bench` command.
+ *
+ * This is a placeholder that is fleshed out in subsequent steps; the engine,
+ * scenario runners, and reporting are wired in incrementally.
+ * @param command The parsed `bench` command options.
+ */
+export default function runBench(_command: BenchCommand): Promise<void> {
+  return Promise.reject(
+    new Error("fedify bench is not implemented yet."),
+  );
+}
diff --git a/packages/cli/src/bench/command.test.ts b/packages/cli/src/bench/command.test.ts
new file mode 100644
index 000000000..eb3dad4fb
--- /dev/null
+++ b/packages/cli/src/bench/command.test.ts
@@ -0,0 +1,65 @@
+import { parse } from "@optique/core/parser";
+import assert from "node:assert/strict";
+import test from "node:test";
+import { benchCommand } from "./command.ts";
+
+const COMMAND = "bench";
+const FILE = "suite.yaml";
+
+test("benchCommand - scenario file only", () => {
+  const result = parse(benchCommand, [COMMAND, FILE]);
+  assert.ok(result.success);
+  if (result.success) {
+    assert.strictEqual(result.value.command, COMMAND);
+    assert.strictEqual(result.value.scenario, FILE);
+    assert.strictEqual(result.value.target, undefined);
+    assert.strictEqual(result.value.format, "text");
+    assert.strictEqual(result.value.output, undefined);
+    assert.strictEqual(result.value.dryRun, false);
+    assert.strictEqual(result.value.allowUnsafeTarget, false);
+    // userAgent has a dynamic default value from getUserAgent().
+    assert.ok(result.value.userAgent?.startsWith("Fedify/"));
+  }
+});
+
+test("benchCommand - with all options", () => {
+  const result = parse(benchCommand, [
+    COMMAND,
+    FILE,
+    "--target",
+    "http://localhost:3000",
+    "--format",
+    "json",
+    "--output",
+    "report.json",
+    "--dry-run",
+    "--allow-unsafe-target",
+    "-u",
+    "MyAgent/1.0",
+  ]);
+  assert.ok(result.success);
+  if (result.success) {
+    assert.strictEqual(result.value.scenario, FILE);
+    assert.strictEqual(result.value.target, "http://localhost:3000");
+    assert.strictEqual(result.value.format, "json");
+    assert.strictEqual(result.value.output, "report.json");
+    assert.strictEqual(result.value.dryRun, true);
+    assert.strictEqual(result.value.allowUnsafeTarget, true);
+    assert.strictEqual(result.value.userAgent, "MyAgent/1.0");
+  }
+});
+
+test("benchCommand - missing scenario file fails", () => {
+  const result = parse(benchCommand, [COMMAND]);
+  assert.ok(!result.success);
+});
+
+test("benchCommand - invalid format value fails", () => {
+  const result = parse(benchCommand, [COMMAND, FILE, "--format", "xml"]);
+  assert.ok(!result.success);
+});
+
+test("benchCommand - unknown option fails", () => {
+  const result = parse(benchCommand, [COMMAND, FILE, "-Q"]);
+  assert.ok(!result.success);
+});
diff --git a/packages/cli/src/bench/command.ts b/packages/cli/src/bench/command.ts
new file mode 100644
index 000000000..0acc04e82
--- /dev/null
+++ b/packages/cli/src/bench/command.ts
@@ -0,0 +1,97 @@
+import { bindConfig } from "@optique/config";
+import {
+  argument,
+  choice,
+  command,
+  constant,
+  flag,
+  group,
+  type InferValue,
+  merge,
+  message,
+  object,
+  option,
+  optional,
+  string,
+  withDefault,
+} from "@optique/core";
+import { configContext } from "../config.ts";
+import { userAgentOption } from "../options.ts";
+
+const formatOption = bindConfig(
+  option(
+    "-f",
+    "--format",
+    choice(["text", "json", "markdown"], { metavar: "FORMAT" }),
+    {
+      description: message`The output format for the benchmark report.`,
+    },
+  ),
+  {
+    context: configContext,
+    key: (config) => config.bench?.format ?? "text",
+    default: "text",
+  },
+);
+
+const allowUnsafeTarget = bindConfig(
+  flag("--allow-unsafe-target", {
+    description:
+      message`Allow benchmarking a public target that does not advertise \
+benchmark mode.`,
+  }),
+  {
+    context: configContext,
+    key: (config) => config.bench?.allowUnsafeTarget ?? false,
+    default: false,
+  },
+);
+
+export const benchCommand = command(
+  "bench",
+  merge(
+    "Benchmark options",
+    object({
+      command: constant("bench"),
+      scenario: group(
+        "Arguments",
+        argument(string({ metavar: "SCENARIO_FILE" }), {
+          description:
+            message`Path to the benchmark suite file (YAML or JSON).`,
+        }),
+      ),
+      target: optional(
+        option("-t", "--target", string({ metavar: "URL" }), {
+          description: message`Override the target URL declared in the suite.`,
+        }),
+      ),
+      format: formatOption,
+      output: optional(
+        option("-o", "--output", string({ metavar: "OUTPUT_PATH" }), {
+          description:
+            message`Write the report to a file instead of standard output.`,
+        }),
+      ),
+      dryRun: withDefault(
+        flag("--dry-run", {
+          description:
+            message`Resolve discovery and print the plan without sending load.`,
+        }),
+        false,
+      ),
+      allowUnsafeTarget,
+    }),
+    userAgentOption,
+  ),
+  {
+    brief: message`Benchmark a Fedify federation workload.`,
+    description: message`Run an ActivityPub-specific load benchmark against a \
+cooperative Fedify target running in benchmark mode.
+
+The suite file declares the target, actors, and scenarios.  Only the \`inbox\` \
+and \`webfinger\` scenario types are executed in this version; the format \
+itself can express every scenario type.`,
+  },
+);
+
+export type BenchCommand = InferValue<typeof benchCommand>;
diff --git a/packages/cli/src/bench/mod.ts b/packages/cli/src/bench/mod.ts
new file mode 100644
index 000000000..4ca1ef232
--- /dev/null
+++ b/packages/cli/src/bench/mod.ts
@@ -0,0 +1,2 @@
+export { default as runBench } from "./action.ts";
+export { benchCommand } from "./command.ts";
diff --git a/packages/cli/src/config.ts b/packages/cli/src/config.ts
index ff5ecba6b..58b63664e 100644
--- a/packages/cli/src/config.ts
+++ b/packages/cli/src/config.ts
@@ -108,6 +108,14 @@ const nodeinfoSchema = object({
   showMetadata: optional(boolean()),
 });
 
+/**
+ * Schema for the bench command configuration.
+ */
+const benchSchema = object({
+  format: optional(picklist(["text", "json", "markdown"])),
+  allowUnsafeTarget: optional(boolean()),
+});
+
 /**
  * Schema for the complete configuration file.
  */
@@ -125,6 +133,7 @@ export const configSchema = object({
   inbox: optional(inboxSchema),
   relay: optional(relaySchema),
   nodeinfo: optional(nodeinfoSchema),
+  bench: optional(benchSchema),
 });
 
 /**
diff --git a/packages/cli/src/mod.ts b/packages/cli/src/mod.ts
index e7dd885e0..61172eb96 100644
--- a/packages/cli/src/mod.ts
+++ b/packages/cli/src/mod.ts
@@ -1,4 +1,5 @@
 #!/usr/bin/env -S node --disable-warning=ExperimentalWarning
+import { runBench } from "./bench/mod.ts";
 import { runGenerateVocab } from "./generate-vocab/mod.ts";
 import { runInbox } from "./inbox.tsx";
 import { runInit } from "./init/mod.ts";
@@ -28,6 +29,8 @@ async function main() {
     await runGenerateVocab(result);
   } else if (result.command === "relay") {
     await runRelay(result);
+  } else if (result.command === "bench") {
+    await runBench(result);
   } else {
     // Make this branch exhaustive for type safety, even though it should never happen:
     const _exhaustiveCheck: never = result;
diff --git a/packages/cli/src/runner.ts b/packages/cli/src/runner.ts
index cdc0cefce..db0cc0eaa 100644
--- a/packages/cli/src/runner.ts
+++ b/packages/cli/src/runner.ts
@@ -6,6 +6,7 @@ import { homedir } from "node:os";
 import { join } from "node:path";
 import process from "node:process";
 import { parse as parseToml } from "smol-toml";
+import { benchCommand } from "./bench/command.ts";
 import { configContext, tryLoadToml } from "./config.ts";
 import { generateVocabCommand } from "./generate-vocab/mod.ts";
 import { inboxCommand } from "./inbox/command.ts";
@@ -66,6 +67,7 @@ export const command = merge(
         inboxCommand,
         nodeInfoCommand,
         relayCommand,
+        benchCommand,
       ),
     ),
     group(
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index f6f7bec59..84a59ab96 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -849,7 +849,7 @@ importers:
         version: 0.10.8
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -865,7 +865,7 @@ importers:
     devDependencies:
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -884,7 +884,7 @@ importers:
         version: 0.8.71(@cloudflare/workers-types@4.20260511.1)(@vitest/runner@3.2.4)(@vitest/snapshot@3.2.4)(vitest@3.2.4(@types/debug@4.1.12)(@types/node@24.3.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.46.1)(tsx@4.21.0)(yaml@2.9.0))
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -897,6 +897,9 @@ importers:
 
   packages/cli:
     dependencies:
+      '@cfworker/json-schema':
+        specifier: ^4.1.1
+        version: 4.1.1
       '@fedify/fedify':
         specifier: workspace:*
         version: link:../fedify
@@ -1011,6 +1014,9 @@ importers:
       valibot:
         specifier: ^1.4.0
         version: 1.4.0(typescript@6.0.3)
+      yaml:
+        specifier: ^2.9.0
+        version: 2.9.0
     devDependencies:
       '@types/bun':
         specifier: 'catalog:'
@@ -1020,7 +1026,7 @@ importers:
         version: 22.19.1
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1045,7 +1051,7 @@ importers:
         version: 22.19.1
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1079,7 +1085,7 @@ importers:
     devDependencies:
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1098,7 +1104,7 @@ importers:
         version: 1.2.19(@types/react@19.1.8)
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1120,7 +1126,7 @@ importers:
         version: 22.19.1
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1142,7 +1148,7 @@ importers:
         version: 22.19.1
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1224,7 +1230,7 @@ importers:
         version: 4.20250617.4
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       tsx:
         specifier: ^4.21.0
         version: 4.21.0
@@ -1258,7 +1264,7 @@ importers:
         version: 0.5.1
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1277,7 +1283,7 @@ importers:
     devDependencies:
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1293,7 +1299,7 @@ importers:
     devDependencies:
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1333,7 +1339,7 @@ importers:
         version: 22.19.1
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1355,7 +1361,7 @@ importers:
         version: 22.19.1
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1386,7 +1392,7 @@ importers:
         version: 9.32.0(jiti@2.6.1)
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1417,7 +1423,7 @@ importers:
         version: link:../testing
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1442,7 +1448,7 @@ importers:
         version: 22.19.1
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1458,7 +1464,7 @@ importers:
     devDependencies:
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1489,7 +1495,7 @@ importers:
         version: 22.19.1
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1520,7 +1526,7 @@ importers:
         version: '@jsr/std__async@1.0.13'
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1554,7 +1560,7 @@ importers:
         version: 22.19.1
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1579,7 +1585,7 @@ importers:
         version: link:../vocab-runtime
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1598,7 +1604,7 @@ importers:
     devDependencies:
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1626,7 +1632,7 @@ importers:
         version: '@jsr/std__async@1.0.13'
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1642,7 +1648,7 @@ importers:
     devDependencies:
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1670,7 +1676,7 @@ importers:
         version: '@jsr/std__async@1.0.13'
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1685,7 +1691,7 @@ importers:
         version: 22.19.1
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1740,7 +1746,7 @@ importers:
         version: 12.6.0
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1780,7 +1786,7 @@ importers:
         version: 12.6.0
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1805,7 +1811,7 @@ importers:
         version: 22.19.1
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -1836,7 +1842,7 @@ importers:
         version: 12.6.0
       tsdown:
         specifier: 'catalog:'
-        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34)
+        version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0))
       typescript:
         specifier: 'catalog:'
         version: 6.0.3
@@ -25494,7 +25500,7 @@ snapshots:
       minimist: 1.2.8
       strip-bom: 3.0.0
 
-  tsdown@0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34):
+  tsdown@0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0)):
     dependencies:
       ansis: 4.3.0
       cac: 7.0.0

From 1815cdec1fafd78da73a0dc31450e4a40ebeed47 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Thu, 4 Jun 2026 21:37:25 +0900
Subject: [PATCH 02/47] Add a log-linear histogram for benchmark percentiles

Add a lightweight HdrHistogram-style log-linear histogram used by the
benchmark engine to record latency samples and compute percentiles with
bounded relative error.  Values are bucketed by octave and split into
linear sub-buckets, so the relative error stays roughly constant across
the whole range.  The structure is sparse, mergeable, and serializable,
which lets percentiles from several runs be re-aggregated from merged
bucket counts (rather than by averaging per-run percentiles) and lets
the report carry an optional serialized histogram.

Sub-bucket indices are derived from the mantissa ratio to avoid denormal
underflow, and non-positive samples (including -0) are normalized to the
zero bucket.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 .../cli/src/bench/metrics/histogram.test.ts   | 130 ++++++++++
 packages/cli/src/bench/metrics/histogram.ts   | 232 ++++++++++++++++++
 2 files changed, 362 insertions(+)
 create mode 100644 packages/cli/src/bench/metrics/histogram.test.ts
 create mode 100644 packages/cli/src/bench/metrics/histogram.ts

diff --git a/packages/cli/src/bench/metrics/histogram.test.ts b/packages/cli/src/bench/metrics/histogram.test.ts
new file mode 100644
index 000000000..0bb0a61fe
--- /dev/null
+++ b/packages/cli/src/bench/metrics/histogram.test.ts
@@ -0,0 +1,130 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { LogLinearHistogram } from "./histogram.ts";
+
+test("LogLinearHistogram - empty histogram", () => {
+  const h = new LogLinearHistogram();
+  assert.strictEqual(h.count, 0);
+  assert.strictEqual(h.min, 0);
+  assert.strictEqual(h.max, 0);
+  assert.strictEqual(h.mean, 0);
+  assert.strictEqual(h.percentile(50), 0);
+  assert.strictEqual(h.percentile(99), 0);
+});
+
+test("LogLinearHistogram - single value", () => {
+  const h = new LogLinearHistogram();
+  h.record(42);
+  assert.strictEqual(h.count, 1);
+  assert.strictEqual(h.min, 42);
+  assert.strictEqual(h.max, 42);
+  assert.strictEqual(h.mean, 42);
+  // p50 and p99 of a single sample are that sample (within bucket error,
+  // clamped to [min, max] which are exact here).
+  assert.strictEqual(h.percentile(50), 42);
+  assert.strictEqual(h.percentile(99), 42);
+});
+
+test("LogLinearHistogram - percentiles are monotonic and accurate", () => {
+  const h = new LogLinearHistogram();
+  for (let v = 1; v <= 1000; v++) h.record(v);
+  const p50 = h.percentile(50);
+  const p90 = h.percentile(90);
+  const p99 = h.percentile(99);
+  assert.ok(p50 <= p90, `p50 (${p50}) <= p90 (${p90})`);
+  assert.ok(p90 <= p99, `p90 (${p90}) <= p99 (${p99})`);
+  // Within 1% relative error of the true percentiles (500/900/990).
+  assert.ok(Math.abs(p50 - 500) / 500 < 0.01, `p50 ≈ 500, got ${p50}`);
+  assert.ok(Math.abs(p90 - 900) / 900 < 0.01, `p90 ≈ 900, got ${p90}`);
+  assert.ok(Math.abs(p99 - 990) / 990 < 0.01, `p99 ≈ 990, got ${p99}`);
+});
+
+test("LogLinearHistogram - handles sub-millisecond and large values", () => {
+  const h = new LogLinearHistogram();
+  for (const v of [0.25, 0.5, 0.75, 1.5, 3, 7.5, 1500, 30000]) h.record(v);
+  assert.strictEqual(h.count, 8);
+  assert.strictEqual(h.min, 0.25);
+  assert.strictEqual(h.max, 30000);
+  const p50 = h.percentile(50);
+  assert.ok(p50 >= 0.25 && p50 <= 30000);
+});
+
+test("LogLinearHistogram - records zero and clamps negatives to zero", () => {
+  const h = new LogLinearHistogram();
+  h.record(0);
+  h.record(-5); // clamped to 0
+  h.record(10);
+  assert.strictEqual(h.count, 3);
+  assert.strictEqual(h.min, 0);
+  assert.strictEqual(h.percentile(1), 0);
+  assert.strictEqual(h.percentile(50), 0);
+  assert.ok(h.percentile(99) >= 9 && h.percentile(99) <= 11);
+});
+
+test("LogLinearHistogram - tiny denormal value yields a finite percentile", () => {
+  const h = new LogLinearHistogram();
+  h.record(Number.MIN_VALUE);
+  assert.strictEqual(h.count, 1);
+  assert.ok(Number.isFinite(h.percentile(50)));
+  assert.ok(Number.isFinite(h.percentile(99)));
+});
+
+test("LogLinearHistogram - normalizes -0 to +0", () => {
+  const h = new LogLinearHistogram();
+  h.record(-0);
+  assert.ok(Object.is(h.min, 0));
+  assert.ok(Object.is(h.max, 0));
+  assert.ok(Object.is(h.toJSON().min, 0));
+});
+
+test("LogLinearHistogram - ignores non-finite values", () => {
+  const h = new LogLinearHistogram();
+  h.record(Number.NaN);
+  h.record(Number.POSITIVE_INFINITY);
+  h.record(5);
+  assert.strictEqual(h.count, 1);
+  assert.strictEqual(h.max, 5);
+});
+
+test("LogLinearHistogram - merge combines counts and bounds", () => {
+  const a = new LogLinearHistogram();
+  const b = new LogLinearHistogram();
+  for (let v = 1; v <= 500; v++) a.record(v);
+  for (let v = 501; v <= 1000; v++) b.record(v);
+  a.merge(b);
+  assert.strictEqual(a.count, 1000);
+  assert.strictEqual(a.min, 1);
+  assert.strictEqual(a.max, 1000);
+  const p50 = a.percentile(50);
+  assert.ok(Math.abs(p50 - 500) / 500 < 0.01, `merged p50 ≈ 500, got ${p50}`);
+});
+
+test("LogLinearHistogram - merge rejects mismatched subBucketCount", () => {
+  const a = new LogLinearHistogram({ subBucketCount: 64 });
+  const b = new LogLinearHistogram({ subBucketCount: 128 });
+  assert.throws(() => a.merge(b), TypeError);
+});
+
+test("LogLinearHistogram - toJSON/fromJSON round-trip", () => {
+  const h = new LogLinearHistogram();
+  for (let v = 1; v <= 1000; v++) h.record(v * 0.5);
+  const json = JSON.parse(JSON.stringify(h.toJSON()));
+  const restored = LogLinearHistogram.fromJSON(json);
+  assert.strictEqual(restored.count, h.count);
+  assert.strictEqual(restored.min, h.min);
+  assert.strictEqual(restored.max, h.max);
+  assert.strictEqual(restored.sum, h.sum);
+  assert.strictEqual(restored.percentile(50), h.percentile(50));
+  assert.strictEqual(restored.percentile(95), h.percentile(95));
+});
+
+test("LogLinearHistogram - rejects invalid subBucketCount", () => {
+  assert.throws(
+    () => new LogLinearHistogram({ subBucketCount: 0 }),
+    RangeError,
+  );
+  assert.throws(
+    () => new LogLinearHistogram({ subBucketCount: 1.5 }),
+    RangeError,
+  );
+});
diff --git a/packages/cli/src/bench/metrics/histogram.ts b/packages/cli/src/bench/metrics/histogram.ts
new file mode 100644
index 000000000..699dcbeca
--- /dev/null
+++ b/packages/cli/src/bench/metrics/histogram.ts
@@ -0,0 +1,232 @@
+/**
+ * A lightweight HdrHistogram-style log-linear histogram for recording latency
+ * samples and computing percentiles with bounded relative error.
+ *
+ * Values are bucketed by octave (a power-of-two band) and then split into a
+ * fixed number of linear sub-buckets within each octave, which keeps the
+ * relative error roughly constant across the whole range.  The structure is
+ * sparse (only non-empty buckets are stored), mergeable, and serializable, so
+ * percentiles over several runs can be recomputed from the merged buckets
+ * rather than approximated by averaging per-run percentiles.
+ * @since 2.3.0
+ * @module
+ */
+
+/** Default linear sub-buckets per octave; each spans 1/128 of the octave. */
+export const DEFAULT_SUB_BUCKET_COUNT = 128;
+
+/**
+ * The serialized form of a {@link LogLinearHistogram}.
+ * @since 2.3.0
+ */
+export interface SerializedHistogram {
+  /** The serialization format version; `1` is the only version defined. */
+  readonly version: 1;
+  /** The number of linear sub-buckets per octave. */
+  readonly subBucketCount: number;
+  /** The total number of recorded samples, including zeros. */
+  readonly count: number;
+  /** The number of recorded samples that were less than or equal to zero. */
+  readonly zeroCount: number;
+  /** The smallest recorded value, or `0` when empty. */
+  readonly min: number;
+  /** The largest recorded value, or `0` when empty. */
+  readonly max: number;
+  /** The sum of all recorded values, with non-positive samples counted as 0. */
+  readonly sum: number;
+  /** The bucket indices that have a non-zero count, in ascending order. */
+  readonly indices: readonly number[];
+  /** The per-bucket counts, parallel to {@link SerializedHistogram.indices}. */
+  readonly counts: readonly number[];
+}
+
+/**
+ * Options for constructing a {@link LogLinearHistogram}.
+ * @since 2.3.0
+ */
+export interface LogLinearHistogramOptions {
+  /**
+   * The number of linear sub-buckets per octave; must be a positive integer.
+   * Higher values reduce the relative error at the cost of memory.  Defaults
+   * to {@link DEFAULT_SUB_BUCKET_COUNT}.
+   */
+  readonly subBucketCount?: number;
+}
+
+/**
+ * A sparse, mergeable, and serializable log-linear histogram.
+ * @since 2.3.0
+ */
+export class LogLinearHistogram {
+  readonly subBucketCount: number;
+  #buckets: Map<number, number> = new Map();
+  #count = 0;
+  #zeroCount = 0;
+  #sum = 0;
+  #min = Number.POSITIVE_INFINITY;
+  #max = Number.NEGATIVE_INFINITY;
+
+  constructor(options: LogLinearHistogramOptions = {}) {
+    const subBucketCount = options.subBucketCount ?? DEFAULT_SUB_BUCKET_COUNT;
+    if (!Number.isInteger(subBucketCount) || subBucketCount < 1) {
+      throw new RangeError(
+        `subBucketCount must be a positive integer; got ${subBucketCount}.`,
+      );
+    }
+    this.subBucketCount = subBucketCount;
+  }
+
+  /** The total number of recorded samples, including zeros. */
+  get count(): number {
+    return this.#count;
+  }
+
+  /** The smallest recorded value, or `0` when the histogram is empty. */
+  get min(): number {
+    return this.#count === 0 ? 0 : this.#min;
+  }
+
+  /** The largest recorded value, or `0` when the histogram is empty. */
+  get max(): number {
+    return this.#count === 0 ? 0 : this.#max;
+  }
+
+  /** The arithmetic mean of all recorded values, or `0` when empty. */
+  get mean(): number {
+    return this.#count === 0 ? 0 : this.#sum / this.#count;
+  }
+
+  /** The sum of all recorded values, as accumulated in floating point. */
+  get sum(): number {
+    return this.#sum;
+  }
+
+  /**
+   * Records a single sample.
+   * @param value The value to record.  Non-finite values are ignored; values
+   *              `<= 0` are recorded as `0`, since latency is never negative.
+   */
+  record(value: number): void {
+    if (!Number.isFinite(value)) return;
+    const v = value <= 0 ? 0 : value;
+    this.#count++;
+    this.#sum += v;
+    if (v < this.#min) this.#min = v;
+    if (v > this.#max) this.#max = v;
+    if (v === 0) {
+      this.#zeroCount++;
+      return;
+    }
+    const index = this.#indexOf(v);
+    this.#buckets.set(index, (this.#buckets.get(index) ?? 0) + 1);
+  }
+
+  /**
+   * Computes an estimated percentile.
+   * @param p The percentile to compute; `p <= 0` yields the minimum and
+   *          `p >= 100` the maximum.
+   * @returns The estimated value, or `0` when the histogram is empty.
+   */
+  percentile(p: number): number {
+    if (this.#count === 0) return 0;
+    if (p <= 0) return this.#min;
+    if (p >= 100) return this.#max;
+    const target = Math.ceil((p / 100) * this.#count);
+    let accumulated = this.#zeroCount;
+    if (accumulated >= target) return 0;
+    const entries = [...this.#buckets].sort(([a], [b]) => a - b);
+    for (const [index, count] of entries) {
+      accumulated += count;
+      if (accumulated >= target) {
+        return this.#clamp(this.#representativeValue(index));
+      }
+    }
+    return this.#max;
+  }
+
+  /**
+   * Merges another histogram into this one.
+   * @param other The histogram to merge in.
+   * @throws {TypeError} If the two histograms differ in `subBucketCount`.
+   */
+  merge(other: LogLinearHistogram): void {
+    if (other.subBucketCount !== this.subBucketCount) {
+      throw new TypeError(
+        "Cannot merge histograms with different subBucketCount " +
+          `(${this.subBucketCount} vs ${other.subBucketCount}).`,
+      );
+    }
+    if (other.#count === 0) return;
+    for (const [index, count] of other.#buckets) {
+      this.#buckets.set(index, (this.#buckets.get(index) ?? 0) + count);
+    }
+    this.#count += other.#count;
+    this.#zeroCount += other.#zeroCount;
+    this.#sum += other.#sum;
+    if (other.#min < this.#min) this.#min = other.#min;
+    if (other.#max > this.#max) this.#max = other.#max;
+  }
+
+  /** Serializes the histogram to a plain JSON-compatible object. */
+  toJSON(): SerializedHistogram {
+    const entries = [...this.#buckets].sort(([a], [b]) => a - b);
+    return {
+      version: 1,
+      subBucketCount: this.subBucketCount,
+      count: this.#count,
+      zeroCount: this.#zeroCount,
+      min: this.min,
+      max: this.max,
+      sum: this.#sum,
+      indices: entries.map(([index]) => index),
+      counts: entries.map(([, count]) => count),
+    };
+  }
+
+  /**
+   * Reconstructs a histogram from its serialized form.
+   * @throws {TypeError} If the version is unsupported or the arrays mismatch.
+   */
+  static fromJSON(json: SerializedHistogram): LogLinearHistogram {
+    if ((json.version as number) !== 1) {
+      throw new TypeError(`Unsupported histogram version: ${json.version}.`);
+    }
+    if (json.indices.length !== json.counts.length) {
+      throw new TypeError(
+        "Serialized histogram indices and counts must have equal length.",
+      );
+    }
+    const histogram = new LogLinearHistogram({
+      subBucketCount: json.subBucketCount,
+    });
+    for (let i = 0; i < json.indices.length; i++) {
+      histogram.#buckets.set(json.indices[i], json.counts[i]);
+    }
+    histogram.#count = json.count;
+    histogram.#zeroCount = json.zeroCount;
+    histogram.#sum = json.sum;
+    histogram.#min = json.count === 0 ? Number.POSITIVE_INFINITY : json.min;
+    histogram.#max = json.count === 0 ? Number.NEGATIVE_INFINITY : json.max;
+    return histogram;
+  }
+
+  #indexOf(value: number): number {
+    const octave = Math.floor(Math.log2(value));
+    // Use the mantissa ratio (value / 2**octave lies in [1, 2)); dividing by a
+    // sub-bucket width would underflow to 0 for denormals and yield NaN.
+    const raw = Math.floor((value / 2 ** octave - 1) * this.subBucketCount);
+    // Guard against floating-point drift pushing the sub-bucket out of range.
+    const sub = Math.min(Math.max(raw, 0), this.subBucketCount - 1);
+    return octave * this.subBucketCount + sub;
+  }
+
+  #representativeValue(index: number): number {
+    const octave = Math.floor(index / this.subBucketCount);
+    const sub = index - octave * this.subBucketCount;
+    return 2 ** octave * (1 + (sub + 0.5) / this.subBucketCount);
+  }
+
+  #clamp(value: number): number {
+    return Math.min(Math.max(value, this.#min), this.#max);
+  }
+}

From 9e9aba25eaf265dd3fb6b8dfe1ad73860d046265 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Thu, 4 Jun 2026 21:51:37 +0900
Subject: [PATCH 03/47] Add scenario-format primitives for fedify bench

Add the small, pure building blocks the scenario format is built on:

 -  `asList()`: scalar-or-list coercion, so fields such as `recipient`,
    `seed`, `collection`, and `type` can accept either a single value or a
    list while the common single-value case stays terse.
 -  `parseSize()` / `resolveGenerate()`: typed payload-generation
    directives (e.g. `content: { generate: lorem, size: 2KB }`) that
    produce deterministic output of an exact byte size, with the size
    parser bounded to the safe-integer range.
 -  A logic-less GitHub-Actions-style `${{ ... }}` template engine
    (dotted-path resolution plus whitelisted helper calls).  Lookups go
    through own properties only, with a denylist for prototype members,
    and unclosed delimiters, trailing text, and unbalanced quotes are
    rejected rather than silently mishandled, so the format cannot turn
    into a programming language.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 .../cli/src/bench/scenario/coerce.test.ts     |  21 +++
 packages/cli/src/bench/scenario/coerce.ts     |  23 +++
 .../cli/src/bench/template/generate.test.ts   |  73 ++++++++
 packages/cli/src/bench/template/generate.ts   | 116 ++++++++++++
 packages/cli/src/bench/template/helpers.ts    |  26 +++
 .../cli/src/bench/template/template.test.ts   | 117 ++++++++++++
 packages/cli/src/bench/template/template.ts   | 170 ++++++++++++++++++
 7 files changed, 546 insertions(+)
 create mode 100644 packages/cli/src/bench/scenario/coerce.test.ts
 create mode 100644 packages/cli/src/bench/scenario/coerce.ts
 create mode 100644 packages/cli/src/bench/template/generate.test.ts
 create mode 100644 packages/cli/src/bench/template/generate.ts
 create mode 100644 packages/cli/src/bench/template/helpers.ts
 create mode 100644 packages/cli/src/bench/template/template.test.ts
 create mode 100644 packages/cli/src/bench/template/template.ts

diff --git a/packages/cli/src/bench/scenario/coerce.test.ts b/packages/cli/src/bench/scenario/coerce.test.ts
new file mode 100644
index 000000000..c50678d86
--- /dev/null
+++ b/packages/cli/src/bench/scenario/coerce.test.ts
@@ -0,0 +1,21 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { asList } from "./coerce.ts";
+
+test("asList - wraps a scalar", () => {
+  assert.deepEqual(asList("a"), ["a"]);
+  assert.deepEqual(asList(42), [42]);
+  assert.deepEqual(asList(false), [false]); // falsy scalars are still wrapped
+});
+
+test("asList - copies a list", () => {
+  const input = ["a", "b"];
+  const output = asList(input);
+  assert.deepEqual(output, ["a", "b"]);
+  assert.notStrictEqual(output, input); // a copy, not the same array object
+});
+
+test("asList - empty for null and undefined", () => {
+  assert.deepEqual(asList(undefined), []);
+  assert.deepEqual(asList(null), []);
+});
diff --git a/packages/cli/src/bench/scenario/coerce.ts b/packages/cli/src/bench/scenario/coerce.ts
new file mode 100644
index 000000000..b42db54b6
--- /dev/null
+++ b/packages/cli/src/bench/scenario/coerce.ts
@@ -0,0 +1,23 @@
+/**
+ * Scalar-or-list coercion used throughout the scenario format, where many
+ * fields (`recipient`, `seed`, `collection`, `type`, and so on) accept either a
+ * single value or a list of values so the common single-value case stays terse.
+ * @since 2.3.0
+ * @module
+ */
+
+/**
+ * Coerces a scalar-or-list field into a freshly allocated array.
+ *
+ * Lists are shallow-copied, so the caller may mutate the result freely; a lone
+ * value is wrapped in a one-element array; `null`/`undefined` yields `[]`.
+ * @typeParam T The element type.
+ * @param value A single value, a list of values, or nothing.
+ * @returns A new array holding the value(s).
+ */
+export function asList<T>(
+  value: T | readonly T[] | null | undefined,
+): T[] {
+  if (Array.isArray(value)) return [...value];
+  return value == null ? [] : [value as T];
+}
diff --git a/packages/cli/src/bench/template/generate.test.ts b/packages/cli/src/bench/template/generate.test.ts
new file mode 100644
index 000000000..edfffe3d9
--- /dev/null
+++ b/packages/cli/src/bench/template/generate.test.ts
@@ -0,0 +1,73 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import {
+  type GenerateDirective,
+  isGenerateDirective,
+  parseSize,
+  resolveGenerate,
+} from "./generate.ts";
+
+test("parseSize - bare number is bytes", () => {
+  assert.strictEqual(parseSize(512), 512);
+  assert.strictEqual(parseSize("512"), 512);
+});
+
+test("parseSize - binary units", () => {
+  assert.strictEqual(parseSize("2KB"), 2048);
+  assert.strictEqual(parseSize("1KiB"), 1024); // KiB is an alias of KB
+  assert.strictEqual(parseSize("1.5MB"), Math.floor(1.5 * 1024 * 1024));
+  assert.strictEqual(parseSize("1GB"), 1024 ** 3);
+});
+
+test("parseSize - case-insensitive and whitespace-tolerant", () => {
+  assert.strictEqual(parseSize("10 mb"), 10 * 1024 * 1024);
+  assert.strictEqual(parseSize("  4kb  "), 4096);
+});
+
+test("parseSize - rejects invalid and negative values", () => {
+  assert.throws(() => parseSize("abc"), RangeError);
+  assert.throws(() => parseSize("2 tb"), RangeError); // unsupported unit
+  assert.throws(() => parseSize(-5), RangeError);
+  assert.throws(() => parseSize("-5"), RangeError);
+});
+
+test("parseSize - rejects values beyond the safe integer range", () => {
+  assert.throws(() => parseSize("9999999999999999999 gb"), RangeError);
+  assert.throws(() => parseSize(1e30), RangeError);
+});
+
+test("isGenerateDirective - distinguishes directives from literals", () => {
+  assert.ok(isGenerateDirective({ generate: "lorem" }));
+  assert.ok(isGenerateDirective({ generate: "lorem", size: "2KB" }));
+  assert.ok(!isGenerateDirective("plain string"));
+  assert.ok(!isGenerateDirective({}));
+  assert.ok(!isGenerateDirective(null));
+  assert.ok(!isGenerateDirective(["lorem"]));
+  // An inherited `generate` does not count; only own properties do.
+  assert.ok(!isGenerateDirective(Object.create({ generate: "lorem" })));
+});
+
+test("resolveGenerate - lorem produces exact byte size", () => {
+  const directive: GenerateDirective = { generate: "lorem", size: "100" };
+  const out = resolveGenerate(directive);
+  assert.strictEqual(out.length, 100); // ASCII: length equals bytes
+  // Deterministic across calls.
+  assert.strictEqual(resolveGenerate(directive), out);
+});
+
+test("resolveGenerate - lorem fills sizes larger than the corpus", () => {
+  const out = resolveGenerate({ generate: "lorem", size: "4KB" });
+  assert.strictEqual(out.length, 4096);
+});
+
+test("resolveGenerate - zero or missing size yields empty string", () => {
+  assert.strictEqual(resolveGenerate({ generate: "lorem", size: 0 }), "");
+  assert.strictEqual(resolveGenerate({ generate: "lorem" }), ""); // no size
+});
+
+test("resolveGenerate - unknown generator throws", () => {
+  assert.throws(
+    () => resolveGenerate({ generate: "markov" }),
+    RangeError,
+  );
+});
diff --git a/packages/cli/src/bench/template/generate.ts b/packages/cli/src/bench/template/generate.ts
new file mode 100644
index 000000000..49a8b4ab7
--- /dev/null
+++ b/packages/cli/src/bench/template/generate.ts
@@ -0,0 +1,116 @@
+/**
+ * Typed payload-generation directives for the scenario format.
+ *
+ * Rather than templating payload bodies as strings, the format uses typed
+ * directives such as `content: { generate: lorem, size: 2KB }`, which are
+ * JSON-Schema-validatable and produce deterministic output of a given byte
+ * size.
+ * @since 2.3.0
+ * @module
+ */
+
+/** Byte multipliers per lowercase unit accepted by {@link parseSize}. */
+const SIZE_UNITS: Readonly<Record<string, number>> = {
+  b: 1,
+  kb: 1024,
+  kib: 1024,
+  mb: 1024 ** 2,
+  mib: 1024 ** 2,
+  gb: 1024 ** 3,
+  gib: 1024 ** 3,
+};
+
+const SIZE_RE = /^\s*(\d+(?:\.\d+)?)\s*(b|kb|kib|mb|mib|gb|gib)?\s*$/i;
+
+/**
+ * Converts a human-friendly byte size (`"2KB"`, `"1.5MiB"`, `512`, ...) into
+ * a whole number of bytes.  All units are binary, so `KB` and `KiB` both mean
+ * 1024 bytes; a value without a unit is taken as bytes.  Fractions round down.
+ * @param value A size string or a plain number of bytes.
+ * @returns The size in bytes, as a non-negative safe integer.
+ * @throws {RangeError} If the value is malformed, negative, or too large to
+ *                      be a safe integer.
+ */
+export function parseSize(value: string | number): number {
+  if (typeof value === "string") {
+    const parsed = SIZE_RE.exec(value);
+    if (parsed === null) {
+      throw new RangeError(`Invalid size: ${JSON.stringify(value)}.`);
+    }
+    const [, digits, suffix = "b"] = parsed;
+    const factor = SIZE_UNITS[suffix.toLowerCase()];
+    return ensureSafe(Math.floor(Number.parseFloat(digits) * factor), value);
+  }
+  if (value < 0 || !Number.isFinite(value)) {
+    throw new RangeError(`Invalid size: ${value}.`);
+  }
+  return ensureSafe(Math.floor(value), value);
+}
+
+/** Returns `bytes` unchanged, or throws when it is not a safe integer. */
+function ensureSafe(bytes: number, original: string | number): number {
+  if (Number.isSafeInteger(bytes)) return bytes;
+  throw new RangeError(`Size out of range: ${JSON.stringify(original)}.`);
+}
+
+/**
+ * A typed payload-generation directive.
+ * @since 2.3.0
+ */
+export interface GenerateDirective {
+  /** The name of the generator to use, e.g. `"lorem"`. */
+  readonly generate: string;
+  /** The output size, e.g. `"2KB"` or a byte count; omitted means empty. */
+  readonly size?: string | number;
+}
+
+/**
+ * Checks whether a value is a {@link GenerateDirective}, i.e. a non-array
+ * object with an own string `generate` property, rather than a plain literal.
+ * @param value The value to test.
+ * @returns `true` if the value is a generate directive.
+ */
+export function isGenerateDirective(
+  value: unknown,
+): value is GenerateDirective {
+  if (typeof value !== "object" || value === null) return false;
+  if (Array.isArray(value) || !Object.hasOwn(value, "generate")) return false;
+  return typeof (value as { generate?: unknown }).generate === "string";
+}
+
+/** The fixed lorem ipsum passage that the `lorem` generator cycles through. */
+const LOREM =
+  "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod " +
+  "tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim " +
+  "veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea " +
+  "commodo consequat. Duis aute irure dolor in reprehenderit in voluptate " +
+  "velit esse cillum dolore eu fugiat nulla pariatur. ";
+
+/**
+ * Turns a {@link GenerateDirective} into its deterministic payload string.
+ *
+ * The payload is pure ASCII, so its length in characters equals its size in
+ * bytes, and the same directive always produces the same text, which keeps
+ * benchmark payloads reproducible.  A directive without a size yields `""`.
+ * @param directive The directive to resolve.
+ * @returns The generated payload string.
+ * @throws {RangeError} If the generator is unknown or the size is invalid.
+ */
+export function resolveGenerate(directive: GenerateDirective): string {
+  const { generate, size } = directive;
+  // The size is parsed first, so a malformed size is reported first.
+  const bytes = size == null ? 0 : parseSize(size);
+  if (generate === "lorem") return generateLorem(bytes);
+  throw new RangeError(
+    `Unknown payload generator: ${JSON.stringify(generate)}.`,
+  );
+}
+
+/**
+ * Cycles the lorem corpus until exactly `size` characters have been produced.
+ */
+function generateLorem(size: number): string {
+  if (!(size > 0)) return "";
+  // padEnd() repeats the filler string and truncates it at the target length.
+  return "".padEnd(size, LOREM);
+}
diff --git a/packages/cli/src/bench/template/helpers.ts b/packages/cli/src/bench/template/helpers.ts
new file mode 100644
index 000000000..c4f5055cb
--- /dev/null
+++ b/packages/cli/src/bench/template/helpers.ts
@@ -0,0 +1,26 @@
+/**
+ * The default whitelisted helpers available in `${{ ... }}` expressions.
+ *
+ * Runtime-specific helpers (such as actor and target accessors) are added on
+ * top of these when the benchmark context is assembled.
+ * @since 2.3.0
+ * @module
+ */
+
+import type { TemplateHelper } from "./template.ts";
+
+/**
+ * Builds a brand-new registry holding the default template helpers:
+ *
+ *  -  `uuid()` — a random UUID string.
+ *  -  `upper(value)` — the argument converted to a string, in uppercase.
+ *  -  `lower(value)` — the argument converted to a string, in lowercase.
+ * Each call returns a separate object, so extending it affects no one else.
+ * @returns A new record of helper functions.
+ */
+export function defaultHelpers(): Record<string, TemplateHelper> {
+  const uuid: TemplateHelper = () => crypto.randomUUID();
+  const upper: TemplateHelper = (value) => String(value).toUpperCase();
+  const lower: TemplateHelper = (value) => String(value).toLowerCase();
+  return { uuid, upper, lower };
+}
diff --git a/packages/cli/src/bench/template/template.test.ts b/packages/cli/src/bench/template/template.test.ts
new file mode 100644
index 000000000..10fe54000
--- /dev/null
+++ b/packages/cli/src/bench/template/template.test.ts
@@ -0,0 +1,117 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { defaultHelpers } from "./helpers.ts";
+import { renderTemplates, TemplateError } from "./template.ts";
+
+const ctx = {
+  values: { count: 3, target: { host: "example.com" }, name: "bob" },
+  helpers: defaultHelpers(),
+};
+
+test("renderTemplates - whole expression keeps the raw value type", () => {
+  assert.strictEqual(renderTemplates("${{ count }}", ctx), 3); // a number
+});
+
+test("renderTemplates - resolves a dotted path", () => {
+  assert.strictEqual(renderTemplates("${{ target.host }}", ctx), "example.com");
+});
+
+test("renderTemplates - interpolates inside surrounding text", () => {
+  assert.strictEqual(
+    renderTemplates("acct:alice@${{ target.host }}", ctx),
+    "acct:alice@example.com",
+  );
+});
+
+test("renderTemplates - interpolates multiple expressions", () => {
+  assert.strictEqual(
+    renderTemplates("${{ name }}-${{ count }}", ctx),
+    "bob-3",
+  );
+});
+
+test("renderTemplates - calls a helper, whole and embedded", () => {
+  assert.strictEqual(renderTemplates("${{ upper('hi') }}", ctx), "HI");
+  assert.strictEqual(renderTemplates("${{ upper(name) }}", ctx), "BOB");
+  assert.strictEqual(renderTemplates("x=${{ upper(name) }}", ctx), "x=BOB");
+});
+
+test("renderTemplates - walks nested objects and arrays", () => {
+  const input = {
+    recipient: "acct:a@${{ target.host }}",
+    counts: ["${{ count }}", "static"],
+    nested: { who: "${{ name }}" },
+  };
+  assert.deepEqual(renderTemplates(input, ctx), {
+    recipient: "acct:a@example.com",
+    counts: [3, "static"], // a whole expression keeps its type
+    nested: { who: "bob" },
+  });
+});
+
+test("renderTemplates - leaves non-template strings untouched", () => {
+  assert.strictEqual(renderTemplates("plain text", ctx), "plain text");
+  assert.strictEqual(renderTemplates("price: ${5}", ctx), "price: ${5}");
+});
+
+test("renderTemplates - non-string scalars pass through", () => {
+  assert.strictEqual(renderTemplates(42, ctx), 42);
+  assert.strictEqual(renderTemplates(true, ctx), true);
+  assert.strictEqual(renderTemplates(null, ctx), null);
+});
+
+test("renderTemplates - unknown helper throws", () => {
+  assert.throws(() => renderTemplates("${{ bogus() }}", ctx), TemplateError);
+});
+
+test("renderTemplates - unknown reference throws", () => {
+  assert.throws(() => renderTemplates("${{ missing }}", ctx), TemplateError);
+  assert.throws(
+    () => renderTemplates("${{ target.nope }}", ctx),
+    TemplateError,
+  );
+});
+
+test("renderTemplates - empty expression throws", () => {
+  assert.throws(() => renderTemplates("${{ }}", ctx), TemplateError);
+});
+
+test("renderTemplates - does not resolve prototype members", () => {
+  assert.throws(() => renderTemplates("${{ toString }}", ctx), TemplateError);
+  assert.throws(
+    () => renderTemplates("${{ constructor }}", ctx),
+    TemplateError,
+  );
+  assert.throws(() => renderTemplates("${{ __proto__ }}", ctx), TemplateError);
+  assert.throws(() => renderTemplates("${{ toString() }}", ctx), TemplateError);
+});
+
+test("renderTemplates - does not discard trailing text after a match", () => {
+  assert.strictEqual(
+    renderTemplates("${{ name }} trailing }}", ctx),
+    "bob trailing }}",
+  );
+});
+
+test("renderTemplates - throws on an unclosed expression", () => {
+  assert.throws(() => renderTemplates("hello ${{ name", ctx), TemplateError);
+  assert.throws(
+    () => renderTemplates("${{ name }} and ${{ count", ctx),
+    TemplateError,
+  );
+});
+
+test("renderTemplates - throws on an unbalanced quote in arguments", () => {
+  assert.throws(
+    () => renderTemplates("${{ upper('hi) }}", ctx),
+    TemplateError,
+  );
+});
+
+test("defaultHelpers - uuid returns a UUID string", () => {
+  const value = renderTemplates("${{ uuid() }}", ctx) as string;
+  assert.match(
+    value,
+    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/,
+  );
+});
diff --git a/packages/cli/src/bench/template/template.ts b/packages/cli/src/bench/template/template.ts
new file mode 100644
index 000000000..9cbdb0dad
--- /dev/null
+++ b/packages/cli/src/bench/template/template.ts
@@ -0,0 +1,170 @@
+/**
+ * A logic-less GitHub-Actions-style `${{ ... }}` template engine for scenario
+ * files.
+ *
+ * Expressions are intentionally restricted to property access on a context
+ * object (`${{ target.host }}`) and whitelisted helper calls
+ * (`${{ uuid() }}`).  There are no operators, conditionals, or loops, so a
+ * scenario file cannot turn into a programming language.  The `$` prefix also
+ * sidesteps the YAML gotcha where a value beginning with `{` is parsed as a
+ * flow mapping.
+ * @since 2.3.0
+ * @module
+ */
+
+/** A helper callable from a `${{ ... }}` expression with parsed arguments. */
+export type TemplateHelper = (...args: unknown[]) => unknown;
+
+/**
+ * The evaluation context for {@link renderTemplates}.
+ * @since 2.3.0
+ */
+export interface TemplateContext {
+  /** Values resolvable by dotted path (own properties), e.g. `target.host`. */
+  readonly values?: Readonly<Record<string, unknown>>;
+  /** Helpers callable as `name(args)`; only own entries are looked up. */
+  readonly helpers?: Readonly<Record<string, TemplateHelper>>;
+}
+
+/** Thrown for a malformed or unresolvable `${{ ... }}` expression. */
+export class TemplateError extends Error {}
+
+const EXPR_RE = /\$\{\{([\s\S]*?)\}\}/g; // lazy: ends at the first `}}`
+const CALL_RE = /^([A-Za-z_]\w*)\s*\(([\s\S]*)\)$/; // `name(args)`, anchored
+const IDENT_RE = /^[A-Za-z_]\w*$/; // one segment of a dotted path
+
+/** Property names that must never be resolved, to avoid prototype access. */
+const FORBIDDEN = new Set(["__proto__", "prototype", "constructor"]);
+
+/**
+ * Expands every `${{ ... }}` expression found anywhere inside a value.
+ * A string that is a single expression (ignoring surrounding whitespace)
+ * evaluates to the raw value, so `${{ count }}` can yield a number; an
+ * expression embedded in other text is stringified and spliced in.  Arrays and
+ * objects are rebuilt recursively; every other scalar is returned as is.
+ * @typeParam T The value type.
+ * @param value The value to render.
+ * @param context The evaluation context.
+ * @returns The rendered value, of the same shape as the input.
+ */
+export function renderTemplates<T>(value: T, context: TemplateContext = {}): T {
+  const rendered: unknown = renderValue(value, context);
+  return rendered as T;
+}
+
+// Renders one node: strings are expanded, containers are rebuilt recursively.
+function renderValue(value: unknown, ctx: TemplateContext): unknown {
+  if (typeof value === "string") return renderString(value, ctx);
+  if (Array.isArray(value)) return value.map((item) => renderValue(item, ctx));
+  if (value == null || typeof value !== "object") return value;
+  // Object.fromEntries() defines own data properties, so an own "__proto__"
+  // key (e.g. from JSON.parse) is kept as data instead of hitting the
+  // Object.prototype.__proto__ setter as a plain `out[key] = ...` would.
+  return Object.fromEntries(
+    Object.entries(value).map(([key, item]) => [key, renderValue(item, ctx)]),
+  );
+}
+
+function renderString(str: string, ctx: TemplateContext): unknown {
+  const found = Array.from(str.matchAll(EXPR_RE));
+  const openers = str.split("${{").length - 1;
+  // An opener without its own closing `}}` is treated as a typo, not as text.
+  if (openers !== found.length) {
+    throw new TemplateError(`Unclosed \${{ }} expression: ${str}`);
+  }
+  if (found.length === 0) return str;
+  if (found.length === 1) {
+    // A lone expression padded only by whitespace keeps its raw value type.
+    const [only] = found;
+    const before = str.slice(0, only.index);
+    const after = str.slice(only.index + only[0].length);
+    if (before.trim() === "" && after.trim() === "") {
+      return evalExpr(only[1], ctx);
+    }
+  }
+  // Otherwise interpolate, so that no surrounding text is silently dropped.
+  return str.replace(EXPR_RE, (_all, body) => stringify(evalExpr(body, ctx)));
+}
+
+function evalExpr(source: string, ctx: TemplateContext): unknown {
+  const expr = source.trim();
+  if (expr.length === 0) throw new TemplateError("Empty ${{ }} expression.");
+  const call = CALL_RE.exec(expr);
+  if (call === null) return resolvePath(expr, ctx.values ?? {});
+  const [, name, rawArgs] = call;
+  // Only own, non-forbidden registry entries count, so inherited members
+  // such as `toString` can never be invoked as helpers.
+  const registry: Readonly<Record<string, TemplateHelper>> = ctx.helpers ?? {};
+  const helper = !FORBIDDEN.has(name) && Object.hasOwn(registry, name)
+    ? registry[name]
+    : undefined;
+  if (typeof helper !== "function") {
+    throw new TemplateError(`Unknown helper: ${name}.`);
+  }
+  return helper(...parseArgs(rawArgs, ctx));
+}
+
+function parseArgs(source: string, ctx: TemplateContext): unknown[] {
+  const body = source.trim();
+  // An empty list must yield no arguments rather than one empty argument.
+  const pieces = body.length > 0 ? splitTopLevel(body) : [];
+  return pieces.map((piece) => parseArg(piece.trim(), ctx));
+}
+
+// Splits helper arguments on commas that are not inside a quoted literal.
+function splitTopLevel(source: string): string[] {
+  const parts: string[] = [];
+  let start = 0;
+  let openQuote = "";
+  for (let i = 0; i < source.length; i++) {
+    const char = source[i];
+    if (openQuote !== "") {
+      // Inside a literal only the matching quote character is significant.
+      if (char === openQuote) openQuote = "";
+    } else if (char === "'" || char === '"') {
+      openQuote = char;
+    } else if (char === ",") {
+      // A comma outside any literal ends the current argument.
+      parts.push(source.slice(start, i));
+      start = i + 1;
+    }
+  }
+  if (openQuote !== "") {
+    throw new TemplateError("Unbalanced quote in helper arguments.");
+  }
+  parts.push(source.slice(start));
+  return parts;
+}
+
+function parseArg(arg: string, ctx: TemplateContext): unknown {
+  // Literals come first: quoted strings, decimal numbers, then keywords.
+  const quoted = /^'([^']*)'$/.exec(arg) ?? /^"([^"]*)"$/.exec(arg);
+  if (quoted !== null) return quoted[1];
+  if (/^-?\d+(?:\.\d+)?$/.test(arg)) return Number(arg);
+  if (arg === "true" || arg === "false") return arg === "true";
+  if (arg === "null") return null;
+  return resolvePath(arg, ctx.values ?? {});
+}
+
+// Walks a dotted path through own properties only, starting at `values`.
+// Throws a TemplateError for malformed, forbidden, or missing segments.
+function resolvePath(
+  path: string,
+  values: Readonly<Record<string, unknown>>,
+): unknown {
+  return path.split(".").reduce<unknown>((scope, segment) => {
+    // Reject malformed or prototype-related segments before any lookup.
+    if (FORBIDDEN.has(segment) || !IDENT_RE.test(segment)) {
+      throw new TemplateError(`Invalid reference: ${path}.`);
+    }
+    // Only own properties of objects are reachable; anything else is unknown.
+    const isObject = typeof scope === "object" && scope !== null;
+    if (!isObject || !Object.hasOwn(scope as object, segment)) {
+      throw new TemplateError(`Unknown reference: ${path}.`);
+    }
+    return (scope as Record<string, unknown>)[segment];
+  }, values);
+}
+
+function stringify(value: unknown): string {
+  return String(value ?? ""); // null and undefined interpolate as nothing
+}

From bc11a6db4fc48cd54d879ce60da2fd100b24c126 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Thu, 4 Jun 2026 22:11:07 +0900
Subject: [PATCH 04/47] Add the benchmark scenario format and its JSON Schema

Define the `fedify bench` scenario suite format and its published
JSON Schema (draft 2020-12).  The format is a suite of `version`,
`target`, `defaults`, `actors`, and `scenarios`, with an `expect` block
per scenario, and it can express every scenario type discussed for the
tool (inbox, webfinger, actor, object, fanout, collection, failure,
mixed) even though only inbox and webfinger will have runners.

Rather than a schema-first single source, the published JSON Schema and
the TypeScript types are maintained as two artifacts kept identical by a
drift guard.  Runtime validation uses `@cfworker/json-schema`, and a
validated value is narrowed with an `as unknown as` cast.  Three
cross-field rules live in the schema where an editor can flag them:

 -  exactly one HTTP request signature scheme per actor group
    (`contains` + `minContains`/`maxContains`);
 -  `rate` XOR `concurrency` in a load block (`oneOf`);
 -  the allowed `expect` metrics per scenario type (`if`/`then` +
    `propertyNames`).

The embedded schema object is the editing source; *schema/bench/*
holds the hosted copy, regenerated by *scripts/generate-bench-schema.ts*.
Four guards run as tests: structural/meta validation, example-fixture
validation (valid and invalid fixtures covering every scenario type),
drift between the embedded object and the published file, and git-based
immutability of already-published version files.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 packages/cli/scripts/generate-bench-schema.ts |  28 +
 .../invalid/bad-expect-metric.yaml            |   9 +
 .../invalid/failure-missing-fault.yaml        |   6 +
 .../__fixtures__/invalid/missing-version.yaml |   6 +
 .../invalid/mixed-bad-metric.yaml             |  11 +
 .../invalid/rate-and-concurrency.yaml         |  11 +
 .../invalid/two-http-schemes.yaml             |   9 +
 .../__fixtures__/invalid/unknown-field.yaml   |   8 +
 .../__fixtures__/scenarios/all-types.yaml     |  75 ++
 .../bench/__fixtures__/scenarios/ci-gate.json |  27 +
 .../scenarios/getting-started.yaml            |  22 +
 packages/cli/src/bench/scenario/errors.ts     |  69 ++
 packages/cli/src/bench/scenario/load.ts       |  30 +
 packages/cli/src/bench/scenario/schema.ts     | 378 ++++++++++
 packages/cli/src/bench/scenario/types.ts      | 153 ++++
 .../cli/src/bench/scenario/validate.test.ts   |  93 +++
 packages/cli/src/bench/scenario/validate.ts   |  35 +
 packages/cli/src/bench/schema-paths.ts        |  33 +
 packages/cli/src/bench/schema.test.ts         | 119 +++
 packages/cli/src/bench/schemas.ts             |  31 +
 schema/bench/scenario-v1.json                 | 700 ++++++++++++++++++
 21 files changed, 1853 insertions(+)
 create mode 100644 packages/cli/scripts/generate-bench-schema.ts
 create mode 100644 packages/cli/src/bench/__fixtures__/invalid/bad-expect-metric.yaml
 create mode 100644 packages/cli/src/bench/__fixtures__/invalid/failure-missing-fault.yaml
 create mode 100644 packages/cli/src/bench/__fixtures__/invalid/missing-version.yaml
 create mode 100644 packages/cli/src/bench/__fixtures__/invalid/mixed-bad-metric.yaml
 create mode 100644 packages/cli/src/bench/__fixtures__/invalid/rate-and-concurrency.yaml
 create mode 100644 packages/cli/src/bench/__fixtures__/invalid/two-http-schemes.yaml
 create mode 100644 packages/cli/src/bench/__fixtures__/invalid/unknown-field.yaml
 create mode 100644 packages/cli/src/bench/__fixtures__/scenarios/all-types.yaml
 create mode 100644 packages/cli/src/bench/__fixtures__/scenarios/ci-gate.json
 create mode 100644 packages/cli/src/bench/__fixtures__/scenarios/getting-started.yaml
 create mode 100644 packages/cli/src/bench/scenario/errors.ts
 create mode 100644 packages/cli/src/bench/scenario/load.ts
 create mode 100644 packages/cli/src/bench/scenario/schema.ts
 create mode 100644 packages/cli/src/bench/scenario/types.ts
 create mode 100644 packages/cli/src/bench/scenario/validate.test.ts
 create mode 100644 packages/cli/src/bench/scenario/validate.ts
 create mode 100644 packages/cli/src/bench/schema-paths.ts
 create mode 100644 packages/cli/src/bench/schema.test.ts
 create mode 100644 packages/cli/src/bench/schemas.ts
 create mode 100644 schema/bench/scenario-v1.json

diff --git a/packages/cli/scripts/generate-bench-schema.ts b/packages/cli/scripts/generate-bench-schema.ts
new file mode 100644
index 000000000..141e936b5
--- /dev/null
+++ b/packages/cli/scripts/generate-bench-schema.ts
@@ -0,0 +1,28 @@
+/**
+ * Regenerates the published benchmark JSON Schema files under the repository's
+ * *schema/bench/* directory from the embedded schema objects.
+ *
+ * The embedded objects (under *packages/cli/src/bench/.../schema.ts*) are the
+ * editing source; the published *.json* files are the hosted copies.  A drift
+ * guard keeps the two identical, so run this script after editing an embedded
+ * schema.
+ *
+ * Usage: `deno run -A scripts/generate-bench-schema.ts`
+ * @module
+ */
+
+import { mkdir, writeFile } from "node:fs/promises";
+import { join } from "node:path";
+import { PUBLISHED_SCHEMAS } from "../src/bench/schemas.ts";
+import { SCHEMA_DIR, serializeSchema } from "../src/bench/schema-paths.ts";
+
+// Regenerates every published schema file inside SCHEMA_DIR, in list order.
+async function main(): Promise<void> {
+  await mkdir(SCHEMA_DIR, { recursive: true });
+  for (const item of PUBLISHED_SCHEMAS) {
+    const out = join(SCHEMA_DIR, item.fileName);
+    await writeFile(out, serializeSchema(item.schema), { encoding: "utf-8" });
+    console.error(`Wrote ${out}`);
+  }
+}
+await main();
diff --git a/packages/cli/src/bench/__fixtures__/invalid/bad-expect-metric.yaml b/packages/cli/src/bench/__fixtures__/invalid/bad-expect-metric.yaml
new file mode 100644
index 000000000..e5e17cd82
--- /dev/null
+++ b/packages/cli/src/bench/__fixtures__/invalid/bad-expect-metric.yaml
@@ -0,0 +1,9 @@
+# signatureVerification.* is not a valid expect metric for a webfinger scenario.
+version: 1
+target: http://localhost:3000
+scenarios:
+  - name: webfinger-lookup
+    type: webfinger
+    recipient: "acct:alice@example.com"
+    expect:
+      signatureVerification.p95: "< 10ms"
diff --git a/packages/cli/src/bench/__fixtures__/invalid/failure-missing-fault.yaml b/packages/cli/src/bench/__fixtures__/invalid/failure-missing-fault.yaml
new file mode 100644
index 000000000..88f6e3abd
--- /dev/null
+++ b/packages/cli/src/bench/__fixtures__/invalid/failure-missing-fault.yaml
@@ -0,0 +1,6 @@
+# A failure scenario must declare at least one fault.
+version: 1
+target: http://localhost:3000
+scenarios:
+  - name: broken
+    type: failure
diff --git a/packages/cli/src/bench/__fixtures__/invalid/missing-version.yaml b/packages/cli/src/bench/__fixtures__/invalid/missing-version.yaml
new file mode 100644
index 000000000..0234726f4
--- /dev/null
+++ b/packages/cli/src/bench/__fixtures__/invalid/missing-version.yaml
@@ -0,0 +1,6 @@
+# The top-level version field is required.
+target: http://localhost:3000
+scenarios:
+  - name: inbox-shared
+    type: inbox
+    recipient: "acct:alice@example.com"
diff --git a/packages/cli/src/bench/__fixtures__/invalid/mixed-bad-metric.yaml b/packages/cli/src/bench/__fixtures__/invalid/mixed-bad-metric.yaml
new file mode 100644
index 000000000..8990ebc0e
--- /dev/null
+++ b/packages/cli/src/bench/__fixtures__/invalid/mixed-bad-metric.yaml
@@ -0,0 +1,11 @@
+# "bogus.metric" is not a recognized metric for a mixed scenario's expect block.
+version: 1
+target: http://localhost:3000
+scenarios:
+  - name: blend
+    type: mixed
+    mix:
+      - { scenario: inbox-shared, weight: 80 }
+      - { scenario: webfinger-lookup, weight: 20 }
+    expect:
+      bogus.metric: ">= 1"
diff --git a/packages/cli/src/bench/__fixtures__/invalid/rate-and-concurrency.yaml b/packages/cli/src/bench/__fixtures__/invalid/rate-and-concurrency.yaml
new file mode 100644
index 000000000..9414f755b
--- /dev/null
+++ b/packages/cli/src/bench/__fixtures__/invalid/rate-and-concurrency.yaml
@@ -0,0 +1,11 @@
+# A load block must specify rate XOR concurrency, not both.
+version: 1
+target: http://localhost:3000
+defaults:
+  load:
+    rate: 100/s
+    concurrency: 50
+scenarios:
+  - name: inbox-shared
+    type: inbox
+    recipient: "acct:alice@example.com"
diff --git a/packages/cli/src/bench/__fixtures__/invalid/two-http-schemes.yaml b/packages/cli/src/bench/__fixtures__/invalid/two-http-schemes.yaml
new file mode 100644
index 000000000..37526324d
--- /dev/null
+++ b/packages/cli/src/bench/__fixtures__/invalid/two-http-schemes.yaml
@@ -0,0 +1,9 @@
+# An actor group must have exactly one HTTP request signature scheme.
+version: 1
+target: http://localhost:3000
+actors:
+  - signatureStandards: [draft-cavage-http-signatures-12, rfc9421]
+scenarios:
+  - name: inbox-shared
+    type: inbox
+    recipient: "acct:alice@example.com"
diff --git a/packages/cli/src/bench/__fixtures__/invalid/unknown-field.yaml b/packages/cli/src/bench/__fixtures__/invalid/unknown-field.yaml
new file mode 100644
index 000000000..7c2d342ce
--- /dev/null
+++ b/packages/cli/src/bench/__fixtures__/invalid/unknown-field.yaml
@@ -0,0 +1,8 @@
+# Unknown scenario fields are rejected (additionalProperties: false).
+version: 1
+target: http://localhost:3000
+scenarios:
+  - name: inbox-shared
+    type: inbox
+    recipient: "acct:alice@example.com"
+    bogusField: true
diff --git a/packages/cli/src/bench/__fixtures__/scenarios/all-types.yaml b/packages/cli/src/bench/__fixtures__/scenarios/all-types.yaml
new file mode 100644
index 000000000..07a82929a
--- /dev/null
+++ b/packages/cli/src/bench/__fixtures__/scenarios/all-types.yaml
@@ -0,0 +1,75 @@
+# yaml-language-server: $schema=https://json-schema.fedify.dev/bench/scenario-v1.json
+# Exercises every scenario type the format can express, even though only
+# `inbox` and `webfinger` have runners in this version.
+version: 1
+target: http://localhost:3000
+defaults:
+  duration: 30s
+  warmup: 5s
+  load:
+    rate: 100/s
+    arrival: poisson
+  signing: pipeline
+  runs: 3
+actors:
+  - name: "Mastodon-like actor ${{ index }}"
+    count: 3
+    signatureStandards: [draft-cavage-http-signatures-12, ld-signatures]
+  - name: "Hollo-like actor ${{ index }}"
+    count: 2
+    signatureStandards: [rfc9421, fep8b32]
+scenarios:
+  - name: inbox-shared
+    type: inbox
+    recipient: "acct:alice@${{ target.host }}"
+    inbox: shared
+    activity:
+      type: Create
+      embedObject: true
+      object:
+        type: Note
+        content: { generate: lorem, size: 2KB }
+    expect:
+      successRate: ">= 99%"
+      latency.p95: "< 100ms"
+  - name: webfinger-lookup
+    type: webfinger
+    recipient:
+      - "acct:alice@${{ target.host }}"
+      - "acct:bob@${{ target.host }}"
+    expect:
+      successRate: ">= 99%"
+  - name: actor-fetch
+    type: actor
+    recipient: "acct:alice@${{ target.host }}"
+    authenticated: true
+  - name: object-fetch
+    type: object
+    source:
+      seed: "acct:alice@${{ target.host }}"
+      collection: [outbox, featured]
+      limit: 500
+      type: Note
+  - name: collection-page
+    type: collection
+    recipient: "acct:alice@${{ target.host }}"
+    collection: followers
+  - name: fanout-1k
+    type: fanout
+    sender: alice
+    followers: 1000
+    trigger: { kind: benchmark-hook }
+    sinkBehavior: { latency: 50ms }
+    queueDrainTimeout: 2m
+    expect:
+      queueDrain.p95: "< 2s"
+      deliveryThroughput: ">= 500/s"
+  - name: bad-signature
+    type: failure
+    fault: [invalid-signature, missing-actor]
+  - name: realistic-blend
+    type: mixed
+    mix:
+      - { scenario: inbox-shared, weight: 70 }
+      - { scenario: object-fetch, weight: 20 }
+      - { scenario: webfinger-lookup, weight: 10 }
diff --git a/packages/cli/src/bench/__fixtures__/scenarios/ci-gate.json b/packages/cli/src/bench/__fixtures__/scenarios/ci-gate.json
new file mode 100644
index 000000000..efc9dfeee
--- /dev/null
+++ b/packages/cli/src/bench/__fixtures__/scenarios/ci-gate.json
@@ -0,0 +1,27 @@
+{
+  "$schema": "https://json-schema.fedify.dev/bench/scenario-v1.json",
+  "version": 1,
+  "target": "http://localhost:3000",
+  "defaults": {
+    "runs": 3,
+    "load": { "rate": "200/s" }
+  },
+  "actors": [
+    {
+      "signatureStandards": ["draft-cavage-http-signatures-12"]
+    }
+  ],
+  "scenarios": [
+    {
+      "name": "inbox-shared",
+      "type": "inbox",
+      "recipient": "acct:alice@example.com",
+      "expect": {
+        "successRate": ">= 99%",
+        "errors.5xx": "== 0",
+        "throughputPerSec": ">= 50",
+        "latency.p95": { "assert": "< 250ms", "severity": "warn" }
+      }
+    }
+  ]
+}
diff --git a/packages/cli/src/bench/__fixtures__/scenarios/getting-started.yaml b/packages/cli/src/bench/__fixtures__/scenarios/getting-started.yaml
new file mode 100644
index 000000000..9cfb77cc0
--- /dev/null
+++ b/packages/cli/src/bench/__fixtures__/scenarios/getting-started.yaml
@@ -0,0 +1,22 @@
+# yaml-language-server: $schema=https://json-schema.fedify.dev/bench/scenario-v1.json
+version: 1
+target: http://localhost:3000
+defaults:
+  duration: 60s
+  warmup: 10s
+  load:
+    concurrency: 50
+scenarios:
+  - name: inbox-shared
+    type: inbox
+    recipient: "acct:alice@${{ target.host }}"
+    inbox: shared
+    activity:
+      type: Create
+      embedObject: true
+      object:
+        type: Note
+        content: { generate: lorem, size: 2KB }
+    expect:
+      successRate: ">= 99%"
+      latency.p95: "< 100ms"
diff --git a/packages/cli/src/bench/scenario/errors.ts b/packages/cli/src/bench/scenario/errors.ts
new file mode 100644
index 000000000..13699a6e9
--- /dev/null
+++ b/packages/cli/src/bench/scenario/errors.ts
@@ -0,0 +1,69 @@
+/**
+ * Friendly error reporting for scenario validation failures.
+ *
+ * `@cfworker/json-schema` reports structural failures with a JSON-pointer
+ * instance location and a terse message.  Raw `oneOf`/`contains` failures read
+ * poorly, so this module turns the raw errors into a single readable message
+ * while keeping the schema authoritative for correctness.
+ * @since 2.3.0
+ * @module
+ */
+
+/** A raw validation error as reported by `@cfworker/json-schema`. */
+export interface RawValidationError {
+  readonly instanceLocation: string;
+  readonly keyword?: string;
+  readonly error: string;
+}
+
+/** An error raised when a scenario suite fails schema validation. */
+export class SuiteValidationError extends Error {
+  /** The problems as passed in; only `message` sorts and deduplicates them. */
+  readonly problems: readonly RawValidationError[];
+
+  constructor(problems: readonly RawValidationError[], source?: string) {
+    super(formatMessage(problems, source));
+    this.name = "SuiteValidationError";
+    this.problems = problems;
+  }
+}
+
+// Renders the error text: a heading line, then one bullet per problem.
+function formatMessage(
+  problems: readonly RawValidationError[],
+  source?: string,
+): string {
+  const subject = source ?? "scenario suite";
+  // With nothing to itemize, the heading alone is the whole message.
+  if (problems.length === 0) return `Invalid ${subject}.`;
+  const bullets: string[] = [];
+  for (const { instanceLocation, error } of dedupe(problems)) {
+    // A bare "#" (or empty) pointer means the root; otherwise drop the "#".
+    const isRoot = instanceLocation === "#" || instanceLocation === "";
+    const label = isRoot ? "(root)" : instanceLocation.replace(/^#/, "");
+    bullets.push(`  - ${label}: ${error}`);
+  }
+  return `Invalid ${subject}:\n${bullets.join("\n")}`;
+}
+
+// Removes repeated (location, message) pairs, deepest locations first.
+function dedupe(
+  problems: readonly RawValidationError[],
+): RawValidationError[] {
+  // Prefer the most specific (deepest) instance locations first; the copy
+  // keeps the caller's array untouched.
+  const deepestFirst = [...problems].sort((left, right) =>
+    depth(right.instanceLocation) - depth(left.instanceLocation)
+  );
+  // A Map iterates in insertion order, so the first-seen entry per key wins.
+  const unique = new Map<string, RawValidationError>();
+  for (const problem of deepestFirst) {
+    const key = JSON.stringify([problem.instanceLocation, problem.error]);
+    if (!unique.has(key)) unique.set(key, problem);
+  }
+  return [...unique.values()];
+}
+
+function depth(instanceLocation: string): number {
+  return instanceLocation.split("/").length - 1;
+}
diff --git a/packages/cli/src/bench/scenario/load.ts b/packages/cli/src/bench/scenario/load.ts
new file mode 100644
index 000000000..222c52dc7
--- /dev/null
+++ b/packages/cli/src/bench/scenario/load.ts
@@ -0,0 +1,30 @@
+/**
+ * Reading and parsing scenario suite files.
+ *
+ * Files may be written in YAML or JSON; because YAML is a superset of JSON, a
+ * single YAML parser handles both, and YAML anchors/aliases are available for
+ * in-document reuse.
+ * @since 2.3.0
+ * @module
+ */
+
+import { readFile } from "node:fs/promises";
+import { parse as parseYaml } from "yaml";
+
+/**
+ * Parses suite text with the YAML parser, which also accepts JSON input.
+ * @param text The suite file contents, written as YAML or JSON.
+ * @returns The parsed, still unvalidated value; pass it to `validateSuite()`.
+ */
+export function parseSuiteText(text: string): unknown {
+  return parseYaml(text);
+}
+
+/**
+ * Loads a suite file from disk as UTF-8 text and parses it.
+ * @param path The path to the suite file.
+ * @returns The parsed value, to be validated with `validateSuite()`.
+ */
+export async function loadSuiteFile(path: string): Promise<unknown> {
+  return parseSuiteText(await readFile(path, "utf-8"));
+}
diff --git a/packages/cli/src/bench/scenario/schema.ts b/packages/cli/src/bench/scenario/schema.ts
new file mode 100644
index 000000000..f2f18894a
--- /dev/null
+++ b/packages/cli/src/bench/scenario/schema.ts
@@ -0,0 +1,378 @@
+/**
+ * The embedded JSON Schema (draft 2020-12) for benchmark scenario suite files.
+ *
+ * This object is the runtime copy used by the validator; it is published,
+ * byte-for-byte, as *schema/bench/scenario-v1.json* and a drift guard keeps the
+ * two in sync.  The matching TypeScript types live in {@link ./types.ts}.
+ *
+ * The schema expresses every scenario type discussed for `fedify bench`
+ * (`inbox`, `webfinger`, `actor`, `object`, `fanout`, `collection`, `failure`,
+ * `mixed`), even though only `inbox` and `webfinger` have runners in this
+ * version.  Three cross-field rules are enforced here rather than in code:
+ *
+ *  -  exactly one HTTP request signature scheme per actor group
+ *     (`contains` + `minContains`/`maxContains`);
+ *  -  `rate` XOR `concurrency` in a load block (`oneOf`);
+ *  -  the allowed `expect` metrics per scenario type (`if`/`then` +
+ *     `propertyNames`).
+ * @since 2.3.0
+ * @module
+ */
+
+/** The hosted URL that serves the scenario schema. */
+export const SCENARIO_SCHEMA_ID =
+  "https://json-schema.fedify.dev/bench/scenario-v1.json";
+
+const READ_METRICS = [
+  "successRate",
+  "throughputPerSec",
+  "errors.total",
+  "errors.4xx",
+  "errors.5xx",
+  "latency.p50",
+  "latency.p95",
+  "latency.p99",
+  "latency.mean",
+  "latency.max",
+];
+
+const INBOX_METRICS = [
+  ...READ_METRICS,
+  "signatureVerification.p50",
+  "signatureVerification.p95",
+  "signatureVerification.p99",
+];
+
+const FANOUT_METRICS = [
+  "successRate",
+  "deliveryThroughput",
+  "errors.total",
+  "errors.4xx",
+  "errors.5xx",
+  "queueDrain.p50",
+  "queueDrain.p95",
+  "queueDrain.p99",
+];
+
+// A `mixed` scenario blends others, so it may assert any of their metrics.
+const MIXED_METRICS = [...new Set([...INBOX_METRICS, ...FANOUT_METRICS])];
+
+/** The benchmark scenario suite JSON Schema (draft 2020-12). */
+export const scenarioSchemaV1 = {
+  $schema: "https://json-schema.org/draft/2020-12/schema",
+  $id: SCENARIO_SCHEMA_ID,
+  title: "Fedify benchmark scenario suite",
+  type: "object",
+  required: ["version", "scenarios"],
+  additionalProperties: false,
+  properties: {
+    $schema: {
+      type: "string",
+      description: "An optional editor hint pointing at this schema.",
+    },
+    version: { const: 1 },
+    target: {
+      type: "string",
+      format: "uri",
+      description: "The target base URL; may be overridden by --target.",
+    },
+    defaults: { $ref: "#/$defs/defaults" },
+    actors: {
+      type: "array",
+      items: { $ref: "#/$defs/actorGroup" },
+    },
+    scenarios: {
+      type: "array",
+      minItems: 1,
+      items: { $ref: "#/$defs/scenario" },
+    },
+  },
+  $defs: {
+    duration: {
+      type: "string",
+      pattern: "^\\d+(\\.\\d+)?(ms|s|m|h)$",
+      description: "A duration such as 500ms, 30s, 2m, or 1h.",
+    },
+    rate: {
+      description: "An open-loop arrival rate such as 200/s, or a number.",
+      oneOf: [
+        { type: "number", exclusiveMinimum: 0 },
+        { type: "string", pattern: "^\\d+(\\.\\d+)?\\s*/\\s*(s|m|h)$" },
+      ],
+    },
+    size: {
+      description: "A byte size such as 2KB or a plain number of bytes.",
+      oneOf: [
+        { type: "number", minimum: 0 },
+        {
+          type: "string",
+          pattern:
+            "^\\s*\\d+(\\.\\d+)?\\s*([Bb]|[Kk][Bb]|[Kk][Ii][Bb]|[Mm][Bb]|[Mm][Ii][Bb]|[Gg][Bb]|[Gg][Ii][Bb])?\\s*$",
+        },
+      ],
+    },
+    signatureStandard: {
+      enum: [
+        "draft-cavage-http-signatures-12",
+        "rfc9421",
+        "ld-signatures",
+        "fep8b32",
+      ],
+    },
+    signingMode: { enum: ["jit", "pipeline", "presign"] },
+    arrival: { enum: ["constant", "poisson"] },
+    scalarOrListString: {
+      oneOf: [
+        { type: "string" },
+        { type: "array", items: { type: "string" }, minItems: 1 },
+      ],
+    },
+    load: {
+      type: "object",
+      additionalProperties: false,
+      properties: {
+        rate: { $ref: "#/$defs/rate" },
+        concurrency: { type: "integer", minimum: 1 },
+        arrival: { $ref: "#/$defs/arrival" },
+        maxInFlight: { type: "integer", minimum: 1 },
+      },
+      oneOf: [
+        { required: ["rate"], not: { required: ["concurrency"] } },
+        { required: ["concurrency"], not: { required: ["rate"] } },
+      ],
+    },
+    defaults: {
+      type: "object",
+      additionalProperties: false,
+      properties: {
+        duration: { $ref: "#/$defs/duration" },
+        warmup: { $ref: "#/$defs/duration" },
+        load: { $ref: "#/$defs/load" },
+        signing: { $ref: "#/$defs/signingMode" },
+        signatureTimeWindow: { type: "boolean" },
+        runs: { type: "integer", minimum: 1 },
+      },
+    },
+    actorGroup: {
+      type: "object",
+      additionalProperties: false,
+      required: ["signatureStandards"],
+      properties: {
+        name: { type: "string" },
+        count: { type: "integer", minimum: 1 },
+        signatureStandards: {
+          type: "array",
+          uniqueItems: true,
+          minItems: 1,
+          items: { $ref: "#/$defs/signatureStandard" },
+          contains: { enum: ["draft-cavage-http-signatures-12", "rfc9421"] },
+          minContains: 1,
+          maxContains: 1,
+          description:
+            "Exactly one HTTP request signature scheme, plus optional " +
+            "document signature schemes.",
+        },
+      },
+    },
+    generateDirective: {
+      type: "object",
+      additionalProperties: false,
+      required: ["generate"],
+      properties: {
+        generate: { enum: ["lorem"] },
+        size: { $ref: "#/$defs/size" },
+      },
+    },
+    content: {
+      oneOf: [
+        { type: "string" },
+        { $ref: "#/$defs/generateDirective" },
+      ],
+    },
+    objectSpec: {
+      type: "object",
+      properties: {
+        type: { $ref: "#/$defs/scalarOrListString" },
+        content: { $ref: "#/$defs/content" },
+      },
+    },
+    activitySpec: {
+      type: "object",
+      additionalProperties: false,
+      properties: {
+        type: { $ref: "#/$defs/scalarOrListString" },
+        embedObject: { type: "boolean" },
+        object: { $ref: "#/$defs/objectSpec" },
+      },
+    },
+    objectSource: {
+      oneOf: [
+        { $ref: "#/$defs/scalarOrListString" },
+        {
+          type: "object",
+          additionalProperties: false,
+          required: ["seed"],
+          properties: {
+            seed: { $ref: "#/$defs/scalarOrListString" },
+            collection: { $ref: "#/$defs/scalarOrListString" },
+            limit: { type: "integer", minimum: 1 },
+            type: { $ref: "#/$defs/scalarOrListString" },
+          },
+        },
+      ],
+    },
+    expectSeverity: { enum: ["warn", "fail"] },
+    expectValue: {
+      oneOf: [
+        {
+          type: "string",
+          description: "An assertion such as '>= 99%' or '< 100ms'.",
+        },
+        {
+          type: "object",
+          additionalProperties: false,
+          required: ["assert"],
+          properties: {
+            assert: { type: "string" },
+            severity: { $ref: "#/$defs/expectSeverity" },
+          },
+        },
+      ],
+    },
+    mixEntry: {
+      type: "object",
+      additionalProperties: false,
+      required: ["scenario", "weight"],
+      properties: {
+        scenario: { type: "string" },
+        weight: { type: "number", exclusiveMinimum: 0 },
+      },
+    },
+    scenario: {
+      type: "object",
+      additionalProperties: false,
+      required: ["name", "type"],
+      properties: {
+        name: { type: "string" },
+        type: {
+          enum: [
+            "inbox",
+            "webfinger",
+            "actor",
+            "object",
+            "fanout",
+            "collection",
+            "failure",
+            "mixed",
+          ],
+        },
+        load: { $ref: "#/$defs/load" },
+        duration: { $ref: "#/$defs/duration" },
+        warmup: { $ref: "#/$defs/duration" },
+        signing: { $ref: "#/$defs/signingMode" },
+        signatureTimeWindow: { type: "boolean" },
+        runs: { type: "integer", minimum: 1 },
+        expect: {
+          type: "object",
+          additionalProperties: { $ref: "#/$defs/expectValue" },
+        },
+        // inbox / webfinger / actor / collection
+        recipient: { $ref: "#/$defs/scalarOrListString" },
+        inbox: { type: "string" },
+        activity: { $ref: "#/$defs/activitySpec" },
+        authenticated: { type: "boolean" },
+        collection: { $ref: "#/$defs/scalarOrListString" },
+        // object
+        source: { $ref: "#/$defs/objectSource" },
+        // fanout
+        sender: { type: "string" },
+        followers: { type: "integer", minimum: 1 },
+        trigger: { type: "object" },
+        sinkBehavior: { type: "object" },
+        queueDrainTimeout: { $ref: "#/$defs/duration" },
+        // failure
+        fault: { $ref: "#/$defs/scalarOrListString" },
+        // mixed
+        mix: {
+          type: "array",
+          minItems: 1,
+          items: { $ref: "#/$defs/mixEntry" },
+        },
+      },
+      allOf: [
+        {
+          if: { properties: { type: { const: "inbox" } } },
+          then: {
+            required: ["recipient"],
+            properties: {
+              expect: { propertyNames: { enum: INBOX_METRICS } },
+            },
+          },
+        },
+        {
+          if: { properties: { type: { const: "webfinger" } } },
+          then: {
+            required: ["recipient"],
+            properties: {
+              expect: { propertyNames: { enum: READ_METRICS } },
+            },
+          },
+        },
+        {
+          if: { properties: { type: { const: "actor" } } },
+          then: {
+            required: ["recipient"],
+            properties: {
+              expect: { propertyNames: { enum: INBOX_METRICS } },
+            },
+          },
+        },
+        {
+          if: { properties: { type: { const: "object" } } },
+          then: {
+            required: ["source"],
+            properties: {
+              expect: { propertyNames: { enum: READ_METRICS } },
+            },
+          },
+        },
+        {
+          if: { properties: { type: { const: "collection" } } },
+          then: {
+            required: ["recipient"],
+            properties: {
+              expect: { propertyNames: { enum: READ_METRICS } },
+            },
+          },
+        },
+        {
+          if: { properties: { type: { const: "fanout" } } },
+          then: {
+            required: ["sender"],
+            properties: {
+              expect: { propertyNames: { enum: FANOUT_METRICS } },
+            },
+          },
+        },
+        {
+          if: { properties: { type: { const: "failure" } } },
+          then: {
+            required: ["fault"],
+            properties: {
+              expect: { propertyNames: { enum: READ_METRICS } },
+            },
+          },
+        },
+        {
+          if: { properties: { type: { const: "mixed" } } },
+          then: {
+            required: ["mix"],
+            properties: {
+              expect: { propertyNames: { enum: MIXED_METRICS } },
+            },
+          },
+        },
+      ],
+    },
+  },
+} as const;
diff --git a/packages/cli/src/bench/scenario/types.ts b/packages/cli/src/bench/scenario/types.ts
new file mode 100644
index 000000000..18a295480
--- /dev/null
+++ b/packages/cli/src/bench/scenario/types.ts
@@ -0,0 +1,153 @@
+/**
+ * Hand-written TypeScript types for the benchmark scenario suite format.
+ *
+ * These mirror the published JSON Schema in {@link ./schema.ts} and
+ * *schema/bench/scenario-v1.json*.  Runtime validation is done with
+ * `@cfworker/json-schema`; after a value validates, it is narrowed to
+ * {@link Suite} with an `as unknown as` cast (see {@link ./validate.ts}).
+ * @since 2.3.0
+ * @module
+ */
+
+import type { GenerateDirective } from "../template/generate.ts";
+
+/** A signature standard an actor can use. */
+export type SignatureStandard =
+  | "draft-cavage-http-signatures-12"
+  | "rfc9421"
+  | "ld-signatures"
+  | "fep8b32";
+
+/** The HTTP request signature standards (mutually exclusive within a group). */
+export const HTTP_SIGNATURE_STANDARDS: readonly SignatureStandard[] = [
+  "draft-cavage-http-signatures-12",
+  "rfc9421",
+];
+
+/** A scenario type.  Only `inbox` and `webfinger` have runners so far. */
+export type ScenarioType =
+  | "inbox"
+  | "webfinger"
+  | "actor"
+  | "object"
+  | "fanout"
+  | "collection"
+  | "failure"
+  | "mixed";
+
+/** The lookahead signing strategy. */
+export type SigningMode = "jit" | "pipeline" | "presign";
+
+/** The arrival distribution for open-loop load. */
+export type ArrivalDistribution = "constant" | "poisson";
+
+/** The severity of an `expect` assertion. */
+export type ExpectSeverity = "warn" | "fail";
+
+/** A value that may be a single item or a list of items. */
+export type ScalarOrList<T> = T | T[];
+
+/** A load configuration (open-loop `rate` XOR closed-loop `concurrency`). */
+export interface LoadConfig {
+  readonly rate?: string | number;
+  readonly concurrency?: number;
+  readonly arrival?: ArrivalDistribution;
+  readonly maxInFlight?: number;
+}
+
+/** Suite-wide defaults applied to every scenario unless overridden. */
+export interface SuiteDefaults {
+  readonly duration?: string;
+  readonly warmup?: string;
+  readonly load?: LoadConfig;
+  readonly signing?: SigningMode;
+  readonly signatureTimeWindow?: boolean;
+  readonly runs?: number;
+}
+
+/** A group of synthetic actors sharing a set of signature standards. */
+export interface ActorGroup {
+  readonly name?: string;
+  readonly count?: number;
+  readonly signatureStandards: SignatureStandard[];
+}
+
+/** An `expect` assertion: a string, or an object with a severity. */
+export type ExpectValue =
+  | string
+  | { readonly assert: string; readonly severity?: ExpectSeverity };
+
+/** A block of `expect` assertions keyed by metric name. */
+export type ExpectBlock = Record<string, ExpectValue>;
+
+/** A generated or literal object body. */
+export interface ObjectSpec {
+  readonly type?: ScalarOrList<string>;
+  readonly content?: string | GenerateDirective;
+  readonly [key: string]: unknown;
+}
+
+/** The activity to deliver in an `inbox` scenario. */
+export interface ActivitySpec {
+  readonly type?: ScalarOrList<string>;
+  readonly embedObject?: boolean;
+  readonly object?: ObjectSpec;
+}
+
+/** The source of object URLs for an `object` scenario. */
+export type ObjectSource =
+  | ScalarOrList<string>
+  | {
+    readonly seed: ScalarOrList<string>;
+    readonly collection?: ScalarOrList<string>;
+    readonly limit?: number;
+    readonly type?: ScalarOrList<string>;
+  };
+
+/** One weighted entry in a `mixed` scenario. */
+export interface MixEntry {
+  readonly scenario: string;
+  readonly weight: number;
+}
+
+/** A single benchmark scenario. */
+export interface Scenario {
+  readonly name: string;
+  readonly type: ScenarioType;
+  readonly load?: LoadConfig;
+  readonly duration?: string;
+  readonly warmup?: string;
+  readonly signing?: SigningMode;
+  readonly signatureTimeWindow?: boolean;
+  readonly runs?: number;
+  readonly expect?: ExpectBlock;
+  // inbox / webfinger / actor / collection
+  readonly recipient?: ScalarOrList<string>;
+  readonly inbox?: string;
+  readonly activity?: ActivitySpec;
+  readonly authenticated?: boolean;
+  readonly collection?: ScalarOrList<string>;
+  // object
+  readonly source?: ObjectSource;
+  // fanout
+  readonly sender?: string;
+  readonly followers?: number;
+  readonly trigger?: Record<string, unknown>;
+  readonly sinkBehavior?: Record<string, unknown>;
+  readonly queueDrainTimeout?: string;
+  // failure
+  readonly fault?: ScalarOrList<string>;
+  // mixed
+  readonly mix?: MixEntry[];
+}
+
+/** A complete benchmark scenario suite. */
+export interface Suite {
+  /** An optional editor hint pointing at the published schema. */
+  readonly $schema?: string;
+  readonly version: 1;
+  readonly target?: string;
+  readonly defaults?: SuiteDefaults;
+  readonly actors?: ActorGroup[];
+  readonly scenarios: Scenario[];
+}
diff --git a/packages/cli/src/bench/scenario/validate.test.ts b/packages/cli/src/bench/scenario/validate.test.ts
new file mode 100644
index 000000000..1973cf435
--- /dev/null
+++ b/packages/cli/src/bench/scenario/validate.test.ts
@@ -0,0 +1,93 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { parseSuiteText } from "./load.ts";
+import { validateSuite } from "./validate.ts";
+import { SuiteValidationError } from "./errors.ts";
+
+function validInbox(): unknown {
+  const actor = { signatureStandards: ["draft-cavage-http-signatures-12"] };
+  const scenario = {
+    name: "inbox-shared",
+    type: "inbox",
+    recipient: "acct:alice@x",
+  };
+  const target = "http://localhost:3000";
+  return { version: 1, target, actors: [actor], scenarios: [scenario] };
+}
+
+test("validateSuite - accepts a valid inbox suite", () => {
+  const suite = validateSuite(validInbox());
+  assert.strictEqual(suite.version, 1);
+  assert.strictEqual(suite.scenarios[0].type, "inbox");
+});
+
+test("validateSuite - accepts YAML and JSON equivalently", () => {
+  const yaml = parseSuiteText(`
+version: 1
+target: http://localhost:3000
+scenarios:
+  - name: wf
+    type: webfinger
+    recipient: "acct:alice@x"
+`);
+  const json = parseSuiteText(JSON.stringify({
+    version: 1,
+    target: "http://localhost:3000",
+    scenarios: [{ name: "wf", type: "webfinger", recipient: "acct:alice@x" }],
+  }));
+  assert.deepEqual(validateSuite(yaml), validateSuite(json));
+});
+
+test("validateSuite - rejects a missing required field", () => {
+  const bad = { target: "http://localhost:3000", scenarios: [] };
+  assert.throws(() => validateSuite(bad), SuiteValidationError);
+});
+
+test("validateSuite - rejects a wrong-typed field", () => {
+  const bad = validInbox() as Record<string, unknown>;
+  bad.version = "1";
+  assert.throws(() => validateSuite(bad), SuiteValidationError);
+});
+
+test("validateSuite - enforces exactly one HTTP signature scheme", () => {
+  const bad = validInbox() as Record<string, unknown>;
+  bad.actors = [{
+    signatureStandards: ["draft-cavage-http-signatures-12", "rfc9421"],
+  }];
+  assert.throws(() => validateSuite(bad), SuiteValidationError);
+
+  const docOnly = validInbox() as Record<string, unknown>;
+  docOnly.actors = [{ signatureStandards: ["ld-signatures"] }];
+  assert.throws(() => validateSuite(docOnly), SuiteValidationError);
+});
+
+test("validateSuite - enforces rate XOR concurrency", () => {
+  const bad = validInbox() as Record<string, unknown>;
+  bad.defaults = { load: { rate: "100/s", concurrency: 50 } };
+  assert.throws(() => validateSuite(bad), SuiteValidationError);
+});
+
+test("validateSuite - enforces per-type expect metric allowlist", () => {
+  const bad = {
+    version: 1,
+    target: "http://localhost:3000",
+    scenarios: [{
+      name: "wf",
+      type: "webfinger",
+      recipient: "acct:alice@x",
+      expect: { "signatureVerification.p95": "< 10ms" },
+    }],
+  };
+  assert.throws(() => validateSuite(bad), SuiteValidationError);
+});
+
+test("validateSuite - error message names the failing location", () => {
+  try {
+    validateSuite({ target: "http://localhost:3000", scenarios: [] });
+    assert.fail("expected a SuiteValidationError");
+  } catch (error) {
+    assert.ok(error instanceof SuiteValidationError);
+    assert.ok(error.problems.length > 0);
+    assert.match(error.message, /Invalid scenario suite:.*- \(root\): /s);
+  }
+});
diff --git a/packages/cli/src/bench/scenario/validate.ts b/packages/cli/src/bench/scenario/validate.ts
new file mode 100644
index 000000000..d160bec81
--- /dev/null
+++ b/packages/cli/src/bench/scenario/validate.ts
@@ -0,0 +1,35 @@
+/**
+ * Runtime validation of scenario suites against the embedded JSON Schema.
+ * @since 2.3.0
+ * @module
+ */
+
+import { type Schema, Validator } from "@cfworker/json-schema";
+import { scenarioSchemaV1 } from "./schema.ts";
+import { type RawValidationError, SuiteValidationError } from "./errors.ts";
+import type { Suite } from "./types.ts";
+
+let validator: Validator | undefined;
+
+function getValidator(): Validator {
+  validator ??= new Validator(scenarioSchemaV1 as unknown as Schema, "2020-12");
+  return validator;
+}
+
+/**
+ * Validates a parsed scenario suite against the schema and narrows its type.
+ * @param raw The parsed (but untyped) suite value.
+ * @param source An optional source label (e.g. a file path) for error messages.
+ * @returns The validated suite.
+ * @throws {SuiteValidationError} If the value does not satisfy the schema.
+ */
+export function validateSuite(raw: unknown, source?: string): Suite {
+  const result = getValidator().validate(raw);
+  if (!result.valid) {
+    throw new SuiteValidationError(
+      result.errors as RawValidationError[],
+      source,
+    );
+  }
+  return raw as Suite;
+}
diff --git a/packages/cli/src/bench/schema-paths.ts b/packages/cli/src/bench/schema-paths.ts
new file mode 100644
index 000000000..bbfc0d8cf
--- /dev/null
+++ b/packages/cli/src/bench/schema-paths.ts
@@ -0,0 +1,33 @@
+/**
+ * Shared path resolution and canonical serialization for the published
+ * benchmark JSON Schema files.
+ *
+ * This module is used only by the schema generator script and the schema
+ * guards (tests); it is never imported by the CLI runtime, which reads schemas
+ * from the embedded objects rather than from disk.
+ * @since 2.3.0
+ * @module
+ */
+
+import { join } from "node:path";
+
+/** The absolute path to the repository's *schema/bench/* directory. */
+export const SCHEMA_DIR: string = join(
+  import.meta.dirname!,
+  "..",
+  "..",
+  "..",
+  "..",
+  "schema",
+  "bench",
+);
+
+/**
+ * Serializes a schema object to the canonical published form: pretty-printed
+ * JSON with two-space indentation and a trailing newline.
+ * @param schema The schema object to serialize.
+ * @returns The canonical JSON text.
+ */
+export function serializeSchema(schema: unknown): string {
+  return `${JSON.stringify(schema, null, 2)}\n`;
+}
diff --git a/packages/cli/src/bench/schema.test.ts b/packages/cli/src/bench/schema.test.ts
new file mode 100644
index 000000000..d9702d41d
--- /dev/null
+++ b/packages/cli/src/bench/schema.test.ts
@@ -0,0 +1,119 @@
+import { type Schema, Validator } from "@cfworker/json-schema";
+import assert from "node:assert/strict";
+import { execFileSync } from "node:child_process";
+import { readdirSync, readFileSync } from "node:fs";
+import { basename, join } from "node:path";
+import test from "node:test";
+import { parseSuiteText } from "./scenario/load.ts";
+import { SCHEMA_DIR, serializeSchema } from "./schema-paths.ts";
+import { PUBLISHED_SCHEMAS } from "./schemas.ts";
+
+const REPO_ROOT = join(SCHEMA_DIR, "..", "..");
+const FIXTURES = join(import.meta.dirname!, "__fixtures__");
+
+function collectRefs(node: unknown, refs: string[] = []): string[] {
+  if (Array.isArray(node)) {
+    for (const item of node) collectRefs(item, refs);
+  } else if (node != null && typeof node === "object") {
+    for (const [key, value] of Object.entries(node)) {
+      if (key === "$ref" && typeof value === "string") refs.push(value);
+      else collectRefs(value, refs);
+    }
+  }
+  return refs;
+}
+
+// Guard 1: meta-schema / structural validation.
+for (const { name, fileName, schema } of PUBLISHED_SCHEMAS) {
+  test(`schema guard - ${name} is structurally well-formed`, () => {
+    assert.strictEqual(
+      schema.$schema,
+      "https://json-schema.org/draft/2020-12/schema",
+    );
+    assert.ok(
+      typeof schema.$id === "string" && schema.$id.endsWith(`/${fileName}`),
+      `$id must end with /${fileName}`,
+    );
+    const defs = (schema.$defs ?? {}) as Record<string, unknown>;
+    for (const ref of collectRefs(schema)) {
+      if (!ref.startsWith("#/$defs/")) continue;
+      const defName = ref.slice("#/$defs/".length);
+      assert.ok(
+        Object.hasOwn(defs, defName),
+        `dangling $ref ${ref}`,
+      );
+    }
+    // Constructing the validator dereferences the schema; it must not throw.
+    assert.doesNotThrow(() =>
+      new Validator(schema as unknown as Schema, "2020-12")
+    );
+  });
+}
+
+// Guard 2: example-fixture validation.
+const scenarioSchema = PUBLISHED_SCHEMAS.find((s) => s.name === "scenario")!;
+const scenarioValidator = new Validator(
+  scenarioSchema.schema as unknown as Schema,
+  "2020-12",
+);
+
+function fixtureFiles(dir: string): string[] {
+  return readdirSync(join(FIXTURES, dir))
+    .filter((f) => /\.(ya?ml|json)$/.test(f))
+    .map((f) => join(FIXTURES, dir, f));
+}
+
+for (const file of fixtureFiles("scenarios")) {
+  test(`schema guard - valid fixture ${basename(file)}`, () => {
+    const suite = parseSuiteText(readFileSync(file, "utf-8"));
+    const result = scenarioValidator.validate(suite);
+    assert.ok(
+      result.valid,
+      `expected valid, got: ${JSON.stringify(result.errors)}`,
+    );
+  });
+}
+
+for (const file of fixtureFiles("invalid")) {
+  test(`schema guard - invalid fixture ${basename(file)}`, () => {
+    const suite = parseSuiteText(readFileSync(file, "utf-8"));
+    assert.ok(!scenarioValidator.validate(suite).valid, "expected invalid");
+  });
+}
+
+// Guard 3: drift between embedded schema and the published file.
+for (const { name, fileName, schema } of PUBLISHED_SCHEMAS) {
+  test(`schema guard - ${name} embedded schema matches published file`, () => {
+    const published = readFileSync(join(SCHEMA_DIR, fileName), "utf-8");
+    assert.strictEqual(
+      published,
+      serializeSchema(schema),
+      `schema/bench/${fileName} is out of sync; run ` +
+        `scripts/generate-bench-schema.ts`,
+    );
+  });
+}
+
+// Guard 4: immutability of already-published schema versions.
+for (const { name, fileName } of PUBLISHED_SCHEMAS) {
+  test(`schema guard - ${name} published file is unchanged from HEAD`, () => {
+    let committed: string;
+    try {
+      committed = execFileSync(
+        "git",
+        ["show", `HEAD:schema/bench/${fileName}`],
+        { cwd: REPO_ROOT, encoding: "utf-8" },
+      );
+    } catch {
+      // Not yet committed (a brand-new version file): nothing to guard.
+      return;
+    }
+    const current = readFileSync(join(SCHEMA_DIR, fileName), "utf-8");
+    assert.strictEqual(
+      current,
+      committed,
+      `schema/bench/${fileName} is published and immutable; publish a new ` +
+        `version file instead of editing it`,
+    );
+  });
+}
diff --git a/packages/cli/src/bench/schemas.ts b/packages/cli/src/bench/schemas.ts
new file mode 100644
index 000000000..7a249e8cc
--- /dev/null
+++ b/packages/cli/src/bench/schemas.ts
@@ -0,0 +1,31 @@
+/**
+ * The registry of published benchmark JSON Schemas.
+ *
+ * Each entry pairs the embedded runtime schema object with the file name it is
+ * published under in the repository's *schema/bench/* directory.  The
+ * meta-schema, drift, and immutability guards iterate over this registry, so
+ * they cover a newly added schema automatically; the fixture guard does not.
+ * @since 2.3.0
+ * @module
+ */
+
+import { scenarioSchemaV1 } from "./scenario/schema.ts";
+
+/** A published JSON Schema and where it is hosted. */
+export interface PublishedSchema {
+  /** A short identifier, e.g. `"scenario"`. */
+  readonly name: string;
+  /** The published file name under *schema/bench/*. */
+  readonly fileName: string;
+  /** The embedded runtime schema object. */
+  readonly schema: Record<string, unknown>;
+}
+
+/** All benchmark schemas published to json-schema.fedify.dev. */
+export const PUBLISHED_SCHEMAS: readonly PublishedSchema[] = [
+  {
+    name: "scenario",
+    fileName: "scenario-v1.json",
+    schema: scenarioSchemaV1 as unknown as Record<string, unknown>,
+  },
+];
diff --git a/schema/bench/scenario-v1.json b/schema/bench/scenario-v1.json
new file mode 100644
index 000000000..f0e4bcba0
--- /dev/null
+++ b/schema/bench/scenario-v1.json
@@ -0,0 +1,700 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://json-schema.fedify.dev/bench/scenario-v1.json",
+  "title": "Fedify benchmark scenario suite",
+  "type": "object",
+  "required": [
+    "version",
+    "scenarios"
+  ],
+  "additionalProperties": false,
+  "properties": {
+    "$schema": {
+      "type": "string",
+      "description": "An optional editor hint pointing at this schema."
+    },
+    "version": {
+      "const": 1
+    },
+    "target": {
+      "type": "string",
+      "format": "uri",
+      "description": "The target base URL; may be overridden by --target."
+    },
+    "defaults": {
+      "$ref": "#/$defs/defaults"
+    },
+    "actors": {
+      "type": "array",
+      "items": {
+        "$ref": "#/$defs/actorGroup"
+      }
+    },
+    "scenarios": {
+      "type": "array",
+      "minItems": 1,
+      "items": {
+        "$ref": "#/$defs/scenario"
+      }
+    }
+  },
+  "$defs": {
+    "duration": {
+      "type": "string",
+      "pattern": "^\\d+(\\.\\d+)?(ms|s|m|h)$",
+      "description": "A duration such as 500ms, 30s, 2m, or 1h."
+    },
+    "rate": {
+      "description": "An open-loop arrival rate such as 200/s, or a number.",
+      "oneOf": [
+        {
+          "type": "number",
+          "exclusiveMinimum": 0
+        },
+        {
+          "type": "string",
+          "pattern": "^\\d+(\\.\\d+)?\\s*/\\s*(s|m|h)$"
+        }
+      ]
+    },
+    "size": {
+      "description": "A byte size such as 2KB or a plain number of bytes.",
+      "oneOf": [
+        {
+          "type": "number",
+          "minimum": 0
+        },
+        {
+          "type": "string",
+          "pattern": "^\\s*\\d+(\\.\\d+)?\\s*([Bb]|[Kk][Bb]|[Kk][Ii][Bb]|[Mm][Bb]|[Mm][Ii][Bb]|[Gg][Bb]|[Gg][Ii][Bb])?\\s*$"
+        }
+      ]
+    },
+    "signatureStandard": {
+      "enum": [
+        "draft-cavage-http-signatures-12",
+        "rfc9421",
+        "ld-signatures",
+        "fep8b32"
+      ]
+    },
+    "signingMode": {
+      "enum": [
+        "jit",
+        "pipeline",
+        "presign"
+      ]
+    },
+    "arrival": {
+      "enum": [
+        "constant",
+        "poisson"
+      ]
+    },
+    "scalarOrListString": {
+      "oneOf": [
+        {
+          "type": "string"
+        },
+        {
+          "type": "array",
+          "items": {
+            "type": "string"
+          },
+          "minItems": 1
+        }
+      ]
+    },
+    "load": {
+      "type": "object",
+      "additionalProperties": false,
+      "properties": {
+        "rate": {
+          "$ref": "#/$defs/rate"
+        },
+        "concurrency": {
+          "type": "integer",
+          "minimum": 1
+        },
+        "arrival": {
+          "$ref": "#/$defs/arrival"
+        },
+        "maxInFlight": {
+          "type": "integer",
+          "minimum": 1
+        }
+      },
+      "oneOf": [
+        {
+          "required": [
+            "rate"
+          ],
+          "not": {
+            "required": [
+              "concurrency"
+            ]
+          }
+        },
+        {
+          "required": [
+            "concurrency"
+          ],
+          "not": {
+            "required": [
+              "rate"
+            ]
+          }
+        }
+      ]
+    },
+    "defaults": {
+      "type": "object",
+      "additionalProperties": false,
+      "properties": {
+        "duration": {
+          "$ref": "#/$defs/duration"
+        },
+        "warmup": {
+          "$ref": "#/$defs/duration"
+        },
+        "load": {
+          "$ref": "#/$defs/load"
+        },
+        "signing": {
+          "$ref": "#/$defs/signingMode"
+        },
+        "signatureTimeWindow": {
+          "type": "boolean"
+        },
+        "runs": {
+          "type": "integer",
+          "minimum": 1
+        }
+      }
+    },
+    "actorGroup": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "signatureStandards"
+      ],
+      "properties": {
+        "name": {
+          "type": "string"
+        },
+        "count": {
+          "type": "integer",
+          "minimum": 1
+        },
+        "signatureStandards": {
+          "type": "array",
+          "uniqueItems": true,
+          "minItems": 1,
+          "items": {
+            "$ref": "#/$defs/signatureStandard"
+          },
+          "contains": {
+            "enum": [
+              "draft-cavage-http-signatures-12",
+              "rfc9421"
+            ]
+          },
+          "minContains": 1,
+          "maxContains": 1,
+          "description": "Exactly one HTTP request signature scheme, plus optional document signature schemes."
+        }
+      }
+    },
+    "generateDirective": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "generate"
+      ],
+      "properties": {
+        "generate": {
+          "enum": [
+            "lorem"
+          ]
+        },
+        "size": {
+          "$ref": "#/$defs/size"
+        }
+      }
+    },
+    "content": {
+      "oneOf": [
+        {
+          "type": "string"
+        },
+        {
+          "$ref": "#/$defs/generateDirective"
+        }
+      ]
+    },
+    "objectSpec": {
+      "type": "object",
+      "properties": {
+        "type": {
+          "$ref": "#/$defs/scalarOrListString"
+        },
+        "content": {
+          "$ref": "#/$defs/content"
+        }
+      }
+    },
+    "activitySpec": {
+      "type": "object",
+      "additionalProperties": false,
+      "properties": {
+        "type": {
+          "$ref": "#/$defs/scalarOrListString"
+        },
+        "embedObject": {
+          "type": "boolean"
+        },
+        "object": {
+          "$ref": "#/$defs/objectSpec"
+        }
+      }
+    },
+    "objectSource": {
+      "oneOf": [
+        {
+          "$ref": "#/$defs/scalarOrListString"
+        },
+        {
+          "type": "object",
+          "additionalProperties": false,
+          "required": [
+            "seed"
+          ],
+          "properties": {
+            "seed": {
+              "$ref": "#/$defs/scalarOrListString"
+            },
+            "collection": {
+              "$ref": "#/$defs/scalarOrListString"
+            },
+            "limit": {
+              "type": "integer",
+              "minimum": 1
+            },
+            "type": {
+              "$ref": "#/$defs/scalarOrListString"
+            }
+          }
+        }
+      ]
+    },
+    "expectSeverity": {
+      "enum": [
+        "warn",
+        "fail"
+      ]
+    },
+    "expectValue": {
+      "oneOf": [
+        {
+          "type": "string",
+          "description": "An assertion such as '>= 99%' or '< 100ms'."
+        },
+        {
+          "type": "object",
+          "additionalProperties": false,
+          "required": [
+            "assert"
+          ],
+          "properties": {
+            "assert": {
+              "type": "string"
+            },
+            "severity": {
+              "$ref": "#/$defs/expectSeverity"
+            }
+          }
+        }
+      ]
+    },
+    "mixEntry": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "scenario",
+        "weight"
+      ],
+      "properties": {
+        "scenario": {
+          "type": "string"
+        },
+        "weight": {
+          "type": "number",
+          "exclusiveMinimum": 0
+        }
+      }
+    },
+    "scenario": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "name",
+        "type"
+      ],
+      "properties": {
+        "name": {
+          "type": "string"
+        },
+        "type": {
+          "enum": [
+            "inbox",
+            "webfinger",
+            "actor",
+            "object",
+            "fanout",
+            "collection",
+            "failure",
+            "mixed"
+          ]
+        },
+        "load": {
+          "$ref": "#/$defs/load"
+        },
+        "duration": {
+          "$ref": "#/$defs/duration"
+        },
+        "warmup": {
+          "$ref": "#/$defs/duration"
+        },
+        "signing": {
+          "$ref": "#/$defs/signingMode"
+        },
+        "signatureTimeWindow": {
+          "type": "boolean"
+        },
+        "runs": {
+          "type": "integer",
+          "minimum": 1
+        },
+        "expect": {
+          "type": "object",
+          "additionalProperties": {
+            "$ref": "#/$defs/expectValue"
+          }
+        },
+        "recipient": {
+          "$ref": "#/$defs/scalarOrListString"
+        },
+        "inbox": {
+          "type": "string"
+        },
+        "activity": {
+          "$ref": "#/$defs/activitySpec"
+        },
+        "authenticated": {
+          "type": "boolean"
+        },
+        "collection": {
+          "$ref": "#/$defs/scalarOrListString"
+        },
+        "source": {
+          "$ref": "#/$defs/objectSource"
+        },
+        "sender": {
+          "type": "string"
+        },
+        "followers": {
+          "type": "integer",
+          "minimum": 1
+        },
+        "trigger": {
+          "type": "object"
+        },
+        "sinkBehavior": {
+          "type": "object"
+        },
+        "queueDrainTimeout": {
+          "$ref": "#/$defs/duration"
+        },
+        "fault": {
+          "$ref": "#/$defs/scalarOrListString"
+        },
+        "mix": {
+          "type": "array",
+          "minItems": 1,
+          "items": {
+            "$ref": "#/$defs/mixEntry"
+          }
+        }
+      },
+      "allOf": [
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "inbox"
+              }
+            }
+          },
+          "then": {
+            "required": [
+              "recipient"
+            ],
+            "properties": {
+              "expect": {
+                "propertyNames": {
+                  "enum": [
+                    "successRate",
+                    "throughputPerSec",
+                    "errors.total",
+                    "errors.4xx",
+                    "errors.5xx",
+                    "latency.p50",
+                    "latency.p95",
+                    "latency.p99",
+                    "latency.mean",
+                    "latency.max",
+                    "signatureVerification.p50",
+                    "signatureVerification.p95",
+                    "signatureVerification.p99"
+                  ]
+                }
+              }
+            }
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "webfinger"
+              }
+            }
+          },
+          "then": {
+            "required": [
+              "recipient"
+            ],
+            "properties": {
+              "expect": {
+                "propertyNames": {
+                  "enum": [
+                    "successRate",
+                    "throughputPerSec",
+                    "errors.total",
+                    "errors.4xx",
+                    "errors.5xx",
+                    "latency.p50",
+                    "latency.p95",
+                    "latency.p99",
+                    "latency.mean",
+                    "latency.max"
+                  ]
+                }
+              }
+            }
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "actor"
+              }
+            }
+          },
+          "then": {
+            "required": [
+              "recipient"
+            ],
+            "properties": {
+              "expect": {
+                "propertyNames": {
+                  "enum": [
+                    "successRate",
+                    "throughputPerSec",
+                    "errors.total",
+                    "errors.4xx",
+                    "errors.5xx",
+                    "latency.p50",
+                    "latency.p95",
+                    "latency.p99",
+                    "latency.mean",
+                    "latency.max",
+                    "signatureVerification.p50",
+                    "signatureVerification.p95",
+                    "signatureVerification.p99"
+                  ]
+                }
+              }
+            }
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "object"
+              }
+            }
+          },
+          "then": {
+            "required": [
+              "source"
+            ],
+            "properties": {
+              "expect": {
+                "propertyNames": {
+                  "enum": [
+                    "successRate",
+                    "throughputPerSec",
+                    "errors.total",
+                    "errors.4xx",
+                    "errors.5xx",
+                    "latency.p50",
+                    "latency.p95",
+                    "latency.p99",
+                    "latency.mean",
+                    "latency.max"
+                  ]
+                }
+              }
+            }
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "collection"
+              }
+            }
+          },
+          "then": {
+            "required": [
+              "recipient"
+            ],
+            "properties": {
+              "expect": {
+                "propertyNames": {
+                  "enum": [
+                    "successRate",
+                    "throughputPerSec",
+                    "errors.total",
+                    "errors.4xx",
+                    "errors.5xx",
+                    "latency.p50",
+                    "latency.p95",
+                    "latency.p99",
+                    "latency.mean",
+                    "latency.max"
+                  ]
+                }
+              }
+            }
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "fanout"
+              }
+            }
+          },
+          "then": {
+            "required": [
+              "sender"
+            ],
+            "properties": {
+              "expect": {
+                "propertyNames": {
+                  "enum": [
+                    "successRate",
+                    "deliveryThroughput",
+                    "errors.total",
+                    "errors.4xx",
+                    "errors.5xx",
+                    "queueDrain.p50",
+                    "queueDrain.p95",
+                    "queueDrain.p99"
+                  ]
+                }
+              }
+            }
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "failure"
+              }
+            }
+          },
+          "then": {
+            "required": [
+              "fault"
+            ],
+            "properties": {
+              "expect": {
+                "propertyNames": {
+                  "enum": [
+                    "successRate",
+                    "throughputPerSec",
+                    "errors.total",
+                    "errors.4xx",
+                    "errors.5xx",
+                    "latency.p50",
+                    "latency.p95",
+                    "latency.p99",
+                    "latency.mean",
+                    "latency.max"
+                  ]
+                }
+              }
+            }
+          }
+        },
+        {
+          "if": {
+            "properties": {
+              "type": {
+                "const": "mixed"
+              }
+            }
+          },
+          "then": {
+            "required": [
+              "mix"
+            ],
+            "properties": {
+              "expect": {
+                "propertyNames": {
+                  "enum": [
+                    "successRate",
+                    "throughputPerSec",
+                    "errors.total",
+                    "errors.4xx",
+                    "errors.5xx",
+                    "latency.p50",
+                    "latency.p95",
+                    "latency.p99",
+                    "latency.mean",
+                    "latency.max",
+                    "signatureVerification.p50",
+                    "signatureVerification.p95",
+                    "signatureVerification.p99",
+                    "deliveryThroughput",
+                    "queueDrain.p50",
+                    "queueDrain.p95",
+                    "queueDrain.p99"
+                  ]
+                }
+              }
+            }
+          }
+        }
+      ]
+    }
+  }
+}

From d17cecb8f5f6d64b8301938d2b2773b8939c765b Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Thu, 4 Jun 2026 22:19:43 +0900
Subject: [PATCH 05/47] Normalize benchmark scenarios into an executable form

Add the normalization step that turns a schema-validated suite into the
resolved form the engine runs:

 -  `parseDuration()` and `parseRate()` parse the human-friendly duration
    (`30s`) and rate (`200/s`) units into milliseconds and requests per
    second, rejecting non-positive and overflowing magnitudes.
 -  `normalizeSuite()` applies suite defaults, coerces the top-level
    scalar-or-list fields to arrays, resolves the target (with a
    `--target` override), and determines the open- or closed-loop load
    model, inheriting compatible fields such as `arrival` and
    `maxInFlight` from the defaults while a scenario's `rate`/
    `concurrency` selects the model.

It also enforces the one cross-field rule the JSON Schema cannot express:
the buffered signing modes (`pipeline`, `presign`) pre-sign requests, so
they require the target's signature time window to be off; a
time-windowed target must use `signing: jit`.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 .../cli/src/bench/scenario/normalize.test.ts  | 144 ++++++++++++
 packages/cli/src/bench/scenario/normalize.ts  | 215 ++++++++++++++++++
 packages/cli/src/bench/scenario/units.test.ts |  41 ++++
 packages/cli/src/bench/scenario/units.ts      |  66 ++++++
 4 files changed, 466 insertions(+)
 create mode 100644 packages/cli/src/bench/scenario/normalize.test.ts
 create mode 100644 packages/cli/src/bench/scenario/normalize.ts
 create mode 100644 packages/cli/src/bench/scenario/units.test.ts
 create mode 100644 packages/cli/src/bench/scenario/units.ts

diff --git a/packages/cli/src/bench/scenario/normalize.test.ts b/packages/cli/src/bench/scenario/normalize.test.ts
new file mode 100644
index 000000000..c16feea3d
--- /dev/null
+++ b/packages/cli/src/bench/scenario/normalize.test.ts
@@ -0,0 +1,144 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { normalizeSuite, SuiteNormalizeError } from "./normalize.ts";
+import type { Suite } from "./types.ts";
+
+function suite(overrides: Partial<Suite> = {}): Suite {
+  return {
+    version: 1,
+    target: "http://localhost:3000",
+    scenarios: [
+      { name: "inbox-shared", type: "inbox", recipient: "acct:alice@x" },
+    ],
+    ...overrides,
+  };
+}
+
+test("normalizeSuite - applies defaults and parses units", () => {
+  const resolved = normalizeSuite(suite({
+    defaults: { duration: "30s", warmup: "5s", load: { rate: "200/s" } },
+  }));
+  const s = resolved.scenarios[0];
+  assert.strictEqual(s.durationMs, 30_000);
+  assert.strictEqual(s.warmupMs, 5000);
+  assert.deepEqual(s.load, {
+    kind: "open",
+    ratePerSec: 200,
+    arrival: "constant",
+    maxInFlight: undefined,
+  });
+  assert.strictEqual(s.signing, "pipeline");
+  assert.strictEqual(s.runs, 1);
+  assert.deepEqual(s.recipients, ["acct:alice@x"]);
+});
+
+test("normalizeSuite - falls back to open-loop defaults", () => {
+  const s = normalizeSuite(suite()).scenarios[0];
+  assert.strictEqual(s.load.kind, "open");
+  assert.strictEqual(s.durationMs, 60_000);
+  assert.strictEqual(s.warmupMs, 0);
+});
+
+test("normalizeSuite - resolves closed-loop load", () => {
+  const s = normalizeSuite(suite({
+    defaults: { load: { concurrency: 50, maxInFlight: 100 } },
+  })).scenarios[0];
+  assert.deepEqual(s.load, {
+    kind: "closed",
+    concurrency: 50,
+    maxInFlight: 100,
+  });
+});
+
+test("normalizeSuite - scenario load overrides defaults", () => {
+  const s = normalizeSuite(suite({
+    defaults: { load: { rate: "10/s" } },
+    scenarios: [{
+      name: "x",
+      type: "inbox",
+      recipient: "acct:a@x",
+      load: { concurrency: 5 },
+    }],
+  })).scenarios[0];
+  assert.strictEqual(s.load.kind, "closed");
+});
+
+test("normalizeSuite - load inherits arrival/maxInFlight from defaults", () => {
+  const s = normalizeSuite(suite({
+    defaults: { load: { rate: "10/s", arrival: "poisson", maxInFlight: 200 } },
+    scenarios: [{
+      name: "x",
+      type: "inbox",
+      recipient: "acct:a@x",
+      load: { rate: "100/s" },
+    }],
+  })).scenarios[0];
+  assert.deepEqual(s.load, {
+    kind: "open",
+    ratePerSec: 100,
+    arrival: "poisson",
+    maxInFlight: 200,
+  });
+});
+
+test("normalizeSuite - parses fanout queueDrainTimeout to ms", () => {
+  const s = normalizeSuite(suite({
+    scenarios: [{
+      name: "fan",
+      type: "fanout",
+      sender: "alice",
+      queueDrainTimeout: "2m",
+    }],
+  })).scenarios[0];
+  assert.strictEqual(s.queueDrainTimeoutMs, 120_000);
+});
+
+test("normalizeSuite - passes a list recipient through unchanged", () => {
+  const s = normalizeSuite(suite({
+    scenarios: [{
+      name: "wf",
+      type: "webfinger",
+      recipient: ["acct:a@x", "acct:b@x"],
+    }],
+  })).scenarios[0];
+  assert.deepEqual(s.recipients, ["acct:a@x", "acct:b@x"]);
+});
+
+test("normalizeSuite - --target overrides the suite target", () => {
+  const resolved = normalizeSuite(suite(), {
+    target: "http://127.0.0.1:8080",
+  });
+  assert.strictEqual(resolved.target.href, "http://127.0.0.1:8080/");
+});
+
+test("normalizeSuite - requires a target", () => {
+  assert.throws(
+    () => normalizeSuite(suite({ target: undefined })),
+    SuiteNormalizeError,
+  );
+});
+
+test("normalizeSuite - rejects an invalid target URL", () => {
+  assert.throws(
+    () => normalizeSuite(suite({ target: "not a url" })),
+    SuiteNormalizeError,
+  );
+});
+
+test("normalizeSuite - pipeline signing rejects a time-windowed target", () => {
+  assert.throws(
+    () =>
+      normalizeSuite(suite({
+        defaults: { signing: "pipeline", signatureTimeWindow: true },
+      })),
+    SuiteNormalizeError,
+  );
+});
+
+test("normalizeSuite - jit signing allows a time-windowed target", () => {
+  const s = normalizeSuite(suite({
+    defaults: { signing: "jit", signatureTimeWindow: true },
+  })).scenarios[0];
+  assert.strictEqual(s.signing, "jit");
+  assert.strictEqual(s.signatureTimeWindow, true);
+});
diff --git a/packages/cli/src/bench/scenario/normalize.ts b/packages/cli/src/bench/scenario/normalize.ts
new file mode 100644
index 000000000..cc800d3e9
--- /dev/null
+++ b/packages/cli/src/bench/scenario/normalize.ts
@@ -0,0 +1,215 @@
+/**
+ * Normalizes a validated scenario suite into a fully resolved form the engine
+ * can execute: defaults applied, top-level scalar-or-list fields (`recipient`,
+ * `collection`, `fault`) coerced to arrays, durations and rates parsed to
+ * numbers, and the load model determined.  Nested specs (`activity`, `source`)
+ * are passed through and coerced where they are consumed.
+ *
+ * It also enforces the cross-field rules that the JSON Schema cannot express,
+ * notably that the buffered signing modes require the target's signature time
+ * window to be off.
+ * @since 2.3.0
+ * @module
+ */
+
+import { asList } from "./coerce.ts";
+import type {
+  ActivitySpec,
+  ActorGroup,
+  ArrivalDistribution,
+  ExpectBlock,
+  ObjectSource,
+  Scenario,
+  ScenarioType,
+  SigningMode,
+  Suite,
+} from "./types.ts";
+import { parseDuration, parseRate } from "./units.ts";
+
+const DEFAULT_DURATION_MS = 60_000;
+const DEFAULT_WARMUP_MS = 0;
+const DEFAULT_RATE_PER_SEC = 50;
+const DEFAULT_SIGNING: SigningMode = "pipeline";
+const DEFAULT_RUNS = 1;
+
+/** The resolved load model for a scenario. */
+export type LoadModel =
+  | {
+    readonly kind: "open";
+    readonly ratePerSec: number;
+    readonly arrival: ArrivalDistribution;
+    readonly maxInFlight?: number;
+  }
+  | {
+    readonly kind: "closed";
+    readonly concurrency: number;
+    readonly maxInFlight?: number;
+  };
+
+/** A scenario with all defaults applied and all units parsed. */
+export interface ResolvedScenario {
+  readonly name: string;
+  readonly type: ScenarioType;
+  readonly load: LoadModel;
+  readonly durationMs: number;
+  readonly warmupMs: number;
+  readonly signing: SigningMode;
+  readonly signatureTimeWindow: boolean;
+  readonly runs: number;
+  readonly recipients: string[];
+  readonly inbox?: string;
+  readonly activity?: ActivitySpec;
+  readonly authenticated: boolean;
+  readonly collections: string[];
+  readonly source?: ObjectSource;
+  readonly sender?: string;
+  readonly followers?: number;
+  readonly queueDrainTimeoutMs?: number;
+  readonly faults: string[];
+  readonly expect: ExpectBlock;
+  /** The original scenario, for any field not lifted onto this view. */
+  readonly raw: Scenario;
+}
+
+/** A suite with its target resolved and every scenario normalized. */
+export interface ResolvedSuite {
+  readonly target: URL;
+  readonly actors: ActorGroup[];
+  readonly scenarios: ResolvedScenario[];
+}
+
+/** Options for {@link normalizeSuite}. */
+export interface NormalizeOptions {
+  /** A target URL that overrides the suite's `target`. */
+  readonly target?: string;
+}
+
+/** An error raised when a suite cannot be normalized. */
+export class SuiteNormalizeError extends Error {}
+
+/**
+ * Resolves a validated suite into the form the engine executes.
+ * @param suite The validated suite.
+ * @param options Normalization options, such as a target override.
+ * @returns The resolved suite, with its target parsed and scenarios resolved.
+ * @throws {SuiteNormalizeError} If no target is given, the target is not a
+ *         parsable URL, or a scenario violates a cross-field rule.
+ */
+export function normalizeSuite(
+  suite: Suite,
+  options: NormalizeOptions = {},
+): ResolvedSuite {
+  // An explicit override takes precedence over the suite's own target.
+  const rawTarget = options.target ?? suite.target;
+  if (rawTarget == null || rawTarget.trim() === "") {
+    throw new SuiteNormalizeError(
+      "No target URL: set `target` in the suite or pass --target.",
+    );
+  }
+  let target: URL;
+  try {
+    target = new URL(rawTarget);
+  } catch {
+    throw new SuiteNormalizeError(`Invalid target URL: ${rawTarget}.`);
+  }
+  const scenarios = suite.scenarios.map((s) => resolveScenario(s, suite));
+  return {
+    target,
+    actors: suite.actors ?? [],
+    scenarios,
+  };
+}
+
+/** Applies suite defaults to a scenario and checks cross-field rules. */
+function resolveScenario(scenario: Scenario, suite: Suite): ResolvedScenario {
+  const fallbacks = suite.defaults ?? {};
+  const load = resolveLoad(fallbacks.load, scenario.load);
+  const signing = scenario.signing ?? fallbacks.signing ?? DEFAULT_SIGNING;
+  const signatureTimeWindow = scenario.signatureTimeWindow ??
+    fallbacks.signatureTimeWindow ?? false;
+  // Every mode except jit is rejected when the time window is enabled.
+  const preSigned = signing !== "jit";
+  if (preSigned && signatureTimeWindow) {
+    throw new SuiteNormalizeError(
+      `Scenario "${scenario.name}": ${signing} signing pre-signs requests, ` +
+        "which requires the target's signature time window to be off; use " +
+        "signing: jit for a time-windowed target.",
+    );
+  }
+  const duration = scenario.duration ?? fallbacks.duration;
+  const warmup = scenario.warmup ?? fallbacks.warmup;
+  const drainTimeout = scenario.queueDrainTimeout;
+  return {
+    name: scenario.name,
+    type: scenario.type,
+    load,
+    durationMs: resolveDuration(duration, DEFAULT_DURATION_MS),
+    warmupMs: resolveDuration(warmup, DEFAULT_WARMUP_MS),
+    signing,
+    signatureTimeWindow,
+    runs: scenario.runs ?? fallbacks.runs ?? DEFAULT_RUNS,
+    recipients: asList(scenario.recipient),
+    inbox: scenario.inbox,
+    activity: scenario.activity,
+    authenticated: scenario.authenticated ?? false,
+    collections: asList(scenario.collection),
+    source: scenario.source,
+    sender: scenario.sender,
+    followers: scenario.followers,
+    queueDrainTimeoutMs: drainTimeout != null
+      ? parseDuration(drainTimeout)
+      : undefined,
+    faults: asList(scenario.fault),
+    expect: scenario.expect ?? {},
+    raw: scenario,
+  };
+}
+
+/**
+ * Picks the load model for a scenario.
+ *
+ * A `concurrency` or `rate` set on the scenario selects the model outright.
+ * Failing that, the suite defaults select it, and with neither present the
+ * built-in open-loop default rate applies.  Independently of that choice,
+ * `arrival` and `maxInFlight` are taken from the scenario when set and from
+ * the defaults otherwise; `arrival` finally falls back to `"constant"`.
+ * @param defaults The suite-level default load settings, if any.
+ * @param scenario The scenario's own load settings, if any.
+ * @returns The resolved load model.
+ */
+function resolveLoad(
+  defaults: Scenario["load"] | undefined,
+  scenario: Scenario["load"] | undefined,
+): LoadModel {
+  const arrival = scenario?.arrival ?? defaults?.arrival ?? "constant";
+  const maxInFlight = scenario?.maxInFlight ?? defaults?.maxInFlight;
+  // The scenario is consulted before the defaults; within each source an
+  // explicit concurrency takes precedence over a rate.
+  for (const source of [scenario, defaults]) {
+    if (source?.concurrency != null) {
+      return { kind: "closed", concurrency: source.concurrency, maxInFlight };
+    }
+    if (source?.rate != null) {
+      return {
+        kind: "open",
+        ratePerSec: parseRate(source.rate),
+        arrival,
+        maxInFlight,
+      };
+    }
+  }
+  // Neither source chose a model: use the built-in open-loop default.
+  return {
+    kind: "open",
+    ratePerSec: DEFAULT_RATE_PER_SEC,
+    arrival,
+    maxInFlight,
+  };
+}
+
+/** Parses `value` into milliseconds; an unset value yields `fallback`. */
+function resolveDuration(value: string | undefined, fallback: number): number {
+  // `== null` matches both `undefined` and `null`.
+  if (value == null) return fallback;
+  return parseDuration(value);
+}
diff --git a/packages/cli/src/bench/scenario/units.test.ts b/packages/cli/src/bench/scenario/units.test.ts
new file mode 100644
index 000000000..ec5d5b409
--- /dev/null
+++ b/packages/cli/src/bench/scenario/units.test.ts
@@ -0,0 +1,41 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { parseDuration, parseRate } from "./units.ts";
+
+test("parseDuration - parses each unit", () => {
+  assert.strictEqual(parseDuration("500ms"), 500);
+  assert.strictEqual(parseDuration("30s"), 30_000);
+  assert.strictEqual(parseDuration("2m"), 120_000);
+  assert.strictEqual(parseDuration("1h"), 3_600_000);
+  assert.strictEqual(parseDuration("1.5s"), 1500);
+});
+
+test("parseDuration - rejects invalid input", () => {
+  assert.throws(() => parseDuration("30"), RangeError);
+  assert.throws(() => parseDuration("abc"), RangeError);
+  assert.throws(() => parseDuration("10 s"), RangeError);
+});
+
+test("parseRate - bare number is per second", () => {
+  assert.strictEqual(parseRate(200), 200);
+});
+
+test("parseRate - parses each time unit", () => {
+  assert.strictEqual(parseRate("200/s"), 200);
+  assert.strictEqual(parseRate("60/m"), 1);
+  assert.strictEqual(parseRate("3600/h"), 1);
+  assert.strictEqual(parseRate("100 / s"), 100);
+});
+
+test("parseRate - rejects invalid or non-positive input", () => {
+  assert.throws(() => parseRate("abc"), RangeError);
+  assert.throws(() => parseRate("0"), RangeError);
+  assert.throws(() => parseRate("0/s"), RangeError);
+  assert.throws(() => parseRate(0), RangeError);
+  assert.throws(() => parseRate(-5), RangeError);
+});
+
+test("parseRate/parseDuration - reject overflowing magnitudes", () => {
+  assert.throws(() => parseRate(`${"9".repeat(400)}/s`), RangeError);
+  assert.throws(() => parseDuration(`${"9".repeat(400)}h`), RangeError);
+});
diff --git a/packages/cli/src/bench/scenario/units.ts b/packages/cli/src/bench/scenario/units.ts
new file mode 100644
index 000000000..6f51b5c42
--- /dev/null
+++ b/packages/cli/src/bench/scenario/units.ts
@@ -0,0 +1,66 @@
+/**
+ * Parsers for the human-friendly duration and rate units used in scenario
+ * files.
+ * @since 2.3.0
+ * @module
+ */
+
+const DURATION_RE = /^(\d+(?:\.\d+)?)(ms|s|m|h)$/;
+const DURATION_UNITS: Readonly<Record<string, number>> = {
+  ms: 1,
+  s: 1000,
+  m: 60_000,
+  h: 3_600_000,
+};
+
+const RATE_RE = /^(\d+(?:\.\d+)?)\s*\/\s*(s|m|h)$/;
+const RATE_DIVISORS: Readonly<Record<string, number>> = {
+  s: 1,
+  m: 60,
+  h: 3600,
+};
+
+/**
+ * Converts a duration literal (`"500ms"`, `"30s"`, `"2m"`, `"1h"`; fractional
+ * magnitudes such as `"1.5s"` are accepted) to milliseconds.
+ * @param value The duration string.
+ * @returns The duration in milliseconds.
+ * @throws {RangeError} If the value is malformed or not finite.
+ */
+export function parseDuration(value: string): number {
+  const parts = DURATION_RE.exec(value);
+  if (parts == null) {
+    throw new RangeError(`Invalid duration: ${JSON.stringify(value)}.`);
+  }
+  const [, magnitude, unit] = parts;
+  const ms = Number.parseFloat(magnitude) * DURATION_UNITS[unit];
+  // An overlong magnitude parses to Infinity; reject it instead of returning.
+  if (Number.isFinite(ms)) return ms;
+  throw new RangeError(`Duration out of range: ${JSON.stringify(value)}.`);
+}
+
+/**
+ * Converts an open-loop arrival rate to requests per second.  A number is
+ * taken as requests per second already; a string must carry a time unit, as
+ * in `"200/s"`, `"60/m"`, or `"3600/h"`.
+ * @param value The rate string or number.
+ * @returns The rate in requests per second.
+ * @throws {RangeError} If the value cannot be parsed or is not positive.
+ */
+export function parseRate(value: string | number): number {
+  // A positive, finite number is the only acceptable rate.
+  const usable = (rate: number): boolean => Number.isFinite(rate) && rate > 0;
+  // Numbers need no unit conversion, only the range check.
+  if (typeof value === "number") {
+    if (usable(value)) return value;
+    throw new RangeError(`Invalid rate: ${value}.`);
+  }
+  const parts = RATE_RE.exec(value);
+  if (parts != null) {
+    const [, magnitude, unit] = parts;
+    const rate = Number.parseFloat(magnitude) / RATE_DIVISORS[unit];
+    if (usable(rate)) return rate;
+  }
+  // No match and an unusable magnitude are reported with the same message.
+  throw new RangeError(`Invalid rate: ${JSON.stringify(value)}.`);
+}

From 065ea3097ce271efc22f4f0a25a25c6d7efbe143 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Thu, 4 Jun 2026 22:45:35 +0900
Subject: [PATCH 06/47] Add the benchmark report model and its JSON Schema

Define the canonical benchmark report: the single result model from
which the terminal, JSON, and Markdown renderers all derive, so the
outputs can never drift apart.  JSON is the canonical machine form,
pinned by a published draft-2020-12 schema (schema/bench/report-v1.json).

The model splits `client` and `server` numbers by nesting so it is clear
which the load generator measured and which came from the target's stats
endpoint, bakes the unit into numeric keys (latencyMs, drainMs), turns
each expect assertion into an evaluated record, and carries first-class
environment/target/configHash reproducibility metadata plus an optional
serialized histogram.

The report schema is registered alongside the scenario schema, so the
existing structural, fixture, drift, and immutability guards now cover it
too; a valid report fixture is added.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 .../__fixtures__/reports/inbox-report.json    |  92 ++++
 packages/cli/src/bench/result/model.ts        | 168 ++++++
 packages/cli/src/bench/result/schema.ts       | 286 ++++++++++
 packages/cli/src/bench/schema.test.ts         |  53 +-
 packages/cli/src/bench/schemas.ts             |   6 +
 schema/bench/report-v1.json                   | 520 ++++++++++++++++++
 6 files changed, 1106 insertions(+), 19 deletions(-)
 create mode 100644 packages/cli/src/bench/__fixtures__/reports/inbox-report.json
 create mode 100644 packages/cli/src/bench/result/model.ts
 create mode 100644 packages/cli/src/bench/result/schema.ts
 create mode 100644 schema/bench/report-v1.json

diff --git a/packages/cli/src/bench/__fixtures__/reports/inbox-report.json b/packages/cli/src/bench/__fixtures__/reports/inbox-report.json
new file mode 100644
index 000000000..b952b1f5a
--- /dev/null
+++ b/packages/cli/src/bench/__fixtures__/reports/inbox-report.json
@@ -0,0 +1,92 @@
+{
+  "$schema": "https://json-schema.fedify.dev/bench/report-v1.json",
+  "schemaVersion": 1,
+  "tool": { "name": "@fedify/cli", "version": "2.3.0" },
+  "environment": {
+    "runtime": "deno",
+    "runtimeVersion": "2.5.0",
+    "os": "linux",
+    "cpuCount": 16
+  },
+  "target": {
+    "url": "http://localhost:3000",
+    "fedifyVersion": "2.3.0",
+    "statsAvailable": true
+  },
+  "startedAt": "2026-06-04T12:00:00.000Z",
+  "finishedAt": "2026-06-04T12:01:10.000Z",
+  "suite": { "name": "Inbox regression suite", "configHash": "sha256:abc123" },
+  "passed": true,
+  "scenarios": [
+    {
+      "name": "inbox-shared",
+      "type": "inbox",
+      "load": {
+        "model": "closed",
+        "concurrency": 50,
+        "durationMs": 60000,
+        "warmupMs": 10000
+      },
+      "requests": {
+        "total": 18240,
+        "ok": 18137,
+        "failed": 103,
+        "successRate": 0.9944
+      },
+      "throughputPerSec": 304.0,
+      "client": {
+        "latencyMs": {
+          "p50": 24,
+          "p95": 91,
+          "p99": 184,
+          "mean": 31.2,
+          "max": 412
+        }
+      },
+      "server": {
+        "signatureVerificationMs": {
+          "overall": { "p50": 6, "p95": 12, "p99": 28 },
+          "byStandard": {
+            "draft-cavage-http-signatures-12": { "p50": 7, "p95": 13 }
+          }
+        },
+        "queue": { "drainMs": { "p50": 900, "p95": 1800 }, "depthMax": 1240 }
+      },
+      "errors": [
+        {
+          "kind": "http",
+          "status": 401,
+          "reason": "signature_failed",
+          "count": 72
+        },
+        {
+          "kind": "http",
+          "status": 500,
+          "reason": "handler_error",
+          "count": 31
+        }
+      ],
+      "expectations": [
+        {
+          "metric": "latency.p95",
+          "op": "lt",
+          "threshold": 100,
+          "unit": "ms",
+          "actual": 91,
+          "severity": "fail",
+          "pass": true
+        },
+        {
+          "metric": "successRate",
+          "op": "gte",
+          "threshold": 0.99,
+          "unit": "%",
+          "actual": 0.9944,
+          "severity": "fail",
+          "pass": true
+        }
+      ],
+      "passed": true
+    }
+  ]
+}
diff --git a/packages/cli/src/bench/result/model.ts b/packages/cli/src/bench/result/model.ts
new file mode 100644
index 000000000..44b885992
--- /dev/null
+++ b/packages/cli/src/bench/result/model.ts
@@ -0,0 +1,168 @@
+/**
+ * Hand-written TypeScript types for the canonical benchmark report model.
+ *
+ * The report is the single result model from which the terminal, JSON, and
+ * Markdown renderers all derive, so the three outputs can never drift apart.
+ * JSON is the canonical machine form, pinned by the published schema in
+ * {@link ./schema.ts} and *schema/bench/report-v1.json*.
+ *
+ * Conventions:
+ *
+ *  -  `client` and `server` numbers are split by nesting, honoring the
+ *     requirement that the report makes clear which numbers the load generator
+ *     measured and which came from the target's `stats` endpoint.
+ *  -  Numeric keys bake in their unit (`latencyMs`, `drainMs`) so no consumer
+ *     parses `"1.8s"`.
+ *  -  Each `expect` assertion becomes an evaluated record, with a top-level
+ *     `passed`, so the report is a self-contained CI gate.
+ * @since 2.3.0
+ * @module
+ */
+
+import type { ScenarioType } from "../scenario/types.ts";
+import type { SerializedHistogram } from "../metrics/histogram.ts";
+
+/** The reproducibility environment a run was measured in. */
+export interface Environment {
+  /** The JavaScript runtime, e.g. `"node"`, `"deno"`, or `"bun"`. */
+  readonly runtime: string;
+  /** The runtime version string. */
+  readonly runtimeVersion: string;
+  /** The operating system, e.g. `"linux"`. */
+  readonly os: string;
+  /** The number of logical CPUs. */
+  readonly cpuCount: number;
+}
+
+/** Information about the benchmarked target. */
+export interface TargetInfo {
+  /** The target base URL. */
+  readonly url: string;
+  /** The target's Fedify version, if it could be determined. */
+  readonly fedifyVersion?: string | null;
+  /** Whether the target's `stats` endpoint was available. */
+  readonly statsAvailable: boolean;
+}
+
+/** A latency distribution measured by the client (all values milliseconds). */
+export interface LatencyMs {
+  readonly p50: number;
+  readonly p95: number;
+  readonly p99: number;
+  readonly mean: number;
+  readonly max: number;
+}
+
+/** A partial latency distribution as projected from server metrics. */
+export interface PartialLatencyMs {
+  readonly p50?: number;
+  readonly p95?: number;
+  readonly p99?: number;
+}
+
+/** The load model summary recorded in a scenario result. */
+export type LoadSummary =
+  | {
+    readonly model: "open";
+    readonly ratePerSec: number;
+    readonly arrival: string;
+    readonly durationMs: number;
+    readonly warmupMs: number;
+    readonly maxInFlight?: number;
+  }
+  | {
+    readonly model: "closed";
+    readonly concurrency: number;
+    readonly durationMs: number;
+    readonly warmupMs: number;
+    readonly maxInFlight?: number;
+  };
+
+/** A request count summary. */
+export interface RequestSummary {
+  readonly total: number;
+  readonly ok: number;
+  readonly failed: number;
+  readonly successRate: number;
+}
+
+/** Client-measured metrics. */
+export interface ClientMetrics {
+  readonly latencyMs: LatencyMs;
+}
+
+/** Server-reported metrics, read from the target's `stats` endpoint. */
+export interface ServerMetrics {
+  readonly signatureVerificationMs?: {
+    readonly overall: PartialLatencyMs;
+    readonly byStandard?: Record<string, PartialLatencyMs>;
+  };
+  readonly queue?: {
+    readonly drainMs?: PartialLatencyMs;
+    readonly depthMax?: number;
+  };
+}
+
+/** An aggregated error bucket. */
+export interface ErrorBucket {
+  /** The error kind, e.g. `"http"` or `"network"`. */
+  readonly kind: string;
+  /** The HTTP status code, when applicable. */
+  readonly status?: number;
+  /** A short machine-readable reason. */
+  readonly reason: string;
+  /** How many times this error occurred. */
+  readonly count: number;
+}
+
+/** A comparison operator in an evaluated expectation. */
+export type ExpectOp = "lt" | "lte" | "gt" | "gte" | "eq";
+
+/** An evaluated `expect` assertion. */
+export interface ExpectResult {
+  /** The metric name, e.g. `"latency.p95"`. */
+  readonly metric: string;
+  /** The comparison operator. */
+  readonly op: ExpectOp;
+  /** The normalized numeric threshold. */
+  readonly threshold: number;
+  /** The threshold's unit (`"ms"`, `"%"`, `"/s"`), or `null` for a count. */
+  readonly unit: string | null;
+  /** The measured value in the same normalized unit, or `null` if absent. */
+  readonly actual: number | null;
+  /** The assertion severity. */
+  readonly severity: "warn" | "fail";
+  /** Whether the assertion held. */
+  readonly pass: boolean;
+}
+
+/** The result of one scenario. */
+export interface ScenarioResult {
+  readonly name: string;
+  readonly type: ScenarioType;
+  readonly load: LoadSummary;
+  readonly requests: RequestSummary;
+  readonly throughputPerSec: number;
+  readonly client: ClientMetrics;
+  readonly server: ServerMetrics | null;
+  readonly errors: ErrorBucket[];
+  readonly expectations: ExpectResult[];
+  readonly passed: boolean;
+  /** An optional serialized client latency histogram for re-aggregation. */
+  readonly histogram?: SerializedHistogram;
+}
+
+/** A complete benchmark report. */
+export interface BenchReport {
+  /** The published report schema URL. */
+  readonly $schema?: string;
+  readonly schemaVersion: 1;
+  readonly tool: { readonly name: string; readonly version: string };
+  readonly environment: Environment;
+  readonly target: TargetInfo;
+  readonly startedAt: string;
+  readonly finishedAt: string;
+  readonly suite: { readonly name?: string; readonly configHash: string };
+  readonly passed: boolean;
+  readonly scenarios: ScenarioResult[];
+}
diff --git a/packages/cli/src/bench/result/schema.ts b/packages/cli/src/bench/result/schema.ts
new file mode 100644
index 000000000..b465bf88f
--- /dev/null
+++ b/packages/cli/src/bench/result/schema.ts
@@ -0,0 +1,286 @@
+/**
+ * The embedded JSON Schema (draft 2020-12) for benchmark report output.
+ *
+ * Like the scenario schema, this object is the runtime copy and is published,
+ * byte-for-byte, as *schema/bench/report-v1.json*; a drift guard keeps the two
+ * in sync.  The matching TypeScript types live in {@link ./model.ts}.
+ * @since 2.3.0
+ * @module
+ */
+
+/** The hosted URL that serves the report schema. */
+export const REPORT_SCHEMA_ID =
+  "https://json-schema.fedify.dev/bench/report-v1.json";
+
+/** The benchmark report JSON Schema (draft 2020-12). */
+export const reportSchemaV1 = {
+  $schema: "https://json-schema.org/draft/2020-12/schema",
+  $id: REPORT_SCHEMA_ID,
+  title: "Fedify benchmark report",
+  type: "object",
+  additionalProperties: false,
+  required: [
+    "schemaVersion",
+    "tool",
+    "environment",
+    "target",
+    "startedAt",
+    "finishedAt",
+    "suite",
+    "passed",
+    "scenarios",
+  ],
+  properties: {
+    $schema: { type: "string" },
+    schemaVersion: { const: 1 },
+    tool: {
+      type: "object",
+      additionalProperties: false,
+      required: ["name", "version"],
+      properties: {
+        name: { type: "string" },
+        version: { type: "string" },
+      },
+    },
+    environment: {
+      type: "object",
+      additionalProperties: false,
+      required: ["runtime", "runtimeVersion", "os", "cpuCount"],
+      properties: {
+        runtime: { type: "string" },
+        runtimeVersion: { type: "string" },
+        os: { type: "string" },
+        cpuCount: { type: "integer", minimum: 0 },
+      },
+    },
+    target: {
+      type: "object",
+      additionalProperties: false,
+      required: ["url", "statsAvailable"],
+      properties: {
+        url: { type: "string" },
+        fedifyVersion: { type: ["string", "null"] },
+        statsAvailable: { type: "boolean" },
+      },
+    },
+    startedAt: { type: "string" },
+    finishedAt: { type: "string" },
+    suite: {
+      type: "object",
+      additionalProperties: false,
+      required: ["configHash"],
+      properties: {
+        name: { type: "string" },
+        configHash: { type: "string" },
+      },
+    },
+    passed: { type: "boolean" },
+    scenarios: {
+      type: "array",
+      items: { $ref: "#/$defs/scenarioResult" },
+    },
+  },
+  $defs: {
+    latencyMs: {
+      type: "object",
+      additionalProperties: false,
+      required: ["p50", "p95", "p99", "mean", "max"],
+      properties: {
+        p50: { type: "number" },
+        p95: { type: "number" },
+        p99: { type: "number" },
+        mean: { type: "number" },
+        max: { type: "number" },
+      },
+    },
+    partialLatencyMs: {
+      type: "object",
+      additionalProperties: false,
+      properties: {
+        p50: { type: "number" },
+        p95: { type: "number" },
+        p99: { type: "number" },
+      },
+    },
+    loadSummary: {
+      type: "object",
+      additionalProperties: false,
+      required: ["model", "durationMs", "warmupMs"],
+      properties: {
+        model: { enum: ["open", "closed"] },
+        ratePerSec: { type: "number" },
+        arrival: { type: "string" },
+        concurrency: { type: "integer" },
+        durationMs: { type: "number" },
+        warmupMs: { type: "number" },
+        maxInFlight: { type: "integer" },
+      },
+      oneOf: [
+        {
+          properties: { model: { const: "open" } },
+          required: ["ratePerSec", "arrival"],
+          not: { required: ["concurrency"] },
+        },
+        {
+          properties: { model: { const: "closed" } },
+          required: ["concurrency"],
+          not: {
+            anyOf: [{ required: ["ratePerSec"] }, { required: ["arrival"] }],
+          },
+        },
+      ],
+    },
+    requestSummary: {
+      type: "object",
+      additionalProperties: false,
+      required: ["total", "ok", "failed", "successRate"],
+      properties: {
+        total: { type: "integer", minimum: 0 },
+        ok: { type: "integer", minimum: 0 },
+        failed: { type: "integer", minimum: 0 },
+        successRate: { type: "number", minimum: 0, maximum: 1 },
+      },
+    },
+    clientMetrics: {
+      type: "object",
+      additionalProperties: false,
+      required: ["latencyMs"],
+      properties: {
+        latencyMs: { $ref: "#/$defs/latencyMs" },
+      },
+    },
+    serverMetrics: {
+      type: "object",
+      additionalProperties: false,
+      properties: {
+        signatureVerificationMs: {
+          type: "object",
+          additionalProperties: false,
+          required: ["overall"],
+          properties: {
+            overall: { $ref: "#/$defs/partialLatencyMs" },
+            byStandard: {
+              type: "object",
+              additionalProperties: { $ref: "#/$defs/partialLatencyMs" },
+            },
+          },
+        },
+        queue: {
+          type: "object",
+          additionalProperties: false,
+          properties: {
+            drainMs: { $ref: "#/$defs/partialLatencyMs" },
+            depthMax: { type: "number" },
+          },
+        },
+      },
+    },
+    errorBucket: {
+      type: "object",
+      additionalProperties: false,
+      required: ["kind", "reason", "count"],
+      properties: {
+        kind: { type: "string" },
+        status: { type: "integer" },
+        reason: { type: "string" },
+        count: { type: "integer", minimum: 0 },
+      },
+    },
+    expectResult: {
+      type: "object",
+      additionalProperties: false,
+      required: [
+        "metric",
+        "op",
+        "threshold",
+        "unit",
+        "actual",
+        "severity",
+        "pass",
+      ],
+      properties: {
+        metric: { type: "string" },
+        op: { enum: ["lt", "lte", "gt", "gte", "eq"] },
+        threshold: { type: "number" },
+        unit: { type: ["string", "null"] },
+        actual: { type: ["number", "null"] },
+        severity: { enum: ["warn", "fail"] },
+        pass: { type: "boolean" },
+      },
+    },
+    scenarioResult: {
+      type: "object",
+      additionalProperties: false,
+      required: [
+        "name",
+        "type",
+        "load",
+        "requests",
+        "throughputPerSec",
+        "client",
+        "server",
+        "errors",
+        "expectations",
+        "passed",
+      ],
+      properties: {
+        name: { type: "string" },
+        type: {
+          enum: [
+            "inbox",
+            "webfinger",
+            "actor",
+            "object",
+            "fanout",
+            "collection",
+            "failure",
+            "mixed",
+          ],
+        },
+        load: { $ref: "#/$defs/loadSummary" },
+        requests: { $ref: "#/$defs/requestSummary" },
+        throughputPerSec: { type: "number" },
+        client: { $ref: "#/$defs/clientMetrics" },
+        server: {
+          anyOf: [{ $ref: "#/$defs/serverMetrics" }, { type: "null" }],
+        },
+        errors: {
+          type: "array",
+          items: { $ref: "#/$defs/errorBucket" },
+        },
+        expectations: {
+          type: "array",
+          items: { $ref: "#/$defs/expectResult" },
+        },
+        passed: { type: "boolean" },
+        histogram: { $ref: "#/$defs/serializedHistogram" },
+      },
+    },
+    serializedHistogram: {
+      type: "object",
+      additionalProperties: false,
+      required: [
+        "version",
+        "subBucketCount",
+        "count",
+        "zeroCount",
+        "min",
+        "max",
+        "sum",
+        "indices",
+        "counts",
+      ],
+      properties: {
+        version: { const: 1 },
+        subBucketCount: { type: "integer", minimum: 1 },
+        count: { type: "integer", minimum: 0 },
+        zeroCount: { type: "integer", minimum: 0 },
+        min: { type: "number" },
+        max: { type: "number" },
+        sum: { type: "number" },
+        indices: { type: "array", items: { type: "integer" } },
+        counts: { type: "array", items: { type: "integer", minimum: 0 } },
+      },
+    },
+  },
+} as const;
diff --git a/packages/cli/src/bench/schema.test.ts b/packages/cli/src/bench/schema.test.ts
index d9702d41d..cef8f2c69 100644
--- a/packages/cli/src/bench/schema.test.ts
+++ b/packages/cli/src/bench/schema.test.ts
@@ -51,34 +51,49 @@ for (const { name, fileName, schema } of PUBLISHED_SCHEMAS) {
 }
 
 // Guard 2: example-fixture validation.
-const scenarioSchema = PUBLISHED_SCHEMAS.find((s) => s.name === "scenario")!;
-const scenarioValidator = new Validator(
-  scenarioSchema.schema as unknown as Schema,
-  "2020-12",
+const validators = new Map(
+  PUBLISHED_SCHEMAS.map((
+    s,
+  ) => [s.name, new Validator(s.schema as unknown as Schema, "2020-12")]),
 );
 
+interface FixtureGroup {
+  readonly dir: string;
+  readonly schema: string;
+  readonly valid: boolean;
+}
+
+const FIXTURE_GROUPS: readonly FixtureGroup[] = [
+  { dir: "scenarios", schema: "scenario", valid: true },
+  { dir: "invalid", schema: "scenario", valid: false },
+  { dir: "reports", schema: "report", valid: true },
+];
+
 function fixtureFiles(dir: string): string[] {
   return readdirSync(join(FIXTURES, dir))
     .filter((f) => /\.(ya?ml|json)$/.test(f))
     .map((f) => join(FIXTURES, dir, f));
 }
 
-for (const file of fixtureFiles("scenarios")) {
-  test(`schema guard - valid fixture ${file.split("/").pop()}`, () => {
-    const suite = parseSuiteText(readFileSync(file, "utf-8"));
-    const result = scenarioValidator.validate(suite);
-    assert.ok(
-      result.valid,
-      `expected valid, got: ${JSON.stringify(result.errors)}`,
+for (const group of FIXTURE_GROUPS) {
+  const validator = validators.get(group.schema)!;
+  for (const file of fixtureFiles(group.dir)) {
+    const label = `${group.dir}/${file.split("/").pop()}`;
+    test(
+      `schema guard - fixture ${label} is ${group.valid ? "valid" : "invalid"}`,
+      () => {
+        const value = parseSuiteText(readFileSync(file, "utf-8"));
+        const result = validator.validate(value);
+        assert.strictEqual(
+          result.valid,
+          group.valid,
+          group.valid
+            ? `expected valid, got: ${JSON.stringify(result.errors)}`
+            : "expected invalid",
+        );
+      },
     );
-  });
-}
-
-for (const file of fixtureFiles("invalid")) {
-  test(`schema guard - invalid fixture ${file.split("/").pop()}`, () => {
-    const suite = parseSuiteText(readFileSync(file, "utf-8"));
-    assert.ok(!scenarioValidator.validate(suite).valid, "expected invalid");
-  });
+  }
 }
 
 // Guard 3: drift between embedded schema and the published file.
diff --git a/packages/cli/src/bench/schemas.ts b/packages/cli/src/bench/schemas.ts
index 7a249e8cc..3812d6655 100644
--- a/packages/cli/src/bench/schemas.ts
+++ b/packages/cli/src/bench/schemas.ts
@@ -9,6 +9,7 @@
  * @module
  */
 
+import { reportSchemaV1 } from "./result/schema.ts";
 import { scenarioSchemaV1 } from "./scenario/schema.ts";
 
 /** A published JSON Schema and where it is hosted. */
@@ -28,4 +29,9 @@ export const PUBLISHED_SCHEMAS: readonly PublishedSchema[] = [
     fileName: "scenario-v1.json",
     schema: scenarioSchemaV1 as unknown as Record<string, unknown>,
   },
+  {
+    name: "report",
+    fileName: "report-v1.json",
+    schema: reportSchemaV1 as unknown as Record<string, unknown>,
+  },
 ];
diff --git a/schema/bench/report-v1.json b/schema/bench/report-v1.json
new file mode 100644
index 000000000..e5fbfe72a
--- /dev/null
+++ b/schema/bench/report-v1.json
@@ -0,0 +1,520 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://json-schema.fedify.dev/bench/report-v1.json",
+  "title": "Fedify benchmark report",
+  "type": "object",
+  "additionalProperties": false,
+  "required": [
+    "schemaVersion",
+    "tool",
+    "environment",
+    "target",
+    "startedAt",
+    "finishedAt",
+    "suite",
+    "passed",
+    "scenarios"
+  ],
+  "properties": {
+    "$schema": {
+      "type": "string"
+    },
+    "schemaVersion": {
+      "const": 1
+    },
+    "tool": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "name",
+        "version"
+      ],
+      "properties": {
+        "name": {
+          "type": "string"
+        },
+        "version": {
+          "type": "string"
+        }
+      }
+    },
+    "environment": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "runtime",
+        "runtimeVersion",
+        "os",
+        "cpuCount"
+      ],
+      "properties": {
+        "runtime": {
+          "type": "string"
+        },
+        "runtimeVersion": {
+          "type": "string"
+        },
+        "os": {
+          "type": "string"
+        },
+        "cpuCount": {
+          "type": "integer",
+          "minimum": 0
+        }
+      }
+    },
+    "target": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "url",
+        "statsAvailable"
+      ],
+      "properties": {
+        "url": {
+          "type": "string"
+        },
+        "fedifyVersion": {
+          "type": [
+            "string",
+            "null"
+          ]
+        },
+        "statsAvailable": {
+          "type": "boolean"
+        }
+      }
+    },
+    "startedAt": {
+      "type": "string"
+    },
+    "finishedAt": {
+      "type": "string"
+    },
+    "suite": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "configHash"
+      ],
+      "properties": {
+        "name": {
+          "type": "string"
+        },
+        "configHash": {
+          "type": "string"
+        }
+      }
+    },
+    "passed": {
+      "type": "boolean"
+    },
+    "scenarios": {
+      "type": "array",
+      "items": {
+        "$ref": "#/$defs/scenarioResult"
+      }
+    }
+  },
+  "$defs": {
+    "latencyMs": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "p50",
+        "p95",
+        "p99",
+        "mean",
+        "max"
+      ],
+      "properties": {
+        "p50": {
+          "type": "number"
+        },
+        "p95": {
+          "type": "number"
+        },
+        "p99": {
+          "type": "number"
+        },
+        "mean": {
+          "type": "number"
+        },
+        "max": {
+          "type": "number"
+        }
+      }
+    },
+    "partialLatencyMs": {
+      "type": "object",
+      "additionalProperties": false,
+      "properties": {
+        "p50": {
+          "type": "number"
+        },
+        "p95": {
+          "type": "number"
+        },
+        "p99": {
+          "type": "number"
+        }
+      }
+    },
+    "loadSummary": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "model",
+        "durationMs",
+        "warmupMs"
+      ],
+      "properties": {
+        "model": {
+          "enum": [
+            "open",
+            "closed"
+          ]
+        },
+        "ratePerSec": {
+          "type": "number"
+        },
+        "arrival": {
+          "type": "string"
+        },
+        "concurrency": {
+          "type": "integer"
+        },
+        "durationMs": {
+          "type": "number"
+        },
+        "warmupMs": {
+          "type": "number"
+        },
+        "maxInFlight": {
+          "type": "integer"
+        }
+      },
+      "oneOf": [
+        {
+          "properties": {
+            "model": {
+              "const": "open"
+            }
+          },
+          "required": [
+            "ratePerSec",
+            "arrival"
+          ],
+          "not": {
+            "required": [
+              "concurrency"
+            ]
+          }
+        },
+        {
+          "properties": {
+            "model": {
+              "const": "closed"
+            }
+          },
+          "required": [
+            "concurrency"
+          ],
+          "not": {
+            "anyOf": [
+              {
+                "required": [
+                  "ratePerSec"
+                ]
+              },
+              {
+                "required": [
+                  "arrival"
+                ]
+              }
+            ]
+          }
+        }
+      ]
+    },
+    "requestSummary": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "total",
+        "ok",
+        "failed",
+        "successRate"
+      ],
+      "properties": {
+        "total": {
+          "type": "integer",
+          "minimum": 0
+        },
+        "ok": {
+          "type": "integer",
+          "minimum": 0
+        },
+        "failed": {
+          "type": "integer",
+          "minimum": 0
+        },
+        "successRate": {
+          "type": "number",
+          "minimum": 0,
+          "maximum": 1
+        }
+      }
+    },
+    "clientMetrics": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "latencyMs"
+      ],
+      "properties": {
+        "latencyMs": {
+          "$ref": "#/$defs/latencyMs"
+        }
+      }
+    },
+    "serverMetrics": {
+      "type": "object",
+      "additionalProperties": false,
+      "properties": {
+        "signatureVerificationMs": {
+          "type": "object",
+          "additionalProperties": false,
+          "required": [
+            "overall"
+          ],
+          "properties": {
+            "overall": {
+              "$ref": "#/$defs/partialLatencyMs"
+            },
+            "byStandard": {
+              "type": "object",
+              "additionalProperties": {
+                "$ref": "#/$defs/partialLatencyMs"
+              }
+            }
+          }
+        },
+        "queue": {
+          "type": "object",
+          "additionalProperties": false,
+          "properties": {
+            "drainMs": {
+              "$ref": "#/$defs/partialLatencyMs"
+            },
+            "depthMax": {
+              "type": "number"
+            }
+          }
+        }
+      }
+    },
+    "errorBucket": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "kind",
+        "reason",
+        "count"
+      ],
+      "properties": {
+        "kind": {
+          "type": "string"
+        },
+        "status": {
+          "type": "integer"
+        },
+        "reason": {
+          "type": "string"
+        },
+        "count": {
+          "type": "integer",
+          "minimum": 0
+        }
+      }
+    },
+    "expectResult": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "metric",
+        "op",
+        "threshold",
+        "unit",
+        "actual",
+        "severity",
+        "pass"
+      ],
+      "properties": {
+        "metric": {
+          "type": "string"
+        },
+        "op": {
+          "enum": [
+            "lt",
+            "lte",
+            "gt",
+            "gte",
+            "eq"
+          ]
+        },
+        "threshold": {
+          "type": "number"
+        },
+        "unit": {
+          "type": [
+            "string",
+            "null"
+          ]
+        },
+        "actual": {
+          "type": [
+            "number",
+            "null"
+          ]
+        },
+        "severity": {
+          "enum": [
+            "warn",
+            "fail"
+          ]
+        },
+        "pass": {
+          "type": "boolean"
+        }
+      }
+    },
+    "scenarioResult": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "name",
+        "type",
+        "load",
+        "requests",
+        "throughputPerSec",
+        "client",
+        "server",
+        "errors",
+        "expectations",
+        "passed"
+      ],
+      "properties": {
+        "name": {
+          "type": "string"
+        },
+        "type": {
+          "enum": [
+            "inbox",
+            "webfinger",
+            "actor",
+            "object",
+            "fanout",
+            "collection",
+            "failure",
+            "mixed"
+          ]
+        },
+        "load": {
+          "$ref": "#/$defs/loadSummary"
+        },
+        "requests": {
+          "$ref": "#/$defs/requestSummary"
+        },
+        "throughputPerSec": {
+          "type": "number"
+        },
+        "client": {
+          "$ref": "#/$defs/clientMetrics"
+        },
+        "server": {
+          "anyOf": [
+            {
+              "$ref": "#/$defs/serverMetrics"
+            },
+            {
+              "type": "null"
+            }
+          ]
+        },
+        "errors": {
+          "type": "array",
+          "items": {
+            "$ref": "#/$defs/errorBucket"
+          }
+        },
+        "expectations": {
+          "type": "array",
+          "items": {
+            "$ref": "#/$defs/expectResult"
+          }
+        },
+        "passed": {
+          "type": "boolean"
+        },
+        "histogram": {
+          "$ref": "#/$defs/serializedHistogram"
+        }
+      }
+    },
+    "serializedHistogram": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "version",
+        "subBucketCount",
+        "count",
+        "zeroCount",
+        "min",
+        "max",
+        "sum",
+        "indices",
+        "counts"
+      ],
+      "properties": {
+        "version": {
+          "const": 1
+        },
+        "subBucketCount": {
+          "type": "integer",
+          "minimum": 1
+        },
+        "count": {
+          "type": "integer",
+          "minimum": 0
+        },
+        "zeroCount": {
+          "type": "integer",
+          "minimum": 0
+        },
+        "min": {
+          "type": "number"
+        },
+        "max": {
+          "type": "number"
+        },
+        "sum": {
+          "type": "number"
+        },
+        "indices": {
+          "type": "array",
+          "items": {
+            "type": "integer"
+          }
+        },
+        "counts": {
+          "type": "array",
+          "items": {
+            "type": "integer",
+            "minimum": 0
+          }
+        }
+      }
+    }
+  }
+}

From eb3379f64e603b3d30c187422706cdf3621a9807 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Thu, 4 Jun 2026 22:56:17 +0900
Subject: [PATCH 07/47] Evaluate expect assertions against measured metrics

Turn each scenario's `expect` block into evaluated records that gate a
run.  `parseAssertion()` parses a human assertion (">= 99%", "< 100ms",
"< 2s", ">= 500/s", "== 0") into an operator and a machine-clean
threshold: percentages become ratios, durations milliseconds, rates per
second.  `evaluateExpect()` looks each metric up by name (successRate,
throughputPerSec, errors.4xx/5xx/total, latency.*, signatureVerification.*,
queueDrain.*), checks the assertion's unit is compatible with the
metric's natural unit, and compares.  Equality is tolerant for float
metrics but exact for counts.  A `fail`-severity assertion gates the
build while `warn` only annotates, and a missing or unmeasured metric
fails cleanly.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 .../src/bench/result/expect/assert.test.ts    |  61 ++++++
 .../cli/src/bench/result/expect/assert.ts     |  97 +++++++++
 .../src/bench/result/expect/evaluate.test.ts  | 121 ++++++++++++
 .../cli/src/bench/result/expect/evaluate.ts   | 185 ++++++++++++++++++
 4 files changed, 464 insertions(+)
 create mode 100644 packages/cli/src/bench/result/expect/assert.test.ts
 create mode 100644 packages/cli/src/bench/result/expect/assert.ts
 create mode 100644 packages/cli/src/bench/result/expect/evaluate.test.ts
 create mode 100644 packages/cli/src/bench/result/expect/evaluate.ts

diff --git a/packages/cli/src/bench/result/expect/assert.test.ts b/packages/cli/src/bench/result/expect/assert.test.ts
new file mode 100644
index 000000000..4a218f8aa
--- /dev/null
+++ b/packages/cli/src/bench/result/expect/assert.test.ts
@@ -0,0 +1,61 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { AssertionParseError, compare, parseAssertion } from "./assert.ts";
+
+test("parseAssertion - normalizes percentages to ratios", () => {
+  assert.deepEqual(parseAssertion(">= 99%"), {
+    op: "gte",
+    threshold: 0.99,
+    unit: "%",
+  });
+});
+
+test("parseAssertion - normalizes durations to milliseconds", () => {
+  assert.deepEqual(parseAssertion("< 100ms"), {
+    op: "lt",
+    threshold: 100,
+    unit: "ms",
+  });
+  assert.deepEqual(parseAssertion("< 2s"), {
+    op: "lt",
+    threshold: 2000,
+    unit: "ms",
+  });
+});
+
+test("parseAssertion - keeps rates per second and bare counts", () => {
+  assert.deepEqual(parseAssertion(">= 500/s"), {
+    op: "gte",
+    threshold: 500,
+    unit: "/s",
+  });
+  assert.deepEqual(parseAssertion("== 0"), {
+    op: "eq",
+    threshold: 0,
+    unit: null,
+  });
+});
+
+test("parseAssertion - rejects malformed assertions", () => {
+  assert.throws(() => parseAssertion("abc"), AssertionParseError);
+  assert.throws(() => parseAssertion(">="), AssertionParseError);
+  assert.throws(() => parseAssertion("100ms"), AssertionParseError);
+});
+
+test("compare - all operators", () => {
+  assert.ok(compare(1, "lt", 2));
+  assert.ok(!compare(2, "lt", 2));
+  assert.ok(compare(2, "lte", 2));
+  assert.ok(compare(3, "gt", 2));
+  assert.ok(compare(2, "gte", 2));
+  assert.ok(compare(0, "eq", 0));
+  assert.ok(!compare(1, "eq", 0));
+});
+
+test("compare - eq tolerance is opt-out for exact counts", () => {
+  // Tolerant (default) absorbs float noise.
+  assert.ok(compare(0.994, "eq", 0.9940000000000001));
+  // Exact mode does not absorb a near-miss large count.
+  assert.ok(!compare(1_000_000_001, "eq", 1_000_000_000, false));
+  assert.ok(compare(1_000_000_000, "eq", 1_000_000_000, false));
+});
diff --git a/packages/cli/src/bench/result/expect/assert.ts b/packages/cli/src/bench/result/expect/assert.ts
new file mode 100644
index 000000000..7dd654b3b
--- /dev/null
+++ b/packages/cli/src/bench/result/expect/assert.ts
@@ -0,0 +1,97 @@
+/**
+ * Parsing of `expect` assertion strings such as `">= 99%"`, `"< 100ms"`, or
+ * `"== 0"` into a comparison operator and a normalized numeric threshold.
+ *
+ * The input stays human-friendly; the parsed threshold is machine-clean: a
+ * percentage becomes a ratio, a duration becomes milliseconds, and a rate stays
+ * per second.
+ * @since 2.3.0
+ * @module
+ */
+
+import type { ExpectOp } from "../model.ts";
+
+/** A parsed `expect` assertion. */
+export interface ParsedAssertion {
+  readonly op: ExpectOp;
+  /** The normalized numeric threshold. */
+  readonly threshold: number;
+  /** The normalized unit (`"ms"`, `"%"`, `"/s"`), or `null` if unitless. */
+  readonly unit: string | null;
+}
+
+const ASSERT_RE = /^\s*(<=|>=|==|=|<|>)\s*(\d+(?:\.\d+)?)\s*(%|ms|s|\/s)?\s*$/;
+
+const OP_MAP: Readonly<Record<string, ExpectOp>> = {
+  "<": "lt",
+  "<=": "lte",
+  ">": "gt",
+  ">=": "gte",
+  "==": "eq",
+  "=": "eq",
+};
+
+/** An error raised when an `expect` assertion cannot be parsed. */
+export class AssertionParseError extends Error {}
+
+/**
+ * Parses an `expect` assertion string.  `%` and `s` thresholds are scaled by
+ * shifting the literal's decimal exponent rather than by float arithmetic, so
+ * `"99.4%"` parses to exactly `0.994` (`99.4 / 100` is `0.9940000000000001`).
+ * @param text The assertion, e.g. `">= 99%"`.
+ * @returns The parsed operator, normalized threshold, and unit.
+ * @throws {AssertionParseError} If the assertion cannot be parsed.
+ */
+export function parseAssertion(text: string): ParsedAssertion {
+  const match = text.match(ASSERT_RE);
+  if (match == null) {
+    throw new AssertionParseError(
+      `Invalid expect assertion: ${JSON.stringify(text)}.`,
+    );
+  }
+  const op = OP_MAP[match[1]];
+  const value = Number.parseFloat(match[2]);
+  switch (match[3]) {
+    case "%":
+      return { op, threshold: Number(`${match[2]}e-2`), unit: "%" };
+    case "ms":
+      return { op, threshold: value, unit: "ms" };
+    case "s":
+      return { op, threshold: Number(`${match[2]}e3`), unit: "ms" };
+    case "/s":
+      return { op, threshold: value, unit: "/s" };
+    default:
+      return { op, threshold: value, unit: null };
+  }
+}
+
+/**
+ * Compares a measured value against a threshold using a comparison operator.
+ * @param actual The measured value.
+ * @param op The comparison operator.
+ * @param threshold The threshold.
+ * @param tolerant Whether `eq` absorbs floating-point noise (the default);
+ *                 pass `false` for exact (count) metrics.
+ * @returns Whether the comparison holds.
+ */
+export function compare(
+  actual: number,
+  op: ExpectOp,
+  threshold: number,
+  tolerant = true,
+): boolean {
+  switch (op) {
+    case "lt":
+      return actual < threshold;
+    case "lte":
+      return actual <= threshold;
+    case "gt":
+      return actual > threshold;
+    case "gte":
+      return actual >= threshold;
+    case "eq":
+      return tolerant
+        ? Math.abs(actual - threshold) <= 1e-9 + 1e-9 * Math.abs(threshold)
+        : actual === threshold;
+  }
+}
diff --git a/packages/cli/src/bench/result/expect/evaluate.test.ts b/packages/cli/src/bench/result/expect/evaluate.test.ts
new file mode 100644
index 000000000..0947bed53
--- /dev/null
+++ b/packages/cli/src/bench/result/expect/evaluate.test.ts
@@ -0,0 +1,121 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { evaluateExpect, type MetricView } from "./evaluate.ts";
+
+function metrics(overrides: Partial<MetricView> = {}): MetricView {
+  return {
+    requests: { total: 1000, ok: 994, failed: 6, successRate: 0.994 },
+    throughputPerSec: 304,
+    client: {
+      latencyMs: { p50: 24, p95: 91, p99: 184, mean: 31.2, max: 412 },
+    },
+    server: {
+      signatureVerificationMs: { overall: { p50: 6, p95: 12, p99: 28 } },
+    },
+    errors: [
+      { kind: "http", status: 401, reason: "signature_failed", count: 5 },
+      { kind: "http", status: 500, reason: "handler_error", count: 1 },
+    ],
+    ...overrides,
+  };
+}
+
+test("evaluateExpect - passes when all fail-severity assertions hold", () => {
+  const { results, passed } = evaluateExpect(
+    { successRate: ">= 99%", "latency.p95": "< 100ms" },
+    metrics(),
+  );
+  assert.strictEqual(passed, true);
+  assert.strictEqual(results.length, 2);
+  assert.ok(results.every((r) => r.pass));
+});
+
+test("evaluateExpect - fails when a fail-severity assertion is violated", () => {
+  const { results, passed } = evaluateExpect(
+    { "errors.5xx": "== 0" },
+    metrics(),
+  );
+  assert.strictEqual(passed, false);
+  assert.strictEqual(results[0].actual, 1);
+  assert.strictEqual(results[0].pass, false);
+});
+
+test("evaluateExpect - warn severity does not fail the gate", () => {
+  const { passed, results } = evaluateExpect(
+    { "latency.p95": { assert: "< 50ms", severity: "warn" } },
+    metrics(),
+  );
+  assert.strictEqual(results[0].pass, false);
+  assert.strictEqual(results[0].severity, "warn");
+  assert.strictEqual(passed, true);
+});
+
+test("evaluateExpect - buckets 4xx and 5xx errors", () => {
+  const { results } = evaluateExpect(
+    { "errors.4xx": "<= 10", "errors.5xx": "== 0", "errors.total": ">= 0" },
+    metrics(),
+  );
+  assert.strictEqual(results[0].actual, 5); // 4xx
+  assert.strictEqual(results[1].actual, 1); // 5xx
+  assert.strictEqual(results[2].actual, 6); // total
+});
+
+test("evaluateExpect - reads server signature-verification metrics", () => {
+  const { results, passed } = evaluateExpect(
+    { "signatureVerification.p95": "< 20ms" },
+    metrics(),
+  );
+  assert.strictEqual(results[0].actual, 12);
+  assert.strictEqual(passed, true);
+});
+
+test("evaluateExpect - missing server metric fails (actual null)", () => {
+  const { results, passed } = evaluateExpect(
+    { "signatureVerification.p95": "< 20ms" },
+    metrics({ server: null }),
+  );
+  assert.strictEqual(results[0].actual, null);
+  assert.strictEqual(results[0].pass, false);
+  assert.strictEqual(passed, false);
+});
+
+test("evaluateExpect - unmeasured metric yields null actual and fails", () => {
+  const { results } = evaluateExpect(
+    { deliveryThroughput: ">= 1/s" },
+    metrics(),
+  );
+  assert.strictEqual(results[0].actual, null);
+  assert.strictEqual(results[0].pass, false);
+});
+
+test("evaluateExpect - tolerant equality matches float-normalized ratios", () => {
+  const { passed } = evaluateExpect(
+    { successRate: "== 99.4%" },
+    metrics(),
+  );
+  assert.strictEqual(passed, true);
+});
+
+test("evaluateExpect - count equality is exact (no tolerance)", () => {
+  const errors = [{
+    kind: "http",
+    status: 500,
+    reason: "x",
+    count: 1_000_000_001,
+  }];
+  const exact = evaluateExpect(
+    { "errors.5xx": "== 1000000000" },
+    metrics({ errors }),
+  );
+  assert.strictEqual(exact.results[0].pass, false);
+});
+
+test("evaluateExpect - incompatible assertion unit fails", () => {
+  // A percentage threshold against a millisecond metric is nonsense; even
+  // though 91 > 0.01 would hold numerically, the unit mismatch fails it.
+  const { results } = evaluateExpect(
+    { "latency.p95": "> 1%" },
+    metrics(),
+  );
+  assert.strictEqual(results[0].pass, false);
+});
diff --git a/packages/cli/src/bench/result/expect/evaluate.ts b/packages/cli/src/bench/result/expect/evaluate.ts
new file mode 100644
index 000000000..05c590532
--- /dev/null
+++ b/packages/cli/src/bench/result/expect/evaluate.ts
@@ -0,0 +1,185 @@
+/**
+ * Evaluation of a scenario's `expect` block against its measured metrics.
+ *
+ * Each assertion becomes an {@link ExpectResult}; the gate passes when every
+ * `fail`-severity assertion holds (`warn`-severity assertions annotate without
+ * failing the build).
+ * @since 2.3.0
+ * @module
+ */
+
+import type { ExpectBlock } from "../../scenario/types.ts";
+import type {
+  ErrorBucket,
+  ExpectResult,
+  LatencyMs,
+  PartialLatencyMs,
+  ScenarioResult,
+} from "../model.ts";
+import { compare, parseAssertion } from "./assert.ts";
+
+/** The subset of a scenario result that `expect` metrics are looked up from. */
+export type MetricView = Pick<
+  ScenarioResult,
+  "requests" | "throughputPerSec" | "client" | "server" | "errors"
+>;
+
+/** The outcome of evaluating an `expect` block. */
+export interface ExpectEvaluation {
+  readonly results: ExpectResult[];
+  readonly passed: boolean;
+}
+
+/**
+ * Evaluates an `expect` block against measured metrics.
+ * @param expect The scenario's `expect` block.
+ * @param metrics The measured metrics to evaluate against.
+ * @returns The evaluated assertions and whether the gate passed.
+ */
+export function evaluateExpect(
+  expect: ExpectBlock,
+  metrics: MetricView,
+): ExpectEvaluation {
+  const results: ExpectResult[] = [];
+  for (const [metric, value] of Object.entries(expect)) {
+    const assertion = typeof value === "string" ? value : value.assert;
+    const severity = typeof value === "string"
+      ? "fail"
+      : value.severity ?? "fail";
+    const { op, threshold, unit } = parseAssertion(assertion);
+    const lookup = lookupMetric(metrics, metric);
+    const actual = lookup?.value ?? null;
+    const pass = lookup != null && actual != null &&
+      unitCompatible(unit, lookup.unit) &&
+      compare(actual, op, threshold, lookup.unit !== "count");
+    results.push({ metric, op, threshold, unit, actual, severity, pass });
+  }
+  const passed = results.every((r) => r.severity === "warn" || r.pass);
+  return { results, passed };
+}
+
+/** The natural unit class of a metric. */
+type MetricUnit = "ratio" | "ms" | "rate" | "count";
+
+interface MetricLookup {
+  /** The measured value, or `null` if the metric was not measured. */
+  readonly value: number | null;
+  /** The metric's natural unit. */
+  readonly unit: MetricUnit;
+}
+
+/**
+ * Whether an assertion's (normalized) unit is compatible with a metric's
+ * natural unit.  A unitless assertion is always compatible.
+ */
+function unitCompatible(
+  assertionUnit: string | null,
+  metricUnit: MetricUnit,
+): boolean {
+  if (assertionUnit == null) return true;
+  switch (metricUnit) {
+    case "ratio":
+      return assertionUnit === "%";
+    case "ms":
+      return assertionUnit === "ms";
+    case "rate":
+      return assertionUnit === "/s";
+    case "count":
+      return false;
+  }
+}
+
+function lookupMetric(
+  metrics: MetricView,
+  metric: string,
+): MetricLookup | null {
+  switch (metric) {
+    case "successRate":
+      return { value: metrics.requests.successRate, unit: "ratio" };
+    case "throughputPerSec":
+      return { value: metrics.throughputPerSec, unit: "rate" };
+    case "deliveryThroughput":
+      // Recognized (fanout/mixed) but not measured by the runners yet.
+      return { value: null, unit: "rate" };
+    case "errors.total":
+      return { value: sumErrors(metrics.errors), unit: "count" };
+    case "errors.4xx":
+      return { value: sumErrors(metrics.errors, 400, 500), unit: "count" };
+    case "errors.5xx":
+      return { value: sumErrors(metrics.errors, 500, 600), unit: "count" };
+  }
+  if (metric.startsWith("latency.")) {
+    return {
+      value: latencyField(metrics.client.latencyMs, metric.slice(8)),
+      unit: "ms",
+    };
+  }
+  if (metric.startsWith("signatureVerification.")) {
+    return {
+      value: partialField(
+        metrics.server?.signatureVerificationMs?.overall,
+        metric.slice("signatureVerification.".length),
+      ),
+      unit: "ms",
+    };
+  }
+  if (metric.startsWith("queueDrain.")) {
+    return {
+      value: partialField(
+        metrics.server?.queue?.drainMs,
+        metric.slice("queueDrain.".length),
+      ),
+      unit: "ms",
+    };
+  }
+  // Unknown metric name.
+  return null;
+}
+
+function latencyField(latency: LatencyMs, key: string): number | null {
+  switch (key) {
+    case "p50":
+      return latency.p50;
+    case "p95":
+      return latency.p95;
+    case "p99":
+      return latency.p99;
+    case "mean":
+      return latency.mean;
+    case "max":
+      return latency.max;
+    default:
+      return null;
+  }
+}
+
+function partialField(
+  source: PartialLatencyMs | undefined,
+  key: string,
+): number | null {
+  if (source == null) return null;
+  switch (key) {
+    case "p50":
+      return source.p50 ?? null;
+    case "p95":
+      return source.p95 ?? null;
+    case "p99":
+      return source.p99 ?? null;
+    default:
+      return null;
+  }
+}
+
+/** Sums bucket counts, optionally limited to statuses in `[min, max)`. */
+function sumErrors(errors: ErrorBucket[], min?: number, max?: number): number {
+  // Without `min`, every bucket counts (even those with no status); an
+  // omitted `max` leaves the range open-ended instead of matching nothing.
+  const upper = max ?? Number.POSITIVE_INFINITY;
+  let total = 0;
+  for (const { status, count } of errors) {
+    const inRange = min == null ||
+      (status != null && status >= min && status < upper);
+    if (inRange) total += count;
+  }
+  return total;
+}

From 7fae2ae1f951a548567adda0f032478472af6791 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 01:03:17 +0900
Subject: [PATCH 08/47] Build benchmark reports and render them three ways

Assemble the canonical report from measured scenario data and render it
in three forms from that single model:

 -  `buildScenarioResult()`/`buildReport()` turn resolved scenarios and
    their measurements into the report, evaluating each `expect` block,
    summarizing the load model, and computing the overall gate.
 -  `detectEnvironment()` and `configHash()` capture the reproducibility
    metadata (runtime, OS, CPU count, and a stable sha256 over the
    canonicalized configuration, honoring `toJSON()` so URLs hash by
    value).
 -  The JSON renderer is the canonical machine form (pinned by the
    report schema); the terminal-text and Markdown renderers derive from
    the same model.  A shared metric-unit registry keeps the evaluator
    and the renderers in agreement, so measured values display in the
    metric's own unit while an explicit assertion unit stays visible.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 packages/cli/src/bench/render/format.ts       |  68 +++++++
 packages/cli/src/bench/render/index.ts        |  33 ++++
 packages/cli/src/bench/render/json.ts         |  17 ++
 packages/cli/src/bench/render/markdown.ts     |  89 +++++++++
 packages/cli/src/bench/render/render.test.ts  |  69 +++++++
 packages/cli/src/bench/render/text.ts         | 129 +++++++++++++
 packages/cli/src/bench/result/build.test.ts   | 122 ++++++++++++
 packages/cli/src/bench/result/build.ts        | 182 ++++++++++++++++++
 .../cli/src/bench/result/expect/evaluate.ts   |  54 +++---
 .../cli/src/bench/result/expect/metrics.ts    |  57 ++++++
 10 files changed, 790 insertions(+), 30 deletions(-)
 create mode 100644 packages/cli/src/bench/render/format.ts
 create mode 100644 packages/cli/src/bench/render/index.ts
 create mode 100644 packages/cli/src/bench/render/json.ts
 create mode 100644 packages/cli/src/bench/render/markdown.ts
 create mode 100644 packages/cli/src/bench/render/render.test.ts
 create mode 100644 packages/cli/src/bench/render/text.ts
 create mode 100644 packages/cli/src/bench/result/build.test.ts
 create mode 100644 packages/cli/src/bench/result/build.ts
 create mode 100644 packages/cli/src/bench/result/expect/metrics.ts

diff --git a/packages/cli/src/bench/render/format.ts b/packages/cli/src/bench/render/format.ts
new file mode 100644
index 000000000..4033f29a2
--- /dev/null
+++ b/packages/cli/src/bench/render/format.ts
@@ -0,0 +1,68 @@
+/**
+ * Shared number and assertion formatting used by the text and Markdown
+ * renderers.
+ * @since 2.3.0
+ * @module
+ */
+
+import type { ExpectOp } from "../result/model.ts";
+
+const OP_SYMBOLS: Readonly<Record<ExpectOp, string>> = {
+  lt: "<",
+  lte: "<=",
+  gt: ">",
+  gte: ">=",
+  eq: "==",
+};
+
+/** Returns the symbolic form of a comparison operator. */
+export function opSymbol(op: ExpectOp): string {
+  return OP_SYMBOLS[op];
+}
+
+/** Renders a number with en-US grouping, rounded to three decimals at most. */
+export function formatNumber(value: number): string {
+  if (!Number.isFinite(value)) return `${value}`;
+  const scaled = Math.round(value * 1000);
+  return (scaled / 1000).toLocaleString("en-US", { maximumFractionDigits: 3 });
+}
+
+/** Renders a 0..1 ratio as a percentage with at most two fractional digits. */
+export function formatPercent(ratio: number): string {
+  const percent = Math.round(ratio * 1_000_000) / 10_000;
+  return percent.toLocaleString("en-US", { maximumFractionDigits: 2 }) + "%";
+}
+
+/**
+ * Renders a normalized threshold in the notation of its unit.
+ *
+ * Percentages go through `formatPercent()`; `"ms"` and `"/s"` are appended
+ * to the grouped number; any other unit prints the bare number.
+ * @param threshold The normalized numeric threshold.
+ * @param unit The threshold's unit (`"ms"`, `"%"`, `"/s"`, or `null`).
+ * @returns The formatted threshold.
+ */
+export function formatThreshold(
+  threshold: number,
+  unit: string | null,
+): string {
+  // A percentage threshold is handed to the ratio formatter as-is.
+  if (unit === "%") return formatPercent(threshold);
+  const digits = formatNumber(threshold);
+  // Only these two units are echoed back as a suffix.
+  if (unit === "ms" || unit === "/s") return digits + unit;
+  return digits;
+}
+
+/**
+ * Renders a measured value in the given unit, or `n/a` when it is missing.
+ * @param actual The measured value, or `null` if unmeasured.
+ * @param unit The unit to render the value in.
+ * @returns The formatted value, or `"n/a"` for an unmeasured metric.
+ */
+export function formatActual(
+  actual: number | null,
+  unit: string | null,
+): string {
+  return actual == null ? "n/a" : formatThreshold(actual, unit);
+}
diff --git a/packages/cli/src/bench/render/index.ts b/packages/cli/src/bench/render/index.ts
new file mode 100644
index 000000000..e350871bc
--- /dev/null
+++ b/packages/cli/src/bench/render/index.ts
@@ -0,0 +1,33 @@
+/**
+ * Output-format selection over the single report model.
+ * @since 2.3.0
+ * @module
+ */
+
+import type { BenchReport } from "../result/model.ts";
+import { renderJson } from "./json.ts";
+import { renderMarkdown } from "./markdown.ts";
+import { renderText } from "./text.ts";
+
+/** A report output format. */
+export type ReportFormat = "text" | "json" | "markdown";
+
+/**
+ * Renders a report in the requested format.
+ * @param report The report to render.
+ * @param format The output format.
+ * @returns The rendered text.
+ */
+export function renderReport(
+  report: BenchReport,
+  format: ReportFormat,
+): string {
+  switch (format) {
+    case "json":
+      return renderJson(report);
+    case "markdown":
+      return renderMarkdown(report);
+    case "text":
+      return renderText(report);
+  }
+}
diff --git a/packages/cli/src/bench/render/json.ts b/packages/cli/src/bench/render/json.ts
new file mode 100644
index 000000000..fc4e667ec
--- /dev/null
+++ b/packages/cli/src/bench/render/json.ts
@@ -0,0 +1,17 @@
+/**
+ * The canonical JSON renderer.  This is the machine form pinned by the
+ * published report schema; the other renderers are derived from the same model.
+ * @since 2.3.0
+ * @module
+ */
+
+import type { BenchReport } from "../result/model.ts";
+
+/**
+ * Renders a report as pretty-printed canonical JSON.
+ * @param report The report to render.
+ * @returns The JSON text, with a trailing newline.
+ */
+export function renderJson(report: BenchReport): string {
+  return `${JSON.stringify(report, null, 2)}\n`;
+}
diff --git a/packages/cli/src/bench/render/markdown.ts b/packages/cli/src/bench/render/markdown.ts
new file mode 100644
index 000000000..a722686e6
--- /dev/null
+++ b/packages/cli/src/bench/render/markdown.ts
@@ -0,0 +1,89 @@
+/**
+ * The Markdown renderer, suited to a GitHub Actions job summary or a PR
+ * comment.  It is derived from the same report model as the text and JSON
+ * forms.
+ * @since 2.3.0
+ * @module
+ */
+
+import { metricDisplayUnit } from "../result/expect/metrics.ts";
+import type { BenchReport, ScenarioResult } from "../result/model.ts";
+import {
+  formatActual,
+  formatNumber,
+  formatPercent,
+  formatThreshold,
+  opSymbol,
+} from "./format.ts";
+
+/**
+ * Renders a report as Markdown.
+ * The text opens with the overall gate result and the run metadata.
+ * @param report The report to render.
+ * @returns The Markdown text.
+ */
+export function renderMarkdown(report: BenchReport): string {
+  const { target, environment: env, suite } = report;
+  const stats = target.statsAvailable ? "stats available" : "no stats";
+  // Title, overall gate result, then the reproducibility bullet list.
+  const header = [
+    "# Fedify benchmark report",
+    "",
+    `**Result:** ${report.passed ? "✅ PASS" : "❌ FAIL"}`,
+    "",
+    `- **Target:** \`${target.url}\` (${stats})`,
+    `- **Environment:** ${env.runtime} ${env.runtimeVersion}, ${env.os}, ` +
+      `${env.cpuCount} CPUs`,
+    `- **Config:** \`${suite.configHash}\``,
+    "",
+  ];
+  // Each scenario section is followed by one blank separator line.
+  const sections = report.scenarios.flatMap((s) => [...renderScenario(s), ""]);
+  return [...header, ...sections].join("\n");
+}
+
+function renderScenario(scenario: ScenarioResult): string[] {
+  const lines: string[] = [];
+  lines.push(
+    `## ${scenario.name} (${scenario.type}) ` +
+      `${scenario.passed ? "✅" : "❌"}`,
+    "",
+  );
+  lines.push("| Metric | Value |", "| --- | --- |");
+  const r = scenario.requests;
+  lines.push(`| Requests | ${formatNumber(r.total)} |`);
+  lines.push(`| Success rate | ${formatPercent(r.successRate)} |`);
+  lines.push(`| Throughput | ${formatNumber(scenario.throughputPerSec)}/s |`);
+  const l = scenario.client.latencyMs;
+  lines.push(`| Latency p50 | ${formatNumber(l.p50)}ms |`);
+  lines.push(`| Latency p95 | ${formatNumber(l.p95)}ms |`);
+  lines.push(`| Latency p99 | ${formatNumber(l.p99)}ms |`);
+  const sig = scenario.server?.signatureVerificationMs?.overall;
+  if (sig?.p95 != null) {
+    lines.push(
+      `| Signature verification p95 (server) | ${formatNumber(sig.p95)}ms |`,
+    );
+  }
+
+  if (scenario.errors.length > 0) {
+    lines.push("", "| Error | Count |", "| --- | --- |");
+    for (const error of scenario.errors) {
+      const code = error.status == null ? error.kind : String(error.status);
+      lines.push(`| ${code} ${error.reason} | ${formatNumber(error.count)} |`);
+    }
+  }
+
+  if (scenario.expectations.length > 0) {
+    lines.push("", "| Expectation | Actual | Result |", "| --- | --- | --- |");
+    for (const e of scenario.expectations) {
+      const tag = e.pass ? "✅" : e.severity === "warn" ? "⚠️" : "❌";
+      const unit = metricDisplayUnit(e.metric);
+      lines.push(
+        `| \`${e.metric} ${opSymbol(e.op)} ${
+          formatThreshold(e.threshold, e.unit ?? unit)
+        }\` | ${formatActual(e.actual, unit)} | ${tag} |`,
+      );
+    }
+  }
+  return lines;
+}
diff --git a/packages/cli/src/bench/render/render.test.ts b/packages/cli/src/bench/render/render.test.ts
new file mode 100644
index 000000000..008383c5c
--- /dev/null
+++ b/packages/cli/src/bench/render/render.test.ts
@@ -0,0 +1,69 @@
+import { type Schema, Validator } from "@cfworker/json-schema";
+import assert from "node:assert/strict";
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
+import test from "node:test";
+import type { BenchReport } from "../result/model.ts";
+import { reportSchemaV1 } from "../result/schema.ts";
+import { renderReport } from "./index.ts";
+
+const report = JSON.parse(
+  readFileSync(
+    join(
+      import.meta.dirname!,
+      "..",
+      "__fixtures__",
+      "reports",
+      "inbox-report.json",
+    ),
+    "utf-8",
+  ),
+) as BenchReport;
+
+test("renderReport json - valid JSON that validates against the schema", () => {
+  const json = renderReport(report, "json");
+  const parsed = JSON.parse(json);
+  const validator = new Validator(
+    reportSchemaV1 as unknown as Schema,
+    "2020-12",
+  );
+  assert.ok(validator.validate(parsed).valid);
+});
+
+test("renderReport text - includes the key facts and gate", () => {
+  const text = renderReport(report, "text");
+  assert.match(text, /Fedify benchmark report/);
+  assert.match(text, /inbox-shared \(inbox\)/);
+  assert.match(text, /Client latency \(ms\): p50 24/);
+  assert.match(text, /\[PASS\] latency\.p95 < 100ms/);
+  assert.match(text, /Overall: PASS/);
+});
+
+test("renderReport - shows actuals in the metric's natural unit", () => {
+  // A unitless assertion still renders successRate as a percentage.
+  const r: BenchReport = {
+    ...report,
+    scenarios: [{
+      ...report.scenarios[0],
+      expectations: [{
+        metric: "successRate",
+        op: "gte",
+        threshold: 0.99,
+        unit: null,
+        actual: 0.994,
+        severity: "fail",
+        pass: true,
+      }],
+    }],
+  };
+  const text = renderReport(r, "text");
+  assert.match(text, /successRate >= 99%\s+\(actual 99\.4%\)/);
+});
+
+test("renderReport markdown - includes tables and the gate result", () => {
+  const md = renderReport(report, "markdown");
+  assert.match(md, /# Fedify benchmark report/);
+  assert.match(md, /✅ PASS/);
+  assert.match(md, /\| Latency p95 \| 91ms \|/);
+  assert.match(md, /signature_failed/);
+});
diff --git a/packages/cli/src/bench/render/text.ts b/packages/cli/src/bench/render/text.ts
new file mode 100644
index 000000000..88b18b276
--- /dev/null
+++ b/packages/cli/src/bench/render/text.ts
@@ -0,0 +1,129 @@
+/**
+ * The terminal-text renderer: a readable per-scenario summary with the gate
+ * result, derived from the same report model as the JSON and Markdown forms.
+ * @since 2.3.0
+ * @module
+ */
+
+import type {
+  BenchReport,
+  PartialLatencyMs,
+  ScenarioResult,
+} from "../result/model.ts";
+import { metricDisplayUnit } from "../result/expect/metrics.ts";
+import {
+  formatActual,
+  formatNumber,
+  formatPercent,
+  formatThreshold,
+  opSymbol,
+} from "./format.ts";
+
+/**
+ * Renders a report as a plain-text terminal summary.
+ * The text lists the run metadata, then each scenario, then the verdict.
+ * @param report The report to render.
+ * @returns The summary text.
+ */
+export function renderText(report: BenchReport): string {
+  const { target, environment: env } = report;
+  const version = target.fedifyVersion;
+  const fedify = version == null
+    ? "Fedify version unknown"
+    : `Fedify ${version}`;
+  const stats = target.statsAvailable ? "stats available" : "stats unavailable";
+  // Header: title, target, environment, timestamps, and config hash.
+  const header = [
+    "Fedify benchmark report",
+    "",
+    `Target: ${target.url}  (${fedify}, ${stats})`,
+    `Environment: ${env.runtime} ${env.runtimeVersion}, ${env.os}, ` +
+      `${env.cpuCount} CPUs`,
+    `Started: ${report.startedAt}  Finished: ${report.finishedAt}`,
+    `Config: ${report.suite.configHash}`,
+    "",
+  ];
+  // Every scenario block is followed by one blank separator line.
+  const sections = report.scenarios.flatMap((s) => [...renderScenario(s), ""]);
+  const verdict = `Overall: ${report.passed ? "PASS" : "FAIL"}`;
+  return [...header, ...sections, verdict].join("\n");
+}
+
+function renderScenario(scenario: ScenarioResult): string[] {
+  const lines: string[] = [];
+  lines.push(
+    `Scenario: ${scenario.name} (${scenario.type})  ` +
+      `[${scenario.passed ? "PASS" : "FAIL"}]`,
+  );
+  lines.push(`  Load: ${describeLoad(scenario.load)}`);
+  const r = scenario.requests;
+  lines.push(
+    `  Requests: ${formatNumber(r.total)}  (ok ${formatNumber(r.ok)}, ` +
+      `failed ${formatNumber(r.failed)}, success ${
+        formatPercent(r.successRate)
+      })`,
+  );
+  lines.push(`  Throughput: ${formatNumber(scenario.throughputPerSec)} req/s`);
+  const l = scenario.client.latencyMs;
+  lines.push(
+    `  Client latency (ms): p50 ${formatNumber(l.p50)}  p95 ${
+      formatNumber(l.p95)
+    }  p99 ${formatNumber(l.p99)}  mean ${formatNumber(l.mean)}  max ${
+      formatNumber(l.max)
+    }`,
+  );
+  if (scenario.server?.signatureVerificationMs != null) {
+    lines.push(
+      `  Server signature verification (ms): ${
+        describePartial(scenario.server.signatureVerificationMs.overall)
+      }`,
+    );
+  }
+  if (scenario.server?.queue?.drainMs != null) {
+    const depth = scenario.server.queue.depthMax;
+    const suffix = depth == null ? "" : `  (depth max ${formatNumber(depth)})`;
+    lines.push(
+      `  Server queue drain (ms): ${
+        describePartial(scenario.server.queue.drainMs)
+      }${suffix}`,
+    );
+  }
+  if (scenario.errors.length > 0) {
+    lines.push("  Errors:");
+    for (const error of scenario.errors) {
+      const code = error.status == null ? error.kind : String(error.status);
+      lines.push(`    ${code} ${error.reason}: ${formatNumber(error.count)}`);
+    }
+  }
+  if (scenario.expectations.length > 0) {
+    lines.push("  Expectations:");
+    for (const e of scenario.expectations) {
+      const tag = e.pass ? "PASS" : e.severity === "warn" ? "WARN" : "FAIL";
+      const unit = metricDisplayUnit(e.metric);
+      lines.push(
+        `    [${tag}] ${e.metric} ${opSymbol(e.op)} ${
+          formatThreshold(e.threshold, e.unit ?? unit)
+        }  (actual ${formatActual(e.actual, unit)})`,
+      );
+    }
+  }
+  return lines;
+}
+
+/** Summarizes a scenario's load model and timing on a single line. */
+function describeLoad(load: ScenarioResult["load"]): string {
+  const duration = `duration ${formatNumber(load.durationMs)}ms`;
+  const warmup = `warmup ${formatNumber(load.warmupMs)}ms`;
+  const head = load.model === "closed"
+    ? `closed, concurrency ${load.concurrency}`
+    : `open, ${formatNumber(load.ratePerSec)}/s ${load.arrival}`;
+  return `${head}, ${duration}, ${warmup}`;
+}
+
+function describePartial(latency: PartialLatencyMs): string {
+  // Unmeasured percentiles are left out rather than printed as placeholders.
+  return (["p50", "p95", "p99"] as const).flatMap((key) => {
+    const ms = latency[key];
+    return ms == null ? [] : [`${key} ${formatNumber(ms)}`];
+  }).join("  ");
+}
diff --git a/packages/cli/src/bench/result/build.test.ts b/packages/cli/src/bench/result/build.test.ts
new file mode 100644
index 000000000..5125d80e0
--- /dev/null
+++ b/packages/cli/src/bench/result/build.test.ts
@@ -0,0 +1,122 @@
+import { type Schema, Validator } from "@cfworker/json-schema";
+import assert from "node:assert/strict";
+import test from "node:test";
+import { normalizeSuite } from "../scenario/normalize.ts";
+import {
+  buildReport,
+  buildScenarioResult,
+  configHash,
+  detectEnvironment,
+  type ScenarioMeasurement,
+} from "./build.ts";
+import { reportSchemaV1 } from "./schema.ts";
+
+function resolvedInbox() {
+  return normalizeSuite({
+    version: 1,
+    target: "http://localhost:3000",
+    defaults: { load: { concurrency: 50 }, duration: "60s", warmup: "10s" },
+    scenarios: [{
+      name: "inbox-shared",
+      type: "inbox",
+      recipient: "acct:a@x",
+      expect: { successRate: ">= 99%", "latency.p95": "< 100ms" },
+    }],
+  }).scenarios[0];
+}
+
+function measurement(): ScenarioMeasurement {
+  return {
+    requests: { total: 1000, ok: 994, failed: 6, successRate: 0.994 },
+    throughputPerSec: 304,
+    client: {
+      latencyMs: { p50: 24, p95: 91, p99: 184, mean: 31.2, max: 412 },
+    },
+    server: {
+      signatureVerificationMs: { overall: { p50: 6, p95: 12, p99: 28 } },
+    },
+    errors: [{ kind: "http", status: 500, reason: "handler_error", count: 1 }],
+  };
+}
+
+test("buildScenarioResult - summarizes load and evaluates expect", () => {
+  const result = buildScenarioResult(resolvedInbox(), measurement());
+  assert.deepEqual(result.load, {
+    model: "closed",
+    concurrency: 50,
+    durationMs: 60_000,
+    warmupMs: 10_000,
+  });
+  assert.strictEqual(result.expectations.length, 2);
+  assert.ok(result.expectations.every((e) => e.pass));
+  assert.strictEqual(result.passed, true);
+});
+
+test("buildReport - gate passes only when all scenarios pass", () => {
+  const ok = buildScenarioResult(resolvedInbox(), measurement());
+  const bad = buildScenarioResult(resolvedInbox(), {
+    ...measurement(),
+    requests: { total: 1000, ok: 900, failed: 100, successRate: 0.9 },
+  });
+  const report = buildReport({
+    scenarios: [ok, bad],
+    environment: detectEnvironment(),
+    target: { url: "http://localhost:3000", statsAvailable: true },
+    startedAt: "2026-06-04T12:00:00.000Z",
+    finishedAt: "2026-06-04T12:01:00.000Z",
+    suite: { configHash: configHash({ a: 1 }) },
+  });
+  assert.strictEqual(report.passed, false);
+});
+
+test("buildReport - output validates against the report schema", () => {
+  const report = buildReport({
+    scenarios: [buildScenarioResult(resolvedInbox(), measurement())],
+    environment: detectEnvironment(),
+    target: {
+      url: "http://localhost:3000",
+      fedifyVersion: "2.3.0",
+      statsAvailable: true,
+    },
+    startedAt: "2026-06-04T12:00:00.000Z",
+    finishedAt: "2026-06-04T12:01:00.000Z",
+    suite: { name: "suite", configHash: configHash({ a: 1 }) },
+  });
+  const validator = new Validator(
+    reportSchemaV1 as unknown as Schema,
+    "2020-12",
+  );
+  const result = validator.validate(JSON.parse(JSON.stringify(report)));
+  assert.ok(result.valid, JSON.stringify(result.errors));
+});
+
+test("configHash - stable across key order, sensitive to values", () => {
+  assert.strictEqual(configHash({ a: 1, b: 2 }), configHash({ b: 2, a: 1 }));
+  assert.notStrictEqual(configHash({ a: 1 }), configHash({ a: 2 }));
+  assert.match(configHash({ a: 1 }), /^sha256:[0-9a-f]{64}$/);
+});
+
+test("configHash - distinguishes arrays with undefined holes", () => {
+  // [undefined] must not collapse to [].
+  assert.notStrictEqual(configHash([undefined]), configHash([]));
+  assert.notStrictEqual(configHash([1, undefined, 2]), configHash([1, 2]));
+});
+
+test("configHash - hashes URL/Date by serialized form (toJSON)", () => {
+  // A config carrying a URL target must not collapse to {} (same hash).
+  assert.notStrictEqual(
+    configHash({ target: new URL("http://a.example/") }),
+    configHash({ target: new URL("http://b.example/") }),
+  );
+  assert.strictEqual(
+    configHash({ target: new URL("http://a.example/") }),
+    configHash({ target: "http://a.example/" }),
+  );
+});
+
+test("detectEnvironment - reports runtime, os, and cpu count", () => {
+  const env = detectEnvironment();
+  assert.ok(["node", "deno", "bun"].includes(env.runtime));
+  assert.ok(env.os.length > 0);
+  assert.ok(env.cpuCount >= 0);
+});
diff --git a/packages/cli/src/bench/result/build.ts b/packages/cli/src/bench/result/build.ts
new file mode 100644
index 000000000..844611202
--- /dev/null
+++ b/packages/cli/src/bench/result/build.ts
@@ -0,0 +1,182 @@
+/**
+ * Assembly of the canonical benchmark report from measured scenario data.
+ *
+ * The runners produce per-scenario measurements; this module turns each into a
+ * {@link ScenarioResult} (evaluating its `expect` block) and assembles the
+ * top-level {@link BenchReport} with reproducibility metadata.
+ * @since 2.3.0
+ * @module
+ */
+
+import { createHash } from "node:crypto";
+import { cpus } from "node:os";
+import process from "node:process";
+import metadata from "../../../deno.json" with { type: "json" };
+import type { ResolvedScenario } from "../scenario/normalize.ts";
+import type { SerializedHistogram } from "../metrics/histogram.ts";
+import { evaluateExpect } from "./expect/evaluate.ts";
+import { REPORT_SCHEMA_ID } from "./schema.ts";
+import type {
+  BenchReport,
+  ClientMetrics,
+  Environment,
+  ErrorBucket,
+  LoadSummary,
+  RequestSummary,
+  ScenarioResult,
+  ServerMetrics,
+  TargetInfo,
+} from "./model.ts";
+
+/** The per-scenario measurement a runner produces. */
+export interface ScenarioMeasurement {
+  readonly requests: RequestSummary;
+  readonly throughputPerSec: number;
+  readonly client: ClientMetrics;
+  readonly server: ServerMetrics | null;
+  readonly errors: ErrorBucket[];
+  readonly histogram?: SerializedHistogram;
+}
+
+/**
+ * Builds a scenario result from its resolved definition and measurement,
+ * evaluating the `expect` block in the process.
+ * @param scenario The resolved scenario.
+ * @param measurement The measured client and server metrics.
+ * @returns The assembled scenario result.
+ */
+export function buildScenarioResult(
+  scenario: ResolvedScenario,
+  measurement: ScenarioMeasurement,
+): ScenarioResult {
+  const { results, passed } = evaluateExpect(scenario.expect, measurement);
+  return {
+    name: scenario.name,
+    type: scenario.type,
+    load: loadSummary(scenario),
+    requests: measurement.requests,
+    throughputPerSec: measurement.throughputPerSec,
+    client: measurement.client,
+    server: measurement.server,
+    errors: measurement.errors,
+    expectations: results,
+    passed,
+    ...(measurement.histogram ? { histogram: measurement.histogram } : {}),
+  };
+}
+
+/** Inputs for {@link buildReport} beyond the scenario results. */
+export interface ReportInput {
+  readonly scenarios: ScenarioResult[];
+  readonly environment: Environment;
+  readonly target: TargetInfo;
+  readonly startedAt: string;
+  readonly finishedAt: string;
+  readonly suite: { readonly name?: string; readonly configHash: string };
+}
+
+/**
+ * Assembles the top-level report.  The gate passes only when every scenario
+ * passes.
+ * @param input The report inputs.
+ * @returns The complete report.
+ */
+export function buildReport(input: ReportInput): BenchReport {
+  return {
+    $schema: REPORT_SCHEMA_ID,
+    schemaVersion: 1,
+    tool: { name: "@fedify/cli", version: metadata.version },
+    environment: input.environment,
+    target: input.target,
+    startedAt: input.startedAt,
+    finishedAt: input.finishedAt,
+    suite: input.suite,
+    passed: input.scenarios.every((s) => s.passed),
+    scenarios: input.scenarios,
+  };
+}
+
+/** Captures the runtime, its version, the OS, and the CPU count of this run. */
+export function detectEnvironment(): Environment {
+  const { Deno: deno, Bun: bun } = globalThis as {
+    Deno?: { version?: { deno?: string } };
+    Bun?: { version?: string };
+  };
+  let cpuCount: number;
+  try {
+    cpuCount = cpus().length;
+  } catch {
+    cpuCount = 0;
+  }
+  const base = { os: process.platform, cpuCount };
+  const denoVersion = deno?.version?.deno;
+  if (denoVersion != null) {
+    return { runtime: "deno", runtimeVersion: denoVersion, ...base };
+  }
+  if (bun?.version != null) {
+    return { runtime: "bun", runtimeVersion: bun.version, ...base };
+  }
+  const runtimeVersion = process.versions?.node ?? "unknown";
+  return { runtime: "node", runtimeVersion, ...base };
+}
+
+/**
+ * Hashes a resolved configuration into a stable `sha256:` identifier, so CI
+ * only compares runs from the same configuration.  The digest is taken over
+ * the configuration's canonical JSON form.
+ * @param config The configuration object to hash.
+ * @returns A `sha256:`-prefixed hex digest.
+ */
+export function configHash(config: unknown): string {
+  const hasher = createHash("sha256");
+  hasher.update(canonicalJson(config));
+  return "sha256:" + hasher.digest("hex");
+}
+
+function canonicalJson(value: unknown): string {
+  // Mirror JSON.stringify: `undefined`, functions, and symbols are dropped
+  // from objects and become `null` inside arrays (sparse holes included).
+  const omitted = (v: unknown): boolean =>
+    v === undefined || typeof v === "function" || typeof v === "symbol";
+  if (omitted(value)) return "null";
+  if (value === null || typeof value !== "object") return JSON.stringify(value);
+  // Honor toJSON() (as JSON.stringify does) so URL, Date, and similar values
+  // are hashed by their serialized form rather than as an empty object.
+  const toJson = (value as { toJSON?: unknown }).toJSON;
+  if (typeof toJson === "function") {
+    return canonicalJson((toJson as () => unknown).call(value));
+  }
+  if (Array.isArray(value)) {
+    // Array.from() visits holes, which map() would skip (yielding `[1,,2]`).
+    return `[${Array.from(value, (v) => canonicalJson(v)).join(",")}]`;
+  }
+  const members = Object.entries(value as Record<string, unknown>)
+    .filter(([, v]) => !omitted(v))
+    .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
+    .map(([k, v]) => `${JSON.stringify(k)}:${canonicalJson(v)}`);
+  return `{${members.join(",")}}`;
+}
+
+/**
+ * Condenses a resolved scenario's load settings into the report's load
+ * summary.  `maxInFlight` is included only when the scenario sets it.
+ */
+function loadSummary(scenario: ResolvedScenario): LoadSummary {
+  const { load } = scenario;
+  // Shared tail: the timings, plus maxInFlight when it is set.
+  const tail = {
+    durationMs: scenario.durationMs,
+    warmupMs: scenario.warmupMs,
+    ...(load.maxInFlight == null ? {} : { maxInFlight: load.maxInFlight }),
+  };
+  // Any load that is not closed-loop is reported as the open model.
+  if (load.kind !== "closed") {
+    return {
+      model: "open",
+      ratePerSec: load.ratePerSec,
+      arrival: load.arrival,
+      ...tail,
+    };
+  }
+  return { model: "closed", concurrency: load.concurrency, ...tail };
+}
diff --git a/packages/cli/src/bench/result/expect/evaluate.ts b/packages/cli/src/bench/result/expect/evaluate.ts
index 05c590532..498828b0c 100644
--- a/packages/cli/src/bench/result/expect/evaluate.ts
+++ b/packages/cli/src/bench/result/expect/evaluate.ts
@@ -17,6 +17,7 @@ import type {
   ScenarioResult,
 } from "../model.ts";
 import { compare, parseAssertion } from "./assert.ts";
+import { type MetricUnit, metricUnit } from "./metrics.ts";
 
 /** The subset of a scenario result that `expect` metrics are looked up from. */
 export type MetricView = Pick<
@@ -58,9 +59,6 @@ export function evaluateExpect(
   return { results, passed };
 }
 
-/** The natural unit class of a metric. */
-type MetricUnit = "ratio" | "ms" | "rate" | "count";
-
 interface MetricLookup {
   /** The measured value, or `null` if the metric was not measured. */
   readonly value: number | null;
@@ -74,10 +72,10 @@ interface MetricLookup {
  */
 function unitCompatible(
   assertionUnit: string | null,
-  metricUnit: MetricUnit,
+  unit: MetricUnit,
 ): boolean {
   if (assertionUnit == null) return true;
-  switch (metricUnit) {
+  switch (unit) {
     case "ratio":
       return assertionUnit === "%";
     case "ms":
@@ -93,46 +91,42 @@ function lookupMetric(
   metrics: MetricView,
   metric: string,
 ): MetricLookup | null {
+  const unit = metricUnit(metric);
+  if (unit == null) return null; // Unknown metric name.
+  return { value: lookupValue(metrics, metric), unit };
+}
+
+function lookupValue(metrics: MetricView, metric: string): number | null {
   switch (metric) {
     case "successRate":
-      return { value: metrics.requests.successRate, unit: "ratio" };
+      return metrics.requests.successRate;
     case "throughputPerSec":
-      return { value: metrics.throughputPerSec, unit: "rate" };
+      return metrics.throughputPerSec;
     case "deliveryThroughput":
       // Recognized (fanout/mixed) but not measured by the runners yet.
-      return { value: null, unit: "rate" };
+      return null;
     case "errors.total":
-      return { value: sumErrors(metrics.errors), unit: "count" };
+      return sumErrors(metrics.errors);
     case "errors.4xx":
-      return { value: sumErrors(metrics.errors, 400, 500), unit: "count" };
+      return sumErrors(metrics.errors, 400, 500);
     case "errors.5xx":
-      return { value: sumErrors(metrics.errors, 500, 600), unit: "count" };
+      return sumErrors(metrics.errors, 500, 600);
   }
   if (metric.startsWith("latency.")) {
-    return {
-      value: latencyField(metrics.client.latencyMs, metric.slice(8)),
-      unit: "ms",
-    };
+    return latencyField(metrics.client.latencyMs, metric.slice(8));
   }
   if (metric.startsWith("signatureVerification.")) {
-    return {
-      value: partialField(
-        metrics.server?.signatureVerificationMs?.overall,
-        metric.slice("signatureVerification.".length),
-      ),
-      unit: "ms",
-    };
+    return partialField(
+      metrics.server?.signatureVerificationMs?.overall,
+      metric.slice("signatureVerification.".length),
+    );
   }
   if (metric.startsWith("queueDrain.")) {
-    return {
-      value: partialField(
-        metrics.server?.queue?.drainMs,
-        metric.slice("queueDrain.".length),
-      ),
-      unit: "ms",
-    };
+    return partialField(
+      metrics.server?.queue?.drainMs,
+      metric.slice("queueDrain.".length),
+    );
   }
-  // Unknown metric name.
   return null;
 }
 
diff --git a/packages/cli/src/bench/result/expect/metrics.ts b/packages/cli/src/bench/result/expect/metrics.ts
new file mode 100644
index 000000000..0191b00fb
--- /dev/null
+++ b/packages/cli/src/bench/result/expect/metrics.ts
@@ -0,0 +1,57 @@
+/**
+ * The single registry mapping `expect` metric names to their natural unit.
+ *
+ * Both the evaluator (for unit-compatibility checks) and the renderers (for
+ * displaying measured values in the metric's own unit) read from here, so the
+ * two never disagree about what `latency.p95` or `successRate` mean.
+ * @since 2.3.0
+ * @module
+ */
+
+/** The natural unit class of a metric. */
+export type MetricUnit = "ratio" | "ms" | "rate" | "count";
+
+/**
+ * Maps an `expect` metric name to its natural unit class.
+ *
+ * A fixed set of names is matched exactly; every name under the `latency.`,
+ * `signatureVerification.`, and `queueDrain.` prefixes is a millisecond
+ * timing, whatever the suffix.
+ * @param metric The metric name, e.g. `"latency.p95"`.
+ * @returns The unit class, or `null` if the metric name is not recognized.
+ */
+export function metricUnit(metric: string): MetricUnit | null {
+  // Exact names first.
+  if (metric === "successRate") return "ratio";
+  const rates = ["throughputPerSec", "deliveryThroughput"];
+  if (rates.includes(metric)) return "rate";
+  const counts = ["errors.total", "errors.4xx", "errors.5xx"];
+  if (counts.includes(metric)) return "count";
+  // Then the timing families: only the prefix decides the unit, so any
+  // suffix (even an unknown one) is classified as milliseconds.
+  const timedFamilies = [
+    "latency.",
+    "signatureVerification.",
+    "queueDrain.",
+  ];
+  const timed = timedFamilies.some((prefix) => metric.startsWith(prefix));
+  return timed ? "ms" : null;
+}
+
+/**
+ * Returns the suffix used to display a metric's values: `"%"`, `"ms"`, or
+ * `"/s"`.  Counts and unknown metrics have no suffix and yield `null`.
+ * @param metric The metric name.
+ * @returns The display unit, or `null` when values print without a unit.
+ */
+export function metricDisplayUnit(metric: string): string | null {
+  const unitClass = metricUnit(metric);
+  if (unitClass === "ratio") {
+    return "%";
+  }
+  if (unitClass === "rate") {
+    return "/s";
+  }
+  // "ms" is its own display form; "count" and null have no suffix.
+  return unitClass === "ms" ? "ms" : null;
+}

From d9812d785f3154e098ac3060c667f8a303da0da3 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 01:05:20 +0900
Subject: [PATCH 09/47] Add benchmark target safety gating and recipient
 discovery

Add the client-side safety guard and the discovery that finds where to
deliver:

 -  `classifyTarget()` sorts a target into loopback/private/public from
    its host (IP-literal aware, IPv4-mapped IPv6 decoded), conservatively
    treating anything it cannot confirm as public.
 -  `assertTargetAllowed()` lets loopback/private targets and any target
    advertising benchmark mode run without friction, and refuses only a
    public target that does not advertise benchmark mode unless
    --allow-unsafe-target is given (mandatory, with no interactive
    prompt); --dry-run bypasses the gate since it only inspects.
 -  `probeBenchmarkMode()` reads the cooperative `stats` endpoint to
    detect benchmark mode and the target's Fedify version, never throwing.
 -  `discoverInbox()` resolves a handle or actor URI to its personal and
    shared inbox the way a remote peer would, building
    private-address-allowing loaders for loopback targets, and
    `selectInbox()` picks the inbox for the scenario's mode.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 .../cli/src/bench/discovery/discover.test.ts  |  99 ++++++++++++++
 packages/cli/src/bench/discovery/discover.ts  | 122 ++++++++++++++++++
 .../cli/src/bench/discovery/probe.test.ts     |  52 ++++++++
 packages/cli/src/bench/discovery/probe.ts     |  76 +++++++++++
 packages/cli/src/bench/safety/gate.test.ts    |  68 ++++++++++
 packages/cli/src/bench/safety/gate.ts         |  47 +++++++
 packages/cli/src/bench/safety/tiers.test.ts   |  80 ++++++++++++
 packages/cli/src/bench/safety/tiers.ts        |  68 ++++++++++
 8 files changed, 612 insertions(+)
 create mode 100644 packages/cli/src/bench/discovery/discover.test.ts
 create mode 100644 packages/cli/src/bench/discovery/discover.ts
 create mode 100644 packages/cli/src/bench/discovery/probe.test.ts
 create mode 100644 packages/cli/src/bench/discovery/probe.ts
 create mode 100644 packages/cli/src/bench/safety/gate.test.ts
 create mode 100644 packages/cli/src/bench/safety/gate.ts
 create mode 100644 packages/cli/src/bench/safety/tiers.test.ts
 create mode 100644 packages/cli/src/bench/safety/tiers.ts

diff --git a/packages/cli/src/bench/discovery/discover.test.ts b/packages/cli/src/bench/discovery/discover.test.ts
new file mode 100644
index 000000000..38759ca4d
--- /dev/null
+++ b/packages/cli/src/bench/discovery/discover.test.ts
@@ -0,0 +1,99 @@
+import { Endpoints, Note, Person } from "@fedify/vocab";
+import assert from "node:assert/strict";
+import test from "node:test";
+import { discoverInbox, DiscoveryError, selectInbox } from "./discover.ts";
+
+function actor(): Person {
+  return new Person({
+    id: new URL("http://localhost:3000/users/alice"),
+    inbox: new URL("http://localhost:3000/users/alice/inbox"),
+    endpoints: new Endpoints({
+      sharedInbox: new URL("http://localhost:3000/inbox"),
+    }),
+  });
+}
+
+test("discoverInbox - resolves personal and shared inboxes", async () => {
+  const discovered = await discoverInbox("acct:alice@localhost:3000", {
+    lookup: () => Promise.resolve(actor()),
+  });
+  assert.strictEqual(
+    discovered.actorUri.href,
+    "http://localhost:3000/users/alice",
+  );
+  assert.strictEqual(
+    discovered.personalInbox.href,
+    "http://localhost:3000/users/alice/inbox",
+  );
+  assert.strictEqual(
+    discovered.sharedInbox?.href,
+    "http://localhost:3000/inbox",
+  );
+});
+
+test("discoverInbox - throws when the recipient is not an actor", async () => {
+  await assert.rejects(
+    discoverInbox("acct:bob@localhost", {
+      lookup: () => Promise.resolve(new Note({})),
+    }),
+    DiscoveryError,
+  );
+});
+
+test("discoverInbox - throws when resolution fails", async () => {
+  await assert.rejects(
+    discoverInbox("acct:bob@localhost", {
+      lookup: () => Promise.reject(new Error("boom")),
+    }),
+    DiscoveryError,
+  );
+});
+
+test("discoverInbox - throws when the actor has no inbox", async () => {
+  await assert.rejects(
+    discoverInbox("acct:bob@localhost", {
+      lookup: () =>
+        Promise.resolve(
+          new Person({ id: new URL("http://localhost/users/bob") }),
+        ),
+    }),
+    DiscoveryError,
+  );
+});
+
+test("selectInbox - shared is the default and falls back to personal", () => {
+  const both = {
+    actorUri: new URL("http://localhost/users/a"),
+    personalInbox: new URL("http://localhost/users/a/inbox"),
+    sharedInbox: new URL("http://localhost/inbox"),
+  };
+  assert.strictEqual(
+    selectInbox(both, undefined).href,
+    "http://localhost/inbox",
+  );
+  assert.strictEqual(
+    selectInbox(both, "shared").href,
+    "http://localhost/inbox",
+  );
+  assert.strictEqual(
+    selectInbox(both, "personal").href,
+    "http://localhost/users/a/inbox",
+  );
+  const personalOnly = { ...both, sharedInbox: null };
+  assert.strictEqual(
+    selectInbox(personalOnly, "shared").href,
+    "http://localhost/users/a/inbox",
+  );
+});
+
+test("selectInbox - an explicit URL is used verbatim", () => {
+  const discovered = {
+    actorUri: new URL("http://localhost/users/a"),
+    personalInbox: new URL("http://localhost/users/a/inbox"),
+    sharedInbox: null,
+  };
+  assert.strictEqual(
+    selectInbox(discovered, "http://localhost/custom-inbox").href,
+    "http://localhost/custom-inbox",
+  );
+});
diff --git a/packages/cli/src/bench/discovery/discover.ts b/packages/cli/src/bench/discovery/discover.ts
new file mode 100644
index 000000000..f98fd233b
--- /dev/null
+++ b/packages/cli/src/bench/discovery/discover.ts
@@ -0,0 +1,122 @@
+/**
+ * Recipient discovery: resolving a handle or actor URI to the inbox URL a real
+ * peer would deliver to.
+ *
+ * Discovery mirrors how a remote server finds an inbox: WebFinger on a handle
+ * yields the actor URI, then the actor document yields its personal `inbox` and
+ * its shared inbox endpoint.  `lookupObject()` performs the WebFinger step for
+ * `acct:` identifiers automatically.
+ * @since 2.3.0
+ * @module
+ */
+
+import { isActor, lookupObject } from "@fedify/vocab";
+import type { DocumentLoader } from "@fedify/vocab-runtime";
+import { getContextLoader, getDocumentLoader } from "../../docloader.ts";
+import { convertUrlIfHandle } from "../../webfinger/lib.ts";
+
+/** The inbox mode an inbox scenario targets. */
+export type InboxKind = "shared" | "personal";
+
+/** A discovered recipient's inbox URLs. */
+export interface DiscoveredInbox {
+  readonly actorUri: URL;
+  readonly personalInbox: URL;
+  readonly sharedInbox: URL | null;
+}
+
+/** The loaders and network policy passed to the object resolver. */
+export interface DiscoverLoaders {
+  readonly documentLoader?: DocumentLoader;
+  readonly contextLoader?: DocumentLoader;
+  /**
+   * Whether WebFinger and document fetches may target private addresses; set
+   * for loopback/private benchmark targets.
+   */
+  readonly allowPrivateAddress?: boolean;
+}
+
+/** Options controlling discovery. */
+export interface DiscoverOptions extends DiscoverLoaders {
+  /** An overridable object resolver, for testing.  Defaults to `lookupObject`. */
+  readonly lookup?: (
+    identifier: URL,
+    loaders: DiscoverLoaders,
+  ) => Promise<unknown>;
+}
+
+/** An error raised when a recipient cannot be discovered. */
+export class DiscoveryError extends Error {}
+
+/**
+ * Discovers a recipient's inbox URLs from a handle or actor URI.
+ * @param recipient A handle (`acct:alice@host` or `@alice@host`) or actor URI.
+ * @param options Loaders, private-address policy, and resolver override.
+ * @returns The actor URI and its personal and shared inbox URLs.
+ * @throws {DiscoveryError} If the resolver fails, the result is not an actor,
+ *         or the actor has no inbox.  Anything thrown earlier (handle
+ *         conversion, loader construction) propagates unwrapped.
+ */
+export async function discoverInbox(
+  recipient: string,
+  options: DiscoverOptions = {},
+): Promise<DiscoveredInbox> {
+  const identifier = convertUrlIfHandle(recipient);
+  const { lookup = lookupObject, allowPrivateAddress } = options;
+  // Caller-supplied loaders are used as-is.  Only when none is given and
+  // private addresses are allowed are private-address-allowing loaders built.
+  const documentLoader = options.documentLoader ??
+    (allowPrivateAddress
+      ? await getDocumentLoader({ allowPrivateAddress: true })
+      : undefined);
+  const contextLoader = options.contextLoader ??
+    (allowPrivateAddress
+      ? await getContextLoader({ allowPrivateAddress: true })
+      : undefined);
+  let object: unknown;
+  try {
+    object = await lookup(identifier, {
+      documentLoader,
+      contextLoader,
+      allowPrivateAddress,
+    });
+  } catch (error) {
+    throw new DiscoveryError(
+      `Failed to resolve recipient ${recipient}: ${error}`,
+    );
+  }
+  if (!isActor(object)) {
+    throw new DiscoveryError(
+      `Recipient ${recipient} did not resolve to an actor.`,
+    );
+  }
+  if (object.inboxId == null) {
+    throw new DiscoveryError(`Actor ${recipient} has no inbox.`);
+  }
+  return {
+    actorUri: object.id ?? identifier, // id-less actor: keep the lookup URL
+    personalInbox: object.inboxId,
+    sharedInbox: object.endpoints?.sharedInbox ?? null,
+  };
+}
+
+/**
+ * Picks the delivery inbox that matches a scenario's `inbox` setting.
+ *
+ * With no setting or `"shared"`, the shared inbox is used when the actor has
+ * one and the personal inbox otherwise.  `"personal"` always yields the
+ * personal inbox.  Any other string is parsed as the inbox URL itself.
+ * @param discovered The discovered inbox URLs.
+ * @param mode The scenario's `inbox` value.
+ * @returns The inbox URL to deliver to.
+ */
+export function selectInbox(
+  discovered: DiscoveredInbox,
+  mode: string | undefined,
+): URL {
+  const { personalInbox, sharedInbox } = discovered;
+  // Absent or "shared": prefer the shared inbox, else the personal one.
+  if (mode == null || mode === "shared") return sharedInbox ?? personalInbox;
+  // Whatever is not "personal" is taken to be an explicit inbox URL.
+  return mode === "personal" ? personalInbox : new URL(mode);
+}
diff --git a/packages/cli/src/bench/discovery/probe.test.ts b/packages/cli/src/bench/discovery/probe.test.ts
new file mode 100644
index 000000000..43be7a913
--- /dev/null
+++ b/packages/cli/src/bench/discovery/probe.test.ts
@@ -0,0 +1,52 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { probeBenchmarkMode } from "./probe.ts";
+
+function jsonResponse(body: unknown, status = 200): Response {
+  return new Response(JSON.stringify(body), {
+    status,
+    headers: { "content-type": "application/json" },
+  });
+}
+
+const STATS = {
+  version: 1,
+  source: "server",
+  generatedAt: "2026-06-04T00:00:00Z",
+  scopeMetrics: [
+    { scope: { name: "@fedify/fedify", version: "2.3.0" }, metrics: [] },
+  ],
+  errors: [],
+};
+
+test("probeBenchmarkMode - detects benchmark mode and Fedify version", async () => {
+  const probe = await probeBenchmarkMode(
+    new URL("http://localhost:3000"),
+    () => Promise.resolve(jsonResponse(STATS)),
+  );
+  assert.deepEqual(probe, { benchmarkMode: true, fedifyVersion: "2.3.0" });
+});
+
+test("probeBenchmarkMode - a 404 means no benchmark mode", async () => {
+  const probe = await probeBenchmarkMode(
+    new URL("http://localhost:3000"),
+    () => Promise.resolve(jsonResponse({ error: "not found" }, 404)),
+  );
+  assert.deepEqual(probe, { benchmarkMode: false, fedifyVersion: null });
+});
+
+test("probeBenchmarkMode - a non-benchmark body means no benchmark mode", async () => {
+  const probe = await probeBenchmarkMode(
+    new URL("http://localhost:3000"),
+    () => Promise.resolve(jsonResponse({ hello: "world" })),
+  );
+  assert.strictEqual(probe.benchmarkMode, false);
+});
+
+test("probeBenchmarkMode - a network error means no benchmark mode", async () => {
+  const probe = await probeBenchmarkMode(
+    new URL("http://localhost:3000"),
+    () => Promise.reject(new Error("ECONNREFUSED")),
+  );
+  assert.deepEqual(probe, { benchmarkMode: false, fedifyVersion: null });
+});
diff --git a/packages/cli/src/bench/discovery/probe.ts b/packages/cli/src/bench/discovery/probe.ts
new file mode 100644
index 000000000..9ef82af72
--- /dev/null
+++ b/packages/cli/src/bench/discovery/probe.ts
@@ -0,0 +1,76 @@
+/**
+ * Probing a target for benchmark mode by querying its `stats` endpoint.
+ *
+ * A valid `stats` response means the target advertises benchmark mode, which is
+ * the operator's assertion that the target is not production.  The probe also
+ * reads the target's Fedify version from the metric scope, for the report.
+ * @since 2.3.0
+ * @module
+ */
+
+/** The result of probing a target for benchmark mode. */
+export interface BenchmarkProbe {
+  /** Whether the target advertises benchmark mode. */
+  readonly benchmarkMode: boolean;
+  /** The target's Fedify version, if discoverable. */
+  readonly fedifyVersion: string | null;
+}
+
+/** The path of the cooperative benchmark stats endpoint. */
+export const STATS_PATH = "/.well-known/fedify/bench/stats";
+
+/**
+ * Asks a target's `stats` endpoint whether it runs in benchmark mode.
+ * @param target The target base URL.
+ * @param fetchImpl The fetch implementation (overridable for tests).
+ * @returns The probe outcome.  A non-OK status, an unrecognized body, or any
+ *          thrown error yields `benchmarkMode: false` with a `null` version,
+ *          so the returned promise does not reject.
+ */
+export async function probeBenchmarkMode(
+  target: URL,
+  fetchImpl: typeof fetch = fetch,
+): Promise<BenchmarkProbe> {
+  try {
+    const statsUrl = new URL(STATS_PATH, target);
+    const init = { headers: { accept: "application/json" } };
+    const response = await fetchImpl(statsUrl, init);
+    if (!response.ok) return notAdvertised();
+    const body = await response.json() as {
+      version?: unknown;
+      source?: unknown;
+      scopeMetrics?: unknown;
+    };
+    // Only a version-1 snapshot sourced from the server counts.
+    const advertised = body?.version === 1 && body?.source === "server";
+    if (!advertised) return notAdvertised();
+    return { benchmarkMode: true, fedifyVersion: extractFedifyVersion(body) };
+  } catch {
+    return notAdvertised();
+  }
+}
+
+function notAdvertised(): BenchmarkProbe {
+  return { benchmarkMode: false, fedifyVersion: null };
+}
+
+// Reads the version of the `@fedify/fedify` metric scope out of a stats
+// body.  Returns `null` when `scopeMetrics` is not an array, no such scope
+// exists, or that scope's version is not a string.
+function extractFedifyVersion(json: { scopeMetrics?: unknown }): string | null {
+  try {
+    if (!Array.isArray(json.scopeMetrics)) return null;
+    for (const item of json.scopeMetrics) {
+      if (typeof item !== "object" || item == null) continue;
+      const scope = (item as { scope?: unknown }).scope;
+      if (typeof scope !== "object" || scope == null) continue;
+      const { name, version } = scope as Record<string, unknown>;
+      // The first Fedify scope decides, even if its version is not a string.
+      if (name !== "@fedify/fedify") continue;
+      return typeof version === "string" ? version : null;
+    }
+  } catch {
+    // Version extraction must never affect benchmark-mode detection.
+  }
+  return null;
+}
diff --git a/packages/cli/src/bench/safety/gate.test.ts b/packages/cli/src/bench/safety/gate.test.ts
new file mode 100644
index 000000000..55948dbf8
--- /dev/null
+++ b/packages/cli/src/bench/safety/gate.test.ts
@@ -0,0 +1,68 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { assertTargetAllowed, UnsafeTargetError } from "./gate.ts";
+
+test("assertTargetAllowed - loopback/private are always allowed", () => {
+  assert.doesNotThrow(() =>
+    assertTargetAllowed({
+      tier: "loopback",
+      benchmarkMode: false,
+      allowUnsafe: false,
+      dryRun: false,
+    })
+  );
+  assert.doesNotThrow(() =>
+    assertTargetAllowed({
+      tier: "private",
+      benchmarkMode: false,
+      allowUnsafe: false,
+      dryRun: false,
+    })
+  );
+});
+
+test("assertTargetAllowed - public with benchmark mode is allowed", () => {
+  assert.doesNotThrow(() =>
+    assertTargetAllowed({
+      tier: "public",
+      benchmarkMode: true,
+      allowUnsafe: false,
+      dryRun: false,
+    })
+  );
+});
+
+test("assertTargetAllowed - public without benchmark mode is refused", () => {
+  assert.throws(
+    () =>
+      assertTargetAllowed({
+        tier: "public",
+        benchmarkMode: false,
+        allowUnsafe: false,
+        dryRun: false,
+      }),
+    UnsafeTargetError,
+  );
+});
+
+test("assertTargetAllowed - the unsafe flag overrides the refusal", () => {
+  assert.doesNotThrow(() =>
+    assertTargetAllowed({
+      tier: "public",
+      benchmarkMode: false,
+      allowUnsafe: true,
+      dryRun: false,
+    })
+  );
+});
+
+test("assertTargetAllowed - dry-run bypasses the gate", () => {
+  assert.doesNotThrow(() =>
+    assertTargetAllowed({
+      tier: "public",
+      benchmarkMode: false,
+      allowUnsafe: false,
+      dryRun: true,
+    })
+  );
+});
diff --git a/packages/cli/src/bench/safety/gate.ts b/packages/cli/src/bench/safety/gate.ts
new file mode 100644
index 000000000..153316c6f
--- /dev/null
+++ b/packages/cli/src/bench/safety/gate.ts
@@ -0,0 +1,47 @@
+/**
+ * The client-side safety gate.
+ *
+ * A run is allowed without friction when the target is loopback/private or
+ * advertises benchmark mode (the operator's "not production" assertion).  Only
+ * a public target that does not advertise benchmark mode is gated, behind an
+ * explicit `--allow-unsafe-target`.  There is no interactive prompt, so the
+ * flag is mandatory in CI and any non-TTY context.  A `--dry-run` only inspects
+ * (discovery reads), so it bypasses the gate.
+ * @since 2.3.0
+ * @module
+ */
+
+import type { TargetTier } from "./tiers.ts";
+
+/** An error raised when a target is refused by the safety gate. */
+export class UnsafeTargetError extends Error {}
+
+/** The inputs to the safety gate decision. */
+export interface GateContext {
+  /** The target's risk tier. */
+  readonly tier: TargetTier;
+  /** Whether the target advertises benchmark mode (the `stats` probe). */
+  readonly benchmarkMode: boolean;
+  /** Whether `--allow-unsafe-target` was given. */
+  readonly allowUnsafe: boolean;
+  /** Whether this is a `--dry-run` (inspection only). */
+  readonly dryRun: boolean;
+}
+
+/**
+ * Lets a benchmark run proceed or rejects it, per the safety gate's rules.
+ * @param context The gate decision inputs.
+ * @throws {UnsafeTargetError} If a non-dry run targets a public host without
+ *         benchmark mode and without `--allow-unsafe-target`.
+ */
+export function assertTargetAllowed(context: GateContext): void {
+  const { tier, benchmarkMode, allowUnsafe, dryRun } = context;
+  // Any single one of these conditions is enough to let the run through.
+  const permitted = dryRun || tier !== "public" || benchmarkMode || allowUnsafe;
+  if (permitted) return;
+  throw new UnsafeTargetError(
+    "Refusing to benchmark a public target that does not advertise benchmark " +
+      "mode.  If you control this target, pass --allow-unsafe-target " +
+      "(mandatory in CI and any non-interactive context).",
+  );
+}
diff --git a/packages/cli/src/bench/safety/tiers.test.ts b/packages/cli/src/bench/safety/tiers.test.ts
new file mode 100644
index 000000000..e5ee4c696
--- /dev/null
+++ b/packages/cli/src/bench/safety/tiers.test.ts
@@ -0,0 +1,80 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { classifyTarget } from "./tiers.ts";
+
+test("classifyTarget - loopback", () => {
+  for (
+    const url of [
+      "http://localhost:3000",
+      "http://127.0.0.1",
+      "http://127.5.5.5:8080",
+      "http://[::1]:8080",
+      "http://app.localhost",
+    ]
+  ) {
+    assert.strictEqual(classifyTarget(new URL(url)), "loopback", url);
+  }
+});
+
+test("classifyTarget - private", () => {
+  for (
+    const url of [
+      "http://10.0.0.5",
+      "http://192.168.1.10",
+      "http://172.16.0.1",
+      "http://172.31.255.1",
+      "http://169.254.1.1",
+      "http://printer.local",
+      "http://[fc00::1]",
+      "http://[fd12:3456::1]",
+      "http://[fe80::1]",
+    ]
+  ) {
+    assert.strictEqual(classifyTarget(new URL(url)), "private", url);
+  }
+});
+
+test("classifyTarget - public", () => {
+  for (
+    const url of [
+      "https://example.com",
+      "http://8.8.8.8",
+      "http://172.32.0.1",
+      "https://staging.example.org",
+    ]
+  ) {
+    assert.strictEqual(classifyTarget(new URL(url)), "public", url);
+  }
+});
+
+test("classifyTarget - IP-looking hostnames are not private", () => {
+  // These are real DNS names that merely start with private-looking octets.
+  for (
+    const url of [
+      "http://127.example.com",
+      "http://10.example.com",
+      "http://192.168.1.example.com",
+    ]
+  ) {
+    assert.strictEqual(classifyTarget(new URL(url)), "public", url);
+  }
+});
+
+test("classifyTarget - trailing root dot is stripped", () => {
+  assert.strictEqual(classifyTarget(new URL("http://localhost./")), "loopback");
+  assert.strictEqual(
+    classifyTarget(new URL("http://printer.local./")),
+    "private",
+  );
+});
+
+test("classifyTarget - IPv4-mapped IPv6 loopback/private", () => {
+  assert.strictEqual(
+    classifyTarget(new URL("http://[::ffff:127.0.0.1]/")),
+    "loopback",
+  );
+  assert.strictEqual(
+    classifyTarget(new URL("http://[::ffff:10.0.0.1]/")),
+    "private",
+  );
+});
diff --git a/packages/cli/src/bench/safety/tiers.ts b/packages/cli/src/bench/safety/tiers.ts
new file mode 100644
index 000000000..e666b4846
--- /dev/null
+++ b/packages/cli/src/bench/safety/tiers.ts
@@ -0,0 +1,68 @@
+/**
+ * Target risk classification.
+ *
+ * A target is `loopback` or `private` when it is clearly one of the operator's
+ * own boxes, and `public` otherwise.  Classification is conservative: a host
+ * that is not obviously loopback or private is treated as `public` (the gated
+ * tier), since the tool cannot tell staging from production without resolving
+ * and trusting DNS.
+ * @since 2.3.0
+ * @module
+ */
+
+/** The risk tier of a benchmark target. */
+export type TargetTier = "loopback" | "private" | "public";
+
+/**
+ * Assigns a benchmark target to a risk tier by inspecting its URL host only.
+ * @param target The target URL.
+ * @returns `"loopback"`, `"private"`, or `"public"` when neither applies.
+ */
+export function classifyTarget(target: URL): TargetTier {
+  // URL hostnames wrap IPv6 literals in brackets; drop them and fold case.
+  const bare = target.hostname.toLowerCase().replace(/^\[|\]$/g, "");
+  // A single trailing dot is the DNS root label, not part of the name.
+  const host = bare.endsWith(".") ? bare.slice(0, -1) : bare;
+  if (host === "localhost" || host.endsWith(".localhost")) return "loopback";
+  if (host.endsWith(".local")) return "private";
+  // IP literals get range checks; every other hostname counts as public.
+  if (isIpv4(host)) return classifyIpv4(host);
+  if (host.includes(":")) return classifyIpv6(host);
+  return "public";
+}
+
+function isIpv4(host: string): boolean {
+  return /^\d{1,3}(\.\d{1,3}){3}$/.test(host) &&
+    host.split(".").every((octet) => Number(octet) <= 255);
+}
+
+function classifyIpv4(host: string): TargetTier {
+  // 0.0.0.0 is grouped with 127.0.0.0/8 as loopback.
+  if (host === "0.0.0.0" || host.startsWith("127.")) return "loopback";
+  // 10/8, 192.168/16, and 172.16/12 (RFC 1918), plus 169.254/16 link-local.
+  const prefixes = ["10.", "192.168.", "169.254."];
+  const isPrivate = prefixes.some((prefix) => host.startsWith(prefix)) ||
+    /^172\.(1[6-9]|2\d|3[01])\./.test(host);
+  if (isPrivate) return "private";
+  return "public";
+}
+
+// Classifies an IPv6 literal; expects it lowercase and without brackets.
+function classifyIpv6(host: string): TargetTier {
+  if (host === "::1") return "loopback";
+  // IPv4-mapped (::ffff:127.0.0.1 or ::ffff:7f00:1) follows the IPv4 rules.
+  const dotted = host.match(/^::ffff:(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/);
+  if (dotted != null && isIpv4(dotted[1])) return classifyIpv4(dotted[1]);
+  const hex = host.match(/^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/);
+  if (hex != null) {
+    const hi = Number.parseInt(hex[1], 16);
+    const lo = Number.parseInt(hex[2], 16);
+    return classifyIpv4(
+      `${(hi >> 8) & 255}.${hi & 255}.${(lo >> 8) & 255}.${lo & 255}`,
+    );
+  }
+  // Unique-local (fc00::/7) and link-local (fe80::/10).  The first group must
+  // have all four hex digits: `fc::1` is 00fc::1, which is in neither range.
+  if (/^(f[cd][0-9a-f]{2}|fe[89ab][0-9a-f]):/.test(host)) return "private";
+  return "public";
+}

From a47b074bd0f7c2357e81182bc48ddccf25bf69d7 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 01:05:23 +0900
Subject: [PATCH 10/47] Add the synthetic actor/key server for benchmarks

Stand up the benchmark's own synthetic remote peer.  An author picks
signature standards and the key set is derived: HTTP request signatures
and LD Signatures share one RSA pair, FEP-8b32 uses an Ed25519 pair.
`buildFleet()` expands the actor groups into members with generated keys,
and `spawnSyntheticServer()` serves each member as a normal ActivityPub
actor document with an embedded `publicKey` and `assertionMethod` over
plain loopback HTTP.

The target dereferences a signature's keyId during verification, so
serving exactly the document a real actor exposes lets verification
resolve the key the same way; a fixed actor set keeps this on a cold path
a warm-up window excludes.  A test confirms the served document parses
back into a verifiable actor whose keys resolve.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 packages/cli/src/bench/actor/documents.ts     |  45 ++++++++
 packages/cli/src/bench/actor/fleet.ts         |  79 ++++++++++++++
 packages/cli/src/bench/actor/keys.ts          |  53 +++++++++
 .../cli/src/bench/server/synthetic.test.ts    |  70 ++++++++++++
 packages/cli/src/bench/server/synthetic.ts    | 103 ++++++++++++++++++
 5 files changed, 350 insertions(+)
 create mode 100644 packages/cli/src/bench/actor/documents.ts
 create mode 100644 packages/cli/src/bench/actor/fleet.ts
 create mode 100644 packages/cli/src/bench/actor/keys.ts
 create mode 100644 packages/cli/src/bench/server/synthetic.test.ts
 create mode 100644 packages/cli/src/bench/server/synthetic.ts

diff --git a/packages/cli/src/bench/actor/documents.ts b/packages/cli/src/bench/actor/documents.ts
new file mode 100644
index 000000000..c617bc2fb
--- /dev/null
+++ b/packages/cli/src/bench/actor/documents.ts
@@ -0,0 +1,45 @@
+/**
+ * Building the ActivityPub actor documents the synthetic key server serves.
+ *
+ * The target dereferences a signature's `keyId` during verification; serving a
+ * normal actor document with an embedded `publicKey` (RSA, for HTTP and LD
+ * Signatures) and `assertionMethod` (Ed25519 Multikey, for FEP-8b32) is exactly
+ * what a real actor exposes, so verification resolves the key the same way.
+ * @since 2.3.0
+ * @module
+ */
+
+import { Application, CryptographicKey, Multikey } from "@fedify/vocab";
+import type { DocumentLoader } from "@fedify/vocab-runtime";
+import type { SyntheticActor } from "../server/synthetic.ts";
+
+/**
+ * Renders a synthetic actor as an `Application` JSON-LD actor document.
+ * @param actor The synthetic actor; its id, index, name, and keys are used.
+ * @param options The context loader handed to `toJsonLd()`.
+ * @returns The document, embedding whichever of the two keys the actor has.
+ */
+export async function actorDocument(
+  actor: SyntheticActor,
+  options: { contextLoader: DocumentLoader },
+): Promise<unknown> {
+  const application = new Application({
+    id: actor.id,
+    preferredUsername: `bench-${actor.index}`,
+    name: actor.name ?? `Benchmark actor ${actor.index}`,
+    inbox: new URL(`${actor.id.href}/inbox`),
+    publicKey: actor.keys.rsa == null ? undefined : new CryptographicKey({
+      id: actor.rsaKeyId,
+      owner: actor.id,
+      publicKey: actor.keys.rsa.publicKey,
+    }),
+    assertionMethods: actor.keys.ed25519 == null ? [] : [
+      new Multikey({
+        id: actor.ed25519KeyId,
+        controller: actor.id,
+        publicKey: actor.keys.ed25519.publicKey,
+      }),
+    ],
+  });
+  return await application.toJsonLd({ contextLoader: options.contextLoader });
+}
diff --git a/packages/cli/src/bench/actor/fleet.ts b/packages/cli/src/bench/actor/fleet.ts
new file mode 100644
index 000000000..1453d43c9
--- /dev/null
+++ b/packages/cli/src/bench/actor/fleet.ts
@@ -0,0 +1,79 @@
+/**
+ * Building the fleet of synthetic actors a benchmark run signs as.
+ *
+ * A fixed actor set keeps the target's key dereferencing on a cold path that a
+ * warm-up window excludes, so the synthetic key server adds no steady-state
+ * measurement noise.
+ * @since 2.3.0
+ * @module
+ */
+
+import type { ActorGroup, SignatureStandard } from "../scenario/types.ts";
+import { type ActorKeys, generateActorKeys } from "./keys.ts";
+
+/** The HTTP request signature standard used by an actor. */
+export type HttpSignatureStandard =
+  | "draft-cavage-http-signatures-12"
+  | "rfc9421";
+
+/** A synthetic actor before its URLs are known (no server yet). */
+export interface FleetMember {
+  /** The actor's index across the whole fleet. */
+  readonly index: number;
+  /** The display name template the actor came from, if any. */
+  readonly name?: string;
+  /** The signature standards the actor signs with. */
+  readonly standards: SignatureStandard[];
+  /** The actor's key pairs. */
+  readonly keys: ActorKeys;
+  /** The single HTTP request signature standard the actor uses. */
+  readonly httpStandard: HttpSignatureStandard;
+}
+
+function httpStandardOf(
+  standards: readonly SignatureStandard[],
+): HttpSignatureStandard {
+  // Scan in declaration order; the first HTTP request signature standard
+  // found is the one the actor uses.
+  for (const s of standards) {
+    if (s === "draft-cavage-http-signatures-12" || s === "rfc9421") return s;
+  }
+  // No HTTP request signature standard was listed at all.
+  throw new TypeError(
+    "Every actor group must declare exactly one HTTP request signature " +
+      "standard.",
+  );
+}
+
+/**
+ * Expands the suite's actor groups into fleet members with generated keys,
+ * numbered from zero across all groups.  An empty group list yields a single
+ * default actor that signs with `draft-cavage-http-signatures-12`.
+ * @param groups The suite's actor groups.
+ * @returns The fleet members, with keys generated.
+ */
+export async function buildFleet(
+  groups: readonly ActorGroup[],
+): Promise<FleetMember[]> {
+  // Stand-in group used only when the suite declares none.
+  const fallback: ActorGroup = {
+    signatureStandards: ["draft-cavage-http-signatures-12"],
+  };
+  const fleet: FleetMember[] = [];
+  for (const group of groups.length > 0 ? groups : [fallback]) {
+    const { name, signatureStandards: standards } = group;
+    const httpStandard = httpStandardOf(standards);
+    for (let remaining = group.count ?? 1; remaining > 0; remaining--) {
+      const keys = await generateActorKeys(standards);
+      fleet.push({
+        // The fleet's size so far is the next fleet-wide index.
+        index: fleet.length,
+        name,
+        standards,
+        keys,
+        httpStandard,
+      });
+    }
+  }
+  return fleet;
+}
diff --git a/packages/cli/src/bench/actor/keys.ts b/packages/cli/src/bench/actor/keys.ts
new file mode 100644
index 000000000..7316a167f
--- /dev/null
+++ b/packages/cli/src/bench/actor/keys.ts
@@ -0,0 +1,53 @@
+/**
+ * Key-pair generation for synthetic benchmark actors.
+ *
+ * An author picks signature standards, not key algorithms; the key set is
+ * derived from the chosen standards, mirroring how a real Fedify actor exposes
+ * keys.  HTTP request signatures and LD Signatures share one RSA key pair;
+ * FEP-8b32 object integrity proofs use an Ed25519 key pair.
+ * @since 2.3.0
+ * @module
+ */
+
+import { generateCryptoKeyPair } from "@fedify/fedify";
+import type { SignatureStandard } from "../scenario/types.ts";
+
+/** The key pairs an actor holds, derived from its signature standards. */
+export interface ActorKeys {
+  /** The RSA pair for HTTP request signatures and LD Signatures. */
+  readonly rsa?: CryptoKeyPair;
+  /** The Ed25519 pair for FEP-8b32 object integrity proofs. */
+  readonly ed25519?: CryptoKeyPair;
+}
+
+/** Tells whether any listed standard signs with the RSA key pair. */
+export function needsRsa(standards: readonly SignatureStandard[]): boolean {
+  // Both HTTP request signature standards and LD Signatures use RSA here.
+  return standards.includes("draft-cavage-http-signatures-12") ||
+    standards.includes("rfc9421") ||
+    standards.includes("ld-signatures");
+}
+
+/** Tells whether the listed standards call for an Ed25519 key pair. */
+export function needsEd25519(standards: readonly SignatureStandard[]): boolean {
+  return standards.some((standard) => standard === "fep8b32");
+}
+
+/**
+ * Creates whichever key pairs the given signature standards call for.
+ * @param standards The actor's signature standards.
+ * @returns The key pairs; a pair that is not needed is left `undefined`.
+ */
+export async function generateActorKeys(
+  standards: readonly SignatureStandard[],
+): Promise<ActorKeys> {
+  // Start both generations before awaiting either so they run concurrently.
+  const rsaPending = needsRsa(standards)
+    ? generateCryptoKeyPair("RSASSA-PKCS1-v1_5")
+    : undefined;
+  const ed25519Pending = needsEd25519(standards)
+    ? generateCryptoKeyPair("Ed25519")
+    : undefined;
+  const [rsa, ed25519] = await Promise.all([rsaPending, ed25519Pending]);
+  return { rsa, ed25519 };
+}
diff --git a/packages/cli/src/bench/server/synthetic.test.ts b/packages/cli/src/bench/server/synthetic.test.ts
new file mode 100644
index 000000000..37ce2dc56
--- /dev/null
+++ b/packages/cli/src/bench/server/synthetic.test.ts
@@ -0,0 +1,70 @@
+import { isActor, Object as APObject } from "@fedify/vocab";
+import assert from "node:assert/strict";
+import test from "node:test";
+import { getContextLoader, getDocumentLoader } from "../../docloader.ts";
+import { buildFleet } from "../actor/fleet.ts";
+import { spawnSyntheticServer } from "./synthetic.ts";
+
+test("spawnSyntheticServer - serves a verifiable actor document", async () => {
+  const fleet = await buildFleet([{
+    count: 1,
+    signatureStandards: ["draft-cavage-http-signatures-12", "fep8b32"],
+  }]);
+  const server = await spawnSyntheticServer(fleet);
+  try {
+    const actor = server.actors[0];
+    assert.strictEqual(actor.id.hostname, "127.0.0.1");
+    assert.ok(actor.rsaKeyId?.href.endsWith("#main-key"));
+    assert.ok(actor.ed25519KeyId?.href.endsWith("#ed25519-key"));
+
+    const response = await fetch(actor.id);
+    assert.strictEqual(response.status, 200);
+    assert.match(
+      response.headers.get("content-type") ?? "",
+      /activity\+json/,
+    );
+    const json = await response.text();
+    assert.match(json, /publicKeyPem/);
+    assert.match(json, /BEGIN PUBLIC KEY/);
+    assert.match(json, /publicKeyMultibase/);
+
+    // The served document parses back into a verifiable actor with its keys.
+    const documentLoader = await getDocumentLoader({
+      allowPrivateAddress: true,
+    });
+    const contextLoader = await getContextLoader({ allowPrivateAddress: true });
+    const parsed = await APObject.fromJsonLd(JSON.parse(json), {
+      documentLoader,
+      contextLoader,
+    });
+    assert.ok(isActor(parsed));
+    const publicKeys = await Array.fromAsync(
+      parsed.getPublicKeys({ documentLoader, contextLoader }),
+    );
+    assert.strictEqual(publicKeys.length, 1);
+    assert.ok(publicKeys[0].publicKey != null);
+    const multikeys = await Array.fromAsync(
+      parsed.getAssertionMethods({ documentLoader, contextLoader }),
+    );
+    assert.strictEqual(multikeys.length, 1);
+    assert.ok(multikeys[0].publicKey != null);
+  } finally {
+    await server.close();
+  }
+});
+
+test("spawnSyntheticServer - unknown paths 404", async () => {
+  const fleet = await buildFleet([{
+    signatureStandards: ["rfc9421"],
+  }]);
+  const server = await spawnSyntheticServer(fleet);
+  try {
+    const response = await fetch(new URL("/nope", server.url));
+    assert.strictEqual(response.status, 404);
+    // An rfc9421-only actor has an RSA key but no Ed25519 key.
+    assert.ok(server.actors[0].rsaKeyId != null);
+    assert.ok(server.actors[0].ed25519KeyId == null);
+  } finally {
+    await server.close();
+  }
+});
diff --git a/packages/cli/src/bench/server/synthetic.ts b/packages/cli/src/bench/server/synthetic.ts
new file mode 100644
index 000000000..177c365c0
--- /dev/null
+++ b/packages/cli/src/bench/server/synthetic.ts
@@ -0,0 +1,103 @@
+/**
+ * The benchmark's own synthetic actor/key server.
+ *
+ * It serves the actor documents (with embedded keys) that the target
+ * dereferences while verifying signatures, over plain loopback HTTP — which
+ * works because `benchmarkMode` enables `allowPrivateAddress` on the target.
+ * @since 2.3.0
+ * @module
+ */
+
+import { serve } from "srvx";
+import type { DocumentLoader } from "@fedify/vocab-runtime";
+import { getContextLoader } from "../../docloader.ts";
+import { actorDocument } from "../actor/documents.ts";
+import type { FleetMember } from "../actor/fleet.ts";
+
+/** A synthetic actor with its server-assigned URLs. */
+export interface SyntheticActor extends FleetMember {
+  /** The actor's URL on the synthetic server. */
+  readonly id: URL;
+  /** The RSA key's id (a fragment of the actor URL), if the actor has one. */
+  readonly rsaKeyId?: URL;
+  /** The Ed25519 key's id, if the actor has one. */
+  readonly ed25519KeyId?: URL;
+}
+
+/** A running synthetic actor/key server. */
+export interface SyntheticServer {
+  /** The server's base URL. */
+  readonly url: URL;
+  /** The actors it serves, with their URLs and keys. */
+  readonly actors: SyntheticActor[];
+  /** Shuts the server down. */
+  close(): Promise<void>;
+}
+
+/** Options for {@link spawnSyntheticServer}. */
+export interface SyntheticServerOptions {
+  /** The context loader used to render actor documents. */
+  readonly contextLoader?: DocumentLoader;
+}
+
+/**
+ * Starts the synthetic actor/key server and serves each fleet member's actor
+ * document.
+ * @param members The fleet members (with keys) to serve.
+ * @param options Server options.
+ * @returns The running server, including the actors with their assigned URLs.
+ */
+export async function spawnSyntheticServer(
+  members: readonly FleetMember[],
+  options: SyntheticServerOptions = {},
+): Promise<SyntheticServer> {
+  const routes = new Map<string, string>();
+  const server = serve({
+    port: 0,
+    hostname: "127.0.0.1",
+    silent: true,
+    fetch(request: Request): Response {
+      const { pathname } = new URL(request.url);
+      const body = routes.get(pathname);
+      if (body == null) return new Response("Not found", { status: 404 });
+      return new Response(body, {
+        status: 200,
+        headers: { "content-type": "application/activity+json" },
+      });
+    },
+  });
+  await server.ready();
+  const actors: SyntheticActor[] = [];
+  try {
+    const base = new URL(server.url!);
+    const contextLoader = options.contextLoader ??
+      await getContextLoader({ allowPrivateAddress: true });
+    for (const member of members) {
+      const id = new URL(`/actors/${member.index}`, base);
+      const actor: SyntheticActor = {
+        ...member,
+        id,
+        rsaKeyId: member.keys.rsa == null
+          ? undefined
+          : new URL("#main-key", id),
+        ed25519KeyId: member.keys.ed25519 == null
+          ? undefined
+          : new URL("#ed25519-key", id),
+      };
+      const document = await actorDocument(actor, { contextLoader });
+      routes.set(`/actors/${member.index}`, JSON.stringify(document));
+      actors.push(actor);
+    }
+    return {
+      url: new URL(server.url!),
+      actors,
+      async close() {
+        await server.close(true);
+      },
+    };
+  } catch (error) {
+    // Don't leak the listener if rendering the actor documents fails.
+    await server.close(true);
+    throw error;
+  }
+}

From 1bb3bee20101b653c930b0454de0e8d2de84d297 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 01:05:24 +0900
Subject: [PATCH 11/47] Add the benchmark signing pipeline

Sign inbox deliveries reusing the @fedify/fedify signers so the client
pays realistic crypto cost.  `signInboxDelivery()` applies the FEP-8b32
object proof and the LD Signature to the document, then the HTTP request
signature (cavage or rfc9421) to the final body.
`createActivityIdMinter()` mints a unique activity id per request,
satisfying Fedify's always-on inbox idempotency automatically.

`createSigningPipeline()` keeps RSA signing off the send critical path
with three lookahead modes: `jit`, `pipeline` (default; background
signers keep a bounded buffer filled and buffer starvation surfaces the
client as the bottleneck), and `presign`.  The pipeline cannot hang on a
stuck factory, drops transient sign failures, and fails fast on
deterministic ones.  Tests verify the produced cavage and rfc9421
requests pass Fedify's own verifyRequest against synthetic-server keys.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 .../cli/src/bench/signing/activity-id.test.ts |  20 ++
 packages/cli/src/bench/signing/activity-id.ts |  33 +++
 .../cli/src/bench/signing/pipeline.test.ts    | 101 +++++++++
 packages/cli/src/bench/signing/pipeline.ts    | 203 ++++++++++++++++++
 packages/cli/src/bench/signing/signer.test.ts |  92 ++++++++
 packages/cli/src/bench/signing/signer.ts      |  80 +++++++
 6 files changed, 529 insertions(+)
 create mode 100644 packages/cli/src/bench/signing/activity-id.test.ts
 create mode 100644 packages/cli/src/bench/signing/activity-id.ts
 create mode 100644 packages/cli/src/bench/signing/pipeline.test.ts
 create mode 100644 packages/cli/src/bench/signing/pipeline.ts
 create mode 100644 packages/cli/src/bench/signing/signer.test.ts
 create mode 100644 packages/cli/src/bench/signing/signer.ts

diff --git a/packages/cli/src/bench/signing/activity-id.test.ts b/packages/cli/src/bench/signing/activity-id.test.ts
new file mode 100644
index 000000000..191bcdedc
--- /dev/null
+++ b/packages/cli/src/bench/signing/activity-id.test.ts
@@ -0,0 +1,20 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { createActivityIdMinter } from "./activity-id.ts";
+
+test("createActivityIdMinter - mints unique ids under the base", () => {
+  const minter = createActivityIdMinter(new URL("http://127.0.0.1:3000"));
+  const a = minter.next();
+  const b = minter.next();
+  assert.notStrictEqual(a.href, b.href);
+  assert.strictEqual(a.protocol, "http:");
+  assert.strictEqual(a.hostname, "127.0.0.1");
+  assert.match(a.pathname, /^\/activities\//);
+});
+
+test("createActivityIdMinter - separate minters do not collide", () => {
+  const base = new URL("http://127.0.0.1:3000");
+  const first = createActivityIdMinter(base).next();
+  const second = createActivityIdMinter(base).next();
+  assert.notStrictEqual(first.href, second.href);
+});
diff --git a/packages/cli/src/bench/signing/activity-id.ts b/packages/cli/src/bench/signing/activity-id.ts
new file mode 100644
index 000000000..72d353203
--- /dev/null
+++ b/packages/cli/src/bench/signing/activity-id.ts
@@ -0,0 +1,33 @@
+/**
+ * Unique activity-id minting.
+ *
+ * Inbox idempotency is always on in Fedify: a duplicate activity `id` is
+ * short-circuited before the listener runs.  So the load generator must mint a
+ * unique `id` per request, which is exactly what real traffic looks like; the
+ * tool owns the id so an author cannot forget it.
+ * @since 2.3.0
+ * @module
+ */
+
+/** Mints unique activity ids. */
+export interface ActivityIdMinter {
+  /** Returns the next unique activity id URL. */
+  next(): URL;
+}
+
+/**
+ * Builds an activity-id minter rooted at `base`.  Each id is the path
+ * `/activities/<uuid>/<n>` resolved against `base`: the UUID is drawn once
+ * per minter and `n` counts up from zero on every `next()` call.
+ * @param base The URL the ids are resolved against.
+ * @returns A minter whose `next()` returns a new URL on every call.
+ */
+export function createActivityIdMinter(base: URL): ActivityIdMinter {
+  // One random prefix per minter keeps separate minters from colliding.
+  const prefix = `/activities/${crypto.randomUUID()}/`;
+  // The counter tells apart the ids minted by this minter.
+  let sequence = 0;
+  return {
+    next: (): URL => new URL(`${prefix}${sequence++}`, base),
+  };
+}
diff --git a/packages/cli/src/bench/signing/pipeline.test.ts b/packages/cli/src/bench/signing/pipeline.test.ts
new file mode 100644
index 000000000..891e253be
--- /dev/null
+++ b/packages/cli/src/bench/signing/pipeline.test.ts
@@ -0,0 +1,101 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { createSigningPipeline } from "./pipeline.ts";
+
+// A stand-in signing factory for the tests.  Each call waits `delayMs` on a
+// timer and then resolves with a POST request to `http://sink/<n>`, where `n`
+// counts the requests this factory has produced so far.
+function fakeFactory(delayMs = 0): () => Promise<Request> {
+  let produced = 0;
+  return async () => {
+    await new Promise((wake) => setTimeout(wake, delayMs));
+    // Number the request only once the simulated signing delay has elapsed.
+    return new Request(`http://sink/${produced++}`, { method: "POST" });
+  };
+}
+
+test("jit - signs in the send path with no starvation", async () => {
+  const pipeline = createSigningPipeline("jit", fakeFactory());
+  const request = await pipeline.next();
+  assert.ok(request instanceof Request);
+  assert.strictEqual(pipeline.starvationCount, 0);
+  await pipeline.close();
+});
+
+test("pipeline - buffers and surfaces starvation under a slow signer", async () => {
+  const pipeline = createSigningPipeline("pipeline", fakeFactory(15), {
+    bufferSize: 1,
+    signers: 1,
+  });
+  await pipeline.prime();
+  const requests: Request[] = [];
+  for (let i = 0; i < 5; i++) requests.push(await pipeline.next());
+  assert.strictEqual(requests.length, 5);
+  assert.ok(
+    pipeline.starvationCount > 0,
+    `expected starvation, got ${pipeline.starvationCount}`,
+  );
+  await pipeline.close();
+});
+
+test("pipeline - survives a synchronous factory throw", async () => {
+  let calls = 0;
+  const pipeline = createSigningPipeline("pipeline", () => {
+    calls++;
+    if (calls <= 2) throw new Error("sync boom");
+    return Promise.resolve(new Request("http://sink/ok", { method: "POST" }));
+  }, { bufferSize: 1, signers: 1 });
+  const request = await pipeline.next();
+  assert.ok(request instanceof Request);
+  await pipeline.close();
+});
+
+test("pipeline - fails fast when signing always fails", async () => {
+  const pipeline = createSigningPipeline(
+    "pipeline",
+    () => Promise.reject(new Error("bad key")),
+    { bufferSize: 2, signers: 1 },
+  );
+  await assert.rejects(pipeline.next(), /bad key/);
+  await pipeline.close();
+});
+
+test("presign - signs the whole run up front without starvation", async () => {
+  const pipeline = createSigningPipeline("presign", fakeFactory(), {
+    total: 3,
+    signers: 2,
+  });
+  await pipeline.prime();
+  assert.strictEqual(pipeline.starvationCount, 0);
+  for (let i = 0; i < 3; i++) {
+    assert.ok((await pipeline.next()) instanceof Request);
+  }
+  await pipeline.close();
+});
+
+test("close - rejects a pending consumer", async () => {
+  const pipeline = createSigningPipeline("pipeline", fakeFactory(50), {
+    bufferSize: 1,
+    signers: 1,
+  });
+  await pipeline.prime();
+  await pipeline.next();
+  const pending = pipeline.next();
+  // Attach the rejection handler before close() rejects the pending consumer.
+  const rejection = assert.rejects(pending, /closed/);
+  await pipeline.close();
+  await rejection;
+});
+
+test("close - resolves promptly even with a never-resolving factory", async () => {
+  const pipeline = createSigningPipeline(
+    "pipeline",
+    () => new Promise<Request>(() => {}),
+    { bufferSize: 2, signers: 2 },
+  );
+  const outcome = await Promise.race([
+    pipeline.close().then(() => "closed"),
+    new Promise((resolve) => setTimeout(() => resolve("timeout"), 1000)),
+  ]);
+  assert.strictEqual(outcome, "closed");
+});
diff --git a/packages/cli/src/bench/signing/pipeline.ts b/packages/cli/src/bench/signing/pipeline.ts
new file mode 100644
index 000000000..bd661c863
--- /dev/null
+++ b/packages/cli/src/bench/signing/pipeline.ts
@@ -0,0 +1,203 @@
+/**
+ * The signing pipeline that keeps RSA signing out of the send critical path.
+ *
+ * Three lookahead modes, all reusing the same per-request signing factory:
+ *
+ *  -  `jit`: sign in the send path (the only valid mode against a strict
+ *     time-window target); rate-capped.
+ *  -  `pipeline` (default): background signers keep a bounded buffer filled and
+ *     senders pull from it; if the buffer starves, that is the client-bound
+ *     signal, surfaced via `starvationCount`.
+ *  -  `presign`: the whole run is signed up front, so the achievable rate is
+ *     not bounded by real-time signing throughput.
+ * @since 2.3.0
+ * @module
+ */
+
+import type { SigningMode } from "../scenario/types.ts";
+
+/** A factory that signs and returns one request. */
+export type SignFactory = () => Promise<Request>;
+
+/** A running signing pipeline. */
+export interface SigningPipeline {
+  /** Returns the next signed request, awaiting one if none is buffered. */
+  next(): Promise<Request>;
+  /** Pre-fills the buffer to its target before the timed window opens. */
+  prime(): Promise<void>;
+  /** The number of times `next()` found the buffer empty (client-bound). */
+  readonly starvationCount: number;
+  /** Stops background signing and releases pending consumers. */
+  close(): Promise<void>;
+}
+
+/** Options for {@link createSigningPipeline}. */
+export interface SigningPipelineOptions {
+  /** The bounded buffer size for `pipeline` mode. */
+  readonly bufferSize?: number;
+  /** The total number of requests for `presign` mode. */
+  readonly total?: number;
+  /** The number of concurrent background signers. */
+  readonly signers?: number;
+}
+
+/** An error used to release consumers waiting on a closed pipeline. */
+export class PipelineClosedError extends Error {}
+
+const DEFAULT_BUFFER_SIZE = 256;
+const DEFAULT_SIGNERS = 4;
+/**
+ * After this many signing failures with no successful sign in between, the
+ * pipeline gives up so a deterministic signing error fails fast instead of
+ * spinning forever.
+ */
+const FATAL_FAILURE_THRESHOLD = 8;
+
+/**
+ * Builds the signing pipeline that implements a lookahead mode.
+ * @param mode The lookahead mode: `jit`, `presign`, or `pipeline`.
+ * @param factory Signs and returns one request per call.
+ * @param options Buffer size, presign total, and signer concurrency.
+ * @returns A pipeline whose `next()` hands out signed requests.
+ */
+export function createSigningPipeline(
+  mode: SigningMode,
+  factory: SignFactory,
+  options: SigningPipelineOptions = {},
+): SigningPipeline {
+  // `jit` has no buffer: every `next()` call runs the factory directly.
+  if (mode === "jit") return createJit(factory);
+
+  // The two buffered modes differ only in where the buffer size comes from
+  // and in whether an empty buffer is counted as starvation:
+  //  -  `presign` is sized by `total` and does not count starvation;
+  //  -  any other mode is `pipeline`, sized by `bufferSize`, and counts it.
+  // Both default to the same size and prime until the buffer is full.
+  const presign = mode === "presign";
+  const requested = presign ? options.total : options.bufferSize;
+  const capacity = requested ?? DEFAULT_BUFFER_SIZE;
+
+  return createBuffered(factory, {
+    bufferSize: capacity,
+    fillTarget: capacity,
+    signers: options.signers ?? DEFAULT_SIGNERS,
+    countStarvation: !presign,
+  });
+}
+
+function createJit(factory: SignFactory): SigningPipeline {
+  return {
+    next: factory,
+    prime: () => Promise.resolve(),
+    starvationCount: 0,
+    close: () => Promise.resolve(),
+  };
+}
+
+interface BufferedOptions {
+  readonly bufferSize: number;
+  readonly fillTarget: number;
+  readonly signers: number;
+  readonly countStarvation: boolean;
+}
+
+function createBuffered(
+  factory: SignFactory,
+  options: BufferedOptions,
+): SigningPipeline {
+  const ready: Request[] = [];
+  const waiters: Array<{
+    resolve: (request: Request) => void;
+    reject: (error: unknown) => void;
+  }> = [];
+  let starvationCount = 0;
+  let inFlight = 0;
+  let closed = false;
+  let consecutiveFailures = 0;
+  let fatalError: unknown = null;
+  const CLOSED = Symbol("closed");
+  let signalClose!: () => void;
+  const closeSignal = new Promise<typeof CLOSED>((resolve) => {
+    signalClose = () => resolve(CLOSED);
+  });
+
+  function deliver(request: Request): void {
+    const waiter = waiters.shift();
+    if (waiter != null) waiter.resolve(request);
+    else ready.push(request);
+  }
+
+  function fail(error: unknown): void {
+    fatalError = error;
+    closed = true;
+    signalClose();
+    ready.length = 0; // discard buffered requests so next() rejects
+    while (waiters.length > 0) waiters.shift()!.reject(error);
+  }
+
+  async function producer(): Promise<void> {
+    while (!closed) {
+      if (
+        waiters.length === 0 && ready.length + inFlight >= options.bufferSize
+      ) {
+        await Promise.race([delay(), closeSignal]);
+        continue;
+      }
+      inFlight++;
+      try {
+        // Race the sign against close so a slow/stuck factory cannot block
+        // close(); the detached factory promise is swallowed if it settles
+        // late.  `Promise.resolve().then(factory)` turns a synchronous throw in
+        // the factory into a rejection rather than killing the producer.
+        const pending = Promise.resolve().then(factory);
+        pending.catch(() => {});
+        const result = await Promise.race([pending, closeSignal]);
+        if (result === CLOSED || closed) break;
+        consecutiveFailures = 0;
+        deliver(result);
+      } catch (error) {
+        // A transient failure is dropped, but a run of failures with no
+        // success means signing is deterministically broken: fail fast.
+        if (++consecutiveFailures >= FATAL_FAILURE_THRESHOLD) fail(error);
+      } finally {
+        inFlight--;
+      }
+    }
+  }
+
+  const producers = Array.from({ length: options.signers }, () => producer());
+
+  return {
+    get starvationCount(): number {
+      return starvationCount;
+    },
+    next(): Promise<Request> {
+      const buffered = ready.shift();
+      if (buffered != null) return Promise.resolve(buffered);
+      if (fatalError != null) return Promise.reject(fatalError);
+      if (closed) return Promise.reject(new PipelineClosedError("closed"));
+      if (options.countStarvation) starvationCount++;
+      return new Promise<Request>((resolve, reject) => {
+        waiters.push({ resolve, reject });
+      });
+    },
+    async prime(): Promise<void> {
+      while (!closed && ready.length < options.fillTarget) {
+        await Promise.race([delay(), closeSignal]);
+      }
+      if (fatalError != null) throw fatalError;
+    },
+    async close(): Promise<void> {
+      closed = true;
+      signalClose();
+      while (waiters.length > 0) {
+        waiters.shift()!.reject(new PipelineClosedError("closed"));
+      }
+      await Promise.allSettled(producers);
+    },
+  };
+}
+
+function delay(): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, 1));
+}
diff --git a/packages/cli/src/bench/signing/signer.test.ts b/packages/cli/src/bench/signing/signer.test.ts
new file mode 100644
index 000000000..b702089d9
--- /dev/null
+++ b/packages/cli/src/bench/signing/signer.test.ts
@@ -0,0 +1,92 @@
+import { verifyRequest } from "@fedify/fedify";
+import { Create, Note } from "@fedify/vocab";
+import assert from "node:assert/strict";
+import test from "node:test";
+import { getContextLoader, getDocumentLoader } from "../../docloader.ts";
+import { buildFleet } from "../actor/fleet.ts";
+import { spawnSyntheticServer } from "../server/synthetic.ts";
+import { signInboxDelivery } from "./signer.ts";
+
+async function signOne(
+  standards: Parameters<typeof buildFleet>[0][number]["signatureStandards"],
+) {
+  const fleet = await buildFleet([{ count: 1, signatureStandards: standards }]);
+  const server = await spawnSyntheticServer(fleet);
+  const documentLoader = await getDocumentLoader({ allowPrivateAddress: true });
+  const contextLoader = await getContextLoader({ allowPrivateAddress: true });
+  const actor = server.actors[0];
+  const activity = new Create({
+    id: new URL("/activities/1", server.url),
+    actor: actor.id,
+    object: new Note({
+      id: new URL("/notes/1", server.url),
+      content: "benchmark",
+      attribution: actor.id,
+    }),
+  });
+  const request = await signInboxDelivery({
+    actor,
+    inbox: new URL("/inbox", server.url),
+    activity,
+    contextLoader,
+  });
+  return { server, request, actor, documentLoader, contextLoader };
+}
+
+test("signInboxDelivery - draft-cavage signature verifies", async () => {
+  const { server, request, documentLoader, contextLoader } = await signOne([
+    "draft-cavage-http-signatures-12",
+  ]);
+  try {
+    const key = await verifyRequest(request, {
+      documentLoader,
+      contextLoader,
+    });
+    assert.ok(key != null, "the draft-cavage HTTP signature should verify");
+  } finally {
+    await server.close();
+  }
+});
+
+test("signInboxDelivery - rfc9421 signature verifies", async () => {
+  const { server, request, documentLoader, contextLoader } = await signOne([
+    "rfc9421",
+  ]);
+  try {
+    const key = await verifyRequest(request, {
+      documentLoader,
+      contextLoader,
+    });
+    assert.ok(key != null, "the rfc9421 HTTP signature should verify");
+  } finally {
+    await server.close();
+  }
+});
+
+test("signInboxDelivery - embeds a FEP-8b32 proof in the body", async () => {
+  const { server, request } = await signOne([
+    "draft-cavage-http-signatures-12",
+    "fep8b32",
+  ]);
+  try {
+    const body = await request.clone().text();
+    assert.match(body, /"proof"/);
+    assert.match(body, /eddsa-jcs-2022/);
+  } finally {
+    await server.close();
+  }
+});
+
+test("signInboxDelivery - embeds an LD signature in the body", async () => {
+  const { server, request } = await signOne([
+    "draft-cavage-http-signatures-12",
+    "ld-signatures",
+  ]);
+  try {
+    const body = await request.clone().text();
+    assert.match(body, /"signature"/);
+    assert.match(body, /RsaSignature2017/);
+  } finally {
+    await server.close();
+  }
+});
diff --git a/packages/cli/src/bench/signing/signer.ts b/packages/cli/src/bench/signing/signer.ts
new file mode 100644
index 000000000..906290a11
--- /dev/null
+++ b/packages/cli/src/bench/signing/signer.ts
@@ -0,0 +1,80 @@
+/**
+ * Signing one inbox delivery, reusing the `@fedify/fedify` signers so the
+ * client pays realistic crypto cost.
+ *
+ * Document signatures are applied first (FEP-8b32 object proof, then LD
+ * Signature on the serialized document), then the HTTP request signature is
+ * applied to the final body, matching how a real sender composes a request.
+ * @since 2.3.0
+ * @module
+ */
+
+import { signJsonLd, signObject, signRequest } from "@fedify/fedify";
+import type { Activity } from "@fedify/vocab";
+import type { DocumentLoader } from "@fedify/vocab-runtime";
+import type { SyntheticActor } from "../server/synthetic.ts";
+
+/** Options for {@link signInboxDelivery}. */
+export interface SignDeliveryOptions {
+  /** The signing actor, with its keys and key ids. */
+  readonly actor: SyntheticActor;
+  /** The inbox URL to deliver to. */
+  readonly inbox: URL;
+  /** The activity to sign and deliver (its `id` must already be set). */
+  readonly activity: Activity;
+  /** The context loader used to serialize and canonicalize the document. */
+  readonly contextLoader: DocumentLoader;
+}
+
+/**
+ * Signs an inbox delivery and returns a ready-to-send `Request`.
+ * @param options The delivery options.
+ * @returns The signed POST request.
+ * @throws {TypeError} If the actor lacks the RSA key required for HTTP signing.
+ */
+export async function signInboxDelivery(
+  options: SignDeliveryOptions,
+): Promise<Request> {
+  const { actor, inbox, contextLoader } = options;
+  if (actor.keys.rsa == null || actor.rsaKeyId == null) {
+    throw new TypeError(
+      "Actor is missing the RSA key required for HTTP request signing.",
+    );
+  }
+
+  let activity = options.activity;
+  if (
+    actor.standards.includes("fep8b32") && actor.keys.ed25519 != null &&
+    actor.ed25519KeyId != null
+  ) {
+    activity = await signObject(
+      activity,
+      actor.keys.ed25519.privateKey,
+      actor.ed25519KeyId,
+      { contextLoader },
+    );
+  }
+
+  let document: unknown = await activity.toJsonLd({ contextLoader });
+  if (actor.standards.includes("ld-signatures")) {
+    document = await signJsonLd(
+      document,
+      actor.keys.rsa.privateKey,
+      actor.rsaKeyId,
+      { contextLoader },
+    );
+  }
+
+  const body = new TextEncoder().encode(JSON.stringify(document));
+  const request = new Request(inbox, {
+    method: "POST",
+    headers: { "content-type": "application/activity+json" },
+    body,
+  });
+  return await signRequest(
+    request,
+    actor.keys.rsa.privateKey,
+    actor.rsaKeyId,
+    { spec: actor.httpStandard, body: body.buffer as ArrayBuffer },
+  );
+}

From 317079a4ca5e781d044443ca12c99be46fc3beae Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 01:16:02 +0900
Subject: [PATCH 12/47] Add the benchmark load generator and sample aggregation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drive load and turn the raw samples into client-side metrics.
`runLoad()` supports open-loop (a fixed arrival schedule, with latency
measured from each request's scheduled time — the coordinated-omission
correction — so a stalled target or maxInFlight backpressure shows up as
latency rather than being omitted) and closed-loop (N virtual users).
A fair slot-transferring semaphore enforces `maxInFlight` in both models
and reports backpressure as the saturation signal; arrivals are a lazy
generator (constant or seeded Poisson) and only in-flight dispatches are
retained, so memory stays flat on long runs.

`aggregateSamples()` excludes warm-up samples and produces request
counts, success rate, throughput over the measured window, latency
percentiles from the log-linear histogram, and errors grouped by kind,
status, and reason.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 packages/cli/src/bench/load/arrival.test.ts   |  64 ++++++
 packages/cli/src/bench/load/arrival.ts        |  70 ++++++
 packages/cli/src/bench/load/clock.ts          |  26 +++
 packages/cli/src/bench/load/generator.test.ts | 155 +++++++++++++
 packages/cli/src/bench/load/generator.ts      | 207 ++++++++++++++++++
 .../cli/src/bench/metrics/aggregate.test.ts   |  91 ++++++++
 packages/cli/src/bench/metrics/aggregate.ts   |  96 ++++++++
 7 files changed, 709 insertions(+)
 create mode 100644 packages/cli/src/bench/load/arrival.test.ts
 create mode 100644 packages/cli/src/bench/load/arrival.ts
 create mode 100644 packages/cli/src/bench/load/clock.ts
 create mode 100644 packages/cli/src/bench/load/generator.test.ts
 create mode 100644 packages/cli/src/bench/load/generator.ts
 create mode 100644 packages/cli/src/bench/metrics/aggregate.test.ts
 create mode 100644 packages/cli/src/bench/metrics/aggregate.ts

diff --git a/packages/cli/src/bench/load/arrival.test.ts b/packages/cli/src/bench/load/arrival.test.ts
new file mode 100644
index 000000000..b47f16998
--- /dev/null
+++ b/packages/cli/src/bench/load/arrival.test.ts
@@ -0,0 +1,64 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { createSeededRng, scheduleArrivals } from "./arrival.ts";
+
+test("scheduleArrivals - constant spacing equals 1/rate", () => {
+  const offsets = [
+    ...scheduleArrivals({
+      ratePerSec: 100,
+      durationMs: 100,
+      arrival: "constant",
+    }),
+  ];
+  assert.strictEqual(offsets.length, 10);
+  assert.strictEqual(offsets[0], 0);
+  for (let i = 1; i < offsets.length; i++) {
+    assert.ok(Math.abs(offsets[i] - offsets[i - 1] - 10) < 1e-9);
+  }
+});
+
+test("scheduleArrivals - empty for non-positive rate or duration", () => {
+  assert.deepEqual(
+    [...scheduleArrivals({
+      ratePerSec: 0,
+      durationMs: 100,
+      arrival: "constant",
+    })],
+    [],
+  );
+  assert.deepEqual(
+    [...scheduleArrivals({
+      ratePerSec: 100,
+      durationMs: 0,
+      arrival: "constant",
+    })],
+    [],
+  );
+});
+
+test("scheduleArrivals - poisson mean spacing approximates 1/rate", () => {
+  const offsets = [
+    ...scheduleArrivals({
+      ratePerSec: 100, // mean gap 10ms
+      durationMs: 100_000,
+      arrival: "poisson",
+      rng: createSeededRng(42),
+    }),
+  ];
+  assert.ok(offsets.length > 8000, `got ${offsets.length} arrivals`);
+  const meanGap = offsets[offsets.length - 1] / (offsets.length - 1);
+  assert.ok(Math.abs(meanGap - 10) < 1, `mean gap ${meanGap} ≈ 10`);
+  for (let i = 1; i < offsets.length; i++) {
+    assert.ok(offsets[i] > offsets[i - 1]);
+  }
+});
+
+test("scheduleArrivals - poisson is reproducible for a given seed", () => {
+  const make = () => [...scheduleArrivals({
+    ratePerSec: 50,
+    durationMs: 1000,
+    arrival: "poisson",
+    rng: createSeededRng(7),
+  })];
+  assert.deepEqual(make(), make());
+});
diff --git a/packages/cli/src/bench/load/arrival.ts b/packages/cli/src/bench/load/arrival.ts
new file mode 100644
index 000000000..800471c6d
--- /dev/null
+++ b/packages/cli/src/bench/load/arrival.ts
@@ -0,0 +1,70 @@
+/**
+ * Arrival scheduling for open-loop load.
+ *
+ * `constant` arrivals are evenly spaced at `1 / rate`; `poisson` arrivals draw
+ * exponentially distributed inter-arrival gaps with the same mean, modeling
+ * realistic burstiness.  A seedable RNG keeps Poisson schedules reproducible.
+ * @since 2.3.0
+ * @module
+ */
+
+import type { ArrivalDistribution } from "../scenario/types.ts";
+
+/** A pseudo-random number generator returning values in [0, 1). */
+export type Rng = () => number;
+
+/**
+ * Builds a deterministic mulberry32 generator, so that a given seed always
+ * produces the same Poisson schedule.
+ * @param seed The seed; it is truncated to an unsigned 32-bit integer.
+ * @returns An RNG yielding values in [0, 1).
+ */
+export function createSeededRng(seed: number): Rng {
+  let state = seed >>> 0;
+  return function nextUnit(): number {
+    // Advance the 32-bit state, then scramble a copy of it (mulberry32).
+    state = (state + 0x6d2b79f5) >>> 0;
+    const mix = Math.imul(state ^ (state >>> 15), state | 1);
+    const out = mix ^ (mix + Math.imul(mix ^ (mix >>> 7), mix | 61));
+    return ((out ^ (out >>> 14)) >>> 0) / 2 ** 32;
+  };
+}
+
+/** Options for {@link scheduleArrivals}. */
+export interface ScheduleOptions {
+  /** The arrival rate in requests per second. */
+  readonly ratePerSec: number;
+  /** The total duration to schedule over, in milliseconds. */
+  readonly durationMs: number;
+  /** The arrival distribution. */
+  readonly arrival: ArrivalDistribution;
+  /** The RNG used for `poisson` arrivals; defaults to `Math.random`. */
+  readonly rng?: Rng;
+}
+
+/**
+ * Lazily yields the scheduled arrival offsets (milliseconds from the start) for
+ * a load run; nothing unless the rate is finite and both inputs are positive.
+ * Constant offsets are `i * gap`, not a running sum, to avoid rounding drift.
+ * @param options The scheduling options.
+ * @yields Arrival offsets within `[0, durationMs)`, in increasing order.
+ */
+export function* scheduleArrivals(
+  options: ScheduleOptions,
+): Generator<number> {
+  const { ratePerSec, durationMs, arrival } = options;
+  if (!(ratePerSec > 0 && ratePerSec < Infinity && durationMs > 0)) return;
+  const meanGapMs = 1000 / ratePerSec;
+  if (arrival === "constant") {
+    for (let i = 0; i * meanGapMs < durationMs; i++) yield i * meanGapMs;
+    return;
+  }
+  const rng = options.rng ?? Math.random;
+  let t = 0;
+  for (;;) {
+    // Exponential inter-arrival gap with mean meanGapMs.
+    t += -Math.log(1 - rng()) * meanGapMs;
+    if (t >= durationMs) break;
+    yield t;
+  }
+}
diff --git a/packages/cli/src/bench/load/clock.ts b/packages/cli/src/bench/load/clock.ts
new file mode 100644
index 000000000..848e0bdce
--- /dev/null
+++ b/packages/cli/src/bench/load/clock.ts
@@ -0,0 +1,26 @@
+/**
+ * A small clock abstraction so the load generator's timing can be driven by a
+ * real monotonic clock in production and substituted in tests.
+ * @since 2.3.0
+ * @module
+ */
+
+/** A monotonic clock with a sleep primitive. */
+export interface Clock {
+  /** The current time in milliseconds (monotonic, not wall-clock). */
+  now(): number;
+  /** Resolves once the clock reaches `timeMs` (or immediately if already past). */
+  sleepUntil(timeMs: number): Promise<void>;
+}
+
+/** Creates the real clock: `performance.now()` time, `setTimeout` sleeps. */
+export function systemClock(): Clock {
+  const now = (): number => performance.now();
+  const sleepUntil = (timeMs: number): Promise<void> => {
+    const waitMs = timeMs - performance.now();
+    return waitMs <= 0
+      ? Promise.resolve()
+      : new Promise<void>((resolve) => setTimeout(resolve, waitMs));
+  };
+  return { now, sleepUntil };
+}
diff --git a/packages/cli/src/bench/load/generator.test.ts b/packages/cli/src/bench/load/generator.test.ts
new file mode 100644
index 000000000..c23567f26
--- /dev/null
+++ b/packages/cli/src/bench/load/generator.test.ts
@@ -0,0 +1,155 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { runLoad, type SendOutcome } from "./generator.ts";
+
+function delay(ms: number): Promise<void> {
+  return new Promise<void>((wake) => void setTimeout(wake, ms));
+}
+
+const ok: SendOutcome = { ok: true, status: 202 };
+
+test("runLoad - open-loop records a sample per scheduled arrival", async () => {
+  const result = await runLoad(
+    {
+      load: { kind: "open", ratePerSec: 100, arrival: "constant" },
+      durationMs: 100,
+      warmupMs: 0,
+    },
+    () => Promise.resolve(ok),
+  );
+  assert.strictEqual(result.samples.length, 10);
+  assert.strictEqual(result.saturated, false);
+  assert.ok(result.samples.every((s) => s.outcome.ok));
+});
+
+test("runLoad - coordinated-omission: a stall inflates later latencies", async () => {
+  let firstSend = true;
+  const result = await runLoad(
+    {
+      load: {
+        kind: "open",
+        ratePerSec: 50, // arrivals at 0, 20, 40, 60, 80 ms
+        arrival: "constant",
+        maxInFlight: 1,
+      },
+      durationMs: 100,
+      warmupMs: 0,
+    },
+    async () => {
+      if (firstSend) {
+        firstSend = false;
+        await delay(60); // first request stalls, holding the only slot
+      } else {
+        await delay(1);
+      }
+      return ok;
+    },
+  );
+  assert.strictEqual(result.saturated, true);
+  // A later request, blocked behind the stall, measures latency from its
+  // scheduled time, so it is far larger than its own ~1ms service time.
+  const delayed = result.samples.filter((s) => s.scheduledAtMs > 0);
+  assert.ok(
+    delayed.some((s) => s.latencyMs > 25),
+    `expected an inflated latency; got ${
+      delayed.map((s) => Math.round(s.latencyMs)).join(", ")
+    }`,
+  );
+});
+
+test("runLoad - open-loop respects the maxInFlight cap", async () => {
+  let inFlight = 0;
+  let peak = 0;
+  await runLoad(
+    {
+      load: {
+        kind: "open",
+        ratePerSec: 1000,
+        arrival: "constant",
+        maxInFlight: 3,
+      },
+      durationMs: 60,
+      warmupMs: 0,
+    },
+    async () => {
+      inFlight++;
+      peak = Math.max(peak, inFlight);
+      await delay(5);
+      inFlight--;
+      return ok;
+    },
+  );
+  assert.ok(peak <= 3, `peak in-flight ${peak} must not exceed 3`);
+});
+
+test("runLoad - marks warm-up samples", async () => {
+  const result = await runLoad(
+    {
+      load: { kind: "open", ratePerSec: 100, arrival: "constant" },
+      durationMs: 100,
+      warmupMs: 30,
+    },
+    () => Promise.resolve(ok),
+  );
+  assert.ok(result.samples.some((s) => s.warmup));
+  assert.ok(result.samples.some((s) => !s.warmup));
+  assert.ok(
+    result.samples.filter((s) => s.warmup).every((s) => s.scheduledAtMs < 30),
+  );
+});
+
+test("runLoad - closed-loop runs N workers for the duration", async () => {
+  let concurrent = 0;
+  let peak = 0;
+  const result = await runLoad(
+    {
+      load: { kind: "closed", concurrency: 2 },
+      durationMs: 40,
+      warmupMs: 0,
+    },
+    async () => {
+      concurrent++;
+      peak = Math.max(peak, concurrent);
+      await delay(5);
+      concurrent--;
+      return ok;
+    },
+  );
+  assert.ok(result.samples.length > 0);
+  assert.ok(peak <= 2, `closed-loop concurrency ${peak} must not exceed 2`);
+  assert.strictEqual(result.saturated, false);
+});
+
+test("runLoad - closed-loop honors maxInFlight below concurrency", async () => {
+  let concurrent = 0;
+  let peak = 0;
+  await runLoad(
+    {
+      load: { kind: "closed", concurrency: 8, maxInFlight: 2 },
+      durationMs: 40,
+      warmupMs: 0,
+    },
+    async () => {
+      concurrent++;
+      peak = Math.max(peak, concurrent);
+      await delay(5);
+      concurrent--;
+      return ok;
+    },
+  );
+  assert.ok(peak <= 2, `in-flight ${peak} must respect maxInFlight 2`);
+});
+
+test("runLoad - records send exceptions as failed samples", async () => {
+  const result = await runLoad(
+    {
+      load: { kind: "open", ratePerSec: 100, arrival: "constant" },
+      durationMs: 30,
+      warmupMs: 0,
+    },
+    () => Promise.reject(new Error("boom")),
+  );
+  assert.ok(result.samples.length > 0);
+  assert.ok(result.samples.every((s) => !s.outcome.ok));
+  assert.ok(result.samples.every((s) => s.outcome.errorKind === "exception"));
+});
diff --git a/packages/cli/src/bench/load/generator.ts b/packages/cli/src/bench/load/generator.ts
new file mode 100644
index 000000000..7b05caae1
--- /dev/null
+++ b/packages/cli/src/bench/load/generator.ts
@@ -0,0 +1,207 @@
+/**
+ * The load generator: drives requests against a send function and records
+ * coordinated-omission-corrected latency samples.
+ *
+ * Open-loop (the default) launches requests on a fixed schedule regardless of
+ * whether earlier responses returned, and measures each request's latency from
+ * its *scheduled* time, not from when it was actually sent — so falling behind
+ * schedule (a stalled target, or backpressure from the `maxInFlight` cap)
+ * shows up as latency rather than being silently omitted.  Closed-loop runs a
+ * fixed number of virtual users, each looping send-then-wait.
+ * @since 2.3.0
+ * @module
+ */
+
+import type { LoadModel } from "../scenario/normalize.ts";
+import { scheduleArrivals } from "./arrival.ts";
+import { type Clock, systemClock } from "./clock.ts";
+import type { Rng } from "./arrival.ts";
+
+/** The outcome of a single send; failures are grouped by the fields below. */
+export interface SendOutcome {
+  readonly ok: boolean; // true counts as a success in the request summary
+  readonly status?: number; // presumably the HTTP response status, if any
+  readonly errorKind?: string; // e.g. "exception" when the send itself threw
+  readonly reason?: string; // short cause text; String(error) for exceptions
+}
+
+/** Sends one request; receives the request's scheduled offset (ms). */
+export type SendFunction = (scheduledAtMs: number) => Promise<SendOutcome>;
+
+/** A recorded latency sample. */
+export interface Sample {
+  /** The request's scheduled offset from the run start, in milliseconds. */
+  readonly scheduledAtMs: number;
+  /** Latency in milliseconds (coordinated-omission corrected in open-loop). */
+  readonly latencyMs: number;
+  /** Whether the sample falls within the warm-up window (excluded later). */
+  readonly warmup: boolean;
+  /** The send outcome. */
+  readonly outcome: SendOutcome;
+}
+
+/** The result of a load run. */
+export interface LoadResult {
+  readonly samples: Sample[];
+  /**
+   * Whether the `maxInFlight` cap caused backpressure — at least one dispatch
+   * had to wait for a slot.  This is the saturation signal.
+   */
+  readonly saturated: boolean;
+  /** The wall-clock duration of the run, in milliseconds. */
+  readonly wallDurationMs: number;
+}
+
+/** A load plan derived from a resolved scenario. */
+export interface LoadPlan {
+  readonly load: LoadModel;
+  readonly durationMs: number;
+  readonly warmupMs: number;
+  /** The RNG for Poisson arrivals (open-loop). */
+  readonly rng?: Rng;
+}
+
+/**
+ * Executes a load plan, dispatching on the plan's load model kind.
+ * @param plan The load plan; `plan.load.kind` selects open- or closed-loop.
+ * @param send The function that performs one send.
+ * @param clock The clock to time the run with; defaults to the system clock.
+ * @returns The recorded samples and run metadata.
+ */
+export function runLoad(
+  plan: LoadPlan,
+  send: SendFunction,
+  clock: Clock = systemClock(),
+): Promise<LoadResult> {
+  const { load } = plan;
+  if (load.kind === "open") return runOpenLoop(plan, load, send, clock);
+  return runClosedLoop(plan, load, send, clock);
+}
+
+async function runOpenLoop(
+  plan: LoadPlan,
+  load: Extract<LoadModel, { kind: "open" }>,
+  send: SendFunction,
+  clock: Clock,
+): Promise<LoadResult> {
+  const { durationMs, warmupMs, rng } = plan;
+  const { ratePerSec, arrival, maxInFlight } = load;
+  const schedule = scheduleArrivals({ ratePerSec, durationMs, arrival, rng });
+  const samples: Sample[] = [];
+  const gate = createSemaphore(maxInFlight);
+  let saturated = false;
+  const start = clock.now();
+  // Hold only unsettled dispatches (each removes itself once it settles), so
+  // memory is bounded by the in-flight count, not the total request count.
+  const pending = new Set<Promise<void>>();
+  for (const scheduledAtMs of schedule) {
+    // Wait for the scheduled instant, then for a free in-flight slot.
+    await clock.sleepUntil(start + scheduledAtMs);
+    // acquire() resolves `true` when it had to queue: that is backpressure.
+    const queued = await gate.acquire();
+    if (queued) saturated = true;
+    const task: Promise<void> = dispatch(
+      send,
+      scheduledAtMs,
+      start,
+      warmupMs,
+      clock,
+      samples,
+    ).finally(() => {
+      gate.release();
+      pending.delete(task);
+    });
+    pending.add(task);
+  }
+  // Drain whatever is still in flight after the last arrival.
+  await Promise.all(pending);
+  return { samples, saturated, wallDurationMs: clock.now() - start };
+}
+
+async function runClosedLoop(
+  plan: LoadPlan,
+  load: Extract<LoadModel, { kind: "closed" }>,
+  send: SendFunction,
+  clock: Clock,
+): Promise<LoadResult> {
+  const { warmupMs, durationMs } = plan;
+  const samples: Sample[] = [];
+  const gate = createSemaphore(load.maxInFlight);
+  let saturated = false;
+  const start = clock.now();
+  const deadline = start + durationMs;
+  // One virtual user: send, await the outcome, repeat until the deadline.
+  const virtualUser = async (): Promise<void> => {
+    while (clock.now() < deadline) {
+      const queued = await gate.acquire();
+      if (queued) saturated = true;
+      try {
+        // The deadline may have passed while queued for a slot.
+        if (clock.now() >= deadline) return;
+        const offset = clock.now() - start;
+        await dispatch(send, offset, start, warmupMs, clock, samples);
+      } finally {
+        gate.release();
+      }
+    }
+  };
+  const users = Array.from({ length: load.concurrency }, virtualUser);
+  await Promise.all(users);
+  return { samples, saturated, wallDurationMs: clock.now() - start };
+}
+
+async function dispatch(
+  send: SendFunction,
+  offset: number,
+  start: number,
+  warmupMs: number,
+  clock: Clock,
+  samples: Sample[],
+): Promise<void> {
+  // A throwing or rejecting send still yields a sample, as a failed outcome.
+  const attempt = async (): Promise<SendOutcome> => {
+    try {
+      return await send(offset);
+    } catch (error) {
+      return { ok: false, errorKind: "exception", reason: String(error) };
+    }
+  };
+  const outcome = await attempt();
+  // Latency runs from the scheduled instant (start + offset), not from the
+  // actual send, so time spent behind schedule is charged to the request.
+  const latencyMs = clock.now() - (start + offset);
+  const warmup = offset < warmupMs;
+  samples.push({ scheduledAtMs: offset, latencyMs, warmup, outcome });
+}
+
+interface Semaphore {
+  /** Acquires a slot; resolves `true` if it had to wait (backpressure). */
+  acquire(): Promise<boolean>;
+  /** Releases a slot, transferring it to the next waiter if any. */
+  release(): void;
+}
+
+function createSemaphore(max: number | undefined): Semaphore {
+  // No cap configured: acquire never waits, so it never reports backpressure.
+  if (max == null) {
+    return { acquire: () => Promise.resolve(false), release: () => {} };
+  }
+  let held = 0;
+  const waiters: Array<(waited: boolean) => void> = [];
+  const acquire = (): Promise<boolean> => {
+    if (held < max) {
+      held++;
+      return Promise.resolve(false);
+    }
+    // Queue in FIFO order.  release() hands its slot straight to the oldest
+    // waiter without decrementing `held`, so a running worker cannot barge
+    // ahead of a queued one.
+    return new Promise<boolean>((wake) => waiters.push(wake));
+  };
+  const release = (): void => {
+    const wake = waiters.shift();
+    if (wake == null) held--;
+    else wake(true);
+  };
+  return { acquire, release };
+}
diff --git a/packages/cli/src/bench/metrics/aggregate.test.ts b/packages/cli/src/bench/metrics/aggregate.test.ts
new file mode 100644
index 000000000..324789f8f
--- /dev/null
+++ b/packages/cli/src/bench/metrics/aggregate.test.ts
@@ -0,0 +1,91 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import type { Sample } from "../load/generator.ts";
+import { aggregateSamples } from "./aggregate.ts";
+
+function sample(overrides: Partial<Sample>): Sample {
+  const defaults: Sample = {
+    scheduledAtMs: 0,
+    latencyMs: 10,
+    warmup: false,
+    outcome: { ok: true, status: 202 },
+  };
+  return { ...defaults, ...overrides };
+}
+
+test("aggregateSamples - excludes warm-up samples from every figure", () => {
+  const samples = [
+    sample({ warmup: true, latencyMs: 1000 }),
+    sample({ latencyMs: 20 }),
+    sample({ latencyMs: 30 }),
+  ];
+  const m = aggregateSamples(samples, { measuredWindowMs: 1000 });
+  assert.strictEqual(m.requests.total, 2);
+  assert.ok(m.client.latencyMs.max < 1000);
+});
+
+test("aggregateSamples - counts requests and success rate", () => {
+  const samples = [
+    sample({}),
+    sample({}),
+    sample({ outcome: { ok: false, status: 500, reason: "handler_error" } }),
+  ];
+  const m = aggregateSamples(samples, { measuredWindowMs: 1000 });
+  assert.deepEqual(m.requests, {
+    total: 3,
+    ok: 2,
+    failed: 1,
+    successRate: 2 / 3,
+  });
+});
+
+test("aggregateSamples - throughput is total over the measured window", () => {
+  const samples = Array.from({ length: 50 }, () => sample({}));
+  const m = aggregateSamples(samples, { measuredWindowMs: 2000 });
+  assert.strictEqual(m.throughputPerSec, 25);
+});
+
+test("aggregateSamples - groups errors by kind, status, and reason", () => {
+  const samples = [
+    sample({ outcome: { ok: false, status: 500, reason: "handler_error" } }),
+    sample({ outcome: { ok: false, status: 500, reason: "handler_error" } }),
+    sample({ outcome: { ok: false, status: 401, reason: "signature_failed" } }),
+    sample({ outcome: { ok: false, errorKind: "exception", reason: "boom" } }),
+  ];
+  const m = aggregateSamples(samples, { measuredWindowMs: 1000 });
+  // Sorted by descending count: the 500 bucket (2) first.
+  assert.strictEqual(m.errors[0].count, 2);
+  assert.strictEqual(m.errors[0].status, 500);
+  assert.strictEqual(m.errors.length, 3);
+  const exception = m.errors.find((e) => e.kind === "exception");
+  assert.ok(exception != null && exception.status === undefined);
+});
+
+test("aggregateSamples - latency percentiles come from the samples", () => {
+  const samples = Array.from(
+    { length: 100 },
+    (_, i) => sample({ latencyMs: i + 1 }),
+  );
+  const m = aggregateSamples(samples, { measuredWindowMs: 1000 });
+  assert.ok(m.client.latencyMs.p50 >= 45 && m.client.latencyMs.p50 <= 55);
+  assert.strictEqual(m.client.latencyMs.max, 100);
+});
+
+test("aggregateSamples - optionally includes a serialized histogram", () => {
+  const m = aggregateSamples([sample({})], {
+    measuredWindowMs: 1000,
+    includeHistogram: true,
+  });
+  assert.ok(m.histogram != null);
+  assert.strictEqual(m.histogram?.count, 1);
+
+  const without = aggregateSamples([sample({})], { measuredWindowMs: 1000 });
+  assert.strictEqual(without.histogram, undefined);
+});
+
+test("aggregateSamples - empty input yields a 100% success rate", () => {
+  const m = aggregateSamples([], { measuredWindowMs: 1000 });
+  assert.strictEqual(m.requests.total, 0);
+  assert.strictEqual(m.requests.successRate, 1);
+  assert.strictEqual(m.server, null);
+});
diff --git a/packages/cli/src/bench/metrics/aggregate.ts b/packages/cli/src/bench/metrics/aggregate.ts
new file mode 100644
index 000000000..53494f5ad
--- /dev/null
+++ b/packages/cli/src/bench/metrics/aggregate.ts
@@ -0,0 +1,96 @@
+/**
+ * Aggregation of raw load-generator samples into the client side of a scenario
+ * measurement: request counts, throughput, the latency distribution, and
+ * grouped errors.  Warm-up samples are excluded from every figure.
+ * @since 2.3.0
+ * @module
+ */
+
+import type { Sample } from "../load/generator.ts";
+import type {
+  ClientMetrics,
+  ErrorBucket,
+  RequestSummary,
+} from "../result/model.ts";
+import type { ScenarioMeasurement } from "../result/build.ts";
+import { LogLinearHistogram } from "./histogram.ts";
+
+/** Options for {@link aggregateSamples}. */
+export interface AggregateOptions {
+  /** The measured window (excluding warm-up) in ms, used for throughput. */
+  readonly measuredWindowMs: number;
+  /** Whether to include the serialized latency histogram in the result. */
+  readonly includeHistogram?: boolean;
+}
+
+/**
+ * Reduces raw samples to the client half of a scenario measurement, skipping
+ * warm-up samples; `server` stays `null` for the runner to fill in afterwards.
+ * @param samples The raw samples from the load generator.
+ * @param options The measured window and whether to attach the histogram.
+ * @returns The client-side scenario measurement.
+ */
+export function aggregateSamples(
+  samples: readonly Sample[],
+  options: AggregateOptions,
+): ScenarioMeasurement {
+  const { measuredWindowMs, includeHistogram } = options;
+  const histogram = new LogLinearHistogram();
+  const errorBuckets = new Map<string, ErrorBucket>();
+  let total = 0;
+  let ok = 0;
+  for (const sample of samples) {
+    if (sample.warmup) continue;
+    total++;
+    histogram.record(sample.latencyMs);
+    if (sample.outcome.ok) ok++;
+    else bucketError(errorBuckets, sample);
+  }
+  const requests: RequestSummary = {
+    total,
+    ok,
+    failed: total - ok,
+    successRate: total === 0 ? 1 : ok / total,
+  };
+  const latencyMs = {
+    p50: histogram.percentile(50),
+    p95: histogram.percentile(95),
+    p99: histogram.percentile(99),
+    mean: histogram.mean,
+    max: histogram.max,
+  };
+  const client: ClientMetrics = { latencyMs };
+  // Clamp the window to at least 1 ms so the rate never divides by zero.
+  const windowSec = Math.max(measuredWindowMs, 1) / 1000;
+  const errors = Array.from(errorBuckets.values());
+  errors.sort((a, b) => b.count - a.count); // most frequent first
+  return {
+    requests,
+    throughputPerSec: total / windowSec,
+    client,
+    server: null,
+    errors,
+    ...(includeHistogram ? { histogram: histogram.toJSON() } : {}),
+  };
+}
+
+function bucketError(
+  buckets: Map<string, ErrorBucket>,
+  sample: Sample,
+): void {
+  const { status, errorKind, reason } = sample.outcome;
+  // Defaults for outcomes that do not name their own kind or reason.
+  const fallback = status != null
+    ? { kind: "http", reason: `status_${status}` }
+    : { kind: "error", reason: "error" };
+  const kind = errorKind ?? fallback.kind;
+  const reasonText = reason ?? fallback.reason;
+  const key = [kind, status ?? "", reasonText].join("|");
+  const seen = buckets.get(key);
+  const statusPart = status != null ? { status } : {};
+  // Store a fresh object either way; a stored bucket is never mutated.
+  const bucket: ErrorBucket = seen != null
+    ? { ...seen, count: seen.count + 1 }
+    : { kind, ...statusPart, reason: reasonText, count: 1 };
+  buckets.set(key, bucket);
+}

From 14c8ac15b78c03f688aa0b08e3374b5f1ff6cec3 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 01:52:11 +0900
Subject: [PATCH 13/47] Run the inbox and webfinger benchmark scenarios
 end-to-end

Wire the engine into runnable scenarios.  The stats client reads the
cooperative `stats` endpoint and projects the signature-verification
histogram and queue depth into the report's server section, robust to
malformed snapshots.  The inbox runner discovers the recipient inbox,
builds a signing factory over the synthetic fleet, drives the signing
pipeline and load generator, aggregates the client metrics, and attaches
the server metrics; the webfinger runner drives handle-resolution
lookups.  A registry dispatches by type and reports a clear error for the
scenario types that the format expresses but this version does not run.

`presign` signing now requires an open-loop load (a closed-loop run has
no fixed request count to pre-sign).  An end-to-end test stands up a real
`benchmarkMode` Fedify federation and confirms signed inbox deliveries
verify, the inbox listener runs, and server-side signature-verification
metrics are read back.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 .../src/bench/metrics/stats-client.test.ts    | 128 ++++++++++++++
 .../cli/src/bench/metrics/stats-client.ts     | 161 ++++++++++++++++++
 .../cli/src/bench/scenario/normalize.test.ts  |  18 ++
 packages/cli/src/bench/scenario/normalize.ts  |   7 +
 .../cli/src/bench/scenarios/inbox.test.ts     | 129 ++++++++++++++
 packages/cli/src/bench/scenarios/inbox.ts     | 139 +++++++++++++++
 .../cli/src/bench/scenarios/registry.test.ts  |  23 +++
 packages/cli/src/bench/scenarios/registry.ts  |  42 +++++
 packages/cli/src/bench/scenarios/runner.ts    |  78 +++++++++
 .../cli/src/bench/scenarios/webfinger.test.ts |  61 +++++++
 packages/cli/src/bench/scenarios/webfinger.ts |  50 ++++++
 11 files changed, 836 insertions(+)
 create mode 100644 packages/cli/src/bench/metrics/stats-client.test.ts
 create mode 100644 packages/cli/src/bench/metrics/stats-client.ts
 create mode 100644 packages/cli/src/bench/scenarios/inbox.test.ts
 create mode 100644 packages/cli/src/bench/scenarios/inbox.ts
 create mode 100644 packages/cli/src/bench/scenarios/registry.test.ts
 create mode 100644 packages/cli/src/bench/scenarios/registry.ts
 create mode 100644 packages/cli/src/bench/scenarios/runner.ts
 create mode 100644 packages/cli/src/bench/scenarios/webfinger.test.ts
 create mode 100644 packages/cli/src/bench/scenarios/webfinger.ts

diff --git a/packages/cli/src/bench/metrics/stats-client.test.ts b/packages/cli/src/bench/metrics/stats-client.test.ts
new file mode 100644
index 000000000..ca185ea17
--- /dev/null
+++ b/packages/cli/src/bench/metrics/stats-client.test.ts
@@ -0,0 +1,128 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { fetchServerMetrics, parseServerMetrics } from "./stats-client.ts";
+
+function snapshot() {
+  return {
+    version: 1,
+    source: "server",
+    scopeMetrics: [
+      {
+        scope: { name: "@fedify/fedify", version: "2.3.0" },
+        metrics: [
+          {
+            name: "activitypub.signature.verification.duration",
+            unit: "ms",
+            dataPointType: "histogram",
+            dataPoints: [
+              {
+                attributes: { "activitypub.signature.kind": "http" },
+                value: {
+                  buckets: {
+                    boundaries: [5, 10, 25, 50, 100],
+                    counts: [10, 20, 30, 20, 15, 5],
+                  },
+                  count: 100,
+                  sum: 2000,
+                },
+              },
+            ],
+          },
+          {
+            name: "fedify.queue.depth",
+            unit: "{task}",
+            dataPointType: "gauge",
+            dataPoints: [
+              {
+                attributes: { "fedify.queue.depth.state": "queued" },
+                value: 7,
+              },
+              { attributes: { "fedify.queue.depth.state": "ready" }, value: 3 },
+            ],
+          },
+        ],
+      },
+    ],
+    errors: [],
+  };
+}
+
+test("parseServerMetrics - extracts signature verification percentiles", () => {
+  const metrics = parseServerMetrics(snapshot());
+  assert.ok(metrics != null);
+  const overall = metrics.signatureVerificationMs?.overall;
+  assert.strictEqual(overall?.p50, 25);
+  assert.strictEqual(overall?.p95, 100);
+  assert.strictEqual(overall?.p99, 100);
+});
+
+test("parseServerMetrics - extracts max queue depth", () => {
+  const metrics = parseServerMetrics(snapshot());
+  assert.strictEqual(metrics?.queue?.depthMax, 7);
+});
+
+test("parseServerMetrics - null when no relevant instruments", () => {
+  assert.strictEqual(
+    parseServerMetrics({ version: 1, source: "server", scopeMetrics: [] }),
+    null,
+  );
+});
+
+test("parseServerMetrics - tolerates malformed snapshots without throwing", () => {
+  for (
+    const bad of [
+      null,
+      undefined,
+      42,
+      "nope",
+      {},
+      { scopeMetrics: "x" },
+      { scopeMetrics: [{ metrics: "x" }] },
+      {
+        scopeMetrics: [{
+          metrics: [{
+            name: "activitypub.signature.verification.duration",
+            dataPointType: "histogram",
+            dataPoints: [{
+              value: { buckets: { boundaries: null, counts: 5 } },
+            }],
+          }],
+        }],
+      },
+      {
+        scopeMetrics: [{
+          metrics: [{
+            name: "activitypub.signature.verification.duration",
+            dataPointType: "histogram",
+            dataPoints: [{
+              value: { buckets: { boundaries: [1, "x"], counts: [1, 2, 3] } },
+            }],
+          }],
+        }],
+      },
+    ]
+  ) {
+    assert.strictEqual(parseServerMetrics(bad), null);
+  }
+});
+
+test("fetchServerMetrics - parses a fetched snapshot", async () => {
+  const metrics = await fetchServerMetrics(
+    new URL("http://localhost:3000"),
+    () =>
+      Promise.resolve(
+        new Response(JSON.stringify(snapshot()), {
+          headers: { "content-type": "application/json" },
+        }),
+      ),
+  );
+  assert.ok(metrics?.signatureVerificationMs != null);
+});
+
+test("fetchServerMetrics - null on a failed request", async () => {
+  const metrics = await fetchServerMetrics(
+    new URL("http://localhost:3000"),
+    () => Promise.resolve(new Response("nope", { status: 404 })),
+  );
+  assert.strictEqual(metrics, null);
+});
diff --git a/packages/cli/src/bench/metrics/stats-client.ts b/packages/cli/src/bench/metrics/stats-client.ts
new file mode 100644
index 000000000..42e46b745
--- /dev/null
+++ b/packages/cli/src/bench/metrics/stats-client.ts
@@ -0,0 +1,161 @@
+/**
+ * Reading server-side metrics from the cooperative `stats` endpoint.
+ *
+ * The endpoint returns a JSON projection of the target's OpenTelemetry meters
+ * (see *@fedify/fedify*'s benchmark module).  This module projects the relevant
+ * instruments — signature verification latency and queue depth — into the
+ * report's `server` section, marked distinct from client-measured numbers.
+ * @since 2.3.0
+ * @module
+ */
+
+import { STATS_PATH } from "../discovery/probe.ts";
+import type { PartialLatencyMs, ServerMetrics } from "../result/model.ts";
+
+interface OtelHistogram {
+  readonly buckets?: {
+    readonly boundaries?: number[];
+    readonly counts?: number[];
+  };
+  readonly count?: number;
+  readonly sum?: number;
+}
+
+interface SnapshotMetric {
+  readonly name?: string;
+  readonly dataPointType?: string;
+  readonly dataPoints?: ReadonlyArray<{
+    readonly attributes?: Record<string, unknown>;
+    readonly value?: number | OtelHistogram;
+  }>;
+}
+
+interface Snapshot {
+  readonly scopeMetrics?: ReadonlyArray<
+    { readonly metrics?: SnapshotMetric[] }
+  >;
+}
+
+/**
+ * Parses a `stats` snapshot into the report's server metrics.  Never throws:
+ * a malformed snapshot, or one with no relevant instruments, yields `null`.
+ * @param snapshot The parsed `stats` JSON.
+ * @returns The server metrics, or `null`.
+ */
+export function parseServerMetrics(snapshot: unknown): ServerMetrics | null {
+  try {
+    const metrics = flattenMetrics(snapshot as Snapshot);
+    const result: {
+      signatureVerificationMs?: { overall: PartialLatencyMs };
+      queue?: { depthMax?: number };
+    } = {};
+
+    const signature = metrics.find((m) =>
+      m?.dataPointType === "histogram" &&
+      (m.name ?? "").includes("signature.verification")
+    );
+    const merged = signature ? mergeHistogram(signature.dataPoints) : null;
+    if (merged != null) {
+      result.signatureVerificationMs = {
+        overall: {
+          p50: histogramPercentile(merged, 50),
+          p95: histogramPercentile(merged, 95),
+          p99: histogramPercentile(merged, 99),
+        },
+      };
+    }
+
+    const depth = metrics.find((m) => m?.name === "fedify.queue.depth");
+    if (depth != null && Array.isArray(depth.dataPoints)) {
+      // Skip null points and non-finite readings rather than throwing, which
+      // would also discard the signature metrics parsed above.
+      const values = depth.dataPoints
+        .map((p) => p?.value)
+        .filter(isFiniteNumber);
+      if (values.length > 0) result.queue = { depthMax: Math.max(...values) };
+    }
+
+    return Object.keys(result).length > 0 ? result : null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Retrieves the target's `stats` snapshot and projects it into server metrics.
+ * @param target The target base URL; the stats path is resolved against it.
+ * @param fetchImpl The fetch implementation (overridable for tests).
+ * @returns The server metrics, or `null` when they cannot be obtained.
+ */
+export async function fetchServerMetrics(
+  target: URL,
+  fetchImpl: typeof fetch = fetch,
+): Promise<ServerMetrics | null> {
+  try {
+    const response = await fetchImpl(new URL(STATS_PATH, target));
+    return response.ok ? parseServerMetrics(await response.json()) : null;
+  } catch {
+    // Best effort: a network failure or non-JSON body means no metrics.
+    return null;
+  }
+}
+
+function isFiniteNumber(value: unknown): value is number {
+  return Number.isFinite(value); // false for every non-number, NaN, ±Infinity
+}
+
+function flattenMetrics(snapshot: Snapshot): SnapshotMetric[] {
+  const flat: SnapshotMetric[] = [];
+  if (!Array.isArray(snapshot?.scopeMetrics)) return flat; // not a snapshot
+  for (const scope of snapshot.scopeMetrics) {
+    if (Array.isArray(scope?.metrics)) flat.push(...scope.metrics);
+  }
+  return flat;
+}
+
+interface Histogram {
+  readonly boundaries: number[];
+  readonly counts: number[];
+}
+
+/** Sums one instrument's histogram data points, skipping malformed ones. */
+function mergeHistogram(
+  dataPoints: SnapshotMetric["dataPoints"],
+): Histogram | null {
+  if (!Array.isArray(dataPoints)) return null;
+  let merged: Histogram | null = null;
+  for (const point of dataPoints) {
+    const value = typeof point?.value === "object" ? point.value : null;
+    const { boundaries: b, counts: c } = value?.buckets ?? {};
+    if (!Array.isArray(b) || !Array.isArray(c)) continue;
+    if (!b.every(isFiniteNumber) || !c.every(isFiniteNumber)) continue;
+    if (merged == null) {
+      merged = { boundaries: [...b], counts: [...c] };
+      continue;
+    }
+    // Only sum points whose bucket layout matches the first one exactly.
+    const { boundaries: bs, counts: cs } = merged;
+    if (c.length !== cs.length || b.length !== bs.length) continue;
+    if (b.every((x, i) => x === bs[i])) c.forEach((n, i) => cs[i] += n);
+  }
+  return merged;
+}
+
+function histogramPercentile(histogram: Histogram, p: number): number {
+  const { boundaries, counts } = histogram;
+  // The open-ended overflow bucket has no upper bound of its own, so it is
+  // reported as the highest boundary (0 when there are none).
+  const ceiling = boundaries[boundaries.length - 1] ?? 0;
+  const total = counts.reduce((sum, n) => sum + n, 0);
+  if (total === 0) return 0;
+  // Rank of the requested percentile among all recorded observations.
+  const rank = Math.ceil((p / 100) * total);
+  let seen = 0;
+  for (const [i, n] of counts.entries()) {
+    seen += n;
+    // A bounded bucket is estimated by its upper boundary.
+    if (seen >= rank) return i < boundaries.length ? boundaries[i] : ceiling;
+  }
+  // Rank never reached (e.g. p above 100): fall back to the top boundary.
+  return ceiling;
+}
diff --git a/packages/cli/src/bench/scenario/normalize.test.ts b/packages/cli/src/bench/scenario/normalize.test.ts
index c16feea3d..b05000129 100644
--- a/packages/cli/src/bench/scenario/normalize.test.ts
+++ b/packages/cli/src/bench/scenario/normalize.test.ts
@@ -135,6 +135,24 @@ test("normalizeSuite - pipeline signing rejects a time-windowed target", () => {
   );
 });
 
+test("normalizeSuite - presign rejects a closed-loop load", () => {
+  assert.throws(
+    () =>
+      normalizeSuite(suite({
+        defaults: { signing: "presign", load: { concurrency: 10 } },
+      })),
+    SuiteNormalizeError,
+  );
+});
+
+test("normalizeSuite - presign allows an open-loop load", () => {
+  const s = normalizeSuite(suite({
+    defaults: { signing: "presign", load: { rate: "100/s" } },
+  })).scenarios[0];
+  assert.strictEqual(s.signing, "presign");
+  assert.strictEqual(s.load.kind, "open");
+});
+
 test("normalizeSuite - jit signing allows a time-windowed target", () => {
   const s = normalizeSuite(suite({
     defaults: { signing: "jit", signatureTimeWindow: true },
diff --git a/packages/cli/src/bench/scenario/normalize.ts b/packages/cli/src/bench/scenario/normalize.ts
index cc800d3e9..b94180e68 100644
--- a/packages/cli/src/bench/scenario/normalize.ts
+++ b/packages/cli/src/bench/scenario/normalize.ts
@@ -133,6 +133,13 @@ function resolveScenario(scenario: Scenario, suite: Suite): ResolvedScenario {
         "signing: jit for a time-windowed target.",
     );
   }
+  if (signing === "presign" && load.kind === "closed") {
+    throw new SuiteNormalizeError(
+      `Scenario "${scenario.name}": presign signing needs a fixed request ` +
+        "count, which a closed-loop (concurrency) load does not have; use an " +
+        "open-loop rate, or signing: pipeline or jit.",
+    );
+  }
   return {
     name: scenario.name,
     type: scenario.type,
diff --git a/packages/cli/src/bench/scenarios/inbox.test.ts b/packages/cli/src/bench/scenarios/inbox.test.ts
new file mode 100644
index 000000000..4fc874a6f
--- /dev/null
+++ b/packages/cli/src/bench/scenarios/inbox.test.ts
@@ -0,0 +1,129 @@
+import {
+  createFederation,
+  generateCryptoKeyPair,
+  MemoryKvStore,
+} from "@fedify/fedify";
+import { Create, Endpoints, Person } from "@fedify/vocab";
+import assert from "node:assert/strict";
+import test from "node:test";
+import { serve } from "srvx";
+import { getContextLoader, getDocumentLoader } from "../../docloader.ts";
+import { buildFleet } from "../actor/fleet.ts";
+import { normalizeSuite } from "../scenario/normalize.ts";
+import type { Suite } from "../scenario/types.ts";
+import { spawnSyntheticServer } from "../server/synthetic.ts";
+import { inboxRunner } from "./inbox.ts";
+
+// Stands up a real Fedify federation in benchmark mode that serves WebFinger,
+// the recipient actor, and an inbox that verifies incoming signatures.
+async function spawnBenchmarkTarget() {
+  // No message queue, so incoming activities are processed inline (which also
+  // keeps the test process from being held open by a queue worker timer).
+  const federation = createFederation<void>({
+    kv: new MemoryKvStore(),
+    benchmarkMode: true,
+  });
+  let keyPairs: CryptoKeyPair[] | undefined; // lazily generated, then reused
+  federation
+    .setActorDispatcher("/users/{identifier}", async (ctx, identifier) => {
+      if (identifier !== "alice") return null; // "alice" is the only actor
+      const pairs = await ctx.getActorKeyPairs(identifier);
+      return new Person({
+        id: ctx.getActorUri(identifier),
+        preferredUsername: identifier,
+        inbox: ctx.getInboxUri(identifier),
+        endpoints: new Endpoints({ sharedInbox: ctx.getInboxUri() }),
+        publicKey: pairs[0]?.cryptographicKey,
+        assertionMethods: pairs.map((p) => p.multikey),
+      });
+    })
+    .mapHandle((_ctx, username) => (username === "alice" ? "alice" : null))
+    .setKeyPairsDispatcher(async (_ctx, identifier) => {
+      if (identifier !== "alice") return [];
+      keyPairs ??= [
+        await generateCryptoKeyPair("RSASSA-PKCS1-v1_5"),
+        await generateCryptoKeyPair("Ed25519"),
+      ];
+      return keyPairs;
+    });
+
+  let received = 0; // Create activities seen by the inbox listener
+  federation
+    .setInboxListeners("/users/{identifier}/inbox", "/inbox")
+    .on(Create, () => {
+      received++;
+    });
+
+  const server = serve({
+    port: 0, // any free port; the bound URL is read from server.url below
+    hostname: "127.0.0.1",
+    silent: true,
+    fetch: (request: Request) =>
+      federation.fetch(request, { contextData: undefined }),
+  });
+  await server.ready();
+  return {
+    url: new URL(server.url!),
+    receivedCount: () => received,
+    close: () => server.close(true),
+  };
+}
+
+test("inboxRunner - signed deliveries verify against a benchmarkMode target", async () => {
+  const target = await spawnBenchmarkTarget();
+  let fleet: Awaited<ReturnType<typeof spawnSyntheticServer>> | undefined;
+  try {
+    fleet = await spawnSyntheticServer(
+      await buildFleet([{
+        count: 1,
+        signatureStandards: ["draft-cavage-http-signatures-12"],
+      }]),
+    );
+    const suite: Suite = {
+      version: 1,
+      target: target.url.href,
+      scenarios: [{
+        name: "inbox-shared",
+        type: "inbox",
+        // An actor URI is used (not an acct: handle) because WebFinger is
+        // https-only and this loopback target is served over http.
+        recipient: new URL("/users/alice", target.url).href,
+        inbox: "shared",
+        load: { concurrency: 2 },
+        duration: "300ms",
+      }],
+    };
+    const scenario = normalizeSuite(suite).scenarios[0];
+    const measurement = await inboxRunner.run({
+      scenario,
+      target: target.url,
+      documentLoader: await getDocumentLoader({ allowPrivateAddress: true }),
+      contextLoader: await getContextLoader({ allowPrivateAddress: true }),
+      allowPrivateAddress: true,
+      fleet,
+    });
+
+    // Deliveries were accepted, i.e. the target verified the HTTP signatures.
+    assert.ok(measurement.requests.total > 0, "expected some deliveries");
+    assert.strictEqual(
+      measurement.requests.successRate,
+      1,
+      `expected all deliveries to succeed; errors: ${
+        JSON.stringify(measurement.errors)
+      }`,
+    );
+    // Server-side metrics are read from the cooperative stats endpoint.
+    assert.ok(
+      measurement.server?.signatureVerificationMs != null,
+      "expected server-side signature verification metrics",
+    );
+    // The inbox listener actually ran (activities were processed inline).
+    assert.ok(target.receivedCount() > 0, "expected the inbox listener to run");
+  } finally {
+    try {
+      await fleet?.close();
+    } finally {
+      await target.close();
+    }
+  }
+});
diff --git a/packages/cli/src/bench/scenarios/inbox.ts b/packages/cli/src/bench/scenarios/inbox.ts
new file mode 100644
index 000000000..ce34f3221
--- /dev/null
+++ b/packages/cli/src/bench/scenarios/inbox.ts
@@ -0,0 +1,139 @@
+/**
+ * The `inbox` scenario runner: the end-to-end signed-delivery benchmark.
+ *
+ * It discovers the recipient's inbox the way a real peer does, then drives
+ * signed activity deliveries through the signing pipeline, aggregates the
+ * client-measured results, and reads the target's server-side metrics.
+ * @since 2.3.0
+ * @module
+ */
+
+import { Create, Note } from "@fedify/vocab";
+import type { Activity } from "@fedify/vocab";
+import { discoverInbox, selectInbox } from "../discovery/discover.ts";
+import { runLoad } from "../load/generator.ts";
+import { aggregateSamples } from "../metrics/aggregate.ts";
+import { fetchServerMetrics } from "../metrics/stats-client.ts";
+import { asList } from "../scenario/coerce.ts";
+import type { ActivitySpec } from "../scenario/types.ts";
+import type { SyntheticActor } from "../server/synthetic.ts";
+import { createActivityIdMinter } from "../signing/activity-id.ts";
+import { createSigningPipeline } from "../signing/pipeline.ts";
+import { signInboxDelivery } from "../signing/signer.ts";
+import {
+  type GenerateDirective,
+  isGenerateDirective,
+  resolveGenerate,
+} from "../template/generate.ts";
+import {
+  estimateTotal,
+  loadPlanOf,
+  measuredWindowMs,
+  type RunContext,
+  type ScenarioRunner,
+  sendRequest,
+} from "./runner.ts";
+
+/** Runs `inbox` scenarios: inbox discovery, then signed `Create` deliveries. */
+export const inboxRunner: ScenarioRunner = {
+  async run(context: RunContext) {
+    const { scenario, fleet } = context;
+    if (fleet == null || fleet.actors.length < 1) {
+      throw new Error(
+        "The inbox scenario requires the synthetic actor server.",
+      );
+    }
+    if (scenario.recipients.length < 1) {
+      throw new Error("The inbox scenario requires a recipient.");
+    }
+    const fetchImpl = context.fetch ?? fetch; // defaults to the global fetch
+    const discovered = await discoverInbox(scenario.recipients[0], {
+      documentLoader: context.documentLoader,
+      contextLoader: context.contextLoader,
+      allowPrivateAddress: context.allowPrivateAddress,
+    });
+    const inbox = selectInbox(discovered, scenario.inbox);
+
+    const actors = fleet.actors;
+    const minter = createActivityIdMinter(fleet.url);
+    let actorIndex = 0; // round-robin cursor over the fleet's actors
+    const factory = () => {
+      const actor = actors[actorIndex++ % actors.length];
+      const activity = buildActivity(
+        scenario.activity,
+        actor,
+        minter.next(),
+        fleet.url,
+        discovered.actorUri,
+      );
+      return signInboxDelivery({
+        actor,
+        inbox,
+        activity,
+        contextLoader: context.contextLoader,
+      });
+    };
+    const pipeline = createSigningPipeline(scenario.signing, factory, {
+      total: estimateTotal(scenario), // undefined unless open-loop
+    });
+
+    const send = async () => {
+      let request: Request;
+      try {
+        request = await pipeline.next();
+      } catch (error) { // no signed request: count a client error
+        return { ok: false, errorKind: "client", reason: String(error) };
+      }
+      return sendRequest(request, fetchImpl);
+    };
+
+    try {
+      await pipeline.prime();
+      const result = await runLoad(
+        loadPlanOf(scenario, context.rng),
+        send,
+        context.clock,
+      );
+      const measurement = aggregateSamples(result.samples, {
+        measuredWindowMs: measuredWindowMs(scenario),
+        includeHistogram: true,
+      });
+      const server = await fetchServerMetrics(context.target, fetchImpl);
+      return { ...measurement, server };
+    } finally { // close the pipeline even when the run fails
+      await pipeline.close();
+    }
+  },
+};
+
+/** Builds the `Create` (wrapping a fresh `Note`) that one delivery sends. */
+function buildActivity(
+  spec: ActivitySpec | undefined,
+  actor: SyntheticActor,
+  id: URL,
+  base: URL,
+  recipient: URL,
+): Activity {
+  const kind = asList(spec?.type)[0] ?? "Create";
+  if (kind !== "Create") {
+    const got = JSON.stringify(kind);
+    const prefix = "The inbox runner currently supports only Create activities";
+    throw new Error(`${prefix}; got ${got}.`);
+  }
+  const object = new Note({
+    id: new URL(`/objects/${crypto.randomUUID()}`, base),
+    attribution: actor.id,
+    content: resolveContent(spec?.object?.content),
+    to: recipient,
+  });
+  return new Create({ id, actor: actor.id, object, to: recipient });
+}
+
+function resolveContent(
+  content: string | GenerateDirective | undefined,
+): string {
+  if (typeof content === "string") return content;
+  if (content == null) return "Benchmark activity."; // default note body
+  if (!isGenerateDirective(content)) return String(content);
+  return resolveGenerate(content);
+}
diff --git a/packages/cli/src/bench/scenarios/registry.test.ts b/packages/cli/src/bench/scenarios/registry.test.ts
new file mode 100644
index 000000000..c42f57e96
--- /dev/null
+++ b/packages/cli/src/bench/scenarios/registry.test.ts
@@ -0,0 +1,23 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import type { ScenarioType } from "../scenario/types.ts";
+import { runnerFor } from "./registry.ts";
+
+test("runnerFor - returns the inbox and webfinger runners", () => {
+  assert.strictEqual(typeof runnerFor("inbox").run, "function");
+  assert.strictEqual(typeof runnerFor("webfinger").run, "function");
+});
+
+test("runnerFor - throws for scenario types without a runner", () => {
+  const pending: readonly ScenarioType[] = [
+    "actor",
+    "object",
+    "fanout",
+    "collection",
+    "failure",
+    "mixed",
+  ];
+  pending.forEach((type) => {
+    assert.throws(() => runnerFor(type), /not implemented/);
+  });
+});
diff --git a/packages/cli/src/bench/scenarios/registry.ts b/packages/cli/src/bench/scenarios/registry.ts
new file mode 100644
index 000000000..2cc09d0fd
--- /dev/null
+++ b/packages/cli/src/bench/scenarios/registry.ts
@@ -0,0 +1,42 @@
+/**
+ * The scenario-runner registry.
+ *
+ * Only `inbox` and `webfinger` have runners in this version; the other scenario
+ * types are expressible in the format but not yet executable, so requesting one
+ * fails with a clear message.
+ * @since 2.3.0
+ * @module
+ */
+
+import type { ScenarioType } from "../scenario/types.ts";
+import { inboxRunner } from "./inbox.ts";
+import type { ScenarioRunner } from "./runner.ts";
+import { webfingerRunner } from "./webfinger.ts";
+
+/** Scenario types `runnerFor` can execute; its error message lists them. */
+export const IMPLEMENTED_SCENARIO_TYPES: readonly ScenarioType[] = [
+  "inbox",
+  "webfinger",
+];
+
+/**
+ * Looks up the runner that executes a given scenario type.
+ *
+ * Only the types in {@link IMPLEMENTED_SCENARIO_TYPES} have a runner; the
+ * suite format accepts more types than this version can execute, and those
+ * are rejected with an error naming the supported ones.
+ * @param type The scenario type.
+ * @returns The runner registered for `type`.
+ * @throws {Error} If the type has no runner in this version.
+ */
+export function runnerFor(type: ScenarioType): ScenarioRunner {
+  if (type === "inbox") return inboxRunner;
+  if (type === "webfinger") return webfingerRunner;
+  // Every remaining type is valid in a suite file but has no runner yet, so
+  // fail with a message that lists the types that can actually be run.
+  const supported = IMPLEMENTED_SCENARIO_TYPES.join(", ");
+  throw new Error(
+    `The "${type}" scenario type is not implemented in this version of ` +
+      `fedify bench; supported types: ${supported}.`,
+  );
+}
diff --git a/packages/cli/src/bench/scenarios/runner.ts b/packages/cli/src/bench/scenarios/runner.ts
new file mode 100644
index 000000000..8f1a59cfc
--- /dev/null
+++ b/packages/cli/src/bench/scenarios/runner.ts
@@ -0,0 +1,78 @@
+/**
+ * The scenario runner interface and the shared plumbing every runner uses:
+ * turning a Response into a send outcome, deriving a load plan, and the
+ * measured window for throughput.
+ * @since 2.3.0
+ * @module
+ */
+
+import type { DocumentLoader } from "@fedify/vocab-runtime";
+import type { Rng } from "../load/arrival.ts";
+import type { Clock } from "../load/clock.ts";
+import type { LoadPlan, SendOutcome } from "../load/generator.ts";
+import type { ResolvedScenario } from "../scenario/normalize.ts";
+import type { ScenarioMeasurement } from "../result/build.ts";
+import type { SyntheticServer } from "../server/synthetic.ts";
+
+/** The context a scenario runner needs to execute. */
+export interface RunContext {
+  readonly scenario: ResolvedScenario; // the normalized scenario to run
+  readonly target: URL; // the benchmark target's base URL
+  readonly documentLoader: DocumentLoader; // passed to inbox discovery
+  readonly contextLoader: DocumentLoader; // inbox discovery and signing
+  readonly allowPrivateAddress: boolean; // passed to inbox discovery
+  /** The synthetic actor/key server, required by signed scenarios (inbox). */
+  readonly fleet: SyntheticServer | null;
+  /** Clock override for deterministic tests. */
+  readonly clock?: Clock;
+  /** RNG override for Poisson arrivals. */
+  readonly rng?: Rng;
+  /** Fetch implementation (overridable for tests). */
+  readonly fetch?: typeof fetch;
+}
+
+/** Executes scenarios of one type, resolving to the run's measurement. */
+export interface ScenarioRunner {
+  run(context: RunContext): Promise<ScenarioMeasurement>;
+}
+
+/**
+ * Sends one request and classifies the result: an `ok` (2xx) response is a
+ * success, any other status a failure, and a thrown error a network failure.
+ */
+export async function sendRequest(
+  request: Request,
+  fetchImpl: typeof fetch,
+): Promise<SendOutcome> {
+  try {
+    const response = await fetchImpl(request);
+    // Drain the body so the connection can be reused.
+    await response.arrayBuffer().catch(() => {});
+    const { status } = response;
+    if (!response.ok) return { ok: false, status, reason: `status_${status}` };
+    return { ok: true, status };
+  } catch (error) {
+    return { ok: false, errorKind: "network", reason: String(error) };
+  }
+}
+
+/**
+ * Builds the load plan for a resolved scenario: its load shape, duration,
+ * and warm-up period, together with the optional RNG override.
+ * @returns The plan to hand to the load generator.
+ */
+export function loadPlanOf(scenario: ResolvedScenario, rng?: Rng): LoadPlan {
+  const { load, durationMs, warmupMs } = scenario;
+  return { load, durationMs, warmupMs, rng };
+}
+
+/** Throughput window in ms: duration minus warm-up, never below 1. */
+export function measuredWindowMs(scenario: ResolvedScenario): number {
+  return Math.max(1, scenario.durationMs - scenario.warmupMs);
+}
+
+/** Request-count estimate for presigning: rate * duration; open-loop only. */
+export function estimateTotal(scenario: ResolvedScenario): number | undefined {
+  if (scenario.load.kind !== "open") return undefined;
+  return Math.ceil(scenario.load.ratePerSec * (scenario.durationMs / 1000));
+}
diff --git a/packages/cli/src/bench/scenarios/webfinger.test.ts b/packages/cli/src/bench/scenarios/webfinger.test.ts
new file mode 100644
index 000000000..bdb1ecfab
--- /dev/null
+++ b/packages/cli/src/bench/scenarios/webfinger.test.ts
@@ -0,0 +1,61 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { serve } from "srvx";
+import { getContextLoader, getDocumentLoader } from "../../docloader.ts";
+import { normalizeSuite } from "../scenario/normalize.ts";
+import type { Suite } from "../scenario/types.ts";
+import { webfingerRunner } from "./webfinger.ts";
+
+test("webfingerRunner - drives lookups and aggregates results", async () => {
+  let lookups = 0;
+  const server = serve({
+    port: 0,
+    hostname: "127.0.0.1",
+    silent: true,
+    fetch(request: Request): Response {
+      const url = new URL(request.url);
+      if (url.pathname === "/.well-known/webfinger") {
+        lookups++;
+        return new Response(
+          JSON.stringify({
+            subject: url.searchParams.get("resource"),
+            links: [],
+          }),
+          { headers: { "content-type": "application/jrd+json" } },
+        );
+      }
+      return new Response("Not found", { status: 404 });
+    },
+  });
+  await server.ready();
+  const target = new URL(server.url!);
+  try {
+    const suite: Suite = {
+      version: 1,
+      target: target.href,
+      scenarios: [{
+        name: "wf",
+        type: "webfinger",
+        recipient: [`acct:alice@${target.host}`, `acct:bob@${target.host}`],
+        load: { concurrency: 4 },
+        duration: "50ms",
+      }],
+    };
+    const scenario = normalizeSuite(suite).scenarios[0];
+    const measurement = await webfingerRunner.run({
+      scenario,
+      target,
+      documentLoader: await getDocumentLoader({ allowPrivateAddress: true }),
+      contextLoader: await getContextLoader({ allowPrivateAddress: true }),
+      allowPrivateAddress: true,
+      fleet: null,
+    });
+    assert.ok(measurement.requests.total > 0);
+    assert.strictEqual(measurement.requests.successRate, 1);
+    assert.ok(lookups > 0);
+    assert.ok(measurement.client.latencyMs.p95 >= 0);
+    assert.strictEqual(measurement.server, null);
+  } finally {
+    await server.close(true);
+  }
+});
diff --git a/packages/cli/src/bench/scenarios/webfinger.ts b/packages/cli/src/bench/scenarios/webfinger.ts
new file mode 100644
index 000000000..125f3882c
--- /dev/null
+++ b/packages/cli/src/bench/scenarios/webfinger.ts
@@ -0,0 +1,50 @@
+/**
+ * The `webfinger` scenario runner: drives WebFinger handle-resolution lookups,
+ * the discovery primitive every other scenario reuses.
+ * @since 2.3.0
+ * @module
+ */
+
+import { convertUrlIfHandle } from "../../webfinger/lib.ts";
+import { runLoad } from "../load/generator.ts";
+import { aggregateSamples } from "../metrics/aggregate.ts";
+import { fetchServerMetrics } from "../metrics/stats-client.ts";
+import {
+  loadPlanOf,
+  measuredWindowMs,
+  type RunContext,
+  type ScenarioRunner,
+  sendRequest,
+} from "./runner.ts";
+
+/** Builds the target's WebFinger lookup URL for one recipient. */
+function webfingerUrl(target: URL, recipient: string): URL {
+  const lookup = new URL("/.well-known/webfinger", target);
+  lookup.searchParams.set("resource", convertUrlIfHandle(recipient).href);
+  return lookup;
+}
+
+/** Runs `webfinger` scenarios by cycling lookups over the recipients. */
+export const webfingerRunner: ScenarioRunner = {
+  async run(context: RunContext) {
+    const { scenario, target } = context;
+    const fetchImpl = context.fetch ?? fetch;
+    // Without recipients, fall back to looking up the target's own host.
+    const recipients = scenario.recipients.length > 0
+      ? scenario.recipients
+      : [target.host];
+    const urls = recipients.map((r) => webfingerUrl(target, r));
+    // Each send takes the next lookup URL, wrapping around at the end.
+    let cursor = 0;
+    const send = () =>
+      sendRequest(new Request(urls[cursor++ % urls.length]), fetchImpl);
+    const plan = loadPlanOf(scenario, context.rng);
+    const { samples } = await runLoad(plan, send, context.clock);
+    const measurement = aggregateSamples(samples, {
+      measuredWindowMs: measuredWindowMs(scenario),
+      includeHistogram: true,
+    });
+    const server = await fetchServerMetrics(target, fetchImpl);
+    return { ...measurement, server };
+  },
+};

From f14d0326749dc9c9872c32cd81e443e82f49793e Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 02:07:50 +0900
Subject: [PATCH 14/47] Wire up the fedify bench orchestrator

Implement `runBench`: load, validate, and normalize the suite (any
configuration error logs a friendly message and exits 2), preflight the
scenario runners so an unsupported type fails fast, classify and probe
the target, and apply the safety gate.  A `--dry-run` prints the plan and
sends nothing.  For a real run it builds the synthetic actor server once
when a signed scenario needs it, runs each scenario, assembles the
report, renders it to the chosen format (stdout or a file), and sets the
exit code to 0 when the gate passes and 1 otherwise.

The default exit sets `process.exitCode` so cleanup and output flushing
finish first.  Signed scenarios are refused against a public target,
since the synthetic actor server is only reachable on the client's
loopback.  Dependencies are injectable, and tests cover the passing and
failing gates, dry run, the unsafe-target and public-signed refusals, and
an invalid suite.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 packages/cli/src/bench/action.test.ts | 245 ++++++++++++++++++++++++++
 packages/cli/src/bench/action.ts      | 221 ++++++++++++++++++++++-
 2 files changed, 458 insertions(+), 8 deletions(-)
 create mode 100644 packages/cli/src/bench/action.test.ts

diff --git a/packages/cli/src/bench/action.test.ts b/packages/cli/src/bench/action.test.ts
new file mode 100644
index 000000000..7083c3197
--- /dev/null
+++ b/packages/cli/src/bench/action.test.ts
@@ -0,0 +1,245 @@
+import {
+  createFederation,
+  generateCryptoKeyPair,
+  MemoryKvStore,
+} from "@fedify/fedify";
+import { Create, Endpoints, Person } from "@fedify/vocab";
+import assert from "node:assert/strict";
+import { mkdtemp, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import test from "node:test";
+import { serve } from "srvx";
+import runBench from "./action.ts";
+import type { BenchCommand } from "./command.ts";
+
+async function spawnTarget() {
+  const federation = createFederation<void>({
+    kv: new MemoryKvStore(),
+    benchmarkMode: true,
+  });
+  let keyPairs: CryptoKeyPair[] | undefined;
+  federation
+    .setActorDispatcher("/users/{identifier}", async (ctx, identifier) => {
+      if (identifier !== "alice") return null;
+      const pairs = await ctx.getActorKeyPairs(identifier);
+      return new Person({
+        id: ctx.getActorUri(identifier),
+        preferredUsername: identifier,
+        inbox: ctx.getInboxUri(identifier),
+        endpoints: new Endpoints({ sharedInbox: ctx.getInboxUri() }),
+        publicKey: pairs[0]?.cryptographicKey,
+        assertionMethods: pairs.map((p) => p.multikey),
+      });
+    })
+    .mapHandle((_ctx, username) => (username === "alice" ? "alice" : null))
+    .setKeyPairsDispatcher(async (_ctx, identifier) => {
+      if (identifier !== "alice") return [];
+      keyPairs ??= [
+        await generateCryptoKeyPair("RSASSA-PKCS1-v1_5"),
+        await generateCryptoKeyPair("Ed25519"),
+      ];
+      return keyPairs;
+    });
+  federation.setInboxListeners("/users/{identifier}/inbox", "/inbox").on(
+    Create,
+    () => {},
+  );
+  const server = serve({
+    port: 0,
+    hostname: "127.0.0.1",
+    silent: true,
+    fetch: (request: Request) =>
+      federation.fetch(request, { contextData: undefined }),
+  });
+  await server.ready();
+  return { url: new URL(server.url!), close: () => server.close(true) };
+}
+
+function command(overrides: Partial<BenchCommand>): BenchCommand {
+  return {
+    command: "bench",
+    scenario: "",
+    target: undefined,
+    format: "json",
+    output: undefined,
+    dryRun: false,
+    allowUnsafeTarget: false,
+    userAgent: "Fedify-bench-test/1.0",
+    ...overrides,
+  } as BenchCommand;
+}
+
+async function writeSuite(content: string): Promise<string> {
+  const prefix = join(tmpdir(), "fedify-bench-");
+  const file = join(await mkdtemp(prefix), "suite.yaml");
+  await writeFile(file, content, { encoding: "utf-8" });
+  return file;
+}
+
+function inboxSuite(target: URL, expectLine: string): string {
+  const recipient = new URL("/users/alice", target).href;
+  return `version: 1
+target: ${target.href}
+scenarios:
+  - name: inbox-shared
+    type: inbox
+    recipient: ${JSON.stringify(recipient)}
+    inbox: shared
+    load: { concurrency: 2 }
+    duration: 250ms
+    expect:
+${expectLine}
+`;
+}
+
+test("runBench - passing gate exits 0 and writes a valid report", async () => {
+  const target = await spawnTarget();
+  try {
+    const file = await writeSuite(
+      inboxSuite(target.url, '      successRate: ">= 99%"'),
+    );
+    let code = -1;
+    let output = "";
+    await runBench(command({ scenario: file }), {
+      exit: (c) => {
+        code = c;
+      },
+      writeOutput: (c) => {
+        output = c;
+        return Promise.resolve();
+      },
+      log: () => {},
+    });
+    assert.strictEqual(code, 0);
+    const report = JSON.parse(output);
+    assert.strictEqual(report.passed, true);
+    assert.strictEqual(report.scenarios[0].requests.successRate, 1);
+    assert.ok(report.target.statsAvailable);
+  } finally {
+    await target.close();
+  }
+});
+
+test("runBench - failing gate exits 1", async () => {
+  const target = await spawnTarget();
+  try {
+    // An impossible latency threshold makes the gate fail.
+    const file = await writeSuite(
+      inboxSuite(target.url, '      latency.p95: "< 0ms"'),
+    );
+    let code = -1;
+    await runBench(command({ scenario: file }), {
+      exit: (c) => {
+        code = c;
+      },
+      writeOutput: () => Promise.resolve(),
+      log: () => {},
+    });
+    assert.strictEqual(code, 1);
+  } finally {
+    await target.close();
+  }
+});
+
+test("runBench - dry run prints a plan and sends nothing", async () => {
+  const target = await spawnTarget();
+  try {
+    const file = await writeSuite(
+      inboxSuite(target.url, '      successRate: ">= 99%"'),
+    );
+    let code = -1;
+    let output = "";
+    await runBench(command({ scenario: file, dryRun: true }), {
+      exit: (c) => {
+        code = c;
+      },
+      writeOutput: (c) => {
+        output = c;
+        return Promise.resolve();
+      },
+      log: () => {},
+    });
+    assert.strictEqual(code, 0);
+    assert.match(output, /dry run/i);
+    assert.match(output, /No requests were sent/);
+  } finally {
+    await target.close();
+  }
+});
+
+test("runBench - refuses an unsafe public target (exit 2)", async () => {
+  const file = await writeSuite(`version: 1
+target: https://example.com
+scenarios:
+  - name: wf
+    type: webfinger
+    recipient: "acct:alice@example.com"
+`);
+  let code = -1;
+  await runBench(command({ scenario: file }), {
+    exit: (c) => {
+      code = c;
+    },
+    writeOutput: () => Promise.resolve(),
+    log: () => {},
+    // Probe fails without network, so the target appears non-benchmark.
+    fetch: () => Promise.reject(new Error("offline")),
+  });
+  assert.strictEqual(code, 2);
+});
+
+test("runBench - rejects a signed scenario against a public target", async () => {
+  const file = await writeSuite(`version: 1
+target: https://staging.example
+scenarios:
+  - name: inbox-shared
+    type: inbox
+    recipient: "https://staging.example/users/alice"
+    load: { concurrency: 2 }
+    duration: 100ms
+`);
+  let code = -1;
+  let message = "";
+  await runBench(command({ scenario: file }), {
+    exit: (c) => {
+      code = c;
+    },
+    writeOutput: () => Promise.resolve(),
+    log: (m) => {
+      message = m;
+    },
+    // The target advertises benchmark mode so it passes the safety gate.
+    fetch: () =>
+      Promise.resolve(
+        new Response(
+          JSON.stringify({ version: 1, source: "server", scopeMetrics: [] }),
+          { headers: { "content-type": "application/json" } },
+        ),
+      ),
+  });
+  assert.strictEqual(code, 2);
+  assert.match(message, /loopback or private/);
+});
+
+test("runBench - invalid suite exits 2", async () => {
+  const file = await writeSuite(`target: http://localhost:3000
+scenarios:
+  - name: x
+    type: inbox
+    recipient: "acct:a@x"
+`); // missing version
+  let code = -1;
+  let message = "";
+  await runBench(command({ scenario: file }), {
+    exit: (c) => {
+      code = c;
+    },
+    writeOutput: () => Promise.resolve(),
+    log: (m) => {
+      message = m;
+    },
+  });
+  assert.strictEqual(code, 2);
+  assert.match(message, /Invalid/);
+});
diff --git a/packages/cli/src/bench/action.ts b/packages/cli/src/bench/action.ts
index daa3b4985..9b8fb585c 100644
--- a/packages/cli/src/bench/action.ts
+++ b/packages/cli/src/bench/action.ts
@@ -1,14 +1,219 @@
+import { writeFile } from "node:fs/promises";
+import process from "node:process";
+import { getContextLoader, getDocumentLoader } from "../docloader.ts";
+import { buildFleet } from "./actor/fleet.ts";
 import type { BenchCommand } from "./command.ts";
+import {
+  buildReport,
+  buildScenarioResult,
+  configHash,
+  detectEnvironment,
+} from "./result/build.ts";
+import { probeBenchmarkMode } from "./discovery/probe.ts";
+import { renderReport, type ReportFormat } from "./render/index.ts";
+import { loadSuiteFile } from "./scenario/load.ts";
+import {
+  normalizeSuite,
+  type ResolvedScenario,
+  type ResolvedSuite,
+} from "./scenario/normalize.ts";
+import type { Suite } from "./scenario/types.ts";
+import { validateSuite } from "./scenario/validate.ts";
+import { assertTargetAllowed, UnsafeTargetError } from "./safety/gate.ts";
+import { classifyTarget } from "./safety/tiers.ts";
+import { runnerFor } from "./scenarios/registry.ts";
+import {
+  spawnSyntheticServer,
+  type SyntheticServer,
+} from "./server/synthetic.ts";
+
+/** Injectable dependencies for {@link runBench}, overridable in tests. */
+export interface RunBenchDeps {
+  /** Terminates the process with an exit code. */
+  readonly exit?: (code: number) => void;
+  /** Writes the rendered report to the output path or standard output. */
+  readonly writeOutput?: (
+    content: string,
+    outputPath: string | undefined,
+  ) => Promise<void>;
+  /** Emits a progress line (to standard error by default). */
+  readonly log?: (message: string) => void;
+  /** Fetch implementation. */
+  readonly fetch?: typeof fetch;
+}
+
+/** The scenario types that need the synthetic actor/key server. */
+const SIGNED_TYPES = new Set(["inbox"]);
 
 /**
- * Runs the `fedify bench` command.
- *
- * This is a placeholder that is fleshed out in subsequent steps; the engine,
- * scenario runners, and reporting are wired in incrementally.
+ * Runs the `fedify bench` command: load and validate the suite, gate the
+ * target, run each scenario, and render the report.  The process exits 0 when
+ * every `expect` gate passes and 1 otherwise; configuration and safety errors
+ * exit 2.
  * @param command The parsed `bench` command options.
+ * @param deps Injectable dependencies for testing.
  */
-export default function runBench(_command: BenchCommand): Promise<void> {
-  return Promise.reject(
-    new Error("fedify bench is not implemented yet."),
-  );
+export default async function runBench(
+  command: BenchCommand,
+  deps: RunBenchDeps = {},
+): Promise<void> {
+  // Set the exit code rather than terminating, so cleanup (closing the fleet)
+  // and output flushing complete before the process exits.
+  const exit = deps.exit ?? ((code: number) => {
+    process.exitCode = code;
+  });
+  const writeOutput = deps.writeOutput ?? defaultWriteOutput;
+  const log = deps.log ??
+    ((message: string) => process.stderr.write(`${message}\n`));
+  const fetchImpl = deps.fetch ?? fetch;
+
+  // Loading, validation, and normalization failures are all user-facing
+  // configuration errors.
+  let validated: Suite;
+  let suite: ResolvedSuite;
+  try {
+    const raw = await loadSuiteFile(command.scenario);
+    validated = validateSuite(raw, command.scenario);
+    suite = normalizeSuite(validated, { target: command.target });
+  } catch (error) {
+    log(error instanceof Error ? error.message : String(error));
+    return void exit(2);
+  }
+
+  // Preflight every runner so an unsupported scenario type fails fast, before
+  // any probe or load.
+  let runners;
+  try {
+    runners = suite.scenarios.map((scenario) => runnerFor(scenario.type));
+  } catch (error) {
+    log(error instanceof Error ? error.message : String(error));
+    return void exit(2);
+  }
+
+  if (command.dryRun) {
+    await writeOutput(renderPlan(suite), command.output);
+    return void exit(0);
+  }
+
+  const tier = classifyTarget(suite.target);
+  const probe = await probeBenchmarkMode(suite.target, fetchImpl);
+  try {
+    assertTargetAllowed({
+      tier,
+      benchmarkMode: probe.benchmarkMode,
+      allowUnsafe: command.allowUnsafeTarget,
+      dryRun: false,
+    });
+  } catch (error) {
+    if (error instanceof UnsafeTargetError) {
+      log(error.message);
+      return void exit(2);
+    }
+    throw error;
+  }
+
+  // The synthetic actor server is only reachable on the client's loopback, so
+  // a remote (public) target cannot dereference its keys.  Signed scenarios
+  // therefore require a loopback or private target.
+  if (
+    tier === "public" && suite.scenarios.some((s) => SIGNED_TYPES.has(s.type))
+  ) {
+    log(
+      "Signed scenarios (inbox) require a loopback or private target: the " +
+        "benchmark's synthetic actor server is only reachable on the client's " +
+        "loopback, so a public target cannot dereference its keys.  Use a " +
+        "local target, or a read scenario such as webfinger.",
+    );
+    return void exit(2);
+  }
+
+  const allowPrivateAddress = tier !== "public";
+  const documentLoader = await getDocumentLoader({
+    allowPrivateAddress,
+    userAgent: command.userAgent,
+  });
+  const contextLoader = await getContextLoader({
+    allowPrivateAddress,
+    userAgent: command.userAgent,
+  });
+
+  let fleet: SyntheticServer | undefined;
+  const startedAt = new Date().toISOString();
+  try {
+    if (suite.scenarios.some((s) => SIGNED_TYPES.has(s.type))) {
+      fleet = await spawnSyntheticServer(await buildFleet(suite.actors));
+    }
+    const results = [];
+    for (let i = 0; i < suite.scenarios.length; i++) {
+      const scenario = suite.scenarios[i];
+      log(`Running scenario "${scenario.name}" (${scenario.type})…`);
+      const measurement = await runners[i].run({
+        scenario,
+        target: suite.target,
+        documentLoader,
+        contextLoader,
+        allowPrivateAddress,
+        fleet: fleet ?? null,
+        fetch: fetchImpl,
+      });
+      results.push(buildScenarioResult(scenario, measurement));
+    }
+    const report = buildReport({
+      scenarios: results,
+      environment: detectEnvironment(),
+      target: {
+        url: suite.target.href,
+        fedifyVersion: probe.fedifyVersion,
+        statsAvailable: probe.benchmarkMode,
+      },
+      startedAt,
+      finishedAt: new Date().toISOString(),
+      suite: {
+        // Hash the whole authored suite plus the effective target, so any
+        // change to defaults, actors, or scenarios changes the hash.
+        configHash: configHash({ suite: validated, target: suite.target.href }),
+      },
+    });
+    await writeOutput(
+      renderReport(report, command.format as ReportFormat),
+      command.output,
+    );
+    return void exit(report.passed ? 0 : 1);
+  } finally {
+    await fleet?.close();
+  }
+}
+
+async function defaultWriteOutput(
+  content: string,
+  outputPath: string | undefined,
+): Promise<void> {
+  if (outputPath == null) {
+    process.stdout.write(content.endsWith("\n") ? content : `${content}\n`);
+    return;
+  }
+  await writeFile(outputPath, content, { encoding: "utf-8" });
+}
+
+function renderPlan(suite: ResolvedSuite): string {
+  const lines = [
+    "Fedify benchmark plan (dry run)",
+    "",
+    `Target: ${suite.target.href}`,
+    "",
+  ];
+  for (const scenario of suite.scenarios) {
+    lines.push(
+      `- ${scenario.name} (${scenario.type}): ${describePlan(scenario)}`,
+    );
+  }
+  lines.push("", "No requests were sent.");
+  return `${lines.join("\n")}\n`;
+}
+
+function describePlan(scenario: ResolvedScenario): string {
+  const load = scenario.load.kind === "open"
+    ? `open-loop ${scenario.load.ratePerSec}/s ${scenario.load.arrival}`
+    : `closed-loop concurrency ${scenario.load.concurrency}`;
+  return `${load}, duration ${scenario.durationMs}ms, signing ${scenario.signing}`;
 }

From a27f6a068330852e883a5272e7d8403df0d2f258 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 02:36:15 +0900
Subject: [PATCH 15/47] Apply scenario templates when loading a suite

Wire the logic-less `${{ ... }}` template engine into the load pipeline:
`renderSuiteTemplates()` expands templates in a parsed suite with a
context exposing the target (host, hostname, port, origin, href,
protocol) plus the default helpers, and `runBench` runs it between
loading and validation.  This is what makes `recipient:
"http://${{ target.host }}/users/alice"` resolve to a concrete URL.

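The target comes from `--target` or the suite's own `target`, neither of
which is templated.  Tests cover rendering, and the end-to-end inbox run
now uses templating.

Also add the published-schema site under schema/ (README, landing page,
logo, and Netlify headers/config) and a `generate-bench-schema` task, and
make the schema immutability guard compare against the merge-base with
main instead of HEAD, so a committed edit on a branch is caught.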

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 packages/cli/deno.json                        |   1 +
 .../__fixtures__/scenarios/all-types.yaml     |   4 +-
 .../scenarios/getting-started.yaml            |   4 +-
 packages/cli/src/bench/action.test.ts         |   5 +-
 packages/cli/src/bench/action.ts              |   5 +-
 packages/cli/src/bench/scenario/load.test.ts  |  49 ++++
 packages/cli/src/bench/scenario/load.ts       |  45 ++++
 packages/cli/src/bench/schema.test.ts         |  40 +++-
 schema/README.md                              |  88 +++++++
 schema/_headers                               |   9 +
 schema/index.html                             | 171 ++++++++++++++
 schema/logo.svg                               | 215 ++++++++++++++++++
 schema/netlify.toml                           |  17 ++
 13 files changed, 638 insertions(+), 15 deletions(-)
 create mode 100644 packages/cli/src/bench/scenario/load.test.ts
 create mode 100644 schema/README.md
 create mode 100644 schema/_headers
 create mode 100644 schema/index.html
 create mode 100644 schema/logo.svg
 create mode 100644 schema/netlify.toml

diff --git a/packages/cli/deno.json b/packages/cli/deno.json
index 0ba8d066e..5af61c849 100644
--- a/packages/cli/deno.json
+++ b/packages/cli/deno.json
@@ -58,6 +58,7 @@
         "codegen"
       ]
     },
+    "generate-bench-schema": "deno run -A scripts/generate-bench-schema.ts",
     "test": {
       "command": "deno test --allow-all",
       "dependencies": [
diff --git a/packages/cli/src/bench/__fixtures__/scenarios/all-types.yaml b/packages/cli/src/bench/__fixtures__/scenarios/all-types.yaml
index 07a82929a..c83a775d1 100644
--- a/packages/cli/src/bench/__fixtures__/scenarios/all-types.yaml
+++ b/packages/cli/src/bench/__fixtures__/scenarios/all-types.yaml
@@ -12,10 +12,10 @@ defaults:
   signing: pipeline
   runs: 3
 actors:
-  - name: "Mastodon-like actor ${{ index }}"
+  - name: "Mastodon-like actor"
     count: 3
     signatureStandards: [draft-cavage-http-signatures-12, ld-signatures]
-  - name: "Hollo-like actor ${{ index }}"
+  - name: "Hollo-like actor"
     count: 2
     signatureStandards: [rfc9421, fep8b32]
 scenarios:
diff --git a/packages/cli/src/bench/__fixtures__/scenarios/getting-started.yaml b/packages/cli/src/bench/__fixtures__/scenarios/getting-started.yaml
index 9cfb77cc0..2e4ca25b4 100644
--- a/packages/cli/src/bench/__fixtures__/scenarios/getting-started.yaml
+++ b/packages/cli/src/bench/__fixtures__/scenarios/getting-started.yaml
@@ -9,7 +9,9 @@ defaults:
 scenarios:
   - name: inbox-shared
     type: inbox
-    recipient: "acct:alice@${{ target.host }}"
+    # An actor URI (not an acct: handle) works over a plain-http loopback
+    # target, since WebFinger resolution requires https.
+    recipient: "http://${{ target.host }}/users/alice"
     inbox: shared
     activity:
       type: Create
diff --git a/packages/cli/src/bench/action.test.ts b/packages/cli/src/bench/action.test.ts
index 7083c3197..ea622ccd3 100644
--- a/packages/cli/src/bench/action.test.ts
+++ b/packages/cli/src/bench/action.test.ts
@@ -78,13 +78,14 @@ async function writeSuite(content: string): Promise<string> {
 }
 
 function inboxSuite(target: URL, expectLine: string): string {
-  const recipient = new URL("/users/alice", target).href;
+  // Uses `${{ target.host }}` templating to form the actor URI (WebFinger is
+  // https-only, so an acct: handle would not resolve over http loopback).
   return `version: 1
 target: ${target.href}
 scenarios:
   - name: inbox-shared
     type: inbox
-    recipient: ${JSON.stringify(recipient)}
+    recipient: "http://\${{ target.host }}/users/alice"
     inbox: shared
     load: { concurrency: 2 }
     duration: 250ms
diff --git a/packages/cli/src/bench/action.ts b/packages/cli/src/bench/action.ts
index 9b8fb585c..5bef894d4 100644
--- a/packages/cli/src/bench/action.ts
+++ b/packages/cli/src/bench/action.ts
@@ -11,7 +11,7 @@ import {
 } from "./result/build.ts";
 import { probeBenchmarkMode } from "./discovery/probe.ts";
 import { renderReport, type ReportFormat } from "./render/index.ts";
-import { loadSuiteFile } from "./scenario/load.ts";
+import { loadSuiteFile, renderSuiteTemplates } from "./scenario/load.ts";
 import {
   normalizeSuite,
   type ResolvedScenario,
@@ -73,7 +73,8 @@ export default async function runBench(
   let suite: ResolvedSuite;
   try {
     const raw = await loadSuiteFile(command.scenario);
-    validated = validateSuite(raw, command.scenario);
+    const rendered = renderSuiteTemplates(raw, command.target);
+    validated = validateSuite(rendered, command.scenario);
     suite = normalizeSuite(validated, { target: command.target });
   } catch (error) {
     log(error instanceof Error ? error.message : String(error));
diff --git a/packages/cli/src/bench/scenario/load.test.ts b/packages/cli/src/bench/scenario/load.test.ts
new file mode 100644
index 000000000..a89eef4cb
--- /dev/null
+++ b/packages/cli/src/bench/scenario/load.test.ts
@@ -0,0 +1,49 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { parseSuiteText, renderSuiteTemplates } from "./load.ts";
+
+test("parseSuiteText - parses YAML and JSON alike", () => {
+  const yaml = parseSuiteText("version: 1\ntarget: http://x\n");
+  const json = parseSuiteText('{"version":1,"target":"http://x"}');
+  assert.deepEqual(yaml, json);
+});
+
+test("renderSuiteTemplates - expands target.host from the suite target", () => {
+  const raw = {
+    version: 1,
+    target: "http://localhost:3000",
+    scenarios: [{
+      name: "x",
+      type: "inbox",
+      recipient: "http://${{ target.host }}/users/alice",
+    }],
+  };
+  const rendered = renderSuiteTemplates(raw) as typeof raw;
+  assert.strictEqual(
+    rendered.scenarios[0].recipient,
+    "http://localhost:3000/users/alice",
+  );
+});
+
+test("renderSuiteTemplates - uses the --target override for the context", () => {
+  const raw = {
+    version: 1,
+    target: "http://a",
+    scenarios: [{
+      name: "x",
+      type: "webfinger",
+      recipient: "acct:bob@${{ target.host }}",
+    }],
+  };
+  const rendered = renderSuiteTemplates(raw, "http://b:9000") as typeof raw;
+  assert.strictEqual(rendered.scenarios[0].recipient, "acct:bob@b:9000");
+});
+
+test("renderSuiteTemplates - leaves untemplated values untouched", () => {
+  const raw = {
+    version: 1,
+    target: "http://localhost:3000",
+    scenarios: [{ name: "x", type: "webfinger", recipient: "acct:a@host" }],
+  };
+  assert.deepEqual(renderSuiteTemplates(raw), raw);
+});
diff --git a/packages/cli/src/bench/scenario/load.ts b/packages/cli/src/bench/scenario/load.ts
index 222c52dc7..0aabbfd21 100644
--- a/packages/cli/src/bench/scenario/load.ts
+++ b/packages/cli/src/bench/scenario/load.ts
@@ -10,6 +10,8 @@
 
 import { readFile } from "node:fs/promises";
 import { parse as parseYaml } from "yaml";
+import { defaultHelpers } from "../template/helpers.ts";
+import { renderTemplates } from "../template/template.ts";
 
 /**
  * Parses scenario suite text (YAML or JSON) into an untyped value.
@@ -28,3 +30,46 @@ export function parseSuiteText(text: string): unknown {
 export async function loadSuiteFile(path: string): Promise<unknown> {
   return parseSuiteText(await readFile(path, { encoding: "utf-8" }));
 }
+
+/**
+ * Expands `${{ ... }}` templates in a parsed suite.
+ *
+ * The context exposes `target` (its `host`, `hostname`, `port`, `origin`,
+ * `href`, and `protocol`) plus the default helpers.  The target comes from the
+ * `--target` override or the suite's own `target`, neither of which is
+ * templated.
+ * @param raw The parsed suite value.
+ * @param targetOverride A target URL from `--target`, if any.
+ * @returns The suite with templates expanded.
+ */
+export function renderSuiteTemplates(
+  raw: unknown,
+  targetOverride?: string,
+): unknown {
+  const target = targetOverride ?? suiteTarget(raw);
+  const values: Record<string, unknown> = {};
+  if (target != null) {
+    try {
+      const url = new URL(target);
+      values.target = {
+        host: url.host,
+        hostname: url.hostname,
+        port: url.port,
+        origin: url.origin,
+        href: url.href,
+        protocol: url.protocol.replace(/:$/, ""),
+      };
+    } catch {
+      // Leave `target` unset; `${{ target.* }}` then fails with a clear error.
+    }
+  }
+  return renderTemplates(raw, { values, helpers: defaultHelpers() });
+}
+
+function suiteTarget(raw: unknown): string | undefined {
+  if (raw != null && typeof raw === "object" && "target" in raw) {
+    const target = (raw as { target?: unknown }).target;
+    return typeof target === "string" ? target : undefined;
+  }
+  return undefined;
+}
diff --git a/packages/cli/src/bench/schema.test.ts b/packages/cli/src/bench/schema.test.ts
index cef8f2c69..abace7489 100644
--- a/packages/cli/src/bench/schema.test.ts
+++ b/packages/cli/src/bench/schema.test.ts
@@ -109,25 +109,49 @@ for (const { name, fileName, schema } of PUBLISHED_SCHEMAS) {
   });
 }
 
-// Guard 4: immutability of already-published schema versions.
+// Guard 4: immutability of already-published schema versions.  A published
+// version file must not differ from its content on the main branch; compare
+// against the merge-base so a committed edit on a feature branch is caught
+// (not just an uncommitted one).  The check is skipped when no base ref is
+// available (e.g. a shallow clone) or the file is new since the base.
+function publishedBaseCommit(): string | null {
+  for (const ref of ["origin/main", "main"]) {
+    try {
+      execFileSync("git", ["rev-parse", "--verify", "--quiet", ref], {
+        cwd: REPO_ROOT,
+        stdio: "ignore",
+      });
+      return execFileSync("git", ["merge-base", "HEAD", ref], {
+        cwd: REPO_ROOT,
+        encoding: "utf-8",
+      }).trim();
+    } catch {
+      // Ref unavailable; try the next.
+    }
+  }
+  return null;
+}
+
+const baseCommit = publishedBaseCommit();
 for (const { name, fileName } of PUBLISHED_SCHEMAS) {
-  test(`schema guard - ${name} published file is unchanged from HEAD`, () => {
-    let committed: string;
+  test(`schema guard - ${name} published file is immutable`, () => {
+    if (baseCommit == null) return;
+    let published: string;
     try {
-      committed = execFileSync(
+      published = execFileSync(
         "git",
-        ["show", `HEAD:schema/bench/${fileName}`],
+        ["show", `${baseCommit}:schema/bench/${fileName}`],
         { cwd: REPO_ROOT, encoding: "utf-8" },
       );
     } catch {
-      // Not yet committed (a brand-new version file): nothing to guard.
+      // Not published at the base (a brand-new version file): nothing to guard.
       return;
     }
     const current = readFileSync(join(SCHEMA_DIR, fileName), "utf-8");
     assert.strictEqual(
       current,
-      committed,
-      `schema/bench/${fileName} is published and immutable; publish a new ` +
+      published,
+      `schema/bench/${fileName} is published and immutable; ship a new ` +
         `version file instead of editing it`,
     );
   });
diff --git a/schema/README.md b/schema/README.md
new file mode 100644
index 000000000..81e481da9
--- /dev/null
+++ b/schema/README.md
@@ -0,0 +1,88 @@
+<!-- deno-fmt-ignore-file -->
+
+Fedify JSON Schemas
+===================
+
+This directory holds the published JSON Schemas (draft 2020-12) for Fedify file
+formats.  It is deployed to <https://json-schema.fedify.dev/> by Netlify on every
+push to the *main* branch; the directory layout maps onto the URL, so
+*schema/bench/scenario-v1.json* is served at
+<https://json-schema.fedify.dev/bench/scenario-v1.json>.
+
+Current schemas:
+
+ -  *bench/scenario-v1.json* — the `fedify bench` scenario suite format (input).
+ -  *bench/report-v1.json* — the `fedify bench` report format (output).
+
+
+Versioning: append-only and immutable
+--------------------------------------
+
+A published version file is **never edited**.  Each schema's `$id` equals its
+hosted URL, and external consumers pin that URL, so editing a published file
+would silently change their validation.  A change therefore ships as a **new
+version file** (for example *scenario-v2.json*), never an edit to an existing
+one.  The immutability guard below enforces this where *main* history is
+available, and review enforces it otherwise.
+
+
+Source of truth and regeneration
+---------------------------------
+
+The schemas are authored as embedded objects in the CLI so the validator can
+use them without reading files at runtime (which keeps the `deno compile`
+binary self-contained):
+
+ -  *packages/cli/src/bench/scenario/schema.ts*
+ -  *packages/cli/src/bench/result/schema.ts*
+
+The *.json* files here are generated from those objects.  After editing an
+embedded schema, regenerate the published copies:
+
+~~~~ sh
+deno task -f @fedify/cli generate-bench-schema
+~~~~
+
+The matching TypeScript types live next to each schema
+(*packages/cli/src/bench/scenario/types.ts* and
+*packages/cli/src/bench/result/model.ts*); keep them in sync with the schema.
+
+
+Guards
+------
+
+The benchmark schema tests (*packages/cli/src/bench/schema.test.ts*) enforce:
+
+ -  **Meta/structural validation** — each schema is well-formed draft 2020-12
+    with a hosted `$id` and no dangling `$ref`s.
+ -  **Fixture validation** — example scenario and report fixtures validate, and
+    deliberately invalid fixtures are rejected.
+ -  **Drift** — the embedded schema object equals the published *.json* file
+    byte-for-byte (run the regeneration task if this fails).
+ -  **Immutability** — a published version file does not differ from its
+    content at the merge-base with *main*, so a committed edit on a branch is
+    caught.  This runs wherever *main* history is available (local development,
+    and CI checked out with full history); it is skipped in a shallow checkout,
+    where immutability is enforced by review instead.  Either way, ship a new
+    version file rather than editing a published one.
+
+
+Hosting
+-------
+
+*_headers* and *netlify.toml* configure Netlify to serve the schemas
+cross-origin (editors and online validators fetch them), with the
+`application/schema+json` media type and a long immutable cache.  Point the
+Netlify site's base directory at this *schema/* folder.
+
+
+Editor support
+--------------
+
+Add a schema reference to a scenario file for autocomplete and validation:
+
+~~~~ yaml
+# yaml-language-server: $schema=https://json-schema.fedify.dev/bench/scenario-v1.json
+version: 1
+target: http://localhost:3000
+~~~~
diff --git a/schema/_headers b/schema/_headers
new file mode 100644
index 000000000..43cf845f9
--- /dev/null
+++ b/schema/_headers
@@ -0,0 +1,9 @@
+# Netlify headers for the published JSON Schemas.
+# Schemas are served cross-origin (editors and online validators fetch them),
+# with the JSON Schema media type and a long immutable cache, since every
+# version file is immutable.
+
+/bench/*
+  Access-Control-Allow-Origin: *
+  Content-Type: application/schema+json
+  Cache-Control: public, max-age=31536000, immutable
diff --git a/schema/index.html b/schema/index.html
new file mode 100644
index 000000000..54ff4cc44
--- /dev/null
+++ b/schema/index.html
@@ -0,0 +1,171 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <title>Fedify JSON Schemas</title>
+    <meta
+      name="description"
+      content="Published JSON Schemas for Fedify, including the fedify bench scenario and report formats."
+    />
+    <style>
+      :root {
+        color-scheme: light dark;
+        --bg: #ffffff;
+        --fg: #1f2937;
+        --muted: #6b7280;
+        --accent: #0284c7;
+        --card: #f8fafc;
+        --border: #e5e7eb;
+        --code-bg: #f1f5f9;
+      }
+      @media (prefers-color-scheme: dark) {
+        :root {
+          --bg: #0b1220;
+          --fg: #e5e7eb;
+          --muted: #9ca3af;
+          --accent: #38bdf8;
+          --card: #111a2e;
+          --border: #1f2a44;
+          --code-bg: #0f1830;
+        }
+      }
+      * {
+        box-sizing: border-box;
+      }
+      body {
+        margin: 0;
+        background: var(--bg);
+        color: var(--fg);
+        font: 16px/1.6 system-ui, -apple-system, "Segoe UI", Roboto, sans-serif;
+      }
+      main {
+        max-width: 44rem;
+        margin: 0 auto;
+        padding: 3rem 1.25rem 5rem;
+      }
+      header {
+        display: flex;
+        align-items: center;
+        gap: 1rem;
+        margin-bottom: 2rem;
+      }
+      header img {
+        width: 56px;
+        height: 56px;
+      }
+      h1 {
+        font-size: 1.6rem;
+        margin: 0;
+      }
+      h2 {
+        font-size: 1.15rem;
+        margin: 2.5rem 0 0.75rem;
+      }
+      a {
+        color: var(--accent);
+      }
+      p {
+        color: var(--fg);
+      }
+      .lead {
+        color: var(--muted);
+        font-size: 1.05rem;
+      }
+      ul.schemas {
+        list-style: none;
+        padding: 0;
+        margin: 0;
+        display: grid;
+        gap: 0.75rem;
+      }
+      ul.schemas li {
+        background: var(--card);
+        border: 1px solid var(--border);
+        border-radius: 0.6rem;
+        padding: 0.9rem 1.1rem;
+      }
+      ul.schemas a {
+        font-weight: 600;
+        text-decoration: none;
+      }
+      ul.schemas a:hover {
+        text-decoration: underline;
+      }
+      ul.schemas .desc {
+        color: var(--muted);
+        font-size: 0.92rem;
+        margin-top: 0.25rem;
+      }
+      pre {
+        background: var(--code-bg);
+        border: 1px solid var(--border);
+        border-radius: 0.6rem;
+        padding: 0.9rem 1.1rem;
+        overflow-x: auto;
+      }
+      code {
+        font-family: ui-monospace, SFMono-Regular, "SF Mono", Menlo, monospace;
+        font-size: 0.9em;
+      }
+      footer {
+        margin-top: 3.5rem;
+        color: var(--muted);
+        font-size: 0.9rem;
+        border-top: 1px solid var(--border);
+        padding-top: 1.25rem;
+      }
+    </style>
+  </head>
+  <body>
+    <main>
+      <header>
+        <img src="./logo.svg" alt="Fedify logo" width="56" height="56" />
+        <h1>Fedify JSON Schemas</h1>
+      </header>
+
+      <p class="lead">
+        Published JSON Schemas (draft 2020-12) for Fedify file formats.  Each
+        version is immutable: a change ships as a new version file, never an
+        edit, so a pinned <code>$schema</code> URL keeps validating the same way.
+      </p>
+
+      <h2>Benchmarking (<code>fedify bench</code>)</h2>
+      <ul class="schemas">
+        <li>
+          <a href="./bench/scenario-v1.json">bench/scenario-v1.json</a>
+          <div class="desc">
+            The benchmark scenario suite format (input).  YAML or JSON.
+          </div>
+        </li>
+        <li>
+          <a href="./bench/report-v1.json">bench/report-v1.json</a>
+          <div class="desc">
+            The benchmark report format (output).  The canonical machine form.
+          </div>
+        </li>
+      </ul>
+
+      <h2>Editor support</h2>
+      <p>
+        Add a schema reference to your scenario file for autocomplete and
+        validation in editors with the YAML Language Server:
+      </p>
+      <pre><code># yaml-language-server: $schema=https://json-schema.fedify.dev/bench/scenario-v1.json
+version: 1
+target: http://localhost:3000
+# …</code></pre>
+      <p>
+        Generated benchmark reports already carry their <code>$schema</code>, so
+        consumers can validate them directly.
+      </p>
+
+      <footer>
+        <a href="https://fedify.dev/">Fedify</a> &middot;
+        <a href="https://github.com/fedify-dev/fedify">Source</a> &middot;
+        <a href="https://github.com/fedify-dev/fedify/tree/main/schema"
+        >Contributing schemas</a>
+      </footer>
+    </main>
+  </body>
+</html>
diff --git a/schema/logo.svg b/schema/logo.svg
new file mode 100644
index 000000000..e92ecbe5d
--- /dev/null
+++ b/schema/logo.svg
@@ -0,0 +1,215 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg
+  width="48"
+  height="48"
+  viewBox="0 0 112 112"
+  version="1.1"
+  id="svg5"
+  xmlns="http://www.w3.org/2000/svg"
+  xmlns:svg="http://www.w3.org/2000/svg"
+  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+  xmlns:cc="http://creativecommons.org/ns#"
+  xmlns:dc="http://purl.org/dc/elements/1.1/"
+>
+  <defs
+    id="defs5"
+  >
+    <clipPath
+      clipPathUnits="userSpaceOnUse"
+      id="clipPath8"
+    >
+      <ellipse
+        style="fill: #000000; stroke: #000000; stroke-width: 3.02635; stroke-linejoin: miter; stroke-dasharray: none; stroke-dashoffset: 0; stroke-opacity: 1; paint-order: normal"
+        id="ellipse8"
+        cx="55.92646"
+        cy="56.073448"
+        transform="rotate(-0.07519647)"
+        rx="54.486828"
+        ry="54.486824"
+      />
+    </clipPath>
+  </defs>
+  <title id="title1">Fedify</title>
+  <ellipse
+    style="fill: #ffffff; stroke: none; stroke-width: 3.02635; stroke-linejoin: miter; stroke-dasharray: none; stroke-dashoffset: 0; stroke-opacity: 1; paint-order: normal"
+    id="path8-2"
+    cx="55.92646"
+    cy="56.073448"
+    transform="rotate(-0.07519647)"
+    rx="54.486828"
+    ry="54.486824"
+  />
+  <g
+    id="g8"
+    clip-path="url(#clipPath8)"
+  >
+    <g
+      id="g5"
+    >
+      <path
+        d="M 77.4624,78.9593 C 78.2802,68.3428 73.7143,58.8833 71.3291,55.4806 L 87.6847,48.335 c 4.9066,1.6333 6.474,17.3537 6.6444,25.0098 0,0 -3.5778,0.5104 -5.6222,2.0416 -2.085,1.5616 -5.6222,5.1041 -11.2445,3.5729 z"
+        fill="#ffffff"
+        stroke="#84b5d9"
+        stroke-width="3"
+        stroke-linecap="round"
+        id="path1"
+      />
+      <path
+        d="M 7.06239,52.159 C -5.55748,54.1782 -12.682,66.0659 -17.661,73.2769 c -0.8584,13.3918 -0.6181,41.1021 7.211,44.8111 7.82906,3.709 26.9553,1.545 35.5398,0 v 4.121 c 1.3736,0.515 5.0477,1.648 8.7562,2.06 3.7085,0.412 6.696,-1.202 7.7261,-2.06 v -9.787 c 0.5151,-0.343 2.9874,-1.957 8.7562,-5.666 7.211,-4.635 11.3315,-16.482 9.7863,-24.7229 -1.1589,-6.181 3.6055,-18.5427 6.1809,-26.7838 9.7863,2.0601 22.148,-1.0301 23.1781,-14.9369 C 90.1205,31.5801 80.7174,19.9868 63.2051,25.3752 45.6927,30.7636 48.268,52.159 41.5721,59.37 35.3913,53.1891 23.5446,49.5219 7.06239,52.159 Z"
+        fill="#bae6fd"
+        stroke="#0c4a6e"
+        stroke-width="3"
+        stroke-linecap="round"
+        id="path3"
+      />
+      <path
+        d="M 66.2955,55.2493 C 64.5786,54.7342 60.9387,53.6011 60.1146,53.189"
+        stroke="#0284c7"
+        stroke-opacity="0.37"
+        stroke-width="3"
+        stroke-linecap="round"
+        id="path4"
+        style="opacity: 1; fill: none; stroke-width: 3; stroke-linejoin: miter; stroke-dasharray: none; paint-order: normal"
+      />
+      <path
+        d="m 41.5721,59.3698 c -0.6868,0.8585 -2.6784,2.7814 -5.1507,3.6055"
+        stroke="#0284c7"
+        stroke-opacity="0.37"
+        stroke-width="3"
+        stroke-linecap="round"
+        id="path5"
+        style="fill: none"
+      />
+      <circle
+        cx="68.870796"
+        cy="42.8876"
+        r="2.0602801"
+        fill="#000000"
+        id="circle5"
+      />
+    </g>
+    <g
+      id="g2"
+      transform="matrix(0.08160718,0,0,0.08160718,76.994732,53.205469)"
+      style="display: inline"
+    >
+      <path
+        style="color: #000000; font-style: normal; font-variant: normal; font-weight: normal; font-stretch: normal; font-size: medium; line-height: normal; font-family: sans-serif; font-variant-ligatures: normal; font-variant-position: normal; font-variant-caps: normal; font-variant-numeric: normal; font-variant-alternates: normal; font-feature-settings: normal; text-indent: 0; text-align: start; text-decoration: none; text-decoration-line: none; text-decoration-style: solid; text-decoration-color: #000000; letter-spacing: normal; word-spacing: normal; text-transform: none; writing-mode: lr-tb; direction: ltr; text-orientation: mixed; dominant-baseline: auto; baseline-shift: baseline; text-anchor: start; white-space: normal; shape-padding: 0; clip-rule: nonzero; display: inline; overflow: visible; visibility: visible; opacity: 1; isolation: auto; mix-blend-mode: normal; color-interpolation: sRGB; color-interpolation-filters: linearRGB; solid-color: #000000; solid-opacity: 1; vector-effect: none; fill: #a730b8; fill-opacity: 1; fill-rule: nonzero; stroke: none; stroke-width: 41.5748; stroke-linecap: butt; stroke-linejoin: miter; stroke-miterlimit: 4; stroke-dasharray: none; stroke-dashoffset: 0; stroke-opacity: 1; color-rendering: auto; image-rendering: auto; shape-rendering: auto; text-rendering: auto; enable-background: accumulate"
+        d="m 181.13086,275.13672 a 68.892408,68.892408 0 0 1 -29.46484,29.32812 l 161.75781,162.38868 38.99805,-19.76368 z m 213.36328,214.1875 -38.99805,19.76367 81.96289,82.2832 a 68.892409,68.892409 0 0 1 29.47071,-29.33203 z"
+        id="path9722"
+        transform="matrix(0.26458333,0,0,0.26458333,-6.6789703,32.495842)"
+      />
+      <path
+        style="color: #000000; font-style: normal; font-variant: normal; font-weight: normal; font-stretch: normal; font-size: medium; line-height: normal; font-family: sans-serif; font-variant-ligatures: normal; font-variant-position: normal; font-variant-caps: normal; font-variant-numeric: normal; font-variant-alternates: normal; font-feature-settings: normal; text-indent: 0; text-align: start; text-decoration: none; text-decoration-line: none; text-decoration-style: solid; text-decoration-color: #000000; letter-spacing: normal; word-spacing: normal; text-transform: none; writing-mode: lr-tb; direction: ltr; text-orientation: mixed; dominant-baseline: auto; baseline-shift: baseline; text-anchor: start; white-space: normal; shape-padding: 0; clip-rule: nonzero; display: inline; overflow: visible; visibility: visible; opacity: 1; isolation: auto; mix-blend-mode: normal; color-interpolation: sRGB; color-interpolation-filters: linearRGB; solid-color: #000000; solid-opacity: 1; vector-effect: none; fill: #5496be; fill-opacity: 1; fill-rule: nonzero; stroke: none; stroke-width: 41.5748; stroke-linecap: butt; stroke-linejoin: miter; stroke-miterlimit: 4; stroke-dasharray: none; stroke-dashoffset: 0; stroke-opacity: 1; color-rendering: auto; image-rendering: auto; shape-rendering: auto; text-rendering: auto; enable-background: accumulate"
+        d="m 581.64648,339.39062 -91.57617,46.41016 6.75196,43.18945 103.61523,-52.51367 A 68.892409,68.892409 0 0 1 581.64648,339.39062 Z M 436.9082,412.74219 220.38281,522.47656 a 68.892408,68.892408 0 0 1 18.79492,37.08985 L 443.66016,455.93359 Z"
+        id="path9729"
+        transform="matrix(0.26458333,0,0,0.26458333,-6.6789703,32.495842)"
+      />
+      <path
+        style="color: #000000; font-style: normal; font-variant: normal; font-weight: normal; font-stretch: normal; font-size: medium; line-height: normal; font-family: sans-serif; font-variant-ligatures: normal; font-variant-position: normal; font-variant-caps: normal; font-variant-numeric: normal; font-variant-alternates: normal; font-feature-settings: normal; text-indent: 0; text-align: start; text-decoration: none; text-decoration-line: none; text-decoration-style: solid; text-decoration-color: #000000; letter-spacing: normal; word-spacing: normal; text-transform: none; writing-mode: lr-tb; direction: ltr; text-orientation: mixed; dominant-baseline: auto; baseline-shift: baseline; text-anchor: start; white-space: normal; shape-padding: 0; clip-rule: nonzero; display: inline; overflow: visible; visibility: visible; opacity: 1; isolation: auto; mix-blend-mode: normal; color-interpolation: sRGB; color-interpolation-filters: linearRGB; solid-color: #000000; solid-opacity: 1; vector-effect: none; fill: #ce3d1a; fill-opacity: 1; fill-rule: nonzero; stroke: none; stroke-width: 41.5748; stroke-linecap: butt; stroke-linejoin: miter; stroke-miterlimit: 4; stroke-dasharray: none; stroke-dashoffset: 0; stroke-opacity: 1; color-rendering: auto; image-rendering: auto; shape-rendering: auto; text-rendering: auto; enable-background: accumulate"
+        d="M 367.27539,142.4375 262.79492,346.4082 293.64258,377.375 404.26562,161.41797 A 68.892408,68.892408 0 0 1 367.27539,142.4375 Z m -131.6543,257.02148 -52.92187,103.31446 a 68.892409,68.892409 0 0 1 36.98633,18.97851 l 46.78125,-91.32812 z"
+        id="path9713"
+        transform="matrix(0.26458333,0,0,0.26458333,-6.6789703,32.495842)"
+      />
+      <path
+        style="color: #000000; font-style: normal; font-variant: normal; font-weight: normal; font-stretch: normal; font-size: medium; line-height: normal; font-family: sans-serif; font-variant-ligatures: normal; font-variant-position: normal; font-variant-caps: normal; font-variant-numeric: normal; font-variant-alternates: normal; font-feature-settings: normal; text-indent: 0; text-align: start; text-decoration: none; text-decoration-line: none; text-decoration-style: solid; text-decoration-color: #000000; letter-spacing: normal; word-spacing: normal; text-transform: none; writing-mode: lr-tb; direction: ltr; text-orientation: mixed; dominant-baseline: auto; baseline-shift: baseline; text-anchor: start; white-space: normal; shape-padding: 0; clip-rule: nonzero; display: inline; overflow: visible; visibility: visible; opacity: 1; isolation: auto; mix-blend-mode: normal; color-interpolation: sRGB; color-interpolation-filters: linearRGB; solid-color: #000000; solid-opacity: 1; vector-effect: none; fill: #d0188f; fill-opacity: 1; fill-rule: nonzero; stroke: none; stroke-width: 41.5748; stroke-linecap: butt; stroke-linejoin: miter; stroke-miterlimit: 4; stroke-dasharray: none; stroke-dashoffset: 0; stroke-opacity: 1; color-rendering: auto; image-rendering: auto; shape-rendering: auto; text-rendering: auto; enable-background: accumulate"
+        d="m 150.76758,304.91797 a 68.892408,68.892408 0 0 1 -34.41602,7.19531 68.892408,68.892408 0 0 1 -6.65039,-0.69531 l 30.90235,197.66211 a 68.892409,68.892409 0 0 1 34.41601,-7.19531 68.892409,68.892409 0 0 1 6.64649,0.69531 z"
+        id="path1015"
+        transform="matrix(0.26458333,0,0,0.26458333,-6.6789703,32.495842)"
+      />
+      <path
+        style="color: #000000; font-style: normal; font-variant: normal; font-weight: normal; font-stretch: normal; font-size: medium; line-height: normal; font-family: sans-serif; font-variant-ligatures: normal; font-variant-position: normal; font-variant-caps: normal; font-variant-numeric: normal; font-variant-alternates: normal; font-feature-settings: normal; text-indent: 0; text-align: start; text-decoration: none; text-decoration-line: none; text-decoration-style: solid; text-decoration-color: #000000; letter-spacing: normal; word-spacing: normal; text-transform: none; writing-mode: lr-tb; direction: ltr; text-orientation: mixed; dominant-baseline: auto; baseline-shift: baseline; text-anchor: start; white-space: normal; shape-padding: 0; clip-rule: nonzero; display: inline; overflow: visible; visibility: visible; opacity: 1; isolation: auto; mix-blend-mode: normal; color-interpolation: sRGB; color-interpolation-filters: linearRGB; solid-color: #000000; solid-opacity: 1; vector-effect: none; fill: #5b36e9; fill-opacity: 1; fill-rule: nonzero; stroke: none; stroke-width: 41.5748; stroke-linecap: butt; stroke-linejoin: miter; stroke-miterlimit: 4; stroke-dasharray: none; stroke-dashoffset: 0; stroke-opacity: 1; color-rendering: auto; image-rendering: auto; shape-rendering: auto; text-rendering: auto; enable-background: accumulate"
+        d="m 239.3418,560.54492 a 68.892408,68.892408 0 0 1 0.7207,13.87696 68.892408,68.892408 0 0 1 -7.26758,27.17968 l 197.62891,31.71289 a 68.892409,68.892409 0 0 1 -0.72266,-13.8789 68.892409,68.892409 0 0 1 7.26953,-27.17774 z"
+        id="path1674"
+        transform="matrix(0.26458333,0,0,0.26458333,-6.6789703,32.495842)"
+      />
+      <path
+        style="color: #000000; font-style: normal; font-variant: normal; font-weight: normal; font-stretch: normal; font-size: medium; line-height: normal; font-family: sans-serif; font-variant-ligatures: normal; font-variant-position: normal; font-variant-caps: normal; font-variant-numeric: normal; font-variant-alternates: normal; font-feature-settings: normal; text-indent: 0; text-align: start; text-decoration: none; text-decoration-line: none; text-decoration-style: solid; text-decoration-color: #000000; letter-spacing: normal; word-spacing: normal; text-transform: none; writing-mode: lr-tb; direction: ltr; text-orientation: mixed; dominant-baseline: auto; baseline-shift: baseline; text-anchor: start; white-space: normal; shape-padding: 0; clip-rule: nonzero; display: inline; overflow: visible; visibility: visible; opacity: 1; isolation: auto; mix-blend-mode: normal; color-interpolation: sRGB; color-interpolation-filters: linearRGB; solid-color: #000000; solid-opacity: 1; vector-effect: none; fill: #30b873; fill-opacity: 1; fill-rule: nonzero; stroke: none; stroke-width: 41.5748; stroke-linecap: butt; stroke-linejoin: miter; stroke-miterlimit: 4; stroke-dasharray: none; stroke-dashoffset: 0; stroke-opacity: 1; color-rendering: auto; image-rendering: auto; shape-rendering: auto; text-rendering: auto; enable-background: accumulate"
+        d="m 601.13281,377.19922 -91.21875,178.08203 a 68.892408,68.892408 0 0 1 36.99414,18.98242 L 638.125,396.18359 a 68.892409,68.892409 0 0 1 -36.99219,-18.98437 z"
+        id="path1676"
+        transform="matrix(0.26458333,0,0,0.26458333,-6.6789703,32.495842)"
+      />
+      <path
+        style="color: #000000; font-style: normal; font-variant: normal; font-weight: normal; font-stretch: normal; font-size: medium; line-height: normal; font-family: sans-serif; font-variant-ligatures: normal; font-variant-position: normal; font-variant-caps: normal; font-variant-numeric: normal; font-variant-alternates: normal; font-feature-settings: normal; text-indent: 0; text-align: start; text-decoration: none; text-decoration-line: none; text-decoration-style: solid; text-decoration-color: #000000; letter-spacing: normal; word-spacing: normal; text-transform: none; writing-mode: lr-tb; direction: ltr; text-orientation: mixed; dominant-baseline: auto; baseline-shift: baseline; text-anchor: start; white-space: normal; shape-padding: 0; clip-rule: nonzero; display: inline; overflow: visible; visibility: visible; opacity: 1; isolation: auto; mix-blend-mode: normal; color-interpolation: sRGB; color-interpolation-filters: linearRGB; solid-color: #000000; solid-opacity: 1; vector-effect: none; fill: #ebe305; fill-opacity: 1; fill-rule: nonzero; stroke: none; stroke-width: 41.5748; stroke-linecap: butt; stroke-linejoin: miter; stroke-miterlimit: 4; stroke-dasharray: none; stroke-dashoffset: 0; stroke-opacity: 1; color-rendering: auto; image-rendering: auto; shape-rendering: auto; text-rendering: auto; enable-background: accumulate"
+        d="m 476.72266,125.33008 a 68.892408,68.892408 0 0 1 -29.47071,29.33203 l 141.26563,141.81055 a 68.892409,68.892409 0 0 1 29.46875,-29.33204 z"
+        id="path1678"
+        transform="matrix(0.26458333,0,0,0.26458333,-6.6789703,32.495842)"
+      />
+      <path
+        style="color: #000000; font-style: normal; font-variant: normal; font-weight: normal; font-stretch: normal; font-size: medium; line-height: normal; font-family: sans-serif; font-variant-ligatures: normal; font-variant-position: normal; font-variant-caps: normal; font-variant-numeric: normal; font-variant-alternates: normal; font-feature-settings: normal; text-indent: 0; text-align: start; text-decoration: none; text-decoration-line: none; text-decoration-style: solid; text-decoration-color: #000000; letter-spacing: normal; word-spacing: normal; text-transform: none; writing-mode: lr-tb; direction: ltr; text-orientation: mixed; dominant-baseline: auto; baseline-shift: baseline; text-anchor: start; white-space: normal; shape-padding: 0; clip-rule: nonzero; display: inline; overflow: visible; visibility: visible; opacity: 1; isolation: auto; mix-blend-mode: normal; color-interpolation: sRGB; color-interpolation-filters: linearRGB; solid-color: #000000; solid-opacity: 1; vector-effect: none; fill: #f47601; fill-opacity: 1; fill-rule: nonzero; stroke: none; stroke-width: 41.5748; stroke-linecap: butt; stroke-linejoin: miter; stroke-miterlimit: 4; stroke-dasharray: none; stroke-dashoffset: 0; stroke-opacity: 1; color-rendering: auto; image-rendering: auto; shape-rendering: auto; text-rendering: auto; enable-background: accumulate"
+        d="m 347.78711,104.63086 -178.57617,90.49805 a 68.892409,68.892409 0 0 1 18.79297,37.08593 l 178.57421,-90.50195 a 68.892408,68.892408 0 0 1 -18.79101,-37.08203 z"
+        id="path1680"
+        transform="matrix(0.26458333,0,0,0.26458333,-6.6789703,32.495842)"
+      />
+      <path
+        style="color: #000000; font-style: normal; font-variant: normal; font-weight: normal; font-stretch: normal; font-size: medium; line-height: normal; font-family: sans-serif; font-variant-ligatures: normal; font-variant-position: normal; font-variant-caps: normal; font-variant-numeric: normal; font-variant-alternates: normal; font-feature-settings: normal; text-indent: 0; text-align: start; text-decoration: none; text-decoration-line: none; text-decoration-style: solid; text-decoration-color: #000000; letter-spacing: normal; word-spacing: normal; text-transform: none; writing-mode: lr-tb; direction: ltr; text-orientation: mixed; dominant-baseline: auto; baseline-shift: baseline; text-anchor: start; white-space: normal; shape-padding: 0; clip-rule: nonzero; display: inline; overflow: visible; visibility: visible; opacity: 1; isolation: auto; mix-blend-mode: normal; color-interpolation: sRGB; color-interpolation-filters: linearRGB; solid-color: #000000; solid-opacity: 1; vector-effect: none; fill: #57c115; fill-opacity: 1; fill-rule: nonzero; stroke: none; stroke-width: 41.5748; stroke-linecap: butt; stroke-linejoin: miter; stroke-miterlimit: 4; stroke-dasharray: none; stroke-dashoffset: 0; stroke-opacity: 1; color-rendering: auto; image-rendering: auto; shape-rendering: auto; text-rendering: auto; enable-background: accumulate"
+        d="m 446.92578,154.82617 a 68.892408,68.892408 0 0 1 -34.98242,7.48242 68.892408,68.892408 0 0 1 -6.0293,-0.63281 l 15.81836,101.29102 43.16211,6.92578 z m -16,167.02735 37.40039,239.48242 a 68.892409,68.892409 0 0 1 33.91406,-6.94336 68.892409,68.892409 0 0 1 7.20704,0.79101 L 474.08984,328.77734 Z"
+        id="path9758"
+        transform="matrix(0.26458333,0,0,0.26458333,-6.6789703,32.495842)"
+      />
+      <path
+        style="color: #000000; font-style: normal; font-variant: normal; font-weight: normal; font-stretch: normal; font-size: medium; line-height: normal; font-family: sans-serif; font-variant-ligatures: normal; font-variant-position: normal; font-variant-caps: normal; font-variant-numeric: normal; font-variant-alternates: normal; font-feature-settings: normal; text-indent: 0; text-align: start; text-decoration: none; text-decoration-line: none; text-decoration-style: solid; text-decoration-color: #000000; letter-spacing: normal; word-spacing: normal; text-transform: none; writing-mode: lr-tb; direction: ltr; text-orientation: mixed; dominant-baseline: auto; baseline-shift: baseline; text-anchor: start; white-space: normal; shape-padding: 0; clip-rule: nonzero; display: inline; overflow: visible; visibility: visible; opacity: 1; isolation: auto; mix-blend-mode: normal; color-interpolation: sRGB; color-interpolation-filters: linearRGB; solid-color: #000000; solid-opacity: 1; vector-effect: none; fill: #dbb210; fill-opacity: 1; fill-rule: nonzero; stroke: none; stroke-width: 41.5748; stroke-linecap: butt; stroke-linejoin: miter; stroke-miterlimit: 4; stroke-dasharray: none; stroke-dashoffset: 0; stroke-opacity: 1; color-rendering: auto; image-rendering: auto; shape-rendering: auto; text-rendering: auto; enable-background: accumulate"
+        d="m 188.13086,232.97461 a 68.892408,68.892408 0 0 1 0.75781,14.0957 68.892408,68.892408 0 0 1 -7.16015,26.98242 l 101.36914,16.28125 19.92382,-38.9082 z m 173.73633,27.90039 -19.92578,38.91211 239.51367,38.4668 a 68.892409,68.892409 0 0 1 -0.69531,-13.71875 68.892409,68.892409 0 0 1 7.34961,-27.32422 z"
+        id="path9760"
+        transform="matrix(0.26458333,0,0,0.26458333,-6.6789703,32.495842)"
+      />
+      <circle
+        style="fill: #ffca00; fill-opacity: 0.995968; stroke: none; stroke-width: 0.264583; stroke-opacity: 0.960784"
+        id="path817"
+        cx="106.26596"
+        cy="51.535553"
+        r="16.570711"
+        transform="rotate(3.1178174)"
+      />
+      <circle
+        id="path819"
+        style="fill: #64ff00; fill-opacity: 0.995968; stroke: none; stroke-width: 0.264583; stroke-opacity: 0.960784"
+        cx="171.42836"
+        cy="110.19328"
+        r="16.570711"
+        transform="rotate(3.1178174)"
+      />
+      <circle
+        id="path823"
+        style="fill: #00a3ff; fill-opacity: 0.995968; stroke: none; stroke-width: 0.264583; stroke-opacity: 0.960784"
+        cx="135.76379"
+        cy="190.27704"
+        r="16.570711"
+        transform="rotate(3.1178174)"
+      />
+      <circle
+        style="fill: #9500ff; fill-opacity: 0.995968; stroke: none; stroke-width: 0.264583; stroke-opacity: 0.960784"
+        id="path825"
+        cx="48.559471"
+        cy="181.1138"
+        r="16.570711"
+        transform="rotate(3.1178174)"
+      />
+      <circle
+        id="path827"
+        style="fill: #ff0000; fill-opacity: 0.995968; stroke: none; stroke-width: 0.264583; stroke-opacity: 0.960784"
+        cx="30.328812"
+        cy="95.366837"
+        r="16.570711"
+        transform="rotate(3.1178174)"
+      />
+    </g>
+  </g>
+  <circle
+    style="opacity: 1; fill: none; stroke: #84b5d9; stroke-width: 4.91342; stroke-linejoin: miter; stroke-dasharray: none; stroke-dashoffset: 0; stroke-opacity: 1; paint-order: normal"
+    id="path8"
+    cx="55.926456"
+    cy="56.073448"
+    transform="rotate(-0.07519625)"
+    r="53.543289"
+  />
+  <metadata
+    id="metadata8"
+  >
+    <rdf:RDF>
+      <cc:Work rdf:about="">
+        <dc:title>Fedify</dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+</svg>
diff --git a/schema/netlify.toml b/schema/netlify.toml
new file mode 100644
index 000000000..0f8dbbb25
--- /dev/null
+++ b/schema/netlify.toml
@@ -0,0 +1,17 @@
+# Netlify configuration for hosting the published JSON Schemas at
+# json-schema.fedify.dev.  Point the Netlify site's base directory at this
+# `schema/` folder; everything here is then served as the site root.
+#
+# The header rules below mirror `_headers` (either mechanism is sufficient):
+# schemas are served cross-origin with the JSON Schema media type and a long
+# immutable cache, because every published version file is immutable.
+
+[build]
+  publish = "."
+
+[[headers]]
+  for = "/bench/*"
+  [headers.values]
+    Access-Control-Allow-Origin = "*"
+    Content-Type = "application/schema+json"
+    Cache-Control = "public, max-age=31536000, immutable"

From aa1e5ca4e9d16925f80db246752e5f19809c6d6c Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 02:38:29 +0900
Subject: [PATCH 16/47] Document the fedify bench command

Extend the benchmarking manual with the client side: a getting-started
scenario suite, the actors and signature-standards model, `${{ }}`
templating, open- and closed-loop load with the signing modes, the
output formats and CI usage, the safety gate, and the http/loopback
caveats.  Add the @fedify/cli changelog entry for the new command.

https://github.com/fedify-dev/fedify/issues/783
https://github.com/fedify-dev/fedify/issues/744

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 CHANGES.md                  |  13 +++
 docs/manual/benchmarking.md | 161 +++++++++++++++++++++++++++++++++++-
 schema/README.md            |  12 +--
 3 files changed, 178 insertions(+), 8 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 007b79c67..e3a3ea53f 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -277,6 +277,19 @@ To be released.
 [#782]: https://github.com/fedify-dev/fedify/issues/782
 [#787]: https://github.com/fedify-dev/fedify/pull/787
 
+### @fedify/cli
+
+ -  Added the `fedify bench` command for benchmarking Fedify federation
+    workloads.  It acts as a synthetic remote actor that drives
+    ActivityPub-specific load (signed inbox deliveries and WebFinger lookups)
+    against a cooperative `benchmarkMode` target and reports latency,
+    throughput, success rate, and errors, reading server-side metrics from the
+    target's stats endpoint.  Benchmarks are described by a YAML or JSON
+    scenario suite validated against a published JSON Schema, with an `expect`
+    block per scenario that gates a run for CI.  [[#744], [#783]]
+
+[#783]: https://github.com/fedify-dev/fedify/issues/783
+
 ### @fedify/fixture
 
  -  Added `createTestMeterProvider()` and `TestMetricRecorder` helpers for
diff --git a/docs/manual/benchmarking.md b/docs/manual/benchmarking.md
index 022ba37a1..91f3cd9e4 100644
--- a/docs/manual/benchmarking.md
+++ b/docs/manual/benchmarking.md
@@ -1,7 +1,8 @@
 ---
 description: >-
-  Fedify can expose cooperative benchmark endpoints for measuring federation
-  workloads without requiring an external metrics backend.
+  Fedify can run as a cooperative benchmark target, and the fedify bench command
+  drives ActivityPub-specific load against it to measure federation workloads
+  without requiring an external metrics backend.
 ---
 
 Benchmarking
@@ -80,6 +81,162 @@ const federation = createFederation<void>({
 ~~~~
 
 
+The `fedify bench` command
+--------------------------
+
+*This command is available since Fedify 2.3.0.*
+
+Once a target runs in benchmark mode, the `fedify bench` command drives
+ActivityPub-specific load against it and reports latency, throughput, success
+rate, and errors.  It acts as a synthetic remote actor: it generates keys,
+serves its own actor and key documents over loopback, and signs every inbox
+delivery with the same `@fedify/fedify` signer a real peer uses, so the measured
+crypto cost is real.
+
+> [!NOTE]
+> This version runs the `inbox` and `webfinger` scenario types.  The scenario
+> format can express the others (`actor`, `object`, `fanout`, `collection`,
+> `failure`, and `mixed`), but they are not executed yet.
+
+### A scenario suite
+
+A benchmark is described by a *suite* file in YAML (JSON works too, since YAML
+is a superset of JSON).  The suite declares the `target`, shared `defaults`,
+the `actors` to sign as, and a list of `scenarios`, each with an optional
+`expect` block of pass/fail thresholds:
+
+~~~~ yaml
+# yaml-language-server: $schema=https://json-schema.fedify.dev/bench/scenario-v1.json
+version: 1
+target: http://localhost:3000
+defaults:
+  duration: 30s
+  warmup: 5s            # excluded from results; also warms the key cache
+  load:
+    rate: 200/s         # open-loop; or closed-loop with `concurrency: 50`
+actors:
+- count: 3
+  signatureStandards: [draft-cavage-http-signatures-12, ld-signatures]
+scenarios:
+- name: inbox-shared
+  type: inbox
+  recipient: "http://${{ target.host }}/users/alice"
+  inbox: shared
+  activity:
+    type: Create
+    object:
+      type: Note
+      content: { generate: lorem, size: 2KB }
+  expect:
+    successRate: ">= 99%"
+    latency.p95: "< 100ms"
+~~~~
+
+Run it against the target and read the terminal report:
+
+~~~~ sh
+fedify bench scenario.yaml
+~~~~
+
+The `# yaml-language-server:` line gives editors autocomplete and validation
+against the [published schema].
+Override the file's target with `--target`, choose the output with
+`--format`/`--output`, and inspect a run without sending anything with
+`--dry-run`.
+
+[published schema]: https://json-schema.fedify.dev/bench/scenario-v1.json
+
+### Actors
+
+You pick signature *standards*, not key algorithms; the key set is derived,
+because a Fedify actor is inherently multi-key.  An actor uses exactly one HTTP
+request signature scheme, plus any document signature schemes:
+
+| Standard                          | Layer        | Algorithm                  |
+| --------------------------------- | ------------ | -------------------------- |
+| `draft-cavage-http-signatures-12` | HTTP request | RSA                        |
+| `rfc9421`                         | HTTP request | RSA                        |
+| `ld-signatures`                   | document     | RSA (`RsaSignature2017`)   |
+| `fep8b32`                         | document     | Ed25519 (`eddsa-jcs-2022`) |
+
+`draft-cavage-http-signatures-12` and `rfc9421` are mutually exclusive (one HTTP
+scheme per actor).  Several actor groups with different standard sets model a
+heterogeneous fleet, which is what a server actually receives.
+
+### Templating
+
+Values support GitHub-Actions-style `${{ … }}` templating, kept logic-less
+(references and whitelisted helper calls only).  For example
+`${{ target.host }}` expands to the target's host.  Generated payloads use typed
+directives such as `content: { generate: lorem, size: 2KB }` rather than string
+templates.  The tool owns actor URLs and activity ids, so each request gets a
+unique activity id automatically (which Fedify's always-on inbox idempotency
+requires).
+
+### Load generation and signing
+
+Open-loop (`rate`) is the default and the realistic model for incoming
+federation traffic: requests are launched on schedule regardless of when earlier
+responses return, and each request's latency is measured from its scheduled
+time (the coordinated-omission correction), so a stalled target shows up as
+latency instead of being hidden.  Closed-loop (`concurrency`) runs a fixed
+number of virtual users.  Arrival is `constant` (default) or `poisson`, and
+`maxInFlight` caps concurrent in-flight requests.
+
+By default, signing is kept off the send path; set `signing` per scenario:
+
+ -  `pipeline` (default): background signers keep a bounded buffer filled, and
+    buffer starvation surfaces the client as the bottleneck.
+ -  `jit`: sign in the send path, for a strict signature-time-window target.
+ -  `presign`: pre-sign an estimated open-loop run before the timed window
+    (open-loop only; Poisson arrivals may still sign a few extra during the
+    run).
+
+### Output
+
+Choose the format with `--format text` (default), `json`, or `markdown`;
+`--output` only chooses the destination (a file instead of standard output) and
+does not infer the format, so pass both (for example
+`--format json --output report.json`).  JSON is the canonical machine form: it
+validates against the [report schema] and carries
+its own `$schema`; the text and Markdown renderers derive from the same model,
+keeping client-measured and server-reported numbers distinct.  In GitHub
+Actions, append the Markdown report to the job summary:
+
+~~~~ sh
+fedify bench scenario.yaml --format markdown >> "$GITHUB_STEP_SUMMARY"
+~~~~
+
+A failed `expect` gate makes the command exit non-zero, so a suite doubles as a
+CI check.  Keep CI gates on robust signals such as success rate, error counts,
+and coarse throughput floors or latency ceilings; precise latency-percentile
+regression testing belongs in a controlled environment, not a shared CI runner.
+
+[report schema]: https://json-schema.fedify.dev/bench/report-v1.json
+
+### Safety
+
+`fedify bench` runs without friction against a loopback or private target, or
+any target that advertises benchmark mode.  A public target that does not
+advertise benchmark mode is refused unless you pass `--allow-unsafe-target`,
+which is mandatory (never prompted) in CI and any non-interactive context.  Use
+`--dry-run` to print the plan without sending anything.
+
+### Local targets over HTTP
+
+An `inbox` recipient given as an `acct:` handle is resolved through WebFinger,
+which goes over HTTPS, so against a plain-HTTP loopback target give the
+`recipient` as the actor's URI (for example
+`http://localhost:3000/users/alice`) instead.  The `webfinger` scenario is
+unaffected: it requests `/.well-known/webfinger` on the target directly, so it
+can benchmark `acct:` lookups over plain HTTP.
+
+Signed scenarios such as `inbox` also require a loopback or private target,
+because the benchmark's synthetic actor server is only reachable on the
+client's loopback; a public target cannot dereference its keys, so use a read
+scenario such as `webfinger` there.
+
+
 Benchmark stats endpoint
 ------------------------
 
diff --git a/schema/README.md b/schema/README.md
index 81e481da9..0ae5a8f86 100644
--- a/schema/README.md
+++ b/schema/README.md
@@ -1,11 +1,11 @@
 <!-- deno-fmt-ignore-file -->
 
-Fedify JSON Schemas
+Fedify JSON schemas
 ===================
 
 This directory holds the published JSON Schemas (draft 2020-12) for Fedify file
-formats.  It is deployed to <https://json-schema.fedify.dev/> by Netlify on every
-push to the *main* branch; the directory layout maps onto the URL, so
+formats.  It is deployed to <https://json-schema.fedify.dev/> by Netlify on
+every push to the *main* branch; the directory layout maps onto the URL, so
 *schema/bench/scenario-v1.json* is served at
 <https://json-schema.fedify.dev/bench/scenario-v1.json>.
 
@@ -16,7 +16,7 @@ Current schemas:
 
 
 Versioning: append-only and immutable
---------------------------------------
+-------------------------------------
 
 A published version file is **never edited**.  Each schema's `$id` equals its
 hosted URL, and external consumers pin that URL, so editing a published file
@@ -27,7 +27,7 @@ available, and review enforces it otherwise.
 
 
 Source of truth and regeneration
----------------------------------
+--------------------------------
 
 The schemas are authored as embedded objects in the CLI so the validator can
 use them without reading files at runtime (which keeps the `deno compile`
@@ -70,7 +70,7 @@ The benchmark schema tests (*packages/cli/src/bench/schema.test.ts*) enforce:
 Hosting
 -------
 
-*_headers* and *netlify.toml* configure Netlify to serve the schemas
+*\_headers* and *netlify.toml* configure Netlify to serve the schemas
 cross-origin (editors and online validators fetch them), with the
 `application/schema+json` media type and a long immutable cache.  Point the
 Netlify site's base directory at this *schema/* folder.

From 490038a12ec661f1d910cd8b33dc4e7ac8b106bc Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 05:30:22 +0900
Subject: [PATCH 17/47] Honor or reject inbox scenario options in fedify bench

Address four behavioral gaps where the bench engine silently accepted
options it did not actually apply:

 -  Reject `runs` greater than 1 during normalization.  Repeated runs are
    not implemented yet, so accepting the field gave a single run while
    implying several.

 -  Fail a scenario that measured zero requests instead of letting every
    `expect` assertion pass vacuously, and reject a `warmup` that is not
    shorter than the `duration` (which would leave no measured window).

 -  Reject inbox `activity` options the runner cannot honor.  The runner
    always delivers a `Create` carrying an embedded `Note`, so a
    non-`Create` activity type, a non-`Note` `object.type`, or
    `embedObject: false` is now refused up front through a new optional
    `validate()` on the runner, called during preflight.  Scalar-or-list
    type fields are checked in full, not just their first element.

 -  Implement multi-recipient delivery in the inbox runner: every
    recipient's inbox is discovered, and deliveries (with the synthetic
    actors that sign them) are rotated across the recipients, modeling a
    server receiving from many peers into many local inboxes.

The scenario format and JSON Schema still express these options; only
suite normalization and the inbox/webfinger runners constrain what is
actually executed in this version.

https://github.com/fedify-dev/fedify/issues/783
https://github.com/fedify-dev/fedify/issues/744

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 docs/manual/benchmarking.md                   |  16 +-
 .../bench/__fixtures__/scenarios/ci-gate.json |   1 -
 packages/cli/src/bench/action.ts              |  10 +-
 packages/cli/src/bench/result/build.test.ts   |  10 ++
 packages/cli/src/bench/result/build.ts        |   6 +-
 .../cli/src/bench/scenario/normalize.test.ts  |  34 ++++
 packages/cli/src/bench/scenario/normalize.ts  |  33 ++--
 .../cli/src/bench/scenarios/inbox.test.ts     | 152 ++++++++++++++++--
 packages/cli/src/bench/scenarios/inbox.ts     |  84 ++++++++--
 packages/cli/src/bench/scenarios/runner.ts    |   6 +
 10 files changed, 307 insertions(+), 45 deletions(-)

diff --git a/docs/manual/benchmarking.md b/docs/manual/benchmarking.md
index 91f3cd9e4..0c72ee2d3 100644
--- a/docs/manual/benchmarking.md
+++ b/docs/manual/benchmarking.md
@@ -96,7 +96,16 @@ crypto cost is real.
 > [!NOTE]
 > This version runs the `inbox` and `webfinger` scenario types.  The scenario
 > format can express the others (`actor`, `object`, `fanout`, `collection`,
-> `failure`, and `mixed`), but they are not executed yet.
+> `failure`, and `mixed`), but they are not executed yet.  Within the runnable
+> types, a few options the format accepts are also not implemented yet and are
+> rejected up front with a clear message:
+>
+>  -  `runs` greater than `1` (repeated runs).
+>  -  An `inbox` `activity` that is not a `Create` carrying an embedded `Note`;
+>     that is, a non-`Create` `type`, a non-`Note` `object.type`, or
+>     `embedObject: false`.
+>  -  A `warmup` that is not shorter than the `duration` (which would leave no
+>     measured window).
 
 ### A scenario suite
 
@@ -144,6 +153,11 @@ Override the file's target with `--target`, choose the output with
 `--format`/`--output`, and inspect a run without sending anything with
 `--dry-run`.
 
+An `inbox` scenario's `recipient` may be a single value or a list.  With a
+list, deliveries are rotated across the recipients (and across the synthetic
+`actors` signing them), modeling a server that receives from many peers into
+many local inboxes.
+
 [published schema]: https://json-schema.fedify.dev/bench/scenario-v1.json
 
 ### Actors
diff --git a/packages/cli/src/bench/__fixtures__/scenarios/ci-gate.json b/packages/cli/src/bench/__fixtures__/scenarios/ci-gate.json
index efc9dfeee..e0e26c4ae 100644
--- a/packages/cli/src/bench/__fixtures__/scenarios/ci-gate.json
+++ b/packages/cli/src/bench/__fixtures__/scenarios/ci-gate.json
@@ -3,7 +3,6 @@
   "version": 1,
   "target": "http://localhost:3000",
   "defaults": {
-    "runs": 3,
     "load": { "rate": "200/s" }
   },
   "actors": [
diff --git a/packages/cli/src/bench/action.ts b/packages/cli/src/bench/action.ts
index 5bef894d4..bd5707277 100644
--- a/packages/cli/src/bench/action.ts
+++ b/packages/cli/src/bench/action.ts
@@ -81,11 +81,15 @@ export default async function runBench(
     return void exit(2);
   }
 
-  // Preflight every runner so an unsupported scenario type fails fast, before
-  // any probe or load.
+  // Preflight every runner so an unsupported scenario type or an option the
+  // runner cannot honor fails fast, before any probe or load.
   let runners;
   try {
-    runners = suite.scenarios.map((scenario) => runnerFor(scenario.type));
+    runners = suite.scenarios.map((scenario) => {
+      const runner = runnerFor(scenario.type);
+      runner.validate?.(scenario);
+      return runner;
+    });
   } catch (error) {
     log(error instanceof Error ? error.message : String(error));
     return void exit(2);
diff --git a/packages/cli/src/bench/result/build.test.ts b/packages/cli/src/bench/result/build.test.ts
index 5125d80e0..ec34dd45b 100644
--- a/packages/cli/src/bench/result/build.test.ts
+++ b/packages/cli/src/bench/result/build.test.ts
@@ -52,6 +52,16 @@ test("buildScenarioResult - summarizes load and evaluates expect", () => {
   assert.strictEqual(result.passed, true);
 });
 
+test("buildScenarioResult - a run that measured nothing never passes", () => {
+  // No requests means every `expect` assertion is vacuously satisfied, but the
+  // scenario must still fail rather than report a green gate.
+  const result = buildScenarioResult(resolvedInbox(), {
+    ...measurement(),
+    requests: { total: 0, ok: 0, failed: 0, successRate: 1 },
+  });
+  assert.strictEqual(result.passed, false);
+});
+
 test("buildReport - gate passes only when all scenarios pass", () => {
   const ok = buildScenarioResult(resolvedInbox(), measurement());
   const bad = buildScenarioResult(resolvedInbox(), {
diff --git a/packages/cli/src/bench/result/build.ts b/packages/cli/src/bench/result/build.ts
index 844611202..364e91a3c 100644
--- a/packages/cli/src/bench/result/build.ts
+++ b/packages/cli/src/bench/result/build.ts
@@ -50,6 +50,10 @@ export function buildScenarioResult(
   measurement: ScenarioMeasurement,
 ): ScenarioResult {
   const { results, passed } = evaluateExpect(scenario.expect, measurement);
+  // A scenario that measured no requests must never pass: an empty sample set
+  // makes every `expect` assertion vacuously true (and a missing-metric one
+  // could only fail), so without this guard a run that sent nothing would
+  // report a green gate.
   return {
     name: scenario.name,
     type: scenario.type,
@@ -60,7 +64,7 @@ export function buildScenarioResult(
     server: measurement.server,
     errors: measurement.errors,
     expectations: results,
-    passed,
+    passed: passed && measurement.requests.total > 0,
     ...(measurement.histogram ? { histogram: measurement.histogram } : {}),
   };
 }
diff --git a/packages/cli/src/bench/scenario/normalize.test.ts b/packages/cli/src/bench/scenario/normalize.test.ts
index b05000129..48d9ddf18 100644
--- a/packages/cli/src/bench/scenario/normalize.test.ts
+++ b/packages/cli/src/bench/scenario/normalize.test.ts
@@ -160,3 +160,37 @@ test("normalizeSuite - jit signing allows a time-windowed target", () => {
   assert.strictEqual(s.signing, "jit");
   assert.strictEqual(s.signatureTimeWindow, true);
 });
+
+test("normalizeSuite - rejects warmup not shorter than duration", () => {
+  assert.throws(
+    () =>
+      normalizeSuite(suite({
+        defaults: { duration: "10s", warmup: "10s" },
+      })),
+    (error: unknown) =>
+      error instanceof SuiteNormalizeError && /warmup/.test(error.message),
+  );
+  assert.throws(
+    () =>
+      normalizeSuite(suite({
+        defaults: { duration: "10s", warmup: "30s" },
+      })),
+    SuiteNormalizeError,
+  );
+});
+
+test("normalizeSuite - allows warmup shorter than duration", () => {
+  const s = normalizeSuite(suite({
+    defaults: { duration: "10s", warmup: "9s" },
+  })).scenarios[0];
+  assert.strictEqual(s.durationMs, 10_000);
+  assert.strictEqual(s.warmupMs, 9000);
+});
+
+test("normalizeSuite - rejects multiple runs (runs > 1)", () => {
+  assert.throws(
+    () => normalizeSuite(suite({ defaults: { runs: 3 } })),
+    (error: unknown) =>
+      error instanceof SuiteNormalizeError && /runs/.test(error.message),
+  );
+});
diff --git a/packages/cli/src/bench/scenario/normalize.ts b/packages/cli/src/bench/scenario/normalize.ts
index b94180e68..7d449eeaa 100644
--- a/packages/cli/src/bench/scenario/normalize.ts
+++ b/packages/cli/src/bench/scenario/normalize.ts
@@ -140,21 +140,36 @@ function resolveScenario(scenario: Scenario, suite: Suite): ResolvedScenario {
         "open-loop rate, or signing: pipeline or jit.",
     );
   }
+  const durationMs = resolveDuration(
+    scenario.duration ?? defaults.duration,
+    DEFAULT_DURATION_MS,
+  );
+  const warmupMs = resolveDuration(
+    scenario.warmup ?? defaults.warmup,
+    DEFAULT_WARMUP_MS,
+  );
+  if (warmupMs >= durationMs) {
+    throw new SuiteNormalizeError(
+      `Scenario "${scenario.name}": warmup (${warmupMs}ms) must be shorter ` +
+        `than duration (${durationMs}ms); otherwise no requests are measured.`,
+    );
+  }
+  const runs = scenario.runs ?? defaults.runs ?? DEFAULT_RUNS;
+  if (runs > 1) {
+    throw new SuiteNormalizeError(
+      `Scenario "${scenario.name}": multiple runs (runs > 1) are not yet ` +
+        "implemented in fedify bench; set runs to 1.",
+    );
+  }
   return {
     name: scenario.name,
     type: scenario.type,
     load,
-    durationMs: resolveDuration(
-      scenario.duration ?? defaults.duration,
-      DEFAULT_DURATION_MS,
-    ),
-    warmupMs: resolveDuration(
-      scenario.warmup ?? defaults.warmup,
-      DEFAULT_WARMUP_MS,
-    ),
+    durationMs,
+    warmupMs,
     signing,
     signatureTimeWindow,
-    runs: scenario.runs ?? defaults.runs ?? DEFAULT_RUNS,
+    runs,
     recipients: asList(scenario.recipient),
     inbox: scenario.inbox,
     activity: scenario.activity,
diff --git a/packages/cli/src/bench/scenarios/inbox.test.ts b/packages/cli/src/bench/scenarios/inbox.test.ts
index 4fc874a6f..451802c73 100644
--- a/packages/cli/src/bench/scenarios/inbox.test.ts
+++ b/packages/cli/src/bench/scenarios/inbox.test.ts
@@ -15,18 +15,18 @@ import { spawnSyntheticServer } from "../server/synthetic.ts";
 import { inboxRunner } from "./inbox.ts";
 
 // Stands up a real Fedify federation in benchmark mode that serves WebFinger,
-// the recipient actor, and an inbox that verifies incoming signatures.
-async function spawnBenchmarkTarget() {
+// the recipient actor(s), and an inbox that verifies incoming signatures.
+async function spawnBenchmarkTarget(usernames: string[] = ["alice"]) {
   // No message queue, so incoming activities are processed inline (which also
   // keeps the test process from being held open by a queue worker timer).
   const federation = createFederation<void>({
     kv: new MemoryKvStore(),
     benchmarkMode: true,
   });
-  let keyPairs: CryptoKeyPair[] | undefined;
+  const keyPairsByUser = new Map<string, CryptoKeyPair[]>();
   federation
     .setActorDispatcher("/users/{identifier}", async (ctx, identifier) => {
-      if (identifier !== "alice") return null;
+      if (!usernames.includes(identifier)) return null;
       const pairs = await ctx.getActorKeyPairs(identifier);
       return new Person({
         id: ctx.getActorUri(identifier),
@@ -37,14 +37,20 @@ async function spawnBenchmarkTarget() {
         assertionMethods: pairs.map((p) => p.multikey),
       });
     })
-    .mapHandle((_ctx, username) => (username === "alice" ? "alice" : null))
+    .mapHandle((_ctx, username) =>
+      usernames.includes(username) ? username : null
+    )
     .setKeyPairsDispatcher(async (_ctx, identifier) => {
-      if (identifier !== "alice") return [];
-      keyPairs ??= [
-        await generateCryptoKeyPair("RSASSA-PKCS1-v1_5"),
-        await generateCryptoKeyPair("Ed25519"),
-      ];
-      return keyPairs;
+      if (!usernames.includes(identifier)) return [];
+      let pairs = keyPairsByUser.get(identifier);
+      if (pairs == null) {
+        pairs = [
+          await generateCryptoKeyPair("RSASSA-PKCS1-v1_5"),
+          await generateCryptoKeyPair("Ed25519"),
+        ];
+        keyPairsByUser.set(identifier, pairs);
+      }
+      return pairs;
     });
 
   let received = 0;
@@ -54,17 +60,25 @@ async function spawnBenchmarkTarget() {
       received++;
     });
 
+  // Record every inbox path that was POSTed to, so a test can confirm that
+  // deliveries were spread across multiple recipients' personal inboxes.
+  const inboxHits = new Set<string>();
   const server = serve({
     port: 0,
     hostname: "127.0.0.1",
     silent: true,
-    fetch: (request: Request) =>
-      federation.fetch(request, { contextData: undefined }),
+    fetch: (request: Request) => {
+      if (request.method === "POST") {
+        inboxHits.add(new URL(request.url).pathname);
+      }
+      return federation.fetch(request, { contextData: undefined });
+    },
   });
   await server.ready();
   return {
     url: new URL(server.url!),
     receivedCount: () => received,
+    inboxHits: () => inboxHits,
     close: () => server.close(true),
   };
 }
@@ -127,3 +141,115 @@ test("inboxRunner - signed deliveries verify against a benchmarkMode target", as
     }
   }
 });
+
+test("inboxRunner - rotates deliveries across multiple recipients", async () => {
+  const target = await spawnBenchmarkTarget(["alice", "bob"]);
+  let fleet: Awaited<ReturnType<typeof spawnSyntheticServer>> | undefined;
+  try {
+    fleet = await spawnSyntheticServer(
+      await buildFleet([{
+        count: 2,
+        signatureStandards: ["draft-cavage-http-signatures-12"],
+      }]),
+    );
+    const suite: Suite = {
+      version: 1,
+      target: target.url.href,
+      scenarios: [{
+        name: "inbox-multi",
+        type: "inbox",
+        recipient: [
+          new URL("/users/alice", target.url).href,
+          new URL("/users/bob", target.url).href,
+        ],
+        // Personal inboxes so each recipient's deliveries hit a distinct path.
+        inbox: "personal",
+        load: { concurrency: 2 },
+        duration: "300ms",
+      }],
+    };
+    const scenario = normalizeSuite(suite).scenarios[0];
+    const measurement = await inboxRunner.run({
+      scenario,
+      target: target.url,
+      documentLoader: await getDocumentLoader({ allowPrivateAddress: true }),
+      contextLoader: await getContextLoader({ allowPrivateAddress: true }),
+      allowPrivateAddress: true,
+      fleet,
+    });
+
+    assert.strictEqual(
+      measurement.requests.successRate,
+      1,
+      `expected all deliveries to succeed; errors: ${
+        JSON.stringify(measurement.errors)
+      }`,
+    );
+    // Both recipients' personal inboxes received deliveries.
+    const hits = target.inboxHits();
+    assert.ok(
+      hits.has("/users/alice/inbox"),
+      `expected alice's inbox to be hit; hits: ${JSON.stringify([...hits])}`,
+    );
+    assert.ok(
+      hits.has("/users/bob/inbox"),
+      `expected bob's inbox to be hit; hits: ${JSON.stringify([...hits])}`,
+    );
+  } finally {
+    try {
+      await fleet?.close();
+    } finally {
+      await target.close();
+    }
+  }
+});
+
+test("inboxRunner.validate - rejects activity options it cannot honor", () => {
+  function resolve(activity: Record<string, unknown>) {
+    return normalizeSuite({
+      version: 1,
+      target: "http://localhost:3000",
+      scenarios: [{
+        name: "inbox",
+        type: "inbox",
+        recipient: "http://localhost:3000/users/alice",
+        // deno-lint-ignore no-explicit-any
+        activity: activity as any,
+      }],
+    }).scenarios[0];
+  }
+  assert.throws(
+    () => inboxRunner.validate!(resolve({ type: "Announce" })),
+    /Create activities/,
+  );
+  assert.throws(
+    () =>
+      inboxRunner.validate!(
+        resolve({ type: "Create", embedObject: false }),
+      ),
+    /embedObject/,
+  );
+  assert.throws(
+    () =>
+      inboxRunner.validate!(
+        resolve({ type: "Create", object: { type: "Image" } }),
+      ),
+    /Note objects/,
+  );
+  // A list whose first item is supported but a later one is not is rejected.
+  assert.throws(
+    () => inboxRunner.validate!(resolve({ type: ["Create", "Announce"] })),
+    /Create activities/,
+  );
+  assert.throws(
+    () =>
+      inboxRunner.validate!(
+        resolve({ type: "Create", object: { type: ["Note", "Image"] } }),
+      ),
+    /Note objects/,
+  );
+  // The default Create/Note activity is accepted.
+  assert.doesNotThrow(() =>
+    inboxRunner.validate!(resolve({ type: "Create", object: { type: "Note" } }))
+  );
+});
diff --git a/packages/cli/src/bench/scenarios/inbox.ts b/packages/cli/src/bench/scenarios/inbox.ts
index ce34f3221..29abcc865 100644
--- a/packages/cli/src/bench/scenarios/inbox.ts
+++ b/packages/cli/src/bench/scenarios/inbox.ts
@@ -15,6 +15,7 @@ import { runLoad } from "../load/generator.ts";
 import { aggregateSamples } from "../metrics/aggregate.ts";
 import { fetchServerMetrics } from "../metrics/stats-client.ts";
 import { asList } from "../scenario/coerce.ts";
+import type { ResolvedScenario } from "../scenario/normalize.ts";
 import type { ActivitySpec } from "../scenario/types.ts";
 import type { SyntheticActor } from "../server/synthetic.ts";
 import { createActivityIdMinter } from "../signing/activity-id.ts";
@@ -34,8 +35,18 @@ import {
   sendRequest,
 } from "./runner.ts";
 
+/** One discovered delivery target: an inbox and the actor it belongs to. */
+interface InboxTarget {
+  readonly inbox: URL;
+  readonly actorUri: URL;
+}
+
 /** The `inbox` scenario runner. */
 export const inboxRunner: ScenarioRunner = {
+  validate(scenario: ResolvedScenario): void {
+    validateActivity(scenario);
+  },
+
   async run(context: RunContext) {
     const { scenario, fleet } = context;
     if (fleet == null || fleet.actors.length < 1) {
@@ -46,29 +57,40 @@ export const inboxRunner: ScenarioRunner = {
     if (scenario.recipients.length < 1) {
       throw new Error("The inbox scenario requires a recipient.");
     }
+    validateActivity(scenario);
     const fetchImpl = context.fetch ?? fetch;
-    const discovered = await discoverInbox(scenario.recipients[0], {
-      documentLoader: context.documentLoader,
-      contextLoader: context.contextLoader,
-      allowPrivateAddress: context.allowPrivateAddress,
-    });
-    const inbox = selectInbox(discovered, scenario.inbox);
+    // Discover every recipient's inbox the way a real peer would, then rotate
+    // across them so multi-recipient suites spread load over each inbox.
+    const targets: InboxTarget[] = [];
+    for (const recipient of scenario.recipients) {
+      const discovered = await discoverInbox(recipient, {
+        documentLoader: context.documentLoader,
+        contextLoader: context.contextLoader,
+        allowPrivateAddress: context.allowPrivateAddress,
+      });
+      targets.push({
+        inbox: selectInbox(discovered, scenario.inbox),
+        actorUri: discovered.actorUri,
+      });
+    }
 
     const actors = fleet.actors;
     const minter = createActivityIdMinter(fleet.url);
-    let actorIndex = 0;
+    let index = 0;
     const factory = () => {
-      const actor = actors[actorIndex++ % actors.length];
+      const i = index++;
+      const actor = actors[i % actors.length];
+      const target = targets[i % targets.length];
       const activity = buildActivity(
         scenario.activity,
         actor,
         minter.next(),
         fleet.url,
-        discovered.actorUri,
+        target.actorUri,
       );
       return signInboxDelivery({
         actor,
-        inbox,
+        inbox: target.inbox,
         activity,
         contextLoader: context.contextLoader,
       });
@@ -106,6 +128,40 @@ export const inboxRunner: ScenarioRunner = {
   },
 };
 
+/**
+ * Rejects the activity options the inbox runner cannot yet honor: it always
+ * delivers a `Create` carrying an embedded `Note`, so a different activity or
+ * object type, or `embedObject: false`, is refused with a clear message.
+ */
+function validateActivity(scenario: ResolvedScenario): void {
+  const spec = scenario.activity;
+  if (spec == null) return;
+  // `type` and `object.type` are scalar-or-list, so check every supplied value:
+  // a list such as `[Create, Announce]` is just as unsupported as `Announce`.
+  const badType = asList(spec.type).find((type) => type !== "Create");
+  if (badType != null) {
+    throw new Error(
+      `Scenario "${scenario.name}": the inbox runner currently supports only ` +
+        `Create activities; got ${JSON.stringify(badType)}.`,
+    );
+  }
+  if (spec.embedObject === false) {
+    throw new Error(
+      `Scenario "${scenario.name}": the inbox runner always embeds the ` +
+        "activity's object; embedObject: false is not yet supported.",
+    );
+  }
+  const badObjectType = asList(spec.object?.type).find((type) =>
+    type !== "Note"
+  );
+  if (badObjectType != null) {
+    throw new Error(
+      `Scenario "${scenario.name}": the inbox runner currently supports only ` +
+        `Note objects; got ${JSON.stringify(badObjectType)}.`,
+    );
+  }
+}
+
 function buildActivity(
   spec: ActivitySpec | undefined,
   actor: SyntheticActor,
@@ -113,13 +169,7 @@ function buildActivity(
   base: URL,
   recipient: URL,
 ): Activity {
-  const type = asList(spec?.type)[0] ?? "Create";
-  if (type !== "Create") {
-    throw new Error(
-      `The inbox runner currently supports only Create activities; got ` +
-        `${JSON.stringify(type)}.`,
-    );
-  }
+  // `validateActivity` has already rejected anything but a Create/Note here.
   const note = new Note({
     id: new URL(`/objects/${crypto.randomUUID()}`, base),
     attribution: actor.id,
diff --git a/packages/cli/src/bench/scenarios/runner.ts b/packages/cli/src/bench/scenarios/runner.ts
index 8f1a59cfc..37389dc12 100644
--- a/packages/cli/src/bench/scenarios/runner.ts
+++ b/packages/cli/src/bench/scenarios/runner.ts
@@ -34,6 +34,12 @@ export interface RunContext {
 /** A runner for one scenario type. */
 export interface ScenarioRunner {
   run(context: RunContext): Promise<ScenarioMeasurement>;
+  /**
+   * Optionally rejects a resolved scenario the runner cannot honor, before any
+   * probe or load.  Called during preflight; throwing here surfaces as a
+   * configuration error (exit 2) with the thrown message.
+   */
+  validate?(scenario: ResolvedScenario): void;
 }
 
 /** Performs one HTTP send and classifies the result as a send outcome. */

From 0e32942fbba77dc71140c4def48aa36671a93860 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 10:37:11 +0900
Subject: [PATCH 18/47] Validate expect assertions before sending bench load

A malformed `expect` assertion was only parsed while evaluating
results, which happens after the entire benchmark load has been sent.
Worse, the run loop has no catch around result building, so the
resulting AssertionParseError escaped uncaught and crashed the command
instead of failing as a configuration error.

Add validateExpectBlock(), which parses every assertion in a scenario's
`expect` block, and run it in the preflight step (alongside runner
validation) before any probe or load.  A typo in a CI gate now exits 2
without sending traffic, with a message naming the offending metric.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 packages/cli/src/bench/action.test.ts         | 33 +++++++++++++++++
 packages/cli/src/bench/action.ts              |  7 ++--
 .../src/bench/result/expect/evaluate.test.ts  | 35 ++++++++++++++++++-
 .../cli/src/bench/result/expect/evaluate.ts   | 32 ++++++++++++++++-
 4 files changed, 103 insertions(+), 4 deletions(-)

diff --git a/packages/cli/src/bench/action.test.ts b/packages/cli/src/bench/action.test.ts
index ea622ccd3..bb13e2d3a 100644
--- a/packages/cli/src/bench/action.test.ts
+++ b/packages/cli/src/bench/action.test.ts
@@ -223,6 +223,39 @@ scenarios:
   assert.match(message, /loopback or private/);
 });
 
+test("runBench - malformed expect assertion exits 2 before any load", async () => {
+  // The expect typo must be caught in preflight, so the run exits 2 (a config
+  // error) without ever probing the target or sending load.
+  const file = await writeSuite(`version: 1
+target: http://localhost:3000
+scenarios:
+  - name: wf
+    type: webfinger
+    recipient: "acct:alice@x"
+    expect:
+      successRate: "totally not valid"
+`);
+  let code = -1;
+  let message = "";
+  let fetched = false;
+  await runBench(command({ scenario: file }), {
+    exit: (c) => {
+      code = c;
+    },
+    writeOutput: () => Promise.resolve(),
+    log: (m) => {
+      message = m;
+    },
+    fetch: () => {
+      fetched = true;
+      return Promise.reject(new Error("no request should be sent"));
+    },
+  });
+  assert.strictEqual(code, 2);
+  assert.match(message, /expect|assertion/i);
+  assert.strictEqual(fetched, false);
+});
+
 test("runBench - invalid suite exits 2", async () => {
   const file = await writeSuite(`target: http://localhost:3000
 scenarios:
diff --git a/packages/cli/src/bench/action.ts b/packages/cli/src/bench/action.ts
index bd5707277..92cb87f58 100644
--- a/packages/cli/src/bench/action.ts
+++ b/packages/cli/src/bench/action.ts
@@ -11,6 +11,7 @@ import {
 } from "./result/build.ts";
 import { probeBenchmarkMode } from "./discovery/probe.ts";
 import { renderReport, type ReportFormat } from "./render/index.ts";
+import { validateExpectBlock } from "./result/expect/evaluate.ts";
 import { loadSuiteFile, renderSuiteTemplates } from "./scenario/load.ts";
 import {
   normalizeSuite,
@@ -81,13 +82,15 @@ export default async function runBench(
     return void exit(2);
   }
 
-  // Preflight every runner so an unsupported scenario type or an option the
-  // runner cannot honor fails fast, before any probe or load.
+  // Preflight every runner so an unsupported scenario type, an option the
+  // runner cannot honor, or a malformed `expect` assertion fails fast, before
+  // any probe or load.
   let runners;
   try {
     runners = suite.scenarios.map((scenario) => {
       const runner = runnerFor(scenario.type);
       runner.validate?.(scenario);
+      validateExpectBlock(scenario.expect);
       return runner;
     });
   } catch (error) {
diff --git a/packages/cli/src/bench/result/expect/evaluate.test.ts b/packages/cli/src/bench/result/expect/evaluate.test.ts
index 0947bed53..cdbfc398b 100644
--- a/packages/cli/src/bench/result/expect/evaluate.test.ts
+++ b/packages/cli/src/bench/result/expect/evaluate.test.ts
@@ -1,6 +1,11 @@
 import assert from "node:assert/strict";
 import test from "node:test";
-import { evaluateExpect, type MetricView } from "./evaluate.ts";
+import { AssertionParseError } from "./assert.ts";
+import {
+  evaluateExpect,
+  type MetricView,
+  validateExpectBlock,
+} from "./evaluate.ts";
 
 function metrics(overrides: Partial<MetricView> = {}): MetricView {
   return {
@@ -119,3 +124,31 @@ test("evaluateExpect - incompatible assertion unit fails", () => {
   );
   assert.strictEqual(results[0].pass, false);
 });
+
+test("validateExpectBlock - accepts a well-formed block", () => {
+  assert.doesNotThrow(() =>
+    validateExpectBlock({
+      successRate: ">= 99%",
+      "latency.p95": "< 100ms",
+      "errors.5xx": "== 0",
+      "queueDrain.p95": { assert: "< 2s", severity: "warn" },
+    })
+  );
+});
+
+test("validateExpectBlock - throws on a malformed assertion", () => {
+  assert.throws(
+    () => validateExpectBlock({ successRate: "totally not valid" }),
+    (error: unknown) =>
+      error instanceof AssertionParseError &&
+      /successRate/.test(error.message),
+  );
+});
+
+test("validateExpectBlock - throws on an entry without an assertion", () => {
+  assert.throws(
+    // deno-lint-ignore no-explicit-any
+    () => validateExpectBlock({ successRate: { severity: "warn" } as any }),
+    AssertionParseError,
+  );
+});
diff --git a/packages/cli/src/bench/result/expect/evaluate.ts b/packages/cli/src/bench/result/expect/evaluate.ts
index 498828b0c..f4bf752be 100644
--- a/packages/cli/src/bench/result/expect/evaluate.ts
+++ b/packages/cli/src/bench/result/expect/evaluate.ts
@@ -16,9 +16,39 @@ import type {
   PartialLatencyMs,
   ScenarioResult,
 } from "../model.ts";
-import { compare, parseAssertion } from "./assert.ts";
+import { AssertionParseError, compare, parseAssertion } from "./assert.ts";
 import { type MetricUnit, metricUnit } from "./metrics.ts";
 
+/**
+ * Parses every assertion in an `expect` block, throwing on the first malformed
+ * one.  Run during preflight so that a typo in a CI gate is reported as a
+ * configuration error before any load is sent, instead of crashing the run with
+ * an uncaught {@link AssertionParseError} after the traffic has already gone out.
+ * @param expect The scenario's `expect` block.
+ * @throws {AssertionParseError} If an entry has no assertion string or its
+ *         assertion cannot be parsed.
+ */
+export function validateExpectBlock(expect: ExpectBlock): void {
+  for (const [metric, value] of Object.entries(expect)) {
+    const assertion = typeof value === "string" ? value : value.assert;
+    if (typeof assertion !== "string") {
+      throw new AssertionParseError(
+        `The \`expect\` entry for "${metric}" has no assertion string.`,
+      );
+    }
+    try {
+      parseAssertion(assertion);
+    } catch (error) {
+      if (!(error instanceof AssertionParseError)) throw error;
+      throw new AssertionParseError(
+        `Invalid \`expect\` assertion for "${metric}": ${
+          JSON.stringify(assertion)
+        }.`,
+      );
+    }
+  }
+}
+
 /** The subset of a scenario result that `expect` metrics are looked up from. */
 export type MetricView = Pick<
   ScenarioResult,

From 96ddf54e3d06a4eefc26e75100eb4949d9a89084 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 11:04:35 +0900
Subject: [PATCH 19/47] Scope bench server metrics to the measured window

The cooperative `stats` endpoint is cumulative and has no reset, but the
inbox and webfinger runners read it once at the end, so the reported
server numbers (and any signatureVerification.* expectations) folded in
warm-up traffic and every earlier scenario in the suite.  Client samples
were already windowed; the server side was not, so the two disagreed.

Take a server snapshot at the measured-window boundary and diff it
against the end snapshot:

 -  stats-client.ts gains a raw `ServerSnapshot` (signature histogram and
    queue-depth gauge), `parseServerSnapshot`, `diffSnapshots` (subtracts
    bucket counts; the gauge is not cumulative, so the end value is kept),
    and `snapshotToMetrics`.  `fetchServerSnapshot` returns `null` only on
    a transport failure, a non-OK response, or a parse failure; an
    available-but-empty snapshot is non-null, so an unavailable
    baseline is never mistaken for an empty
    one.  Histogram subtraction requires identical bucket boundaries, and
    refuses (yields no signature metric) otherwise.

 -  runner.ts gains `withMeasuredWindowStart`, which gates every measured
    send on a one-shot boundary callback so the baseline is captured
    before any measured request reaches the target.

 -  The inbox and webfinger runners snapshot the baseline at the boundary
    and report server metrics only when both ends of the window were
    captured, instead of falling back to the cumulative snapshot.

A few warm-up requests still in flight at the boundary may be attributed
to the window; a hard drain would distort the
coordinated-omission-corrected client latency, so that bounded residue
is accepted and documented.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 docs/manual/benchmarking.md                   |  10 +-
 .../src/bench/metrics/stats-client.test.ts    | 132 +++++++++++-
 .../cli/src/bench/metrics/stats-client.ts     | 188 ++++++++++++++----
 .../cli/src/bench/scenarios/inbox.test.ts     |  50 +++++
 packages/cli/src/bench/scenarios/inbox.ts     |  28 ++-
 .../cli/src/bench/scenarios/runner.test.ts    |  57 ++++++
 packages/cli/src/bench/scenarios/runner.ts    |  33 ++-
 packages/cli/src/bench/scenarios/webfinger.ts |  32 ++-
 8 files changed, 482 insertions(+), 48 deletions(-)
 create mode 100644 packages/cli/src/bench/scenarios/runner.test.ts

diff --git a/docs/manual/benchmarking.md b/docs/manual/benchmarking.md
index 0c72ee2d3..07e5c10ae 100644
--- a/docs/manual/benchmarking.md
+++ b/docs/manual/benchmarking.md
@@ -214,8 +214,14 @@ does not infer the format, so pass both (for example
 `--format json --output report.json`).  JSON is the canonical machine form: it
 validates against the [report schema] and carries
 its own `$schema`; the text and Markdown renderers derive from the same model,
-keeping client-measured and server-reported numbers distinct.  In GitHub
-Actions, append the Markdown report to the job summary:
+keeping client-measured and server-reported numbers distinct.  Both sides are
+scoped to a measured window: client latency excludes warm-up samples, and the
+server-reported numbers are the difference between a `stats` snapshot taken when
+the measured window opens and one taken when it closes, so they exclude every
+earlier scenario in the suite and the scenario's own warm-up traffic (apart from
+warm-up requests still in flight at the boundary, a residue no larger than the
+number of requests in flight at that moment).  In GitHub Actions, append the
+Markdown report to the job summary:
 
 ~~~~ sh
 fedify bench scenario.yaml --format markdown >> "$GITHUB_STEP_SUMMARY"
diff --git a/packages/cli/src/bench/metrics/stats-client.test.ts b/packages/cli/src/bench/metrics/stats-client.test.ts
index ca185ea17..4ca68ca02 100644
--- a/packages/cli/src/bench/metrics/stats-client.test.ts
+++ b/packages/cli/src/bench/metrics/stats-client.test.ts
@@ -1,6 +1,14 @@
 import assert from "node:assert/strict";
 import test from "node:test";
-import { fetchServerMetrics, parseServerMetrics } from "./stats-client.ts";
+import {
+  diffSnapshots,
+  fetchServerMetrics,
+  fetchServerSnapshot,
+  parseServerMetrics,
+  parseServerSnapshot,
+  type ServerSnapshot,
+  snapshotToMetrics,
+} from "./stats-client.ts";
 
 function snapshot() {
   return {
@@ -126,3 +134,125 @@ test("fetchServerMetrics - null on a failed request", async () => {
   );
   assert.strictEqual(metrics, null);
 });
+
+test("parseServerSnapshot - extracts raw histogram and queue depth", () => {
+  const snap = parseServerSnapshot(snapshot());
+  assert.deepEqual(snap?.signature?.boundaries, [5, 10, 25, 50, 100]);
+  assert.deepEqual(snap?.signature?.counts, [10, 20, 30, 20, 15, 5]);
+  assert.strictEqual(snap?.queueDepthMax, 7);
+});
+
+test("parseServerSnapshot - empty (non-null) when no relevant instruments", () => {
+  // A parseable-but-empty snapshot yields an empty snapshot, not null, so a
+  // successful baseline fetch is distinguishable from an unavailable one.
+  assert.deepEqual(
+    parseServerSnapshot({ version: 1, source: "server", scopeMetrics: [] }),
+    { signature: null, queueDepthMax: null },
+  );
+});
+
+test("diffSnapshots - subtracts the baseline bucket counts", () => {
+  const baseline: ServerSnapshot = {
+    signature: { boundaries: [5, 10, 25], counts: [4, 6, 10, 0] },
+    queueDepthMax: 2,
+  };
+  const end: ServerSnapshot = {
+    signature: { boundaries: [5, 10, 25], counts: [10, 16, 30, 4] },
+    queueDepthMax: 9,
+  };
+  const diff = diffSnapshots(baseline, end);
+  assert.deepEqual(diff?.signature?.counts, [6, 10, 20, 4]);
+  // The queue depth is a gauge, so the end value is kept (not subtracted).
+  assert.strictEqual(diff?.queueDepthMax, 9);
+});
+
+test("diffSnapshots - an empty baseline keeps the full end histogram", () => {
+  // Nothing was recorded before the window opened, so the whole end histogram
+  // belongs to the window.
+  const baseline: ServerSnapshot = { signature: null, queueDepthMax: null };
+  const end: ServerSnapshot = {
+    signature: { boundaries: [5], counts: [3, 1] },
+    queueDepthMax: 4,
+  };
+  const diff = diffSnapshots(baseline, end);
+  assert.deepEqual(diff.signature?.counts, [3, 1]);
+  assert.strictEqual(diff.queueDepthMax, 4);
+});
+
+test("diffSnapshots - incompatible bucketing drops the signature histogram", () => {
+  // Same length but different boundary values is not comparable; refuse to
+  // subtract rather than misattribute counts.
+  const baseline: ServerSnapshot = {
+    signature: { boundaries: [5, 10, 20], counts: [1, 1, 1, 1] },
+    queueDepthMax: null,
+  };
+  const end: ServerSnapshot = {
+    signature: { boundaries: [5, 10, 25], counts: [2, 2, 2, 2] },
+    queueDepthMax: null,
+  };
+  assert.strictEqual(diffSnapshots(baseline, end).signature, null);
+});
+
+test("diffSnapshots - mismatched bucket lengths drop the signature histogram", () => {
+  const baseline: ServerSnapshot = {
+    signature: { boundaries: [5, 10], counts: [1, 1, 1] },
+    queueDepthMax: null,
+  };
+  const end: ServerSnapshot = {
+    signature: { boundaries: [5, 10, 25], counts: [2, 2, 2, 2] },
+    queueDepthMax: null,
+  };
+  assert.strictEqual(diffSnapshots(baseline, end).signature, null);
+});
+
+test("diffSnapshots + snapshotToMetrics - percentiles reflect only the window", () => {
+  // The window's requests landed entirely in the fastest bucket, even though
+  // the cumulative end snapshot is dominated by slow earlier requests.
+  const baseline: ServerSnapshot = {
+    signature: {
+      boundaries: [5, 10, 25, 50, 100],
+      counts: [0, 0, 0, 0, 0, 100],
+    },
+    queueDepthMax: null,
+  };
+  const end: ServerSnapshot = {
+    signature: {
+      boundaries: [5, 10, 25, 50, 100],
+      counts: [50, 0, 0, 0, 0, 100],
+    },
+    queueDepthMax: null,
+  };
+  const metrics = snapshotToMetrics(diffSnapshots(baseline, end));
+  assert.strictEqual(metrics?.signatureVerificationMs?.overall.p50, 5);
+  assert.strictEqual(metrics?.signatureVerificationMs?.overall.p95, 5);
+});
+
+test("snapshotToMetrics - omits a signature histogram with no measurements", () => {
+  const empty: ServerSnapshot = {
+    signature: { boundaries: [5, 10], counts: [0, 0, 0] },
+    queueDepthMax: null,
+  };
+  assert.strictEqual(snapshotToMetrics(empty), null);
+});
+
+test("fetchServerSnapshot - null on a failed request, empty on success", async () => {
+  // A failed fetch is unavailable (null); a successful but empty snapshot is a
+  // real, diffable baseline (non-null), so the two are not conflated.
+  const unavailable = await fetchServerSnapshot(
+    new URL("http://localhost:3000"),
+    () => Promise.resolve(new Response("nope", { status: 503 })),
+  );
+  assert.strictEqual(unavailable, null);
+
+  const empty = await fetchServerSnapshot(
+    new URL("http://localhost:3000"),
+    () =>
+      Promise.resolve(
+        new Response(
+          JSON.stringify({ version: 1, source: "server", scopeMetrics: [] }),
+          { headers: { "content-type": "application/json" } },
+        ),
+      ),
+  );
+  assert.deepEqual(empty, { signature: null, queueDepthMax: null });
+});
diff --git a/packages/cli/src/bench/metrics/stats-client.ts b/packages/cli/src/bench/metrics/stats-client.ts
index 42e46b745..101039a5a 100644
--- a/packages/cli/src/bench/metrics/stats-client.ts
+++ b/packages/cli/src/bench/metrics/stats-client.ts
@@ -5,6 +5,12 @@
  * (see *@fedify/fedify*'s benchmark module).  This module projects the relevant
  * instruments — signature verification latency and queue depth — into the
  * report's `server` section, marked distinct from client-measured numbers.
+ *
+ * The server reader is cumulative and has no reset, so a single snapshot covers
+ * the target's whole lifetime.  To scope server numbers to one scenario's
+ * measured window, callers take a {@link ServerSnapshot} baseline at the window
+ * start and another at the end, {@link diffSnapshots} the two, and project the
+ * difference with {@link snapshotToMetrics}.
  * @since 2.3.0
  * @module
  */
@@ -36,70 +42,154 @@ interface Snapshot {
   >;
 }
 
+/** An explicit-bucket histogram: bucket upper boundaries and their counts. */
+export interface ServerHistogram {
+  readonly boundaries: number[];
+  readonly counts: number[];
+}
+
+/**
+ * The relevant instruments extracted from a `stats` snapshot, kept in raw
+ * (un-projected) form so that two snapshots can be diffed.
+ */
+export interface ServerSnapshot {
+  /** The signature-verification latency histogram, or `null` if absent. */
+  readonly signature: ServerHistogram | null;
+  /** The maximum observed queue depth, or `null` if absent. */
+  readonly queueDepthMax: number | null;
+}
+
 /**
- * Parses a `stats` snapshot into the report's server metrics, or `null` when
- * the snapshot carries no relevant instruments.
+ * Parses a `stats` snapshot into raw server instruments.  A successful parse
+ * always yields a snapshot, even when it carries no relevant instruments (both
+ * fields `null`); `null` is reserved for an unparseable snapshot, so callers can
+ * tell "available but empty" apart from "unavailable".
  * @param snapshot The parsed `stats` JSON.
- * @returns The server metrics, or `null`.
+ * @returns The raw server snapshot, or `null` if it could not be parsed.
  */
-export function parseServerMetrics(snapshot: unknown): ServerMetrics | null {
+export function parseServerSnapshot(snapshot: unknown): ServerSnapshot | null {
   try {
     const metrics = flattenMetrics(snapshot as Snapshot);
-    const result: {
-      signatureVerificationMs?: { overall: PartialLatencyMs };
-      queue?: { depthMax?: number };
-    } = {};
 
-    const signature = metrics.find((m) =>
+    const sig = metrics.find((m) =>
       m.dataPointType === "histogram" &&
       (m.name ?? "").includes("signature.verification")
     );
-    const merged = signature == null
-      ? null
-      : mergeHistogram(signature.dataPoints);
-    if (merged != null) {
-      result.signatureVerificationMs = {
-        overall: {
-          p50: histogramPercentile(merged, 50),
-          p95: histogramPercentile(merged, 95),
-          p99: histogramPercentile(merged, 99),
-        },
-      };
-    }
+    const signature = sig == null ? null : mergeHistogram(sig.dataPoints);
 
+    let queueDepthMax: number | null = null;
     const depth = metrics.find((m) => m.name === "fedify.queue.depth");
     if (depth != null && Array.isArray(depth.dataPoints)) {
-      const values = depth.dataPoints
-        .map((p) => p.value)
-        .filter((v): v is number => typeof v === "number");
-      if (values.length > 0) result.queue = { depthMax: Math.max(...values) };
+      const values = depth.dataPoints.map((p) => p.value).filter(
+        isFiniteNumber,
+      );
+      if (values.length > 0) queueDepthMax = Math.max(...values);
     }
 
-    return Object.keys(result).length > 0 ? result : null;
+    return { signature, queueDepthMax };
   } catch {
     return null;
   }
 }
 
 /**
- * Fetches and parses the target's server metrics.
+ * Subtracts a baseline snapshot from an end snapshot, yielding the instruments
+ * accumulated between the two (the measured window).  Signature histogram
+ * counts are diffed bucket by bucket; the queue depth is a gauge, not a
+ * cumulative count, so the end value is kept as-is.  Callers that cannot obtain
+ * both snapshots should not call this (and should report no server metrics)
+ * rather than passing a stand-in, since a missing baseline cannot be diffed.
+ * @param baseline The snapshot taken at the measured-window start.
+ * @param end The snapshot taken at the measured-window end.
+ * @returns The windowed snapshot.
+ */
+export function diffSnapshots(
+  baseline: ServerSnapshot,
+  end: ServerSnapshot,
+): ServerSnapshot {
+  return {
+    signature: diffHistogram(baseline.signature, end.signature),
+    queueDepthMax: end.queueDepthMax,
+  };
+}
+
+/**
+ * Projects a raw server snapshot into the report's server metrics, or `null`
+ * when it carries no usable measurement.
+ * @param snapshot The raw (optionally diffed) server snapshot.
+ * @returns The projected server metrics, or `null`.
+ */
+export function snapshotToMetrics(
+  snapshot: ServerSnapshot | null,
+): ServerMetrics | null {
+  if (snapshot == null) return null;
+  const result: {
+    signatureVerificationMs?: { overall: PartialLatencyMs };
+    queue?: { depthMax?: number };
+  } = {};
+
+  if (snapshot.signature != null) {
+    const total = snapshot.signature.counts.reduce((sum, n) => sum + n, 0);
+    if (total > 0) {
+      result.signatureVerificationMs = {
+        overall: {
+          p50: histogramPercentile(snapshot.signature, 50),
+          p95: histogramPercentile(snapshot.signature, 95),
+          p99: histogramPercentile(snapshot.signature, 99),
+        },
+      };
+    }
+  }
+  if (snapshot.queueDepthMax != null) {
+    result.queue = { depthMax: snapshot.queueDepthMax };
+  }
+
+  return Object.keys(result).length > 0 ? result : null;
+}
+
+/**
+ * Parses a `stats` snapshot directly into the report's server metrics, or
+ * `null` when no relevant instruments are present.  Equivalent to
+ * `snapshotToMetrics(parseServerSnapshot(snapshot))`.
+ * @param snapshot The parsed `stats` JSON.
+ * @returns The server metrics, or `null`.
+ */
+export function parseServerMetrics(snapshot: unknown): ServerMetrics | null {
+  return snapshotToMetrics(parseServerSnapshot(snapshot));
+}
+
+/**
+ * Fetches and parses the target's raw server snapshot.
  * @param target The target base URL.
  * @param fetchImpl The fetch implementation (overridable for tests).
- * @returns The server metrics, or `null` if unavailable.
+ * @returns The raw server snapshot, or `null` if unavailable.
  */
-export async function fetchServerMetrics(
+export async function fetchServerSnapshot(
   target: URL,
   fetchImpl: typeof fetch = fetch,
-): Promise<ServerMetrics | null> {
+): Promise<ServerSnapshot | null> {
   try {
     const response = await fetchImpl(new URL(STATS_PATH, target));
     if (!response.ok) return null;
-    return parseServerMetrics(await response.json());
+    return parseServerSnapshot(await response.json());
   } catch {
     return null;
   }
 }
 
+/**
+ * Fetches and projects the target's server metrics from a single snapshot.
+ * @param target The target base URL.
+ * @param fetchImpl The fetch implementation (overridable for tests).
+ * @returns The server metrics, or `null` if unavailable.
+ */
+export async function fetchServerMetrics(
+  target: URL,
+  fetchImpl: typeof fetch = fetch,
+): Promise<ServerMetrics | null> {
+  return snapshotToMetrics(await fetchServerSnapshot(target, fetchImpl));
+}
+
 function isFiniteNumber(value: unknown): value is number {
   return typeof value === "number" && Number.isFinite(value);
 }
@@ -113,14 +203,9 @@ function flattenMetrics(snapshot: Snapshot): SnapshotMetric[] {
   );
 }
 
-interface Histogram {
-  readonly boundaries: number[];
-  readonly counts: number[];
-}
-
 function mergeHistogram(
   dataPoints: SnapshotMetric["dataPoints"],
-): Histogram | null {
+): ServerHistogram | null {
   if (!Array.isArray(dataPoints)) return null;
   let boundaries: number[] | null = null;
   let counts: number[] | null = null;
@@ -141,7 +226,34 @@ function mergeHistogram(
   return boundaries != null && counts != null ? { boundaries, counts } : null;
 }
 
-function histogramPercentile(histogram: Histogram, p: number): number {
+function diffHistogram(
+  baseline: ServerHistogram | null,
+  end: ServerHistogram | null,
+): ServerHistogram | null {
+  if (end == null) return null;
+  // A null baseline means nothing was recorded before the window opened, so the
+  // whole end histogram belongs to the window.
+  if (baseline == null) return end;
+  // Two cumulative snapshots of the same instrument share fixed bucket
+  // boundaries; if they somehow disagree, the buckets are not comparable, so
+  // refuse to subtract rather than misattribute counts.
+  if (!histogramsCompatible(baseline, end)) return null;
+  const counts = end.counts.map((count, i) =>
+    Math.max(0, count - baseline.counts[i])
+  );
+  return { boundaries: end.boundaries, counts };
+}
+
+function histogramsCompatible(
+  a: ServerHistogram,
+  b: ServerHistogram,
+): boolean {
+  return a.boundaries.length === b.boundaries.length &&
+    a.counts.length === b.counts.length &&
+    a.boundaries.every((boundary, i) => boundary === b.boundaries[i]);
+}
+
+function histogramPercentile(histogram: ServerHistogram, p: number): number {
   const { boundaries, counts } = histogram;
   const total = counts.reduce((sum, n) => sum + n, 0);
   if (total === 0) return 0;
diff --git a/packages/cli/src/bench/scenarios/inbox.test.ts b/packages/cli/src/bench/scenarios/inbox.test.ts
index 451802c73..100107886 100644
--- a/packages/cli/src/bench/scenarios/inbox.test.ts
+++ b/packages/cli/src/bench/scenarios/inbox.test.ts
@@ -142,6 +142,56 @@ test("inboxRunner - signed deliveries verify against a benchmarkMode target", as
   }
 });
 
+test("inboxRunner - reports server metrics scoped past the warm-up", async () => {
+  const target = await spawnBenchmarkTarget();
+  let fleet: Awaited<ReturnType<typeof spawnSyntheticServer>> | undefined;
+  try {
+    fleet = await spawnSyntheticServer(
+      await buildFleet([{
+        count: 1,
+        signatureStandards: ["draft-cavage-http-signatures-12"],
+      }]),
+    );
+    const suite: Suite = {
+      version: 1,
+      target: target.url.href,
+      scenarios: [{
+        name: "inbox-warmup",
+        type: "inbox",
+        recipient: new URL("/users/alice", target.url).href,
+        inbox: "shared",
+        load: { concurrency: 2 },
+        // A non-zero warm-up exercises the measured-window baseline snapshot.
+        warmup: "120ms",
+        duration: "400ms",
+      }],
+    };
+    const scenario = normalizeSuite(suite).scenarios[0];
+    const measurement = await inboxRunner.run({
+      scenario,
+      target: target.url,
+      documentLoader: await getDocumentLoader({ allowPrivateAddress: true }),
+      contextLoader: await getContextLoader({ allowPrivateAddress: true }),
+      allowPrivateAddress: true,
+      fleet,
+    });
+
+    assert.strictEqual(measurement.requests.successRate, 1);
+    // The measured window verified signatures, so server metrics survive the
+    // baseline diff rather than being cancelled out by warm-up traffic.
+    assert.ok(
+      measurement.server?.signatureVerificationMs != null,
+      "expected windowed server signature-verification metrics",
+    );
+  } finally {
+    try {
+      await fleet?.close();
+    } finally {
+      await target.close();
+    }
+  }
+});
+
 test("inboxRunner - rotates deliveries across multiple recipients", async () => {
   const target = await spawnBenchmarkTarget(["alice", "bob"]);
   let fleet: Awaited<ReturnType<typeof spawnSyntheticServer>> | undefined;
diff --git a/packages/cli/src/bench/scenarios/inbox.ts b/packages/cli/src/bench/scenarios/inbox.ts
index 29abcc865..6df795c46 100644
--- a/packages/cli/src/bench/scenarios/inbox.ts
+++ b/packages/cli/src/bench/scenarios/inbox.ts
@@ -13,7 +13,12 @@ import type { Activity } from "@fedify/vocab";
 import { discoverInbox, selectInbox } from "../discovery/discover.ts";
 import { runLoad } from "../load/generator.ts";
 import { aggregateSamples } from "../metrics/aggregate.ts";
-import { fetchServerMetrics } from "../metrics/stats-client.ts";
+import {
+  diffSnapshots,
+  fetchServerSnapshot,
+  type ServerSnapshot,
+  snapshotToMetrics,
+} from "../metrics/stats-client.ts";
 import { asList } from "../scenario/coerce.ts";
 import type { ResolvedScenario } from "../scenario/normalize.ts";
 import type { ActivitySpec } from "../scenario/types.ts";
@@ -33,6 +38,7 @@ import {
   type RunContext,
   type ScenarioRunner,
   sendRequest,
+  withMeasuredWindowStart,
 } from "./runner.ts";
 
 /** One discovered delivery target: an inbox and the actor it belongs to. */
@@ -99,7 +105,7 @@ export const inboxRunner: ScenarioRunner = {
       total: estimateTotal(scenario),
     });
 
-    const send = async () => {
+    const rawSend = async () => {
       let request: Request;
       try {
         request = await pipeline.next();
@@ -108,6 +114,16 @@ export const inboxRunner: ScenarioRunner = {
       }
       return sendRequest(request, fetchImpl);
     };
+    // Snapshot the server's cumulative metrics at the measured-window boundary
+    // so warm-up and earlier scenarios are diffed out of the reported numbers.
+    // A few warm-up requests still in flight when the baseline is taken may be
+    // attributed to the window; that residue is bounded by the in-flight count.
+    let baseline: ServerSnapshot | null = null;
+    let baselineTaken = false;
+    const send = withMeasuredWindowStart(scenario.warmupMs, async () => {
+      baseline = await fetchServerSnapshot(context.target, fetchImpl);
+      baselineTaken = true;
+    }, rawSend);
 
     try {
       await pipeline.prime();
@@ -120,7 +136,13 @@ export const inboxRunner: ScenarioRunner = {
         measuredWindowMs: measuredWindowMs(scenario),
         includeHistogram: true,
       });
-      const server = await fetchServerMetrics(context.target, fetchImpl);
+      const end = await fetchServerSnapshot(context.target, fetchImpl);
+      // Only report server metrics when both ends of the window were captured;
+      // a missing baseline cannot be diffed (and falling back to the cumulative
+      // snapshot would silently reintroduce warm-up and earlier-scenario load).
+      const server = baselineTaken && baseline != null && end != null
+        ? snapshotToMetrics(diffSnapshots(baseline, end))
+        : null;
       return { ...measurement, server };
     } finally {
       await pipeline.close();
diff --git a/packages/cli/src/bench/scenarios/runner.test.ts b/packages/cli/src/bench/scenarios/runner.test.ts
new file mode 100644
index 000000000..ffcf08a7e
--- /dev/null
+++ b/packages/cli/src/bench/scenarios/runner.test.ts
@@ -0,0 +1,57 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import type { SendOutcome } from "../load/generator.ts";
+import { withMeasuredWindowStart } from "./runner.ts";
+
+const ok: SendOutcome = { ok: true, status: 200 };
+
+test("withMeasuredWindowStart - fires once at the warm-up boundary", async () => {
+  const seenAt: number[] = [];
+  let fires = 0;
+  const send = withMeasuredWindowStart(
+    100,
+    () => {
+      fires++;
+    },
+    (scheduledAtMs) => {
+      seenAt.push(scheduledAtMs);
+      return Promise.resolve(ok);
+    },
+  );
+  for (const offset of [0, 40, 99, 100, 140, 200]) await send(offset);
+  // Fires exactly once, at the first send whose scheduled time reaches 100.
+  assert.strictEqual(fires, 1);
+  // The underlying send still ran for every request, in order.
+  assert.deepEqual(seenAt, [0, 40, 99, 100, 140, 200]);
+});
+
+test("withMeasuredWindowStart - fires before the first send when no warm-up", async () => {
+  const order: string[] = [];
+  const send = withMeasuredWindowStart(
+    0,
+    () => {
+      order.push("boundary");
+    },
+    (_scheduledAtMs) => {
+      order.push("send");
+      return Promise.resolve(ok);
+    },
+  );
+  await send(0);
+  await send(10);
+  // The callback runs before the very first send, then never again.
+  assert.deepEqual(order, ["boundary", "send", "send"]);
+});
+
+test("withMeasuredWindowStart - never fires if no request reaches the window", async () => {
+  let fires = 0;
+  const send = withMeasuredWindowStart(
+    1000,
+    () => {
+      fires++;
+    },
+    () => Promise.resolve(ok),
+  );
+  for (const offset of [0, 100, 999]) await send(offset);
+  assert.strictEqual(fires, 0);
+});
diff --git a/packages/cli/src/bench/scenarios/runner.ts b/packages/cli/src/bench/scenarios/runner.ts
index 37389dc12..6bb6aba62 100644
--- a/packages/cli/src/bench/scenarios/runner.ts
+++ b/packages/cli/src/bench/scenarios/runner.ts
@@ -9,7 +9,7 @@
 import type { DocumentLoader } from "@fedify/vocab-runtime";
 import type { Rng } from "../load/arrival.ts";
 import type { Clock } from "../load/clock.ts";
-import type { LoadPlan, SendOutcome } from "../load/generator.ts";
+import type { LoadPlan, SendFunction, SendOutcome } from "../load/generator.ts";
 import type { ResolvedScenario } from "../scenario/normalize.ts";
 import type { ScenarioMeasurement } from "../result/build.ts";
 import type { SyntheticServer } from "../server/synthetic.ts";
@@ -82,3 +82,34 @@ export function estimateTotal(scenario: ResolvedScenario): number | undefined {
   if (scenario.load.kind !== "open") return undefined;
   return Math.ceil(scenario.load.ratePerSec * (scenario.durationMs / 1000));
 }
+
+/**
+ * Wraps a send function so that `onMeasuredWindowStart` runs exactly once, at
+ * the warm-up boundary, and *every* measured request waits for it to settle
+ * before being sent.  Runners use this to snapshot a server-side baseline so
+ * reported server metrics cover only the measured window rather than the
+ * target's cumulative lifetime; awaiting it on every measured send guarantees
+ * the baseline is taken before any measured traffic reaches the target, so no
+ * measured request can leak into the baseline.
+ *
+ * The barrier is cheap: only the handful of requests scheduled while the
+ * baseline snapshot is in flight wait for it (recording that wait as their own
+ * latency, the coordinated-omission-correct outcome); once it settles, later
+ * waits resolve immediately.
+ * @param warmupMs The warm-up window length, in milliseconds.
+ * @param onMeasuredWindowStart The one-shot callback, run at the boundary.
+ * @param send The underlying send function.
+ * @returns A send function that gates measured sends on the callback.
+ */
+export function withMeasuredWindowStart(
+  warmupMs: number,
+  onMeasuredWindowStart: () => void | Promise<void>,
+  send: SendFunction,
+): SendFunction {
+  let started: Promise<void> | undefined;
+  return (scheduledAtMs: number) => {
+    if (scheduledAtMs < warmupMs) return send(scheduledAtMs);
+    started ??= Promise.resolve(onMeasuredWindowStart());
+    return started.then(() => send(scheduledAtMs));
+  };
+}
diff --git a/packages/cli/src/bench/scenarios/webfinger.ts b/packages/cli/src/bench/scenarios/webfinger.ts
index 125f3882c..f2e4f189e 100644
--- a/packages/cli/src/bench/scenarios/webfinger.ts
+++ b/packages/cli/src/bench/scenarios/webfinger.ts
@@ -8,13 +8,19 @@
 import { convertUrlIfHandle } from "../../webfinger/lib.ts";
 import { runLoad } from "../load/generator.ts";
 import { aggregateSamples } from "../metrics/aggregate.ts";
-import { fetchServerMetrics } from "../metrics/stats-client.ts";
+import {
+  diffSnapshots,
+  fetchServerSnapshot,
+  type ServerSnapshot,
+  snapshotToMetrics,
+} from "../metrics/stats-client.ts";
 import {
   loadPlanOf,
   measuredWindowMs,
   type RunContext,
   type ScenarioRunner,
   sendRequest,
+  withMeasuredWindowStart,
 } from "./runner.ts";
 
 function webfingerUrl(target: URL, recipient: string): URL {
@@ -33,8 +39,22 @@ export const webfingerRunner: ScenarioRunner = {
         ? context.scenario.recipients
         : [context.target.host]).map((r) => webfingerUrl(context.target, r));
     let index = 0;
-    const send = () =>
+    const rawSend = () =>
       sendRequest(new Request(urls[index++ % urls.length]), fetchImpl);
+    // Snapshot the server's cumulative metrics at the measured-window boundary
+    // so warm-up and earlier scenarios are diffed out of the reported numbers.
+    // A few warm-up requests still in flight when the baseline is taken may be
+    // attributed to the window; that residue is bounded by the in-flight count.
+    let baseline: ServerSnapshot | null = null;
+    let baselineTaken = false;
+    const send = withMeasuredWindowStart(
+      context.scenario.warmupMs,
+      async () => {
+        baseline = await fetchServerSnapshot(context.target, fetchImpl);
+        baselineTaken = true;
+      },
+      rawSend,
+    );
     const result = await runLoad(
       loadPlanOf(context.scenario, context.rng),
       send,
@@ -44,7 +64,13 @@ export const webfingerRunner: ScenarioRunner = {
       measuredWindowMs: measuredWindowMs(context.scenario),
       includeHistogram: true,
     });
-    const server = await fetchServerMetrics(context.target, fetchImpl);
+    const end = await fetchServerSnapshot(context.target, fetchImpl);
+    // Only report server metrics when both ends of the window were captured; a
+    // missing baseline cannot be diffed (and falling back to the cumulative
+    // snapshot would silently reintroduce warm-up and earlier-scenario load).
+    const server = baselineTaken && baseline != null && end != null
+      ? snapshotToMetrics(diffSnapshots(baseline, end))
+      : null;
     return { ...measurement, server };
   },
 };

From 782aea055bb74063aaa480e5a0dc980eff024cc0 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 11:34:15 +0900
Subject: [PATCH 20/47] Accept partial load overrides in the bench scenario
 schema

The scenario schema's `load` object required exactly one of `rate` or
`concurrency`, so a block that set only `arrival` or `maxInFlight` and
inherited its load model was rejected before normalization, even though
`resolveLoad()` already supports such partial overrides (inheriting the
model, or falling back to the default open-loop rate).

Relax the constraint to forbid only `rate` and `concurrency` together,
allowing either one or neither.  This lets a suite write, for example,
`defaults: { load: { maxInFlight: 100 } }` or override just `arrival` on
one scenario.  The embedded schema literal and the published
schema/bench/scenario-v1.json are regenerated together (the v1 file is
new on this branch, so it is not yet immutable).

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 .../cli/src/bench/scenario/normalize.test.ts  | 14 ++++++++++
 packages/cli/src/bench/scenario/schema.ts     |  9 +++---
 .../cli/src/bench/scenario/validate.test.ts   | 21 +++++++++++++-
 schema/bench/scenario-v1.json                 | 28 ++++---------------
 4 files changed, 45 insertions(+), 27 deletions(-)

diff --git a/packages/cli/src/bench/scenario/normalize.test.ts b/packages/cli/src/bench/scenario/normalize.test.ts
index 48d9ddf18..084cded22 100644
--- a/packages/cli/src/bench/scenario/normalize.test.ts
+++ b/packages/cli/src/bench/scenario/normalize.test.ts
@@ -81,6 +81,20 @@ test("normalizeSuite - load inherits arrival/maxInFlight from defaults", () => {
   });
 });
 
+test("normalizeSuite - partial load override keeps the default model", () => {
+  // `maxInFlight` only, with no rate/concurrency anywhere: falls back to the
+  // built-in open-loop default rate while applying the override.
+  const s = normalizeSuite(suite({
+    defaults: { load: { maxInFlight: 100 } },
+  })).scenarios[0];
+  assert.deepEqual(s.load, {
+    kind: "open",
+    ratePerSec: 50,
+    arrival: "constant",
+    maxInFlight: 100,
+  });
+});
+
 test("normalizeSuite - parses fanout queueDrainTimeout to ms", () => {
   const s = normalizeSuite(suite({
     scenarios: [{
diff --git a/packages/cli/src/bench/scenario/schema.ts b/packages/cli/src/bench/scenario/schema.ts
index f2f18894a..83c4983b5 100644
--- a/packages/cli/src/bench/scenario/schema.ts
+++ b/packages/cli/src/bench/scenario/schema.ts
@@ -136,10 +136,11 @@ export const scenarioSchemaV1 = {
         arrival: { $ref: "#/$defs/arrival" },
         maxInFlight: { type: "integer", minimum: 1 },
       },
-      oneOf: [
-        { required: ["rate"], not: { required: ["concurrency"] } },
-        { required: ["concurrency"], not: { required: ["rate"] } },
-      ],
+      // `rate` (open-loop) and `concurrency` (closed-loop) are mutually
+      // exclusive, but neither is required here: a load block may set only
+      // `arrival`/`maxInFlight` and inherit the model from `defaults` (or the
+      // built-in open-loop default), which the normalizer already supports.
+      not: { required: ["rate", "concurrency"] },
     },
     defaults: {
       type: "object",
diff --git a/packages/cli/src/bench/scenario/validate.test.ts b/packages/cli/src/bench/scenario/validate.test.ts
index 1973cf435..ebc938c92 100644
--- a/packages/cli/src/bench/scenario/validate.test.ts
+++ b/packages/cli/src/bench/scenario/validate.test.ts
@@ -61,12 +61,31 @@ test("validateSuite - enforces exactly one HTTP signature scheme", () => {
   assert.throws(() => validateSuite(docOnly), SuiteValidationError);
 });
 
-test("validateSuite - enforces rate XOR concurrency", () => {
+test("validateSuite - rejects rate and concurrency together", () => {
   const bad = validInbox() as Record<string, unknown>;
   bad.defaults = { load: { rate: "100/s", concurrency: 50 } };
   assert.throws(() => validateSuite(bad), SuiteValidationError);
 });
 
+test("validateSuite - accepts a partial load override", () => {
+  // Only `maxInFlight`/`arrival`, inheriting the load model: the normalizer
+  // supports this (falling back to the default open-loop rate), so the schema
+  // must not reject it for lacking `rate`/`concurrency`.
+  const partial = validInbox() as Record<string, unknown>;
+  partial.defaults = { load: { maxInFlight: 100, arrival: "poisson" } };
+  assert.doesNotThrow(() => validateSuite(partial));
+
+  const scenarioOverride = validInbox() as Record<string, unknown>;
+  scenarioOverride.defaults = { load: { rate: "100/s" } };
+  scenarioOverride.scenarios = [{
+    name: "inbox-shared",
+    type: "inbox",
+    recipient: "acct:alice@x",
+    load: { maxInFlight: 50 },
+  }];
+  assert.doesNotThrow(() => validateSuite(scenarioOverride));
+});
+
 test("validateSuite - enforces per-type expect metric allowlist", () => {
   const bad = {
     version: 1,
diff --git a/schema/bench/scenario-v1.json b/schema/bench/scenario-v1.json
index f0e4bcba0..c60a5a262 100644
--- a/schema/bench/scenario-v1.json
+++ b/schema/bench/scenario-v1.json
@@ -124,28 +124,12 @@
           "minimum": 1
         }
       },
-      "oneOf": [
-        {
-          "required": [
-            "rate"
-          ],
-          "not": {
-            "required": [
-              "concurrency"
-            ]
-          }
-        },
-        {
-          "required": [
-            "concurrency"
-          ],
-          "not": {
-            "required": [
-              "rate"
-            ]
-          }
-        }
-      ]
+      "not": {
+        "required": [
+          "rate",
+          "concurrency"
+        ]
+      }
     },
     "defaults": {
       "type": "object",

From 98a22d7ce15078b8e167e7282fcc02b15b052f70 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 11:49:47 +0900
Subject: [PATCH 21/47] Make signed bench targets reach the synthetic actor
 server

The synthetic actor/key server bound loopback and advertised
`127.0.0.1` actor and key IDs, which the target dereferences to verify
HTTP signatures.  A same-machine (loopback) target reaches it, but a
non-loopback target dereferences its own `127.0.0.1`, fails key lookup,
and rejects every signed delivery.  The command nonetheless allowed
signed scenarios against private targets, so they failed silently.

Add a `--advertise-host` option.  When set, the synthetic server binds
every interface (`0.0.0.0`, or `::` for an IPv6 host) and advertises the
given host in its actor, key, and base URLs, so a non-loopback target
can dereference them.  `resolveAdvertiseHost()` validates the value as a
bare host name, IPv4 address, or IPv6 literal (bracketing IPv6 for the
URL authority and binding the matching family), rejecting a scheme,
port, path, or other URL syntax with a clear configuration error.

Signed scenarios are now refused (exit 2) when the target is
non-loopback and no `--advertise-host` is given, instead of running and
failing on the target.  The documentation is updated accordingly.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 docs/manual/benchmarking.md                   |  12 +-
 packages/cli/src/bench/action.test.ts         |  31 ++++-
 packages/cli/src/bench/action.ts              |  28 +++--
 packages/cli/src/bench/command.ts             |   9 ++
 .../cli/src/bench/server/synthetic.test.ts    |  83 +++++++++++++-
 packages/cli/src/bench/server/synthetic.ts    | 107 +++++++++++++++++-
 6 files changed, 250 insertions(+), 20 deletions(-)

diff --git a/docs/manual/benchmarking.md b/docs/manual/benchmarking.md
index 07e5c10ae..51d24c841 100644
--- a/docs/manual/benchmarking.md
+++ b/docs/manual/benchmarking.md
@@ -251,10 +251,14 @@ which goes over HTTPS, so against a plain-HTTP loopback target give the
 unaffected: it requests `/.well-known/webfinger` on the target directly, so it
 can benchmark `acct:` lookups over plain HTTP.
 
-Signed scenarios such as `inbox` also require a loopback or private target,
-because the benchmark's synthetic actor server is only reachable on the
-client's loopback; a public target cannot dereference its keys, so use a read
-scenario such as `webfinger` there.
+Signed scenarios such as `inbox` make the target dereference the benchmark's
+synthetic actor server while verifying signatures, so that server must be
+reachable from the target.  A loopback target reaches it automatically (both
+run on the same machine).  For a non-loopback target, pass `--advertise-host`
+with an address the target can reach (for example the client's LAN IP); the
+synthetic server then binds every interface and advertises that host in the
+actor and key URLs.  Without it, a non-loopback signed scenario is refused
+(use a read scenario such as `webfinger`, which needs no synthetic server).
 
 
 Benchmark stats endpoint
diff --git a/packages/cli/src/bench/action.test.ts b/packages/cli/src/bench/action.test.ts
index bb13e2d3a..9f0c2677f 100644
--- a/packages/cli/src/bench/action.test.ts
+++ b/packages/cli/src/bench/action.test.ts
@@ -220,7 +220,36 @@ scenarios:
       ),
   });
   assert.strictEqual(code, 2);
-  assert.match(message, /loopback or private/);
+  assert.match(message, /advertise-host/);
+});
+
+test("runBench - rejects a signed scenario against a non-loopback target", async () => {
+  // A private (non-loopback) target passes the safety gate, but a signed
+  // scenario without --advertise-host cannot reach the synthetic actor server,
+  // so it is refused (exit 2) before any load.
+  const file = await writeSuite(`version: 1
+target: http://10.10.0.5:8000
+scenarios:
+  - name: inbox-shared
+    type: inbox
+    recipient: "http://10.10.0.5:8000/users/alice"
+    load: { concurrency: 2 }
+    duration: 100ms
+`);
+  let code = -1;
+  let message = "";
+  await runBench(command({ scenario: file }), {
+    exit: (c) => {
+      code = c;
+    },
+    writeOutput: () => Promise.resolve(),
+    log: (m) => {
+      message = m;
+    },
+    fetch: () => Promise.reject(new Error("offline")),
+  });
+  assert.strictEqual(code, 2);
+  assert.match(message, /advertise-host/);
 });
 
 test("runBench - malformed expect assertion exits 2 before any load", async () => {
diff --git a/packages/cli/src/bench/action.ts b/packages/cli/src/bench/action.ts
index 92cb87f58..435e49dba 100644
--- a/packages/cli/src/bench/action.ts
+++ b/packages/cli/src/bench/action.ts
@@ -24,6 +24,7 @@ import { assertTargetAllowed, UnsafeTargetError } from "./safety/gate.ts";
 import { classifyTarget } from "./safety/tiers.ts";
 import { runnerFor } from "./scenarios/registry.ts";
 import {
+  resolveAdvertiseHost,
   spawnSyntheticServer,
   type SyntheticServer,
 } from "./server/synthetic.ts";
@@ -93,6 +94,9 @@ export default async function runBench(
       validateExpectBlock(scenario.expect);
       return runner;
     });
+    if (command.advertiseHost != null) {
+      resolveAdvertiseHost(command.advertiseHost);
+    }
   } catch (error) {
     log(error instanceof Error ? error.message : String(error));
     return void exit(2);
@@ -120,17 +124,21 @@ export default async function runBench(
     throw error;
   }
 
-  // The synthetic actor server is only reachable on the client's loopback, so
-  // a remote (public) target cannot dereference its keys.  Signed scenarios
-  // therefore require a loopback or private target.
+  // The target dereferences the synthetic actor server while verifying
+  // signatures.  By default that server is loopback-only, reachable just by a
+  // same-machine (loopback) target; a non-loopback target needs an advertised,
+  // reachable host (--advertise-host).  Without one, refuse signed scenarios
+  // rather than let every signed delivery fail key lookup.
   if (
-    tier === "public" && suite.scenarios.some((s) => SIGNED_TYPES.has(s.type))
+    tier !== "loopback" && command.advertiseHost == null &&
+    suite.scenarios.some((s) => SIGNED_TYPES.has(s.type))
   ) {
     log(
-      "Signed scenarios (inbox) require a loopback or private target: the " +
-        "benchmark's synthetic actor server is only reachable on the client's " +
-        "loopback, so a public target cannot dereference its keys.  Use a " +
-        "local target, or a read scenario such as webfinger.",
+      "Signed scenarios (inbox) need the benchmark's synthetic actor server to " +
+        "be reachable from the target.  A loopback target reaches it " +
+        "automatically; for a non-loopback target, pass --advertise-host with " +
+        "an address the target can reach (the synthetic server then binds all " +
+        "interfaces), or use a read scenario such as webfinger.",
     );
     return void exit(2);
   }
@@ -149,7 +157,9 @@ export default async function runBench(
   const startedAt = new Date().toISOString();
   try {
     if (suite.scenarios.some((s) => SIGNED_TYPES.has(s.type))) {
-      fleet = await spawnSyntheticServer(await buildFleet(suite.actors));
+      fleet = await spawnSyntheticServer(await buildFleet(suite.actors), {
+        advertiseHost: command.advertiseHost,
+      });
     }
     const results = [];
     for (let i = 0; i < suite.scenarios.length; i++) {
diff --git a/packages/cli/src/bench/command.ts b/packages/cli/src/bench/command.ts
index 0acc04e82..c48dd10fd 100644
--- a/packages/cli/src/bench/command.ts
+++ b/packages/cli/src/bench/command.ts
@@ -79,6 +79,15 @@ export const benchCommand = command(
         }),
         false,
       ),
+      advertiseHost: optional(
+        option("--advertise-host", string({ metavar: "HOST" }), {
+          description:
+            message`Host (name or IP) a non-loopback target can reach the \
+benchmark's synthetic actor server at.  Required for signed scenarios against a \
+non-loopback target; binds the synthetic server on all interfaces and uses this \
+host in the actor and key URLs the target dereferences.`,
+        }),
+      ),
       allowUnsafeTarget,
     }),
     userAgentOption,
diff --git a/packages/cli/src/bench/server/synthetic.test.ts b/packages/cli/src/bench/server/synthetic.test.ts
index 37ce2dc56..7ea1ef117 100644
--- a/packages/cli/src/bench/server/synthetic.test.ts
+++ b/packages/cli/src/bench/server/synthetic.test.ts
@@ -3,7 +3,11 @@ import assert from "node:assert/strict";
 import test from "node:test";
 import { getContextLoader, getDocumentLoader } from "../../docloader.ts";
 import { buildFleet } from "../actor/fleet.ts";
-import { spawnSyntheticServer } from "./synthetic.ts";
+import {
+  AdvertiseHostError,
+  resolveAdvertiseHost,
+  spawnSyntheticServer,
+} from "./synthetic.ts";
 
 test("spawnSyntheticServer - serves a verifiable actor document", async () => {
   const fleet = await buildFleet([{
@@ -53,6 +57,34 @@ test("spawnSyntheticServer - serves a verifiable actor document", async () => {
   }
 });
 
+test("spawnSyntheticServer - advertises a reachable host in actor URLs", async () => {
+  const fleet = await buildFleet([{
+    count: 1,
+    signatureStandards: ["draft-cavage-http-signatures-12"],
+  }]);
+  // 192.0.2.0/24 is TEST-NET-1: a non-loopback host that is never actually
+  // routed, so this checks the advertised URLs without needing a remote peer.
+  const server = await spawnSyntheticServer(fleet, {
+    advertiseHost: "192.0.2.10",
+  });
+  try {
+    const actor = server.actors[0];
+    assert.strictEqual(actor.id.hostname, "192.0.2.10");
+    assert.strictEqual(server.url.hostname, "192.0.2.10");
+    assert.strictEqual(actor.rsaKeyId?.hostname, "192.0.2.10");
+    // The advertised port matches the bound port, and the document is still
+    // served (the server binds all interfaces, so loopback reaches it).
+    const local = new URL(
+      actor.id.pathname,
+      `http://127.0.0.1:${actor.id.port}`,
+    );
+    const response = await fetch(local);
+    assert.strictEqual(response.status, 200);
+  } finally {
+    await server.close();
+  }
+});
+
 test("spawnSyntheticServer - unknown paths 404", async () => {
   const fleet = await buildFleet([{
     signatureStandards: ["rfc9421"],
@@ -68,3 +100,52 @@ test("spawnSyntheticServer - unknown paths 404", async () => {
     await server.close();
   }
 });
+
+test("resolveAdvertiseHost - DNS and IPv4 bind all IPv4 interfaces", () => {
+  assert.deepEqual(resolveAdvertiseHost("bench.local"), {
+    bindHost: "0.0.0.0",
+    urlHost: "bench.local",
+  });
+  assert.deepEqual(resolveAdvertiseHost("192.168.1.10"), {
+    bindHost: "0.0.0.0",
+    urlHost: "192.168.1.10",
+  });
+  // Surrounding whitespace is trimmed.
+  assert.deepEqual(resolveAdvertiseHost("  10.0.0.5  "), {
+    bindHost: "0.0.0.0",
+    urlHost: "10.0.0.5",
+  });
+});
+
+test("resolveAdvertiseHost - IPv6 binds all IPv6 interfaces and is bracketed", () => {
+  assert.deepEqual(resolveAdvertiseHost("2001:db8::1"), {
+    bindHost: "::",
+    urlHost: "[2001:db8::1]",
+  });
+  // An already-bracketed literal is accepted as-is.
+  assert.deepEqual(resolveAdvertiseHost("[2001:db8::1]"), {
+    bindHost: "::",
+    urlHost: "[2001:db8::1]",
+  });
+});
+
+test("resolveAdvertiseHost - rejects ports, schemes, paths, and junk", () => {
+  for (
+    const bad of [
+      "",
+      "  ",
+      "10.0.0.5:8080",
+      "http://10.0.0.5",
+      "10.0.0.5/path",
+      "user@host",
+      "[2001:db8::1",
+      "2001:db8:::",
+    ]
+  ) {
+    assert.throws(
+      () => resolveAdvertiseHost(bad),
+      AdvertiseHostError,
+      `expected ${JSON.stringify(bad)} to be rejected`,
+    );
+  }
+});
diff --git a/packages/cli/src/bench/server/synthetic.ts b/packages/cli/src/bench/server/synthetic.ts
index 177c365c0..0013e110b 100644
--- a/packages/cli/src/bench/server/synthetic.ts
+++ b/packages/cli/src/bench/server/synthetic.ts
@@ -2,8 +2,12 @@
  * The benchmark's own synthetic actor/key server.
  *
  * It serves the actor documents (with embedded keys) that the target
- * dereferences while verifying signatures, over plain loopback HTTP — which
- * works because `benchmarkMode` enables `allowPrivateAddress` on the target.
+ * dereferences while verifying signatures, over plain HTTP — which works
+ * because `benchmarkMode` enables `allowPrivateAddress` on the target.  By
+ * default it binds loopback and advertises a `127.0.0.1` base URL, which a
+ * same-machine (loopback) target can reach.  For a non-loopback target, pass
+ * `advertiseHost`: the server then binds every interface and advertises that
+ * host in the actor/key URLs, so the remote target can dereference them.
  * @since 2.3.0
  * @module
  */
@@ -38,6 +42,12 @@ export interface SyntheticServer {
 export interface SyntheticServerOptions {
   /** The context loader used to render actor documents. */
   readonly contextLoader?: DocumentLoader;
+  /**
+   * A host (name or IP) reachable from the target.  When set, the server binds
+   * every interface and advertises actor/key URLs at this host (with its chosen
+   * port) instead of `127.0.0.1`, so a non-loopback target can dereference them.
+   */
+  readonly advertiseHost?: string;
 }
 
 /**
@@ -51,10 +61,17 @@ export async function spawnSyntheticServer(
   members: readonly FleetMember[],
   options: SyntheticServerOptions = {},
 ): Promise<SyntheticServer> {
+  // Resolved before binding so a malformed --advertise-host fails fast.
+  const advertised = options.advertiseHost == null
+    ? null
+    : resolveAdvertiseHost(options.advertiseHost);
   const routes = new Map<string, string>();
   const server = serve({
     port: 0,
-    hostname: "127.0.0.1",
+    // Bind a reachable interface when advertising a host (every IPv6 or every
+    // IPv4 interface, matching the advertised host's family), otherwise stay on
+    // loopback.
+    hostname: advertised?.bindHost ?? "127.0.0.1",
     silent: true,
     fetch(request: Request): Response {
       const { pathname } = new URL(request.url);
@@ -69,7 +86,13 @@ export async function spawnSyntheticServer(
   await server.ready();
   const actors: SyntheticActor[] = [];
   try {
-    const base = new URL(server.url!);
+    const bound = new URL(server.url!);
+    // Actor and key IDs must use an address the target can dereference; the
+    // bound (loopback) URL works for a same-machine target, otherwise the
+    // advertised host (with the bound port) is used.
+    const base = advertised == null
+      ? bound
+      : new URL(`http://${advertised.urlHost}:${bound.port}/`);
     const contextLoader = options.contextLoader ??
       await getContextLoader({ allowPrivateAddress: true });
     for (const member of members) {
@@ -89,7 +112,7 @@ export async function spawnSyntheticServer(
       actors.push(actor);
     }
     return {
-      url: new URL(server.url!),
+      url: base,
       actors,
       async close() {
         await server.close(true);
@@ -101,3 +124,77 @@ export async function spawnSyntheticServer(
     throw error;
   }
 }
+
+/** A validated advertise host: where to bind and how to write it in a URL. */
+export interface ResolvedAdvertiseHost {
+  /** The address to bind the synthetic server to. */
+  readonly bindHost: string;
+  /** The host as it appears in a URL authority (IPv6 is bracketed). */
+  readonly urlHost: string;
+}
+
+/** An error raised when `--advertise-host` is not a usable bare host. */
+export class AdvertiseHostError extends Error {}
+
+/**
+ * Validates and normalizes an `--advertise-host` value into a bind address and a
+ * URL-authority host.  It must be a bare host name, IPv4 address, or IPv6
+ * literal (bracketed or not); a scheme, port, path, or other URL syntax is
+ * rejected, since the synthetic server's chosen port is appended automatically.
+ * An IPv6 host binds every IPv6 interface (`::`); anything else binds every IPv4
+ * interface (`0.0.0.0`).
+ * @param host The raw `--advertise-host` value.
+ * @returns The bind address and the URL-authority host.
+ * @throws {AdvertiseHostError} If the value is not a usable bare host.
+ */
+export function resolveAdvertiseHost(host: string): ResolvedAdvertiseHost {
+  const trimmed = host.trim();
+  if (trimmed === "") {
+    throw new AdvertiseHostError("--advertise-host must not be empty.");
+  }
+  if (/[\s/\\@?#]/.test(trimmed) || trimmed.includes("://")) {
+    throw new AdvertiseHostError(
+      `Invalid --advertise-host ${JSON.stringify(host)}: give a bare host ` +
+        "name or IP address, with no scheme, path, or whitespace.",
+    );
+  }
+  let urlHost: string;
+  let bindHost: string;
+  if (trimmed.startsWith("[")) {
+    if (!trimmed.endsWith("]")) {
+      throw new AdvertiseHostError(
+        `Invalid --advertise-host ${JSON.stringify(host)}: unbalanced ` +
+          "brackets around the IPv6 address.",
+      );
+    }
+    urlHost = trimmed;
+    bindHost = "::";
+  } else {
+    const colons = (trimmed.match(/:/g) ?? []).length;
+    if (colons === 1) {
+      throw new AdvertiseHostError(
+        `Invalid --advertise-host ${
+          JSON.stringify(host)
+        }: omit the port; the ` +
+          "synthetic server's chosen port is appended automatically.",
+      );
+    }
+    if (colons >= 2) {
+      // A bare IPv6 literal; bracket it for the URL authority.
+      urlHost = `[${trimmed}]`;
+      bindHost = "::";
+    } else {
+      urlHost = trimmed;
+      bindHost = "0.0.0.0";
+    }
+  }
+  try {
+    new URL(`http://${urlHost}/`);
+  } catch {
+    throw new AdvertiseHostError(
+      `Invalid --advertise-host ${JSON.stringify(host)}: not a valid host ` +
+        "name or IP address.",
+    );
+  }
+  return { bindHost, urlHost };
+}

From f1ae5b7501b89a6d8fd8427aa98a85a1267566db Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 12:38:54 +0900
Subject: [PATCH 22/47] Apply the configured User-Agent to all bench traffic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The `--user-agent` value was passed only to the document loader, so the
benchmark's main requests — the runners' inbox POSTs and WebFinger GETs,
the benchmark-mode probe, and the server stats reads — went out with the
runtime's default User-Agent.  A target that inspects, logs, or
rate-limits by User-Agent saw the wrong value, so the option was
silently ineffective for the traffic that matters.

Wrap the fetch implementation once with withUserAgent(), so every
benchmark request carries the configured User-Agent.  A prebuilt request
(the signed inbox delivery, a WebFinger GET) has the header set in place
rather than being cloned, leaving the already-signed body and digest
untouched; the User-Agent is not part of the signed header set, so this
does not affect verification.  A User-Agent the caller already set is
left as-is.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 packages/cli/src/bench/action.test.ts | 58 +++++++++++++++++++++++++--
 packages/cli/src/bench/action.ts      | 38 +++++++++++++++++-
 2 files changed, 91 insertions(+), 5 deletions(-)

diff --git a/packages/cli/src/bench/action.test.ts b/packages/cli/src/bench/action.test.ts
index 9f0c2677f..2116d1a86 100644
--- a/packages/cli/src/bench/action.test.ts
+++ b/packages/cli/src/bench/action.test.ts
@@ -10,7 +10,7 @@ import { tmpdir } from "node:os";
 import { join } from "node:path";
 import test from "node:test";
 import { serve } from "srvx";
-import runBench from "./action.ts";
+import runBench, { withUserAgent } from "./action.ts";
 import type { BenchCommand } from "./command.ts";
 
 async function spawnTarget() {
@@ -45,15 +45,24 @@ async function spawnTarget() {
     Create,
     () => {},
   );
+  let inboxUserAgent: string | null = null;
   const server = serve({
     port: 0,
     hostname: "127.0.0.1",
     silent: true,
-    fetch: (request: Request) =>
-      federation.fetch(request, { contextData: undefined }),
+    fetch: (request: Request) => {
+      if (request.method === "POST") {
+        inboxUserAgent = request.headers.get("user-agent");
+      }
+      return federation.fetch(request, { contextData: undefined });
+    },
   });
   await server.ready();
-  return { url: new URL(server.url!), close: () => server.close(true) };
+  return {
+    url: new URL(server.url!),
+    inboxUserAgent: () => inboxUserAgent,
+    close: () => server.close(true),
+  };
 }
 
 function command(overrides: Partial<BenchCommand>): BenchCommand {
@@ -117,11 +126,52 @@ test("runBench - passing gate exits 0 and writes a valid report", async () => {
     assert.strictEqual(report.passed, true);
     assert.strictEqual(report.scenarios[0].requests.successRate, 1);
     assert.ok(report.target.statsAvailable);
+    // The configured User-Agent reached the actual benchmark traffic, not just
+    // the document loader.
+    assert.strictEqual(target.inboxUserAgent(), "Fedify-bench-test/1.0");
   } finally {
     await target.close();
   }
 });
 
+test("withUserAgent - sets the User-Agent on a prebuilt request", async () => {
+  let seen: string | null = null;
+  const wrapped = withUserAgent((input) => {
+    seen = (input as Request).headers.get("user-agent");
+    return Promise.resolve(new Response("ok"));
+  }, "Bench/9.9");
+  await wrapped(new Request("http://x.test/a"));
+  assert.strictEqual(seen, "Bench/9.9");
+});
+
+test("withUserAgent - sets the User-Agent on a URL request, keeping init headers", async () => {
+  let ua: string | null = null;
+  let accept: string | null = null;
+  const wrapped = withUserAgent((_input, init) => {
+    const headers = new Headers(init?.headers);
+    ua = headers.get("user-agent");
+    accept = headers.get("accept");
+    return Promise.resolve(new Response("ok"));
+  }, "Bench/9.9");
+  await wrapped(new URL("http://x.test/a"), {
+    headers: { accept: "application/json" },
+  });
+  assert.strictEqual(ua, "Bench/9.9");
+  assert.strictEqual(accept, "application/json");
+});
+
+test("withUserAgent - does not override an explicit User-Agent", async () => {
+  let seen: string | null = null;
+  const wrapped = withUserAgent((input) => {
+    seen = (input as Request).headers.get("user-agent");
+    return Promise.resolve(new Response("ok"));
+  }, "Bench/9.9");
+  await wrapped(
+    new Request("http://x.test/a", { headers: { "user-agent": "Custom/1" } }),
+  );
+  assert.strictEqual(seen, "Custom/1");
+});
+
 test("runBench - failing gate exits 1", async () => {
   const target = await spawnTarget();
   try {
diff --git a/packages/cli/src/bench/action.ts b/packages/cli/src/bench/action.ts
index 435e49dba..9973cc2d5 100644
--- a/packages/cli/src/bench/action.ts
+++ b/packages/cli/src/bench/action.ts
@@ -67,7 +67,10 @@ export default async function runBench(
   const writeOutput = deps.writeOutput ?? defaultWriteOutput;
   const log = deps.log ??
     ((message: string) => process.stderr.write(`${message}\n`));
-  const fetchImpl = deps.fetch ?? fetch;
+  // Apply the configured User-Agent to all benchmark traffic — the probe, the
+  // stats reads, and the runners' inbox/WebFinger requests — not just the
+  // document loader, so a target that inspects the UA sees it on every request.
+  const fetchImpl = withUserAgent(deps.fetch ?? fetch, command.userAgent);
 
   // Loading, validation, and normalization failures are all user-facing
   // configuration errors.
@@ -202,6 +205,39 @@ export default async function runBench(
   }
 }
 
+/**
+ * Wraps a fetch implementation so every request carries the given User-Agent,
+ * unless the caller already set one.  A prebuilt {@link Request} (the signed
+ * inbox delivery, a WebFinger GET) is mutated in place rather than recloned, so
+ * an already-signed body and its digest are left untouched; the User-Agent is
+ * not part of the signed header set, so adding it does not affect verification.
+ * @param fetchImpl The underlying fetch implementation.
+ * @param userAgent The User-Agent header value to apply.
+ * @returns A fetch implementation that injects the User-Agent.
+ */
+export function withUserAgent(
+  fetchImpl: typeof fetch,
+  userAgent: string,
+): typeof fetch {
+  // Cast the wrapper to `typeof fetch`: the standard contract it implements is a
+  // subset of the runtime's overloaded fetch type (which carries extra non-
+  // standard overloads), so the assignment is sound but not structurally
+  // inferable.
+  return ((input: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
+    if (input instanceof Request && init === undefined) {
+      if (!input.headers.has("user-agent")) {
+        input.headers.set("user-agent", userAgent);
+      }
+      return fetchImpl(input);
+    }
+    const headers = new Headers(
+      init?.headers ?? (input instanceof Request ? input.headers : undefined),
+    );
+    if (!headers.has("user-agent")) headers.set("user-agent", userAgent);
+    return fetchImpl(input, { ...init, headers });
+  }) as typeof fetch;
+}
+
 async function defaultWriteOutput(
   content: string,
   outputPath: string | undefined,

From 80dd18a44db270f6d3276d94c5be429bc2b2d2a5 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 12:46:24 +0900
Subject: [PATCH 23/47] Show server queue depth without drain latency in
 reports

The text and Markdown renderers only surfaced server queue metrics when
a drain-latency histogram was present, with the depth shown merely as a
suffix to that line.  The current stats reader supplies
`queue.depthMax` without `drainMs`, so queue depth never appeared in the
human-readable output even though it was in the JSON model; the Markdown
form rendered no queue metrics at all.

Render queue depth on its own:

 -  text: keep the combined drain line (now only when it has at least one
    percentile), otherwise print a standalone `Server queue depth max`
    line whenever a depth is reported.
 -  Markdown: add a queue drain p95 row when present and a queue depth max
    row whenever a depth is reported.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 packages/cli/src/bench/render/markdown.ts    | 11 ++++++
 packages/cli/src/bench/render/render.test.ts | 36 ++++++++++++++++++++
 packages/cli/src/bench/render/text.ts        | 18 +++++++---
 3 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/packages/cli/src/bench/render/markdown.ts b/packages/cli/src/bench/render/markdown.ts
index a722686e6..511f34b26 100644
--- a/packages/cli/src/bench/render/markdown.ts
+++ b/packages/cli/src/bench/render/markdown.ts
@@ -64,6 +64,17 @@ function renderScenario(scenario: ScenarioResult): string[] {
       `| Signature verification p95 (server) | ${formatNumber(sig.p95)}ms |`,
     );
   }
+  const queue = scenario.server?.queue;
+  if (queue?.drainMs?.p95 != null) {
+    lines.push(
+      `| Queue drain p95 (server) | ${formatNumber(queue.drainMs.p95)}ms |`,
+    );
+  }
+  if (queue?.depthMax != null) {
+    lines.push(
+      `| Queue depth max (server) | ${formatNumber(queue.depthMax)} |`,
+    );
+  }
 
   if (scenario.errors.length > 0) {
     lines.push("", "| Error | Count |", "| --- | --- |");
diff --git a/packages/cli/src/bench/render/render.test.ts b/packages/cli/src/bench/render/render.test.ts
index 008383c5c..f9bcb4406 100644
--- a/packages/cli/src/bench/render/render.test.ts
+++ b/packages/cli/src/bench/render/render.test.ts
@@ -60,6 +60,42 @@ test("renderReport - shows actuals in the metric's natural unit", () => {
   assert.match(text, /successRate >= 99%\s+\(actual 99\.4%\)/);
 });
 
+test("renderReport - shows queue depth even without drain latency", () => {
+  // The stats reader supplies queue depth but no drain-latency histogram; both
+  // the text and Markdown forms must still surface the depth.
+  const base = report.scenarios[0];
+  const r: BenchReport = {
+    ...report,
+    scenarios: [{
+      ...base,
+      server: { ...(base.server ?? {}), queue: { depthMax: 42 } },
+    }],
+  };
+  const text = renderReport(r, "text");
+  assert.match(text, /Server queue depth max: 42/);
+  const md = renderReport(r, "markdown");
+  assert.match(md, /Queue depth max \(server\) \| 42/);
+});
+
+test("renderReport - empty drain latency falls back to the depth line", () => {
+  // An empty drainMs object carries no percentile, so neither form should print
+  // a meaningless drain line; both still surface the depth (here zero).
+  const base = report.scenarios[0];
+  const r: BenchReport = {
+    ...report,
+    scenarios: [{
+      ...base,
+      server: { ...(base.server ?? {}), queue: { drainMs: {}, depthMax: 0 } },
+    }],
+  };
+  const text = renderReport(r, "text");
+  assert.doesNotMatch(text, /Server queue drain/);
+  assert.match(text, /Server queue depth max: 0/);
+  const md = renderReport(r, "markdown");
+  assert.doesNotMatch(md, /Queue drain/);
+  assert.match(md, /Queue depth max \(server\) \| 0/);
+});
+
 test("renderReport markdown - includes tables and the gate result", () => {
   const md = renderReport(report, "markdown");
   assert.match(md, /# Fedify benchmark report/);
diff --git a/packages/cli/src/bench/render/text.ts b/packages/cli/src/bench/render/text.ts
index 88b18b276..da13f30c1 100644
--- a/packages/cli/src/bench/render/text.ts
+++ b/packages/cli/src/bench/render/text.ts
@@ -79,14 +79,17 @@ function renderScenario(scenario: ScenarioResult): string[] {
       }`,
     );
   }
-  if (scenario.server?.queue?.drainMs != null) {
-    const depth = scenario.server.queue.depthMax;
+  const queue = scenario.server?.queue;
+  if (queue?.drainMs != null && hasPartial(queue.drainMs)) {
+    const depth = queue.depthMax;
     const suffix = depth == null ? "" : `  (depth max ${formatNumber(depth)})`;
     lines.push(
-      `  Server queue drain (ms): ${
-        describePartial(scenario.server.queue.drainMs)
-      }${suffix}`,
+      `  Server queue drain (ms): ${describePartial(queue.drainMs)}${suffix}`,
     );
+  } else if (queue?.depthMax != null) {
+    // Queue depth is reported even when no drain-latency histogram is present
+    // (the current stats reader supplies depth but not drain latency).
+    lines.push(`  Server queue depth max: ${formatNumber(queue.depthMax)}`);
   }
   if (scenario.errors.length > 0) {
     lines.push("  Errors:");
@@ -127,3 +130,8 @@ function describePartial(latency: PartialLatencyMs): string {
   if (latency.p99 != null) parts.push(`p99 ${formatNumber(latency.p99)}`);
   return parts.join("  ");
 }
+
+/** Whether a partial latency carries at least one renderable percentile. */
+function hasPartial(latency: PartialLatencyMs): boolean {
+  return latency.p50 != null || latency.p95 != null || latency.p99 != null;
+}

From 9b8881eb196ef676bc9817c31d4f3dcd859905a1 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 13:32:26 +0900
Subject: [PATCH 24/47] Reject non-HTTP and credentialed bench targets

`new URL("localhost:3000")` parses as the `localhost:` scheme with an
empty host, a common typo for a missing `http://`.  Normalization
accepted it, so `--dry-run` succeeded while a real run would misclassify
the target or build an unsupported fetch URL.  Targets carrying
credentials (`http://user:pass@host`) were likewise accepted even though
`fetch` rejects them.

Reject, during normalization, any target whose protocol is not `http:`
or `https:`, whose host is empty, or that carries embedded credentials,
with a message pointing at the likely fix.  The probe and runners only
make bare HTTP(S) requests, so these never produce a working run.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 .../cli/src/bench/scenario/normalize.test.ts  | 39 +++++++++++++++++++
 packages/cli/src/bench/scenario/normalize.ts  | 15 +++++++
 2 files changed, 54 insertions(+)

diff --git a/packages/cli/src/bench/scenario/normalize.test.ts b/packages/cli/src/bench/scenario/normalize.test.ts
index 084cded22..8456a264b 100644
--- a/packages/cli/src/bench/scenario/normalize.test.ts
+++ b/packages/cli/src/bench/scenario/normalize.test.ts
@@ -139,6 +139,45 @@ test("normalizeSuite - rejects an invalid target URL", () => {
   );
 });
 
+test("normalizeSuite - rejects a non-http(s) or host-less target", () => {
+  // `localhost:3000` (a missing-scheme typo) parses as the `localhost:` scheme
+  // with no host; reject it rather than misbehave later.
+  for (
+    const bad of [
+      "localhost:3000",
+      "ftp://localhost:3000",
+      "file:///tmp/x",
+      "ws://localhost:3000",
+      // `fetch` rejects URLs carrying credentials, so reject them up front.
+      "http://user@localhost:3000",
+      "http://user:pass@localhost:3000",
+    ]
+  ) {
+    assert.throws(
+      () => normalizeSuite(suite({ target: bad })),
+      SuiteNormalizeError,
+      `expected ${JSON.stringify(bad)} to be rejected`,
+    );
+  }
+  // The same rejection applies to a --target override.
+  assert.throws(
+    () => normalizeSuite(suite(), { target: "localhost:3000" }),
+    SuiteNormalizeError,
+  );
+});
+
+test("normalizeSuite - accepts http and https targets", () => {
+  assert.strictEqual(
+    normalizeSuite(suite({ target: "http://localhost:3000" })).target.protocol,
+    "http:",
+  );
+  assert.strictEqual(
+    normalizeSuite(suite({ target: "https://staging.example" })).target
+      .protocol,
+    "https:",
+  );
+});
+
 test("normalizeSuite - pipeline signing rejects a time-windowed target", () => {
   assert.throws(
     () =>
diff --git a/packages/cli/src/bench/scenario/normalize.ts b/packages/cli/src/bench/scenario/normalize.ts
index 7d449eeaa..ab0cbe1fa 100644
--- a/packages/cli/src/bench/scenario/normalize.ts
+++ b/packages/cli/src/bench/scenario/normalize.ts
@@ -111,6 +111,21 @@ export function normalizeSuite(
   } catch {
     throw new SuiteNormalizeError(`Invalid target URL: ${targetString}.`);
   }
+  // `new URL("localhost:3000")` parses as the `localhost:` scheme with no host,
+  // a common typo for a missing `http://`.  The probe and runners only make
+  // HTTP(S) requests (and `fetch` rejects URLs carrying credentials), so reject
+  // anything that is not a bare http(s) URL with a host.
+  if (
+    (target.protocol !== "http:" && target.protocol !== "https:") ||
+    target.hostname === "" ||
+    target.username !== "" || target.password !== ""
+  ) {
+    throw new SuiteNormalizeError(
+      `Invalid target URL ${JSON.stringify(targetString)}: a benchmark ` +
+        "target must be an http: or https: URL with a host and no embedded " +
+        "credentials (for example http://localhost:3000).",
+    );
+  }
   return {
     target,
     actors: suite.actors ?? [],

From b2d4ad9f19dfe1b50a574802daf3192548393222 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 13:47:08 +0900
Subject: [PATCH 25/47] Gate every inbox load destination, not just the target

The safety gate classified only the suite `target`, but an `inbox`
scenario's actual signed-load destination is the discovered inbox (or an
explicit `inbox:` URL), which can differ from the target.  A loopback
`target` with a public `recipient`, or `inbox: https://prod.example/inbox`,
would send benchmark POST load to a public inbox with no gate at all,
bypassing the guard against accidentally benchmarking production.  The
synthetic-reachability rule was likewise only checked against the target
tier, not the destination that actually verifies signatures.

Gate each resolved inbox destination before any load reaches it:

 -  assertInboxDestinationAllowed() refuses a public destination unless it
    shares the gated target's origin while the target advertises benchmark
    mode (inheriting its gate), or --allow-unsafe-target is given; and
    refuses a non-loopback destination unless a reachable synthetic host
    was advertised (--advertise-host).  Origins are compared (scheme, host,
    effective port), so an http inbox does not inherit an https target.
 -  The inbox runner calls an injected destination gate for each resolved
    inbox before sending; the orchestrator maps a refusal to exit 2.

Discovery (a read) still runs, but no benchmark load is sent to an
ungated destination.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 packages/cli/src/bench/action.test.ts      |  34 +++++++
 packages/cli/src/bench/action.ts           |  25 ++++-
 packages/cli/src/bench/safety/gate.test.ts | 104 ++++++++++++++++++++-
 packages/cli/src/bench/safety/gate.ts      |  58 +++++++++++-
 packages/cli/src/bench/scenarios/inbox.ts  |   9 +-
 packages/cli/src/bench/scenarios/runner.ts |   7 ++
 6 files changed, 230 insertions(+), 7 deletions(-)

diff --git a/packages/cli/src/bench/action.test.ts b/packages/cli/src/bench/action.test.ts
index 2116d1a86..c3b7be68c 100644
--- a/packages/cli/src/bench/action.test.ts
+++ b/packages/cli/src/bench/action.test.ts
@@ -302,6 +302,40 @@ scenarios:
   assert.match(message, /advertise-host/);
 });
 
+test("runBench - refuses an inbox destination off the gated target (exit 2)", async () => {
+  // A loopback target passes the gate, but an explicit public `inbox:` is the
+  // actual load destination; it must be gated too, or production could be
+  // benchmarked through the back door.
+  const target = await spawnTarget();
+  try {
+    const file = await writeSuite(`version: 1
+target: ${target.url.href}
+scenarios:
+  - name: inbox-shared
+    type: inbox
+    recipient: "${new URL("/users/alice", target.url).href}"
+    inbox: "https://prod.example/inbox"
+    load: { concurrency: 2 }
+    duration: 250ms
+`);
+    let code = -1;
+    let message = "";
+    await runBench(command({ scenario: file }), {
+      exit: (c) => {
+        code = c;
+      },
+      writeOutput: () => Promise.resolve(),
+      log: (m) => {
+        message = m;
+      },
+    });
+    assert.strictEqual(code, 2);
+    assert.match(message, /public inbox|allow-unsafe-target/);
+  } finally {
+    await target.close();
+  }
+});
+
 test("runBench - malformed expect assertion exits 2 before any load", async () => {
   // The expect typo must be caught in preflight, so the run exits 2 (a config
   // error) without ever probing the target or sending load.
diff --git a/packages/cli/src/bench/action.ts b/packages/cli/src/bench/action.ts
index 9973cc2d5..3a4013b68 100644
--- a/packages/cli/src/bench/action.ts
+++ b/packages/cli/src/bench/action.ts
@@ -20,7 +20,11 @@ import {
 } from "./scenario/normalize.ts";
 import type { Suite } from "./scenario/types.ts";
 import { validateSuite } from "./scenario/validate.ts";
-import { assertTargetAllowed, UnsafeTargetError } from "./safety/gate.ts";
+import {
+  assertInboxDestinationAllowed,
+  assertTargetAllowed,
+  UnsafeTargetError,
+} from "./safety/gate.ts";
 import { classifyTarget } from "./safety/tiers.ts";
 import { runnerFor } from "./scenarios/registry.ts";
 import {
@@ -156,6 +160,16 @@ export default async function runBench(
     userAgent: command.userAgent,
   });
 
+  // Gates each resolved inbox destination (which can differ from the suite
+  // target) before the runner sends load to it.
+  const assertDestinationAllowed = (url: URL): void =>
+    assertInboxDestinationAllowed(url, {
+      targetOrigin: suite.target.origin,
+      targetBenchmarkMode: probe.benchmarkMode,
+      allowUnsafe: command.allowUnsafeTarget,
+      advertised: command.advertiseHost != null,
+    });
+
   let fleet: SyntheticServer | undefined;
   const startedAt = new Date().toISOString();
   try {
@@ -176,6 +190,7 @@ export default async function runBench(
         allowPrivateAddress,
         fleet: fleet ?? null,
         fetch: fetchImpl,
+        assertDestinationAllowed,
       });
       results.push(buildScenarioResult(scenario, measurement));
     }
@@ -200,6 +215,14 @@ export default async function runBench(
       command.output,
     );
     return void exit(report.passed ? 0 : 1);
+  } catch (error) {
+    // A refused inbox destination (gated inside the runner, once resolved) is a
+    // safety error, like the target gate above: report it and exit 2.
+    if (error instanceof UnsafeTargetError) {
+      log(error.message);
+      return void exit(2);
+    }
+    throw error;
   } finally {
     await fleet?.close();
   }
diff --git a/packages/cli/src/bench/safety/gate.test.ts b/packages/cli/src/bench/safety/gate.test.ts
index 55948dbf8..6998b7df7 100644
--- a/packages/cli/src/bench/safety/gate.test.ts
+++ b/packages/cli/src/bench/safety/gate.test.ts
@@ -1,6 +1,10 @@
 import assert from "node:assert/strict";
 import test from "node:test";
-import { assertTargetAllowed, UnsafeTargetError } from "./gate.ts";
+import {
+  assertInboxDestinationAllowed,
+  assertTargetAllowed,
+  UnsafeTargetError,
+} from "./gate.ts";
 
 test("assertTargetAllowed - loopback/private are always allowed", () => {
   assert.doesNotThrow(() =>
@@ -66,3 +70,101 @@ test("assertTargetAllowed - dry-run bypasses the gate", () => {
     })
   );
 });
+
+function destContext(
+  overrides: Partial<Parameters<typeof assertInboxDestinationAllowed>[1]> = {},
+) {
+  return {
+    targetOrigin: "http://127.0.0.1:3000",
+    targetBenchmarkMode: false,
+    allowUnsafe: false,
+    advertised: false,
+    ...overrides,
+  };
+}
+
+test("assertInboxDestinationAllowed - loopback inbox is allowed", () => {
+  assert.doesNotThrow(() =>
+    assertInboxDestinationAllowed(
+      new URL("http://127.0.0.1:3000/inbox"),
+      destContext(),
+    )
+  );
+});
+
+test("assertInboxDestinationAllowed - a public inbox off the target is refused", () => {
+  // A loopback target with a public inbox (a public recipient, or an explicit
+  // inbox URL) must not receive load without the unsafe flag.
+  assert.throws(
+    () =>
+      assertInboxDestinationAllowed(
+        new URL("https://prod.example/inbox"),
+        destContext(),
+      ),
+    (error: unknown) =>
+      error instanceof UnsafeTargetError && /public inbox/.test(error.message),
+  );
+});
+
+test("assertInboxDestinationAllowed - the unsafe flag allows a public inbox", () => {
+  assert.doesNotThrow(() =>
+    assertInboxDestinationAllowed(
+      new URL("https://prod.example/inbox"),
+      destContext({ allowUnsafe: true, advertised: true }),
+    )
+  );
+});
+
+test("assertInboxDestinationAllowed - an inbox on the target origin inherits its gate", () => {
+  // Same origin as the gated target, which advertises benchmark mode.
+  assert.doesNotThrow(() =>
+    assertInboxDestinationAllowed(
+      new URL("https://staging.example/inbox"),
+      destContext({
+        targetOrigin: "https://staging.example",
+        targetBenchmarkMode: true,
+        advertised: true,
+      }),
+    )
+  );
+});
+
+test("assertInboxDestinationAllowed - same host, different scheme does not inherit", () => {
+  // The target is https (its benchmark-mode probe covered port 443); an http
+  // inbox on the same hostname is a different service (port 80), so it must not
+  // inherit the target's gate.
+  assert.throws(
+    () =>
+      assertInboxDestinationAllowed(
+        new URL("http://prod.example/inbox"),
+        destContext({
+          targetOrigin: "https://prod.example",
+          targetBenchmarkMode: true,
+          advertised: true,
+        }),
+      ),
+    (error: unknown) =>
+      error instanceof UnsafeTargetError && /public inbox/.test(error.message),
+  );
+});
+
+test("assertInboxDestinationAllowed - a non-loopback inbox needs an advertised host", () => {
+  // A private inbox is not a safety problem, but the synthetic server is
+  // unreachable from it unless a reachable host was advertised.
+  assert.throws(
+    () =>
+      assertInboxDestinationAllowed(
+        new URL("http://10.0.0.5:8000/inbox"),
+        destContext({ targetOrigin: "http://10.0.0.5:8000" }),
+      ),
+    (error: unknown) =>
+      error instanceof UnsafeTargetError &&
+      /advertise-host/.test(error.message),
+  );
+  assert.doesNotThrow(() =>
+    assertInboxDestinationAllowed(
+      new URL("http://10.0.0.5:8000/inbox"),
+      destContext({ targetOrigin: "http://10.0.0.5:8000", advertised: true }),
+    )
+  );
+});
diff --git a/packages/cli/src/bench/safety/gate.ts b/packages/cli/src/bench/safety/gate.ts
index 153316c6f..86f39496d 100644
--- a/packages/cli/src/bench/safety/gate.ts
+++ b/packages/cli/src/bench/safety/gate.ts
@@ -11,7 +11,7 @@
  * @module
  */
 
-import type { TargetTier } from "./tiers.ts";
+import { classifyTarget, type TargetTier } from "./tiers.ts";
 
 /** An error raised when a target is refused by the safety gate. */
 export class UnsafeTargetError extends Error {}
@@ -45,3 +45,59 @@ export function assertTargetAllowed(context: GateContext): void {
       "(mandatory in CI and any non-interactive context).",
   );
 }
+
+/** The inputs to gating a resolved inbox load destination. */
+export interface InboxDestinationGateContext {
+  /**
+   * The gated benchmark target's origin (scheme, host, and effective port).
+   * Compared by origin, not bare host, so a destination only inherits the
+   * target's gate when it is the very service the benchmark-mode probe covered
+   * (e.g. an `http://host` inbox does not inherit an `https://host` target).
+   */
+  readonly targetOrigin: string;
+  /** Whether the gated target advertises benchmark mode. */
+  readonly targetBenchmarkMode: boolean;
+  /** Whether `--allow-unsafe-target` was given. */
+  readonly allowUnsafe: boolean;
+  /** Whether a reachable synthetic host was advertised (`--advertise-host`). */
+  readonly advertised: boolean;
+}
+
+/**
+ * Asserts that a resolved inbox URL — the actual destination of signed
+ * benchmark load — may be sent to.  The suite's `target` is gated separately by
+ * {@link assertTargetAllowed}; this catches a destination that differs from it
+ * (a public `recipient`, or an explicit `inbox:` URL), so production cannot be
+ * benchmarked through the back door.
+ *
+ * A destination is allowed when it is loopback or private, or shares the gated
+ * target's origin while the target advertises benchmark mode (inheriting its
+ * gate), or `--allow-unsafe-target` is given.  Because the destination's server
+ * dereferences the synthetic actor while verifying signatures, a non-loopback
+ * destination additionally requires an advertised, reachable synthetic host.
+ * @param url The resolved inbox URL.
+ * @param context The destination gate inputs.
+ * @throws {UnsafeTargetError} If the destination is refused.
+ */
+export function assertInboxDestinationAllowed(
+  url: URL,
+  context: InboxDestinationGateContext,
+): void {
+  const tier = classifyTarget(url);
+  const inheritsTargetGate = url.origin === context.targetOrigin &&
+    context.targetBenchmarkMode;
+  if (tier === "public" && !inheritsTargetGate && !context.allowUnsafe) {
+    throw new UnsafeTargetError(
+      `Refusing to send benchmark load to ${url.href}: it is a public inbox ` +
+        "that is neither part of the benchmarked target nor covered by " +
+        "benchmark mode.  Pass --allow-unsafe-target to override.",
+    );
+  }
+  if (tier !== "loopback" && !context.advertised) {
+    throw new UnsafeTargetError(
+      `Refusing to send signed benchmark load to ${url.href}: the synthetic ` +
+        "actor server is unreachable from a non-loopback inbox.  Pass " +
+        "--advertise-host with an address it can reach.",
+    );
+  }
+}
diff --git a/packages/cli/src/bench/scenarios/inbox.ts b/packages/cli/src/bench/scenarios/inbox.ts
index 6df795c46..713ea55cf 100644
--- a/packages/cli/src/bench/scenarios/inbox.ts
+++ b/packages/cli/src/bench/scenarios/inbox.ts
@@ -74,10 +74,11 @@ export const inboxRunner: ScenarioRunner = {
         contextLoader: context.contextLoader,
         allowPrivateAddress: context.allowPrivateAddress,
       });
-      targets.push({
-        inbox: selectInbox(discovered, scenario.inbox),
-        actorUri: discovered.actorUri,
-      });
+      const inbox = selectInbox(discovered, scenario.inbox);
+      // Gate the actual load destination before sending anything to it: it can
+      // differ from the gated target (a public recipient, or an explicit inbox).
+      context.assertDestinationAllowed?.(inbox);
+      targets.push({ inbox, actorUri: discovered.actorUri });
     }
 
     const actors = fleet.actors;
diff --git a/packages/cli/src/bench/scenarios/runner.ts b/packages/cli/src/bench/scenarios/runner.ts
index 6bb6aba62..d84a38355 100644
--- a/packages/cli/src/bench/scenarios/runner.ts
+++ b/packages/cli/src/bench/scenarios/runner.ts
@@ -29,6 +29,13 @@ export interface RunContext {
   readonly rng?: Rng;
   /** Fetch implementation (overridable for tests). */
   readonly fetch?: typeof fetch;
+  /**
+   * Gates a resolved load destination (a discovered or explicit inbox URL)
+   * before any load is sent to it, throwing if it is not allowed.  The suite
+   * `target` is gated by the orchestrator; this covers destinations that differ
+   * from it.  Optional so direct runner tests need not supply it.
+   */
+  readonly assertDestinationAllowed?: (url: URL) => void;
 }
 
 /** A runner for one scenario type. */

From 4d980e2bf4cf00911a489ed5bf288baeef9e9a27 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 14:34:54 +0900
Subject: [PATCH 26/47] Do not follow redirects in benchmark traffic

The default fetch follows redirects, which let two safety checks be
bypassed.  A public target whose `stats` endpoint redirected to a host
serving benchmark-mode JSON was marked as advertising benchmark mode, so
the gate allowed load against it.  And a gated loopback, private, or
benchmark target that answered a WebFinger GET or a signed inbox POST
with a 307/308 could carry that load to an ungated public service,
slipping past the destination gate.

Make every benchmark request non-following:

 -  The benchmark-mode probe and the server stats read use
    `redirect: "manual"`, so a redirect is treated as "not advertised"
    and "unavailable" respectively rather than trusted.
 -  `sendRequest` re-wraps any non-manual request as `redirect: "manual"`
    and records a redirect (opaque or 3xx) as a failed send, so no signed
    load reaches the redirect target; the WebFinger and inbox requests are
    built with `redirect: "manual"` so the common path needs no re-clone.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 .../cli/src/bench/discovery/probe.test.ts     | 21 +++++++++
 packages/cli/src/bench/discovery/probe.ts     |  5 +++
 .../cli/src/bench/metrics/stats-client.ts     |  6 ++-
 .../cli/src/bench/scenarios/runner.test.ts    | 43 ++++++++++++++++++-
 packages/cli/src/bench/scenarios/runner.ts    | 19 +++++++-
 packages/cli/src/bench/scenarios/webfinger.ts |  5 ++-
 packages/cli/src/bench/signing/signer.ts      |  3 ++
 7 files changed, 98 insertions(+), 4 deletions(-)

diff --git a/packages/cli/src/bench/discovery/probe.test.ts b/packages/cli/src/bench/discovery/probe.test.ts
index 43be7a913..df41f3748 100644
--- a/packages/cli/src/bench/discovery/probe.test.ts
+++ b/packages/cli/src/bench/discovery/probe.test.ts
@@ -50,3 +50,24 @@ test("probeBenchmarkMode - a network error means no benchmark mode", async () =>
   );
   assert.deepEqual(probe, { benchmarkMode: false, fedifyVersion: null });
 });
+
+test("probeBenchmarkMode - does not follow redirects", async () => {
+  // The probe requests a non-following (manual) redirect; a redirect response
+  // therefore does not advertise benchmark mode, even if the redirect target
+  // would.
+  let requestedRedirect: string | undefined;
+  const probe = await probeBenchmarkMode(
+    new URL("http://public.example"),
+    (_input, init) => {
+      requestedRedirect = init?.redirect;
+      return Promise.resolve(
+        new Response(null, {
+          status: 302,
+          headers: { location: "https://benchmark.example/stats" },
+        }),
+      );
+    },
+  );
+  assert.strictEqual(requestedRedirect, "manual");
+  assert.deepEqual(probe, { benchmarkMode: false, fedifyVersion: null });
+});
diff --git a/packages/cli/src/bench/discovery/probe.ts b/packages/cli/src/bench/discovery/probe.ts
index 9ef82af72..cec057ded 100644
--- a/packages/cli/src/bench/discovery/probe.ts
+++ b/packages/cli/src/bench/discovery/probe.ts
@@ -32,8 +32,13 @@ export async function probeBenchmarkMode(
   fetchImpl: typeof fetch = fetch,
 ): Promise<BenchmarkProbe> {
   try {
+    // Do not follow redirects: a public target whose `stats` endpoint redirects
+    // to a host that does serve benchmark-mode JSON must not be taken as
+    // advertising benchmark mode itself.  A redirect yields a non-ok (manual)
+    // response, which falls through to "not advertised".
     const response = await fetchImpl(new URL(STATS_PATH, target), {
       headers: { accept: "application/json" },
+      redirect: "manual",
     });
     if (!response.ok) return notAdvertised();
     const json = await response.json() as {
diff --git a/packages/cli/src/bench/metrics/stats-client.ts b/packages/cli/src/bench/metrics/stats-client.ts
index 101039a5a..7039d4ea5 100644
--- a/packages/cli/src/bench/metrics/stats-client.ts
+++ b/packages/cli/src/bench/metrics/stats-client.ts
@@ -169,7 +169,11 @@ export async function fetchServerSnapshot(
   fetchImpl: typeof fetch = fetch,
 ): Promise<ServerSnapshot | null> {
   try {
-    const response = await fetchImpl(new URL(STATS_PATH, target));
+    // Do not follow redirects: the stats reading must come from the target
+    // itself, not from wherever a redirect points.
+    const response = await fetchImpl(new URL(STATS_PATH, target), {
+      redirect: "manual",
+    });
     if (!response.ok) return null;
     return parseServerSnapshot(await response.json());
   } catch {
diff --git a/packages/cli/src/bench/scenarios/runner.test.ts b/packages/cli/src/bench/scenarios/runner.test.ts
index ffcf08a7e..4b6fb9e33 100644
--- a/packages/cli/src/bench/scenarios/runner.test.ts
+++ b/packages/cli/src/bench/scenarios/runner.test.ts
@@ -1,10 +1,51 @@
 import assert from "node:assert/strict";
 import test from "node:test";
 import type { SendOutcome } from "../load/generator.ts";
-import { withMeasuredWindowStart } from "./runner.ts";
+import { sendRequest, withMeasuredWindowStart } from "./runner.ts";
 
 const ok: SendOutcome = { ok: true, status: 200 };
 
+test("sendRequest - does not follow redirects and counts them as failures", async () => {
+  let requestedRedirect: RequestRedirect | undefined;
+  const outcome = await sendRequest(
+    new Request("http://target.test/inbox", { method: "POST" }),
+    (input) => {
+      requestedRedirect = (input as Request).redirect;
+      return Promise.resolve(
+        new Response(null, {
+          status: 308,
+          headers: { location: "https://public.example/inbox" },
+        }),
+      );
+    },
+  );
+  // The send used a non-following (manual) redirect, and the redirect is a
+  // failed send rather than a delivery to the redirect target.
+  assert.strictEqual(requestedRedirect, "manual");
+  assert.strictEqual(outcome.ok, false);
+  assert.strictEqual(outcome.reason, "redirect");
+});
+
+test("sendRequest - a 2xx is a successful send", async () => {
+  const outcome = await sendRequest(
+    new Request("http://target.test/inbox", { method: "POST" }),
+    () => Promise.resolve(new Response(null, { status: 202 })),
+  );
+  assert.deepEqual(outcome, { ok: true, status: 202 });
+});
+
+test("sendRequest - a 4xx/5xx is a failed send with its status", async () => {
+  const outcome = await sendRequest(
+    new Request("http://target.test/inbox", { method: "POST" }),
+    () => Promise.resolve(new Response(null, { status: 500 })),
+  );
+  assert.deepEqual(outcome, {
+    ok: false,
+    status: 500,
+    reason: "status_500",
+  });
+});
+
 test("withMeasuredWindowStart - fires once at the warm-up boundary", async () => {
   const seenAt: number[] = [];
   let fires = 0;
diff --git a/packages/cli/src/bench/scenarios/runner.ts b/packages/cli/src/bench/scenarios/runner.ts
index d84a38355..cbb7acea3 100644
--- a/packages/cli/src/bench/scenarios/runner.ts
+++ b/packages/cli/src/bench/scenarios/runner.ts
@@ -54,10 +54,27 @@ export async function sendRequest(
   request: Request,
   fetchImpl: typeof fetch,
 ): Promise<SendOutcome> {
+  // Never follow redirects: a redirect could carry signed benchmark load to a
+  // host the safety gate never classified, so treat any redirect as a failed
+  // send.  Requests are normally built with `redirect: "manual"` already; this
+  // re-wraps any that are not, as a safety net.
+  const noFollow = request.redirect === "manual"
+    ? request
+    : new Request(request, { redirect: "manual" });
   try {
-    const response = await fetchImpl(request);
+    const response = await fetchImpl(noFollow);
     // Drain the body so the connection can be reused.
     await response.arrayBuffer().catch(() => {});
+    if (
+      response.type === "opaqueredirect" ||
+      (response.status >= 300 && response.status < 400)
+    ) {
+      return {
+        ok: false,
+        status: response.status === 0 ? undefined : response.status,
+        reason: "redirect",
+      };
+    }
     if (response.ok) return { ok: true, status: response.status };
     return {
       ok: false,
diff --git a/packages/cli/src/bench/scenarios/webfinger.ts b/packages/cli/src/bench/scenarios/webfinger.ts
index f2e4f189e..2567dc2bc 100644
--- a/packages/cli/src/bench/scenarios/webfinger.ts
+++ b/packages/cli/src/bench/scenarios/webfinger.ts
@@ -40,7 +40,10 @@ export const webfingerRunner: ScenarioRunner = {
         : [context.target.host]).map((r) => webfingerUrl(context.target, r));
     let index = 0;
     const rawSend = () =>
-      sendRequest(new Request(urls[index++ % urls.length]), fetchImpl);
+      sendRequest(
+        new Request(urls[index++ % urls.length], { redirect: "manual" }),
+        fetchImpl,
+      );
     // Snapshot the server's cumulative metrics at the measured-window boundary
     // so warm-up and earlier scenarios are diffed out of the reported numbers.
     // A few warm-up requests still in flight when the baseline is taken may be
diff --git a/packages/cli/src/bench/signing/signer.ts b/packages/cli/src/bench/signing/signer.ts
index 906290a11..27a609f10 100644
--- a/packages/cli/src/bench/signing/signer.ts
+++ b/packages/cli/src/bench/signing/signer.ts
@@ -70,6 +70,9 @@ export async function signInboxDelivery(
     method: "POST",
     headers: { "content-type": "application/activity+json" },
     body,
+    // Benchmark deliveries must not follow redirects to an ungated host; the
+    // sender re-applies this as a safety net if signing drops it.
+    redirect: "manual",
   });
   return await signRequest(
     request,

From 581a6049091ea93f7c6f0942034f1e2bf2e84aa2 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 16:54:32 +0900
Subject: [PATCH 27/47] Raise the Bun test timeout for @fedify/cli
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The benchmark end-to-end tests do real RSA key generation, signed inbox
delivery, and server round-trips, which take a few seconds under CI CPU
contention.  Bun applies a default per-test timeout of 5000 ms (node:test
and deno test have none), and the cli package's `test:bun` was the only
one without a `--timeout` flag, so `runBench - passing gate exits 0…` and
`runBench - failing gate exits 1` timed out on the Bun CI job while
passing everywhere else.

Run the cli Bun tests with `--timeout 60000`, matching the heaviest
sibling packages (fedify, vocab, the database adapters).

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 packages/cli/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/cli/package.json b/packages/cli/package.json
index 510693518..048d8bcd3 100644
--- a/packages/cli/package.json
+++ b/packages/cli/package.json
@@ -18,7 +18,7 @@
     "test": "node --test --experimental-transform-types 'src/**/*.test.ts' '!src/init/test/**'",
     "test-init": "deno task test-init",
     "pretest:bun": "pnpm build",
-    "test:bun": "bun test",
+    "test:bun": "bun test --timeout 60000",
     "run": "pnpm build && node --disable-warning=ExperimentalWarning dist/mod.js",
     "runi": "tsdown && node --disable-warning=ExperimentalWarning dist/mod.js",
     "run:bun": "pnpm build && bun dist/mod.js",

From 8a89a5daef30deb9a62d27fa63ef039b84c2e63c Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 17:18:21 +0900
Subject: [PATCH 28/47] Fix the docs build broken by inline templating braces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The benchmarking manual's "Templating" section used inline code spans
containing `${{ … }}`.  VitePress compiles each Markdown page as a Vue
component, and Vue interpreted the `{{ … }}` inside the inline `<code>`
as a mustache interpolation, producing invalid generated code
(`_ctx.…`) and failing the VitePress build with a Rollup parse error.
Fenced code blocks were unaffected because they render through the
syntax highlighter.

Rewrite the paragraph so it no longer puts double-brace delimiters in an
inline code span: it describes the templating in prose and points to the
`recipient` line in the example suite above, where the literal
`${{ target.host }}` already appears inside a fenced YAML block.  A full
`vitepress build` now succeeds.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 docs/manual/benchmarking.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/docs/manual/benchmarking.md b/docs/manual/benchmarking.md
index 51d24c841..1d32c506b 100644
--- a/docs/manual/benchmarking.md
+++ b/docs/manual/benchmarking.md
@@ -179,9 +179,10 @@ heterogeneous fleet, which is what a server actually receives.
 
 ### Templating
 
-Values support GitHub-Actions-style `${{ … }}` templating, kept logic-less
-(references and whitelisted helper calls only).  For example
-`${{ target.host }}` expands to the target's host.  Generated payloads use typed
+Values support GitHub-Actions-style templating, written with double-brace
+delimiters and kept logic-less: references and whitelisted helper calls only, no
+arbitrary code.  The `recipient` line in the example suite above uses one to
+build the actor URI from the target's host.  Generated payloads use typed
 directives such as `content: { generate: lorem, size: 2KB }` rather than string
 templates.  The tool owns actor URLs and activity ids, so each request gets a
 unique activity id automatically (which Fedify's always-on inbox idempotency

From 59c6b136c03b0ff385cf7a3136c2951e16318f7b Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 17:32:28 +0900
Subject: [PATCH 29/47] Show templating braces via a v-pre container in the
 docs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous fix avoided the VitePress build failure by rewording the
"Templating" section to drop the inline `${{ … }}` code, since Vue
compiled those braces inside inline code as a mustache interpolation.

Use VitePress's own escape instead: wrap the paragraph in a `::: v-pre`
container, where Vue leaves interpolation untouched, and restore the
explicit inline `${{ … }}` and `${{ target.host }}` so the templating
syntax is shown directly again.  A full `vitepress build` succeeds and
the rendered page contains the literal braces; `hongdown --check` stays
happy with the container (unlike a raw inline `<code v-pre>`, which it
reflows and breaks).

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
Assisted-by: Codex:gpt-5.5
---
 docs/manual/benchmarking.md | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/docs/manual/benchmarking.md b/docs/manual/benchmarking.md
index 1d32c506b..3d103d31f 100644
--- a/docs/manual/benchmarking.md
+++ b/docs/manual/benchmarking.md
@@ -179,15 +179,18 @@ heterogeneous fleet, which is what a server actually receives.
 
 ### Templating
 
-Values support GitHub-Actions-style templating, written with double-brace
-delimiters and kept logic-less: references and whitelisted helper calls only, no
-arbitrary code.  The `recipient` line in the example suite above uses one to
-build the actor URI from the target's host.  Generated payloads use typed
+::: v-pre
+
+Values support GitHub-Actions-style `${{ … }}` templating, kept logic-less
+(references and whitelisted helper calls only).  For example
+`${{ target.host }}` expands to the target's host.  Generated payloads use typed
 directives such as `content: { generate: lorem, size: 2KB }` rather than string
 templates.  The tool owns actor URLs and activity ids, so each request gets a
 unique activity id automatically (which Fedify's always-on inbox idempotency
 requires).
 
+:::
+
 ### Load generation and signing
 
 Open-loop (`rate`) is the default and the realistic model for incoming

From cc6a588dca6f03d03495aa74b8183940a36e7ce9 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 17:59:34 +0900
Subject: [PATCH 30/47] Stop refilling the presign buffer during the timed
 window

In `presign` mode the whole run is meant to be signed before the timed
window opens.  The buffered producer kept refilling as `next()` drained
it, so background signers created replacement requests during the run,
doing crypto on the client and skewing the very cost presign isolates.

Cap background production at the pre-signed total so the signers stop
once the run is signed; if an open-loop run overshoots its estimate (a
few extra Poisson arrivals), those are signed on demand rather than
triggering a continuous background refill.

https://github.com/fedify-dev/fedify/pull/791#discussion_r3361163774

Assisted-by: Claude Code:claude-opus-4-8
---
 .../cli/src/bench/signing/pipeline.test.ts    | 22 +++++++++++++++++++
 packages/cli/src/bench/signing/pipeline.ts    | 21 ++++++++++++++++++
 2 files changed, 43 insertions(+)

diff --git a/packages/cli/src/bench/signing/pipeline.test.ts b/packages/cli/src/bench/signing/pipeline.test.ts
index 891e253be..e1671d341 100644
--- a/packages/cli/src/bench/signing/pipeline.test.ts
+++ b/packages/cli/src/bench/signing/pipeline.test.ts
@@ -73,6 +73,28 @@ test("presign - signs the whole run up front without starvation", async () => {
   await pipeline.close();
 });
 
+test("presign - signs exactly the run up front and never refills", async () => {
+  let calls = 0;
+  const factory = () => {
+    calls++;
+    return Promise.resolve(new Request("http://sink/x", { method: "POST" }));
+  };
+  const pipeline = createSigningPipeline("presign", factory, {
+    total: 3,
+    signers: 2,
+  });
+  await pipeline.prime();
+  // The whole run is signed up front, and nothing beyond it.
+  assert.strictEqual(calls, 3);
+  // Draining the buffer must not trigger background refills during the run.
+  for (let i = 0; i < 3; i++) await pipeline.next();
+  assert.strictEqual(calls, 3);
+  // Overshooting the pre-signed estimate signs the extra on demand.
+  await pipeline.next();
+  assert.strictEqual(calls, 4);
+  await pipeline.close();
+});
+
 test("close - rejects a pending consumer", async () => {
   const pipeline = createSigningPipeline("pipeline", fakeFactory(50), {
     bufferSize: 1,
diff --git a/packages/cli/src/bench/signing/pipeline.ts b/packages/cli/src/bench/signing/pipeline.ts
index bd661c863..e9ba94af0 100644
--- a/packages/cli/src/bench/signing/pipeline.ts
+++ b/packages/cli/src/bench/signing/pipeline.ts
@@ -74,6 +74,10 @@ export function createSigningPipeline(
       fillTarget: total,
       signers,
       countStarvation: false,
+      // Sign the whole run up front and then stop: the background signers must
+      // not refill as the buffer drains, or signing would run during the timed
+      // window and defeat the point of presigning.
+      maxProduced: total,
     });
   }
   const bufferSize = options.bufferSize ?? DEFAULT_BUFFER_SIZE;
@@ -99,6 +103,12 @@ interface BufferedOptions {
   readonly fillTarget: number;
   readonly signers: number;
   readonly countStarvation: boolean;
+  /**
+   * A cap on how many requests the background signers produce in total.  Used by
+   * `presign` to sign the run once and then stop; omitted (unbounded) for
+   * `pipeline`, which refills the buffer for the whole run.
+   */
+  readonly maxProduced?: number;
 }
 
 function createBuffered(
@@ -110,6 +120,8 @@ function createBuffered(
     resolve: (request: Request) => void;
     reject: (error: unknown) => void;
   }> = [];
+  const maxProduced = options.maxProduced ?? Infinity;
+  let produced = 0;
   let starvationCount = 0;
   let inFlight = 0;
   let closed = false;
@@ -137,6 +149,10 @@ function createBuffered(
 
   async function producer(): Promise<void> {
     while (!closed) {
+      // Stop once the whole run is signed (presign): don't refill as the buffer
+      // drains, so signing stays out of the timed window.  Unbounded for
+      // `pipeline`, which keeps the buffer full for the whole run.
+      if (produced + inFlight >= maxProduced) break;
       if (
         waiters.length === 0 && ready.length + inFlight >= options.bufferSize
       ) {
@@ -154,6 +170,7 @@ function createBuffered(
         const result = await Promise.race([pending, closeSignal]);
         if (result === CLOSED || closed) break;
         consecutiveFailures = 0;
+        produced++;
         deliver(result);
       } catch (error) {
         // A transient failure is dropped, but a run of failures with no
@@ -176,6 +193,10 @@ function createBuffered(
       if (buffered != null) return Promise.resolve(buffered);
       if (fatalError != null) return Promise.reject(fatalError);
       if (closed) return Promise.reject(new PipelineClosedError("closed"));
+      // Presign overshoot: the run asked for more than the pre-signed total
+      // (e.g. a few extra Poisson arrivals), so sign the extra on demand rather
+      // than refilling the whole run in the background.
+      if (produced >= maxProduced) return Promise.resolve().then(factory);
       if (options.countStarvation) starvationCount++;
       return new Promise<Request>((resolve, reject) => {
         waiters.push({ resolve, reject });

From 9d0368570b7a9c464fb50f324b5e8c86fb3f466e Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 18:01:55 +0900
Subject: [PATCH 31/47] Bound recursion depth and payload size from suite input

A scenario suite is parsed from user-supplied YAML or JSON, so its shape
is untrusted enough that a pathologically deep tree or an enormous
generated payload should fail with a clear error rather than overflow the
stack or exhaust memory.

Add a recursion-depth guard to the template renderer and to the
config-hash walk, and (separately) cap a generated payload's size in
`resolveGenerate`, where the string is actually allocated; `parseSize`
stays a plain unit parser.  The template renderer also keeps the original
reference for subtrees it did not change, avoiding needless cloning.

https://github.com/fedify-dev/fedify/pull/791#discussion_r3361181132
https://github.com/fedify-dev/fedify/pull/791#discussion_r3361181139
https://github.com/fedify-dev/fedify/pull/791#discussion_r3361222943

Assisted-by: Claude Code:claude-opus-4-8
---
 packages/cli/src/bench/result/build.test.ts   |  6 ++++
 packages/cli/src/bench/result/build.ts        | 17 +++++++---
 .../cli/src/bench/template/generate.test.ts   | 15 +++++++++
 packages/cli/src/bench/template/generate.ts   | 14 +++++++++
 .../cli/src/bench/template/template.test.ts   | 11 +++++++
 packages/cli/src/bench/template/template.ts   | 31 ++++++++++++++++---
 6 files changed, 85 insertions(+), 9 deletions(-)

diff --git a/packages/cli/src/bench/result/build.test.ts b/packages/cli/src/bench/result/build.test.ts
index ec34dd45b..19b14b9be 100644
--- a/packages/cli/src/bench/result/build.test.ts
+++ b/packages/cli/src/bench/result/build.test.ts
@@ -130,3 +130,9 @@ test("detectEnvironment - reports runtime, os, and cpu count", () => {
   assert.ok(env.os.length > 0);
   assert.ok(env.cpuCount >= 0);
 });
+
+test("configHash - rejects pathologically deep config", () => {
+  let deep: unknown = 1;
+  for (let i = 0; i < 200; i++) deep = { n: deep };
+  assert.throws(() => configHash(deep), RangeError);
+});
diff --git a/packages/cli/src/bench/result/build.ts b/packages/cli/src/bench/result/build.ts
index 364e91a3c..37f9d0eed 100644
--- a/packages/cli/src/bench/result/build.ts
+++ b/packages/cli/src/bench/result/build.ts
@@ -137,7 +137,13 @@ export function configHash(config: unknown): string {
   return `sha256:${digest}`;
 }
 
-function canonicalJson(value: unknown): string {
+/** A guard against unbounded recursion on pathologically nested input. */
+const MAX_HASH_DEPTH = 100;
+
+function canonicalJson(value: unknown, depth = 0): string {
+  if (depth > MAX_HASH_DEPTH) {
+    throw new RangeError("Maximum depth exceeded while hashing the config.");
+  }
   // Mirror JSON.stringify: `undefined` is dropped from objects and becomes
   // `null` inside arrays.
   if (value === undefined) return "null";
@@ -146,18 +152,19 @@ function canonicalJson(value: unknown): string {
   // are hashed by their serialized form rather than as an empty object.
   const toJson = (value as { toJSON?: unknown }).toJSON;
   if (typeof toJson === "function") {
-    return canonicalJson((toJson as () => unknown).call(value));
+    return canonicalJson((toJson as () => unknown).call(value), depth + 1);
   }
   if (Array.isArray(value)) {
-    return `[${value.map(canonicalJson).join(",")}]`;
+    return `[${value.map((v) => canonicalJson(v, depth + 1)).join(",")}]`;
   }
   const entries = Object.entries(value as Record<string, unknown>)
     .filter(([, v]) => v !== undefined)
     .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
   return `{${
-    entries.map(([k, v]) => `${JSON.stringify(k)}:${canonicalJson(v)}`).join(
-      ",",
+    entries.map(([k, v]) =>
+      `${JSON.stringify(k)}:${canonicalJson(v, depth + 1)}`
     )
+      .join(",")
   }}`;
 }
 
diff --git a/packages/cli/src/bench/template/generate.test.ts b/packages/cli/src/bench/template/generate.test.ts
index edfffe3d9..4d35d72d9 100644
--- a/packages/cli/src/bench/template/generate.test.ts
+++ b/packages/cli/src/bench/template/generate.test.ts
@@ -71,3 +71,18 @@ test("resolveGenerate - unknown generator throws", () => {
     RangeError,
   );
 });
+
+test("resolveGenerate - rejects an oversized payload", () => {
+  // Guards against memory exhaustion / String.repeat overflow from a huge size.
+  // `parseSize` still parses the units; the limit applies when generating.
+  assert.strictEqual(parseSize("1GB"), 1024 ** 3);
+  assert.throws(
+    () => resolveGenerate({ generate: "lorem", size: "200MB" }),
+    RangeError,
+  );
+  // The maximum (100 MiB) itself is still produced.
+  assert.strictEqual(
+    resolveGenerate({ generate: "lorem", size: "100MB" }).length,
+    100 * 1024 * 1024,
+  );
+});
diff --git a/packages/cli/src/bench/template/generate.ts b/packages/cli/src/bench/template/generate.ts
index 49a8b4ab7..e96aa40f2 100644
--- a/packages/cli/src/bench/template/generate.ts
+++ b/packages/cli/src/bench/template/generate.ts
@@ -9,6 +9,14 @@
  * @module
  */
 
+/**
+ * The largest payload {@link resolveGenerate} will produce (100 MiB).  A
+ * generated payload is held in memory as a single string, so a much larger size
+ * would exhaust memory or overflow `String.repeat`; a realistic benchmark body
+ * is far smaller.  (`parseSize` itself stays a plain parser with no limit.)
+ */
+const MAX_PAYLOAD_SIZE = 100 * 1024 * 1024;
+
 /** Multipliers for the size units accepted by {@link parseSize}. */
 const SIZE_UNITS: Readonly<Record<string, number>> = {
   b: 1,
@@ -98,6 +106,12 @@ const LOREM =
  */
 export function resolveGenerate(directive: GenerateDirective): string {
   const size = directive.size == null ? 0 : parseSize(directive.size);
+  if (size > MAX_PAYLOAD_SIZE) {
+    throw new RangeError(
+      `Payload size ${JSON.stringify(directive.size)} exceeds the maximum of ` +
+        `${MAX_PAYLOAD_SIZE} bytes.`,
+    );
+  }
   switch (directive.generate) {
     case "lorem":
       return generateLorem(size);
diff --git a/packages/cli/src/bench/template/template.test.ts b/packages/cli/src/bench/template/template.test.ts
index 10fe54000..163ce7a32 100644
--- a/packages/cli/src/bench/template/template.test.ts
+++ b/packages/cli/src/bench/template/template.test.ts
@@ -115,3 +115,14 @@ test("defaultHelpers - uuid returns a UUID string", () => {
     /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/,
   );
 });
+
+test("renderTemplates - rejects pathologically deep nesting", () => {
+  let deep: unknown = "leaf";
+  for (let i = 0; i < 200; i++) deep = { nested: deep };
+  assert.throws(() => renderTemplates(deep, ctx), TemplateError);
+});
+
+test("renderTemplates - returns the same reference for unchanged subtrees", () => {
+  const value = { a: { b: "no expressions here" }, list: [1, 2] };
+  assert.strictEqual(renderTemplates(value, ctx), value);
+});
diff --git a/packages/cli/src/bench/template/template.ts b/packages/cli/src/bench/template/template.ts
index 9cbdb0dad..f27d6e930 100644
--- a/packages/cli/src/bench/template/template.ts
+++ b/packages/cli/src/bench/template/template.ts
@@ -36,6 +36,9 @@ const IDENT_RE = /^[A-Za-z_]\w*$/;
 /** Property names that must never be resolved, to avoid prototype access. */
 const FORBIDDEN = new Set(["__proto__", "prototype", "constructor"]);
 
+/** A guard against unbounded recursion on pathologically nested input. */
+const MAX_DEPTH = 100;
+
 /**
  * Recursively renders every `${{ ... }}` expression in a value.
  *
@@ -52,15 +55,35 @@ export function renderTemplates<T>(value: T, context: TemplateContext = {}): T {
   return renderValue(value, context) as T;
 }
 
-function renderValue(value: unknown, ctx: TemplateContext): unknown {
+function renderValue(
+  value: unknown,
+  ctx: TemplateContext,
+  depth = 0,
+): unknown {
+  if (depth > MAX_DEPTH) {
+    throw new TemplateError("Maximum template nesting depth exceeded.");
+  }
   if (typeof value === "string") return renderString(value, ctx);
-  if (Array.isArray(value)) return value.map((item) => renderValue(item, ctx));
+  // Walk arrays and objects, but keep the original reference for any subtree
+  // that did not change, to avoid needless cloning.
+  if (Array.isArray(value)) {
+    let changed = false;
+    const out = value.map((item) => {
+      const rendered = renderValue(item, ctx, depth + 1);
+      if (rendered !== item) changed = true;
+      return rendered;
+    });
+    return changed ? out : value;
+  }
   if (value != null && typeof value === "object") {
+    let changed = false;
     const out: Record<string, unknown> = {};
     for (const [key, item] of Object.entries(value)) {
-      out[key] = renderValue(item, ctx);
+      const rendered = renderValue(item, ctx, depth + 1);
+      if (rendered !== item) changed = true;
+      out[key] = rendered;
     }
-    return out;
+    return changed ? out : value;
   }
   return value;
 }

From 3e964040e87716985a97987d03f04d2c1076b15a Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 18:02:05 +0900
Subject: [PATCH 32/47] Enforce exactly one HTTP signature standard per actor
 group

httpStandardOf claimed in its error message that a group must declare
exactly one HTTP request signature standard, but it used find(), so a
group listing both draft-cavage-http-signatures-12 and rfc9421 silently
took the first instead of being rejected.  The JSON Schema already
forbids this, so this is defense in depth for the runtime path.

Collect all HTTP standards and throw when there is not exactly one, so
the function honors its own contract even on unvalidated input.

https://github.com/fedify-dev/fedify/pull/791#discussion_r3361222914

Assisted-by: Claude Code:claude-opus-4-8
---
 packages/cli/src/bench/actor/fleet.test.ts | 19 +++++++++++++++++++
 packages/cli/src/bench/actor/fleet.ts      | 15 ++++++++++++---
 2 files changed, 31 insertions(+), 3 deletions(-)
 create mode 100644 packages/cli/src/bench/actor/fleet.test.ts

diff --git a/packages/cli/src/bench/actor/fleet.test.ts b/packages/cli/src/bench/actor/fleet.test.ts
new file mode 100644
index 000000000..76d03194d
--- /dev/null
+++ b/packages/cli/src/bench/actor/fleet.test.ts
@@ -0,0 +1,19 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { buildFleet } from "./fleet.ts";
+
+test("buildFleet - rejects a group with two HTTP signature standards", async () => {
+  await assert.rejects(
+    buildFleet([{
+      signatureStandards: ["draft-cavage-http-signatures-12", "rfc9421"],
+    }]),
+    TypeError,
+  );
+});
+
+test("buildFleet - rejects a group with no HTTP signature standard", async () => {
+  await assert.rejects(
+    buildFleet([{ signatureStandards: ["ld-signatures"] }]),
+    TypeError,
+  );
+});
diff --git a/packages/cli/src/bench/actor/fleet.ts b/packages/cli/src/bench/actor/fleet.ts
index 1453d43c9..d7e0a8187 100644
--- a/packages/cli/src/bench/actor/fleet.ts
+++ b/packages/cli/src/bench/actor/fleet.ts
@@ -33,16 +33,25 @@ export interface FleetMember {
 function httpStandardOf(
   standards: readonly SignatureStandard[],
 ): HttpSignatureStandard {
-  const http = standards.find((s) =>
+  // The JSON Schema already requires exactly one HTTP request scheme per group;
+  // enforce it here too so this function honors its own contract even if called
+  // with unvalidated input, rather than silently picking the first of several.
+  const http = standards.filter((s) =>
     s === "draft-cavage-http-signatures-12" || s === "rfc9421"
   );
-  if (http == null) {
+  if (http.length === 0) {
     throw new TypeError(
       "Every actor group must declare exactly one HTTP request signature " +
         "standard.",
     );
   }
-  return http as HttpSignatureStandard;
+  if (http.length > 1) {
+    throw new TypeError(
+      "Every actor group must declare exactly one HTTP request signature " +
+        `standard, but multiple were given: ${http.join(", ")}.`,
+    );
+  }
+  return http[0] as HttpSignatureStandard;
 }
 
 /**

From 374b41ecc5ce07b13c25a7f27e4359d3d42f7fea Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 18:02:14 +0900
Subject: [PATCH 33/47] Make the sumErrors status range a single atomic
 argument

sumErrors took optional min and max parameters but read max through a
non-null assertion, so a future caller passing only min would compile
and then silently count nothing (`status < undefined` is always false).
Replace the loose pair with one optional { min, max } range so the bounds
cannot be supplied half-way, and update the call sites.

https://github.com/fedify-dev/fedify/pull/791#discussion_r3361222933

Assisted-by: Claude Code:claude-opus-4-8
---
 .../cli/src/bench/result/expect/evaluate.ts   | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/packages/cli/src/bench/result/expect/evaluate.ts b/packages/cli/src/bench/result/expect/evaluate.ts
index f4bf752be..092226468 100644
--- a/packages/cli/src/bench/result/expect/evaluate.ts
+++ b/packages/cli/src/bench/result/expect/evaluate.ts
@@ -138,9 +138,9 @@ function lookupValue(metrics: MetricView, metric: string): number | null {
     case "errors.total":
       return sumErrors(metrics.errors);
     case "errors.4xx":
-      return sumErrors(metrics.errors, 400, 500);
+      return sumErrors(metrics.errors, { min: 400, max: 500 });
     case "errors.5xx":
-      return sumErrors(metrics.errors, 500, 600);
+      return sumErrors(metrics.errors, { min: 500, max: 600 });
   }
   if (metric.startsWith("latency.")) {
     return latencyField(metrics.client.latencyMs, metric.slice(8));
@@ -194,13 +194,22 @@ function partialField(
   }
 }
 
-function sumErrors(errors: ErrorBucket[], min?: number, max?: number): number {
+/**
+ * Sums error counts, optionally restricted to a half-open HTTP status range.
+ * The bounds are a single coupled argument so a caller cannot pass one without
+ * the other.
+ */
+function sumErrors(
+  errors: ErrorBucket[],
+  range?: { readonly min: number; readonly max: number },
+): number {
   let total = 0;
   for (const error of errors) {
-    if (min == null) {
+    if (range == null) {
       total += error.count;
     } else if (
-      error.status != null && error.status >= min && error.status < max!
+      error.status != null && error.status >= range.min &&
+      error.status < range.max
     ) {
       total += error.count;
     }

From 95fd24c030b8ae1cc563af2ee5ad615bf654434e Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 19:29:30 +0900
Subject: [PATCH 34/47] Validate an explicit inbox URL before running

An `inbox` value other than "shared" or "personal" is an explicit inbox
URL that selectInbox() feeds to new URL() during the run.  A typo like
`inbox: shraed` made new URL() throw there, mid-run and outside
runBench's configuration-error handling, crashing the CLI instead of
exiting 2; a non-http(s) URL would also slip through to the send path as
a measured failure.

Validate the inbox mode in the runner's preflight validate(): anything
other than "shared"/"personal" must be a bare http(s) URL with a host and
no credentials, mirroring the target URL check, so a bad value is a
clean configuration error before any load.

https://github.com/fedify-dev/fedify/pull/791#discussion_r3361632607

Assisted-by: Claude Code:claude-opus-4-8
---
 .../cli/src/bench/scenarios/inbox.test.ts     | 26 ++++++++++++++++
 packages/cli/src/bench/scenarios/inbox.ts     | 31 +++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/packages/cli/src/bench/scenarios/inbox.test.ts b/packages/cli/src/bench/scenarios/inbox.test.ts
index 100107886..57df0e970 100644
--- a/packages/cli/src/bench/scenarios/inbox.test.ts
+++ b/packages/cli/src/bench/scenarios/inbox.test.ts
@@ -303,3 +303,29 @@ test("inboxRunner.validate - rejects activity options it cannot honor", () => {
     inboxRunner.validate!(resolve({ type: "Create", object: { type: "Note" } }))
   );
 });
+
+test("inboxRunner.validate - rejects a malformed or non-http inbox value", () => {
+  function resolve(inbox: string) {
+    return normalizeSuite({
+      version: 1,
+      target: "http://localhost:3000",
+      scenarios: [{
+        name: "inbox",
+        type: "inbox",
+        recipient: "http://localhost:3000/users/alice",
+        inbox,
+      }],
+    }).scenarios[0];
+  }
+  // A typo that is not a URL would otherwise crash selectInbox mid-run.
+  assert.throws(() => inboxRunner.validate!(resolve("shraed")), /inbox/);
+  // A non-http(s) URL would slip to the send path as a failure.
+  assert.throws(
+    () => inboxRunner.validate!(resolve("ftp://host/inbox")),
+    /http\(s\)/,
+  );
+  // shared, personal, and a bare http(s) URL are accepted.
+  for (const ok of ["shared", "personal", "https://host.example/inbox"]) {
+    assert.doesNotThrow(() => inboxRunner.validate!(resolve(ok)));
+  }
+});
diff --git a/packages/cli/src/bench/scenarios/inbox.ts b/packages/cli/src/bench/scenarios/inbox.ts
index 713ea55cf..8061bd727 100644
--- a/packages/cli/src/bench/scenarios/inbox.ts
+++ b/packages/cli/src/bench/scenarios/inbox.ts
@@ -51,6 +51,7 @@ interface InboxTarget {
 export const inboxRunner: ScenarioRunner = {
   validate(scenario: ResolvedScenario): void {
     validateActivity(scenario);
+    validateInbox(scenario);
   },
 
   async run(context: RunContext) {
@@ -151,6 +152,36 @@ export const inboxRunner: ScenarioRunner = {
   },
 };
 
+/**
+ * Validates the scenario's `inbox` mode.  `"shared"` and `"personal"` select a
+ * discovered inbox; any other value is an explicit inbox URL the run will POST
+ * to, so it must be a usable bare http(s) URL.  Without this preflight check, a
+ * typo like `inbox: shraed` would crash `selectInbox` with an uncaught error
+ * mid-run, and a non-http URL would slip through to the send path.
+ */
+function validateInbox(scenario: ResolvedScenario): void {
+  const mode = scenario.inbox;
+  if (mode == null || mode === "shared" || mode === "personal") return;
+  let url: URL;
+  try {
+    url = new URL(mode);
+  } catch {
+    throw new Error(
+      `Scenario "${scenario.name}": inbox must be "shared", "personal", or an ` +
+        `http(s) URL; got ${JSON.stringify(mode)}.`,
+    );
+  }
+  if (
+    (url.protocol !== "http:" && url.protocol !== "https:") ||
+    url.hostname === "" || url.username !== "" || url.password !== ""
+  ) {
+    throw new Error(
+      `Scenario "${scenario.name}": inbox URL must be a bare http(s) URL with ` +
+        `a host and no credentials; got ${JSON.stringify(mode)}.`,
+    );
+  }
+}
+
 /**
  * Rejects the activity options the inbox runner cannot yet honor: it always
  * delivers a `Create` carrying an embedded `Note`, so a different activity or

From 46302d3007f9b222eeeba084d3bb560e8cb6091f Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 19:45:15 +0900
Subject: [PATCH 35/47] Re-validate the inbox URL inside the inbox runner's
 run()

run() already re-runs validateActivity() defensively because validate()
is optional in the ScenarioRunner contract, but it did not re-run the
inbox-mode check, so a direct run() call (as in the unit tests) would
validate the activity yet skip the explicit-inbox-URL validation.  Call
validateInbox() alongside it for the same protection.

https://github.com/fedify-dev/fedify/issues/783

Assisted-by: Claude Code:claude-opus-4-8
---
 packages/cli/src/bench/scenarios/inbox.ts | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/packages/cli/src/bench/scenarios/inbox.ts b/packages/cli/src/bench/scenarios/inbox.ts
index 8061bd727..9feb62681 100644
--- a/packages/cli/src/bench/scenarios/inbox.ts
+++ b/packages/cli/src/bench/scenarios/inbox.ts
@@ -64,7 +64,10 @@ export const inboxRunner: ScenarioRunner = {
     if (scenario.recipients.length < 1) {
       throw new Error("The inbox scenario requires a recipient.");
     }
+    // `validate()` is optional in the runner contract, so re-check here too,
+    // keeping a direct `run()` call (as in tests) safe.
     validateActivity(scenario);
+    validateInbox(scenario);
     const fetchImpl = context.fetch ?? fetch;
     // Discover every recipient's inbox the way a real peer would, then rotate
     // across them so multi-recipient suites spread load over each inbox.

From d9b5c575d84d52999583c32a10a84705cccf8e2d Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 20:03:43 +0900
Subject: [PATCH 36/47] Type the bench preflight runners array explicitly

`let runners;` had no type or initializer, so it was implicitly `any`
and indexing it later (runners[i].run(...)) lost type safety.  Annotate
it as ReturnType<typeof runnerFor>[].

https://github.com/fedify-dev/fedify/pull/791#discussion_r3362064930

Assisted-by: Claude Code:claude-opus-4-8
---
 packages/cli/src/bench/action.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/cli/src/bench/action.ts b/packages/cli/src/bench/action.ts
index 3a4013b68..d30636a91 100644
--- a/packages/cli/src/bench/action.ts
+++ b/packages/cli/src/bench/action.ts
@@ -93,7 +93,7 @@ export default async function runBench(
   // Preflight every runner so an unsupported scenario type, an option the
   // runner cannot honor, or a malformed `expect` assertion fails fast, before
   // any probe or load.
-  let runners;
+  let runners: ReturnType<typeof runnerFor>[];
   try {
     runners = suite.scenarios.map((scenario) => {
       const runner = runnerFor(scenario.type);

From 6a1d685b7f582dc948e13a24a378ffcdbcc7a176 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 20:32:39 +0900
Subject: [PATCH 37/47] Render templates copy-on-write to avoid cloning
 unchanged subtrees

renderValue preserved object identity but still eagerly built a throwaway
array (value.map) or object ({}) for every container, even when no nested
expression changed.  Allocate a new container only once a child actually
changes, back-filling the unchanged prefix, so a subtree with no template
expressions is returned by reference with no allocation.

https://github.com/fedify-dev/fedify/pull/791#discussion_r3362256269

Assisted-by: Claude Code:claude-opus-4-8
---
 .../cli/src/bench/template/template.test.ts   | 19 +++++++++++
 packages/cli/src/bench/template/template.ts   | 34 +++++++++++--------
 2 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/packages/cli/src/bench/template/template.test.ts b/packages/cli/src/bench/template/template.test.ts
index 163ce7a32..c2b82882c 100644
--- a/packages/cli/src/bench/template/template.test.ts
+++ b/packages/cli/src/bench/template/template.test.ts
@@ -126,3 +126,22 @@ test("renderTemplates - returns the same reference for unchanged subtrees", () =
   const value = { a: { b: "no expressions here" }, list: [1, 2] };
   assert.strictEqual(renderTemplates(value, ctx), value);
 });
+
+test("renderTemplates - copy-on-write keeps unchanged siblings intact", () => {
+  // Only some entries change; the rest must be carried over correctly when the
+  // container is lazily copied on the first change.
+  const value = {
+    keep: "static",
+    host: "${{ target.host }}",
+    list: ["x", "${{ name }}", "z"],
+  };
+  const out = renderTemplates(value, ctx) as Record<string, unknown>;
+  assert.deepEqual(out, {
+    keep: "static",
+    host: "example.com",
+    list: ["x", "bob", "z"],
+  });
+  // The static leaf is carried over unchanged; the templated sibling is not.
+  assert.strictEqual(out.keep, value.keep);
+  assert.notStrictEqual(out.host, value.host);
+});
diff --git a/packages/cli/src/bench/template/template.ts b/packages/cli/src/bench/template/template.ts
index f27d6e930..ba93b592e 100644
--- a/packages/cli/src/bench/template/template.ts
+++ b/packages/cli/src/bench/template/template.ts
@@ -64,26 +64,32 @@ function renderValue(
     throw new TemplateError("Maximum template nesting depth exceeded.");
   }
   if (typeof value === "string") return renderString(value, ctx);
-  // Walk arrays and objects, but keep the original reference for any subtree
-  // that did not change, to avoid needless cloning.
+  // Walk arrays and objects copy-on-write: allocate a new container only once a
+  // child actually changes (back-filling the unchanged prefix), so an unchanged
+  // subtree is returned by reference with no cloning at all.
   if (Array.isArray(value)) {
-    let changed = false;
-    const out = value.map((item) => {
+    let out: unknown[] | undefined;
+    for (let i = 0; i < value.length; i++) {
+      const item = value[i];
       const rendered = renderValue(item, ctx, depth + 1);
-      if (rendered !== item) changed = true;
-      return rendered;
-    });
-    return changed ? out : value;
+      if (out == null && rendered !== item) out = value.slice(0, i);
+      if (out != null) out.push(rendered);
+    }
+    return out ?? value;
   }
   if (value != null && typeof value === "object") {
-    let changed = false;
-    const out: Record<string, unknown> = {};
-    for (const [key, item] of Object.entries(value)) {
+    const entries = Object.entries(value);
+    let out: Record<string, unknown> | undefined;
+    for (let i = 0; i < entries.length; i++) {
+      const [key, item] = entries[i];
       const rendered = renderValue(item, ctx, depth + 1);
-      if (rendered !== item) changed = true;
-      out[key] = rendered;
+      if (out == null && rendered !== item) {
+        out = {};
+        for (let j = 0; j < i; j++) out[entries[j][0]] = entries[j][1];
+      }
+      if (out != null) out[key] = rendered;
     }
-    return changed ? out : value;
+    return out ?? value;
   }
   return value;
 }

From 9da7473312cdc3995ad2e8eaf4d04d426c65f09a Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 20:51:54 +0900
Subject: [PATCH 38/47] Defer the measured-window callback so a sync throw
 rejects

withMeasuredWindowStart wrapped the callback as
Promise.resolve(onMeasuredWindowStart()), which runs it synchronously
before Promise.resolve, so a synchronous throw in the callback would
escape the promise chain instead of becoming a rejection.  Invoke it
through Promise.resolve().then(...), matching the signing pipeline's
pattern, so a sync throw rejects the gated send.

https://github.com/fedify-dev/fedify/pull/791#discussion_r3362371710

Assisted-by: Claude Code:claude-opus-4-8
---
 packages/cli/src/bench/scenarios/runner.test.ts | 12 ++++++++++++
 packages/cli/src/bench/scenarios/runner.ts      |  4 +++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/packages/cli/src/bench/scenarios/runner.test.ts b/packages/cli/src/bench/scenarios/runner.test.ts
index 4b6fb9e33..5a8c15a22 100644
--- a/packages/cli/src/bench/scenarios/runner.test.ts
+++ b/packages/cli/src/bench/scenarios/runner.test.ts
@@ -96,3 +96,15 @@ test("withMeasuredWindowStart - never fires if no request reaches the window", a
   for (const offset of [0, 100, 999]) await send(offset);
   assert.strictEqual(fires, 0);
 });
+
+test("withMeasuredWindowStart - a synchronous callback throw becomes a rejection", async () => {
+  const send = withMeasuredWindowStart(
+    0,
+    () => {
+      throw new Error("boom");
+    },
+    () => Promise.resolve(ok),
+  );
+  // The throw must surface as a rejected promise, not escape synchronously.
+  await assert.rejects(send(0), /boom/);
+});
diff --git a/packages/cli/src/bench/scenarios/runner.ts b/packages/cli/src/bench/scenarios/runner.ts
index cbb7acea3..8a87f6a70 100644
--- a/packages/cli/src/bench/scenarios/runner.ts
+++ b/packages/cli/src/bench/scenarios/runner.ts
@@ -133,7 +133,9 @@ export function withMeasuredWindowStart(
   let started: Promise<void> | undefined;
   return (scheduledAtMs: number) => {
     if (scheduledAtMs < warmupMs) return send(scheduledAtMs);
-    started ??= Promise.resolve(onMeasuredWindowStart());
+    // Defer the call through `.then` so a synchronous throw in the callback
+    // becomes a rejection rather than escaping the promise chain.
+    started ??= Promise.resolve().then(onMeasuredWindowStart);
     return started.then(() => send(scheduledAtMs));
   };
 }

From 180abef664e46060feef5ff8e24c4a97f37a7906 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 20:54:15 +0900
Subject: [PATCH 39/47] Sign exactly the encoded body bytes

signInboxDelivery passed body.buffer to signRequest.  body comes from
TextEncoder().encode() (an exact-fit view), so this was correct, but it
would include bytes outside the view (before byteOffset or past the
view's end) were body ever a view into a larger buffer, breaking the
digest.  Slice the exact view bytes instead.  signRequest's body option
is an ArrayBuffer, so passing the Uint8Array directly would not
type-check.

https://github.com/fedify-dev/fedify/pull/791#discussion_r3362371722

Assisted-by: Claude Code:claude-opus-4-8
---
 packages/cli/src/bench/signing/signer.ts | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/packages/cli/src/bench/signing/signer.ts b/packages/cli/src/bench/signing/signer.ts
index 27a609f10..4c40ae758 100644
--- a/packages/cli/src/bench/signing/signer.ts
+++ b/packages/cli/src/bench/signing/signer.ts
@@ -78,6 +78,15 @@ export async function signInboxDelivery(
     request,
     actor.keys.rsa.privateKey,
     actor.rsaKeyId,
-    { spec: actor.httpStandard, body: body.buffer as ArrayBuffer },
+    {
+      spec: actor.httpStandard,
+      // Slice exactly the encoded view: `body.buffer` would also carry any
+      // bytes outside the view were `body` ever a view into a larger buffer,
+      // and signRequest's `body` option is an ArrayBuffer (not a Uint8Array).
+      body: body.buffer.slice(
+        body.byteOffset,
+        body.byteOffset + body.byteLength,
+      ) as ArrayBuffer,
+    },
   );
 }

From 72f56c7f621fbec51da0289d5ed81ba8ce0be5ea Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 21:21:57 +0900
Subject: [PATCH 40/47] Handle escaped quotes in template helper arguments

splitTopLevel did not track the backslash escape, so an escaped quote
inside a helper string argument was treated as a closing quote and split
the arguments wrongly; parseArg's regex also forbade any embedded quote.
Track the escape state when splitting and accept (then unescape) escaped
quotes when parsing the argument.

https://github.com/fedify-dev/fedify/pull/791#discussion_r3362476195

Assisted-by: Claude Code:claude-opus-4-8
---
 packages/cli/src/bench/template/template.test.ts |  7 +++++++
 packages/cli/src/bench/template/template.ts      | 16 +++++++++++++---
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/packages/cli/src/bench/template/template.test.ts b/packages/cli/src/bench/template/template.test.ts
index c2b82882c..2a1eed1ff 100644
--- a/packages/cli/src/bench/template/template.test.ts
+++ b/packages/cli/src/bench/template/template.test.ts
@@ -145,3 +145,10 @@ test("renderTemplates - copy-on-write keeps unchanged siblings intact", () => {
   assert.strictEqual(out.keep, value.keep);
   assert.notStrictEqual(out.host, value.host);
 });
+
+test("renderTemplates - handles escaped quotes in helper arguments", () => {
+  // The single-quoted argument contains an escaped single quote.
+  assert.strictEqual(renderTemplates("${{ upper('a\\'b') }}", ctx), "A'B");
+  // And a double-quoted argument with an escaped double quote.
+  assert.strictEqual(renderTemplates('${{ lower("X\\"Y") }}', ctx), 'x"y');
+});
diff --git a/packages/cli/src/bench/template/template.ts b/packages/cli/src/bench/template/template.ts
index ba93b592e..b91275c98 100644
--- a/packages/cli/src/bench/template/template.ts
+++ b/packages/cli/src/bench/template/template.ts
@@ -143,8 +143,17 @@ function splitTopLevel(source: string): string[] {
   const parts: string[] = [];
   let current = "";
   let quote: string | null = null;
+  let escaped = false;
   for (const char of source) {
-    if (quote != null) {
+    if (escaped) {
+      // A backslash escapes the next character (including a quote), so it does
+      // not open or close a string.
+      current += char;
+      escaped = false;
+    } else if (char === "\\") {
+      current += char;
+      escaped = true;
+    } else if (quote != null) {
       if (char === quote) quote = null;
       current += char;
     } else if (char === "'" || char === '"') {
@@ -165,8 +174,9 @@ function splitTopLevel(source: string): string[] {
 }
 
 function parseArg(arg: string, ctx: TemplateContext): unknown {
-  const str = arg.match(/^'([^']*)'$/) ?? arg.match(/^"([^"]*)"$/);
-  if (str != null) return str[1];
+  // Quoted string: `\x` escapes unescape to `x`; its own quote must be escaped.
+  const str = arg.match(/^(['"])((?:\\[\s\S]|(?!\1)[^\\])*)\1$/);
+  if (str != null) return str[2].replace(/\\([\s\S])/g, "$1");
   if (/^-?\d+(?:\.\d+)?$/.test(arg)) return Number(arg);
   if (arg === "true") return true;
   if (arg === "false") return false;

From 428c538be1d81929b5ccaad128e2e076896c0a1a Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 21:24:12 +0900
Subject: [PATCH 41/47] Bind dual-stack when advertising a hostname

resolveAdvertiseHost bound an advertised hostname to the IPv4 wildcard
(0.0.0.0).  If the hostname resolves to an AAAA record (or the target
prefers IPv6), the target dereferences the actor URLs over IPv6 with
nothing listening, so signed deliveries fail key lookup.  A hostname can
resolve to either family, so bind dual-stack (::); an IPv4 literal still
binds 0.0.0.0 and an IPv6 literal still binds ::.

https://github.com/fedify-dev/fedify/pull/791#discussion_r3362494723

Assisted-by: Claude Code:claude-opus-4-8
---
 packages/cli/src/bench/server/synthetic.test.ts | 15 ++++++++++-----
 packages/cli/src/bench/server/synthetic.ts      |  4 +++-
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/packages/cli/src/bench/server/synthetic.test.ts b/packages/cli/src/bench/server/synthetic.test.ts
index 7ea1ef117..8e4d9b1b5 100644
--- a/packages/cli/src/bench/server/synthetic.test.ts
+++ b/packages/cli/src/bench/server/synthetic.test.ts
@@ -101,11 +101,7 @@ test("spawnSyntheticServer - unknown paths 404", async () => {
   }
 });
 
-test("resolveAdvertiseHost - DNS and IPv4 bind all IPv4 interfaces", () => {
-  assert.deepEqual(resolveAdvertiseHost("bench.local"), {
-    bindHost: "0.0.0.0",
-    urlHost: "bench.local",
-  });
+test("resolveAdvertiseHost - IPv4 literal binds the IPv4 wildcard", () => {
   assert.deepEqual(resolveAdvertiseHost("192.168.1.10"), {
     bindHost: "0.0.0.0",
     urlHost: "192.168.1.10",
@@ -117,6 +113,15 @@ test("resolveAdvertiseHost - DNS and IPv4 bind all IPv4 interfaces", () => {
   });
 });
 
+test("resolveAdvertiseHost - a hostname binds dual-stack", () => {
+  // A hostname can resolve to an A or AAAA record, so bind every interface of
+  // both families rather than assuming IPv4.
+  assert.deepEqual(resolveAdvertiseHost("bench.local"), {
+    bindHost: "::",
+    urlHost: "bench.local",
+  });
+});
+
 test("resolveAdvertiseHost - IPv6 binds all IPv6 interfaces and is bracketed", () => {
   assert.deepEqual(resolveAdvertiseHost("2001:db8::1"), {
     bindHost: "::",
diff --git a/packages/cli/src/bench/server/synthetic.ts b/packages/cli/src/bench/server/synthetic.ts
index 0013e110b..76d1b98b3 100644
--- a/packages/cli/src/bench/server/synthetic.ts
+++ b/packages/cli/src/bench/server/synthetic.ts
@@ -185,7 +185,9 @@ export function resolveAdvertiseHost(host: string): ResolvedAdvertiseHost {
       bindHost = "::";
     } else {
       urlHost = trimmed;
-      bindHost = "0.0.0.0";
+      // An IPv4 literal binds the IPv4 wildcard; a hostname can resolve to
+      // either family, so bind dual-stack (::) to also serve an AAAA record.
+      bindHost = /^\d{1,3}(?:\.\d{1,3}){3}$/.test(trimmed) ? "0.0.0.0" : "::";
     }
   }
   try {

From bf94455946423ddae256ce51675f8c8956e46aa0 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 21:24:25 +0900
Subject: [PATCH 42/47] Describe --dry-run accurately as an offline plan
 preview

The --dry-run help promised to "resolve discovery", but the command
returns right after printing the normalized plan: it never contacts the
target, performs recipient discovery, or gates the resolved inbox, so a
bad recipient or off-target inbox can look valid in a dry run and only
fail in the real run.  Match the help (and the gate's comment) to what
dry-run actually does, consistent with the manual: print the plan
without contacting the target or sending load.

https://github.com/fedify-dev/fedify/pull/791#discussion_r3362494725

Assisted-by: Claude Code:claude-opus-4-8
---
 packages/cli/src/bench/command.ts     | 3 ++-
 packages/cli/src/bench/safety/gate.ts | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/packages/cli/src/bench/command.ts b/packages/cli/src/bench/command.ts
index c48dd10fd..ffca2c90b 100644
--- a/packages/cli/src/bench/command.ts
+++ b/packages/cli/src/bench/command.ts
@@ -75,7 +75,8 @@ export const benchCommand = command(
       dryRun: withDefault(
         flag("--dry-run", {
           description:
-            message`Resolve discovery and print the plan without sending load.`,
+            message`Print the normalized plan without contacting the target or \
+sending load.`,
         }),
         false,
       ),
diff --git a/packages/cli/src/bench/safety/gate.ts b/packages/cli/src/bench/safety/gate.ts
index 86f39496d..c89ed867f 100644
--- a/packages/cli/src/bench/safety/gate.ts
+++ b/packages/cli/src/bench/safety/gate.ts
@@ -5,8 +5,8 @@
  * advertises benchmark mode (the operator's "not production" assertion).  Only
  * a public target that does not advertise benchmark mode is gated, behind an
  * explicit `--allow-unsafe-target`.  There is no interactive prompt, so the
- * flag is mandatory in CI and any non-TTY context.  A `--dry-run` only inspects
- * (discovery reads), so it bypasses the gate.
+ * flag is mandatory in CI and any non-TTY context.  An inspection-only run
+ * (the `dryRun` flag) sends no load, so it bypasses the gate.
  * @since 2.3.0
  * @module
  */

From 74a5f3cb2af553c14928b3f44fa8f24af824c152 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 21:38:01 +0900
Subject: [PATCH 43/47] Harden server stats parsing against malformed snapshots

Two defensive gaps in parsing the target's stats JSON: a null or
undefined entry in a scope's metrics array made the whole parse throw
(caught, but silently dropping every server metric), and merging
histogram data points summed counts whenever the lengths matched without
checking that the bucket boundaries were identical, which would misalign
buckets and skew percentiles.

Filter out null metric entries so the rest still parse, and only sum
histogram points that share the exact same boundaries.

https://github.com/fedify-dev/fedify/pull/791#discussion_r3362615371
https://github.com/fedify-dev/fedify/pull/791#discussion_r3362615365

Assisted-by: Claude Code:claude-opus-4-8
---
 .../src/bench/metrics/stats-client.test.ts    | 36 +++++++++++++++++++
 .../cli/src/bench/metrics/stats-client.ts     | 16 ++++++---
 2 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/packages/cli/src/bench/metrics/stats-client.test.ts b/packages/cli/src/bench/metrics/stats-client.test.ts
index 4ca68ca02..bf6958350 100644
--- a/packages/cli/src/bench/metrics/stats-client.test.ts
+++ b/packages/cli/src/bench/metrics/stats-client.test.ts
@@ -256,3 +256,39 @@ test("fetchServerSnapshot - null on a failed request, empty on success", async (
   );
   assert.deepEqual(empty, { signature: null, queueDepthMax: null });
 });
+
+test("parseServerSnapshot - skips null metric entries and parses the rest", () => {
+  const snap = parseServerSnapshot({
+    scopeMetrics: [{
+      metrics: [
+        null,
+        {
+          name: "activitypub.signature.verification.duration",
+          dataPointType: "histogram",
+          dataPoints: [
+            { value: { buckets: { boundaries: [5, 10], counts: [1, 2, 3] } } },
+          ],
+        },
+      ],
+    }],
+  });
+  assert.deepEqual(snap?.signature?.counts, [1, 2, 3]);
+});
+
+test("parseServerSnapshot - does not sum histogram points with different boundaries", () => {
+  const snap = parseServerSnapshot({
+    scopeMetrics: [{
+      metrics: [{
+        name: "activitypub.signature.verification.duration",
+        dataPointType: "histogram",
+        dataPoints: [
+          { value: { buckets: { boundaries: [5, 10], counts: [1, 1, 1] } } },
+          { value: { buckets: { boundaries: [5, 20], counts: [2, 2, 2] } } },
+        ],
+      }],
+    }],
+  });
+  // The second point's boundaries differ, so it is skipped, not misaligned.
+  assert.deepEqual(snap?.signature?.boundaries, [5, 10]);
+  assert.deepEqual(snap?.signature?.counts, [1, 1, 1]);
+});
diff --git a/packages/cli/src/bench/metrics/stats-client.ts b/packages/cli/src/bench/metrics/stats-client.ts
index 7039d4ea5..2280aa202 100644
--- a/packages/cli/src/bench/metrics/stats-client.ts
+++ b/packages/cli/src/bench/metrics/stats-client.ts
@@ -202,9 +202,12 @@ function flattenMetrics(snapshot: Snapshot): SnapshotMetric[] {
   const scopes = Array.isArray(snapshot?.scopeMetrics)
     ? snapshot.scopeMetrics
     : [];
-  return scopes.flatMap((scope) =>
-    Array.isArray(scope?.metrics) ? scope.metrics : []
-  );
+  return scopes.flatMap((scope) => {
+    const metrics = scope?.metrics;
+    // Drop null/undefined entries so one malformed element does not make the
+    // whole snapshot parse throw and silently omit every server metric.
+    return Array.isArray(metrics) ? metrics.filter((m) => m != null) : [];
+  });
 }
 
 function mergeHistogram(
@@ -223,7 +226,12 @@ function mergeHistogram(
     if (boundaries == null) {
       boundaries = [...b];
       counts = [...c];
-    } else if (counts != null && counts.length === c.length) {
+    } else if (
+      counts != null && counts.length === c.length &&
+      boundaries.length === b.length && boundaries.every((v, i) => v === b[i])
+    ) {
+      // Only sum data points that share the exact same bucketing; differing
+      // boundaries would misalign the counts and skew the percentiles.
       for (let i = 0; i < c.length; i++) counts[i] += c[i];
     }
   }

From 1f6d30f26849802066b2a4d22c7c96f120c8954e Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 21:40:24 +0900
Subject: [PATCH 44/47] Keep the unsafe-target override CLI-only

--allow-unsafe-target was config-backed, so a system, user, or project
.fedify.toml with bench.allowUnsafeTarget = true would make every run
behave as if the flag were passed, silently disabling the safety gate and
letting load reach a public, non-benchmark target.  The override is meant
to be an explicit per-run acknowledgment, not a persisted default.

Make the flag a plain CLI flag (no config binding) and drop
allowUnsafeTarget from the bench config schema, so it can only be given on
the command line.

https://github.com/fedify-dev/fedify/pull/791#discussion_r3362634246

Assisted-by: Claude Code:claude-opus-4-8
---
 packages/cli/src/bench/command.ts | 14 +++++++-------
 packages/cli/src/config.ts        |  4 +++-
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/packages/cli/src/bench/command.ts b/packages/cli/src/bench/command.ts
index ffca2c90b..3196bc833 100644
--- a/packages/cli/src/bench/command.ts
+++ b/packages/cli/src/bench/command.ts
@@ -34,17 +34,17 @@ const formatOption = bindConfig(
   },
 );
 
-const allowUnsafeTarget = bindConfig(
+// Deliberately NOT config-backed: this safety override must be an explicit
+// per-run acknowledgment on the command line, so a persisted config file cannot
+// silently disable the gate for every run.
+const allowUnsafeTarget = withDefault(
   flag("--allow-unsafe-target", {
     description:
       message`Allow benchmarking a public target that does not advertise \
-benchmark mode.`,
+benchmark mode.  Must be given on the command line for each run; it cannot be \
+set in a configuration file.`,
   }),
-  {
-    context: configContext,
-    key: (config) => config.bench?.allowUnsafeTarget ?? false,
-    default: false,
-  },
+  false,
 );
 
 export const benchCommand = command(
diff --git a/packages/cli/src/config.ts b/packages/cli/src/config.ts
index 58b63664e..11be46e9f 100644
--- a/packages/cli/src/config.ts
+++ b/packages/cli/src/config.ts
@@ -110,10 +110,12 @@ const nodeinfoSchema = object({
 
 /**
  * Schema for the bench command configuration.
+ *
+ * `allowUnsafeTarget` is intentionally absent: the unsafe-target override is a
+ * CLI-only, per-run acknowledgment, never a persisted default.
  */
 const benchSchema = object({
   format: optional(picklist(["text", "json", "markdown"])),
-  allowUnsafeTarget: optional(boolean()),
 });
 
 /**

From 71eb909c0f19c9b6f80c73277954bd860c1393ca Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 22:04:48 +0900
Subject: [PATCH 45/47] Tolerate immutable request headers in withUserAgent

withUserAgent set the User-Agent on a prebuilt Request in place.  If such
a request ever has immutable headers, set() throws a TypeError and the
send crashes.  Try the in-place set (the fast path for the requests this
tool builds, which have mutable headers) and fall back to a cloned
Request with merged headers if it throws.

https://github.com/fedify-dev/fedify/pull/791#discussion_r3362706656

Assisted-by: Claude Code:claude-opus-4-8
---
 packages/cli/src/bench/action.ts | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/packages/cli/src/bench/action.ts b/packages/cli/src/bench/action.ts
index d30636a91..b91486b7e 100644
--- a/packages/cli/src/bench/action.ts
+++ b/packages/cli/src/bench/action.ts
@@ -248,10 +248,17 @@ export function withUserAgent(
   // inferable.
   return ((input: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
     if (input instanceof Request && init === undefined) {
-      if (!input.headers.has("user-agent")) {
+      if (input.headers.has("user-agent")) return fetchImpl(input);
+      try {
         input.headers.set("user-agent", userAgent);
+      } catch {
+        // Some Request objects have immutable headers; fall back to a clone.
+        // Only `set()` is guarded, so a throw from `fetchImpl` propagates.
+        const headers = new Headers(input.headers);
+        headers.set("user-agent", userAgent);
+        return fetchImpl(new Request(input, { headers }));
       }
       return fetchImpl(input);
     }
     const headers = new Headers(
       init?.headers ?? (input instanceof Request ? input.headers : undefined),

From b7418a7820aef577a514fb9f2f3c920f9f30a86a Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 22:07:05 +0900
Subject: [PATCH 46/47] Use a valid URL for the webfinger recipient fallback

When a webfinger scenario has no recipients, the runner fell back to the
target's schemeless host (for example localhost:3000), which
convertUrlIfHandle cannot parse as a URL and would throw.  Fall back to
the target's full href, which is always a valid URL.

https://github.com/fedify-dev/fedify/pull/791#discussion_r3362706664

Assisted-by: Claude Code:claude-opus-4-8
---
 packages/cli/src/bench/scenarios/webfinger.ts | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/packages/cli/src/bench/scenarios/webfinger.ts b/packages/cli/src/bench/scenarios/webfinger.ts
index 2567dc2bc..14b77a093 100644
--- a/packages/cli/src/bench/scenarios/webfinger.ts
+++ b/packages/cli/src/bench/scenarios/webfinger.ts
@@ -37,7 +37,9 @@ export const webfingerRunner: ScenarioRunner = {
     const urls =
       (context.scenario.recipients.length > 0
         ? context.scenario.recipients
-        : [context.target.host]).map((r) => webfingerUrl(context.target, r));
+        // Fall back to the target's full URL (a valid URL), not its schemeless
+        // host, which convertUrlIfHandle could not parse.
+        : [context.target.href]).map((r) => webfingerUrl(context.target, r));
     let index = 0;
     const rawSend = () =>
       sendRequest(

From c12f90713d4ff48149d9476ed058d4578e969b28 Mon Sep 17 00:00:00 2001
From: Hong Minhee <hong@minhee.org>
Date: Fri, 5 Jun 2026 22:07:17 +0900
Subject: [PATCH 47/47] Derive module dir without import.meta.dirname for Node
 20.0

import.meta.dirname is only available on Node >= 20.11, but the package
supports Node >= 20.0, so on Node 20.0 to 20.10 it is undefined and feeds
undefined into join(), aborting the schema and render tests before they
run.  Derive the directory from dirname(fileURLToPath(import.meta.url))
instead, which works across all supported Node versions.

https://github.com/fedify-dev/fedify/pull/791#discussion_r3362735401

Assisted-by: Claude Code:claude-opus-4-8
---
 packages/cli/src/bench/render/render.test.ts | 13 +++++--------
 packages/cli/src/bench/schema-paths.ts       |  9 +++++++--
 packages/cli/src/bench/schema.test.ts        |  6 ++++--
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/packages/cli/src/bench/render/render.test.ts b/packages/cli/src/bench/render/render.test.ts
index f9bcb4406..4c6688a61 100644
--- a/packages/cli/src/bench/render/render.test.ts
+++ b/packages/cli/src/bench/render/render.test.ts
@@ -1,21 +1,18 @@
 import { type Schema, Validator } from "@cfworker/json-schema";
 import assert from "node:assert/strict";
 import { readFileSync } from "node:fs";
-import { join } from "node:path";
+import { dirname, join } from "node:path";
 import test from "node:test";
+import { fileURLToPath } from "node:url";
 import type { BenchReport } from "../result/model.ts";
 import { reportSchemaV1 } from "../result/schema.ts";
 import { renderReport } from "./index.ts";
 
+// `import.meta.dirname` needs Node >= 20.11; derive it from the URL instead.
+const here = dirname(fileURLToPath(import.meta.url));
 const report = JSON.parse(
   readFileSync(
-    join(
-      import.meta.dirname!,
-      "..",
-      "__fixtures__",
-      "reports",
-      "inbox-report.json",
-    ),
+    join(here, "..", "__fixtures__", "reports", "inbox-report.json"),
     "utf-8",
   ),
 ) as BenchReport;
diff --git a/packages/cli/src/bench/schema-paths.ts b/packages/cli/src/bench/schema-paths.ts
index bbfc0d8cf..b554ebd21 100644
--- a/packages/cli/src/bench/schema-paths.ts
+++ b/packages/cli/src/bench/schema-paths.ts
@@ -9,11 +9,16 @@
  * @module
  */
 
-import { join } from "node:path";
+import { dirname, join } from "node:path";
+import { fileURLToPath } from "node:url";
+
+// `import.meta.dirname` is only available on Node >= 20.11, but this package
+// supports Node >= 20.0, so derive the directory from the module URL instead.
+const here = dirname(fileURLToPath(import.meta.url));
 
 /** The absolute path to the repository's *schema/bench/* directory. */
 export const SCHEMA_DIR: string = join(
-  import.meta.dirname!,
+  here,
   "..",
   "..",
   "..",
diff --git a/packages/cli/src/bench/schema.test.ts b/packages/cli/src/bench/schema.test.ts
index abace7489..647a1c559 100644
--- a/packages/cli/src/bench/schema.test.ts
+++ b/packages/cli/src/bench/schema.test.ts
@@ -2,14 +2,16 @@ import { type Schema, Validator } from "@cfworker/json-schema";
 import assert from "node:assert/strict";
 import { execFileSync } from "node:child_process";
 import { readdirSync, readFileSync } from "node:fs";
-import { join } from "node:path";
+import { dirname, join } from "node:path";
 import test from "node:test";
+import { fileURLToPath } from "node:url";
 import { parseSuiteText } from "./scenario/load.ts";
 import { SCHEMA_DIR, serializeSchema } from "./schema-paths.ts";
 import { PUBLISHED_SCHEMAS } from "./schemas.ts";
 
 const REPO_ROOT = join(SCHEMA_DIR, "..", "..");
-const FIXTURES = join(import.meta.dirname!, "__fixtures__");
+// `import.meta.dirname` needs Node >= 20.11; derive it from the URL instead.
+const FIXTURES = join(dirname(fileURLToPath(import.meta.url)), "__fixtures__");
 
 function collectRefs(node: unknown, refs: string[] = []): string[] {
   if (Array.isArray(node)) {