From bfdd7c37ac93a6657bfe20349be2fd19338937c7 Mon Sep 17 00:00:00 2001
From: chen <clean6378@gmail.com>
Date: Thu, 11 Jun 2026 23:51:04 +0800
Subject: [PATCH 1/5] test: add parse/export/search performance benchmarks and
 CI artifacts

---
 .github/workflows/ci.yml              | 35 ++++++++++++
 CONTRIBUTING.md                       |  1 +
 benchmarks/README.md                  | 36 ++++++++++++
 benchmarks/baselines.json             | 10 ++++
 pyproject.toml                        |  5 +-
 requirements-dev.txt                  |  1 +
 tests/benchmarks/__init__.py          |  0
 tests/benchmarks/conftest.py          | 79 +++++++++++++++++++++++++++
 tests/benchmarks/test_export_bench.py | 38 +++++++++++++
 tests/benchmarks/test_parse_bench.py  | 24 ++++++++
 tests/benchmarks/test_parse_memory.py | 23 ++++++++
 tests/benchmarks/test_search_bench.py | 18 ++++++
 12 files changed, 269 insertions(+), 1 deletion(-)
 create mode 100644 benchmarks/README.md
 create mode 100644 benchmarks/baselines.json
 create mode 100644 tests/benchmarks/__init__.py
 create mode 100644 tests/benchmarks/conftest.py
 create mode 100644 tests/benchmarks/test_export_bench.py
 create mode 100644 tests/benchmarks/test_parse_bench.py
 create mode 100644 tests/benchmarks/test_parse_memory.py
 create mode 100644 tests/benchmarks/test_search_bench.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 545dfa5..e7372ff 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -204,3 +204,38 @@ jobs:
           esac
           npm install --no-save "${PKG}@${ROLLUP_VERSION}"
       - run: npm test
+
+  benchmarks:
+    name: Performance benchmarks (informational)
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      actions: write
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+        with:
+          persist-credentials: false
+
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
+        with:
+          python-version: "3.12"
+          cache: pip
+          cache-dependency-path: |
+            requirements.txt
+            requirements-dev.txt
+
+      - name: Install dev dependencies
+        run: pip install -r requirements-dev.txt
+
+      - name: Run benchmarks
+        run: >
+          pytest tests/benchmarks/
+          --benchmark-only
+          --benchmark-json=benchmark-results.json
+          --benchmark-columns=min,max,mean,stddev,rounds
+          -o addopts=
+
+      - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
+        with:
+          name: benchmark-results
+          path: benchmark-results.json
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 48bc61d..a123b2f 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -66,6 +66,7 @@ pytest tests/test_api_integration.py -v
 pytest tests/test_search.py -v
 pytest tests/test_api_routes.py -v
 pytest tests/test_error_codes.py -v
+pytest tests/benchmarks/ --benchmark-only -o addopts= -v   # performance baselines (see benchmarks/README.md)
 ```
 
 ### JavaScript (vitest)
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 0000000..b8d9e05
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,36 @@
+# Performance benchmarks
+
+Repeatable local measurements for parse, bulk export, and search hot paths.
+
+## Run locally
+
+```bash
+pip install -r requirements-dev.txt
+pytest tests/benchmarks/ --benchmark-only -o addopts= -v
+```
+
+## Memory check
+
+```bash
+pytest tests/benchmarks/test_parse_memory.py -v
+```
+
+The memory test also runs as part of the normal `pytest` suite (timing benchmarks are skipped via `--benchmark-skip` in `pyproject.toml`).
+
+## Scenarios
+
+| Group | What |
+|-------|------|
+| parse | `parse_session` on 10-, 500-, and 5000-line JSONL files |
+| export | `run_bulk_export` over 10 / 50 / 100 sessions |
+| search | `GET /api/search` over a 50-session synthetic corpus |
+
+Large JSONL files (5000+ lines) are generated at test session scope under pytest's temp directory — not committed to git.
+
+## CI
+
+The `benchmarks` workflow job uploads `benchmark-results.json` as a downloadable artifact. There is no regression gate yet.
+
+## Refresh baselines
+
+After intentional performance work, copy the key mean timings from a local run into `baselines.json` and fill in its `updated` (date) and `machine` fields. This file is informational only; CI does not compare against it.
diff --git a/benchmarks/baselines.json b/benchmarks/baselines.json
new file mode 100644
index 0000000..123a2b4
--- /dev/null
+++ b/benchmarks/baselines.json
@@ -0,0 +1,10 @@
+{
+  "_note": "Informational snapshot only — CI does not gate on these values.",
+  "updated": null,
+  "machine": null,
+  "groups": {
+    "parse": {},
+    "export": {},
+    "search": {}
+  }
+}
diff --git a/pyproject.toml b/pyproject.toml
index 5e8f63b..b71fd6f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,8 +5,11 @@ packages = ["api", "utils", "models"]
 exclude = ["tests/"]
 
 [tool.pytest.ini_options]
-addopts = "--cov=api --cov=utils --cov-report=term-missing --cov-report=xml:coverage.xml"
+addopts = "--cov=api --cov=utils --cov-report=term-missing --cov-report=xml:coverage.xml --benchmark-skip"
 testpaths = ["tests"]
+markers = [
+    "benchmark: performance benchmarks (pytest-benchmark)",
+]
 
 [tool.coverage.run]
 omit = [
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 7e83784..e4ef069 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -6,3 +6,4 @@ pytest-cov>=5.0
 ruff>=0.9.0
 pip-audit>=2.7.0
 hypothesis>=6.100.0
+pytest-benchmark>=4.0.0
diff --git a/tests/benchmarks/__init__.py b/tests/benchmarks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/benchmarks/conftest.py b/tests/benchmarks/conftest.py
new file mode 100644
index 0000000..fd419e8
--- /dev/null
+++ b/tests/benchmarks/conftest.py
@@ -0,0 +1,79 @@
+"""Synthetic corpora for parse/export/search performance benchmarks."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+
+from app import create_app
+
+FIXTURES = Path(__file__).resolve().parents[1] / "fixtures"
+TEMPLATE_LINE = (FIXTURES / "session_with_tools.jsonl").read_text(encoding="utf-8").splitlines()[0]
+
+
+def write_jsonl(path: Path, line_count: int) -> Path:
+    """Write a JSONL session file with *line_count* rows derived from the template fixture."""
+    with path.open("w", encoding="utf-8") as f:
+        for i in range(line_count):
+            entry = json.loads(TEMPLATE_LINE)
+            entry["timestamp"] = f"2026-06-12T10:{i % 60:02d}:00Z"
+            if i % 3 == 1:
+                msg = entry.setdefault("message", {})
+                if isinstance(msg, dict) and "content" in msg:
+                    msg["content"] = [{"type": "text", "text": f"benchmark token {i} searchable"}]
+            f.write(json.dumps(entry, separators=(",", ":")) + "\n")
+    return path
+
+
+def seed_search_corpus(
+    base_dir: Path,
+    *,
+    session_count: int = 50,
+    lines_per_session: int = 20,
+) -> Path:
+    """Create a multi-session project tree under *base_dir* for search benchmarks."""
+    project = base_dir / "bench-project"
+    project.mkdir(parents=True)
+    for i in range(session_count):
+        write_jsonl(project / f"session_{i:04d}.jsonl", lines_per_session)
+    return base_dir
+
+
+@pytest.fixture(scope="session")
+def parse_small_file(tmp_path_factory: pytest.TempPathFactory) -> Path:
+    root = tmp_path_factory.mktemp("bench")
+    return write_jsonl(root / "small.jsonl", 10)
+
+
+@pytest.fixture(scope="session")
+def parse_medium_file(tmp_path_factory: pytest.TempPathFactory) -> Path:
+    root = tmp_path_factory.mktemp("bench")
+    return write_jsonl(root / "medium.jsonl", 500)
+
+
+@pytest.fixture(scope="session")
+def parse_large_file(tmp_path_factory: pytest.TempPathFactory) -> Path:
+    root = tmp_path_factory.mktemp("bench")
+    return write_jsonl(root / "large.jsonl", 5000)
+
+
+@pytest.fixture
+def export_corpus(tmp_path: Path, request: pytest.FixtureRequest) -> Path:
+    """Project dir with N session files. Parametrize N via indirect fixture."""
+    count = request.param
+    project = tmp_path / "bench-project"
+    project.mkdir()
+    for i in range(count):
+        write_jsonl(project / f"session_{i:04d}.jsonl", 20)
+    return project
+
+
+@pytest.fixture
+def bench_client_search_corpus(tmp_path: Path):
+    """Flask test client backed by a 50-session synthetic project tree."""
+    seed_search_corpus(tmp_path)
+    app = create_app(base_dir=str(tmp_path))
+    app.config["TESTING"] = True
+    return app.test_client()
diff --git a/tests/benchmarks/test_export_bench.py b/tests/benchmarks/test_export_bench.py
new file mode 100644
index 0000000..c33bf53
--- /dev/null
+++ b/tests/benchmarks/test_export_bench.py
@@ -0,0 +1,38 @@
+"""Benchmark run_bulk_export over 10, 50, and 100 session corpora."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from utils.export_engine import NoopSink, run_bulk_export
+
+
+@pytest.mark.benchmark(group="export")
+@pytest.mark.parametrize(
+    "export_corpus",
+    [10, 50, 100],
+    indirect=True,
+    ids=["sessions-10", "sessions-50", "sessions-100"],
+)
+def test_bulk_export_session_count(
+    benchmark,
+    export_corpus: Path,
+) -> None:
+    projects = [{"name": "bench-project", "path": str(export_corpus), "display_name": "Bench"}]
+
+    def _run() -> object:
+        return run_bulk_export(
+            projects=projects,
+            since="all",
+            rules=[],
+            last_export_sessions={},
+            sink=NoopSink(),
+            fmt="md",
+            path_layout="api",
+            manifest_style="api",
+        )
+
+    result = benchmark(_run)
+    assert result.exported_session_count > 0
diff --git a/tests/benchmarks/test_parse_bench.py b/tests/benchmarks/test_parse_bench.py
new file mode 100644
index 0000000..d400cca
--- /dev/null
+++ b/tests/benchmarks/test_parse_bench.py
@@ -0,0 +1,24 @@
+"""Benchmark parse_session on small, medium, and large JSONL corpora."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from utils.jsonl_parser import parse_session
+
+
+@pytest.mark.benchmark(group="parse")
+def test_parse_session_small(benchmark, parse_small_file: Path) -> None:
+    benchmark(parse_session, str(parse_small_file))
+
+
+@pytest.mark.benchmark(group="parse")
+def test_parse_session_medium(benchmark, parse_medium_file: Path) -> None:
+    benchmark(parse_session, str(parse_medium_file))
+
+
+@pytest.mark.benchmark(group="parse")
+def test_parse_session_large(benchmark, parse_large_file: Path) -> None:
+    benchmark(parse_session, str(parse_large_file))
diff --git a/tests/benchmarks/test_parse_memory.py b/tests/benchmarks/test_parse_memory.py
new file mode 100644
index 0000000..ad2d1e9
--- /dev/null
+++ b/tests/benchmarks/test_parse_memory.py
@@ -0,0 +1,23 @@
+"""Peak memory ceiling for large-file parse_session (regular pytest, not benchmark-only)."""
+
+from __future__ import annotations
+
+import tracemalloc
+from pathlib import Path
+
+from utils.jsonl_parser import parse_session
+
+
+def test_large_parse_peak_memory_under_ceiling(parse_large_file: Path) -> None:
+    path = parse_large_file
+    file_bytes = path.stat().st_size
+    ceiling = file_bytes * 10
+
+    tracemalloc.start()
+    try:
+        parse_session(str(path))
+        _, peak = tracemalloc.get_traced_memory()
+    finally:
+        tracemalloc.stop()
+
+    assert peak < ceiling, f"peak {peak} bytes exceeds 10x file size {file_bytes}"
diff --git a/tests/benchmarks/test_search_bench.py b/tests/benchmarks/test_search_bench.py
new file mode 100644
index 0000000..16b9b87
--- /dev/null
+++ b/tests/benchmarks/test_search_bench.py
@@ -0,0 +1,18 @@
+"""Benchmark full-corpus search via the HTTP test client."""
+
+from __future__ import annotations
+
+import pytest
+from flask.testing import FlaskClient
+
+
+@pytest.mark.benchmark(group="search")
+def test_search_full_corpus(
+    benchmark,
+    bench_client_search_corpus: FlaskClient,
+) -> None:
+    def _run() -> object:
+        return bench_client_search_corpus.get("/api/search?q=searchable&limit=50")
+
+    resp = benchmark(_run)
+    assert resp.status_code == 200

From 68ba23cefc7e914c5beb266ac5eb0e92fd21fb4f Mon Sep 17 00:00:00 2001
From: chen <clean6378@gmail.com>
Date: Fri, 12 Jun 2026 00:44:43 +0800
Subject: [PATCH 2/5] chore: pin pytest-benchmark 5.2.3 and clarify json.dumps
 in bench fixtures

Pin pytest-benchmark to 5.2.3 after verifying compatibility with pytest 9.0 and the benchmark suite. Annotate benchmark JSONL serialization to document that json.dumps is intentional for file I/O, not Flask jsonify.
---
 requirements-dev.txt         | 2 +-
 tests/benchmarks/conftest.py | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index e4ef069..b54a2d1 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -6,4 +6,4 @@ pytest-cov>=5.0
 ruff>=0.9.0
 pip-audit>=2.7.0
 hypothesis>=6.100.0
-pytest-benchmark>=4.0.0
+pytest-benchmark==5.2.3
diff --git a/tests/benchmarks/conftest.py b/tests/benchmarks/conftest.py
index fd419e8..6d07f88 100644
--- a/tests/benchmarks/conftest.py
+++ b/tests/benchmarks/conftest.py
@@ -23,7 +23,11 @@ def write_jsonl(path: Path, line_count: int) -> Path:
                 msg = entry.setdefault("message", {})
                 if isinstance(msg, dict) and "content" in msg:
                     msg["content"] = [{"type": "text", "text": f"benchmark token {i} searchable"}]
-            f.write(json.dumps(entry, separators=(",", ":")) + "\n")
+            # json.dumps for file I/O — jsonify is Flask's HTTP helper, not file serialization.
+            serialized = (
+                json.dumps(entry, separators=(",", ":")) + "\n"  # linters-ignore: prefer-jsonify
+            )
+            f.write(serialized)
     return path
 
 

From c1d303007d01c1e58338796c12a5ac927d0b7188 Mon Sep 17 00:00:00 2001
From: chen <clean6378@gmail.com>
Date: Fri, 12 Jun 2026 03:18:50 +0800
Subject: [PATCH 3/5] refactor(benchmarks): harden corpus helpers in conftest

Parse the JSONL template once per write_jsonl call and deepcopy entries
in the loop. Use exist_ok=True when creating bench-project in
seed_search_corpus.
---
 tests/benchmarks/conftest.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/benchmarks/conftest.py b/tests/benchmarks/conftest.py
index 6d07f88..cd4369c 100644
--- a/tests/benchmarks/conftest.py
+++ b/tests/benchmarks/conftest.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import json
+from copy import deepcopy
 from pathlib import Path
 
 import pytest
@@ -15,9 +16,10 @@
 
 def write_jsonl(path: Path, line_count: int) -> Path:
     """Write a JSONL session file with *line_count* rows derived from the template fixture."""
+    template = json.loads(TEMPLATE_LINE)
     with path.open("w", encoding="utf-8") as f:
         for i in range(line_count):
-            entry = json.loads(TEMPLATE_LINE)
+            entry = deepcopy(template)
             entry["timestamp"] = f"2026-06-12T10:{i % 60:02d}:00Z"
             if i % 3 == 1:
                 msg = entry.setdefault("message", {})
@@ -39,7 +41,7 @@ def seed_search_corpus(
 ) -> Path:
     """Create a multi-session project tree under *base_dir* for search benchmarks."""
     project = base_dir / "bench-project"
-    project.mkdir(parents=True)
+    project.mkdir(parents=True, exist_ok=True)
     for i in range(session_count):
         write_jsonl(project / f"session_{i:04d}.jsonl", lines_per_session)
     return base_dir

From ea3b1ca6e0821c822ebb3cf7e57ad40206c6cc41 Mon Sep 17 00:00:00 2001
From: chen <clean6378@gmail.com>
Date: Fri, 12 Jun 2026 05:27:17 +0800
Subject: [PATCH 4/5] fix(benchmarks): harden memory test and ruff test glob

Reset tracemalloc peak before measuring large-file parse, assert non-empty
message count, extend E402 per-file-ignores to tests/**, and clarify README
that benchmark tests live under tests/benchmarks/.
---
 benchmarks/README.md                  | 4 +++-
 pyproject.toml                        | 2 +-
 tests/benchmarks/test_parse_memory.py | 4 +++-
 tests/benchmarks/test_search_bench.py | 1 +
 4 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/benchmarks/README.md b/benchmarks/README.md
index b8d9e05..e62da18 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -1,5 +1,7 @@
 # Performance benchmarks
 
+Test files live under `tests/benchmarks/`; this directory holds only documentation and the informational `baselines.json` snapshot.
+
 Repeatable local measurements for parse, bulk export, and search hot paths.
 
 ## Run locally
@@ -12,7 +14,7 @@ pytest tests/benchmarks/ --benchmark-only -o addopts= -v
 ## Memory check
 
 ```bash
-pytest tests/benchmarks/test_parse_memory.py -v
+pytest tests/benchmarks/test_parse_memory.py -v -o addopts=
 ```
 
 The memory test also runs as part of the normal `pytest` suite (timing benchmarks are skipped via `--benchmark-skip` in `pyproject.toml`).
diff --git a/pyproject.toml b/pyproject.toml
index b71fd6f..7203ef0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,4 +34,4 @@ combine-as-imports = true
 # CLI bootstrap: sys.path must be set before local imports.
 "scripts/export.py" = ["E402"]
 # Tests mirror the same path bootstrap before importing app/utils.
-"tests/*.py" = ["E402"]
+"tests/**/*.py" = ["E402"]
diff --git a/tests/benchmarks/test_parse_memory.py b/tests/benchmarks/test_parse_memory.py
index ad2d1e9..fcba372 100644
--- a/tests/benchmarks/test_parse_memory.py
+++ b/tests/benchmarks/test_parse_memory.py
@@ -14,8 +14,10 @@ def test_large_parse_peak_memory_under_ceiling(parse_large_file: Path) -> None:
     ceiling = file_bytes * 10
 
     tracemalloc.start()
+    tracemalloc.clear_traces()
     try:
-        parse_session(str(path))
+        result = parse_session(str(path))
+        assert len(result["messages"]) > 0, "parse_session returned no messages"
         _, peak = tracemalloc.get_traced_memory()
     finally:
         tracemalloc.stop()
diff --git a/tests/benchmarks/test_search_bench.py b/tests/benchmarks/test_search_bench.py
index 16b9b87..2d51a68 100644
--- a/tests/benchmarks/test_search_bench.py
+++ b/tests/benchmarks/test_search_bench.py
@@ -16,3 +16,4 @@ def _run() -> object:
 
     resp = benchmark(_run)
     assert resp.status_code == 200
+    assert resp.get_json(), "expected search hits from synthetic searchable tokens"

From 800244ac58aa94757d5e9b2cddcb9453e3c25bd3 Mon Sep 17 00:00:00 2001
From: chen <clean6378@gmail.com>
Date: Fri, 12 Jun 2026 21:34:44 +0800
Subject: [PATCH 5/5] fix(benchmarks): address PR #76 review feedback from
 @timon0305

Drop unnecessary actions: write on benchmarks CI job; assert explicit
search hit count on list response; document 10x memory ceiling and v1
template limitations in README; note NoopSink export rounds are stateless.
---
 .github/workflows/ci.yml              | 1 -
 benchmarks/README.md                  | 4 ++++
 tests/benchmarks/test_export_bench.py | 1 +
 tests/benchmarks/test_parse_memory.py | 3 +++
 tests/benchmarks/test_search_bench.py | 3 ++-
 5 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e7372ff..f157984 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -210,7 +210,6 @@ jobs:
     runs-on: ubuntu-latest
     permissions:
       contents: read
-      actions: write
     steps:
       - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
         with:
diff --git a/benchmarks/README.md b/benchmarks/README.md
index e62da18..75ff9cc 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -29,6 +29,10 @@ The memory test also runs as part of the normal `pytest` suite (timing benchmark
 
 Large JSONL files (5000+ lines) are generated at test session scope under pytest's temp directory — not committed to git.
 
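+Corpora are built by repeating the first row of `tests/fixtures/session_with_tools.jsonl`; only the timestamp and, on every third row, the message content (when present) vary. Parse/export numbers therefore measure steady-state throughput on a narrow schema slice — not full parser branch coverage. Treat them as v1 baselines, not as exhaustive performance proof.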
+
+The memory test (`test_parse_memory.py`) is intentionally **not** skipped by `--benchmark-skip`; it runs in the main `pytest` job and builds the session-scoped 5000-line fixture once per session.
+
 ## CI
 
 The `benchmarks` workflow job uploads `benchmark-results.json` as a downloadable artifact. There is no regression gate yet.
diff --git a/tests/benchmarks/test_export_bench.py b/tests/benchmarks/test_export_bench.py
index c33bf53..46c0eaf 100644
--- a/tests/benchmarks/test_export_bench.py
+++ b/tests/benchmarks/test_export_bench.py
@@ -23,6 +23,7 @@ def test_bulk_export_session_count(
     projects = [{"name": "bench-project", "path": str(export_corpus), "display_name": "Bench"}]
 
     def _run() -> object:
+        # NoopSink + since="all" + empty last_export_sessions: no disk/state writes per round.
         return run_bulk_export(
             projects=projects,
             since="all",
diff --git a/tests/benchmarks/test_parse_memory.py b/tests/benchmarks/test_parse_memory.py
index fcba372..de1c886 100644
--- a/tests/benchmarks/test_parse_memory.py
+++ b/tests/benchmarks/test_parse_memory.py
@@ -11,6 +11,9 @@
 def test_large_parse_peak_memory_under_ceiling(parse_large_file: Path) -> None:
     path = parse_large_file
     file_bytes = path.stat().st_size
+    # Issue #7 ceiling: Python heap peak (tracemalloc) vs on-disk JSONL size. Parsed
+    # dict/str objects often exceed raw bytes, so 10x is a generous v1 guard. If the
+    # parser legitimately needs more, raise the multiplier and note the reason here.
     ceiling = file_bytes * 10
 
     tracemalloc.start()
diff --git a/tests/benchmarks/test_search_bench.py b/tests/benchmarks/test_search_bench.py
index 2d51a68..95c5c5b 100644
--- a/tests/benchmarks/test_search_bench.py
+++ b/tests/benchmarks/test_search_bench.py
@@ -16,4 +16,5 @@ def _run() -> object:
 
     resp = benchmark(_run)
     assert resp.status_code == 200
-    assert resp.get_json(), "expected search hits from synthetic searchable tokens"
+    hits = resp.get_json()
+    assert isinstance(hits, list) and len(hits) > 0, "expected search hits from synthetic corpus"