Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -204,3 +204,37 @@ jobs:
esac
npm install --no-save "${PKG}@${ROLLUP_VERSION}"
- run: npm test

benchmarks:
name: Performance benchmarks (informational)
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
persist-credentials: false

- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
with:
python-version: "3.12"
cache: pip
cache-dependency-path: |
requirements.txt
requirements-dev.txt

- name: Install dev dependencies
run: pip install -r requirements-dev.txt

- name: Run benchmarks
run: >
pytest tests/benchmarks/
--benchmark-only
--benchmark-json=benchmark-results.json
--benchmark-columns=min,max,mean,stddev,rounds
-o addopts=

- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: benchmark-results
path: benchmark-results.json
1 change: 1 addition & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ pytest tests/test_api_integration.py -v
pytest tests/test_search.py -v
pytest tests/test_api_routes.py -v
pytest tests/test_error_codes.py -v
pytest tests/benchmarks/ --benchmark-only -o addopts= -v # performance baselines (see benchmarks/README.md)
```

### JavaScript (vitest)
Expand Down
42 changes: 42 additions & 0 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Performance benchmarks

Test files live under `tests/benchmarks/`; this directory holds only documentation and the informational `baselines.json` snapshot.

Repeatable local measurements for parse, bulk export, and search hot paths.

## Run locally

```bash
pip install -r requirements-dev.txt
pytest tests/benchmarks/ --benchmark-only -o addopts= -v
```

## Memory check

```bash
pytest tests/benchmarks/test_parse_memory.py -v -o addopts=
```

The memory test also runs as part of the normal `pytest` suite (timing benchmarks are skipped via `--benchmark-skip` in `pyproject.toml`).

## Scenarios

| Group | What |
|-------|------|
| parse | `parse_session` on 10 / 500 / 5000-line JSONL files |
| export | `run_bulk_export` over 10 / 50 / 100 sessions |
| search | `GET /api/search` over a 50-session synthetic corpus |

The large JSONL file (5000 lines) is generated once per test session under pytest's temp directory and is not committed to git.

Corpora repeat one row from `tests/fixtures/session_with_tools.jsonl`, so parse/export numbers measure steady-state throughput on a narrow schema slice — not full parser branch coverage. Treat as v1 baselines, not exhaustive perf proof.

The memory test (`test_parse_memory.py`) is intentionally **not** skipped by `--benchmark-skip`; it runs in the main `pytest` job and builds the session-scoped 5000-line fixture once per session.

## CI

The `benchmarks` workflow job uploads `benchmark-results.json` as a downloadable artifact. There is no regression gate yet.

## Refresh baselines

After intentional performance work, copy key means from a local run into `baselines.json` with a date and machine note. This file is informational only; CI does not compare against it.
10 changes: 10 additions & 0 deletions benchmarks/baselines.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"_note": "Informational snapshot only — CI does not gate on these values.",
"updated": null,
"machine": null,
"groups": {
"parse": {},
"export": {},
"search": {}
}
}
7 changes: 5 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@ packages = ["api", "utils", "models"]
exclude = ["tests/"]

[tool.pytest.ini_options]
addopts = "--cov=api --cov=utils --cov-report=term-missing --cov-report=xml:coverage.xml"
addopts = "--cov=api --cov=utils --cov-report=term-missing --cov-report=xml:coverage.xml --benchmark-skip"
testpaths = ["tests"]
markers = [
"benchmark: performance benchmarks (pytest-benchmark)",
]

[tool.coverage.run]
omit = [
Expand All @@ -31,4 +34,4 @@ combine-as-imports = true
# CLI bootstrap: sys.path must be set before local imports.
"scripts/export.py" = ["E402"]
# Tests mirror the same path bootstrap before importing app/utils.
"tests/*.py" = ["E402"]
"tests/**/*.py" = ["E402"]
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ pytest-cov>=5.0
ruff>=0.9.0
pip-audit>=2.7.0
hypothesis>=6.100.0
pytest-benchmark==5.2.3
Empty file added tests/benchmarks/__init__.py
Empty file.
85 changes: 85 additions & 0 deletions tests/benchmarks/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
"""Synthetic corpora for parse/export/search performance benchmarks."""

from __future__ import annotations

import json
from copy import deepcopy
from pathlib import Path

import pytest

from app import create_app

FIXTURES = Path(__file__).resolve().parents[1] / "fixtures"
TEMPLATE_LINE = (FIXTURES / "session_with_tools.jsonl").read_text(encoding="utf-8").splitlines()[0]


def write_jsonl(path: Path, line_count: int) -> Path:
    """Fill *path* with *line_count* JSONL rows cloned from the template fixture line.

    Every row receives a synthetic timestamp whose minute field cycles 00-59.
    Rows with ``index % 3 == 1`` also get their message content replaced by a
    single text part containing the word "searchable", but only when the row's
    ``message`` is a dict that already carries a ``content`` key.

    Returns:
        The same *path* that was written.
    """
    base_row = json.loads(TEMPLATE_LINE)
    with path.open("w", encoding="utf-8") as out:
        for index in range(line_count):
            row = deepcopy(base_row)
            row["timestamp"] = f"2026-06-12T10:{index % 60:02d}:00Z"
            if index % 3 == 1:
                message = row.setdefault("message", {})
                if isinstance(message, dict) and "content" in message:
                    message["content"] = [
                        {"type": "text", "text": f"benchmark token {index} searchable"}
                    ]
            # Plain json.dumps on purpose: this is file serialization, whereas
            # jsonify is Flask's HTTP response helper.
            compact = json.dumps(row, separators=(",", ":"))  # linters-ignore: prefer-jsonify
            out.write(compact + "\n")
    return path


def seed_search_corpus(
    base_dir: Path,
    *,
    session_count: int = 50,
    lines_per_session: int = 20,
) -> Path:
    """Build a ``bench-project`` directory of session files beneath *base_dir*.

    Args:
        base_dir: Root under which the ``bench-project`` directory is created
            (existing directories are reused).
        session_count: Number of ``session_NNNN.jsonl`` files to write.
        lines_per_session: Row count passed to ``write_jsonl`` for each file.

    Returns:
        *base_dir* itself (not the project subdirectory).
    """
    project_dir = base_dir / "bench-project"
    project_dir.mkdir(parents=True, exist_ok=True)
    session_names = (f"session_{index:04d}.jsonl" for index in range(session_count))
    for session_name in session_names:
        write_jsonl(project_dir / session_name, lines_per_session)
    return base_dir
Comment thread
coderabbitai[bot] marked this conversation as resolved.


@pytest.fixture(scope="session")
def parse_small_file(tmp_path_factory: pytest.TempPathFactory) -> Path:
    """Session-scoped 10-row JSONL file written into a fresh ``bench`` temp directory."""
    target = tmp_path_factory.mktemp("bench") / "small.jsonl"
    return write_jsonl(target, 10)


@pytest.fixture(scope="session")
def parse_medium_file(tmp_path_factory: pytest.TempPathFactory) -> Path:
    """Session-scoped 500-row JSONL file written into a fresh ``bench`` temp directory."""
    target = tmp_path_factory.mktemp("bench") / "medium.jsonl"
    return write_jsonl(target, 500)


@pytest.fixture(scope="session")
def parse_large_file(tmp_path_factory: pytest.TempPathFactory) -> Path:
    """Session-scoped 5000-row JSONL file written into a fresh ``bench`` temp directory."""
    target = tmp_path_factory.mktemp("bench") / "large.jsonl"
    return write_jsonl(target, 5000)


@pytest.fixture
def export_corpus(tmp_path: Path, request: pytest.FixtureRequest) -> Path:
    """Per-test project directory holding ``request.param`` 20-row session files.

    The session count is supplied through indirect parametrization; the
    returned path is the ``bench-project`` directory itself.
    """
    session_total = request.param
    project_dir = tmp_path / "bench-project"
    project_dir.mkdir()
    for index in range(session_total):
        session_file = project_dir / f"session_{index:04d}.jsonl"
        write_jsonl(session_file, 20)
    return project_dir


@pytest.fixture
def bench_client_search_corpus(tmp_path: Path):
    """Flask test client whose app is rooted at a freshly seeded synthetic corpus.

    The corpus is seeded with ``seed_search_corpus``'s default sizes under the
    per-test ``tmp_path``, and the app is created with TESTING enabled.
    """
    # seed_search_corpus hands back the base directory it was given.
    corpus_root = seed_search_corpus(tmp_path)
    flask_app = create_app(base_dir=str(corpus_root))
    flask_app.config["TESTING"] = True
    return flask_app.test_client()
39 changes: 39 additions & 0 deletions tests/benchmarks/test_export_bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Benchmark run_bulk_export over 10, 50, and 100 session corpora."""

from __future__ import annotations

from pathlib import Path

import pytest

from utils.export_engine import NoopSink, run_bulk_export


@pytest.mark.benchmark(group="export")
@pytest.mark.parametrize(
    "export_corpus",
    [10, 50, 100],
    indirect=True,
    ids=["sessions-10", "sessions-50", "sessions-100"],
)
def test_bulk_export_session_count(
    benchmark,
    export_corpus: Path,
) -> None:
    """Time ``run_bulk_export`` over a corpus of 10, 50, or 100 session files."""
    bench_project = {
        "name": "bench-project",
        "path": str(export_corpus),
        "display_name": "Bench",
    }
    projects = [bench_project]

    def _export_once() -> object:
        # A NoopSink, since="all" and an empty last_export_sessions map are
        # passed so each round avoids disk/state writes.
        return run_bulk_export(
            projects=projects,
            since="all",
            rules=[],
            last_export_sessions={},
            sink=NoopSink(),
            fmt="md",
            path_layout="api",
            manifest_style="api",
        )

    outcome = benchmark(_export_once)
    assert outcome.exported_session_count > 0
24 changes: 24 additions & 0 deletions tests/benchmarks/test_parse_bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Benchmark parse_session on small, medium, and large JSONL corpora."""

from __future__ import annotations

from pathlib import Path

import pytest

from utils.jsonl_parser import parse_session


@pytest.mark.benchmark(group="parse")
def test_parse_session_small(benchmark, parse_small_file: Path) -> None:
    """Time ``parse_session`` on the small (10-row) fixture file."""
    target = str(parse_small_file)
    benchmark(parse_session, target)


@pytest.mark.benchmark(group="parse")
def test_parse_session_medium(benchmark, parse_medium_file: Path) -> None:
    """Time ``parse_session`` on the medium (500-row) fixture file."""
    target = str(parse_medium_file)
    benchmark(parse_session, target)


@pytest.mark.benchmark(group="parse")
def test_parse_session_large(benchmark, parse_large_file: Path) -> None:
    """Time ``parse_session`` on the large (5000-row) fixture file."""
    target = str(parse_large_file)
    benchmark(parse_session, target)
28 changes: 28 additions & 0 deletions tests/benchmarks/test_parse_memory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Peak memory ceiling for large-file parse_session (regular pytest, not benchmark-only)."""

from __future__ import annotations

import tracemalloc
from pathlib import Path

from utils.jsonl_parser import parse_session


def test_large_parse_peak_memory_under_ceiling(parse_large_file: Path) -> None:
    """Check that parsing the large fixture keeps traced peak memory under 10x its file size.

    Peak memory is the tracemalloc peak observed while ``parse_session`` runs
    and its result is still referenced.
    """
    path = parse_large_file
    file_bytes = path.stat().st_size
    # Issue #7 ceiling: Python heap peak (tracemalloc) vs on-disk JSONL size. Parsed
    # dict/str objects often exceed raw bytes; 10x is a generous v1 guard — relax with
    # a comment here if the parser legitimately grows.
    ceiling = file_bytes * 10

    # Tracing may already be enabled for the whole run (PYTHONTRACEMALLOC or
    # -X tracemalloc). Only stop tracing that this test started, so later
    # tests keep their allocation tracebacks.
    started_here = not tracemalloc.is_tracing()
    if started_here:
        tracemalloc.start()
    # Drop existing traces so the peak reflects only the parse below.
    tracemalloc.clear_traces()
    try:
        result = parse_session(str(path))
        assert len(result["messages"]) > 0, "parse_session returned no messages"
        _, peak = tracemalloc.get_traced_memory()
    finally:
        if started_here:
            tracemalloc.stop()

    assert peak < ceiling, f"peak {peak} bytes exceeds 10x file size {file_bytes}"
Comment thread
clean6378-max-it marked this conversation as resolved.
20 changes: 20 additions & 0 deletions tests/benchmarks/test_search_bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Benchmark full-corpus search via the HTTP test client."""

from __future__ import annotations

import pytest
from flask.testing import FlaskClient


@pytest.mark.benchmark(group="search")
def test_search_full_corpus(
    benchmark,
    bench_client_search_corpus: FlaskClient,
) -> None:
    """Time ``GET /api/search`` for the term "searchable" and check it returns hits."""
    client = bench_client_search_corpus

    def _search_once() -> object:
        return client.get("/api/search?q=searchable&limit=50")

    response = benchmark(_search_once)
    assert response.status_code == 200
    payload = response.get_json()
    assert isinstance(payload, list) and len(payload) > 0, "expected search hits from synthetic corpus"
Loading