diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cbd89fc..545dfa5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,7 +49,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest] + os: [ubuntu-latest, windows-latest, macos-latest] permissions: contents: read steps: @@ -83,7 +83,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest] + os: [ubuntu-latest, windows-latest, macos-latest] permissions: contents: read steps: @@ -138,7 +138,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest] + os: [ubuntu-latest, windows-latest, macos-latest] permissions: contents: read actions: write @@ -174,7 +174,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest] + os: [ubuntu-latest, windows-latest, macos-latest] permissions: contents: read steps: @@ -188,4 +188,19 @@ jobs: cache: npm - run: npm ci + # npm optional-deps bug: platform rollup binaries are sometimes skipped after npm ci + # (https://github.com/npm/cli/issues/4828). Install the native package explicitly. + - name: Ensure Rollup native binary + shell: bash + run: | + ROLLUP_VERSION=$(node -p "require('./node_modules/rollup/package.json').version") + case "$(node -p "process.platform + '-' + process.arch")" in + darwin-arm64) PKG="@rollup/rollup-darwin-arm64" ;; + darwin-x64) PKG="@rollup/rollup-darwin-x64" ;; + linux-x64) PKG="@rollup/rollup-linux-x64-gnu" ;; + win32-x64) PKG="@rollup/rollup-win32-x64-msvc" ;; + win32-arm64) PKG="@rollup/rollup-win32-arm64-msvc" ;; + *) echo "Unsupported Node platform: $(node -p "process.platform + '-' + process.arch")"; exit 1 ;; + esac + npm install --no-save "${PKG}@${ROLLUP_VERSION}" - run: npm test diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 752eacb..48bc61d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -9,7 +9,7 @@ Thanks for considering a patch. 
This repo is a small Flask app plus a hash-route - **Python 3.12** (matches CI) - **Node 20+** (only if you change `static/js/` or run frontend unit tests) -CI runs **`ruff check`**, **`ruff format --check`**, **`pip-audit`**, **`pytest`**, **integration tests**, and **Vitest** on **ubuntu-latest** and **windows-latest** (Python 3.12, Node 20). Type-check (`mypy`) and production install smoke run on Ubuntu only. +CI runs **`ruff check`**, **`ruff format --check`**, **`pip-audit`**, **`pytest`**, **integration tests**, and **Vitest** on **Ubuntu, Windows, and macOS** (`ubuntu-latest`, `windows-latest`, `macos-latest`; Python 3.12, Node 20). Type-check (`mypy`) and production install smoke run on Ubuntu only. ### Bootstrap (Windows PowerShell) @@ -112,7 +112,7 @@ npm run test:coverage # optional - [ ] `ruff check .` and `ruff format --check .` green locally - [ ] `pytest -q` green locally - [ ] `npm test` green if JS changed - - [ ] CI jobs green (`lint-and-audit`, `pytest`, `integration-tests`, `js-tests` on Ubuntu + Windows; `mypy`, `prod-install-smoke` on Ubuntu) + - [ ] CI jobs green (`lint-and-audit`, `pytest`, `integration-tests`, `js-tests` on Ubuntu + Windows + macOS; `mypy`, `prod-install-smoke` on Ubuntu) - [ ] PR description includes a **Test plan** section - [ ] API changes update [`docs/api-reference.md`](docs/api-reference.md) if behavior or errors change diff --git a/package-lock.json b/package-lock.json index f2ecedd..e9d2a94 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,9 +11,6 @@ "jsdom": "^26.1.0", "marked": "^12.0.1", "vitest": "^3.2.4" - }, - "optionalDependencies": { - "@rollup/rollup-linux-x64-gnu": "4.60.4" } }, "node_modules/@ampproject/remapping": { @@ -281,6 +278,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ diff --git a/package.json b/package.json index e6aaf52..8c0ac63 100644 --- a/package.json +++ b/package.json @@ -12,8 +12,5 @@ "jsdom": "^26.1.0", "marked": "^12.0.1", "vitest": 
"^3.2.4" - }, - "optionalDependencies": { - "@rollup/rollup-linux-x64-gnu": "4.60.4" } } diff --git a/requirements-dev.txt b/requirements-dev.txt index 20aab30..7e83784 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,3 +5,4 @@ types-Flask==1.1.6 pytest-cov>=5.0 ruff>=0.9.0 pip-audit>=2.7.0 +hypothesis>=6.100.0 diff --git a/tests/conftest.py b/tests/conftest.py index 2d483b9..f3e0a3d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,14 +2,22 @@ from __future__ import annotations +import os import shutil from collections.abc import Mapping from pathlib import Path import pytest +from hypothesis import settings from app import create_app +# Hypothesis profiles drive fuzz example counts/deadlines (deadline disabled to +# avoid timing flakiness on slow/CI runners). CI runs fewer examples for speed. +settings.register_profile("dev", max_examples=200, deadline=None) +settings.register_profile("ci", max_examples=100, deadline=None) +settings.load_profile("ci" if os.environ.get("CI") else "dev") + FIXTURES = Path(__file__).parent / "fixtures" diff --git a/tests/test_parser_fuzz.py b/tests/test_parser_fuzz.py new file mode 100644 index 0000000..bde0ff1 --- /dev/null +++ b/tests/test_parser_fuzz.py @@ -0,0 +1,306 @@ +"""Hypothesis fuzz tests for parse_session — adversarial JSONL must not crash.""" + +from __future__ import annotations + +import json +import os +import sys +from pathlib import Path + +from hypothesis import HealthCheck, given, settings, strategies as st + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from utils.jsonl_parser import parse_session + +# Only suppress the tmp_path health check; max_examples and deadline come from +# the active Hypothesis profile (ci/dev) registered in conftest.py. +FUZZ_SETTINGS = settings(suppress_health_check=[HealthCheck.function_scoped_fixture]) + +# Structured errors that are acceptable instead of a clean parse. 
def _write_jsonl(path: os.PathLike[str], lines: list[str]) -> str:
    """Write *lines* to *path* as newline-terminated chunks and return the path.

    Each element is written verbatim and a single ``"\\n"`` is appended only
    when the element does not already end with one.  Elements are not escaped,
    so an element containing embedded newlines produces several physical lines
    in the file.

    The file is opened with ``newline=""`` so no newline translation happens on
    write: the bytes on disk are identical on Linux, macOS and Windows (without
    it, text mode on Windows rewrites every ``"\\n"`` as ``"\\r\\n"``, so the same
    generated input would exercise the parser with different bytes per OS).

    Args:
        path: Destination file; converted with ``str()`` before opening.
        lines: Text chunks to write, in order.

    Returns:
        ``str(path)``, for passing on as a plain string path.
    """
    path_str = str(path)
    # errors="replace" keeps un-encodable code points (e.g. lone surrogates)
    # from raising while the fixture file is being written.
    with open(path_str, "w", encoding="utf-8", errors="replace", newline="") as f:
        for line in lines:
            f.write(line if line.endswith("\n") else line + "\n")
    return path_str
@st.composite
def structured_entry(draw: st.DrawFn) -> dict:
    """Build one fuzzed session record with optional missing/extra fields.

    The record's ``"type"`` is drawn from ``_RECORD_TYPES``; the rest of the
    shape depends on that type:

    * ``"user"`` / ``"assistant"`` start from a shallow copy of the matching
      minimal template and then randomly drop or replace ``"message"`` (user)
      or individual ``usage`` / ``model`` / ``content`` fields (assistant) with
      wrong-typed values.
    * ``"system"`` gets a text ``subtype`` and arbitrary ``content``.
    * ``"progress"`` gets a small ``data`` dictionary.
    * any other type gets an arbitrary ``payload``.

    Finally 0-3 extra keys with arbitrary JSON-like values are added; these may
    overwrite keys set earlier.

    Returns:
        A dict describing a single record.
    """
    # NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    # diff -- confirm it against the committed file.
    record_type = draw(_RECORD_TYPES)
    base: dict = {"type": record_type}
    if draw(st.booleans()):
        base["timestamp"] = draw(
            st.one_of(
                st.text(max_size=40),
                st.just("2026-06-11T00:00:00Z"),
                st.integers(),
            )
        )

    if record_type == "user":
        # Shallow copy: the template's nested "message" is only ever replaced
        # or removed below, never mutated in place.
        entry = dict(_minimal_user)
        entry.update(base)
        if draw(st.booleans()):
            entry.pop("message", None)
        if draw(st.booleans()):
            entry["message"] = draw(
                st.one_of(
                    st.text(),
                    st.dictionaries(st.text(max_size=10), _json_value, max_size=6),
                    # Let the strategy generate the content lazily.  Calling
                    # draw() while *building* the alternative would consume a
                    # value even when another alternative is selected.
                    st.fixed_dictionaries({"content": _json_value}),
                )
            )
    elif record_type == "assistant":
        entry = dict(_minimal_assistant)
        entry.update(base)
        if draw(st.booleans()):
            msg_val = entry.get("message", {})
            # Copy before editing so the module-level template stays untouched.
            msg = dict(msg_val) if isinstance(msg_val, dict) else {}
            if draw(st.booleans()):
                msg["usage"] = draw(
                    st.one_of(st.text(), st.integers(), st.dictionaries(st.text(), _json_value))
                )
            if draw(st.booleans()):
                msg["model"] = draw(st.one_of(st.text(), st.integers(), st.none()))
            if draw(st.booleans()):
                msg["content"] = draw(_json_value)
            entry["message"] = msg
    elif record_type == "system":
        entry = {**base, "subtype": draw(st.text(max_size=30)), "content": draw(_json_value)}
    elif record_type == "progress":
        entry = {
            **base,
            "data": draw(st.dictionaries(st.text(max_size=10), _json_value, max_size=6)),
        }
    else:
        entry = {**base, "payload": draw(_json_value)}

    # Extra, unexpected top-level keys (may clobber the ones set above).
    for _ in range(draw(st.integers(min_value=0, max_value=3))):
        entry[draw(st.text(min_size=1, max_size=15))] = draw(_json_value)
    return entry
def test_null_bytes_in_file(tmp_path: Path) -> None:
    """A file containing NUL bytes around one valid record must parse without raising.

    The file is written as raw bytes: a NUL-padded garbage line, one valid
    ``user`` record, and a trailing lone NUL with no final newline.
    """
    record = {
        "type": "user",
        "timestamp": "2026-06-11T00:00:00Z",
        "message": {"content": [{"type": "text", "text": "after null"}]},
    }
    target = tmp_path / "nulls.jsonl"
    # write_bytes bypasses text encoding so the NULs reach the disk untouched.
    target.write_bytes(
        b"".join(
            [
                b"\x00garbage\x00\n",
                json.dumps(record).encode("utf-8"),
                b"\n\x00",
            ]
        )
    )
    _parse_file_without_crash(str(target))
def test_non_numeric_usage_tokens_do_not_crash(tmp_path: Path) -> None:
    """Non-numeric usage values must count as 0 instead of raising.

    Writes one assistant record whose token fields are a string, a list and a
    nested string, then checks the corresponding session totals are all 0.
    """
    bad_usage = {
        "input_tokens": "five",
        "output_tokens": ["not", "a", "number"],
        "cache_creation": {"ephemeral_5m_input_tokens": "lots"},
    }
    record = {
        "type": "assistant",
        "timestamp": "2026-06-11T00:00:00Z",
        "message": {
            "model": "claude-test",
            "content": [{"type": "text", "text": "hi"}],
            "usage": bad_usage,
        },
    }
    jsonl_path = _write_jsonl(tmp_path / "bad_usage.jsonl", [json.dumps(record)])
    totals = parse_session(jsonl_path)["metadata"]
    for key in (
        "total_input_tokens",
        "total_output_tokens",
        "total_ephemeral_5m_tokens",
    ):
        assert totals[key] == 0
def _safe_int(val: Any) -> int:
    """Return *val* as a non-negative int for token accounting.

    Mapping:

    * ``bool`` -> 0 (rejected even though ``bool`` subclasses ``int``)
    * ``int`` -> the value, clamped to a minimum of 0
    * finite ``float`` -> truncated with ``int()``, then clamped to 0
    * ``NaN`` / ``+-Infinity`` -> 0 (``int()`` would raise on these)
    * anything else (``None``, ``str``, ``list``, ``dict``, ...) -> 0

    The function never raises, so malformed usage fields cannot break the
    ``+=`` accumulation in the caller or push a counter below zero.
    """
    # bool has to be screened first: isinstance(True, int) is True.
    if isinstance(val, bool) or not isinstance(val, (int, float)):
        return 0
    if isinstance(val, float):
        if not math.isfinite(val):
            return 0
        val = int(val)
    return max(0, val)
Reads every line from a .jsonl file and builds up a session dict with messages, metadata (tokens, models, tool counts), @@ -96,6 +110,9 @@ def parse_session(filepath: str) -> SessionDict: except json.JSONDecodeError: continue + if not isinstance(entry, dict): + continue + entry_type = entry.get("type") ts = entry.get("timestamp") # file-history-snapshot stores timestamp inside snapshot @@ -109,11 +126,11 @@ def parse_session(filepath: str) -> SessionDict: metadata["first_timestamp"] = ts metadata["last_timestamp"] = ts - # Count entry types + # Count entry types (upstream may send non-str/unhashable discriminants; + # coerce to str. Falsy types like "" are skipped, matching prior behavior). if entry_type: - metadata["entry_counts"][entry_type] = ( - metadata["entry_counts"].get(entry_type, 0) + 1 - ) + type_key = entry_type if isinstance(entry_type, str) else str(entry_type) + metadata["entry_counts"][type_key] = metadata["entry_counts"].get(type_key, 0) + 1 # Track sidechain if entry.get("isSidechain"): @@ -135,10 +152,12 @@ def parse_session(filepath: str) -> SessionDict: metadata["files_created"] = sorted(metadata["files_created"]) # Compute wall clock time - if metadata["first_timestamp"] and metadata["last_timestamp"]: + first_ts = metadata["first_timestamp"] + last_ts = metadata["last_timestamp"] + if isinstance(first_ts, str) and isinstance(last_ts, str): try: - t0 = datetime.fromisoformat(metadata["first_timestamp"].replace("Z", "+00:00")) - t1 = datetime.fromisoformat(metadata["last_timestamp"].replace("Z", "+00:00")) + t0 = datetime.fromisoformat(first_ts.replace("Z", "+00:00")) + t1 = datetime.fromisoformat(last_ts.replace("Z", "+00:00")) metadata["session_wall_time_seconds"] = max(0, (t1 - t0).total_seconds()) except (ValueError, AttributeError): pass @@ -209,7 +228,7 @@ def _process_assistant( and tool_use calls, and accumulates token/model/tool stats.""" msg = _entry_message(entry) model = msg.get("model", "") - if model and model != "": + if 
isinstance(model, str) and model and model != "": metadata["models_used"].add(model) # API error tracking @@ -219,29 +238,29 @@ def _process_assistant( usage = msg.get("usage", {}) if not isinstance(usage, dict): usage = {} - metadata["total_input_tokens"] += usage.get("input_tokens") or 0 - metadata["total_output_tokens"] += usage.get("output_tokens") or 0 - metadata["total_cache_read_tokens"] += usage.get("cache_read_input_tokens") or 0 - metadata["total_cache_creation_tokens"] += usage.get("cache_creation_input_tokens") or 0 + metadata["total_input_tokens"] += _safe_int(usage.get("input_tokens")) + metadata["total_output_tokens"] += _safe_int(usage.get("output_tokens")) + metadata["total_cache_read_tokens"] += _safe_int(usage.get("cache_read_input_tokens")) + metadata["total_cache_creation_tokens"] += _safe_int(usage.get("cache_creation_input_tokens")) # Extended cache metrics cache_creation = usage.get("cache_creation", {}) if isinstance(cache_creation, dict): - metadata["total_ephemeral_5m_tokens"] += ( - cache_creation.get("ephemeral_5m_input_tokens") or 0 + metadata["total_ephemeral_5m_tokens"] += _safe_int( + cache_creation.get("ephemeral_5m_input_tokens") ) - metadata["total_ephemeral_1h_tokens"] += ( - cache_creation.get("ephemeral_1h_input_tokens") or 0 + metadata["total_ephemeral_1h_tokens"] += _safe_int( + cache_creation.get("ephemeral_1h_input_tokens") ) # Service tier tier = usage.get("service_tier") - if tier: + if isinstance(tier, str) and tier: metadata["service_tiers"].add(tier) # Stop reason tracking stop_reason = msg.get("stop_reason", "") - if stop_reason: + if isinstance(stop_reason, str) and stop_reason: metadata["stop_reasons"][stop_reason] = metadata["stop_reasons"].get(stop_reason, 0) + 1 content_parts = _normalize_content(msg.get("content", [])) @@ -256,7 +275,8 @@ def _process_assistant( elif ptype == "thinking": thinking_parts.append(part.get("thinking", "")) elif ptype == "tool_use": - tool_name = part.get("name", "unknown") + 
raw_name = part.get("name", "unknown") + tool_name = raw_name if isinstance(raw_name, str) else "unknown" raw_input = part.get("input", {}) safe_input = raw_input if isinstance(raw_input, dict) else {} metadata["total_tool_calls"] += 1 @@ -287,11 +307,11 @@ def _process_assistant( "is_sidechain": entry.get("isSidechain", False), "is_api_error": entry.get("isApiErrorMessage", False), "usage": { - "input_tokens": usage.get("input_tokens") or 0, - "output_tokens": usage.get("output_tokens") or 0, - "cache_read": usage.get("cache_read_input_tokens") or 0, - "cache_creation": usage.get("cache_creation_input_tokens") or 0, - "service_tier": usage.get("service_tier"), + "input_tokens": _safe_int(usage.get("input_tokens")), + "output_tokens": _safe_int(usage.get("output_tokens")), + "cache_read": _safe_int(usage.get("cache_read_input_tokens")), + "cache_creation": _safe_int(usage.get("cache_creation_input_tokens")), + "service_tier": tier if isinstance(tier, str) else None, }, } ) @@ -355,7 +375,8 @@ def _track_file_activity( ) -> None: """Look at what each tool call did and record which files got touched, what commands got run, what URLs got fetched.""" - fp = tool_input.get("file_path", "") + raw_fp = tool_input.get("file_path", "") + fp = raw_fp if isinstance(raw_fp, str) else "" if tool_name == "Read" and fp: metadata["files_read"].add(fp) elif tool_name == "Write" and fp: @@ -364,9 +385,9 @@ def _track_file_activity( metadata["files_written"].add(fp) elif tool_name == "Bash": cmd = tool_input.get("command", "") - if cmd: + if isinstance(cmd, str) and cmd: metadata["bash_commands"].append(cmd) elif tool_name in ("WebFetch", "WebSearch"): url_or_query = tool_input.get("url") or tool_input.get("query", "") - if url_or_query: + if isinstance(url_or_query, str) and url_or_query: metadata["web_fetches"].append(url_or_query)