From 02905c4b78d4f9cc9d36258f5a4b5a0b5b9acbb5 Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Wed, 10 Jun 2026 16:20:00 -0400 Subject: [PATCH 01/11] feat: enable strict mypy and add typed search models (#100) --- .github/workflows/tests.yml | 12 ++--- CHANGELOG.md | 9 +++- api/composers.py | 9 ++-- api/config_api.py | 10 ++-- api/export_api.py | 13 +++--- api/flask_config.py | 4 +- api/logs.py | 7 +-- api/pdf.py | 11 +++-- api/search.py | 11 +++-- api/workspaces.py | 13 +++--- app.py | 24 +++++----- models/__init__.py | 3 ++ models/conversation.py | 6 +-- models/parse_warnings.py | 7 +-- models/search.py | 27 +++++++++++ pyproject.toml | 13 ++++++ services/cli_tabs.py | 11 +++-- services/search.py | 33 +++++++------- services/summary_cache.py | 18 ++++---- services/workspace_context.py | 23 +++++----- services/workspace_db.py | 62 ++++++++++++++----------- services/workspace_listing.py | 34 ++++++++------ services/workspace_resolver.py | 44 ++++++++++-------- services/workspace_tabs.py | 83 +++++++++++++++++++++------------- tests/test_models.py | 28 ++++++++++++ utils/cli_chat_reader.py | 36 +++++++-------- utils/cursor_md_exporter.py | 17 +++---- utils/debug_flag.py | 4 +- utils/exclusion_rules.py | 23 ++++++---- utils/path_helpers.py | 19 ++++---- utils/text_extract.py | 4 +- utils/tool_parser.py | 5 +- utils/workspace_descriptor.py | 5 +- 33 files changed, 387 insertions(+), 241 deletions(-) create mode 100644 models/search.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8f642b2..72ede31 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -132,11 +132,9 @@ jobs: run: dist\CursorChatBrowser\CursorChatBrowser.exe --help # ── Typecheck: mypy ─────────────────────────────────────────────────────── - # Codebase already has type hints across most of the surface (~70+ typed - # functions). Mypy runs with --ignore-missing-imports for untyped - # third-party deps; strict-optional is enabled (mypy default). 
The - # transitional `continue-on-error: true` was removed in #29 once mypy - # reached zero errors on this repo — type failures now block merges. + # strict = true in pyproject.toml (issue #100). Per-module overrides skip + # scripts/export.py and tests/ until those surfaces are fully annotated. + # Type failures block merges — no continue-on-error. typecheck: name: Typecheck (mypy) runs-on: ubuntu-latest @@ -157,9 +155,7 @@ jobs: python -m pip install 'mypy>=1.10,<2' - name: Run mypy - # No `continue-on-error` — mypy now exits zero on this repo (closes #29), - # so type errors must fail the job from here on. - run: mypy --ignore-missing-imports --pretty . + run: mypy . # ── Secret scan: gitleaks ───────────────────────────────────────────────── # Catches accidentally committed credentials. Runs over full git history diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c020b8..cc141c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [0.1.0] - 2026-06-04 +### Added +- **Strict mypy** — `strict = true` in `pyproject.toml`; core TypedDict models + (`SearchResult`, `ConversationSummary`) and full annotations on API routes and + `utils/` (#100) + +### Changed +- CI typecheck job runs `mypy .` using pyproject config (strict production code; + per-module overrides for `scripts/export.py` and `tests.*`) ### Added - **Summary disk cache (Phase 3)** — project list and tab summaries cached under diff --git a/api/composers.py b/api/composers.py index 9a8b2bb..1b90c12 100644 --- a/api/composers.py +++ b/api/composers.py @@ -9,8 +9,9 @@ import os import sqlite3 from contextlib import closing +from typing import Any -from flask import Blueprint, jsonify +from flask import Blueprint, Response, jsonify from utils.workspace_path import resolve_workspace_path from utils.path_helpers import to_epoch_ms @@ -20,13 +21,13 @@ _logger = logging.getLogger(__name__) -def 
_read_json_file(path: str): +def _read_json_file(path: str) -> Any: with open(path, "r", encoding="utf-8") as f: return json.load(f) @bp.route("/api/composers") -def list_composers(): +def list_composers() -> tuple[Response, int] | Response: try: workspace_path = resolve_workspace_path() composers = [] @@ -120,7 +121,7 @@ def list_composers(): @bp.route("/api/composers/") -def get_composer(composer_id): +def get_composer(composer_id: str) -> tuple[Response, int] | Response: try: workspace_path = resolve_workspace_path() diff --git a/api/config_api.py b/api/config_api.py index ffefd69..46f5796 100644 --- a/api/config_api.py +++ b/api/config_api.py @@ -11,7 +11,7 @@ import subprocess import sys -from flask import Blueprint, jsonify, request +from flask import Blueprint, Response, jsonify, request from utils.path_validation import WorkspacePathError, validate_workspace_path from utils.workspace_path import set_workspace_path_override @@ -21,7 +21,7 @@ @bp.route("/api/detect-environment") -def detect_environment(): +def detect_environment() -> Response: try: is_wsl = False is_remote = bool( @@ -56,7 +56,7 @@ def detect_environment(): @bp.route("/api/validate-path", methods=["POST"]) -def validate_path(): +def validate_path() -> tuple[Response, int] | Response: """Same path rules as POST /api/set-workspace: realpath, markers (issue #15).""" try: body = request.get_json(silent=True) or {} @@ -97,7 +97,7 @@ def validate_path(): @bp.route("/api/set-workspace", methods=["POST"]) -def set_workspace(): +def set_workspace() -> tuple[Response, int] | Response: # Reject non-dict JSON bodies (array / string / number / null). 
Without # this, get_json returns the value directly, the truthy fallback `or {}` # is bypassed, and `body.get("path", "")` raises AttributeError — which @@ -126,7 +126,7 @@ def set_workspace(): @bp.route("/api/get-username") -def get_username(): +def get_username() -> Response: try: username = "YOUR_USERNAME" diff --git a/api/export_api.py b/api/export_api.py index 4feb412..26fd9b7 100644 --- a/api/export_api.py +++ b/api/export_api.py @@ -12,6 +12,7 @@ import zipfile from datetime import datetime from pathlib import Path +from typing import Any, cast from flask import Blueprint, Response, jsonify, request @@ -38,13 +39,13 @@ def _get_state_dir() -> str: return os.path.join(str(Path.home()), ".cursor-chat-browser") -def _get_export_state() -> dict: +def _get_export_state() -> dict[str, Any]: """Read the export state file.""" state_path = os.path.join(_get_state_dir(), "export_state.json") if os.path.isfile(state_path): try: with open(state_path, "r", encoding="utf-8") as f: - return json.load(f) + return cast(dict[str, Any], json.load(f)) except (json.JSONDecodeError, ValueError, OSError) as e: _logger.warning( "Could not read export state from %s: %s", @@ -54,7 +55,7 @@ def _get_export_state() -> dict: return {} -def _save_export_state(count: int): +def _save_export_state(count: int) -> None: """Save export state after an export.""" state_dir = _get_state_dir() os.makedirs(state_dir, exist_ok=True) @@ -68,14 +69,14 @@ def _save_export_state(count: int): @bp.route("/api/export/state") -def get_export_state(): +def get_export_state() -> Response: """Return the last export timestamp.""" state = _get_export_state() return jsonify(state) @bp.route("/api/export", methods=["POST"]) -def export_chats(): +def export_chats() -> tuple[Response, int] | Response: """Export chats as a zip archive. 
Exclusion rules (``EXCLUSION_RULES`` app config key) are evaluated against @@ -112,7 +113,7 @@ def export_chats(): ws_id_to_slug[e["name"]] = slug(display) today = datetime.now().strftime("%Y-%m-%d") - exported = [] + exported: list[dict[str, Any]] = [] rules = exclusion_rules() # ── Database reading via service layer ──────────────────────────────── diff --git a/api/flask_config.py b/api/flask_config.py index 30bf4ae..9b72136 100644 --- a/api/flask_config.py +++ b/api/flask_config.py @@ -2,9 +2,11 @@ from __future__ import annotations +from typing import Any + from flask import current_app -def exclusion_rules() -> list: +def exclusion_rules() -> list[list[Any]]: """Return loaded exclusion rules from app config (empty list when unset).""" return current_app.config.get("EXCLUSION_RULES") or [] diff --git a/api/logs.py b/api/logs.py index f5607ea..b209975 100644 --- a/api/logs.py +++ b/api/logs.py @@ -10,8 +10,9 @@ import sqlite3 from contextlib import closing from datetime import datetime +from typing import Any -from flask import Blueprint, jsonify +from flask import Blueprint, Response, jsonify from utils.workspace_path import resolve_workspace_path from utils.path_helpers import to_epoch_ms, warn_workspace_json_read @@ -26,7 +27,7 @@ def _extract_chat_id_from_bubble_key(key: str) -> str | None: @bp.route("/api/logs") -def get_logs(): +def get_logs() -> tuple[Response, int] | Response: try: workspace_path = resolve_workspace_path() logs = [] @@ -40,7 +41,7 @@ def get_logs(): conn.row_factory = sqlite3.Row rows = conn.execute("SELECT key, value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%'").fetchall() - chat_map: dict[str, list] = {} + chat_map: dict[str, list[Any]] = {} for row in rows: chat_id = _extract_chat_id_from_bubble_key(row["key"]) if not chat_id: diff --git a/api/pdf.py b/api/pdf.py index 5d36f56..b0175e7 100644 --- a/api/pdf.py +++ b/api/pdf.py @@ -6,6 +6,7 @@ import io import logging import re +from typing import Any from flask import Blueprint, 
Response, jsonify, request @@ -43,7 +44,7 @@ def _safe_text(text: str) -> str: @bp.route("/api/generate-pdf", methods=["POST"]) -def generate_pdf(): +def generate_pdf() -> tuple[Response, int] | Response: try: body = request.get_json(silent=True) or {} markdown_text = body.get("markdown", "") @@ -52,10 +53,10 @@ def generate_pdf(): from fpdf import FPDF class PDFDoc(FPDF): - def header(self): + def header(self) -> None: pass - def footer(self): + def footer(self) -> None: self.set_y(-15) self.set_font("Helvetica", "I", 8) self.cell(0, 10, f"Page {self.page_no()}/{{nb}}", align="C") @@ -74,7 +75,7 @@ def footer(self): # Parse markdown line by line lines = markdown_text.split("\n") in_code_block = False - code_lines = [] + code_lines: list[str] = [] for line in lines: try: @@ -179,7 +180,7 @@ def footer(self): return jsonify({"error": "Failed to generate PDF"}), 500 -def _render_code_block(pdf, code_text: str): +def _render_code_block(pdf: Any, code_text: str) -> None: """Render a code block with a dark background.""" pdf.ln(3) pdf.set_font("Courier", "", 8) diff --git a/api/search.py b/api/search.py index 3ad9c89..700b6c3 100644 --- a/api/search.py +++ b/api/search.py @@ -4,10 +4,11 @@ """ import logging +from typing import Any -from flask import Blueprint, current_app, jsonify, request +from flask import Blueprint, Response, current_app, jsonify, request -from models import ParseWarningCollector +from models import ParseWarningCollector, SearchResult from services.search import ( rank_results, search_cli_sessions, @@ -21,7 +22,7 @@ @bp.route("/api/search") -def search(): +def search() -> tuple[Response, int] | Response: try: query = request.args.get("q", "").strip() search_type = request.args.get("type", "all") @@ -34,7 +35,7 @@ def search(): parse_warnings = ParseWarningCollector() query_lower = query.lower() - results = [] + results: list[SearchResult] = [] results.extend( search_global_storage(workspace_path, query, query_lower, rules, parse_warnings) ) @@ -46,7 
+47,7 @@ def search(): search_cli_sessions(get_cli_chats_path(), query, query_lower, rules) ) - payload: dict = {"results": rank_results(results)} + payload: dict[str, Any] = {"results": rank_results(results)} return jsonify(parse_warnings.attach_to(payload)) except Exception: diff --git a/api/workspaces.py b/api/workspaces.py index d8556c9..f5f8ac7 100644 --- a/api/workspaces.py +++ b/api/workspaces.py @@ -10,8 +10,9 @@ import logging import os from datetime import datetime, timezone +from typing import Any -from flask import Blueprint, jsonify, request +from flask import Blueprint, Response, jsonify, request from api.flask_config import exclusion_rules @@ -55,14 +56,14 @@ def _request_nocache() -> bool: @bp.route("/api/workspaces") -def list_workspaces(): +def list_workspaces() -> tuple[Response, int] | Response: try: workspace_path = resolve_workspace_path() rules = exclusion_rules() projects, warnings = list_workspace_projects( workspace_path, rules, nocache=_request_nocache(), ) - payload: dict = {"projects": projects} + payload: dict[str, Any] = {"projects": projects} if warnings: payload["warnings"] = warnings return jsonify(payload) @@ -76,7 +77,7 @@ def list_workspaces(): # --------------------------------------------------------------------------- @bp.route("/api/workspaces/") -def get_workspace(workspace_id): +def get_workspace(workspace_id: str) -> tuple[Response, int] | Response: try: if workspace_id == "global": return jsonify({ @@ -154,7 +155,7 @@ def get_workspace(workspace_id): # --------------------------------------------------------------------------- @bp.route("/api/workspaces//tabs") -def get_workspace_tabs(workspace_id): +def get_workspace_tabs(workspace_id: str) -> tuple[Response, int] | Response: if workspace_id.startswith("cli:"): try: return get_cli_workspace_tabs(workspace_id, exclusion_rules()) @@ -182,7 +183,7 @@ def get_workspace_tabs(workspace_id): # --------------------------------------------------------------------------- 
@bp.route("/api/workspaces//tabs/") -def get_workspace_tab(workspace_id, composer_id): +def get_workspace_tab(workspace_id: str, composer_id: str) -> tuple[Response, int] | Response: if workspace_id.startswith("cli:"): return jsonify({"error": "Per-tab lazy load is not supported for CLI workspaces"}), 400 try: diff --git a/app.py b/app.py index afebb1c..a9216a2 100644 --- a/app.py +++ b/app.py @@ -9,8 +9,9 @@ import sys from datetime import datetime from pathlib import Path +from typing import cast -from flask import Flask, render_template, send_from_directory +from flask import Flask, Response, render_template, send_from_directory from utils.debug_flag import resolve_debug_flag @@ -24,18 +25,18 @@ from utils.exclusion_rules import resolve_exclusion_rules_path, load_rules -def _get_base_path(): +def _get_base_path() -> Path: """Return the directory that contains templates/ and static/. In a PyInstaller bundle the files live under sys._MEIPASS; otherwise they sit next to this source file. 
""" if getattr(sys, "frozen", False): - return Path(sys._MEIPASS) + return Path(getattr(sys, "_MEIPASS")) return Path(__file__).resolve().parent -def create_app(exclusion_rules_path=None): +def create_app(exclusion_rules_path: str | None = None) -> Flask: logging.basicConfig( level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s %(funcName)s: %(message)s", @@ -56,7 +57,7 @@ def create_app(exclusion_rules_path=None): app.config["EXCLUSION_RULES"] = load_rules(resolved) @app.context_processor - def inject_year(): + def inject_year() -> dict[str, int]: return {"current_year": datetime.now().year} # Register API blueprints @@ -71,25 +72,26 @@ def inject_year(): # ---------- Page routes ---------- @app.route("/") - def home(): + def home() -> str: return render_template("index.html") @app.route("/config") - def config_page(): + def config_page() -> str: return render_template("config.html") @app.route("/search") - def search_page(): + def search_page() -> str: return render_template("search.html") @app.route("/workspace/") - def workspace_page(workspace_id): + def workspace_page(workspace_id: str) -> str: return render_template("workspace.html", workspace_id=workspace_id) # Serve favicon @app.route("/favicon.ico") - def favicon(): - return send_from_directory(app.static_folder, "favicon.ico", mimetype="image/x-icon") + def favicon() -> Response: + static_folder = cast(str, app.static_folder) + return send_from_directory(static_folder, "favicon.ico", mimetype="image/x-icon") return app diff --git a/models/__init__.py b/models/__init__.py index e7b94d5..3c73172 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -3,15 +3,18 @@ from models.errors import SchemaError from models.parse_warnings import ParseWarningCollector from models.export import ExportEntry +from models.search import ConversationSummary, SearchResult from models.workspace import Workspace __all__ = [ "Bubble", "CliSessionMeta", "Composer", + "ConversationSummary", "ExportEntry", 
"ParseWarningCollector", "SchemaError", + "SearchResult", "Workspace", "WorkspaceLocalComposer", ] diff --git a/models/conversation.py b/models/conversation.py index 9de6016..bc1308b 100644 --- a/models/conversation.py +++ b/models/conversation.py @@ -2,7 +2,7 @@ import logging from dataclasses import dataclass, field -from typing import Any +from typing import Any, cast from models.errors import SchemaError from models.from_dict_validation import ( @@ -126,7 +126,7 @@ def _optional_counter(self, key: str) -> int | float: type(value).__name__, ) return 0 - return value + return cast(int | float, value) @property def total_lines_added(self) -> int | float: @@ -313,7 +313,7 @@ def thinking_duration_ms(self) -> int | float | None: type(value).__name__, ) return None - return value + return cast(int | float, value) @property def context_window_status_at_creation(self) -> dict[str, Any]: diff --git a/models/parse_warnings.py b/models/parse_warnings.py index bcfe802..bfe7e1f 100644 --- a/models/parse_warnings.py +++ b/models/parse_warnings.py @@ -1,6 +1,7 @@ from __future__ import annotations from dataclasses import dataclass, field +from typing import Any @dataclass @@ -48,9 +49,9 @@ def has_warnings(self) -> bool: or bool(self.source_failures) ) - def to_api_list(self) -> list[dict]: + def to_api_list(self) -> list[dict[str, Any]]: """Structured warnings for JSON API responses (issue #67).""" - warnings: list[dict] = [] + warnings: list[dict[str, Any]] = [] if self.composers_skipped: n = self.composers_skipped noun = "conversation" if n == 1 else "conversations" @@ -89,7 +90,7 @@ def to_api_list(self) -> list[dict]: }) return warnings - def attach_to(self, payload: dict) -> dict: + def attach_to(self, payload: dict[str, Any]) -> dict[str, Any]: """Add ``warnings`` to a dict response when any failures were recorded.""" if self.has_warnings: payload = {**payload, "warnings": self.to_api_list()} diff --git a/models/search.py b/models/search.py new file mode 100644 index 
0000000..80648ce --- /dev/null +++ b/models/search.py @@ -0,0 +1,27 @@ +"""Typed shapes for search API results and composer summary metadata.""" + +from __future__ import annotations + +from typing import Any, NotRequired, TypedDict + + +class ConversationSummary(TypedDict, total=False): + """Cursor ``conversationSummary`` blob on composer rows (schema varies by version).""" + + summary: str + title: str + bullets: list[str] + raw: dict[str, Any] + + +class SearchResult(TypedDict): + """One hit returned by ``/api/search`` and the search service helpers.""" + + workspaceId: str + workspaceFolder: str | None + chatId: str + chatTitle: str + timestamp: int | str + matchingText: str + type: str # "composer" | "chat" | "cli_agent" + source: NotRequired[str] # "cli" for CLI agent sessions diff --git a/pyproject.toml b/pyproject.toml index 2ad1af5..678f218 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,9 +90,22 @@ include = [ # Mirrors the flags used in .github/workflows/tests.yml so local `mypy .` # and CI produce identical results. [tool.mypy] +strict = true ignore_missing_imports = true pretty = true # Exclude virtual-env and build artefact directories so that `mypy .` from the # repo root matches CI behaviour (CI runs in a clean runner without a local venv). # Anchored regexes — unanchored `venv/` would match any path segment containing "venv/". exclude = ["^venv/", "^\\.venv/", "^build/", "^dist/"] + +# Standalone CLI export script (~985 LOC) duplicates utils/ helpers; typed +# incrementally — issue #100 allows per-module override until consolidated. +[[tool.mypy.overrides]] +module = "scripts.export" +ignore_errors = true + +# Test modules use unittest/pytest patterns that are not worth strict-checking +# alongside production code; route handlers and utils are fully strict. 
+[[tool.mypy.overrides]] +module = "tests.*" +ignore_errors = true diff --git a/services/cli_tabs.py b/services/cli_tabs.py index f036aa2..a9e8e3d 100644 --- a/services/cli_tabs.py +++ b/services/cli_tabs.py @@ -2,8 +2,9 @@ import logging from datetime import datetime +from typing import Any -from flask import jsonify +from flask import Response, jsonify from utils.cli_chat_reader import list_cli_projects, messages_to_bubbles, traverse_blobs from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules @@ -12,7 +13,9 @@ _logger = logging.getLogger(__name__) -def get_cli_workspace_tabs(workspace_id: str, rules: list): +def get_cli_workspace_tabs( + workspace_id: str, rules: list[Any], +) -> Response | tuple[Response, int]: """Return Flask JSON response with tabs for a Cursor CLI project. Args: @@ -98,7 +101,7 @@ def get_cli_workspace_tabs(workspace_id: str, rules: list): # Aggregate metadata total_tool_calls = 0 - tool_breakdown: dict = {} + tool_breakdown: dict[str, int] = {} for b in bubbles: tcs = (b.get("metadata") or {}).get("toolCalls") or [] total_tool_calls += len(tcs) @@ -106,7 +109,7 @@ def get_cli_workspace_tabs(workspace_id: str, rules: list): tn = tc.get("name", "unknown") tool_breakdown[tn] = tool_breakdown.get(tn, 0) + 1 - tab_meta: dict | None = None + tab_meta: dict[str, Any] | None = None if total_tool_calls or tool_breakdown: tab_meta = {"totalToolCalls": total_tool_calls or None} if tool_breakdown: diff --git a/services/search.py b/services/search.py index b3209a7..fa68ce7 100644 --- a/services/search.py +++ b/services/search.py @@ -24,6 +24,7 @@ from contextlib import closing from datetime import datetime from pathlib import Path +from typing import Any __all__ = [ "rank_results", @@ -31,7 +32,7 @@ "search_global_storage", "search_legacy_workspaces", ] -from models import Bubble, Composer, ParseWarningCollector, SchemaError +from models import Bubble, Composer, ParseWarningCollector, SchemaError, SearchResult from 
services.workspace_db import ( build_composer_id_to_workspace_id, collect_workspace_entries, @@ -54,7 +55,7 @@ # --------------------------------------------------------------------------- -def _json_dump_safe(value) -> str: +def _json_dump_safe(value: object) -> str: """Best-effort JSON serialisation for exclusion-rule matching.""" try: return json.dumps(value, ensure_ascii=False, sort_keys=True) @@ -148,16 +149,16 @@ def _build_ws_id_to_name( def _build_search_bubble_map( - global_db, + global_db: sqlite3.Connection, parse_warnings: ParseWarningCollector, -) -> dict[str, dict]: +) -> dict[str, dict[str, Any]]: """Load ``bubbleId:*`` rows from an open global DB connection. Returns ``{bubble_id: {"text": str, "raw": dict}}``. Rows that fail schema validation or JSON decoding are skipped; the skip is recorded in *parse_warnings*. """ - bubble_map: dict[str, dict] = {} + bubble_map: dict[str, dict[str, Any]] = {} for row in global_db.execute( "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%'" ): @@ -188,9 +189,9 @@ def search_global_storage( workspace_path: str, query: str, query_lower: str, - rules: list, + rules: list[Any], parse_warnings: ParseWarningCollector, -) -> list[dict]: +) -> list[SearchResult]: """Search composer conversations stored in the global ``cursorDiskKV`` table. This is the primary data source for current Cursor versions. @@ -206,7 +207,7 @@ def search_global_storage( List of search result dicts with keys ``workspaceId``, ``workspaceFolder``, ``chatId``, ``chatTitle``, ``timestamp``, ``matchingText``, ``type``. """ - results: list[dict] = [] + results: list[SearchResult] = [] try: workspace_entries = collect_workspace_entries(workspace_path) ws_id_to_name = _build_ws_id_to_name(workspace_entries) @@ -346,8 +347,8 @@ def search_legacy_workspaces( query: str, query_lower: str, search_type: str, - rules: list, -) -> list[dict]: + rules: list[Any], +) -> list[SearchResult]: """Search legacy per-workspace ItemTable chat data. 
Iterates per-workspace ``state.vscdb`` files looking for the @@ -364,7 +365,7 @@ def search_legacy_workspaces( Returns: List of search result dicts with ``type`` set to ``"chat"``. """ - results: list[dict] = [] + results: list[SearchResult] = [] if search_type not in ("all", "chat"): return results @@ -461,8 +462,8 @@ def search_cli_sessions( cli_chats_path: str, query: str, query_lower: str, - rules: list, -) -> list[dict]: + rules: list[Any], +) -> list[SearchResult]: """Search Cursor CLI agent sessions stored as JSONL + blob files. Reads from ``~/.cursor/chats/`` (or the path returned by @@ -478,7 +479,7 @@ def search_cli_sessions( List of search result dicts with ``type`` set to ``"cli_agent"`` and ``source`` set to ``"cli"``. """ - results: list[dict] = [] + results: list[SearchResult] = [] try: cli_projects = list_cli_projects(cli_chats_path) for cp in cli_projects: @@ -557,7 +558,7 @@ def search_cli_sessions( # --------------------------------------------------------------------------- -def rank_results(results: list[dict]) -> list[dict]: +def rank_results(results: list[SearchResult]) -> list[SearchResult]: """Sort *results* by timestamp descending. All three source types use epoch-millisecond integers, except @@ -565,7 +566,7 @@ def rank_results(results: list[dict]) -> list[dict]: ``lastSendTime`` field. ISO strings are converted to epoch-ms so cross-source comparisons are made in the same unit. 
""" - def _ts(r: dict) -> float: + def _ts(r: SearchResult) -> float: t = r.get("timestamp", 0) if isinstance(t, str): try: diff --git a/services/summary_cache.py b/services/summary_cache.py index b0010e1..076f13b 100644 --- a/services/summary_cache.py +++ b/services/summary_cache.py @@ -35,7 +35,7 @@ def nocache_enabled(*, request_nocache: bool = False) -> bool: ) -def _rules_digest(rules: list) -> str: +def _rules_digest(rules: list[Any]) -> str: try: payload = json.dumps(rules, sort_keys=True, ensure_ascii=False) except (TypeError, ValueError): @@ -54,10 +54,10 @@ def _file_mtime_ns(path: str | None) -> int | None: def fingerprint_workspace_storage( workspace_path: str, - workspace_entries: list[dict], + workspace_entries: list[dict[str, Any]], *, global_db_path: str | None, - rules: list, + rules: list[Any], cli_chats_path: str | None = None, ) -> dict[str, Any]: """Build a fingerprint dict for cache invalidation.""" @@ -129,7 +129,9 @@ def _write_cache_file(path: Path | str, payload: dict[str, Any]) -> None: _logger.warning("Summary cache write failed for %s: %s", path, e) -def get_cached_projects(fingerprint: dict[str, Any]) -> tuple[list[dict], list[dict]] | None: +def get_cached_projects( + fingerprint: dict[str, Any], +) -> tuple[list[dict[str, Any]], list[dict[str, Any]]] | None: data = _read_cache_file(PROJECTS_CACHE_FILE) if not data: return None @@ -146,8 +148,8 @@ def get_cached_projects(fingerprint: dict[str, Any]) -> tuple[list[dict], list[d def set_cached_projects( fingerprint: dict[str, Any], - projects: list[dict], - warnings: list[dict], + projects: list[dict[str, Any]], + warnings: list[dict[str, Any]], ) -> None: _write_cache_file( PROJECTS_CACHE_FILE, @@ -194,7 +196,7 @@ def _tab_summaries_path(workspace_id: str) -> Path: def get_cached_tab_summaries( fingerprint: dict[str, Any], workspace_id: str, -) -> tuple[dict, int] | None: +) -> tuple[dict[str, Any], int] | None: data = _read_cache_file(_tab_summaries_path(workspace_id)) if not data: 
return None @@ -212,7 +214,7 @@ def get_cached_tab_summaries( def set_cached_tab_summaries( fingerprint: dict[str, Any], workspace_id: str, - payload: dict, + payload: dict[str, Any], status: int, ) -> None: _write_cache_file( diff --git a/services/workspace_context.py b/services/workspace_context.py index d05580d..38cbbf3 100644 --- a/services/workspace_context.py +++ b/services/workspace_context.py @@ -4,6 +4,7 @@ import sqlite3 from dataclasses import dataclass, replace +from typing import Any from services.workspace_db import ( build_composer_id_to_workspace_id, @@ -23,26 +24,26 @@ class WorkspaceContext: """Precomputed workspace-resolution maps for conversation assignment.""" - workspace_entries: list[dict] + workspace_entries: list[dict[str, Any]] invalid_workspace_ids: set[str] composer_id_to_workspace_id: dict[str, str] project_name_to_workspace_id: dict[str, str] workspace_path_to_id: dict[str, str] - project_layouts_map: dict[str, list] - bubble_map: dict[str, dict] + project_layouts_map: dict[str, list[str]] + bubble_map: dict[str, dict[str, Any]] def _entries( workspace_path: str, - workspace_entries: list[dict] | None, -) -> list[dict]: + workspace_entries: list[dict[str, Any]] | None, +) -> list[dict[str, Any]]: if workspace_entries is not None: return workspace_entries return collect_workspace_entries(workspace_path) def _assemble_context( - entries: list[dict], + entries: list[dict[str, Any]], *, invalid_workspace_ids: set[str], workspace_path_to_id: dict[str, str], @@ -62,7 +63,7 @@ def _assemble_context( def resolve_workspace_context( workspace_path: str, *, - workspace_entries: list[dict] | None = None, + workspace_entries: list[dict[str, Any]] | None = None, ) -> WorkspaceContext: """Full workspace maps with an uncached composer→workspace scan (CLI export).""" entries = _entries(workspace_path, workspace_entries) @@ -78,9 +79,9 @@ def resolve_workspace_context( def resolve_workspace_context_cached( workspace_path: str, - rules: list, + rules: 
list[Any], *, - workspace_entries: list[dict] | None = None, + workspace_entries: list[dict[str, Any]] | None = None, nocache: bool = False, ) -> WorkspaceContext: """Full workspace maps with a mtime-keyed composer map (listing / tabs).""" @@ -98,7 +99,7 @@ def resolve_workspace_context_cached( def resolve_workspace_context_minimal( workspace_path: str, *, - workspace_entries: list[dict] | None = None, + workspace_entries: list[dict[str, Any]] | None = None, ) -> WorkspaceContext: """Entries, project-name, and composer maps only (HTTP export). @@ -125,7 +126,7 @@ def enrich_workspace_context_from_global_db( populate_bubble_map: bool = False, ) -> WorkspaceContext: """Return *ctx* with global KV maps loaded from an open global DB connection.""" - updates: dict = {} + updates: dict[str, Any] = {} if populate_project_layouts: updates["project_layouts_map"] = load_project_layouts_map(global_db) if populate_bubble_map: diff --git a/services/workspace_db.py b/services/workspace_db.py index 3a90c4d..b1b6113 100644 --- a/services/workspace_db.py +++ b/services/workspace_db.py @@ -4,8 +4,10 @@ import logging import os import sqlite3 +from collections.abc import Iterator from contextlib import closing, contextmanager from pathlib import Path +from typing import Any _logger = logging.getLogger(__name__) @@ -20,13 +22,13 @@ # corrupt table cannot propagate to callers. -def load_bubble_map(global_db) -> dict[str, dict]: +def load_bubble_map(global_db: sqlite3.Connection) -> dict[str, dict[str, Any]]: """Load all ``bubbleId:*`` KV entries into ``{bubble_id: bubble_dict}``. Skips rows whose JSON value is not a dict; JSON parse errors are logged at DEBUG level so a single malformed row cannot block the rest. 
""" - bubble_map: dict[str, dict] = {} + bubble_map: dict[str, dict[str, Any]] = {} try: rows = global_db.execute( "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%'" @@ -47,7 +49,7 @@ def load_bubble_map(global_db) -> dict[str, dict]: return bubble_map -def _extract_root_paths_from_context(ctx: dict) -> list[str]: +def _extract_root_paths_from_context(ctx: dict[str, Any]) -> list[str]: """Pull ``rootPath`` strings from a messageRequestContext JSON object.""" paths: list[str] = [] layouts = ctx.get("projectLayouts") @@ -63,7 +65,9 @@ def _extract_root_paths_from_context(ctx: dict) -> list[str]: return paths -def load_project_layouts_for_composer(global_db, composer_id: str) -> list[str]: +def load_project_layouts_for_composer( + global_db: sqlite3.Connection, composer_id: str, +) -> list[str]: """Scoped MRC load: ``messageRequestContext:{composer_id}:%`` only.""" paths: list[str] = [] try: @@ -87,14 +91,14 @@ def load_project_layouts_for_composer(global_db, composer_id: str) -> list[str]: return paths -def load_project_layouts_map(global_db) -> dict[str, list]: +def load_project_layouts_map(global_db: sqlite3.Connection) -> dict[str, list[str]]: """Load ``projectLayouts`` from all ``messageRequestContext:*`` KV entries. Returns ``{composer_id: [root_path_str, ...]}``. Prefer :func:`load_project_layouts_for_composer` on list paths when only a few composers need layout fallbacks. """ - layouts_map: dict[str, list] = {} + layouts_map: dict[str, list[str]] = {} try: rows = global_db.execute( "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'messageRequestContext:%'" @@ -116,13 +120,13 @@ def load_project_layouts_map(global_db) -> dict[str, list]: return layouts_map -def load_code_block_diff_map(global_db) -> dict[str, list]: +def load_code_block_diff_map(global_db: sqlite3.Connection) -> dict[str, list[dict[str, Any]]]: """Load ``codeBlockDiff:*`` KV entries into ``{composer_id: [diff_dict]}``. 
Each diff dict contains all fields from the raw JSON value plus a ``diffId`` key taken from the third path component of the KV key. """ - diff_map: dict[str, list] = {} + diff_map: dict[str, list[dict[str, Any]]] = {} try: rows = global_db.execute( "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'codeBlockDiff:%'" @@ -146,13 +150,15 @@ def load_code_block_diff_map(global_db) -> dict[str, list]: return diff_map -def load_bubbles_for_composer(global_db, composer_id: str) -> dict[str, dict]: +def load_bubbles_for_composer( + global_db: sqlite3.Connection, composer_id: str, +) -> dict[str, dict[str, Any]]: """Load ``bubbleId:{composer_id}:*`` KV entries into ``{bubble_id: bubble_dict}``. Scoped alternative to :func:`load_bubble_map` for single-conversation assembly; avoids a full global ``bubbleId:%`` scan. """ - bubble_map: dict[str, dict] = {} + bubble_map: dict[str, dict[str, Any]] = {} try: rows = global_db.execute( "SELECT key, value FROM cursorDiskKV WHERE key LIKE ?", @@ -175,15 +181,15 @@ def load_bubbles_for_composer(global_db, composer_id: str) -> dict[str, dict]: def load_message_request_context_for_composer( - global_db, composer_id: str -) -> list[dict]: + global_db: sqlite3.Connection, composer_id: str, +) -> list[dict[str, Any]]: """Load ``messageRequestContext:{composer_id}:*`` KV entries. Returns a list of context dicts, each with an injected ``contextId`` key taken from the third path component of the KV key. Scoped alternative to the global MRC pass inside :func:`load_project_layouts_map`. """ - contexts: list[dict] = [] + contexts: list[dict[str, Any]] = [] try: rows = global_db.execute( "SELECT key, value FROM cursorDiskKV WHERE key LIKE ?", @@ -210,15 +216,15 @@ def load_message_request_context_for_composer( def load_code_block_diffs_for_composer( - global_db, composer_id: str -) -> list[dict]: + global_db: sqlite3.Connection, composer_id: str, +) -> list[dict[str, Any]]: """Load ``codeBlockDiff:{composer_id}:*`` KV entries. 
Returns a list of diff dicts, each with an injected ``diffId`` key. Scoped alternative to :func:`load_code_block_diff_map` for single-conversation assembly. """ - diffs: list[dict] = [] + diffs: list[dict[str, Any]] = [] try: rows = global_db.execute( "SELECT key, value FROM cursorDiskKV WHERE key LIKE ?", @@ -239,7 +245,7 @@ def load_code_block_diffs_for_composer( return diffs -def collect_workspace_entries(workspace_path: str) -> list[dict]: +def collect_workspace_entries(workspace_path: str) -> list[dict[str, Any]]: """Scan workspace directory and return entries with workspace.json. Args: @@ -249,7 +255,7 @@ def collect_workspace_entries(workspace_path: str) -> list[dict]: List of dicts with keys ``name`` (folder id) and ``workspaceJsonPath``. Returns an empty list if ``workspace_path`` is missing or unreadable. """ - entries = [] + entries: list[dict[str, Any]] = [] try: for name in os.listdir(workspace_path): full = os.path.join(workspace_path, name) @@ -265,7 +271,7 @@ def collect_workspace_entries(workspace_path: str) -> list[dict]: return entries -def collect_invalid_workspace_ids(workspace_entries: list[dict]) -> set[str]: +def collect_invalid_workspace_ids(workspace_entries: list[dict[str, Any]]) -> set[str]: """Return workspace IDs whose descriptors have no resolvable folder paths. Args: @@ -305,7 +311,9 @@ def global_storage_db_path(workspace_path: str) -> str: return os.path.normpath(os.path.join(workspace_path, "..", "globalStorage", "state.vscdb")) -def build_composer_id_to_workspace_id(workspace_path: str, workspace_entries: list) -> dict: +def build_composer_id_to_workspace_id( + workspace_path: str, workspace_entries: list[dict[str, Any]], +) -> dict[str, str]: """Build mapping from composer ID to workspace folder name. Reads ``composer.composerData`` from each workspace's ``state.vscdb``. 
@@ -318,7 +326,7 @@ def build_composer_id_to_workspace_id(workspace_path: str, workspace_entries: li Returns: Dict mapping ``composerId`` strings to workspace folder names. """ - mapping: dict = {} + mapping: dict[str, str] = {} for entry in workspace_entries: db_path = os.path.join(workspace_path, entry["name"], "state.vscdb") if not os.path.isfile(db_path): @@ -327,7 +335,7 @@ def build_composer_id_to_workspace_id(workspace_path: str, workspace_entries: li # Path.as_uri() percent-encodes reserved chars; ``f"file:{path}"`` # breaks sqlite URI parsing on paths with spaces, ``#``, etc. db_uri = Path(db_path).resolve().as_uri() + "?mode=ro" - row: tuple | None = None + row: tuple[Any, ...] | None = None try: with closing(sqlite3.connect(db_uri, uri=True)) as conn: row = conn.execute( @@ -355,11 +363,11 @@ def build_composer_id_to_workspace_id(workspace_path: str, workspace_entries: li def build_composer_id_to_workspace_id_cached( workspace_path: str, - workspace_entries: list, - rules: list, + workspace_entries: list[dict[str, Any]], + rules: list[Any], *, nocache: bool = False, -) -> dict: +) -> dict[str, str]: """Like :func:`build_composer_id_to_workspace_id` with optional disk cache.""" from services.summary_cache import ( fingerprint_workspace_storage, @@ -389,7 +397,9 @@ def build_composer_id_to_workspace_id_cached( @contextmanager -def open_global_db(workspace_path: str): +def open_global_db( + workspace_path: str, +) -> Iterator[tuple[sqlite3.Connection | None, str]]: """Open Cursor global storage SQLite database read-only. 
Args: diff --git a/services/workspace_listing.py b/services/workspace_listing.py index 9f8128e..b1aeed1 100644 --- a/services/workspace_listing.py +++ b/services/workspace_listing.py @@ -5,6 +5,7 @@ import os import sqlite3 from datetime import datetime, timezone +from typing import Any, cast _logger = logging.getLogger(__name__) @@ -43,7 +44,7 @@ def _composer_valid_for_listing( - cd: dict, + cd: dict[str, Any], composer_id: str, parse_warnings: ParseWarningCollector, ) -> bool: @@ -72,10 +73,10 @@ def _composer_valid_for_listing( def list_workspace_projects( workspace_path: str, - rules: list, + rules: list[Any], *, nocache: bool = False, -) -> tuple[list[dict], list[dict]]: +) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: """List workspace projects for GET /api/workspaces. Args: @@ -116,11 +117,11 @@ def list_workspace_projects( def _build_workspace_projects_uncached( workspace_path: str, - rules: list, - workspace_entries: list[dict], + rules: list[Any], + workspace_entries: list[dict[str, Any]], *, nocache: bool, -) -> tuple[list[dict], list[dict]]: +) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: parse_warnings = ParseWarningCollector() ctx = resolve_workspace_context_cached( workspace_path, @@ -133,24 +134,29 @@ def _build_workspace_projects_uncached( workspace_path_map = ctx.workspace_path_to_id composer_id_to_ws = ctx.composer_id_to_workspace_id - conversation_map: dict[str, list] = {} + conversation_map: dict[str, list[dict[str, Any]]] = {} with open_global_db(workspace_path) as (global_db, _): if global_db: - def _safe_fetchall(query: str, params: tuple = ()) -> list: + def _safe_fetchall( + query: str, params: tuple[Any, ...] 
= (), + ) -> list[sqlite3.Row]: try: - return global_db.execute(query, params).fetchall() + return cast( + list[sqlite3.Row], + global_db.execute(query, params).fetchall(), + ) except sqlite3.Error: return [] try: composer_rows = _safe_fetchall(COMPOSER_ROWS_WITH_HEADERS_SQL) - project_layouts_map: dict[str, list] = {} + project_layouts_map: dict[str, list[str]] = {} if invalid_workspace_ids: project_layouts_map = load_project_layouts_map(global_db) - bubble_map: dict[str, dict] = {} + bubble_map: dict[str, dict[str, Any]] = {} invalid_workspace_aliases: dict[str, str] = {} if invalid_workspace_ids: invalid_workspace_aliases = infer_invalid_workspace_aliases( @@ -229,7 +235,7 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: ) # Group workspace entries by normalized folder path - folder_to_entries: dict[str, list] = {} + folder_to_entries: dict[str, list[dict[str, Any]]] = {} entry_folder_map: dict[str, str] = {} for entry in workspace_entries: norm_folder = "" @@ -246,8 +252,8 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: entry_folder_map[entry["name"]] = norm_folder folder_to_entries.setdefault(norm_folder, []).append(entry) - projects: list[dict] = [] - seen_folders: set = set() + projects: list[dict[str, Any]] = [] + seen_folders: set[str] = set() for entry in workspace_entries: norm_folder = entry_folder_map[entry["name"]] if norm_folder in seen_folders: diff --git a/services/workspace_resolver.py b/services/workspace_resolver.py index 284b2b2..7997468 100644 --- a/services/workspace_resolver.py +++ b/services/workspace_resolver.py @@ -9,7 +9,7 @@ from collections.abc import Mapping from contextlib import closing from pathlib import Path -from typing import Any +from typing import Any, cast _logger = logging.getLogger(__name__) @@ -86,7 +86,7 @@ def infer_workspace_name_from_context(workspace_path: str, workspace_id: str) -> # Path.as_uri() percent-encodes reserved chars (#, ?, spaces, etc.); # naive f"file:{path}" breaks sqlite 
URI parsing. _db_uri = Path(local_db_path).resolve().as_uri() + "?mode=ro" - row: tuple | None = None + row: tuple[Any, ...] | None = None try: with closing(sqlite3.connect(_db_uri, uri=True)) as lconn: row = lconn.execute( @@ -161,7 +161,7 @@ def infer_workspace_name_from_context(workspace_path: str, workspace_id: str) -> def get_project_from_file_path( file_path: str, - workspace_entries: list[dict], + workspace_entries: list[dict[str, Any]], ) -> str | None: """Map a file path to the workspace folder that contains it. @@ -192,7 +192,9 @@ def get_project_from_file_path( return best_match -def create_project_name_to_workspace_id_map(workspace_entries): +def create_project_name_to_workspace_id_map( + workspace_entries: list[dict[str, Any]], +) -> dict[str, str]: """Map workspace folder basenames to workspace folder names. Args: @@ -201,7 +203,7 @@ def create_project_name_to_workspace_id_map(workspace_entries): Returns: Dict mapping last path segment (folder name) to workspace id. """ - mapping = {} + mapping: dict[str, str] = {} for entry in workspace_entries: try: wd = read_json_file(entry["workspaceJsonPath"]) @@ -216,7 +218,9 @@ def create_project_name_to_workspace_id_map(workspace_entries): return mapping -def create_workspace_path_to_id_map(workspace_entries): +def create_workspace_path_to_id_map( + workspace_entries: list[dict[str, Any]], +) -> dict[str, str]: """Map normalized workspace root paths to workspace folder names. Args: @@ -225,7 +229,7 @@ def create_workspace_path_to_id_map(workspace_entries): Returns: Dict mapping normalized folder paths to workspace ids. 
""" - out = {} + out: dict[str, str] = {} for entry in workspace_entries: try: wd = read_json_file(entry["workspaceJsonPath"]) @@ -238,14 +242,14 @@ def create_workspace_path_to_id_map(workspace_entries): def determine_project_for_conversation( - composer_data: Composer | dict, + composer_data: Composer | dict[str, Any], composer_id: str, - project_layouts_map: dict, - project_name_to_workspace_id: dict, - workspace_path_to_id: dict, - workspace_entries: list, + project_layouts_map: dict[str, list[str]], + project_name_to_workspace_id: dict[str, str], + workspace_path_to_id: dict[str, str], + workspace_entries: list[dict[str, Any]], bubble_map: Mapping[str, Bubble | dict[str, Any]], - composer_id_to_workspace_id: dict | None = None, + composer_id_to_workspace_id: dict[str, str] | None = None, invalid_workspace_ids: set[str] | None = None, ) -> str | None: """Resolve which workspace folder owns a composer conversation. @@ -365,19 +369,19 @@ def determine_project_for_conversation( best_len = len(item["name"]) best_id = item["id"] if best_id: - return best_id + return cast(str, best_id) return None def infer_invalid_workspace_aliases( - composer_rows: list, - project_layouts_map: dict, - project_name_map: dict, - workspace_path_map: dict, - workspace_entries: list, + composer_rows: list[sqlite3.Row], + project_layouts_map: dict[str, list[str]], + project_name_map: dict[str, str], + workspace_path_map: dict[str, str], + workspace_entries: list[dict[str, Any]], bubble_map: Mapping[str, Bubble | dict[str, Any]], - composer_id_to_ws: dict, + composer_id_to_ws: dict[str, str], invalid_workspace_ids: set[str], ) -> dict[str, str]: """Map invalid workspace IDs to valid replacements by majority vote. 
diff --git a/services/workspace_tabs.py b/services/workspace_tabs.py index 10e62bc..3c2a6c8 100644 --- a/services/workspace_tabs.py +++ b/services/workspace_tabs.py @@ -7,7 +7,7 @@ import sqlite3 from collections.abc import Mapping from datetime import datetime -from typing import Any +from typing import Any, cast _logger = logging.getLogger(__name__) @@ -112,12 +112,12 @@ def _assemble_tab_from_composer_data( composer_id: str, composer: Composer, bubble_map: Mapping[str, Bubble | dict[str, Any]], - contexts: list[dict], - code_block_diffs: list[dict], + contexts: list[dict[str, Any]], + code_block_diffs: list[dict[str, Any]], workspace_display_name: str, - rules: list, + rules: list[Any], parse_warnings: ParseWarningCollector, -) -> dict | None: +) -> dict[str, Any] | None: """Assemble a single tab dict from a validated :class:`Composer`. Args: @@ -345,7 +345,7 @@ def _assemble_tab_from_composer_data( total_cost = 0.0 total_tool_calls = 0 total_thinking_ms = 0 - models_set: set = set() + models_set: set[str] = set() for b in bubbles: m = b.get("metadata") or {} if m.get("inputTokens"): @@ -435,7 +435,11 @@ def _assemble_tab_from_composer_data( return tab -def _build_matching_ws_ids(workspace_id: str, workspace_path: str, workspace_entries: list) -> set[str]: +def _build_matching_ws_ids( + workspace_id: str, + workspace_path: str, + workspace_entries: list[dict[str, Any]], +) -> set[str]: """Return the set of workspace folder IDs that share the same project folder as *workspace_id*. Cursor sometimes creates multiple workspace entries for the same on-disk @@ -471,10 +475,10 @@ def _build_matching_ws_ids(workspace_id: str, workspace_path: str, workspace_ent def list_workspace_tab_summaries( workspace_id: str, workspace_path: str, - rules: list, + rules: list[Any], *, nocache: bool = False, -) -> tuple[dict, int]: +) -> tuple[dict[str, Any], int]: """Return summary tab list for GET /api/workspaces//tabs?summary=1. Does **not** load the global ``bubbleId:%`` index. 
Each tab entry contains @@ -518,13 +522,13 @@ def list_workspace_tab_summaries( def _build_workspace_tab_summaries_uncached( workspace_id: str, workspace_path: str, - rules: list, - workspace_entries: list, + rules: list[Any], + workspace_entries: list[dict[str, Any]], *, nocache: bool, -) -> tuple[dict, int]: +) -> tuple[dict[str, Any], int]: parse_warnings = ParseWarningCollector() - response: dict = {"tabs": []} + response: dict[str, Any] = {"tabs": []} ctx = resolve_workspace_context_cached( workspace_path, @@ -544,13 +548,18 @@ def _build_workspace_tab_summaries_uncached( workspace_display_name = lookup_workspace_display_name(workspace_path, workspace_id) - def _safe_fetchall(query: str, params: tuple = ()) -> list: + def _safe_fetchall( + query: str, params: tuple[Any, ...] = (), + ) -> list[sqlite3.Row]: try: - return global_db.execute(query, params).fetchall() + return cast( + list[sqlite3.Row], + global_db.execute(query, params).fetchall(), + ) except sqlite3.Error: return [] - project_layouts_map: dict[str, list] = {} + project_layouts_map: dict[str, list[str]] = {} if invalid_workspace_ids: project_layouts_map = load_project_layouts_map(global_db) @@ -633,11 +642,11 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: )): continue - tab_meta: dict | None = None + tab_meta: dict[str, Any] | None = None if _early_model_names: tab_meta = {"modelsUsed": _early_model_names} - tab_entry: dict = { + tab_entry: dict[str, Any] = { "id": composer_id, "title": title, "timestamp": _composer_tab_timestamp_ms(composer), @@ -664,8 +673,8 @@ def assemble_single_tab( workspace_id: str, composer_id: str, workspace_path: str, - rules: list, -) -> tuple[dict, int]: + rules: list[Any], +) -> tuple[dict[str, Any], int]: """Assemble a single conversation tab for GET /api/workspaces//tabs/. 
Loads only the KV rows scoped to *composer_id* (``bubbleId:{id}:%``, @@ -700,9 +709,14 @@ def assemble_single_tab( workspace_display_name = lookup_workspace_display_name(workspace_path, workspace_id) - def _safe_fetchall(query: str, params: tuple = ()) -> list: + def _safe_fetchall( + query: str, params: tuple[Any, ...] = (), + ) -> list[sqlite3.Row]: try: - return global_db.execute(query, params).fetchall() + return cast( + list[sqlite3.Row], + global_db.execute(query, params).fetchall(), + ) except sqlite3.Error: return [] @@ -738,7 +752,7 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: # Verify the conversation belongs to the requested workspace. # Always scoped: only load messageRequestContext rows for this composer. - project_layouts_map: dict[str, list] = {} + project_layouts_map: dict[str, list[str]] = {} invalid_workspace_aliases: dict[str, str] = {} project_layouts_map[composer_id] = load_project_layouts_for_composer( global_db, composer_id, @@ -790,15 +804,15 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: if tab is None: return {"error": "Conversation not found"}, 404 - response: dict = {"tab": tab} + response: dict[str, Any] = {"tab": tab} return parse_warnings.attach_to(response), 200 def assemble_workspace_tabs( workspace_id: str, workspace_path: str, - rules: list, -) -> tuple[dict, int]: + rules: list[Any], +) -> tuple[dict[str, Any], int]: """Build tabs payload for GET /api/workspaces//tabs (IDE workspaces). Args: @@ -814,7 +828,7 @@ def assemble_workspace_tabs( ``{"error": "Global storage not found"}``. 
""" parse_warnings = ParseWarningCollector() - response: dict = {"tabs": []} + response: dict[str, Any] = {"tabs": []} ctx = resolve_workspace_context_cached(workspace_path, rules) workspace_entries = ctx.workspace_entries @@ -825,8 +839,8 @@ def assemble_workspace_tabs( matching_ws_ids = _build_matching_ws_ids(workspace_id, workspace_path, workspace_entries) bubble_map: dict[str, Bubble] = {} - code_block_diff_map: dict[str, list] = {} - message_request_context_map: dict[str, list] = {} + code_block_diff_map: dict[str, list[dict[str, Any]]] = {} + message_request_context_map: dict[str, list[dict[str, Any]]] = {} with open_global_db(workspace_path) as (global_db, _): if global_db is None: @@ -834,9 +848,14 @@ def assemble_workspace_tabs( workspace_display_name = lookup_workspace_display_name(workspace_path, workspace_id) - def _safe_fetchall(query: str, params: tuple = ()) -> list: + def _safe_fetchall( + query: str, params: tuple[Any, ...] = (), + ) -> list[sqlite3.Row]: try: - return global_db.execute(query, params).fetchall() + return cast( + list[sqlite3.Row], + global_db.execute(query, params).fetchall(), + ) except sqlite3.Error: return [] @@ -881,7 +900,7 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: # Load messageRequestContext rows once; build both # message_request_context_map and project_layouts_map from the same pass. 
- project_layouts_map: dict[str, list] = {} + project_layouts_map: dict[str, list[str]] = {} for row in _safe_fetchall("SELECT key, value FROM cursorDiskKV WHERE key LIKE 'messageRequestContext:%'"): parts = row["key"].split(":") if len(parts) < 2: diff --git a/tests/test_models.py b/tests/test_models.py index 04a8b84..c7e46eb 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -14,6 +14,7 @@ Composer, ExportEntry, SchemaError, + SearchResult, Workspace, WorkspaceLocalComposer, ) @@ -267,5 +268,32 @@ def test_blob_chain_empty_returns_empty_list(self) -> None: self.assertEqual(extract_blob_refs(b""), []) +class SearchResultTypedDictTests(unittest.TestCase): + def test_search_result_accepts_composer_shape(self) -> None: + hit: SearchResult = { + "workspaceId": "ws-1", + "workspaceFolder": "my-project", + "chatId": "composer-abc", + "chatTitle": "Refactor search", + "timestamp": 1_715_000_000_000, + "matchingText": "...matching snippet...", + "type": "composer", + } + self.assertEqual(hit["type"], "composer") + + def test_search_result_accepts_cli_agent_shape(self) -> None: + hit: SearchResult = { + "workspaceId": "cli:proj1", + "workspaceFolder": None, + "chatId": "sess-1", + "chatTitle": "CLI session", + "timestamp": 1_715_000_000_000, + "matchingText": "hello", + "type": "cli_agent", + "source": "cli", + } + self.assertEqual(hit["source"], "cli") + + if __name__ == "__main__": unittest.main() diff --git a/utils/cli_chat_reader.py b/utils/cli_chat_reader.py index b87a5d1..b0d6d0c 100644 --- a/utils/cli_chat_reader.py +++ b/utils/cli_chat_reader.py @@ -42,21 +42,21 @@ import sqlite3 from contextlib import closing from datetime import datetime, timezone -from typing import Generator +from typing import Any, Generator, cast # --------------------------------------------------------------------------- # Low-level store.db helpers # --------------------------------------------------------------------------- -def _read_meta(db_path: str) -> dict: +def 
_read_meta(db_path: str) -> dict[str, Any]: """Read and decode the session metadata row from a ``store.db``.""" # `closing(...)` guarantees .close() on scope exit (issue #17). with closing(sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)) as conn: try: row = conn.execute("SELECT value FROM meta WHERE key = '0'").fetchone() if row and row[0]: - return json.loads(bytes.fromhex(row[0]).decode("utf-8")) + return cast(dict[str, Any], json.loads(bytes.fromhex(row[0]).decode("utf-8"))) except Exception: pass return {} @@ -79,7 +79,7 @@ def extract_blob_refs(data: bytes) -> list[str]: return refs -def classify_blob_data(data: bytes) -> tuple[dict | None, list[str]]: +def classify_blob_data(data: bytes) -> tuple[dict[str, Any] | None, list[str]]: """Classify a blob payload as a JSON message or a binary chain node. Returns ``(message_dict, [])`` when *data* decodes to a dict with a @@ -96,7 +96,7 @@ def classify_blob_data(data: bytes) -> tuple[dict | None, list[str]]: return None, extract_blob_refs(data) -def traverse_blobs(db_path: str) -> list[dict]: +def traverse_blobs(db_path: str) -> list[dict[str, Any]]: """Reconstruct the conversation from a ``store.db`` blob graph. 
Starting from ``latestRootBlobId``, performs BFS over the blob DAG: @@ -129,7 +129,7 @@ def traverse_blobs(db_path: str) -> list[dict]: root_id: str = meta.latest_root_blob_id # Load all blobs, classifying each as JSON or binary - json_blobs: dict[str, dict] = {} + json_blobs: dict[str, dict[str, Any]] = {} chain_blobs: dict[str, list[str]] = {} for blob_id, data in conn.execute("SELECT id, data FROM blobs"): @@ -147,7 +147,7 @@ def traverse_blobs(db_path: str) -> list[dict]: visited: set[str] = set() queue: deque[str] = deque([root_id]) - messages: list[dict] = [] + messages: list[dict[str, Any]] = [] while queue: bid = queue.popleft() @@ -175,7 +175,7 @@ def traverse_blobs(db_path: str) -> list[dict]: _WORKSPACE_PATH_RE = re.compile(r"Workspace Path:\s*(.+?)(?:\n|$)") -def content_to_text(content) -> str: +def content_to_text(content: str | list[Any] | Any) -> str: """Flatten Vercel AI SDK content (string or typed-part array) to plain text.""" if isinstance(content, str): return content @@ -192,11 +192,11 @@ def content_to_text(content) -> str: return "" -def extract_tool_calls(content) -> list[dict]: +def extract_tool_calls(content: str | list[Any] | Any) -> list[dict[str, Any]]: """Extract tool-call parts from assistant message content.""" if not isinstance(content, list): return [] - calls: list[dict] = [] + calls: list[dict[str, Any]] = [] for part in content: if isinstance(part, dict) and part.get("type") == "tool-call": calls.append({ @@ -207,7 +207,7 @@ def extract_tool_calls(content) -> list[dict]: return calls -def extract_workspace_path(messages: list[dict]) -> str | None: +def extract_workspace_path(messages: list[dict[str, Any]]) -> str | None: """Extract the workspace path from the ```` preamble in the first user message that contains one.""" for msg in messages: @@ -233,7 +233,7 @@ def strip_user_info(text: str) -> str: return _USER_INFO_RE.sub("", text).strip() -def messages_to_bubbles(messages: list[dict], created_at_ms: int) -> list[dict]: +def 
messages_to_bubbles(messages: list[dict[str, Any]], created_at_ms: int) -> list[dict[str, Any]]: """Convert CLI message dicts to the bubble format used by the browser UI. Each bubble has: @@ -273,7 +273,7 @@ def messages_to_bubbles(messages: list[dict], created_at_ms: int) -> list[dict]: # only if not already set, to avoid clobbering a keyed entry. tool_outputs.setdefault("", content) - bubbles: list[dict] = [] + bubbles: list[dict[str, Any]] = [] seq = 0 for msg in messages: @@ -303,7 +303,7 @@ def messages_to_bubbles(messages: list[dict], created_at_ms: int) -> list[dict]: if not text.strip() and not tool_calls: continue - bubble: dict = {"type": "ai", "text": text, "timestamp": ts} + bubble: dict[str, Any] = {"type": "ai", "text": text, "timestamp": ts} if tool_calls: # Convert to the format parse_tool_call returns formatted_calls = [] @@ -360,7 +360,7 @@ def messages_to_bubbles(messages: list[dict], created_at_ms: int) -> list[dict]: # Project / session enumeration # --------------------------------------------------------------------------- -def iter_sessions(chats_path: str) -> Generator[dict, None, None]: +def iter_sessions(chats_path: str) -> Generator[dict[str, Any], None, None]: """Yield one dict per CLI session under ``chats_path``. Each dict contains: @@ -390,7 +390,7 @@ def iter_sessions(chats_path: str) -> Generator[dict, None, None]: } -def list_cli_projects(chats_path: str) -> list[dict]: +def list_cli_projects(chats_path: str) -> list[dict[str, Any]]: """Return one dict per CLI project (unique ``project_id``). Each dict: @@ -401,7 +401,7 @@ def list_cli_projects(chats_path: str) -> list[dict]: from the first user message found, and ``workspace_name`` is the last path segment of ``workspace_path``. 
""" - projects: dict[str, dict] = {} + projects: dict[str, dict[str, Any]] = {} for session in iter_sessions(chats_path): pid = session["project_id"] @@ -450,7 +450,7 @@ def list_cli_projects(chats_path: str) -> list[dict]: # Aggregate statistics for a project's sessions # --------------------------------------------------------------------------- -def aggregate_session_stats(session: dict) -> dict: +def aggregate_session_stats(session: dict[str, Any]) -> dict[str, Any]: """Return aggregate statistics for one CLI session. Reads and converts blobs, then counts tool calls and computes wall-clock diff --git a/utils/cursor_md_exporter.py b/utils/cursor_md_exporter.py index eaedf88..25858f3 100644 --- a/utils/cursor_md_exporter.py +++ b/utils/cursor_md_exporter.py @@ -19,6 +19,7 @@ import json from datetime import datetime from pathlib import Path +from typing import Any from utils.cli_chat_reader import traverse_blobs, messages_to_bubbles from utils.path_helpers import to_epoch_ms @@ -31,9 +32,9 @@ def cursor_cli_session_to_markdown( db_path: str | Path, - session_meta: dict | None = None, - workspace_info: dict | None = None, - bubbles: list[dict] | None = None, + session_meta: dict[str, Any] | None = None, + workspace_info: dict[str, Any] | None = None, + bubbles: list[dict[str, Any]] | None = None, title_override: str | None = None, ) -> str: """Generate a complete Markdown document from a Cursor CLI store.db session. @@ -191,11 +192,11 @@ def cursor_cli_session_to_markdown( def cursor_ide_chat_to_markdown( - composer_data: dict, + composer_data: dict[str, Any], composer_id: str, - bubble_map: dict, - code_block_diff_map: dict | None = None, - workspace_info: dict | None = None, + bubble_map: dict[str, Any], + code_block_diff_map: dict[str, Any] | None = None, + workspace_info: dict[str, Any] | None = None, ) -> str: """Generate a complete Markdown document from a Cursor IDE composer session. 
@@ -235,7 +236,7 @@ def cursor_ide_chat_to_markdown( headers = cd.get("fullConversationHeadersOnly") or [] # ── Build bubble list ───────────────────────────────────────────────────── - bubbles: list[dict] = [] + bubbles: list[dict[str, Any]] = [] for h in headers: b = bubble_map.get(h.get("bubbleId")) if not b: diff --git a/utils/debug_flag.py b/utils/debug_flag.py index ba607f1..651f20c 100644 --- a/utils/debug_flag.py +++ b/utils/debug_flag.py @@ -4,8 +4,10 @@ (which the test suite intentionally avoids — see tests/test_cli_args.py). """ +from __future__ import annotations -def resolve_debug_flag(env_value, cli_flag): + +def resolve_debug_flag(env_value: str | None, cli_flag: bool) -> bool: """Return True iff Flask debug / Werkzeug debugger should be enabled. Off by default. The Werkzeug debugger lets a remote attacker execute diff --git a/utils/exclusion_rules.py b/utils/exclusion_rules.py index c0ff9b2..e5d7972 100644 --- a/utils/exclusion_rules.py +++ b/utils/exclusion_rules.py @@ -31,6 +31,11 @@ _logger = logging.getLogger(__name__) +RuleToken = str | tuple[str, str] +RuleTokens = list[RuleToken] +RuleClause = list[tuple[str, str]] +RuleClauses = list[RuleClause] + # Default path when no --exclude-rules is given: ~/.cursor-chat-browser/exclusion-rules.txt DEFAULT_EXCLUSION_RULES_FILENAME = "exclusion-rules.txt" @@ -65,7 +70,7 @@ def resolve_exclusion_rules_path(cli_path: str | None) -> str | None: return None -def tokenize_rule(line: str) -> list: +def tokenize_rule(line: str) -> RuleTokens: """ Tokenize a rule line into terms and operators. @@ -73,7 +78,7 @@ def tokenize_rule(line: str) -> list: the string ``"OR"``, or a ``(kind, value)`` tuple where *kind* is ``"word"`` or ``"phrase"``. 
""" - tokens: list[str | tuple[str, str]] = [] + tokens: RuleTokens = [] rest = line.strip() while rest: # Skip whitespace @@ -111,7 +116,7 @@ def tokenize_rule(line: str) -> list: return tokens -def _term_matches(term: tuple, text: str) -> bool: +def _term_matches(term: tuple[str, str], text: str) -> bool: """ Return True if *term* matches anywhere in *text* (case-insensitive). @@ -130,7 +135,7 @@ def _term_matches(term: tuple, text: str) -> bool: return value.lower() in text.lower() -def _rule_matches(tokens: list, text: str) -> bool: +def _rule_matches(tokens: RuleTokens, text: str) -> bool: """ Evaluate a tokenized rule against *text*. @@ -141,8 +146,8 @@ def _rule_matches(tokens: list, text: str) -> bool: if not tokens: return False # Split by OR into clauses; each clause is the AND of its terms - clauses: list[list] = [] - current: list = [] + clauses: RuleClauses = [] + current: RuleClause = [] for t in tokens: if t == "OR": if current: @@ -151,7 +156,7 @@ def _rule_matches(tokens: list, text: str) -> bool: elif t == "AND": # Explicit AND: terms are already collected sequentially, skip token continue - else: + elif isinstance(t, tuple): current.append(t) if current: clauses.append(current) @@ -167,7 +172,7 @@ def _rule_matches(tokens: list, text: str) -> bool: return False -def load_rules(path: str | None) -> list[list]: +def load_rules(path: str | None) -> list[RuleTokens]: """ Load and parse the exclusion rule file at *path*. @@ -198,7 +203,7 @@ def load_rules(path: str | None) -> list[list]: return rules -def is_excluded_by_rules(rules: list[list], searchable_text: str) -> bool: +def is_excluded_by_rules(rules: list[RuleTokens], searchable_text: str) -> bool: """ Return ``True`` if *searchable_text* matches any exclusion rule. 
diff --git a/utils/path_helpers.py b/utils/path_helpers.py index 55af44f..c145ec6 100644 --- a/utils/path_helpers.py +++ b/utils/path_helpers.py @@ -1,9 +1,12 @@ """Path utility functions mirroring src/utils/path.ts""" +from __future__ import annotations + import logging import os import sys from datetime import datetime +from typing import Any, cast from urllib.parse import unquote @@ -57,7 +60,7 @@ def normalize_file_path(file_path: str) -> str: return normalized -def to_epoch_ms(value) -> int: +def to_epoch_ms(value: Any) -> int: """Convert a timestamp value to epoch milliseconds. Handles: @@ -89,7 +92,7 @@ def to_epoch_ms(value) -> int: return 0 -def get_workspace_folder_paths(workspace_data: dict) -> list: +def get_workspace_folder_paths(workspace_data: dict[str, Any]) -> list[str]: """Extract folder paths from workspace.json data. Supports legacy and newer multi-root entry shapes: @@ -100,24 +103,24 @@ def get_workspace_folder_paths(workspace_data: dict) -> list: - {"folders": [""]} (defensive) """ - def _extract_path(entry) -> str | None: + def _extract_path(entry: Any) -> str | None: if isinstance(entry, str): return entry if not isinstance(entry, dict): return None if isinstance(entry.get("path"), str): - return entry["path"] + return cast(str, entry["path"]) uri = entry.get("uri") if isinstance(uri, str): return uri if isinstance(uri, dict): if isinstance(uri.get("path"), str): - return uri["path"] + return cast(str, uri["path"]) if isinstance(uri.get("fsPath"), str): - return uri["fsPath"] + return cast(str, uri["fsPath"]) return None - paths = [] + paths: list[str] = [] folder = workspace_data.get("folder") folder_path = _extract_path(folder) if folder_path: @@ -132,7 +135,7 @@ def _extract_path(entry) -> str | None: return paths -def get_workspace_display_name(workspace_data: dict, fallback: str | None = None) -> str: +def get_workspace_display_name(workspace_data: dict[str, Any], fallback: str | None = None) -> str: """Return a user-friendly workspace 
name from workspace.json data.""" for folder in get_workspace_folder_paths(workspace_data): raw = str(folder).strip() diff --git a/utils/text_extract.py b/utils/text_extract.py index 46ef5bd..93a01ca 100644 --- a/utils/text_extract.py +++ b/utils/text_extract.py @@ -14,7 +14,7 @@ class HasBubbleRaw(Protocol): def raw(self) -> dict[str, Any]: ... -def extract_text_from_rich_text(children: list) -> str: +def extract_text_from_rich_text(children: list[Any]) -> str: """Recursively extract text from a Lexical rich-text tree.""" if not isinstance(children, list): return "" @@ -72,7 +72,7 @@ def slug(s: str) -> str: return s[:80] or "untitled" -def format_tool_action(action: dict) -> str: +def format_tool_action(action: dict[str, Any]) -> str: """Format a tool action / codeBlockDiff into readable text.""" if not action: return "" diff --git a/utils/tool_parser.py b/utils/tool_parser.py index c77a8eb..de61892 100644 --- a/utils/tool_parser.py +++ b/utils/tool_parser.py @@ -3,7 +3,10 @@ Used by both workspaces.py (browser API) and export_api.py (bulk export). 
""" +from __future__ import annotations + import json +from typing import Any def short_path(p: str) -> str: @@ -16,7 +19,7 @@ def short_path(p: str) -> str: return p -def parse_tool_call(tfd: dict) -> dict: +def parse_tool_call(tfd: dict[str, Any]) -> dict[str, str]: """Parse toolFormerData into a structured tool call object with human-readable summaries.""" name = tfd.get("name") or "unknown" status = tfd.get("status") or "" diff --git a/utils/workspace_descriptor.py b/utils/workspace_descriptor.py index ea60780..325c5ea 100644 --- a/utils/workspace_descriptor.py +++ b/utils/workspace_descriptor.py @@ -4,10 +4,11 @@ import os import re import sys +from typing import Any from urllib.parse import unquote, urlparse -def read_json_file(path: str): +def read_json_file(path: str) -> Any: """Read a workspace.json with Cursor indirection applied.""" return _resolve_workspace_descriptor(path) @@ -31,7 +32,7 @@ def _uri_or_path_to_fs_path(value: str, base_dir: str | None = None) -> str: return os.path.normpath(expanded) -def _resolve_workspace_descriptor(path: str, depth: int = 0): +def _resolve_workspace_descriptor(path: str, depth: int = 0) -> Any: """Read a workspace descriptor, following {"workspace": ...} indirection and normalising relative folder paths.""" with open(path, "r", encoding="utf-8") as f: data = json.load(f) From 5f33fdd25610adb035b2ba1f6006c51b42a431d4 Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Wed, 10 Jun 2026 19:34:09 -0400 Subject: [PATCH 02/11] fix: test failing issues --- models/search.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/models/search.py b/models/search.py index 80648ce..b7081aa 100644 --- a/models/search.py +++ b/models/search.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Any, NotRequired, TypedDict +from typing import Any, TypedDict class ConversationSummary(TypedDict, total=False): @@ -14,9 +14,7 @@ class ConversationSummary(TypedDict, total=False): raw: dict[str,
Any] -class SearchResult(TypedDict): - """One hit returned by ``/api/search`` and the search service helpers.""" - +class _SearchResultRequired(TypedDict): workspaceId: str workspaceFolder: str | None chatId: str @@ -24,4 +22,11 @@ class SearchResult(TypedDict): timestamp: int | str matchingText: str type: str # "composer" | "chat" | "cli_agent" - source: NotRequired[str] # "cli" for CLI agent sessions + + +class _SearchResultOptional(TypedDict, total=False): + source: str # "cli" for CLI agent sessions + + +class SearchResult(_SearchResultRequired, _SearchResultOptional): + """One hit returned by ``/api/search`` and the search service helpers.""" From 394175fb9155a1e82da866614c8c5142555ffb4d Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Wed, 10 Jun 2026 23:14:13 -0400 Subject: [PATCH 03/11] fix: review findings --- CHANGELOG.md | 8 ++--- api/search.py | 9 ++++-- models/search.py | 6 ++-- services/search.py | 13 ++++++-- services/workspace_db.py | 14 ++++++++- services/workspace_listing.py | 16 ++-------- services/workspace_tabs.py | 58 ++++++++++------------------------- 7 files changed, 53 insertions(+), 71 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cc141c1..cadfcbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,12 +11,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Strict mypy** — `strict = true` in `pyproject.toml`; core TypedDict models (`SearchResult`, `ConversationSummary`) and full annotations on API routes and `utils/` (#100) - -### Changed -- CI typecheck job runs `mypy .` using pyproject config (strict production code; - per-module overrides for `scripts/export.py` and `tests.*`) - -### Added - **Summary disk cache (Phase 3)** — project list and tab summaries cached under `~/.cache/cursor-chat-browser/`, invalidated when global or per-workspace DB mtimes change; bypass with `?nocache=1` or `CURSOR_CHAT_BROWSER_NOCACHE=1` (#84) @@ -47,6 +41,8 @@ and this project adheres to [Semantic 
Versioning](https://semver.org/spec/v2.0.0 - Unit tests for `determine_project_for_conversation` fallback chain (#87, #89) ### Changed +- CI typecheck job runs `mypy .` using pyproject config (strict production code; + per-module overrides for `scripts/export.py` and `tests.*`) - **List-path performance** — skip full `messageRequestContext` scan unless invalid workspace aliases are needed; filter `composerData` in SQL; skip `Composer.from_dict` on list/summary paths; cache `composer_id_to_ws` mapping (#84) diff --git a/api/search.py b/api/search.py index 700b6c3..bef76b8 100644 --- a/api/search.py +++ b/api/search.py @@ -36,9 +36,12 @@ def search() -> tuple[Response, int] | Response: query_lower = query.lower() results: list[SearchResult] = [] - results.extend( - search_global_storage(workspace_path, query, query_lower, rules, parse_warnings) - ) + if search_type != "chat": + results.extend( + search_global_storage( + workspace_path, query, query_lower, rules, parse_warnings + ) + ) results.extend( search_legacy_workspaces(workspace_path, query, query_lower, search_type, rules) ) diff --git a/models/search.py b/models/search.py index b7081aa..95da112 100644 --- a/models/search.py +++ b/models/search.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Any, TypedDict +from typing import Any, Literal, TypedDict class ConversationSummary(TypedDict, total=False): @@ -21,11 +21,11 @@ class _SearchResultRequired(TypedDict): chatTitle: str timestamp: int | str matchingText: str - type: str # "composer" | "chat" | "cli_agent" + type: Literal["composer", "chat", "cli_agent"] class _SearchResultOptional(TypedDict, total=False): - source: str # "cli" for CLI agent sessions + source: Literal["cli"] class SearchResult(_SearchResultRequired, _SearchResultOptional): diff --git a/services/search.py b/services/search.py index fa68ce7..7ef5f1c 100644 --- a/services/search.py +++ b/services/search.py @@ -403,6 +403,7 @@ def search_legacy_workspaces( data = 
json.loads(chat_row[0]) for tab in (data.get("tabs") or []): ct = tab.get("chatTitle") or "" + tab_id = str(tab.get("tabId") or "") tab_model_names: list[str] | None = None tab_meta = tab.get("metadata") @@ -440,9 +441,9 @@ def search_legacy_workspaces( results.append({ "workspaceId": name, "workspaceFolder": workspace_folder, - "chatId": tab.get("tabId"), - "chatTitle": ct or f"Chat {(tab.get('tabId') or '')[:8]}", - "timestamp": tab.get("lastSendTime") or 0, + "chatId": tab_id, + "chatTitle": ct or f"Chat {tab_id[:8]}", + "timestamp": tab.get("lastSendTime") or datetime.now().isoformat(), "matchingText": matching_text, "type": "chat", }) @@ -568,12 +569,18 @@ def rank_results(results: list[SearchResult]) -> list[SearchResult]: """ def _ts(r: SearchResult) -> float: t = r.get("timestamp", 0) + if t is None: + return 0.0 if isinstance(t, str): try: # .timestamp() -> epoch-seconds; x1000 -> epoch-ms to match ints return datetime.fromisoformat(t.replace("Z", "+00:00")).timestamp() * 1000 except Exception: return 0.0 + if isinstance(t, bool) or not isinstance(t, (int, float)): + return 0.0 + if t > 1e12: + return float(t) / 1000.0 return float(t) if t else 0.0 return sorted(results, key=_ts, reverse=True) diff --git a/services/workspace_db.py b/services/workspace_db.py index b1b6113..ab05181 100644 --- a/services/workspace_db.py +++ b/services/workspace_db.py @@ -7,7 +7,7 @@ from collections.abc import Iterator from contextlib import closing, contextmanager from pathlib import Path -from typing import Any +from typing import Any, cast _logger = logging.getLogger(__name__) @@ -22,6 +22,18 @@ # corrupt table cannot propagate to callers. +def safe_fetchall( + conn: sqlite3.Connection, + query: str, + params: tuple[Any, ...] 
= (), +) -> list[sqlite3.Row]: + """Run *query* on *conn*; return rows or ``[]`` on sqlite3.Error.""" + try: + return cast(list[sqlite3.Row], conn.execute(query, params).fetchall()) + except sqlite3.Error: + return [] + + def load_bubble_map(global_db: sqlite3.Connection) -> dict[str, dict[str, Any]]: """Load all ``bubbleId:*`` KV entries into ``{bubble_id: bubble_dict}``. diff --git a/services/workspace_listing.py b/services/workspace_listing.py index b1aeed1..3219b0a 100644 --- a/services/workspace_listing.py +++ b/services/workspace_listing.py @@ -5,7 +5,7 @@ import os import sqlite3 from datetime import datetime, timezone -from typing import Any, cast +from typing import Any _logger = logging.getLogger(__name__) @@ -33,6 +33,7 @@ load_project_layouts_for_composer, load_project_layouts_map, open_global_db, + safe_fetchall, ) from utils.workspace_path import get_cli_chats_path from services.workspace_resolver import ( @@ -138,19 +139,8 @@ def _build_workspace_projects_uncached( with open_global_db(workspace_path) as (global_db, _): if global_db: - def _safe_fetchall( - query: str, params: tuple[Any, ...] 
= (), - ) -> list[sqlite3.Row]: - try: - return cast( - list[sqlite3.Row], - global_db.execute(query, params).fetchall(), - ) - except sqlite3.Error: - return [] - try: - composer_rows = _safe_fetchall(COMPOSER_ROWS_WITH_HEADERS_SQL) + composer_rows = safe_fetchall(global_db, COMPOSER_ROWS_WITH_HEADERS_SQL) project_layouts_map: dict[str, list[str]] = {} if invalid_workspace_ids: diff --git a/services/workspace_tabs.py b/services/workspace_tabs.py index 3c2a6c8..5ee2cd2 100644 --- a/services/workspace_tabs.py +++ b/services/workspace_tabs.py @@ -7,7 +7,7 @@ import sqlite3 from collections.abc import Mapping from datetime import datetime -from typing import Any, cast +from typing import Any _logger = logging.getLogger(__name__) @@ -44,6 +44,7 @@ load_project_layouts_for_composer, load_project_layouts_map, open_global_db, + safe_fetchall, ) from utils.workspace_path import get_cli_chats_path from services.workspace_resolver import ( @@ -548,22 +549,11 @@ def _build_workspace_tab_summaries_uncached( workspace_display_name = lookup_workspace_display_name(workspace_path, workspace_id) - def _safe_fetchall( - query: str, params: tuple[Any, ...] = (), - ) -> list[sqlite3.Row]: - try: - return cast( - list[sqlite3.Row], - global_db.execute(query, params).fetchall(), - ) - except sqlite3.Error: - return [] - project_layouts_map: dict[str, list[str]] = {} if invalid_workspace_ids: project_layouts_map = load_project_layouts_map(global_db) - composer_rows = _safe_fetchall(COMPOSER_ROWS_WITH_HEADERS_SQL) + composer_rows = safe_fetchall(global_db, COMPOSER_ROWS_WITH_HEADERS_SQL) invalid_workspace_aliases: dict[str, str] = {} if invalid_workspace_ids: @@ -709,18 +699,8 @@ def assemble_single_tab( workspace_display_name = lookup_workspace_display_name(workspace_path, workspace_id) - def _safe_fetchall( - query: str, params: tuple[Any, ...] 
= (), - ) -> list[sqlite3.Row]: - try: - return cast( - list[sqlite3.Row], - global_db.execute(query, params).fetchall(), - ) - except sqlite3.Error: - return [] - - rows = _safe_fetchall( + rows = safe_fetchall( + global_db, "SELECT key, value FROM cursorDiskKV WHERE key = ?", (f"composerData:{composer_id}",), ) @@ -760,7 +740,7 @@ def _safe_fetchall( if invalid_workspace_ids: # Alias resolution still needs the composer roster, but project layouts # are intentionally limited to this composer (single-tab scope). - composer_rows_for_aliases = _safe_fetchall(COMPOSER_ROWS_WITH_HEADERS_SQL) + composer_rows_for_aliases = safe_fetchall(global_db, COMPOSER_ROWS_WITH_HEADERS_SQL) invalid_workspace_aliases = infer_invalid_workspace_aliases( composer_rows=composer_rows_for_aliases, project_layouts_map=project_layouts_map, @@ -848,21 +828,11 @@ def assemble_workspace_tabs( workspace_display_name = lookup_workspace_display_name(workspace_path, workspace_id) - def _safe_fetchall( - query: str, params: tuple[Any, ...] = (), - ) -> list[sqlite3.Row]: - try: - return cast( - list[sqlite3.Row], - global_db.execute(query, params).fetchall(), - ) - except sqlite3.Error: - return [] - # Load bubbles - for row in _safe_fetchall( + for row in safe_fetchall( + global_db, "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%'" - " AND value IS NOT NULL" + " AND value IS NOT NULL", ): parts = row["key"].split(":") if len(parts) >= 3: @@ -901,7 +871,10 @@ def _safe_fetchall( # Load messageRequestContext rows once; build both # message_request_context_map and project_layouts_map from the same pass. 
project_layouts_map: dict[str, list[str]] = {} - for row in _safe_fetchall("SELECT key, value FROM cursorDiskKV WHERE key LIKE 'messageRequestContext:%'"): + for row in safe_fetchall( + global_db, + "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'messageRequestContext:%'", + ): parts = row["key"].split(":") if len(parts) < 2: continue @@ -934,11 +907,12 @@ def _safe_fetchall( project_layouts_map[chat_id].append(layout["rootPath"]) # Get composer data entries with conversations - composer_rows = _safe_fetchall( + composer_rows = safe_fetchall( + global_db, "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'" " AND value IS NOT NULL" " AND value LIKE '%fullConversationHeadersOnly%'" - " AND value NOT LIKE '%fullConversationHeadersOnly\":[]%'" + " AND value NOT LIKE '%fullConversationHeadersOnly\":[]%'", ) invalid_workspace_aliases = infer_invalid_workspace_aliases( From b0f3213c8052c79dbab56d970f94a6ced1ffeefc Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Wed, 10 Jun 2026 23:30:48 -0400 Subject: [PATCH 04/11] fix: outside diff finding --- services/search.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/services/search.py b/services/search.py index 7ef5f1c..a0281f9 100644 --- a/services/search.py +++ b/services/search.py @@ -49,6 +49,9 @@ _logger = logging.getLogger(__name__) +# Missing/unparseable timestamps sort last in rank_results() (treated as 0.0 s). 
+_UNKNOWN_SEARCH_TIMESTAMP: int = 0 + # --------------------------------------------------------------------------- # Private helpers — pure functions / small utilities @@ -322,7 +325,7 @@ def search_global_storage( "timestamp": ( to_epoch_ms(composer.last_updated_at) or to_epoch_ms(composer.created_at) - or int(datetime.now().timestamp() * 1000) + or _UNKNOWN_SEARCH_TIMESTAMP ), "matchingText": matching_text, "type": "composer", @@ -443,7 +446,7 @@ def search_legacy_workspaces( "workspaceFolder": workspace_folder, "chatId": tab_id, "chatTitle": ct or f"Chat {tab_id[:8]}", - "timestamp": tab.get("lastSendTime") or datetime.now().isoformat(), + "timestamp": tab.get("lastSendTime") or _UNKNOWN_SEARCH_TIMESTAMP, "matchingText": matching_text, "type": "chat", }) @@ -488,9 +491,7 @@ def search_cli_sessions( for session in cp["sessions"]: meta = session.get("meta", {}) session_id = session["session_id"] - created_ms: int = ( - meta.get("createdAt") or int(datetime.now().timestamp() * 1000) - ) + created_ms: int = to_epoch_ms(meta.get("createdAt")) session_name: str = meta.get("name") or f"Session {session_id[:8]}" try: From 0ab5f1fa4aff78a6b53637e6b60335cc431dbc1f Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Wed, 10 Jun 2026 23:43:21 -0400 Subject: [PATCH 05/11] fix: after rebasing --- models/parse_warnings.py | 2 +- services/search.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/models/parse_warnings.py b/models/parse_warnings.py index bfe7e1f..f9fb234 100644 --- a/models/parse_warnings.py +++ b/models/parse_warnings.py @@ -11,7 +11,7 @@ class ParseWarningCollector: composers_skipped: int = 0 bubbles_skipped: int = 0 composers_processing_failed: int = 0 - source_failures: list[dict] = field(default_factory=list) + source_failures: list[dict[str, Any]] = field(default_factory=list) def record_composer_skipped(self, count: int = 1) -> None: if count > 0: diff --git a/services/search.py b/services/search.py index a0281f9..12f9b22 100644 --- 
a/services/search.py +++ b/services/search.py @@ -130,7 +130,7 @@ def _find_match( def _build_ws_id_to_name( - workspace_entries: list[dict], + workspace_entries: list[dict[str, Any]], ) -> dict[str, str]: """Map workspace folder IDs to human-readable display names. @@ -580,8 +580,6 @@ def _ts(r: SearchResult) -> float: return 0.0 if isinstance(t, bool) or not isinstance(t, (int, float)): return 0.0 - if t > 1e12: - return float(t) / 1000.0 return float(t) if t else 0.0 return sorted(results, key=_ts, reverse=True) From 4789c7d13914f8fd08c3116a2fa6dbd4270303e3 Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Thu, 11 Jun 2026 00:06:52 -0400 Subject: [PATCH 06/11] =?UTF-8?q?fix:=20Changed=20record=5Fsource=5Ffailur?= =?UTF-8?q?e(self,=20exc:=20BaseException,=20=E2=80=A6)=20=E2=86=92=20exc:?= =?UTF-8?q?=20Exception?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models/parse_warnings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/parse_warnings.py b/models/parse_warnings.py index f9fb234..9da6e4a 100644 --- a/models/parse_warnings.py +++ b/models/parse_warnings.py @@ -26,7 +26,7 @@ def record_composer_processing_failure(self, count: int = 1) -> None: if count > 0: self.composers_processing_failed += count - def record_source_failure(self, exc: BaseException, source: str) -> None: + def record_source_failure(self, exc: Exception, source: str) -> None: """Record a whole-source failure (e.g. the global storage DB is unreadable). 
Distinct from per-item parse skips: signals that an entire data source From e2a513ca5c8d8a5d7e1e89f1c5ada84a2e58297f Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Thu, 11 Jun 2026 11:18:42 -0400 Subject: [PATCH 07/11] fix: reviewer's findings --- api/composers.py | 21 ++++++++++----------- api/config_api.py | 32 ++++++++++++++++---------------- api/export_api.py | 16 +++++++--------- api/flask_config.py | 23 +++++++++++++++++++++-- api/logs.py | 8 +++++--- api/pdf.py | 10 +++++----- api/search.py | 15 +++++++++------ api/workspaces.py | 41 ++++++++++++++++------------------------- models/__init__.py | 3 ++- models/conversation.py | 5 +++++ pyproject.toml | 1 + services/cli_tabs.py | 15 ++++++++------- services/search.py | 5 ++++- 13 files changed, 109 insertions(+), 86 deletions(-) diff --git a/api/composers.py b/api/composers.py index 1b90c12..a790bba 100644 --- a/api/composers.py +++ b/api/composers.py @@ -11,7 +11,9 @@ from contextlib import closing from typing import Any -from flask import Blueprint, Response, jsonify +from flask import Blueprint, Response + +from api.flask_config import json_response from utils.workspace_path import resolve_workspace_path from utils.path_helpers import to_epoch_ms @@ -113,13 +115,11 @@ def list_composers() -> tuple[Response, int] | Response: ) composers.sort(key=lambda pair: to_epoch_ms(pair[0].last_updated_at), reverse=True) - return jsonify([c for _, c in composers]) + return json_response([c for _, c in composers]) except Exception: _logger.exception("Failed to get composers") - return jsonify({"error": "Failed to get composers"}), 500 - - + return json_response({"error": "Failed to get composers"}, 500) @bp.route("/api/composers/") def get_composer(composer_id: str) -> tuple[Response, int] | Response: try: @@ -183,7 +183,7 @@ def get_composer(composer_id: str) -> tuple[Response, int] | Response: # the composer (CodeRabbit on PR #30). 
payload = dict(local.raw) payload["conversation"] = payload.get("conversation") or [] - return jsonify(payload) + return json_response(payload) except SchemaError as e: _logger.warning( "Schema drift in %s: %s (%s)", @@ -219,15 +219,14 @@ def get_composer(composer_id: str) -> tuple[Response, int] | Response: e, type(e).__name__, ) - return jsonify({"error": "Composer schema drift"}), 404 + return json_response({"error": "Composer schema drift"}, 404) payload = dict(composer.raw) payload["conversation"] = payload.get("conversation") or [] - return jsonify(payload) + return json_response(payload) except (OSError, sqlite3.Error, json.JSONDecodeError, ValueError): pass - return jsonify({"error": "Composer not found"}), 404 - + return json_response({"error": "Composer not found"}, 404) except Exception: _logger.exception("Failed to get composer") - return jsonify({"error": "Failed to get composer"}), 500 + return json_response({"error": "Failed to get composer"}, 500) \ No newline at end of file diff --git a/api/config_api.py b/api/config_api.py index 46f5796..87e7711 100644 --- a/api/config_api.py +++ b/api/config_api.py @@ -11,7 +11,9 @@ import subprocess import sys -from flask import Blueprint, Response, jsonify, request +from flask import Blueprint, Response, request + +from api.flask_config import json_response from utils.path_validation import WorkspacePathError, validate_workspace_path from utils.workspace_path import set_workspace_path_override @@ -39,7 +41,7 @@ def detect_environment() -> Response: except Exception: pass - return jsonify({ + return json_response({ "os": sys.platform, "isWSL": is_wsl, "isRemote": is_remote, @@ -52,7 +54,7 @@ def detect_environment() -> Response: type(e).__name__, exc_info=True, ) - return jsonify({"os": "unknown", "isWSL": False, "isRemote": False}) + return json_response({"os": "unknown", "isWSL": False, "isRemote": False}) @bp.route("/api/validate-path", methods=["POST"]) @@ -61,14 +63,14 @@ def validate_path() -> 
tuple[Response, int] | Response: try: body = request.get_json(silent=True) or {} if not isinstance(body, dict): - return jsonify( + return json_response( {"valid": False, "error": "invalid JSON body", "workspaceCount": 0} ) raw = body.get("path", "") try: canonical = validate_workspace_path(raw) except WorkspacePathError as e: - return jsonify({"valid": False, "error": str(e), "workspaceCount": 0}) + return json_response({"valid": False, "error": str(e), "workspaceCount": 0}) workspace_count = 0 for name in os.listdir(canonical): @@ -78,7 +80,7 @@ def validate_path() -> tuple[Response, int] | Response: if os.path.isfile(db): workspace_count += 1 - return jsonify( + return json_response( { "valid": workspace_count > 0, "workspaceCount": workspace_count, @@ -93,9 +95,7 @@ def validate_path() -> tuple[Response, int] | Response: type(e).__name__, exc_info=True, ) - return jsonify({"valid": False, "error": "Failed to validate path"}), 500 - - + return json_response({"valid": False, "error": "Failed to validate path"}, 500) @bp.route("/api/set-workspace", methods=["POST"]) def set_workspace() -> tuple[Response, int] | Response: # Reject non-dict JSON bodies (array / string / number / null). Without @@ -105,7 +105,7 @@ def set_workspace() -> tuple[Response, int] | Response: # instead of a 400 client error. (CodeRabbit on PR #16.) body = request.get_json(silent=True) if not isinstance(body, dict): - return jsonify({"error": "request body must be a JSON object"}), 400 + return json_response({"error": "request body must be a JSON object"}, 400) raw = body.get("path", "") # Validate the supplied path BEFORE storing the override (issue #15). 
# validate_workspace_path collapses `..` traversal AND resolves symlinks @@ -115,14 +115,14 @@ def set_workspace() -> tuple[Response, int] | Response: try: canonical = validate_workspace_path(raw) except WorkspacePathError as e: - return jsonify({"error": str(e)}), 400 + return json_response({"error": str(e)}, 400) except Exception: # noqa: BLE001 — only here as a fallback - return jsonify({"error": "Failed to validate workspace path"}), 500 + return json_response({"error": "Failed to validate workspace path"}, 500) try: set_workspace_path_override(canonical) except Exception: # noqa: BLE001 — keep the response shape structured JSON - return jsonify({"error": "Failed to set workspace path"}), 500 - return jsonify({"success": True, "path": canonical}) + return json_response({"error": "Failed to set workspace path"}, 500) + return json_response({"success": True, "path": canonical}) @bp.route("/api/get-username") @@ -144,7 +144,7 @@ def get_username() -> Response: import getpass username = getpass.getuser() - return jsonify({"username": username}) + return json_response({"username": username}) except Exception as e: _logger.warning( @@ -153,4 +153,4 @@ def get_username() -> Response: type(e).__name__, exc_info=True, ) - return jsonify({"username": "YOUR_USERNAME"}) + return json_response({"username": "YOUR_USERNAME"}) diff --git a/api/export_api.py b/api/export_api.py index 26fd9b7..a192e11 100644 --- a/api/export_api.py +++ b/api/export_api.py @@ -14,9 +14,9 @@ from pathlib import Path from typing import Any, cast -from flask import Blueprint, Response, jsonify, request +from flask import Blueprint, Response, request -from api.flask_config import exclusion_rules +from api.flask_config import exclusion_rules, json_response from utils.workspace_path import resolve_workspace_path from utils.path_helpers import to_epoch_ms @@ -72,7 +72,7 @@ def _save_export_state(count: int) -> None: def get_export_state() -> Response: """Return the last export timestamp.""" state = 
_get_export_state() - return jsonify(state) + return json_response(state) @bp.route("/api/export", methods=["POST"]) @@ -119,8 +119,7 @@ def export_chats() -> tuple[Response, int] | Response: # ── Database reading via service layer ──────────────────────────────── with open_global_db(workspace_path) as (global_db, _): if global_db is None: - return jsonify({"error": "Cursor global storage not found"}), 404 - + return json_response({"error": "Cursor global storage not found"}, 404) bubble_map = load_bubble_map(global_db) code_block_diff_map = load_code_block_diff_map(global_db) @@ -200,10 +199,9 @@ def export_chats() -> tuple[Response, int] | Response: count = len(exported) if count == 0: - return jsonify({"error": "No conversations to export" + ( + return json_response({"error": "No conversations to export" + ( " since last export" if since == "last" else "" - )}), 404 - + )}, 404) buf = io.BytesIO() with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf: for entry in exported: @@ -229,4 +227,4 @@ def export_chats() -> tuple[Response, int] | Response: type(e).__name__, exc_info=True, ) - return jsonify({"error": "Export failed"}), 500 + return json_response({"error": "Export failed"}, 500) \ No newline at end of file diff --git a/api/flask_config.py b/api/flask_config.py index 9b72136..15158e0 100644 --- a/api/flask_config.py +++ b/api/flask_config.py @@ -2,11 +2,30 @@ from __future__ import annotations -from typing import Any +from typing import Any, cast, overload -from flask import current_app +from flask import Response, current_app, jsonify def exclusion_rules() -> list[list[Any]]: """Return loaded exclusion rules from app config (empty list when unset).""" return current_app.config.get("EXCLUSION_RULES") or [] + + +@overload +def json_response(data: Any) -> Response: ... + + +@overload +def json_response(data: Any, status: int) -> tuple[Response, int]: ... 
+ + +def json_response( + data: Any, + status: int | None = None, +) -> Response | tuple[Response, int]: + """Typed wrapper around :func:`flask.jsonify` for strict mypy (types-Flask).""" + response = cast(Response, jsonify(data)) + if status is None: + return response + return response, status diff --git a/api/logs.py b/api/logs.py index b209975..a6bd804 100644 --- a/api/logs.py +++ b/api/logs.py @@ -12,7 +12,9 @@ from datetime import datetime from typing import Any -from flask import Blueprint, Response, jsonify +from flask import Blueprint, Response + +from api.flask_config import json_response from utils.workspace_path import resolve_workspace_path from utils.path_helpers import to_epoch_ms, warn_workspace_json_read @@ -149,8 +151,8 @@ def get_logs() -> tuple[Response, int] | Response: ) logs.sort(key=lambda log: log.get("timestamp") or 0, reverse=True) - return jsonify({"logs": logs}) + return json_response({"logs": logs}) except Exception: _logger.exception("Failed to get logs") - return jsonify({"error": "Failed to get logs", "logs": []}), 500 + return json_response({"error": "Failed to get logs", "logs": []}, 500) \ No newline at end of file diff --git a/api/pdf.py b/api/pdf.py index b0175e7..b2d107a 100644 --- a/api/pdf.py +++ b/api/pdf.py @@ -8,7 +8,9 @@ import re from typing import Any -from flask import Blueprint, Response, jsonify, request +from flask import Blueprint, Response, request + +from api.flask_config import json_response bp = Blueprint("pdf", __name__) _logger = logging.getLogger(__name__) @@ -158,7 +160,7 @@ def footer(self) -> None: continue buf = io.BytesIO() - pdf.output(buf) + buf.write(bytes(pdf.output())) buf.seek(0) safe_title = re.sub(r'[<>:"/\\|?*]', '_', title) @@ -177,9 +179,7 @@ def footer(self) -> None: type(e).__name__, exc_info=True, ) - return jsonify({"error": "Failed to generate PDF"}), 500 - - + return json_response({"error": "Failed to generate PDF"}, 500) def _render_code_block(pdf: Any, code_text: str) -> None: 
"""Render a code block with a dark background.""" pdf.ln(3) diff --git a/api/search.py b/api/search.py index bef76b8..da25d42 100644 --- a/api/search.py +++ b/api/search.py @@ -6,7 +6,9 @@ import logging from typing import Any -from flask import Blueprint, Response, current_app, jsonify, request +from flask import Blueprint, Response, current_app, request + +from api.flask_config import json_response from models import ParseWarningCollector, SearchResult from services.search import ( @@ -29,8 +31,7 @@ def search() -> tuple[Response, int] | Response: rules = current_app.config.get("EXCLUSION_RULES") or [] if not query: - return jsonify({"error": "No search query provided"}), 400 - + return json_response({"error": "No search query provided"}, 400) workspace_path = resolve_workspace_path() parse_warnings = ParseWarningCollector() query_lower = query.lower() @@ -47,12 +48,14 @@ def search() -> tuple[Response, int] | Response: ) if search_type == "all": results.extend( - search_cli_sessions(get_cli_chats_path(), query, query_lower, rules) + search_cli_sessions( + get_cli_chats_path(), query, query_lower, rules, parse_warnings + ) ) payload: dict[str, Any] = {"results": rank_results(results)} - return jsonify(parse_warnings.attach_to(payload)) + return json_response(parse_warnings.attach_to(payload)) except Exception: _logger.exception("Search failed") - return jsonify({"error": "Search failed", "results": []}), 500 + return json_response({"error": "Search failed", "results": []}, 500) \ No newline at end of file diff --git a/api/workspaces.py b/api/workspaces.py index f5f8ac7..1c3ce87 100644 --- a/api/workspaces.py +++ b/api/workspaces.py @@ -12,9 +12,9 @@ from datetime import datetime, timezone from typing import Any -from flask import Blueprint, Response, jsonify, request +from flask import Blueprint, Response, request -from api.flask_config import exclusion_rules +from api.flask_config import exclusion_rules, json_response from utils.workspace_path import 
resolve_workspace_path, get_cli_chats_path from utils.cli_chat_reader import list_cli_projects @@ -66,12 +66,10 @@ def list_workspaces() -> tuple[Response, int] | Response: payload: dict[str, Any] = {"projects": projects} if warnings: payload["warnings"] = warnings - return jsonify(payload) + return json_response(payload) except Exception: _logger.exception("Failed to get workspaces") - return jsonify({"error": "Failed to get workspaces"}), 500 - - + return json_response({"error": "Failed to get workspaces"}, 500) # --------------------------------------------------------------------------- # GET /api/workspaces/ # --------------------------------------------------------------------------- @@ -80,7 +78,7 @@ def list_workspaces() -> tuple[Response, int] | Response: def get_workspace(workspace_id: str) -> tuple[Response, int] | Response: try: if workspace_id == "global": - return jsonify({ + return json_response({ "id": "global", "name": "Other chats", "path": None, @@ -96,7 +94,7 @@ def get_workspace(workspace_id: str) -> tuple[Response, int] | Response: continue last_ms = cp.get("last_updated_ms") workspace_path_field = cp.get("workspace_path") - return jsonify({ + return json_response({ "id": workspace_id, "name": cp.get("workspace_name") or project_id[:12], "path": workspace_path_field, @@ -108,15 +106,13 @@ def get_workspace(workspace_id: str) -> tuple[Response, int] | Response: ), "source": "cli", }) - return jsonify({"error": "CLI project not found"}), 404 - + return json_response({"error": "CLI project not found"}, 404) workspace_path = resolve_workspace_path() db_path = os.path.join(workspace_path, workspace_id, "state.vscdb") wj_path = os.path.join(workspace_path, workspace_id, "workspace.json") if not os.path.isfile(db_path): - return jsonify({"error": "Workspace not found"}), 404 - + return json_response({"error": "Workspace not found"}, 404) mtime = os.path.getmtime(db_path) folder = None workspace_name = workspace_id @@ -137,7 +133,7 @@ def 
get_workspace(workspace_id: str) -> tuple[Response, int] | Response: if inferred: workspace_name = inferred - return jsonify({ + return json_response({ "id": workspace_id, "name": workspace_name, "path": db_path, @@ -147,9 +143,7 @@ def get_workspace(workspace_id: str) -> tuple[Response, int] | Response: except Exception: _logger.exception("Failed to get workspace") - return jsonify({"error": "Failed to get workspace"}), 500 - - + return json_response({"error": "Failed to get workspace"}, 500) # --------------------------------------------------------------------------- # GET /api/workspaces//tabs # --------------------------------------------------------------------------- @@ -161,7 +155,7 @@ def get_workspace_tabs(workspace_id: str) -> tuple[Response, int] | Response: return get_cli_workspace_tabs(workspace_id, exclusion_rules()) except Exception: _logger.exception("Failed to get CLI workspace tabs") - return jsonify({"error": "Failed to get workspace tabs"}), 500 + return json_response({"error": "Failed to get workspace tabs"}, 500) try: workspace_path = resolve_workspace_path() rules = exclusion_rules() @@ -172,12 +166,10 @@ def get_workspace_tabs(workspace_id: str) -> tuple[Response, int] | Response: ) else: payload, status = assemble_workspace_tabs(workspace_id, workspace_path, rules) - return jsonify(payload), status + return json_response(payload, status) except Exception: _logger.exception("Failed to get workspace tabs") - return jsonify({"error": "Failed to get workspace tabs"}), 500 - - + return json_response({"error": "Failed to get workspace tabs"}, 500) # --------------------------------------------------------------------------- # GET /api/workspaces//tabs/ # --------------------------------------------------------------------------- @@ -185,13 +177,12 @@ def get_workspace_tabs(workspace_id: str) -> tuple[Response, int] | Response: @bp.route("/api/workspaces//tabs/") def get_workspace_tab(workspace_id: str, composer_id: str) -> tuple[Response, int] | 
Response: if workspace_id.startswith("cli:"): - return jsonify({"error": "Per-tab lazy load is not supported for CLI workspaces"}), 400 + return json_response({"error": "Per-tab lazy load is not supported for CLI workspaces"}, 400) try: workspace_path = resolve_workspace_path() rules = exclusion_rules() payload, status = assemble_single_tab(workspace_id, composer_id, workspace_path, rules) - return jsonify(payload), status + return json_response(payload, status) except Exception: _logger.exception("Failed to get workspace tab") - return jsonify({"error": "Failed to get workspace tab"}), 500 - + return json_response({"error": "Failed to get workspace tab"}, 500) \ No newline at end of file diff --git a/models/__init__.py b/models/__init__.py index 3c73172..0829449 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -1,5 +1,5 @@ from models.cli_session import CliSessionMeta -from models.conversation import Bubble, Composer, WorkspaceLocalComposer +from models.conversation import Bubble, Composer, Conversation, WorkspaceLocalComposer from models.errors import SchemaError from models.parse_warnings import ParseWarningCollector from models.export import ExportEntry @@ -10,6 +10,7 @@ "Bubble", "CliSessionMeta", "Composer", + "Conversation", "ConversationSummary", "ExportEntry", "ParseWarningCollector", diff --git a/models/conversation.py b/models/conversation.py index bc1308b..4e281ea 100644 --- a/models/conversation.py +++ b/models/conversation.py @@ -149,6 +149,11 @@ def model_name_from_config(self) -> str | None: return name if isinstance(name, str) and name else None +# Issue #100: Cursor persists conversations as ``composerData`` rows; ``Composer`` +# is the validated domain type for a full conversation. 
+Conversation = Composer + + @dataclass(frozen=True) class WorkspaceLocalComposer: """Summary composer row from per-workspace state.vscdb ItemTable; only composerId is required.""" diff --git a/pyproject.toml b/pyproject.toml index 678f218..50b6681 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ desktop = ["pywebview>=5.0,<6"] dev = [ "pytest>=8,<9", "mypy>=1.10,<2", + "types-Flask>=1.1,<2", "hypothesis>=6.100,<7", ] diff --git a/services/cli_tabs.py b/services/cli_tabs.py index a9e8e3d..27ba7c4 100644 --- a/services/cli_tabs.py +++ b/services/cli_tabs.py @@ -4,7 +4,9 @@ from datetime import datetime from typing import Any -from flask import Response, jsonify +from flask import Response + +from api.flask_config import json_response from utils.cli_chat_reader import list_cli_projects, messages_to_bubbles, traverse_blobs from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules @@ -24,8 +26,8 @@ def get_cli_workspace_tabs( Returns: ``flask.Response | tuple[flask.Response, int]`` suitable for a Flask route - handler. Success returns ``jsonify({"tabs": ...})`` (plain ``Response``, - status 200). Errors return ``(jsonify({"error": ...}), status)`` with + handler. Success returns ``json_response({"tabs": ...})`` (plain ``Response``, + status 200). Errors return ``(json_response({"error": ...}), status)`` with 404 when the project is missing or 500 on unexpected failure. 
""" try: @@ -39,8 +41,7 @@ def get_cli_workspace_tabs( None, ) if project is None: - return jsonify({"error": "CLI project not found"}), 404 - + return json_response({"error": "CLI project not found"}, 404) ws_name = project.get("workspace_name") or project_id[:12] sessions = project.get("sessions") or [] if not isinstance(sessions, list): @@ -138,7 +139,7 @@ def get_cli_workspace_tabs( tabs.append(tab) tabs.sort(key=lambda t: t.get("timestamp") or 0, reverse=True) - return jsonify({"tabs": tabs}) + return json_response({"tabs": tabs}) except Exception as e: _logger.error( @@ -148,4 +149,4 @@ def get_cli_workspace_tabs( type(e).__name__, exc_info=True, ) - return jsonify({"error": "Failed to get CLI workspace tabs"}), 500 + return json_response({"error": "Failed to get CLI workspace tabs"}, 500) \ No newline at end of file diff --git a/services/search.py b/services/search.py index 12f9b22..5367c67 100644 --- a/services/search.py +++ b/services/search.py @@ -467,6 +467,7 @@ def search_cli_sessions( query: str, query_lower: str, rules: list[Any], + parse_warnings: ParseWarningCollector | None = None, ) -> list[SearchResult]: """Search Cursor CLI agent sessions stored as JSONL + blob files. 
@@ -549,8 +550,10 @@ def search_cli_sessions( "type": "cli_agent", "source": "cli", }) - except Exception: + except Exception as exc: _logger.exception("Error searching CLI sessions") + if parse_warnings is not None: + parse_warnings.record_source_failure(exc, source="cli_sessions") return results From a0594e3008e84ac3a88d54bfda451e0d74607fbc Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Thu, 11 Jun 2026 11:29:20 -0400 Subject: [PATCH 08/11] fix: coderabbitai's full review findings --- models/search.py | 2 +- services/search.py | 30 ++++++++++++++++++++++-------- tests/test_normalize_file_path.py | 4 ++++ tests/test_search_helpers.py | 3 +++ utils/path_helpers.py | 2 ++ 5 files changed, 32 insertions(+), 9 deletions(-) diff --git a/models/search.py b/models/search.py index 95da112..c614104 100644 --- a/models/search.py +++ b/models/search.py @@ -16,7 +16,7 @@ class ConversationSummary(TypedDict, total=False): class _SearchResultRequired(TypedDict): workspaceId: str - workspaceFolder: str | None + workspaceFolder: str | None # display-name leaf (e.g. "myrepo"), not a file path chatId: str chatTitle: str timestamp: int | str diff --git a/services/search.py b/services/search.py index 5367c67..f06abbe 100644 --- a/services/search.py +++ b/services/search.py @@ -34,7 +34,7 @@ ] from models import Bubble, Composer, ParseWarningCollector, SchemaError, SearchResult from services.workspace_db import ( - build_composer_id_to_workspace_id, + build_composer_id_to_workspace_id_cached, collect_workspace_entries, open_global_db, ) @@ -93,6 +93,8 @@ def _extract_snippet(text: str, query: str, query_lower: str) -> str: Returns an empty string if there is no match. 
""" + if not query_lower: + return "" idx = text.lower().find(query_lower) if idx == -1: return "" @@ -214,8 +216,8 @@ def search_global_storage( try: workspace_entries = collect_workspace_entries(workspace_path) ws_id_to_name = _build_ws_id_to_name(workspace_entries) - composer_id_to_ws = build_composer_id_to_workspace_id( - workspace_path, workspace_entries + composer_id_to_ws = build_composer_id_to_workspace_id_cached( + workspace_path, workspace_entries, rules ) with open_global_db(workspace_path) as (conn, _db_path): @@ -294,7 +296,6 @@ def search_global_storage( _json_dump_safe(cd.get("conversationSummary")), _json_dump_safe(cd.get("usage")), _json_dump_safe(cd.get("requestMetadata")), - _json_dump_safe(cd), "\n".join(bubble_meta), ], ) @@ -443,16 +444,21 @@ def search_legacy_workspaces( results.append({ "workspaceId": name, - "workspaceFolder": workspace_folder, + "workspaceFolder": workspace_name, "chatId": tab_id, "chatTitle": ct or f"Chat {tab_id[:8]}", - "timestamp": tab.get("lastSendTime") or _UNKNOWN_SEARCH_TIMESTAMP, + "timestamp": to_epoch_ms(tab.get("lastSendTime")) or _UNKNOWN_SEARCH_TIMESTAMP, "matchingText": matching_text, "type": "chat", }) except Exception as exc: - _logger.warning("Failed to search legacy workspace %s: %s", name, exc) + _logger.warning( + "Failed to search legacy workspace %s: %s", + name, + exc, + exc_info=True, + ) except Exception as exc: _logger.warning( @@ -505,6 +511,14 @@ def search_cli_sessions( ) continue + if not messages and meta: + _logger.warning( + "CLI session %s has meta but traverse_blobs returned no " + "messages from %s", + session_id, + session["db_path"], + ) + bubbles = messages_to_bubbles(messages, created_ms) if not bubbles: continue @@ -542,7 +556,7 @@ def search_cli_sessions( results.append({ "workspaceId": f"cli:{cp['project_id']}", - "workspaceFolder": cp.get("workspace_path"), + "workspaceFolder": ws_name, "chatId": session_id, "chatTitle": title, "timestamp": created_ms, diff --git 
a/tests/test_normalize_file_path.py b/tests/test_normalize_file_path.py index 90d9274..9676516 100644 --- a/tests/test_normalize_file_path.py +++ b/tests/test_normalize_file_path.py @@ -137,6 +137,10 @@ def test_seconds_float_converted_to_ms(self) -> None: def test_zero_returns_zero(self) -> None: self.assertEqual(to_epoch_ms(0), 0) + def test_bool_returns_zero(self) -> None: + self.assertEqual(to_epoch_ms(True), 0) + self.assertEqual(to_epoch_ms(False), 0) + def test_iso8601_zulu(self) -> None: expected = int( datetime(2026, 2, 3, 20, 39, 54, 17_000, tzinfo=timezone.utc).timestamp() * 1000 diff --git a/tests/test_search_helpers.py b/tests/test_search_helpers.py index 6826640..c3c0918 100644 --- a/tests/test_search_helpers.py +++ b/tests/test_search_helpers.py @@ -59,6 +59,9 @@ def test_case_insensitive_query_lower(self): snippet = _extract_snippet(text, "Query", "query") assert "Query" in snippet + def test_empty_query_returns_empty(self): + assert _extract_snippet("any text here", "", "") == "" + def test_snippet_length_is_bounded(self): text = "a" * 1000 + "target" + "b" * 1000 snippet = _extract_snippet(text, "target", "target") diff --git a/utils/path_helpers.py b/utils/path_helpers.py index c145ec6..f6d205f 100644 --- a/utils/path_helpers.py +++ b/utils/path_helpers.py @@ -70,6 +70,8 @@ def to_epoch_ms(value: Any) -> int: """ if value is None: return 0 + if isinstance(value, bool): + return 0 if isinstance(value, (int, float)): if value > 1e12: return int(value) # already ms From c8fab212de6879a7b97994729bb5082d6f28f6cd Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Thu, 11 Jun 2026 11:40:30 -0400 Subject: [PATCH 09/11] fix: typecheck failure. 
--- .github/workflows/tests.yml | 1 + api/flask_config.py | 7 ++++--- pyproject.toml | 1 - 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 72ede31..9a353a7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -149,6 +149,7 @@ jobs: - name: Install runtime deps + mypy # Install from the pinned lock file for deterministic resolution, # then add mypy (dev-only; not in requirements-lock.txt). + # Flask 3.1+ ships inline types — do not install types-Flask (conflicts). run: | python -m pip install --upgrade pip python -m pip install -r requirements-lock.txt diff --git a/api/flask_config.py b/api/flask_config.py index 15158e0..3c3b9ec 100644 --- a/api/flask_config.py +++ b/api/flask_config.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Any, cast, overload +from typing import Any, overload from flask import Response, current_app, jsonify @@ -24,8 +24,9 @@ def json_response( data: Any, status: int | None = None, ) -> Response | tuple[Response, int]: - """Typed wrapper around :func:`flask.jsonify` for strict mypy (types-Flask).""" - response = cast(Response, jsonify(data)) + """Typed wrapper around :func:`flask.jsonify` for strict mypy.""" + response = jsonify(data) + assert isinstance(response, Response) if status is None: return response return response, status diff --git a/pyproject.toml b/pyproject.toml index 50b6681..678f218 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,6 @@ desktop = ["pywebview>=5.0,<6"] dev = [ "pytest>=8,<9", "mypy>=1.10,<2", - "types-Flask>=1.1,<2", "hypothesis>=6.100,<7", ] From cc24f11f09cd041d670f138a51b52f1699df85c7 Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Thu, 11 Jun 2026 12:12:38 -0400 Subject: [PATCH 10/11] update gitignore file --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5fd078f..6e2abab 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 
@@ __pycache__/ venv/ .venv/ env/ +.mypy-ci-test/ # Packaging *.egg-info/ From ecc849c1c8d5054a409c554483123c4e69506835 Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Thu, 11 Jun 2026 14:38:12 -0400 Subject: [PATCH 11/11] fix: unify bubble KV loaders and converge DisplayBubble shapes Route all bubbleId:* loads through workspace_db._parse_bubble_kv_row and load_bubble_map (including workspace tabs). Introduce DisplayBubble/BubbleMetadata and utils/display_bubble builders shared by tabs, CLI, and IDE markdown export. Co-authored-by: Cursor --- models/__init__.py | 4 + models/bubble_display.py | 47 ++++ models/conversation.py | 5 +- scripts/export.py | 4 +- services/search.py | 47 +--- services/workspace_context.py | 3 +- services/workspace_db.py | 93 +++++--- services/workspace_listing.py | 4 +- services/workspace_resolver.py | 8 +- services/workspace_tabs.py | 179 +++------------- tests/test_models_wired_at_read_sites.py | 9 +- tests/test_parse_failure_logging.py | 4 +- tests/test_raw_accessors.py | 10 +- tests/test_workspace_assignment_fallback.py | 45 +++- tests/test_workspace_context.py | 8 +- tests/test_workspace_tabs_malformed_nested.py | 2 +- utils/cli_chat_reader.py | 8 +- utils/cursor_md_exporter.py | 202 +++++++----------- utils/display_bubble.py | 153 +++++++++++++ 19 files changed, 465 insertions(+), 370 deletions(-) create mode 100644 models/bubble_display.py create mode 100644 utils/display_bubble.py diff --git a/models/__init__.py b/models/__init__.py index 0829449..4657ff6 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -1,3 +1,4 @@ +from models.bubble_display import BubbleMetadata, BubbleRole, DisplayBubble from models.cli_session import CliSessionMeta from models.conversation import Bubble, Composer, Conversation, WorkspaceLocalComposer from models.errors import SchemaError @@ -8,7 +9,10 @@ __all__ = [ "Bubble", + "BubbleMetadata", + "BubbleRole", "CliSessionMeta", + "DisplayBubble", "Composer", "Conversation", "ConversationSummary", diff 
--git a/models/bubble_display.py b/models/bubble_display.py new file mode 100644 index 0000000..6d093d5 --- /dev/null +++ b/models/bubble_display.py @@ -0,0 +1,47 @@ +"""Rendered bubble shapes for UI, CLI, and markdown export. + +Storage/KV rows (``bubbleId:*`` in ``cursorDiskKV``) are validated as +:class:`models.conversation.Bubble` at load time via +:func:`services.workspace_db.load_bubble_map`. All user-facing paths emit +:class:`DisplayBubble` with optional :class:`BubbleMetadata`. +""" + +from __future__ import annotations + +from typing import Any, Literal, TypedDict + +BubbleRole = Literal["user", "ai"] + + +class BubbleMetadata(TypedDict, total=False): + """Nested fields on a :class:`DisplayBubble` (tabs, CLI, export).""" + + modelName: str + inputTokens: int + outputTokens: int + cachedTokens: int + toolResultsCount: int + toolResults: list[Any] + toolCalls: list[dict[str, Any]] + thinking: str + thinkingDurationMs: int | float + contextWindowPercent: float + contextTokensUsed: int + contextTokenLimit: int + contextPctRemaining: float + responseTimeMs: int + cost: float + + +class _DisplayBubbleRequired(TypedDict): + type: BubbleRole + text: str + timestamp: int + + +class _DisplayBubbleOptional(TypedDict, total=False): + metadata: BubbleMetadata + + +class DisplayBubble(_DisplayBubbleRequired, _DisplayBubbleOptional): + """One message bubble in the browser UI or an exported Markdown document.""" diff --git a/models/conversation.py b/models/conversation.py index 4e281ea..98cabbb 100644 --- a/models/conversation.py +++ b/models/conversation.py @@ -177,7 +177,10 @@ def from_dict(cls, raw: dict[str, Any]) -> "WorkspaceLocalComposer": @dataclass(frozen=True) class Bubble: - """One message in a composer; bubble_id comes from the row key, not the JSON value.""" + """One message in a composer; bubble_id comes from the row key, not the JSON value. + + Rendered for UI/export as :class:`models.bubble_display.DisplayBubble`. 
+ """ bubble_id: str raw: dict[str, Any] = field(default_factory=dict) diff --git a/scripts/export.py b/scripts/export.py index 932a0ce..cd36454 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -52,7 +52,7 @@ cursor_cli_session_to_markdown, cursor_ide_chat_to_markdown, ) -from models import ExportEntry, SchemaError # noqa: E402 +from models import Bubble, ExportEntry, SchemaError # noqa: E402 from services.workspace_context import ( # noqa: E402 enrich_workspace_context_from_global_db, resolve_workspace_context, @@ -221,7 +221,7 @@ def main(): # ── Database reading via service layer ──────────────────────────────────── project_layouts_map: dict = {} - bubble_map: dict = {} + bubble_map: dict[str, Bubble] = {} code_block_diff_map: dict = {} ide_composer_rows: list = [] invalid_workspace_aliases: dict = {} diff --git a/services/search.py b/services/search.py index f06abbe..11fb06f 100644 --- a/services/search.py +++ b/services/search.py @@ -32,10 +32,11 @@ "search_global_storage", "search_legacy_workspaces", ] -from models import Bubble, Composer, ParseWarningCollector, SchemaError, SearchResult +from models import Composer, ParseWarningCollector, SchemaError, SearchResult from services.workspace_db import ( build_composer_id_to_workspace_id_cached, collect_workspace_entries, + load_bubble_map, open_global_db, ) from utils.cli_chat_reader import list_cli_projects, messages_to_bubbles, traverse_blobs @@ -153,38 +154,6 @@ def _build_ws_id_to_name( return mapping -def _build_search_bubble_map( - global_db: sqlite3.Connection, - parse_warnings: ParseWarningCollector, -) -> dict[str, dict[str, Any]]: - """Load ``bubbleId:*`` rows from an open global DB connection. - - Returns ``{bubble_id: {"text": str, "raw": dict}}``. Rows that fail - schema validation or JSON decoding are skipped; the skip is recorded in - *parse_warnings*. 
- """ - bubble_map: dict[str, dict[str, Any]] = {} - for row in global_db.execute( - "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%'" - ): - parts = row["key"].split(":") - if len(parts) < 3: - continue - bid = parts[2] - try: - bubble = Bubble.from_dict(json.loads(row["value"]), bubble_id=bid) - bubble_map[bid] = {"text": extract_text_from_bubble(bubble), "raw": bubble.raw} - except SchemaError as exc: - _logger.warning( - "Schema drift in bubble %s: %s (%s)", bid, exc, type(exc).__name__ - ) - parse_warnings.record_bubble_skipped() - except (json.JSONDecodeError, TypeError, ValueError) as exc: - _logger.warning("Failed to decode Bubble from bubbleId:%s: %s", bid, exc) - parse_warnings.record_bubble_skipped() - return bubble_map - - # --------------------------------------------------------------------------- # Public: per-source search functions # --------------------------------------------------------------------------- @@ -223,7 +192,7 @@ def search_global_storage( with open_global_db(workspace_path) as (conn, _db_path): if conn is None: return results - bubble_map = _build_search_bubble_map(conn, parse_warnings) + bubble_map = load_bubble_map(conn, parse_warnings=parse_warnings) composer_rows = conn.execute( "SELECT key, value FROM cursorDiskKV" " WHERE key LIKE 'composerData:%' AND LENGTH(value) > 10" @@ -276,15 +245,13 @@ def search_global_storage( bid = header.get("bubbleId") if not bid: continue - entry = bubble_map.get(bid) - if not entry: + bubble = bubble_map.get(bid) + if bubble is None: continue - text = entry.get("text") or "" + text = extract_text_from_bubble(bubble) if text: bubble_texts.append(text) - raw_bubble = entry.get("raw") - if raw_bubble: - bubble_meta.append(_json_dump_safe(raw_bubble)) + bubble_meta.append(_json_dump_safe(bubble.raw)) exclusion_text = _build_exclusion_searchable( project_name=project_name, diff --git a/services/workspace_context.py b/services/workspace_context.py index 38cbbf3..477bf94 100644 --- 
a/services/workspace_context.py +++ b/services/workspace_context.py @@ -6,6 +6,7 @@ from dataclasses import dataclass, replace from typing import Any +from models import Bubble from services.workspace_db import ( build_composer_id_to_workspace_id, build_composer_id_to_workspace_id_cached, @@ -30,7 +31,7 @@ class WorkspaceContext: project_name_to_workspace_id: dict[str, str] workspace_path_to_id: dict[str, str] project_layouts_map: dict[str, list[str]] - bubble_map: dict[str, dict[str, Any]] + bubble_map: dict[str, Bubble] def _entries( diff --git a/services/workspace_db.py b/services/workspace_db.py index ab05181..8c2fb16 100644 --- a/services/workspace_db.py +++ b/services/workspace_db.py @@ -11,6 +11,7 @@ _logger = logging.getLogger(__name__) +from models import Bubble, ParseWarningCollector, SchemaError from utils.path_helpers import get_workspace_folder_paths from utils.workspace_descriptor import read_json_file @@ -34,30 +35,63 @@ def safe_fetchall( return [] -def load_bubble_map(global_db: sqlite3.Connection) -> dict[str, dict[str, Any]]: - """Load all ``bubbleId:*`` KV entries into ``{bubble_id: bubble_dict}``. 
+def _parse_bubble_kv_row( + row_key: str, + row_value: str | bytes, + *, + parse_warnings: ParseWarningCollector | None = None, +) -> tuple[str, Bubble] | None: + """Parse one ``bubbleId:…`` row; return ``(bubble_id, Bubble)`` or skip.""" + parts = row_key.split(":") + if len(parts) < 3: + return None + bid = parts[2] + try: + parsed = json.loads(row_value) + bubble = Bubble.from_dict(parsed, bubble_id=bid) + return bid, bubble + except SchemaError as exc: + _logger.warning( + "Schema drift in bubble %s: %s (%s)", bid, exc, type(exc).__name__ + ) + if parse_warnings is not None: + parse_warnings.record_bubble_skipped() + except (json.JSONDecodeError, TypeError, ValueError) as exc: + if parse_warnings is not None: + _logger.warning( + "Failed to decode Bubble from %s: %s", row_key, exc + ) + parse_warnings.record_bubble_skipped() + else: + _logger.debug("Skipping malformed bubbleId row %s: %s", row_key, exc) + return None - Skips rows whose JSON value is not a dict; JSON parse errors are logged at - DEBUG level so a single malformed row cannot block the rest. + +def load_bubble_map( + global_db: sqlite3.Connection, + *, + parse_warnings: ParseWarningCollector | None = None, +) -> dict[str, Bubble]: + """Load all ``bubbleId:*`` KV entries into ``{bubble_id: Bubble}``. + + Uses the same :meth:`Bubble.from_dict` validation as search and tabs. + When *parse_warnings* is set, skipped rows are recorded for the API. 
""" - bubble_map: dict[str, dict[str, Any]] = {} + bubble_map: dict[str, Bubble] = {} try: rows = global_db.execute( - "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%'" + "SELECT key, value FROM cursorDiskKV" + " WHERE key LIKE 'bubbleId:%' AND value IS NOT NULL" ).fetchall() except sqlite3.Error: return bubble_map for row in rows: - parts = row["key"].split(":") - if len(parts) < 3: - continue - bid = parts[2] - try: - b = json.loads(row["value"]) - if isinstance(b, dict): - bubble_map[bid] = b - except (json.JSONDecodeError, ValueError, KeyError, TypeError) as e: - _logger.debug("Skipping malformed bubbleId row %s: %s", row["key"], e) + parsed = _parse_bubble_kv_row( + row["key"], row["value"], parse_warnings=parse_warnings + ) + if parsed is not None: + bid, bubble = parsed + bubble_map[bid] = bubble return bubble_map @@ -163,14 +197,17 @@ def load_code_block_diff_map(global_db: sqlite3.Connection) -> dict[str, list[di def load_bubbles_for_composer( - global_db: sqlite3.Connection, composer_id: str, -) -> dict[str, dict[str, Any]]: - """Load ``bubbleId:{composer_id}:*`` KV entries into ``{bubble_id: bubble_dict}``. + global_db: sqlite3.Connection, + composer_id: str, + *, + parse_warnings: ParseWarningCollector | None = None, +) -> dict[str, Bubble]: + """Load ``bubbleId:{composer_id}:*`` KV entries into ``{bubble_id: Bubble}``. Scoped alternative to :func:`load_bubble_map` for single-conversation assembly; avoids a full global ``bubbleId:%`` scan. 
""" - bubble_map: dict[str, dict[str, Any]] = {} + bubble_map: dict[str, Bubble] = {} try: rows = global_db.execute( "SELECT key, value FROM cursorDiskKV WHERE key LIKE ?", @@ -179,16 +216,12 @@ def load_bubbles_for_composer( except sqlite3.Error: return bubble_map for row in rows: - parts = row["key"].split(":") - if len(parts) < 3: - continue - bid = parts[2] - try: - b = json.loads(row["value"]) - if isinstance(b, dict): - bubble_map[bid] = b - except (json.JSONDecodeError, ValueError, KeyError, TypeError) as e: - _logger.debug("Skipping malformed bubbleId row %s: %s", row["key"], e) + parsed = _parse_bubble_kv_row( + row["key"], row["value"], parse_warnings=parse_warnings + ) + if parsed is not None: + bid, bubble = parsed + bubble_map[bid] = bubble return bubble_map diff --git a/services/workspace_listing.py b/services/workspace_listing.py index 3219b0a..891cabe 100644 --- a/services/workspace_listing.py +++ b/services/workspace_listing.py @@ -18,7 +18,7 @@ warn_workspace_json_read, ) from utils.workspace_descriptor import read_json_file -from models import ParseWarningCollector +from models import Bubble, ParseWarningCollector from services.summary_cache import ( fingerprint_workspace_storage, get_cached_projects, @@ -146,7 +146,7 @@ def _build_workspace_projects_uncached( if invalid_workspace_ids: project_layouts_map = load_project_layouts_map(global_db) - bubble_map: dict[str, dict[str, Any]] = {} + bubble_map: dict[str, Bubble] = {} invalid_workspace_aliases: dict[str, str] = {} if invalid_workspace_ids: invalid_workspace_aliases = infer_invalid_workspace_aliases( diff --git a/services/workspace_resolver.py b/services/workspace_resolver.py index 7997468..28e0daa 100644 --- a/services/workspace_resolver.py +++ b/services/workspace_resolver.py @@ -248,7 +248,7 @@ def determine_project_for_conversation( project_name_to_workspace_id: dict[str, str], workspace_path_to_id: dict[str, str], workspace_entries: list[dict[str, Any]], - bubble_map: Mapping[str, Bubble 
| dict[str, Any]], + bubble_map: Mapping[str, Bubble], composer_id_to_workspace_id: dict[str, str] | None = None, invalid_workspace_ids: set[str] | None = None, ) -> str | None: @@ -261,7 +261,7 @@ def determine_project_for_conversation( project_name_to_workspace_id: Basename-to-workspace-folder map. workspace_path_to_id: Normalized root path to workspace folder map. workspace_entries: Output of :func:`services.workspace_db.collect_workspace_entries`. - bubble_map: ``{bubble_id: Bubble | bubble_dict}`` from global KV. + bubble_map: ``{bubble_id: Bubble}`` from global KV loaders. composer_id_to_workspace_id: Definitive per-workspace composer map; when ``None``, layout and path heuristics are used without this shortcut. invalid_workspace_ids: Workspace folders marked invalid; mapped IDs in @@ -380,7 +380,7 @@ def infer_invalid_workspace_aliases( project_name_map: dict[str, str], workspace_path_map: dict[str, str], workspace_entries: list[dict[str, Any]], - bubble_map: Mapping[str, Bubble | dict[str, Any]], + bubble_map: Mapping[str, Bubble], composer_id_to_ws: dict[str, str], invalid_workspace_ids: set[str], ) -> dict[str, str]: @@ -396,7 +396,7 @@ def infer_invalid_workspace_aliases( project_name_map: Basename map for path resolution. workspace_path_map: Normalized path map for path resolution. workspace_entries: Workspace folder entries from storage scan. - bubble_map: ``{bubble_id: Bubble | bubble_dict}`` for path resolution. + bubble_map: ``{bubble_id: Bubble}`` for path resolution. composer_id_to_ws: Composer-to-workspace map (may point at invalid IDs). invalid_workspace_ids: Workspace folder names to reassign. 
diff --git a/services/workspace_tabs.py b/services/workspace_tabs.py index 5ee2cd2..0cbe439 100644 --- a/services/workspace_tabs.py +++ b/services/workspace_tabs.py @@ -7,7 +7,7 @@ import sqlite3 from collections.abc import Mapping from datetime import datetime -from typing import Any +from typing import Any, cast _logger = logging.getLogger(__name__) @@ -18,10 +18,21 @@ warn_workspace_json_read, ) from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules +from utils.display_bubble import ( + bubble_display_timestamp_ms, + build_storage_bubble_metadata, +) from utils.text_extract import extract_text_from_bubble -from utils.tool_parser import parse_tool_call from utils.workspace_descriptor import read_json_file -from models import Bubble, Composer, ParseWarningCollector, SchemaError +from models import ( + Bubble, + BubbleMetadata, + BubbleRole, + Composer, + DisplayBubble, + ParseWarningCollector, + SchemaError, +) from models.raw_access import ( conversation_header_bubble_id, message_request_context_project_layouts, @@ -37,6 +48,7 @@ COMPOSER_ROWS_WITH_HEADERS_SQL, collect_workspace_entries, global_storage_db_path, + load_bubble_map, load_bubbles_for_composer, load_code_block_diff_map, load_code_block_diffs_for_composer, @@ -83,13 +95,6 @@ def _loads_kv_value_logged(key: str, raw: object | None) -> Any | None: return None -def _bubble_entry_timestamp_ms(bubble: Bubble) -> int: - raw_ts = bubble.bubble_timestamp_ms() - if raw_ts is not None: - return to_epoch_ms(raw_ts) - return int(datetime.now().timestamp() * 1000) - - def _composer_tab_timestamp_ms(composer: Composer) -> int: if composer.last_updated_at is not None: return to_epoch_ms(composer.last_updated_at) @@ -112,7 +117,7 @@ def _kv_payload_log_meta(value: object | None) -> tuple[int, str | None]: def _assemble_tab_from_composer_data( composer_id: str, composer: Composer, - bubble_map: Mapping[str, Bubble | dict[str, Any]], + bubble_map: Mapping[str, Bubble], contexts: list[dict[str, 
Any]], code_block_diffs: list[dict[str, Any]], workspace_display_name: str, @@ -124,7 +129,7 @@ def _assemble_tab_from_composer_data( Args: composer_id: Composer UUID. composer: Validated composer model (typed field access on ``.raw``). - bubble_map: ``{bubble_id: Bubble | bubble_dict}`` — global or scoped. + bubble_map: ``{bubble_id: Bubble}`` — global or scoped. contexts: ``messageRequestContext`` entries for *this* composer (list of dicts, each with an injected ``contextId`` key and a ``bubbleId`` field from the JSON value). @@ -139,32 +144,19 @@ def _assemble_tab_from_composer_data( """ headers = composer.full_conversation_headers_only - bubbles: list[dict[str, Any]] = [] + bubbles: list[DisplayBubble] = [] for header in headers: if not isinstance(header, dict): continue bubble_id = conversation_header_bubble_id(header, composer_id=composer_id) if not bubble_id: continue - bubble_entry = bubble_map.get(bubble_id) - if bubble_entry is None: + bubble = bubble_map.get(bubble_id) + if bubble is None: continue - if isinstance(bubble_entry, Bubble): - bubble = bubble_entry - else: - try: - bubble = Bubble.from_dict(bubble_entry, bubble_id=bubble_id) - except SchemaError as e: - _logger.warning( - "Failed to parse Bubble from bubbleId:%s: %s", - bubble_id, - e, - ) - parse_warnings.record_bubble_skipped() - continue is_user = header.get("type") == 1 - msg_type = "user" if is_user else "ai" + msg_type: BubbleRole = "user" if is_user else "ai" text = extract_text_from_bubble(bubble) context_text = "" @@ -208,42 +200,13 @@ def _assemble_tab_from_composer_data( context_text += f"\n- {comp.get('name') or comp.get('composerId') or 'Conversation'}" full_text = text + context_text - token_count = bubble.token_count - - tool_calls = None - tfd = bubble.tool_former_data - if isinstance(tfd, dict): - tool_call = parse_tool_call(tfd) - if isinstance(tool_call, dict): - tool_calls = [tool_call] - - thinking = None - thinking_duration_ms = None - thinking_raw = bubble.thinking - if 
thinking_raw: - thinking = ( - thinking_raw - if isinstance(thinking_raw, str) - else ( - thinking_raw.get("text") - if isinstance(thinking_raw, dict) - else None - ) - ) - thinking_duration_ms = bubble.thinking_duration_ms - + metadata = build_storage_bubble_metadata(bubble, msg_type) + tool_calls = (metadata or {}).get("toolCalls") + thinking = (metadata or {}).get("thinking") has_content = full_text.strip() or tool_calls or thinking if not has_content: continue - ctx_window = bubble.context_window_status_at_creation - ctx_pct = None - if isinstance(ctx_window, dict): - if ctx_window.get("percentageRemainingFloat") is not None: - ctx_pct = ctx_window.get("percentageRemainingFloat") - elif ctx_window.get("percentageRemaining") is not None: - ctx_pct = ctx_window.get("percentageRemaining") - display_text = full_text.strip() if not display_text and tool_calls: tc = tool_calls[0] @@ -254,55 +217,13 @@ def _assemble_tab_from_composer_data( if not display_text and thinking: display_text = thinking - bubble_meta = None - model_info = bubble.model_info - model_name = model_info.get("modelName") - if model_name == "default": - model_name = None - - if msg_type == "ai": - tc_dict = token_count or {} - tool_results = bubble.tool_results - in_tok = tc_dict.get("inputTokens") or 0 - out_tok = tc_dict.get("outputTokens") or 0 - cached_tok = tc_dict.get("cachedTokens") or 0 - bubble_meta = { - "modelName": model_name, - "inputTokens": in_tok if in_tok > 0 else None, - "outputTokens": out_tok if out_tok > 0 else None, - "cachedTokens": cached_tok if cached_tok > 0 else None, - "toolResultsCount": (len(tool_calls) if tool_calls else None) or (len(tool_results) if tool_results else None), - "toolResults": tool_results if tool_results else None, - "toolCalls": tool_calls, - "thinking": thinking, - "thinkingDurationMs": thinking_duration_ms, - "contextWindowPercent": ctx_pct, - } - elif msg_type == "user": - bubble_meta = { - "modelName": model_name, - "contextWindowPercent": 
ctx_pct, - } - if ctx_window: - tokens_used = ctx_window.get("tokensUsed", 0) - token_limit = ctx_window.get("tokenLimit", 0) - if tokens_used > 0: - bubble_meta["contextTokensUsed"] = tokens_used - if token_limit > 0: - bubble_meta["contextTokenLimit"] = token_limit - - if bubble_meta: - bubble_meta = {k: v for k, v in bubble_meta.items() if v is not None} - if not bubble_meta: - bubble_meta = None - - b_entry = { + b_entry: DisplayBubble = { "type": msg_type, "text": display_text, - "timestamp": _bubble_entry_timestamp_ms(bubble), + "timestamp": bubble_display_timestamp_ms(bubble), } - if bubble_meta: - b_entry["metadata"] = bubble_meta + if metadata: + b_entry["metadata"] = cast(BubbleMetadata, metadata) bubbles.append(b_entry) if not bubbles: @@ -345,7 +266,7 @@ def _assemble_tab_from_composer_data( total_response_ms = 0 total_cost = 0.0 total_tool_calls = 0 - total_thinking_ms = 0 + total_thinking_ms = 0.0 models_set: set[str] = set() for b in bubbles: m = b.get("metadata") or {} @@ -766,7 +687,9 @@ def assemble_single_tab( return {"error": "Conversation not found"}, 404 # Scoped loads — only rows for this composer_id. 
- bubble_map = load_bubbles_for_composer(global_db, composer_id) + bubble_map = load_bubbles_for_composer( + global_db, composer_id, parse_warnings=parse_warnings + ) contexts = load_message_request_context_for_composer(global_db, composer_id) code_block_diffs = load_code_block_diffs_for_composer(global_db, composer_id) @@ -818,7 +741,6 @@ def assemble_workspace_tabs( composer_id_to_ws = ctx.composer_id_to_workspace_id matching_ws_ids = _build_matching_ws_ids(workspace_id, workspace_path, workspace_entries) - bubble_map: dict[str, Bubble] = {} code_block_diff_map: dict[str, list[dict[str, Any]]] = {} message_request_context_map: dict[str, list[dict[str, Any]]] = {} @@ -828,42 +750,7 @@ def assemble_workspace_tabs( workspace_display_name = lookup_workspace_display_name(workspace_path, workspace_id) - # Load bubbles - for row in safe_fetchall( - global_db, - "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%'" - " AND value IS NOT NULL", - ): - parts = row["key"].split(":") - if len(parts) >= 3: - bid = parts[2] - try: - parsed = json.loads(row["value"]) - - except (json.JSONDecodeError, TypeError, ValueError) as e: - payload_len, payload_fp = _kv_payload_log_meta(row["value"]) - _logger.warning( - "Failed to decode Bubble from %s: %s (payload_len=%d, payload_sha256=%s)", - row["key"], - e, - payload_len, - payload_fp, - ) - parse_warnings.record_bubble_skipped() - continue - try: - bubble_obj = Bubble.from_dict(parsed, bubble_id=bid) - bubble_map[bid] = bubble_obj - except SchemaError as e: - # Drift logged so the operator can chase disappearing - # bubbles instead of guessing. Bad row still skipped so the - # tabs endpoint can't 500 on one malformed bubble. 
- _logger.warning( - "Failed to parse Bubble from bubbleId:%s: %s", - bid, - e, - ) - parse_warnings.record_bubble_skipped() + bubble_map = load_bubble_map(global_db, parse_warnings=parse_warnings) # Load codeBlockDiffs code_block_diff_map = load_code_block_diff_map(global_db) diff --git a/tests/test_models_wired_at_read_sites.py b/tests/test_models_wired_at_read_sites.py index d1428e6..581ccd7 100644 --- a/tests/test_models_wired_at_read_sites.py +++ b/tests/test_models_wired_at_read_sites.py @@ -98,11 +98,14 @@ def tearDown(self): def test_search_endpoint_calls_bubble_from_dict(self): from app import create_app - import services.search as search_mod + import services.workspace_db as workspace_db_mod + from models import Bubble app = create_app() app.config["TESTING"] = True app.config["EXCLUSION_RULES"] = [] - with patch.object(search_mod.Bubble, "from_dict", wraps=search_mod.Bubble.from_dict) as spy: + with patch.object( + workspace_db_mod.Bubble, "from_dict", wraps=Bubble.from_dict + ) as spy: client = app.test_client() response = client.get("/api/search?q=sentinel-wired") self.assertEqual(response.status_code, 200) @@ -149,7 +152,7 @@ def test_bubble_schema_drift_is_logged_not_swallowed_silently(self): app = create_app() app.config["TESTING"] = True app.config["EXCLUSION_RULES"] = [] - with self.assertLogs("services.search", level="WARNING") as logs: + with self.assertLogs("services.workspace_db", level="WARNING") as logs: client = app.test_client() response = client.get("/api/search?q=sentinel-wired") self.assertEqual(response.status_code, 200) diff --git a/tests/test_parse_failure_logging.py b/tests/test_parse_failure_logging.py index db2838e..953de9c 100644 --- a/tests/test_parse_failure_logging.py +++ b/tests/test_parse_failure_logging.py @@ -126,7 +126,7 @@ def test_workspace_tabs_logs_bubble_json_decode_failure(self) -> None: ("bubbleId:cmp-ok:b-json", "{not valid json"), ) conn.commit() - with self.assertLogs("services.workspace_tabs", level="WARNING") 
as cm: + with self.assertLogs("services.workspace_db", level="WARNING") as cm: with app.test_request_context("/api/workspaces/global/tabs"): _payload, _status = assemble_workspace_tabs("global", ws_root, rules=[]) @@ -176,7 +176,7 @@ def test_workspace_tabs_logs_bubble_schema_drift(self) -> None: with tempfile.TemporaryDirectory() as tmp: ws_root = _seed_tabs_with_drifted_bubble(tmp) - with self.assertLogs("services.workspace_tabs", level="WARNING") as cm: + with self.assertLogs("services.workspace_db", level="WARNING") as cm: with app.test_request_context("/api/workspaces/global/tabs"): payload, status = assemble_workspace_tabs("global", ws_root, rules=[]) diff --git a/tests/test_raw_accessors.py b/tests/test_raw_accessors.py index 1ead2e7..1be2c57 100644 --- a/tests/test_raw_accessors.py +++ b/tests/test_raw_accessors.py @@ -13,7 +13,7 @@ sys.path.insert(0, REPO_ROOT) from models.conversation import Bubble, Composer -from services.workspace_tabs import _bubble_entry_timestamp_ms +from utils.display_bubble import bubble_display_timestamp_ms from models.raw_access import ( composer_newly_created_files, conversation_header_bubble_id, @@ -110,10 +110,10 @@ def test_bubble_entry_timestamp_ms_preserves_epoch_zero(self) -> None: bubble = Bubble.from_dict({"createdAt": 0}, bubble_id="b-zero") sentinel_now_ms = 9_999_000_000_000 with patch( - "services.workspace_tabs.datetime", + "utils.display_bubble.datetime", ) as mock_datetime: mock_datetime.now.return_value.timestamp.return_value = sentinel_now_ms / 1000 - ts = _bubble_entry_timestamp_ms(bubble) + ts = bubble_display_timestamp_ms(bubble) self.assertEqual(ts, 0) self.assertNotEqual(ts, sentinel_now_ms) @@ -121,10 +121,10 @@ def test_bubble_entry_timestamp_ms_falls_back_when_no_timestamp(self) -> None: bubble = Bubble.from_dict({"text": "hi"}, bubble_id="b-none") sentinel_now_ms = 1_700_000_000_000 with patch( - "services.workspace_tabs.datetime", + "utils.display_bubble.datetime", ) as mock_datetime: 
mock_datetime.now.return_value.timestamp.return_value = sentinel_now_ms / 1000 - ts = _bubble_entry_timestamp_ms(bubble) + ts = bubble_display_timestamp_ms(bubble) self.assertEqual(ts, sentinel_now_ms) def test_dict_bridge_newly_created_files_matches_composer_property(self) -> None: diff --git a/tests/test_workspace_assignment_fallback.py b/tests/test_workspace_assignment_fallback.py index 0c637b1..538776b 100644 --- a/tests/test_workspace_assignment_fallback.py +++ b/tests/test_workspace_assignment_fallback.py @@ -21,10 +21,23 @@ from hypothesis import given, settings from hypothesis import strategies as st +from models import Bubble, SchemaError from services.workspace_resolver import determine_project_for_conversation from utils.path_helpers import normalize_file_path +def _bubble_map_from_raw(raw: dict) -> dict[str, Bubble]: + out: dict[str, Bubble] = {} + for bid, val in raw.items(): + if not isinstance(val, dict): + continue + try: + out[bid] = Bubble.from_dict(val, bubble_id=bid) + except SchemaError: + continue + return out + + def _write_workspace_json(parent: str, name: str, folder: str) -> dict: ws_dir = os.path.join(parent, name) os.makedirs(ws_dir, exist_ok=True) @@ -241,7 +254,11 @@ def test_bubble_relevant_files_resolves_workspace(self) -> None: assigned = _resolve( self._bubble_composer("b-rel"), - bubble_map={"b-rel": {"relevantFiles": [file_path]}}, + bubble_map={ + "b-rel": Bubble.from_dict( + {"relevantFiles": [file_path]}, bubble_id="b-rel" + ), + }, workspace_entries=entries, ) self.assertEqual(assigned, "ws-bubble") @@ -257,7 +274,10 @@ def test_bubble_attached_file_chunks_resolves_workspace(self) -> None: assigned = _resolve( self._bubble_composer("b-att"), bubble_map={ - "b-att": {"attachedFileCodeChunksUris": [{"path": file_path}]} + "b-att": Bubble.from_dict( + {"attachedFileCodeChunksUris": [{"path": file_path}]}, + bubble_id="b-att", + ), }, workspace_entries=entries, ) @@ -274,11 +294,14 @@ def 
test_bubble_context_file_selections_resolves_workspace(self) -> None: assigned = _resolve( self._bubble_composer("b-ctx"), bubble_map={ - "b-ctx": { - "context": { - "fileSelections": [{"uri": {"path": file_path}}] - } - } + "b-ctx": Bubble.from_dict( + { + "context": { + "fileSelections": [{"uri": {"path": file_path}}] + } + }, + bubble_id="b-ctx", + ), }, workspace_entries=entries, ) @@ -349,7 +372,11 @@ def test_path_segment_matching_from_bubble_relevant_files(self) -> None: assigned = _resolve( {"fullConversationHeadersOnly": [{"bubbleId": "b-seg"}]}, - bubble_map={"b-seg": {"relevantFiles": [orphan]}}, + bubble_map={ + "b-seg": Bubble.from_dict( + {"relevantFiles": [orphan]}, bubble_id="b-seg" + ), + }, workspace_entries=entries, ) self.assertEqual(assigned, "ws-bubble-seg") @@ -434,7 +461,7 @@ def test_never_raises_on_arbitrary_inputs( project_name_to_workspace_id={}, workspace_path_to_id={}, workspace_entries=[], - bubble_map=bubble_map, + bubble_map=_bubble_map_from_raw(bubble_map), composer_id_to_workspace_id=None, invalid_workspace_ids=None, ) diff --git a/tests/test_workspace_context.py b/tests/test_workspace_context.py index f7225e5..07d20f5 100644 --- a/tests/test_workspace_context.py +++ b/tests/test_workspace_context.py @@ -170,7 +170,9 @@ def test_enrich_populates_bubble_map(): ) finally: conn.close() - assert enriched.bubble_map.get("bid1") is not None + loaded = enriched.bubble_map.get("bid1") + assert loaded is not None + assert loaded.text == "hi" assert ctx.bubble_map == {} @@ -222,7 +224,9 @@ def test_enrich_populates_both_global_maps(): finally: conn.close() assert enriched.project_layouts_map["composer-1"] == ["/tmp/myproject"] - assert enriched.bubble_map.get("bid1") is not None + loaded = enriched.bubble_map.get("bid1") + assert loaded is not None + assert loaded.text == "hi" assert ctx.project_layouts_map == {} assert ctx.bubble_map == {} diff --git a/tests/test_workspace_tabs_malformed_nested.py 
b/tests/test_workspace_tabs_malformed_nested.py index 914f353..ef0ff3a 100644 --- a/tests/test_workspace_tabs_malformed_nested.py +++ b/tests/test_workspace_tabs_malformed_nested.py @@ -204,7 +204,7 @@ def test_non_dict_parse_result_does_not_drop_composer(self) -> None: # Force parse_tool_call to return None — the previous code # would have stored ``tool_calls = [None]`` and crashed in the # display-text fallback with ``NoneType.get``. - with patch("services.workspace_tabs.parse_tool_call", return_value=None): + with patch("utils.display_bubble.parse_tool_call", return_value=None): with app.test_request_context("/api/workspaces/global/tabs"): payload, status = assemble_workspace_tabs("global", ws_root, rules=[]) diff --git a/utils/cli_chat_reader.py b/utils/cli_chat_reader.py index b0d6d0c..c3bf8bf 100644 --- a/utils/cli_chat_reader.py +++ b/utils/cli_chat_reader.py @@ -44,6 +44,8 @@ from datetime import datetime, timezone from typing import Any, Generator, cast +from models import DisplayBubble + # --------------------------------------------------------------------------- # Low-level store.db helpers @@ -233,7 +235,7 @@ def strip_user_info(text: str) -> str: return _USER_INFO_RE.sub("", text).strip() -def messages_to_bubbles(messages: list[dict[str, Any]], created_at_ms: int) -> list[dict[str, Any]]: +def messages_to_bubbles(messages: list[dict[str, Any]], created_at_ms: int) -> list[DisplayBubble]: """Convert CLI message dicts to the bubble format used by the browser UI. Each bubble has: @@ -273,7 +275,7 @@ def messages_to_bubbles(messages: list[dict[str, Any]], created_at_ms: int) -> l # only if not already set, to avoid clobbering a keyed entry. 
tool_outputs.setdefault("", content) - bubbles: list[dict[str, Any]] = [] + bubbles: list[DisplayBubble] = [] seq = 0 for msg in messages: @@ -303,7 +305,7 @@ def messages_to_bubbles(messages: list[dict[str, Any]], created_at_ms: int) -> l if not text.strip() and not tool_calls: continue - bubble: dict[str, Any] = {"type": "ai", "text": text, "timestamp": ts} + bubble: DisplayBubble = {"type": "ai", "text": text, "timestamp": ts} if tool_calls: # Convert to the format parse_tool_call returns formatted_calls = [] diff --git a/utils/cursor_md_exporter.py b/utils/cursor_md_exporter.py index 25858f3..12872f6 100644 --- a/utils/cursor_md_exporter.py +++ b/utils/cursor_md_exporter.py @@ -21,10 +21,16 @@ from pathlib import Path from typing import Any +from models import Bubble, BubbleRole, DisplayBubble from utils.cli_chat_reader import traverse_blobs, messages_to_bubbles +from utils.display_bubble import ( + annotate_response_times, + build_display_bubble_from_storage, + display_bubble_metadata, + display_bubble_tool_calls, +) from utils.path_helpers import to_epoch_ms -from utils.text_extract import extract_text_from_bubble, slug -from utils.tool_parser import parse_tool_call +from utils.text_extract import slug # ── CLI session exporter ───────────────────────────────────────────────────── @@ -34,7 +40,7 @@ def cursor_cli_session_to_markdown( db_path: str | Path, session_meta: dict[str, Any] | None = None, workspace_info: dict[str, Any] | None = None, - bubbles: list[dict[str, Any]] | None = None, + bubbles: list[DisplayBubble] | None = None, title_override: str | None = None, ) -> str: """Generate a complete Markdown document from a Cursor CLI store.db session. 
@@ -194,7 +200,7 @@ def cursor_cli_session_to_markdown( def cursor_ide_chat_to_markdown( composer_data: dict[str, Any], composer_id: str, - bubble_map: dict[str, Any], + bubble_map: dict[str, Bubble], code_block_diff_map: dict[str, Any] | None = None, workspace_info: dict[str, Any] | None = None, ) -> str: @@ -208,8 +214,8 @@ def cursor_ide_chat_to_markdown( The composer UUID — used as ``log_id`` in frontmatter and as the key into ``code_block_diff_map``. bubble_map: - Global ``{bubble_id: bubble_dict}`` map loaded from - ``cursorDiskKV`` (see ``services.workspace_db.load_bubble_map``). + Global ``{bubble_id: Bubble}`` map from + :func:`services.workspace_db.load_bubble_map`. code_block_diff_map: Optional ``{composer_id: [diff_dict]}`` map. When ``None`` no code edit bubbles are appended. @@ -236,59 +242,15 @@ def cursor_ide_chat_to_markdown( headers = cd.get("fullConversationHeadersOnly") or [] # ── Build bubble list ───────────────────────────────────────────────────── - bubbles: list[dict[str, Any]] = [] + bubbles: list[DisplayBubble] = [] for h in headers: - b = bubble_map.get(h.get("bubbleId")) - if not b: + storage = bubble_map.get(h.get("bubbleId")) + if storage is None: continue - text = extract_text_from_bubble(b) - has_tool = isinstance(b.get("toolFormerData"), dict) - has_thinking = bool(b.get("thinking")) - if not text.strip() and not has_tool and not has_thinking: - continue - if not text.strip() and has_tool: - text = f"**Tool: {b['toolFormerData'].get('name', 'unknown')}**" - - btype = "user" if h.get("type") == 1 else "ai" - - thinking = None - thinking_duration_ms = None - if b.get("thinking"): - thinking = ( - b["thinking"] if isinstance(b["thinking"], str) - else (b["thinking"].get("text") if isinstance(b["thinking"], dict) else None) - ) - thinking_duration_ms = b.get("thinkingDurationMs") - - tool_info = parse_tool_call(b["toolFormerData"]) if has_tool else None - - model_info = (b.get("modelInfo") or {}).get("modelName") - if model_info == 
"default": - model_info = None - - ctx_window = b.get("contextWindowStatusAtCreation") or {} - ctx_tokens_used = ctx_window.get("tokensUsed", 0) - ctx_token_limit = ctx_window.get("tokenLimit", 0) - ctx_pct_remaining = ( - ctx_window.get("percentageRemainingFloat") or ctx_window.get("percentageRemaining") - ) - - bubbles.append({ - "type": btype, - "text": text, - "timestamp": ( - to_epoch_ms(b.get("createdAt")) - or to_epoch_ms(b.get("timestamp")) - or int(datetime.now().timestamp() * 1000) - ), - "tool": tool_info, - "thinking": thinking, - "thinkingDurationMs": thinking_duration_ms, - "model": model_info, - "contextTokensUsed": ctx_tokens_used if ctx_tokens_used > 0 else None, - "contextTokenLimit": ctx_token_limit if ctx_token_limit > 0 else None, - "contextPctRemaining": round(ctx_pct_remaining, 1) if ctx_pct_remaining else None, - }) + role: BubbleRole = "user" if h.get("type") == 1 else "ai" + entry = build_display_bubble_from_storage(storage, role) + if entry is not None: + bubbles.append(entry) # Append code-block diffs as synthetic AI bubbles. 
diff_ts = to_epoch_ms(cd.get("lastUpdatedAt")) or to_epoch_ms(cd.get("createdAt")) or int(datetime.now().timestamp() * 1000) @@ -300,30 +262,29 @@ def cursor_ide_chat_to_markdown( }) bubbles.sort(key=lambda bub: bub.get("timestamp") or 0) - - # ── Compute response times ──────────────────────────────────────────────── - last_user_ts = None - for bub in bubbles: - if bub["type"] == "user": - last_user_ts = bub.get("timestamp") - elif bub["type"] == "ai" and last_user_ts: - bts = bub.get("timestamp") - if bts and bts > last_user_ts: - bub["responseTimeMs"] = bts - last_user_ts + annotate_response_times(bubbles) # ── Session-level aggregates ────────────────────────────────────────────── - total_response_ms = sum(bub.get("responseTimeMs", 0) for bub in bubbles) - total_thinking_ms = sum(bub.get("thinkingDurationMs", 0) or 0 for bub in bubbles) - total_tool_calls = sum(1 for bub in bubbles if bub.get("tool")) - max_ctx_used = max((bub.get("contextTokensUsed") or 0) for bub in bubbles) if bubbles else 0 - ctx_limit = max((bub.get("contextTokenLimit") or 0) for bub in bubbles) if bubbles else 0 + total_response_ms = sum( + display_bubble_metadata(bub).get("responseTimeMs") or 0 for bub in bubbles + ) + total_thinking_ms = sum( + display_bubble_metadata(bub).get("thinkingDurationMs") or 0 for bub in bubbles + ) + total_tool_calls = sum(len(display_bubble_tool_calls(bub)) for bub in bubbles) + max_ctx_used = max( + (display_bubble_metadata(bub).get("contextTokensUsed") or 0) for bub in bubbles + ) if bubbles else 0 + ctx_limit = max( + (display_bubble_metadata(bub).get("contextTokenLimit") or 0) for bub in bubbles + ) if bubbles else 0 lines_added = cd.get("totalLinesAdded", 0) lines_removed = cd.get("totalLinesRemoved", 0) tool_breakdown: dict[str, int] = {} for bub in bubbles: - if bub.get("tool"): - tn = bub["tool"].get("name", "unknown") + for tool in display_bubble_tool_calls(bub): + tn = tool.get("name", "unknown") tool_breakdown[tn] = tool_breakdown.get(tn, 0) + 1 
ts_vals = [bub["timestamp"] for bub in bubbles if bub.get("timestamp")] @@ -339,29 +300,27 @@ def cursor_ide_chat_to_markdown( "searches": 0, "web": 0, } for bub in bubbles: - if not bub.get("tool"): - continue - t = bub["tool"] - tn = t.get("name", "") - status = t.get("status") or "" - raw_input = str(t.get("input") or "").strip() - first_line = raw_input.split("\n")[0] if raw_input else "" - if tn == "read_file_v2" and first_line: - files_read_list.append(first_line) - tool_result_stats["file_reads"] += 1 - elif tn == "edit_file_v2" and first_line: - files_written_list.append(first_line) - tool_result_stats["file_edits"] += 1 - elif tn == "run_terminal_command_v2" and raw_input: - commands_run_list.append(raw_input) - if status in ("error", "failed"): - tool_result_stats["terminal_error"] += 1 - else: - tool_result_stats["terminal_success"] += 1 - elif tn in ("ripgrep_raw_search", "glob_file_search", "semantic_search_full"): - tool_result_stats["searches"] += 1 - elif tn in ("web_search", "web_fetch"): - tool_result_stats["web"] += 1 + for t in display_bubble_tool_calls(bub): + tn = t.get("name", "") + status = t.get("status") or "" + raw_input = str(t.get("input") or "").strip() + first_line = raw_input.split("\n")[0] if raw_input else "" + if tn == "read_file_v2" and first_line: + files_read_list.append(first_line) + tool_result_stats["file_reads"] += 1 + elif tn == "edit_file_v2" and first_line: + files_written_list.append(first_line) + tool_result_stats["file_edits"] += 1 + elif tn == "run_terminal_command_v2" and raw_input: + commands_run_list.append(raw_input) + if status in ("error", "failed"): + tool_result_stats["terminal_error"] += 1 + else: + tool_result_stats["terminal_success"] += 1 + elif tn in ("ripgrep_raw_search", "glob_file_search", "semantic_search_full"): + tool_result_stats["searches"] += 1 + elif tn in ("web_search", "web_fetch"): + tool_result_stats["web"] += 1 # ── Frontmatter ─────────────────────────────────────────────────────────── 
fm_lines = ["---"] @@ -383,7 +342,9 @@ def cursor_ide_chat_to_markdown( fm_lines.append("tool_call_breakdown:") for tn, cnt in sorted(tool_breakdown.items(), key=lambda x: -x[1]): fm_lines.append(f" {json.dumps(tn, ensure_ascii=False)}: {cnt}") - total_think = sum(1 for bub in bubbles if bub.get("thinking")) + total_think = sum( + 1 for bub in bubbles if display_bubble_metadata(bub).get("thinking") + ) if total_think: fm_lines.append(f"thinking_count: {total_think}") if wall_clock_sec is not None: @@ -458,36 +419,39 @@ def cursor_ide_chat_to_markdown( # ── Body ────────────────────────────────────────────────────────────────── body = "" for bub in bubbles: - role = "User" if bub["type"] == "user" else "Assistant" - body += f"### {role}\n\n" + role_label = "User" if bub["type"] == "user" else "Assistant" + body += f"### {role_label}\n\n" + meta = display_bubble_metadata(bub) bub_meta: list[str] = [] - if bub.get("model"): - bub_meta.append(f"Model: {bub['model']}") - if bub.get("responseTimeMs"): - bub_meta.append(f"Response: {bub['responseTimeMs'] / 1000:.1f}s") - if bub.get("thinkingDurationMs"): - bub_meta.append(f"Thinking: {bub['thinkingDurationMs'] / 1000:.1f}s") - if bub.get("contextTokensUsed") and bub.get("contextTokenLimit"): - pct = bub["contextTokensUsed"] / bub["contextTokenLimit"] * 100 + if meta.get("modelName"): + bub_meta.append(f"Model: {meta['modelName']}") + response_ms = meta.get("responseTimeMs") + if response_ms: + bub_meta.append(f"Response: {response_ms / 1000:.1f}s") + thinking_ms = meta.get("thinkingDurationMs") + if thinking_ms: + bub_meta.append(f"Thinking: {thinking_ms / 1000:.1f}s") + ctx_used = meta.get("contextTokensUsed") + ctx_limit_bub = meta.get("contextTokenLimit") + if ctx_used and ctx_limit_bub: + pct = ctx_used / ctx_limit_bub * 100 bub_meta.append( - f"Context: {bub['contextTokensUsed']:,} / {bub['contextTokenLimit']:,}" + f"Context: {ctx_used:,} / {ctx_limit_bub:,}" f" tokens ({pct:.0f}% used)" ) - elif 
bub.get("contextPctRemaining") is not None: - bub_meta.append(f"Context: {bub['contextPctRemaining']}% remaining") + elif meta.get("contextWindowPercent") is not None: + remaining = meta["contextWindowPercent"] + bub_meta.append(f"Context: {remaining}% remaining") if bub_meta: body += f"_{' | '.join(bub_meta)}_\n\n" if bub.get("timestamp"): body += f"_{datetime.fromtimestamp(bub['timestamp'] / 1000).isoformat()}_\n\n" - if bub.get("thinking"): - dur_str = ( - f" ({bub['thinkingDurationMs'] / 1000:.1f}s)" - if bub.get("thinkingDurationMs") else "" - ) - body += f"
Thinking{dur_str}\n\n{bub['thinking']}\n\n
\n\n" + thinking_text = meta.get("thinking") + if thinking_text: + dur_str = f" ({thinking_ms / 1000:.1f}s)" if thinking_ms else "" + body += f"
Thinking{dur_str}\n\n{thinking_text}\n\n
\n\n" body += bub["text"] + "\n\n" - if bub.get("tool"): - t = bub["tool"] + for t in display_bubble_tool_calls(bub): tool_summary = t.get("summary") or t.get("name") or "unknown" tool_status = t.get("status") or "" status_str = f" ({tool_status})" if tool_status else "" diff --git a/utils/display_bubble.py b/utils/display_bubble.py new file mode 100644 index 0000000..6efb34e --- /dev/null +++ b/utils/display_bubble.py @@ -0,0 +1,153 @@ +"""Build and read :class:`models.DisplayBubble` from storage :class:`models.Bubble`.""" + +from __future__ import annotations + +from datetime import datetime +from typing import Any, cast + +from models import Bubble, BubbleMetadata, BubbleRole, DisplayBubble +from utils.path_helpers import to_epoch_ms +from utils.text_extract import extract_text_from_bubble +from utils.tool_parser import parse_tool_call + + +def bubble_display_timestamp_ms(bubble: Bubble) -> int: + """Epoch-ms timestamp for a storage bubble; falls back to now when absent.""" + raw_ts = bubble.bubble_timestamp_ms() + if raw_ts is not None: + return to_epoch_ms(raw_ts) + return int(datetime.now().timestamp() * 1000) + + +def extract_thinking_text( + bubble: Bubble, +) -> tuple[str | None, int | float | None]: + """Return ``(thinking_text, thinking_duration_ms)`` from a storage bubble.""" + thinking_raw = bubble.thinking + if not thinking_raw: + return None, bubble.thinking_duration_ms + if isinstance(thinking_raw, str): + return thinking_raw, bubble.thinking_duration_ms + if isinstance(thinking_raw, dict): + return thinking_raw.get("text"), bubble.thinking_duration_ms + return None, bubble.thinking_duration_ms + + +def build_storage_bubble_metadata( + bubble: Bubble, + role: BubbleRole, +) -> dict[str, Any] | None: + """Metadata dict for tabs/export — tool calls, tokens, thinking, context.""" + model_info = bubble.model_info + model_name = model_info.get("modelName") + if model_name == "default": + model_name = None + + ctx_window = 
bubble.context_window_status_at_creation + ctx_pct: float | None = None + if ctx_window: + if ctx_window.get("percentageRemainingFloat") is not None: + ctx_pct = ctx_window.get("percentageRemainingFloat") + elif ctx_window.get("percentageRemaining") is not None: + ctx_pct = ctx_window.get("percentageRemaining") + + meta: dict[str, Any] = {} + if model_name: + meta["modelName"] = model_name + if ctx_pct is not None: + meta["contextWindowPercent"] = ctx_pct + + if role == "ai": + token_count = bubble.token_count or {} + tool_results = bubble.tool_results + tfd = bubble.tool_former_data + if isinstance(tfd, dict): + tool_call = parse_tool_call(tfd) + if isinstance(tool_call, dict): + meta["toolCalls"] = [tool_call] + + thinking, thinking_duration_ms = extract_thinking_text(bubble) + if thinking: + meta["thinking"] = thinking + if thinking_duration_ms is not None: + meta["thinkingDurationMs"] = thinking_duration_ms + + in_tok = token_count.get("inputTokens") or 0 + out_tok = token_count.get("outputTokens") or 0 + cached_tok = token_count.get("cachedTokens") or 0 + if in_tok > 0: + meta["inputTokens"] = in_tok + if out_tok > 0: + meta["outputTokens"] = out_tok + if cached_tok > 0: + meta["cachedTokens"] = cached_tok + tool_calls = meta.get("toolCalls") + tr_count = (len(tool_calls) if tool_calls else 0) or ( + len(tool_results) if tool_results else 0 + ) + if tr_count > 0: + meta["toolResultsCount"] = tr_count + if tool_results: + meta["toolResults"] = tool_results + elif ctx_window: + tokens_used = ctx_window.get("tokensUsed", 0) + token_limit = ctx_window.get("tokenLimit", 0) + if tokens_used > 0: + meta["contextTokensUsed"] = tokens_used + if token_limit > 0: + meta["contextTokenLimit"] = token_limit + + return meta or None + + +def build_display_bubble_from_storage( + bubble: Bubble, + role: BubbleRole, + *, + display_text: str | None = None, +) -> DisplayBubble | None: + """Render a storage bubble as a :class:`DisplayBubble` for UI or export.""" + text = 
display_text if display_text is not None else extract_text_from_bubble(bubble) + tfd = bubble.tool_former_data + thinking, _ = extract_thinking_text(bubble) + has_tool = tfd is not None + has_thinking = bool(thinking) + if not text.strip() and not has_tool and not has_thinking: + return None + + if not text.strip() and has_tool and tfd is not None: + text = f"**Tool: {tfd.get('name', 'unknown')}**" + + entry: DisplayBubble = { + "type": role, + "text": text.strip() or text, + "timestamp": bubble_display_timestamp_ms(bubble), + } + metadata = build_storage_bubble_metadata(bubble, role) + if metadata: + entry["metadata"] = cast(BubbleMetadata, metadata) + return entry + + +def display_bubble_metadata(bubble: DisplayBubble) -> BubbleMetadata: + return bubble.get("metadata") or {} + + +def display_bubble_tool_calls(bubble: DisplayBubble) -> list[dict[str, Any]]: + return list(display_bubble_metadata(bubble).get("toolCalls") or []) + + +def annotate_response_times(bubbles: list[DisplayBubble]) -> None: + """Set ``metadata.responseTimeMs`` on AI bubbles following a user message.""" + last_user_ts: int | None = None + for bub in bubbles: + if bub["type"] == "user": + last_user_ts = bub.get("timestamp") + continue + if bub["type"] != "ai" or last_user_ts is None: + continue + bts = bub.get("timestamp") + if bts and bts > last_user_ts: + meta = dict(display_bubble_metadata(bub)) + meta["responseTimeMs"] = bts - last_user_ts + bub["metadata"] = cast(BubbleMetadata, meta)