Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ In `utils/tool_dispatch.py`, tool results are classified through `_parse_tool_re

When adding a new tool renderer:

1. Add a `(predicate, builder)` pair to `_TOOL_RESULT_DISPATCH` in `utils/tool_dispatch.py`, preserving existing predicate order unless you also update fixtures and ordering tests (`tests/test_jsonl_parser.py`, `tests/test_real_session_fixtures.py`). Order is **not** “specific before generic” in general — the first match wins. `_tool_result_pred_task_message` is the intentional broad-before-narrow exception (`task_id` or `message` before retrieval/completed/async).
1. Add a `(predicate, builder)` pair to `_TOOL_RESULT_DISPATCH` in `utils/tool_dispatch.py`, preserving existing predicate order unless you also update fixtures and ordering tests (`tests/test_jsonl_parser.py`, `tests/test_real_session_fixtures.py`). Order is **not** “specific before generic” in general — the first match wins. `is_task_message_tool_result` is the intentional broad-before-narrow exception (`task_id` or `message` before retrieval/completed/async).
2. Add or extend a JSONL fixture under `tests/fixtures/` (especially for overlaps with existing predicates).
3. Run `pytest tests/test_jsonl_parser.py tests/test_real_session_fixtures.py -v`.

Expand Down
6 changes: 6 additions & 0 deletions models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,17 @@
from models.errors import ErrorResponse
from models.export import ExportStateDict
from models.project import ProjectDict, ProjectSessionRowDict, SessionListItemDict
from models.record_data import RecordDataUnion
from models.search import SearchHitDict
from models.session import (
MessageDict,
QuickSessionInfoDict,
SessionDict,
SessionMetadataDict,
ToolUseDict,
)
from models.stats import FilesTouchedDict, SessionStatsDict
from models.tool_results import ToolResultUnion

__all__ = [
"ErrorResponse",
Expand All @@ -25,4 +28,7 @@
"SessionListItemDict",
"SessionMetadataDict",
"SessionStatsDict",
"RecordDataUnion",
"ToolResultUnion",
"ToolUseDict",
]
43 changes: 43 additions & 0 deletions models/record_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""TypedDict shapes for record-level ``data`` payloads on progress messages."""

from typing import Literal, TypedDict


class BashProgressDataDict(TypedDict, total=False):
    """Shape of a ``bash_progress`` data payload.

    Declared with ``total=False``: every key may be absent.
    """

    # Discriminator; only the literal "bash_progress" is accepted.
    type: Literal["bash_progress"]
    # Text payload stored under the ``output`` key.
    output: str


class HookProgressDataDict(TypedDict, total=False):
    """Shape of a ``hook_progress`` data payload.

    Declared with ``total=False``: every key may be absent.
    """

    # Discriminator; only the literal "hook_progress" is accepted.
    type: Literal["hook_progress"]
    # Text payload stored under the ``output`` key.
    output: str


class AgentProgressDataDict(TypedDict, total=False):
    """Shape of an ``agent_progress`` data payload.

    Declared with ``total=False``: every key may be absent.
    """

    # Discriminator; only the literal "agent_progress" is accepted.
    type: Literal["agent_progress"]
    # Text payload stored under the ``message`` key.
    message: str


class SummaryDataDict(TypedDict, total=False):
    """Shape of a summary-style payload, for progress entries that carry one.

    Declared with ``total=False``: every key may be absent.
    """

    # Discriminator; only the literal "summary" is accepted.
    type: Literal["summary"]
    # Summary text.
    summary: str


class CompactBoundaryDataDict(TypedDict, total=False):
    """Shape of compact-boundary metadata when it travels on a data blob.

    Declared with ``total=False``: every key may be absent.
    """

    # Discriminator; only the literal "compact_boundary" is accepted.
    type: Literal["compact_boundary"]
    # String stored under ``trigger``.
    trigger: str
    # Integer stored under ``pre_tokens``.
    pre_tokens: int


# Any record-level ``data`` payload: one of the typed shapes declared above,
# or an arbitrary string-keyed mapping as the final catch-all arm.
RecordDataUnion = (
    BashProgressDataDict
    | HookProgressDataDict
    | AgentProgressDataDict
    | SummaryDataDict
    | CompactBoundaryDataDict
    | dict[str, object]
)
33 changes: 27 additions & 6 deletions models/session.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,27 @@
"""Parsed session shapes from jsonl_parser."""

from typing import Any, NotRequired, TypedDict
from typing import Any, Literal, NotRequired, TypedDict

from models.record_data import RecordDataUnion
from models.tool_results import ToolNameLiteral, ToolResultUnion


class ToolUseDict(TypedDict, total=False):
    """Shape of one entry in a message's ``tool_uses`` list.

    Declared with ``total=False``: every key may be absent.
    """

    id: str
    # ``Literal | str`` collapses to plain ``str`` for mypy; the literal arm only
    # documents the known tool names and makes no exhaustiveness promise.
    name: ToolNameLiteral | str
    # Tool arguments as a string-keyed mapping of arbitrary values.
    input: dict[str, object]


class MessageUsageDict(TypedDict, total=False):
input_tokens: int
output_tokens: int
cache_read: int
cache_creation: int
service_tier: str | None


# Closed set of subtype strings named by this alias.
# NOTE(review): in the visible part of this module ``MessageDict.subtype`` is
# still annotated as plain ``str``; confirm whether it should use this alias.
SystemSubtypeLiteral = Literal["compact_boundary", "init"]


class MessageDict(TypedDict):
Expand All @@ -12,18 +33,18 @@ class MessageDict(TypedDict):
content: NotRequired[str]
images: NotRequired[list[Any] | None]
is_sidechain: NotRequired[bool]
tool_result: NotRequired[Any]
tool_result_parsed: NotRequired[dict[str, Any] | None]
tool_result: NotRequired[ToolResultUnion | None]
tool_result_parsed: NotRequired[dict[str, object] | None]
slug: NotRequired[str | None]
model: NotRequired[str]
stop_reason: NotRequired[str]
thinking: NotRequired[str | None]
tool_uses: NotRequired[list[dict[str, Any]] | None]
tool_uses: NotRequired[list[ToolUseDict] | None]
is_api_error: NotRequired[bool]
usage: NotRequired[dict[str, Any]]
usage: NotRequired[MessageUsageDict]
subtype: NotRequired[str]
level: NotRequired[str]
data: NotRequired[Any]
data: NotRequired[RecordDataUnion]
progress_type: NotRequired[str]
tool_use_id: NotRequired[str | None]
parent_tool_use_id: NotRequired[str | None]
Expand Down
233 changes: 233 additions & 0 deletions models/tool_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
"""TypedDict shapes for Claude Code toolUseResult blobs at the JSONL parse boundary.

Ground truth: tests/test_jsonl_parser.py, tests/test_real_session_fixtures.py,
and utils/tool_dispatch.py predicate order (first match wins).
"""

from typing import Literal, TypedDict, TypeGuard


class BashToolResultDict(TypedDict, total=False):
    """Typed view of a Bash-shaped tool result blob.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    # Captured stream text.
    stdout: str
    stderr: str
    # Key names below are camelCase / snake_case exactly as declared here.
    exitCode: int
    interrupted: bool
    is_error: bool
    returnCodeInterpretation: str


class FileEditToolResultDict(TypedDict, total=False):
    """Typed view of a file-edit-shaped tool result blob.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    structuredPatch: str
    filePath: str
    newString: str
    replaceAll: bool


class PlanToolResultDict(TypedDict, total=False):
    """Typed view of a plan-shaped tool result blob.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    # List of arbitrary items; element type is not constrained further.
    plan: list[object]
    filePath: str
    content: str


class FileWriteToolResultDict(TypedDict, total=False):
    """Typed view of a file-write-shaped tool result blob.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    filePath: str
    content: str


class GlobToolResultDict(TypedDict, total=False):
    """Typed view of a glob-shaped tool result blob.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    filenames: list[str]
    numFiles: int
    truncated: bool
    durationMs: int


class GrepToolResultDict(TypedDict, total=False):
    """Typed view of a grep-shaped tool result blob.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    mode: str
    numFiles: int
    numLines: int
    content: str
    durationMs: int


class ReadFileObjDict(TypedDict, total=False):
    """Nested mapping stored under the ``file`` key of ``ReadToolResultDict``.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    filePath: str
    numLines: int
    content: str


class ReadToolResultDict(TypedDict, total=False):
    """Typed view of a read-shaped tool result blob.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    # Nested per-file mapping.
    file: ReadFileObjDict
    # List of arbitrary items; element type is not constrained further.
    content: list[object]


class WebSearchToolResultDict(TypedDict, total=False):
query: str
results: list[object] | None
durationSeconds: float


class WebFetchToolResultDict(TypedDict, total=False):
    """Typed view of a web-fetch-shaped tool result blob.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    url: str
    code: int
    durationMs: int


class TaskMessageToolResultDict(TypedDict, total=False):
    """Typed view of a task-message-shaped tool result blob.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    task_id: str
    task_type: str
    message: str
    agentId: str


class TaskRetrievalToolResultDict(TypedDict, total=False):
    """Typed view of a task-retrieval-shaped tool result blob.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    retrieval_status: str
    # String-keyed mapping of arbitrary values.
    task: dict[str, object]


class TaskCompletedToolResultDict(TypedDict, total=False):
    """Typed view of a task-completed-shaped tool result blob.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    agentId: str
    totalDurationMs: int
    status: str
    totalTokens: int
    totalToolUseCount: int


class TaskAsyncToolResultDict(TypedDict, total=False):
    """Typed view of an async-task-shaped tool result blob.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    agentId: str
    isAsync: bool
    status: str
    description: str


class TodoItemDict(TypedDict, total=False):
    """Element type of the ``newTodos`` / ``oldTodos`` lists.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    id: str
    content: str


class TodoWriteToolResultDict(TypedDict, total=False):
    """Typed view of a todo-write-shaped tool result blob.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    newTodos: list[TodoItemDict]
    oldTodos: list[TodoItemDict]


class UserInputToolResultDict(TypedDict, total=False):
    """Typed view of a user-input-shaped tool result blob.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    # List of string-keyed mappings.
    questions: list[dict[str, object]]
    # Single string-keyed mapping.
    answers: dict[str, object]


class ToolResultContentBlockDict(TypedDict, total=False):
    """Element type of the ``content`` list on ``ToolResultWithContentDict``.

    Declared with ``total=False``: any subset of the keys may be present.
    """

    type: str
    # String-keyed mapping of arbitrary values.
    source: dict[str, object]


class ToolResultWithContentDict(TypedDict, total=False):
    """Payloads that embed a list of content blocks, e.g. a Read on an image.

    Declared with ``total=False``: the ``content`` key may be absent.
    """

    content: list[ToolResultContentBlockDict]


# Plain mapping type handed to the dispatch predicates below; structurally a
# superset of every tool blob shape declared in this module.
ToolResultDict = dict[str, object]

# Union of the value shapes declared in this module: raw text, any of the
# typed blob shapes, or an arbitrary string-keyed mapping as the last arm.
ToolResultUnion = (
    str
    | BashToolResultDict
    | FileEditToolResultDict
    | PlanToolResultDict
    | FileWriteToolResultDict
    | GlobToolResultDict
    | GrepToolResultDict
    | ReadToolResultDict
    | WebSearchToolResultDict
    | WebFetchToolResultDict
    | TaskMessageToolResultDict
    | TaskRetrievalToolResultDict
    | TaskCompletedToolResultDict
    | TaskAsyncToolResultDict
    | TodoWriteToolResultDict
    | UserInputToolResultDict
    | ToolResultWithContentDict
    | dict[str, object]
)


def is_tool_result_dict(tr: ToolResultUnion | None) -> TypeGuard[ToolResultDict]:
return isinstance(tr, dict)


def is_bash_tool_result(tr: ToolResultDict) -> TypeGuard[BashToolResultDict]:
return "stdout" in tr or "stderr" in tr


def is_file_edit_tool_result(tr: ToolResultDict) -> TypeGuard[FileEditToolResultDict]:
return "structuredPatch" in tr or ("filePath" in tr and "newString" in tr)


def is_plan_tool_result(tr: ToolResultDict) -> TypeGuard[PlanToolResultDict]:
return "plan" in tr and "filePath" in tr


def is_file_write_tool_result(tr: ToolResultDict) -> TypeGuard[FileWriteToolResultDict]:
return "filePath" in tr and "content" in tr


def is_glob_tool_result(tr: ToolResultDict) -> TypeGuard[GlobToolResultDict]:
filenames = tr.get("filenames")
return "filenames" in tr and isinstance(filenames, list)


def is_grep_tool_result(tr: ToolResultDict) -> TypeGuard[GrepToolResultDict]:
return "mode" in tr and "numFiles" in tr


def is_read_tool_result(tr: ToolResultDict) -> TypeGuard[ReadToolResultDict]:
file_obj = tr.get("file")
return "file" in tr and isinstance(file_obj, dict)


def is_web_search_tool_result(tr: ToolResultDict) -> TypeGuard[WebSearchToolResultDict]:
return "query" in tr and "results" in tr


def is_web_fetch_tool_result(tr: ToolResultDict) -> TypeGuard[WebFetchToolResultDict]:
return "url" in tr and "code" in tr
Comment thread
clean6378-max-it marked this conversation as resolved.


def is_task_message_tool_result(tr: ToolResultDict) -> TypeGuard[TaskMessageToolResultDict]:
# Broad: matches ``task_id`` OR ``message``. Runs before retrieval/completed/async
# arms in tool_dispatch — same short-circuit order as the historical if/elif chain.
return "task_id" in tr or "message" in tr
Comment thread
clean6378-max-it marked this conversation as resolved.


def is_task_retrieval_tool_result(tr: ToolResultDict) -> TypeGuard[TaskRetrievalToolResultDict]:
return "retrieval_status" in tr and "task" in tr


def is_task_completed_tool_result(tr: ToolResultDict) -> TypeGuard[TaskCompletedToolResultDict]:
return "agentId" in tr and "totalDurationMs" in tr


def is_task_async_tool_result(tr: ToolResultDict) -> TypeGuard[TaskAsyncToolResultDict]:
return "agentId" in tr and "isAsync" in tr


def is_todo_write_tool_result(tr: ToolResultDict) -> TypeGuard[TodoWriteToolResultDict]:
return "newTodos" in tr or "oldTodos" in tr


def is_user_input_tool_result(tr: ToolResultDict) -> TypeGuard[UserInputToolResultDict]:
return "questions" in tr and "answers" in tr


# Known tool names as they appear on assistant tool_use blocks; these pair with
# the slug on user tool_result rows.
ToolNameLiteral = Literal[
    "Bash",
    "Read",
    "Write",
    "Edit",
    "Glob",
    "Grep",
    "Task",
    "TodoWrite",
    "WebFetch",
    "WebSearch",
]
2 changes: 1 addition & 1 deletion tests/test_real_session_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def test_task_retrieval_not_misclassified_as_task_message() -> None:
def test_task_completed_with_message_key_matches_task_message_first() -> None:
"""Legacy dispatch: broad task_message runs before task_completed when ``message`` present.

``_tool_result_pred_task_message`` matches any dict with a ``message`` or ``task_id``
``is_task_message_tool_result`` matches any dict with a ``message`` or ``task_id``
key. Future tool shapes that add ``message`` for status text (e.g. web-fetch) would
be misclassified as task until dispatch order is refined — this test locks that
known false-positive surface.
Expand Down
Loading
Loading