From a794a09e95716671e39f4c91207aa24a6ea041a5 Mon Sep 17 00:00:00 2001
From: patrickschmied <patrickschmied@icloud.com>
Date: Tue, 9 Jun 2026 15:00:59 -0400
Subject: [PATCH 1/5] feat: end-to-end protocol hardening + agent onboarding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Spec (chp-v0.1.md):
- §5: add payload validation SHOULD/MUST + duplicate registration semantics
- §6: strengthen append-only from SHOULD to MUST
- §9: enumerate standard denial codes table
- §10: add replay query bounds recommendation (cap: 10,000)
- §13: add conformance tagging requirement

Implementation (host.py):
- Input schema validation against input_schema before handler runs
  (denial code: input_schema_validation_failed)
- Replay query capped at MAX_REPLAY_LIMIT=10_000 to prevent memory exhaustion
- Thread-safe capability registry via RLock on _capabilities dict
- Duplicate registration now emits warnings.warn before overwriting (no longer silent)
- Full traceback in execution_failed evidence (removed limit=3)

Onboarding:
- Add AGENTS.md — machine-first orientation for AI agents (invariants,
  commands, navigation, pitfalls); follows AGENTS.md open spec
- Add docs/llms.txt — ultra-compact protocol reference for LLM context windows
- Redirect three legacy docs (onboarding, agent-prompt, capability-lookup-prompt)
  from Zenoh-era content to current entry points

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 AGENTS.md                        | 48 +++++++++++++++++++
 docs/agent-prompt.md             | 10 ++--
 docs/capability-lookup-prompt.md | 10 ++--
 docs/llms.txt                    | 77 ++++++++++++++++++++++++++++++
 docs/onboarding.md               |  8 +++-
 packages/python/chp_core/host.py | 81 ++++++++++++++++++++++----------
 spec/chp-v0.1.md                 | 23 ++++++++-
 7 files changed, 220 insertions(+), 37 deletions(-)
 create mode 100644 AGENTS.md
 create mode 100644 docs/llms.txt

diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..f5a2b04
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,48 @@
+# AGENTS.md — Capability Host Protocol
+
+CHP is a protocol and Python SDK for making agent, tool, and system execution **observable, replayable, and governable**. Every function wrapped as a `@capability` gets automatic evidence emission, correlation propagation, replay by session ID, and optional policy enforcement — with zero mandatory infrastructure. The reference host is `LocalCapabilityHost` in `packages/python/chp_core/`.
+
+## Three invariants you must never violate
+
+1. **Evidence is append-only.** Never modify or delete rows in an evidence SQLite store. The SHA256 hash chain breaks if any row changes. `store.py` is insert-only by design.
+2. **Preserve caller correlation IDs.** If an `InvocationEnvelope` arrives with a `correlation_id`, the host must forward it verbatim into every evidence event and the result. Never generate a new ID over a supplied one.
+3. **Spec, schemas, and types must stay in sync.** Any change to `spec/chp-v0.1.md`, `schemas/*.json`, or `packages/python/chp_core/types.py` must be validated with `chp work check-alignment --repo-root .` (runs 41 cross-artifact checks). CI will catch drift, but run it locally first.
+
+## Key commands
+
+```bash
+# Fast test suite (~6s)
+cd packages/python && python -m pytest tests/ -m "not slow" -q --no-cov
+
+# Full test suite
+cd packages/python && python -m pytest tests/ -q --no-cov
+
+# Protocol conformance (9 checks)
+python conformance/runner.py
+
+# Spec/schema/type alignment (41 checks)
+PYTHONPATH=packages/python chp work check-alignment --repo-root .
+
+# Wire evidence capture for every Claude Code session
+PYTHONPATH=packages/python chp hooks install
+```
+
+## Navigation
+
+| Where to look | What you'll find |
+|---|---|
+| `spec/chp-v0.1.md` | Normative protocol — start here for definitions and MUST/SHOULD requirements |
+| `schemas/` | JSON Schema for every protocol object (29 files) |
+| `packages/python/chp_core/host.py` | `LocalCapabilityHost` — registration, invocation, evidence emission |
+| `packages/python/chp_core/store.py` | `SQLiteEvidenceStore` — append-only, SHA256-chained |
+| `packages/python/chp_core/types.py` | Python dataclasses for all protocol objects |
+| `conformance/runner.py` | 9 conformance checks against a live host |
+| `docs/adopter-quickstart.md` | 10-minute path to first evidence event |
+| `examples/` | 14 runnable demos |
+
+## Common pitfalls
+
+- **Three docs are legacy** (`docs/onboarding.md`, `docs/agent-prompt.md`, `docs/capability-lookup-prompt.md`) — they describe pre-v0.1 patterns (Zenoh-mesh adoption and fleet-lookup patterns, and TypeScript adoption paths). Do not update or reference them. They redirect to the current docs.
+- **`chp-dev` is the private monorepo.** `chp-core` (this repo) is the public mirror, synced via `scripts/sync-to-public.sh`. If you're in `chp-core`, do not manually sync — the pipeline handles it.
+- **`jsonschema` is a transitive dep.** Input schema validation in `host.py` uses a lazy import — do not add it to the top-level imports or it becomes a hard dependency for all users.
+- **`host.invoke()` cannot run inside an async loop** — use `await host.ainvoke()` instead. The sync wrapper raises `RuntimeError` if an event loop is running.
diff --git a/docs/agent-prompt.md b/docs/agent-prompt.md
index cd6d04a..f5b3c13 100644
--- a/docs/agent-prompt.md
+++ b/docs/agent-prompt.md
@@ -1,10 +1,12 @@
 # CHP Adoption — Capability Host Protocol
 
-Status: legacy internal adoption prompt.
+Status: legacy. This document describes pre-v0.1 TypeScript adoption paths that are no longer in use.
 
-This prompt targets older internal TypeScript adoption paths. It is not the
-public CHP v0.1 launch guide. For public v0.1 usage, start with `README.md`,
-`docs/quickstart.md`, and `spec/chp-v0.1.md`.
+Start here instead:
+- `README.md` — protocol overview and quick start
+- `docs/adopter-quickstart.md` — 10-minute path to first evidence event
+- `spec/chp-v0.1.md` — normative protocol specification
+- `AGENTS.md` — orientation for AI agents working in this repo
 
 CHP lets you expose your project's operations as governed, evidence-emitting capabilities. Any function you wrap gets automatic entitlement checks, risk classification, and evidence trails.
 
diff --git a/docs/capability-lookup-prompt.md b/docs/capability-lookup-prompt.md
index 0fae40f..14b31ea 100644
--- a/docs/capability-lookup-prompt.md
+++ b/docs/capability-lookup-prompt.md
@@ -1,10 +1,12 @@
 # CHP Capability Lookup — Find and Invoke Capabilities
 
-Status: legacy internal mesh lookup prompt.
+Status: legacy. This document describes pre-v0.1 Zenoh-mesh fleet lookup patterns that are no longer in use.
 
-This document describes older internal fleet and Zenoh-mesh lookup patterns. It
-is not required for CHP v0.1 conformance or public launch usage. For the public
-v0.1 protocol, start with `spec/chp-v0.1.md`.
+Start here instead:
+- `README.md` — protocol overview and quick start
+- `docs/adopter-quickstart.md` — 10-minute path to first evidence event
+- `spec/chp-v0.1.md` — normative protocol specification
+- `AGENTS.md` — orientation for AI agents working in this repo
 
 Use this prompt to discover what CHP capabilities exist across the fleet and how to invoke them.
 
diff --git a/docs/llms.txt b/docs/llms.txt
new file mode 100644
index 0000000..8590b71
--- /dev/null
+++ b/docs/llms.txt
@@ -0,0 +1,77 @@
+# Capability Host Protocol v0.1 — Compact Reference
+
+## Purpose
+Make agent, tool, and system execution observable, replayable, and governable at the capability boundary. Local-first, transport-agnostic, zero mandatory deps.
+
+## Canonical protocol objects (8)
+CapabilityDescriptor · HostDescriptor · InvocationEnvelope · InvocationResult · ExecutionEvidence · CorrelationContext · ReplayQuery · ReplayResult
+
+## CapabilityDescriptor required fields
+id · version · description · modes (min: ["sync"]) · emits
+Optional: input_schema · output_schema · invariants · risk · assurance · owner · tags
+
+## HostDescriptor required fields
+id · version · protocol_version ("0.1") · kind · capabilities · evidence
+
+## InvocationEnvelope required fields
+invocation_id · capability_id · mode · correlation · subject · payload · requested_at
+Rule: if caller supplies correlation_id, host MUST preserve it. If absent, host MUST generate one.
+Rule: host SHOULD validate payload against input_schema when present.
+
+## ExecutionEvidence required fields
+event_id · event_type · invocation_id · capability_id · host_id · correlation · timestamp · sequence · payload · redacted · assurance
+Rule: Evidence MUST be stored append-only. Hosts MUST NOT modify or delete events.
+
+## Core evidence event types
+execution_started · execution_completed · execution_failed · execution_denied · execution_skipped
+
+## Outcome enum
+success — handler completed and returned data
+failure — execution began but failed
+denied  — host rejected before handler ran
+skipped — host intentionally did not execute
+
+## Evidence emission rules
+success  → MUST emit execution_started + execution_completed
+failure  → MUST emit execution_started + execution_failed
+denied   → MUST emit execution_denied (may occur before execution_started)
+skipped  → MUST emit execution_skipped
+
+## Standard denial codes
+capability_not_found          — no capability at requested URI
+capability_disabled           — exists but disabled
+unsupported_mode              — mode not in descriptor.modes
+invariant_failed              — declared invariant rejected invocation
+input_schema_validation_failed — payload failed input_schema validation
+policy_block_pattern_matched  — policy block pattern matched payload
+risk_tier_exceeded            — payload risk tier above configured max
+entitlement_denied            — caller lacks required entitlement
+
+## ReplayQuery fields
+correlation_id (required) · limit (optional, RECOMMENDED cap: 10000) · since_sequence · include_payloads
+
+## ReplayResult fields
+correlation_id · events (ordered by sequence) · event_count · replayed_at
+
+## Conformance checklist (9 requirements)
+1. Capability declaration
+2. Capability discovery
+3. Invocation through envelope-compatible boundary
+4. Correlation propagation
+5. Evidence emission on success
+6. Evidence emission on failure
+7. Evidence emission on denial or unsupported action
+8. Replay by correlation ID
+9. Representation of skipped execution (where host supports it)
+
+## Non-goals (v0.1)
+Distributed host discovery · required network transport · workflow language · agent framework · complete policy engine · enterprise RBAC · hosted retention · cryptographic proof of evidence integrity · replacement for MCP / OTel / Temporal / Kafka / API gateways
+
+## Key file paths
+spec/chp-v0.1.md                            — normative specification
+schemas/                                    — JSON Schema for all protocol objects
+packages/python/chp_core/host.py            — LocalCapabilityHost reference impl
+packages/python/chp_core/store.py           — SQLiteEvidenceStore (append-only, SHA256-chained)
+packages/python/chp_core/types.py           — Python protocol dataclasses
+conformance/runner.py                       — 9-check conformance suite
+docs/adopter-quickstart.md                  — 10-minute adopter path
diff --git a/docs/onboarding.md b/docs/onboarding.md
index 3b1bd42..366ef00 100644
--- a/docs/onboarding.md
+++ b/docs/onboarding.md
@@ -1,8 +1,12 @@
 # CHP Onboarding Guide
 
-Status: legacy mesh-oriented onboarding.
+Status: legacy. This document describes pre-v0.1 Zenoh-mesh adoption patterns that are no longer in use.
 
-For the open-source v0.1 launch path, start with `README.md`, `docs/quickstart.md`, and `spec/chp-v0.1.md`. CHP v0.1 is local-first and evidence-first. Zenoh mesh participation and heavier governance are post-v0.1 transport and product layers, not requirements for conformance.
+Start here instead:
+- `README.md` — protocol overview and quick start
+- `docs/adopter-quickstart.md` — 10-minute path to first evidence event
+- `spec/chp-v0.1.md` — normative protocol specification
+- `AGENTS.md` — orientation for AI agents working in this repo
 
 How to adopt the Capability Host Protocol in your project. Self-serve — pick your language, follow the path.
 
diff --git a/packages/python/chp_core/host.py b/packages/python/chp_core/host.py
index 1ac8f96..29731f3 100644
--- a/packages/python/chp_core/host.py
+++ b/packages/python/chp_core/host.py
@@ -4,10 +4,14 @@
 
 import asyncio
 import inspect
+import threading
 import traceback
+import warnings
 from dataclasses import dataclass
 from typing import Any, Awaitable, Callable
 
+MAX_REPLAY_LIMIT = 10_000
+
 from .store import SQLiteEvidenceStore
 from .decorators import adapt_callable, get_capability_descriptor
 from .redaction import redact_payload
@@ -99,6 +103,7 @@ def __init__(
         self.store = store or SQLiteEvidenceStore()
         self.metadata = metadata or {}
         self._capabilities: dict[str, RegisteredCapability] = {}
+        self._registry_lock = threading.RLock()
 
     def register(
         self,
@@ -121,20 +126,27 @@ def register(
             raise ValueError("capability descriptor id is required")
         if not descriptor.version:
             raise ValueError("capability descriptor version is required")
-        self._capabilities[descriptor.capability_uri] = RegisteredCapability(
-            descriptor=descriptor,
-            handler=handler,
-            enabled=enabled,
-        )
+        with self._registry_lock:
+            if descriptor.capability_uri in self._capabilities:
+                warnings.warn(
+                    f"Capability '{descriptor.capability_uri}' already registered — overwriting.",
+                    stacklevel=2,
+                )
+            self._capabilities[descriptor.capability_uri] = RegisteredCapability(
+                descriptor=descriptor,
+                handler=handler,
+                enabled=enabled,
+            )
         return descriptor
 
     def descriptor(self) -> HostDescriptor:
-        return HostDescriptor(
-            id=self.host_id,
-            version=self.version,
-            capabilities=[entry.descriptor for entry in self._capabilities.values()],
-            metadata=self.metadata,
-        )
+        with self._registry_lock:
+            return HostDescriptor(
+                id=self.host_id,
+                version=self.version,
+                capabilities=[entry.descriptor for entry in self._capabilities.values()],
+                metadata=self.metadata,
+            )
 
     def discover(
         self,
@@ -192,8 +204,8 @@ def replay_result(self, query: ReplayQuery | JSON | str) -> ReplayResult:
             events = [event for event in events if event["sequence"] > query.since_sequence]
         if not query.include_payloads:
             events = [{**event, "payload": {}} for event in events]
-        if query.limit is not None:
-            events = events[: query.limit]
+        effective_limit = min(query.limit, MAX_REPLAY_LIMIT) if query.limit is not None else MAX_REPLAY_LIMIT
+        events = events[:effective_limit]
         return ReplayResult(
             correlation_id=query.correlation_id,
             events=events,
@@ -379,6 +391,21 @@ async def ainvoke_envelope(self, envelope: InvocationEnvelope | JSON) -> Invocat
         if autonomy_denial is not None:
             return self._deny(envelope, autonomy_denial)
 
+        if descriptor.input_schema:
+            try:
+                import jsonschema
+                jsonschema.validate(envelope.payload, descriptor.input_schema)
+            except Exception as exc:
+                return self._deny(
+                    envelope,
+                    DenialReason(
+                        code="input_schema_validation_failed",
+                        message=str(exc).split("\n")[0],
+                        retryable=False,
+                        details={"schema_id": descriptor.input_schema.get("$id")},
+                    ),
+                )
+
         started = self.emit_evidence(
             "execution_started",
             envelope,
@@ -415,8 +442,9 @@ async def ainvoke_envelope(self, envelope: InvocationEnvelope | JSON) -> Invocat
                 outcome="failure",
                 error={
                     "type": exc.__class__.__name__,
+                    "error_type": exc.__class__.__name__,
                     "message": str(exc),
-                    "traceback": traceback.format_exc(limit=3),
+                    "traceback": traceback.format_exc(),
                 },
             )
             return InvocationResult(
@@ -462,18 +490,19 @@ def emit_evidence(
         return self.store.append(event)
 
     def _resolve(self, capability_id: str, version: str | None) -> RegisteredCapability | None:
-        if ":" in capability_id and version is None:
-            return self._capabilities.get(capability_id)
-        if version is not None:
-            return self._capabilities.get(f"{capability_id}:{version}")
-        matches = [
-            entry
-            for uri, entry in self._capabilities.items()
-            if uri.startswith(f"{capability_id}:")
-        ]
-        if len(matches) == 1:
-            return matches[0]
-        return None
+        with self._registry_lock:
+            if ":" in capability_id and version is None:
+                return self._capabilities.get(capability_id)
+            if version is not None:
+                return self._capabilities.get(f"{capability_id}:{version}")
+            matches = [
+                entry
+                for uri, entry in self._capabilities.items()
+                if uri.startswith(f"{capability_id}:")
+            ]
+            if len(matches) == 1:
+                return matches[0]
+            return None
 
     def _deny(self, envelope: InvocationEnvelope, denial: DenialReason) -> InvocationResult:
         denied = self.emit_evidence(
diff --git a/spec/chp-v0.1.md b/spec/chp-v0.1.md
index 33acfc0..b988485 100644
--- a/spec/chp-v0.1.md
+++ b/spec/chp-v0.1.md
@@ -103,6 +103,10 @@ If a caller supplies a correlation ID, the host MUST preserve it. If no correlat
 
 Hosts SHOULD NOT copy raw invocation payloads into evidence by default. Capabilities may emit explicit redacted evidence payloads.
 
+A host SHOULD validate `payload` against the capability's `input_schema` when present. Validation failures MUST produce a `denied` outcome, recorded as an `execution_denied` evidence event with denial code `input_schema_validation_failed`, and MUST NOT invoke the capability handler.
+
+If a capability URI (`id:version`) is registered more than once on the same host, the host MUST either raise an error or emit a warning. A host MUST NOT silently overwrite an existing registration.
+
 Schema: `schemas/invocation-envelope.schema.json`
 
 ## 6. Execution Evidence Schema
@@ -133,7 +137,7 @@ Required fields:
 - `redacted`
 - `assurance`
 
-Evidence SHOULD be stored append-only. v0.1 does not require cryptographic tamper evidence, remote notarization, or consensus.
+Evidence MUST be stored append-only. Hosts MUST NOT modify or delete evidence events after they are written. v0.1 does not require cryptographic tamper evidence, remote notarization, or consensus.
 
 Schemas:
 
@@ -195,6 +199,19 @@ Denial records SHOULD include:
 - `retryable`
 - structured `details`
 
+**Standard denial codes.** Implementations SHOULD use these stable codes when applicable:
+
+| Code | When |
+|---|---|
+| `capability_not_found` | No capability registered at the requested URI |
+| `capability_disabled` | Capability exists but is disabled |
+| `unsupported_mode` | Requested `mode` not in `modes` |
+| `invariant_failed` | A declared invariant rejected the invocation |
+| `input_schema_validation_failed` | Payload failed `input_schema` validation |
+| `policy_block_pattern_matched` | A policy block pattern matched the payload |
+| `risk_tier_exceeded` | Payload risk tier above configured maximum |
+| `entitlement_denied` | Caller lacks required entitlement |
+
 v0.1 does not require a complete entitlement system. A host may deny based on local rules or invariants.
 
 ## 10. Replay Semantics
@@ -217,6 +234,8 @@ A replay result contains:
 
 Replay ordering is by local evidence sequence. v0.1 does not define cross-host total ordering.
 
+Hosts SHOULD enforce a maximum `limit` (RECOMMENDED cap: 10,000 events). Clients requesting an unbounded replay MAY receive a bounded result; the `event_count` field reflects the actual count returned.
+
 Schemas:
 
 - `schemas/replay-query.schema.json`
@@ -263,3 +282,5 @@ Patch-level implementation changes may occur without changing the protocol versi
 Breaking schema or semantic changes require a new protocol version. Until v1.0, breaking changes may occur, but they should be documented with migration notes and conformance updates.
 
 Capability versions are independent of protocol versions. A capability descriptor version identifies the action contract, not the CHP protocol revision.
+
+Conformance runners SHOULD tag results with the `protocol_version` they validated against. A conformance pass is valid only for the protocol version under which it was run.

From 0fdd73de03e0a22b8801d2d95191014da8f3b65a Mon Sep 17 00:00:00 2001
From: patrickschmied <patrickschmied@icloud.com>
Date: Tue, 9 Jun 2026 21:50:43 -0400
Subject: [PATCH 2/5] =?UTF-8?q?feat:=20end-to-end=20protocol=20gap=20closu?=
 =?UTF-8?q?re=20=E2=80=94=20input=20validation,=20truncation=20flag,=20rem?=
 =?UTF-8?q?ote=20resilience,=20test=20coverage?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pillar 1 — Input validation at public API boundaries
- Empty capability_id now denies with capability_not_found before _resolve()
- Unknown mode strings caught early in ainvoke() with unsupported_mode denial
- jsonschema ValidationError split from generic Exception; denial now includes path of failing field in details

Pillar 2 — ReplayResult.truncated
- types.py: add truncated: bool = False field to ReplayResult
- host.py: replay_result() sets truncated=True whenever the effective limit (query.limit, capped at MAX_REPLAY_LIMIT) drops events
- schemas/replay-result.schema.json: add truncated property
- spec/chp-v0.1.md §10: add MUST sentence for truncated flag
- docs/llms.txt: update ReplayResult fields line

Pillar 3 — RemoteCapabilityHost resilience
- _send() catches URLError → ConnectionError and OSError → ConnectionError
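- Non-JSON 200 responses raise RuntimeError("CHP remote returned non-JSON response: ...");
  JSON bodies that are not objects raise RuntimeError naming the unexpected type
- Raw body capped at 500 chars in HTTP-error messages and 200 chars in non-JSON messages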

Pillar 4 — Test coverage gaps
- test_local_host.py: unsupported_mode, empty_capability_id, unknown_mode_string, input_schema_error_includes_field_path; truncated and disabled-capability assertions
- test_http.py: connection refused, non-JSON 200, HTTP 500 remote tests; HTTPServerEdgeCaseTests class (404, malformed body)
- test_conformance_runner.py: new file — 4 subprocess self-tests verifying passing sample passes and broken samples fail expected checks

745 tests pass, 29/29 conformance checks pass, 42/42 alignment checks pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 docs/llms.txt                                 |   2 +-
 packages/python/chp_core/host.py              |  37 +++-
 packages/python/chp_core/http.py              |  28 ++-
 packages/python/chp_core/types.py             |   1 +
 .../python/tests/test_conformance_runner.py   |  44 +++++
 packages/python/tests/test_http.py            | 102 ++++++++++
 packages/python/tests/test_local_host.py      | 186 ++++++++++++++++++
 schemas/replay-result.schema.json             |   5 +
 spec/chp-v0.1.md                              |   3 +-
 9 files changed, 401 insertions(+), 7 deletions(-)
 create mode 100644 packages/python/tests/test_conformance_runner.py

diff --git a/docs/llms.txt b/docs/llms.txt
index 8590b71..278a4a0 100644
--- a/docs/llms.txt
+++ b/docs/llms.txt
@@ -51,7 +51,7 @@ entitlement_denied            — caller lacks required entitlement
 correlation_id (required) · limit (optional, RECOMMENDED cap: 10000) · since_sequence · include_payloads
 
 ## ReplayResult fields
-correlation_id · events (ordered by sequence) · event_count · replayed_at
+correlation_id · events (ordered by sequence) · event_count · truncated · replayed_at
 
 ## Conformance checklist (9 requirements)
 1. Capability declaration
diff --git a/packages/python/chp_core/host.py b/packages/python/chp_core/host.py
index 29731f3..92924ff 100644
--- a/packages/python/chp_core/host.py
+++ b/packages/python/chp_core/host.py
@@ -205,11 +205,13 @@ def replay_result(self, query: ReplayQuery | JSON | str) -> ReplayResult:
         if not query.include_payloads:
             events = [{**event, "payload": {}} for event in events]
         effective_limit = min(query.limit, MAX_REPLAY_LIMIT) if query.limit is not None else MAX_REPLAY_LIMIT
+        total_available = len(events)
         events = events[:effective_limit]
         return ReplayResult(
             correlation_id=query.correlation_id,
             events=events,
             event_count=len(events),
+            truncated=len(events) < total_available,
         )
 
     def query_evidence(
@@ -340,6 +342,16 @@ async def ainvoke(
             subject=subject or {"id": "local", "type": "user"},
             metadata=metadata or {},
         )
+        _KNOWN_MODES = {"sync", "async", "stream", "fire_and_forget"}
+        if envelope.mode not in _KNOWN_MODES:
+            return self._deny(
+                envelope,
+                DenialReason(
+                    code="unsupported_mode",
+                    message=f"Unknown invocation mode {envelope.mode!r}. Standard modes: {sorted(_KNOWN_MODES)}",
+                    retryable=False,
+                ),
+            )
         return await self.ainvoke_envelope(envelope)
 
     async def invoke_envelope(self, envelope: InvocationEnvelope | JSON) -> InvocationResult:
@@ -349,6 +361,16 @@ async def ainvoke_envelope(self, envelope: InvocationEnvelope | JSON) -> Invocat
         if isinstance(envelope, dict):
             envelope = InvocationEnvelope.from_mapping(envelope)
 
+        if not envelope.capability_id or not envelope.capability_id.strip():
+            return self._deny(
+                envelope,
+                DenialReason(
+                    code="capability_not_found",
+                    message="capability_id must be a non-empty string",
+                    retryable=False,
+                ),
+            )
+
         entry = self._resolve(envelope.capability_id, envelope.version)
         if entry is None:
             return self._deny(
@@ -395,12 +417,25 @@ async def ainvoke_envelope(self, envelope: InvocationEnvelope | JSON) -> Invocat
             try:
                 import jsonschema
                 jsonschema.validate(envelope.payload, descriptor.input_schema)
+            except jsonschema.ValidationError as exc:
+                return self._deny(
+                    envelope,
+                    DenialReason(
+                        code="input_schema_validation_failed",
+                        message=exc.message,
+                        retryable=False,
+                        details={
+                            "schema_id": descriptor.input_schema.get("$id"),
+                            "path": list(exc.absolute_path) or None,
+                        },
+                    ),
+                )
             except Exception as exc:
                 return self._deny(
                     envelope,
                     DenialReason(
                         code="input_schema_validation_failed",
-                        message=str(exc).split("\n")[0],
+                        message=f"Schema validation error: {exc}",
                         retryable=False,
                         details={"schema_id": descriptor.input_schema.get("$id")},
                     ),
diff --git a/packages/python/chp_core/http.py b/packages/python/chp_core/http.py
index 06ec15b..43fb8b7 100644
--- a/packages/python/chp_core/http.py
+++ b/packages/python/chp_core/http.py
@@ -181,21 +181,41 @@ def _post(self, path: str, body: JSON) -> JSON:
         return self._send(req)
 
     def _send(self, req: Request) -> JSON:
-        from urllib.error import HTTPError
+        from urllib.error import HTTPError, URLError
 
         try:
             with urlopen(req, timeout=self._timeout) as resp:
-                return json.loads(resp.read().decode("utf-8"))
+                body = resp.read().decode("utf-8")
         except HTTPError as exc:
             body = exc.read().decode("utf-8", errors="replace")
             try:
                 detail = json.loads(body)
             except Exception:
-                detail = {"raw": body}
+                detail = {"raw": body[:500]}
+            raise RuntimeError(f"CHP remote error {exc.code}: {detail}") from exc
+        except URLError as exc:
+            raise ConnectionError(
+                f"CHP remote host unavailable ({self._base}): {exc.reason}"
+            ) from exc
+        except OSError as exc:
+            raise ConnectionError(
+                f"CHP remote host connection failed ({self._base}): {exc}"
+            ) from exc
+
+        try:
+            data = json.loads(body)
+        except json.JSONDecodeError as exc:
             raise RuntimeError(
-                f"CHP remote error {exc.code}: {detail}"
+                f"CHP remote returned non-JSON response: {body[:200]!r}"
             ) from exc
 
+        if not isinstance(data, dict):
+            raise RuntimeError(
+                f"CHP remote returned unexpected response type: {type(data).__name__}"
+            )
+
+        return data
+
     @staticmethod
     def _parse_result(data: JSON) -> InvocationResult:
         denial_raw = data.get("denial")
diff --git a/packages/python/chp_core/types.py b/packages/python/chp_core/types.py
index cabe620..ce1ada0 100644
--- a/packages/python/chp_core/types.py
+++ b/packages/python/chp_core/types.py
@@ -834,6 +834,7 @@ class ReplayResult:
     correlation_id: str
     events: list[JSON]
     event_count: int
+    truncated: bool = False
     replayed_at: str = field(default_factory=utc_now)
 
     def to_dict(self) -> JSON:
diff --git a/packages/python/tests/test_conformance_runner.py b/packages/python/tests/test_conformance_runner.py
new file mode 100644
index 0000000..23471f2
--- /dev/null
+++ b/packages/python/tests/test_conformance_runner.py
@@ -0,0 +1,44 @@
+"""Self-tests: conformance runner must pass on passing sample and fail on broken samples."""
+
+from __future__ import annotations
+
+import subprocess
+import sys
+import unittest
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parents[3]
+RUNNER = REPO_ROOT / "conformance" / "runner.py"
+
+
+def _run(sample: str) -> subprocess.CompletedProcess:
+    return subprocess.run(
+        [sys.executable, str(RUNNER), "--sample", sample],
+        capture_output=True,
+        text=True,
+        timeout=60,
+    )
+
+
+class ConformanceRunnerSelfTests(unittest.TestCase):
+    def test_passing_sample_exits_zero(self) -> None:
+        result = _run("passing")
+        self.assertEqual(result.returncode, 0, msg=result.stdout + result.stderr)
+
+    def test_failing_non_standard_codes_exits_nonzero(self) -> None:
+        result = _run("failing-non-standard-codes")
+        self.assertNotEqual(result.returncode, 0)
+        self.assertIn("FAIL standard denial codes", result.stdout)
+
+    def test_failing_no_hash_chain_exits_nonzero(self) -> None:
+        result = _run("failing-no-hash-chain")
+        self.assertNotEqual(result.returncode, 0)
+        self.assertIn("FAIL evidence hash chain", result.stdout)
+
+    def test_failing_no_evidence_exits_nonzero(self) -> None:
+        result = _run("failing-no-evidence")
+        self.assertNotEqual(result.returncode, 0)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/packages/python/tests/test_http.py b/packages/python/tests/test_http.py
index e053b84..a31911a 100644
--- a/packages/python/tests/test_http.py
+++ b/packages/python/tests/test_http.py
@@ -244,6 +244,108 @@ def test_result_has_invocation_id_and_capability_id(self) -> None:
         self.assertIn("inv", result.invocation_id)
         self.assertEqual(result.capability_id, "math.multiply")
 
+    def test_remote_connection_refused_raises_connection_error(self) -> None:
+        dead = RemoteCapabilityHost("http://127.0.0.1:1")  # port 1 — no listener
+        with self.assertRaises(ConnectionError):
+            dead.invoke("any.cap", {})
+
+    def test_remote_non_json_response_raises_runtime_error(self) -> None:
+        """A 200 response with non-JSON body should raise RuntimeError."""
+        import socketserver
+        import threading
+        from http.server import BaseHTTPRequestHandler
+
+        class HTMLHandler(BaseHTTPRequestHandler):
+            def do_POST(self):
+                body = b"<html><body>Not JSON</body></html>"
+                self.send_response(200)
+                self.send_header("Content-Type", "text/html")
+                self.send_header("Content-Length", str(len(body)))
+                self.end_headers()
+                self.wfile.write(body)
+
+            def log_message(self, *args): pass
+
+        srv = socketserver.TCPServer(("127.0.0.1", 0), HTMLHandler)
+        t = threading.Thread(target=srv.serve_forever, daemon=True)
+        t.start()
+        try:
+            remote = RemoteCapabilityHost(f"http://127.0.0.1:{srv.server_address[1]}")
+            with self.assertRaises(RuntimeError) as ctx:
+                remote.invoke("any.cap", {})
+            self.assertIn("non-JSON", str(ctx.exception))
+        finally:
+            srv.shutdown()
+            srv.server_close()
+
+    def test_remote_http_500_raises_runtime_error(self) -> None:
+        """A 500 response should raise RuntimeError mentioning the status code."""
+        import socketserver
+        import threading
+        from http.server import BaseHTTPRequestHandler
+
+        class ErrorHandler(BaseHTTPRequestHandler):
+            def do_POST(self):
+                body = b'{"error": "internal"}'
+                self.send_response(500)
+                self.send_header("Content-Type", "application/json")
+                self.send_header("Content-Length", str(len(body)))
+                self.end_headers()
+                self.wfile.write(body)
+
+            def log_message(self, *args): pass
+
+        srv = socketserver.TCPServer(("127.0.0.1", 0), ErrorHandler)
+        t = threading.Thread(target=srv.serve_forever, daemon=True)
+        t.start()
+        try:
+            remote = RemoteCapabilityHost(f"http://127.0.0.1:{srv.server_address[1]}")
+            with self.assertRaises(RuntimeError) as ctx:
+                remote.invoke("any.cap", {})
+            self.assertIn("500", str(ctx.exception))
+        finally:
+            srv.shutdown()
+            srv.server_close()
+
+
+class HTTPServerEdgeCaseTests(unittest.TestCase):
+    """Tests for HTTP server error handling."""
+
+    def setUp(self) -> None:
+        self.host = LocalCapabilityHost("edge-test-host", store=SQLiteEvidenceStore(":memory:"))
+        self.server = create_http_server(self.host, port=0)
+        self.thread = threading.Thread(target=self.server.serve_forever, daemon=True)
+        self.thread.start()
+        self.base_url = f"http://127.0.0.1:{self.server.server_port}"
+
+    def tearDown(self) -> None:
+        self.server.shutdown()
+        self.server.server_close()
+        self.thread.join(timeout=2)
+
+    def test_unknown_path_returns_404(self) -> None:
+        from urllib.error import HTTPError
+        with self.assertRaises(HTTPError) as ctx:
+            urlopen(f"{self.base_url}/nonexistent", timeout=5)
+        self.assertEqual(ctx.exception.code, 404)
+
+    def test_malformed_json_body_returns_error_response(self) -> None:
+        raw = b"this is not json"
+        req = Request(
+            f"{self.base_url}/invoke",
+            data=raw,
+            headers={"Content-Type": "application/json"},
+            method="POST",
+        )
+        from urllib.error import HTTPError
+        try:
+            with urlopen(req, timeout=5) as resp:
+                result = json.loads(resp.read())
+            # some servers return 200 with an error body
+            self.assertFalse(result.get("success", True))
+        except HTTPError as exc:
+            self.assertIn(exc.code, (400, 422, 500))
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/packages/python/tests/test_local_host.py b/packages/python/tests/test_local_host.py
index 33658c4..53bb18c 100644
--- a/packages/python/tests/test_local_host.py
+++ b/packages/python/tests/test_local_host.py
@@ -158,6 +158,7 @@ async def handler(_ctx, _payload):
 
         self.assertFalse(result.success)
         self.assertEqual(result.outcome, "skipped")
+        self.assertEqual(result.error["code"], "capability_disabled")
         replay = self.host.replay("corr-skipped")
         self.assertEqual([event["event_type"] for event in replay], ["execution_skipped"])
 
@@ -379,6 +380,191 @@ async def handler(_ctx, _payload):
 
         self.assertIn("ainvoke", str(cm.exception))
 
+    # --- Phase 2 hardening tests ---
+
+    async def test_input_schema_validation_denied(self) -> None:
+        async def handler(_ctx, _payload):
+            return {"ok": True}
+
+        self.host.register(
+            CapabilityDescriptor(
+                id="typed.cap",
+                version="1.0.0",
+                description="Typed capability.",
+                input_schema={
+                    "type": "object",
+                    "properties": {"x": {"type": "integer"}},
+                    "required": ["x"],
+                    "additionalProperties": False,
+                },
+            ),
+            handler,
+        )
+
+        result = await self.host.ainvoke(
+            "typed.cap",
+            {"x": "not-an-int"},
+            correlation={"correlation_id": "corr-schema-denied"},
+        )
+
+        self.assertFalse(result.success)
+        self.assertEqual(result.outcome, "denied")
+        self.assertEqual(result.denial.code, "input_schema_validation_failed")
+        self.assertFalse(result.denial.retryable)
+        replay = self.host.replay("corr-schema-denied")
+        self.assertEqual(len(replay), 1)
+        self.assertEqual(replay[0]["denial"]["code"], "input_schema_validation_failed")
+
+    async def test_input_schema_valid_payload_succeeds(self) -> None:
+        async def handler(_ctx, _payload):
+            return {"ok": True}
+
+        self.host.register(
+            CapabilityDescriptor(
+                id="typed.valid",
+                version="1.0.0",
+                description="Typed capability — valid path.",
+                input_schema={
+                    "type": "object",
+                    "properties": {"x": {"type": "integer"}},
+                    "required": ["x"],
+                    "additionalProperties": False,
+                },
+            ),
+            handler,
+        )
+
+        result = await self.host.ainvoke(
+            "typed.valid",
+            {"x": 42},
+            correlation={"correlation_id": "corr-schema-ok"},
+        )
+
+        self.assertTrue(result.success)
+        self.assertEqual(result.outcome, "success")
+
+    async def test_replay_result_limit_is_respected(self) -> None:
+        from chp_core.host import MAX_REPLAY_LIMIT
+        self.assertGreater(MAX_REPLAY_LIMIT, 0)
+
+        async def handler(_ctx, _payload):
+            return {}
+
+        self.host.register(
+            CapabilityDescriptor(id="limit.cap", version="1.0.0", description=""),
+            handler,
+        )
+
+        corr = "corr-limit"
+        for _ in range(5):
+            await self.host.ainvoke("limit.cap", {}, correlation={"correlation_id": corr})
+
+        full = self.host.replay_result(ReplayQuery(correlation_id=corr))
+        self.assertEqual(full.event_count, 10)
+
+        capped = self.host.replay_result(ReplayQuery(correlation_id=corr, limit=3))
+        self.assertEqual(capped.event_count, 3)
+        self.assertEqual(len(capped.events), 3)
+        self.assertTrue(capped.truncated)
+        self.assertFalse(full.truncated)
+
+    def test_duplicate_registration_emits_warning(self) -> None:
+        import warnings as _warnings
+        desc = CapabilityDescriptor(id="dup.cap", version="1.0.0", description="Duplicate.")
+
+        async def handler(_ctx, _payload):
+            return {}
+
+        self.host.register(desc, handler)
+        with _warnings.catch_warnings(record=True) as w:
+            _warnings.simplefilter("always")
+            self.host.register(desc, handler)
+
+        self.assertEqual(len(w), 1)
+        self.assertIn("already registered", str(w[0].message))
+
+    async def test_error_type_in_failure_evidence(self) -> None:
+        async def handler(_ctx, _payload):
+            raise ValueError("structured error")
+
+        self.host.register(
+            CapabilityDescriptor(id="errtype.cap", version="1.0.0", description=""),
+            handler,
+        )
+
+        result = await self.host.ainvoke(
+            "errtype.cap",
+            {},
+            correlation={"correlation_id": "corr-errtype"},
+        )
+
+        self.assertFalse(result.success)
+        self.assertEqual(result.outcome, "failure")
+        replay = self.host.replay("corr-errtype")
+        failed_event = replay[-1]
+        self.assertEqual(failed_event["event_type"], "execution_failed")
+        self.assertEqual(failed_event["error"]["error_type"], "ValueError")
+        self.assertEqual(failed_event["error"]["type"], "ValueError")
+
+    async def test_unsupported_mode_denied(self) -> None:
+        async def handler(_ctx, _payload):
+            return {}
+
+        self.host.register(
+            CapabilityDescriptor(id="sync.only", version="1.0.0", description="", modes=["sync"]),
+            handler,
+        )
+        result = await self.host.ainvoke(
+            "sync.only", {}, mode="stream",
+            correlation={"correlation_id": "corr-mode"},
+        )
+        self.assertFalse(result.success)
+        self.assertEqual(result.outcome, "denied")
+        self.assertEqual(result.denial.code, "unsupported_mode")
+
+    async def test_empty_capability_id_denied(self) -> None:
+        result = await self.host.ainvoke(
+            "", {}, correlation={"correlation_id": "corr-empty-id"}
+        )
+        self.assertFalse(result.success)
+        self.assertEqual(result.outcome, "denied")
+        self.assertEqual(result.denial.code, "capability_not_found")
+
+    async def test_unknown_mode_string_denied(self) -> None:
+        async def handler(_ctx, _payload):
+            return {}
+
+        self.host.register(
+            CapabilityDescriptor(id="mode.cap", version="1.0.0", description="", modes=["sync"]),
+            handler,
+        )
+        result = await self.host.ainvoke(
+            "mode.cap", {}, mode="garbage_mode",
+            correlation={"correlation_id": "corr-unknown-mode"},
+        )
+        self.assertFalse(result.success)
+        self.assertEqual(result.denial.code, "unsupported_mode")
+        self.assertIn("garbage_mode", result.denial.message)
+
+    async def test_input_schema_error_includes_field_path(self) -> None:
+        self.host.register(
+            CapabilityDescriptor(
+                id="typed.cap2",
+                version="1.0.0",
+                description="",
+                input_schema={"type": "object", "properties": {"x": {"type": "integer"}}, "required": ["x"]},
+            ),
+            lambda _ctx, _payload: {},
+        )
+        result = await self.host.ainvoke(
+            "typed.cap2", {"x": "not-an-int"},
+            correlation={"correlation_id": "corr-schema-path"},
+        )
+        self.assertFalse(result.success)
+        self.assertEqual(result.denial.code, "input_schema_validation_failed")
+        # path should identify the failing field
+        self.assertIsNotNone(result.denial.details.get("path"))
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/schemas/replay-result.schema.json b/schemas/replay-result.schema.json
index 01285fd..36efec7 100644
--- a/schemas/replay-result.schema.json
+++ b/schemas/replay-result.schema.json
@@ -20,6 +20,11 @@
       "type": "integer",
       "minimum": 0
     },
+    "truncated": {
+      "type": "boolean",
+      "default": false,
+      "description": "True when the result was capped by the host's replay limit. Use since_sequence to page through remaining events."
+    },
     "replayed_at": {
       "type": "string",
       "format": "date-time"
diff --git a/spec/chp-v0.1.md b/spec/chp-v0.1.md
index b988485..a7c00af 100644
--- a/spec/chp-v0.1.md
+++ b/spec/chp-v0.1.md
@@ -230,11 +230,12 @@ A replay result contains:
 - `correlation_id`
 - ordered `events`
 - `event_count`
+- `truncated`
 - `replayed_at`
 
 Replay ordering is by local evidence sequence. v0.1 does not define cross-host total ordering.
 
-Hosts SHOULD enforce a maximum `limit` (RECOMMENDED cap: 10,000 events). Clients requesting an unbounded replay MAY receive a bounded result; the `event_count` field reflects the actual count returned.
+Hosts SHOULD enforce a maximum `limit` (RECOMMENDED cap: 10,000 events). Clients requesting an unbounded replay MAY receive a bounded result; the `event_count` field reflects the actual count returned. When a host returns a bounded result, the `ReplayResult.truncated` field MUST be set to `true`. Clients MAY use `since_sequence` to page through remaining events.
 
 Schemas:
 

From b874612611b20e344eea063b03d6e411d49bc90b Mon Sep 17 00:00:00 2001
From: patrickschmied <patrickschmied@icloud.com>
Date: Tue, 9 Jun 2026 21:51:35 -0400
Subject: [PATCH 3/5] feat: conformance suite expansion, schema hardening, and
 docs polish
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Conformance suite (26 → 29 checks):
- Added check_standard_denial_codes — uses passed-in host directly to test denial code values
- Added check_input_schema_validation — isolated host registers typed capability; bad payload denied, good succeeds
- Added check_evidence_hash_chain — verifies 64-char hex content_hash and prev_hash linkage
- Extended --sample flag from 2 to 4 modes: passing, failing-no-evidence, failing-non-standard-codes, failing-no-hash-chain
- Refactored sample_failing_hosts.py: BrokenNoEvidenceHost, BrokenNonStandardCodesHost, BrokenNoHashChainHost

Schemas:
- evidence-event.schema.json: add content_hash and prev_hash optional fields with ^[a-f0-9]{64}$ pattern
- denial-reason.schema.json: add description and examples listing all 8 standard denial codes
- replay-query.schema.json: add maximum: 10000 on limit field, reference spec §10

Docs:
- README: merge duplicate quickstart, fix test command, fix alignment command, update repo map (14 entries)
- CONTRIBUTING.md: update test command to pytest, add check-alignment step (41 checks), add AGENTS.md note
- docs/roadmap.md: add 12 shipped milestones v0.3.5–v0.6.3, update Up Next to v0.7 candidates

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 CONTRIBUTING.md                     |  15 ++-
 README.md                           |  55 +++-------
 conformance/runner.py               | 150 +++++++++++++++++++++++++--
 conformance/sample_failing_hosts.py | 153 +++++++++++++++++++++++++++-
 docs/roadmap.md                     |  68 ++++++++++---
 schemas/denial-reason.schema.json   |  13 ++-
 schemas/evidence-event.schema.json  |  12 ++-
 schemas/replay-query.schema.json    |   4 +-
 8 files changed, 400 insertions(+), 70 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a99a159..196b18b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -17,13 +17,20 @@ Run Python tests:
 
 ```bash
 cd packages/python
-python -m unittest discover -s tests
+python -m pytest tests/
 ```
 
-Run conformance:
+Run conformance (29 checks):
 
 ```bash
 python conformance/runner.py
+# or: chp conformance run
+```
+
+Verify spec/implementation alignment (41 checks — run before any commit touching `spec/`, `schemas/`, or `types.py`):
+
+```bash
+chp work check-alignment --repo-root .
 ```
 
 Run demos:
@@ -33,6 +40,10 @@ python examples/agent-operations-demo/demo.py
 python examples/mcp-bridge-demo/bridge.py
 ```
 
+## For AI Agents
+
+AI agents working in this repo should read `AGENTS.md` first — it describes the three invariants they must never violate and has the key commands in compact form.
+
 ## Pull Requests
 
 Good first PRs:
diff --git a/README.md b/README.md
index 0de0b27..c1eb3b5 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ One command wires automatic evidence capture for every Claude Code session. See
 - Replay by correlation ID
 - Minimal conformance requirements
 
-## Quickstart
+## From Source
 
 Install the Python reference host from this checkout:
 
@@ -44,46 +44,22 @@ Run the agent/tool observability demo:
 python examples/agent-operations-demo/demo.py
 ```
 
-Run a served capability host endpoint demo:
-
-```bash
-chp demo endpoint
-```
-
-Run conformance:
+Run conformance (29 checks):
 
 ```bash
 python conformance/runner.py
 ```
 
-Record development work as CHP evidence:
-
-```bash
-chp work run \
-  --intent "Verify CHP tests." \
-  --correlation-id chp-dev-001 \
-  --test-run unit \
-  -- python -m unittest discover -s packages/python/tests
-chp work summary chp-dev-001
-```
-
-Validate the served-host demo as evidence:
-
-```bash
-chp work validate-demo endpoint --correlation-id chp-demo-validation
-chp work replay chp-demo-validation
-```
-
-Check v0.1 protocol alignment:
+Run the test suite:
 
 ```bash
-chp work check-alignment --correlation-id chp-alignment
+python -m pytest packages/python/tests/
 ```
 
-Check launch messaging:
+Check spec/implementation alignment (41 checks — required before commits to `spec/`, `schemas/`, or `types.py`):
 
 ```bash
-chp work check-messaging --correlation-id chp-messaging
+chp work check-alignment --repo-root .
 ```
 
 ## Minimal Capability
@@ -116,23 +92,20 @@ The host emits `execution_started` and `execution_completed` evidence for the in
 
 ## Repository Map
 
-- `spec/chp-v0.1.md`: minimal CHP v0.1 specification
-- `schemas/`: JSON Schemas for protocol objects
-- `packages/python/chp_core/`: reference local host
+- `spec/chp-v0.1.md`: normative CHP v0.1 specification
+- `schemas/`: JSON Schemas for all protocol objects
+- `packages/python/chp_core/`: Python reference host (`LocalCapabilityHost`, `SQLiteEvidenceStore`)
+- `conformance/`: 29-check conformance runner
+- `AGENTS.md`: orientation for AI agents working in this repo
+- `docs/llms.txt`: compact protocol reference for LLM context windows
+- `docs/adopter-quickstart.md`: 10-minute path to first evidence event
+- `docs/roadmap.md`: shipped history and upcoming milestones
 - `examples/capability-host-endpoint-demo/`: HTTP-served host demo
 - `examples/agent-operations-demo/`: agent/tool observability demo
 - `examples/codex-self-observation-demo/`: Codex dogfooding demo
 - `examples/mcp-bridge-demo/`: experimental MCP-style bridge prototype
-- `conformance/`: conformance runner
 - `docs/comparisons/chp-vs-mcp.md`: precise MCP comparison
-- `docs/comparisons/chp-and-opentelemetry.md`: OpenTelemetry alignment note
-- `docs/comparisons/landscape.md`: adjacent framework comparison
-- `docs/design/codex-self-observation.md`: Codex dogfooding pattern
-- `docs/design/public-v0.1-internal-legacy-boundary.md`: public/internal boundary
-- `docs/design/evidence-integrity-v0.2.md`: future evidence integrity proposal
 - `docs/security/threat-model-v0.1.md`: v0.1 threat model
-- `docs/release-checklist-v0.1.md`: release-readiness checklist
-- `docs/packaging-v0.1.md`: packaging and versioning plan
 
 ## CHP vs MCP
 
diff --git a/conformance/runner.py b/conformance/runner.py
index d515a8e..5318d5e 100644
--- a/conformance/runner.py
+++ b/conformance/runner.py
@@ -19,7 +19,11 @@
     LocalCapabilityHost,
     SQLiteEvidenceStore,
 )
-from sample_failing_hosts import BrokenNoEvidenceHost  # noqa: E402
+from sample_failing_hosts import (  # noqa: E402
+    BrokenNoEvidenceHost,
+    BrokenNoHashChainHost,
+    BrokenNonStandardCodesHost,
+)
 
 
 Check = Callable[[Any], Awaitable[None]]
@@ -1089,6 +1093,124 @@ async def check_persistence(_host: Any) -> None:
         mgr2.close_conn()
 
 
+async def check_standard_denial_codes(host: Any) -> None:
+    """Invoking a missing capability produces the standard denial code 'capability_not_found'."""
+    corr_id = "conf-denial-codes-001"
+    result = await invoke_host(host, "nonexistent.capability.xyz", {}, correlation={"correlation_id": corr_id})
+    assert not result_value(result, "success"), "expected failure for missing capability"
+    assert result_value(result, "outcome") == "denied", (
+        f"expected 'denied', got {result_value(result, 'outcome')!r}"
+    )
+    denial = result_value(result, "denial")
+    if isinstance(denial, dict):
+        code = denial.get("code")
+    else:
+        code = getattr(denial, "code", None)
+    assert code == "capability_not_found", (
+        f"expected standard code 'capability_not_found', got {code!r}"
+    )
+
+
+async def check_input_schema_validation(_host: Any) -> None:
+    """A capability with input_schema rejects non-conforming payloads before execution."""
+    import tempfile, os
+    from chp_core import CapabilityDescriptor, LocalCapabilityHost, SQLiteEvidenceStore
+
+    with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as f:
+        store_path = f.name
+    try:
+        store = SQLiteEvidenceStore(store_path)
+        host = LocalCapabilityHost("conf-schema", store=store)
+
+        async def handler(_ctx, _payload):
+            return {"ok": True}
+
+        host.register(
+            CapabilityDescriptor(
+                id="conf.typed",
+                version="1.0.0",
+                description="Typed capability.",
+                input_schema={
+                    "type": "object",
+                    "properties": {"n": {"type": "integer"}},
+                    "required": ["n"],
+                    "additionalProperties": False,
+                },
+            ),
+            handler,
+        )
+
+        bad = await host.ainvoke(
+            "conf.typed",
+            {"n": "not-an-integer"},
+            correlation={"correlation_id": "conf-schema-bad"},
+        )
+        assert not bad.success, "expected denial for invalid payload"
+        assert bad.outcome == "denied", f"expected denied, got {bad.outcome!r}"
+        assert bad.denial.code == "input_schema_validation_failed", (
+            f"expected 'input_schema_validation_failed', got {bad.denial.code!r}"
+        )
+
+        good = await host.ainvoke(
+            "conf.typed",
+            {"n": 42},
+            correlation={"correlation_id": "conf-schema-good"},
+        )
+        assert good.success, f"valid payload should succeed, got: {good}"
+        store.close()
+    finally:
+        os.unlink(store_path)
+
+
+def _assert_hash_chain(events: list[Any]) -> None:
+    """Shared assertion: events must carry content_hash and link via prev_hash."""
+    assert len(events) >= 2, f"expected at least 2 events, got {len(events)}"
+    for ev in events:
+        assert "content_hash" in ev, f"missing content_hash in event seq={ev.get('sequence')}"
+        assert isinstance(ev["content_hash"], str) and len(ev["content_hash"]) == 64, (
+            f"content_hash must be a 64-char hex string, got {ev['content_hash']!r}"
+        )
+    second = events[1]
+    assert "prev_hash" in second, "second event must have prev_hash linking to first"
+    assert second["prev_hash"] == events[0]["content_hash"], (
+        "prev_hash of second event must equal content_hash of first"
+    )
+
+
+async def check_evidence_hash_chain(host: Any) -> None:
+    """Evidence events carry SHA256 content_hash + prev_hash to form a tamper-detectable chain."""
+    corr_id = "conf-chain-001"
+
+    if hasattr(host, "by_correlation_with_hashes"):
+        # Host exposes hash-aware replay — test it directly (catches BrokenNoHashChainHost)
+        await invoke_host(host, "conformance.echo", {"value": "integrity-check"}, correlation={"correlation_id": corr_id})
+        events = host.by_correlation_with_hashes(corr_id)
+        _assert_hash_chain(events)
+    else:
+        # Fall back: create an isolated reference host and verify the SQLiteEvidenceStore chain
+        import tempfile, os
+        from chp_core import CapabilityDescriptor, LocalCapabilityHost, SQLiteEvidenceStore
+
+        with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as f:
+            store_path = f.name
+        try:
+            store = SQLiteEvidenceStore(store_path)
+            ref_host = LocalCapabilityHost("conf-chain", store=store)
+
+            async def echo(_ctx, payload):
+                return {"echo": payload.get("value")}
+
+            ref_host.register(CapabilityDescriptor(id="conf.chain.echo", version="1.0.0", description=""), echo)
+            await ref_host.ainvoke("conf.chain.echo", {"value": "integrity-check"}, correlation={"correlation_id": corr_id})
+            events = store.by_correlation_with_hashes(corr_id)
+            _assert_hash_chain(events)
+            chain_result = store.verify_chain(corr_id)
+            assert chain_result.valid, f"hash chain should be valid, got: {chain_result}"
+            store.close()
+        finally:
+            os.unlink(store_path)
+
+
 CHECKS: list[tuple[str, Check]] = [
     ("capability declaration", check_declaration),
     ("capability discovery", check_discovery),
@@ -1116,16 +1238,26 @@ async def check_persistence(_host: Any) -> None:
     ("compliance capability", check_compliance_capability),
     ("incident capability", check_incident_capability),
     ("sqlite persistence", check_persistence),
+    ("standard denial codes", check_standard_denial_codes),
+    ("input schema validation", check_input_schema_validation),
+    ("evidence hash chain", check_evidence_hash_chain),
 ]
 
 
+SAMPLE_HOSTS = {
+    "passing": build_passing_host,
+    "failing-no-evidence": lambda: BrokenNoEvidenceHost(),
+    "failing-non-standard-codes": lambda: BrokenNonStandardCodesHost(),
+    "failing-no-hash-chain": lambda: BrokenNoHashChainHost(),
+}
+
+
 async def run(sample: str) -> list[CheckResult]:
-    if sample == "passing":
-        host = await build_passing_host()
-    elif sample == "failing-no-evidence":
-        host = BrokenNoEvidenceHost()
-    else:
-        raise ValueError(f"unknown sample host: {sample}")
+    builder = SAMPLE_HOSTS.get(sample)
+    if builder is None:
+        raise ValueError(f"unknown sample host: {sample!r}. Choices: {list(SAMPLE_HOSTS)}")
+    host_or_coro = builder()
+    host = await host_or_coro if hasattr(host_or_coro, "__await__") else host_or_coro
 
     results = []
     for name, check in CHECKS:
@@ -1141,9 +1273,9 @@ def main() -> int:
     parser = argparse.ArgumentParser(description="Run CHP v0.1 conformance checks.")
     parser.add_argument(
         "--sample",
-        choices=["passing", "failing-no-evidence"],
+        choices=list(SAMPLE_HOSTS),
         default="passing",
-        help="Built-in sample host to test.",
+        help="Built-in sample host to test against.",
     )
     args = parser.parse_args()
 
diff --git a/conformance/sample_failing_hosts.py b/conformance/sample_failing_hosts.py
index 40bed78..fcd8e49 100644
--- a/conformance/sample_failing_hosts.py
+++ b/conformance/sample_failing_hosts.py
@@ -55,6 +55,8 @@ def discover(self) -> dict[str, Any]:
             ],
         }
 
+    _KNOWN = {"conformance.echo", "conformance.fail", "conformance.guarded"}
+
     async def invoke(
         self,
         capability_id: str,
@@ -64,11 +66,160 @@ async def invoke(
         **_kwargs: Any,
     ) -> FakeResult:
         corr = correlation or {"correlation_id": "generated-but-not-recorded"}
+        if capability_id not in self._KNOWN:
+            return FakeResult("fake", capability_id, "1.0.0", corr, "denied", False, denial=type("D", (), {"code": "capability_not_found"})(), evidence_ids=[])
         if capability_id == "conformance.fail":
             return FakeResult("fake", capability_id, "1.0.0", corr, "failure", False, error={"message": "failed"}, evidence_ids=[])
         if capability_id == "conformance.guarded":
-            return FakeResult("fake", capability_id, "1.0.0", corr, "denied", False, denial={"code": "denied"}, evidence_ids=[])
+            return FakeResult("fake", capability_id, "1.0.0", corr, "denied", False, denial=type("D", (), {"code": "invariant_failed"})(), evidence_ids=[])
         return FakeResult("fake", capability_id, "1.0.0", corr, "success", True, data={"echo": (payload or {}).get("value")}, evidence_ids=[])
 
     def replay(self, _correlation_id: str) -> list[dict[str, Any]]:
         return []
+
+
+class BrokenNonStandardCodesHost:
+    """Emits evidence but uses non-standard denial codes (e.g. bare 'not_found')."""
+
+    def discover(self) -> dict[str, Any]:
+        return {
+            "id": "broken-codes",
+            "version": "0.1.0",
+            "protocol_version": "0.1",
+            "kind": "test-broken",
+            "evidence": {"store": "memory", "append_only": True},
+            "capabilities": [
+                {"id": "conformance.echo", "version": "1.0.0", "description": "", "modes": ["sync"], "emits": ["execution_started", "execution_completed"]},
+                {"id": "conformance.fail", "version": "1.0.0", "description": "", "modes": ["sync"], "emits": ["execution_started", "execution_failed"]},
+                {"id": "conformance.guarded", "version": "1.0.0", "description": "", "modes": ["sync"], "emits": ["execution_denied"]},
+            ],
+        }
+
+    def _make_event(self, event_type: str, capability_id: str, corr: dict, outcome: str | None = None, denial: dict | None = None) -> dict[str, Any]:
+        return {
+            "event_id": f"evt-{event_type}",
+            "event_type": event_type,
+            "invocation_id": "fake-inv",
+            "capability_id": capability_id,
+            "host_id": "broken-codes",
+            "correlation": corr,
+            "timestamp": "2026-01-01T00:00:00Z",
+            "sequence": 1,
+            "outcome": outcome,
+            "payload": {},
+            "redacted": False,
+            "assurance": {"level": "S1"},
+            "denial": denial,
+        }
+
+    async def invoke(
+        self,
+        capability_id: str,
+        payload: dict[str, Any] | None = None,
+        *,
+        correlation: dict[str, Any] | None = None,
+        **_kwargs: Any,
+    ) -> FakeResult:
+        corr = correlation or {"correlation_id": "broken-codes-gen"}
+        if capability_id == "conformance.fail":
+            ev = self._make_event("execution_failed", capability_id, corr, "failure")
+            self._last_events = {corr.get("correlation_id", ""): [self._make_event("execution_started", capability_id, corr), ev]}
+            return FakeResult("fake", capability_id, "1.0.0", corr, "failure", False, error={"message": "failed"}, evidence_ids=["evt-failed"])
+        if capability_id == "conformance.guarded":
+            # Uses non-standard denial code "not_found" instead of "invariant_failed"
+            bad_denial = {"code": "not_found", "message": "not found", "retryable": False}
+            ev = self._make_event("execution_denied", capability_id, corr, "denied", bad_denial)
+            self._last_events = {corr.get("correlation_id", ""): [ev]}
+            return FakeResult("fake", capability_id, "1.0.0", corr, "denied", False, denial=type("D", (), {"code": "not_found"})(), evidence_ids=["evt-denied"])
+        if capability_id not in ("conformance.echo",):
+            # Missing capability — returns non-standard code
+            bad_denial = {"code": "not_found", "message": "not found", "retryable": False}
+            ev = self._make_event("execution_denied", capability_id, corr, "denied", bad_denial)
+            self._last_events = {corr.get("correlation_id", ""): [ev]}
+            return FakeResult("fake", capability_id, "1.0.0", corr, "denied", False, denial=type("D", (), {"code": "not_found"})(), evidence_ids=["evt-denied"])
+        ev_start = self._make_event("execution_started", capability_id, corr)
+        ev_done = self._make_event("execution_completed", capability_id, corr, "success")
+        self._last_events = {corr.get("correlation_id", ""): [ev_start, ev_done]}
+        return FakeResult("fake", capability_id, "1.0.0", corr, "success", True, data={"echo": (payload or {}).get("value")}, evidence_ids=["evt-start", "evt-done"])
+
+    def __init__(self) -> None:
+        self._last_events: dict[str, list[dict]] = {}
+
+    def replay(self, correlation_id: str) -> list[dict[str, Any]]:
+        return self._last_events.get(correlation_id, [])
+
+
+class BrokenNoHashChainHost:
+    """Emits evidence events but never sets content_hash/prev_hash (no chain)."""
+
+    def discover(self) -> dict[str, Any]:
+        return {
+            "id": "broken-no-chain",
+            "version": "0.1.0",
+            "protocol_version": "0.1",
+            "kind": "test-broken",
+            "evidence": {"store": "memory", "append_only": True},
+            "capabilities": [
+                {"id": "conformance.echo", "version": "1.0.0", "description": "", "modes": ["sync"], "emits": ["execution_started", "execution_completed"]},
+                {"id": "conformance.fail", "version": "1.0.0", "description": "", "modes": ["sync"], "emits": ["execution_started", "execution_failed"]},
+                {"id": "conformance.guarded", "version": "1.0.0", "description": "", "modes": ["sync"], "emits": ["execution_denied"]},
+            ],
+        }
+
+    def _make_event(self, event_type: str, capability_id: str, corr: dict, seq: int, outcome: str | None = None, denial: dict | None = None) -> dict[str, Any]:
+        return {
+            "event_id": f"evt-{seq}",
+            "event_type": event_type,
+            "invocation_id": "fake-inv",
+            "capability_id": capability_id,
+            "host_id": "broken-no-chain",
+            "correlation": corr,
+            "timestamp": "2026-01-01T00:00:00Z",
+            "sequence": seq,
+            "outcome": outcome,
+            "payload": {},
+            "redacted": False,
+            "assurance": {"level": "S1"},
+            "denial": denial,
+            # Deliberately omits content_hash and prev_hash
+        }
+
+    def __init__(self) -> None:
+        self._last_events: dict[str, list[dict]] = {}
+
+    _KNOWN = {"conformance.echo", "conformance.fail", "conformance.guarded"}
+
+    async def invoke(
+        self,
+        capability_id: str,
+        payload: dict[str, Any] | None = None,
+        *,
+        correlation: dict[str, Any] | None = None,
+        **_kwargs: Any,
+    ) -> FakeResult:
+        corr = correlation or {"correlation_id": "no-chain-gen"}
+        cid = corr.get("correlation_id", "no-chain-gen")
+        if capability_id not in self._KNOWN:
+            denial = {"code": "capability_not_found", "message": "not found", "retryable": False}
+            evs = [self._make_event("execution_denied", capability_id, corr, 1, "denied", denial)]
+            self._last_events[cid] = evs
+            return FakeResult("fake", capability_id, "1.0.0", corr, "denied", False, denial=type("D", (), {"code": "capability_not_found"})(), evidence_ids=["evt-1"])
+        if capability_id == "conformance.fail":
+            evs = [self._make_event("execution_started", capability_id, corr, 1), self._make_event("execution_failed", capability_id, corr, 2, "failure")]
+            self._last_events[cid] = evs
+            return FakeResult("fake", capability_id, "1.0.0", corr, "failure", False, evidence_ids=["evt-1", "evt-2"])
+        if capability_id == "conformance.guarded":
+            denial = {"code": "invariant_failed", "message": "missing field", "retryable": False}
+            evs = [self._make_event("execution_denied", capability_id, corr, 1, "denied", denial)]
+            self._last_events[cid] = evs
+            return FakeResult("fake", capability_id, "1.0.0", corr, "denied", False, denial=type("D", (), {"code": "invariant_failed"})(), evidence_ids=["evt-1"])
+        evs = [self._make_event("execution_started", capability_id, corr, 1), self._make_event("execution_completed", capability_id, corr, 2, "success")]
+        self._last_events[cid] = evs
+        return FakeResult("fake", capability_id, "1.0.0", corr, "success", True, data={"echo": (payload or {}).get("value")}, evidence_ids=["evt-1", "evt-2"])
+
+    def replay(self, correlation_id: str) -> list[dict[str, Any]]:
+        return self._last_events.get(correlation_id, [])
+
+    def by_correlation_with_hashes(self, correlation_id: str) -> list[dict[str, Any]]:
+        # Returns events without content_hash — the check should detect the absence
+        return self._last_events.get(correlation_id, [])
diff --git a/docs/roadmap.md b/docs/roadmap.md
index 75e94bd..b194f86 100644
--- a/docs/roadmap.md
+++ b/docs/roadmap.md
@@ -49,25 +49,65 @@ OpenTelemetry export via `chp session otel`. `export_otlp_http` ships spans to a
 **v0.3.4 — Autonomy Profile + Budget Gates**  
 `AutonomyProfile` field on `CapabilityDescriptor`: `tier` (`automated` | `supervised` | `approval_required` | `human_driven`), `spend_limit`, `action_limit`, `rollback_policy`. Budget gates block invocations when limits are exceeded and emit `budget_exceeded` / `approval_requested` evidence. `chp session autonomy-report` shows all autonomy decisions for a session.
 
+**v0.3.5 — Approval Resolution**  
+Closes the `approval_required` open loop. `host.grant_approval()` and `host.deny_approval()` record `approval_granted` / `approval_denied` evidence events. `chp session autonomy-report` updated with `pending_approvals` count and resolved/unresolved classification.
+
+**v0.4.0 — Retrieval Capability**  
+`RetrievalCapability` base class for keyword, vector, and hybrid search. Source citation (document ID, title, score) recorded in hash-chained evidence for every retrieval call. Every RAG query becomes auditable and replayable.
+
+**v0.4.1 — Data Ingestion Capability**  
+`DataIngestionCapability` with governed ingest, SHA256 content provenance, and `ingestion_completed` evidence events.
+
+**v0.4.2 — Transformation Capability**  
+`TransformationCapability` for normalize/chunk/redact operations. SHA256 provenance links output back to input across every transformation step.
+
+**v0.4.3 — Knowledge Graph Capability**  
+Governed entity/relation store with BFS traversal. Every graph mutation emits structured evidence.
+
+**v0.4.4 — Workflow Orchestration + Domain Events**  
+Workflow steps as first-class capabilities. Domain event bus with `event_published` / `event_consumed` evidence.
+
+**v0.4.5 — Metrics + Capability Certification**  
+Metrics naming convention and `chp certify` CLI. Capabilities can be assessed against a maturity rubric and issued a certification record.
+
+**v0.4.6–v0.4.7 — Approval CLI, Version Control, Identity, Composability**  
+`chp approval` subcommands, version control capability exports, identity propagation through delegation chains, composability utilities.
+
+**v0.5.0 — State Machine + Agent Interface**  
+`StateMachineCapability` (§6.3) and `AgentInterfaceCapability` (§7.2). State transitions emit structured evidence; agent interfaces expose governed tool manifests.
+
+**v0.5.1 — Safety + Compliance**  
+Safety gate capability (§8.5) and compliance check capability (§8.6). Pre-execution safety scoring and policy-aligned compliance assertions with evidence.
+
+**v0.5.2 — Incident Management**  
+`IncidentManagementCapability` (§9.5). Structured incident lifecycle: `incident_opened`, `incident_updated`, `incident_resolved` evidence events.
+
+**v0.6.0 — SQLite Persistence Wave**  
+All stateful capabilities (memory, knowledge graph, workflow, retrieval, incident) persist to SQLite stores. Evidence and capability state survive process restarts.
+
+**v0.6.1 — Adopter Experience**  
+`chp host verify` — smoke-tests a host and evidence store end-to-end. `chp serve-http` — exposes any `LocalCapabilityHost` over HTTP with a single command. `docs/adopter-quickstart.md` revised.
+
+**v0.6.2 — Vector Retrieval**  
+Cosine-similarity vector search using only stdlib (`array`, `math`). No numpy, no external deps. Plugs into `RetrievalCapability` as a scored backend.
+
+**v0.6.3 — RemoteCapabilityHost**  
+Cross-host invocation over HTTP. A host can invoke capabilities on another CHP host running `chp serve-http`. Correlation IDs, evidence, and denial codes propagate across the boundary.
+
 ## Guiding Rule
 
 Local visibility should be free. Production trust should be paid.
 
-## Up Next — v0.3.5
-
-Closes the open loop from v0.3.4. `tier="approval_required"` blocks invocations but has no
-resolution path. v0.3.5 adds:
+## Current — v0.6.3
 
-- `host.grant_approval(correlation_id, capability_uri, ...)` — records `approval_granted` event
-- `host.deny_approval(correlation_id, capability_uri, ...)` — records `approval_denied` event
-- `chp session autonomy-report` updated with `pending_approvals` count and resolved/unresolved classification
+The protocol is stable and public. Focus shifts to adoption: third-party implementors, external language SDKs, and production deployments.
 
-## Up Next — v0.4
+## Up Next — v0.7
 
-The v0.4 milestone shifts focus to governed data access. Candidate work items:
+Candidate work items for the v0.7 wave:
 
-- **Retrieval Capability** — `RetrievalCapability` base class for keyword, vector, and hybrid
-  search. Source citation (document ID, title, score) recorded in evidence for every retrieval
-  call. Every RAG query becomes auditable, replayable, and policy-addressable.
-- **Ingestion Capability** — governed data loading with provenance tracking
-- **Knowledge Graph Capability** — entity/relationship stores as first-class capabilities
+- **Redaction policies** — `chp.redact_evidence_payload` capability; per-capability redaction rules stored in `.chp/policy.json`
+- **Catalog alignment tooling** — `chp.check_catalog_alignment` to verify roadmap, contracts, and examples against the registered capability catalog
+- **Capability contract validation** — `chp.validate_capability_contract` against the canonical contract template
+- **Maturity assessment** — `chp.assess_capability_maturity` scores capabilities against the L1–L7 maturity ladder
+- **Cross-run comparison** — `chp.compare_runs` diffs two work traces for regressions and improvements
diff --git a/schemas/denial-reason.schema.json b/schemas/denial-reason.schema.json
index bc5dc0c..824d39a 100644
--- a/schemas/denial-reason.schema.json
+++ b/schemas/denial-reason.schema.json
@@ -8,7 +8,18 @@
   "properties": {
     "code": {
       "type": "string",
-      "minLength": 1
+      "minLength": 1,
+      "description": "Stable denial code. Implementations SHOULD use the standard codes when applicable (spec §9).",
+      "examples": [
+        "capability_not_found",
+        "capability_disabled",
+        "unsupported_mode",
+        "invariant_failed",
+        "input_schema_validation_failed",
+        "policy_block_pattern_matched",
+        "risk_tier_exceeded",
+        "entitlement_denied"
+      ]
     },
     "message": {
       "type": "string"
diff --git a/schemas/evidence-event.schema.json b/schemas/evidence-event.schema.json
index 0545af3..5ecf7c1 100644
--- a/schemas/evidence-event.schema.json
+++ b/schemas/evidence-event.schema.json
@@ -62,7 +62,17 @@
       "additionalProperties": true,
       "description": "Identity that triggered this invocation (§13.1)."
     },
-    "assurance": { "$ref": "https://chp.dev/schemas/v0.1/evidence-event.schema.json#/$defs/AssuranceMetadata" }
+    "assurance": { "$ref": "https://chp.dev/schemas/v0.1/evidence-event.schema.json#/$defs/AssuranceMetadata" },
+    "content_hash": {
+      "type": ["string", "null"],
+      "description": "SHA256 hex digest of this event's stable fields. Set by the evidence store. Present when returned from hash-aware replay APIs (e.g. store.by_correlation_with_hashes()).",
+      "pattern": "^[a-f0-9]{64}$"
+    },
+    "prev_hash": {
+      "type": ["string", "null"],
+      "description": "content_hash of the preceding event in the same correlation chain. Null for the first event. Forms a tamper-detectable linked chain per spec §6 (Evidence MUST be append-only).",
+      "pattern": "^[a-f0-9]{64}$"
+    }
   },
   "$defs": {
     "DenialReason": {
diff --git a/schemas/replay-query.schema.json b/schemas/replay-query.schema.json
index fe7b756..0eeece7 100644
--- a/schemas/replay-query.schema.json
+++ b/schemas/replay-query.schema.json
@@ -12,7 +12,9 @@
     },
     "limit": {
       "type": ["integer", "null"],
-      "minimum": 1
+      "minimum": 1,
+      "maximum": 10000,
+      "description": "Maximum events to return. Hosts SHOULD enforce a cap; spec §10 RECOMMENDED cap is 10,000. Clients requesting unbounded replay MAY receive a bounded result."
     },
     "since_sequence": {
       "type": ["integer", "null"],

From 7323f2b7a5336622e8379f00b130dea3208ce477 Mon Sep 17 00:00:00 2001
From: patrickschmied <patrickschmied@icloud.com>
Date: Tue, 9 Jun 2026 21:56:38 -0400
Subject: [PATCH 4/5] chore: bump version to 0.7.0

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 docs/roadmap.md                | 11 +++++++----
 packages/python/pyproject.toml |  2 +-
 packages/ts-types/package.json |  2 +-
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/docs/roadmap.md b/docs/roadmap.md
index b194f86..f73fe0c 100644
--- a/docs/roadmap.md
+++ b/docs/roadmap.md
@@ -98,13 +98,16 @@ Cross-host invocation over HTTP. A host can invoke capabilities on another CHP h
 
 Local visibility should be free. Production trust should be paid.
 
-## Current — v0.6.3
+**v0.7.0 — Protocol hardening and gap closure**  
+End-to-end hardening pass: input validation at public API boundaries (`capability_id`, `mode`, `jsonschema` field paths), `ReplayResult.truncated` flag (spec §10 MUST), `RemoteCapabilityHost` network resilience (`URLError`/`OSError`), conformance suite expanded to 29 checks with broken-host negative samples, 42-check alignment suite, 745 tests.
 
-The protocol is stable and public. Focus shifts to adoption: third-party implementors, external language SDKs, and production deployments.
+## Current — v0.7.0
 
-## Up Next — v0.7
+The protocol is hardened and conformance-tested. Focus shifts to adoption: third-party implementors, external language SDKs, and production deployments.
 
-Candidate work items for the v0.7 wave:
+## Up Next — v0.8
+
+Candidate work items for the v0.8 wave:
 
 - **Redaction policies** — `chp.redact_evidence_payload` capability; per-capability redaction rules stored in `.chp/policy.json`
 - **Catalog alignment tooling** — `chp.check_catalog_alignment` to verify roadmap, contracts, and examples against the registered capability catalog
diff --git a/packages/python/pyproject.toml b/packages/python/pyproject.toml
index 1584a9a..f36991c 100644
--- a/packages/python/pyproject.toml
+++ b/packages/python/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "chp-core"
-version = "0.6.3"
+version = "0.7.0"
 description = "Capability Host Protocol — local execution evidence for agents, tools, and systems"
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/packages/ts-types/package.json b/packages/ts-types/package.json
index 6338e45..4b5e7fb 100644
--- a/packages/ts-types/package.json
+++ b/packages/ts-types/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@capabilityhostprotocol/types",
-  "version": "0.6.3",
+  "version": "0.7.0",
   "description": "TypeScript types for the Capability Host Protocol (CHP).",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",

From 120947695f837580daa3f2b4d82def821302179f Mon Sep 17 00:00:00 2001
From: patrickschmied <patrickschmied@icloud.com>
Date: Fri, 12 Jun 2026 01:26:42 -0400
Subject: [PATCH 5/5] feat(transport): multi-host Transport seam + /verify
 endpoint

Add a transport abstraction so clients can reach a CHP host over any
backend (HTTP first; Zenoh/gRPC pluggable later):

- chp_core/transport.py: Transport Protocol (async ainvoke_envelope,
  discover, replay_result, health, supports) + LocalTransport (in-process)
  and HttpTransport (stdlib HTTP via RemoteCapabilityHost, run off-loop).
- http.py: RemoteCapabilityHost.invoke_envelope() and .verify(); new
  GET /verify/{correlation_id} route returning the SHA256 chain result.
- Export Transport / LocalTransport / HttpTransport from the package.

Additive; existing surface unchanged. 752 tests green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 packages/python/chp_core/__init__.py  |   4 +
 packages/python/chp_core/cli/_host.py |   2 +-
 packages/python/chp_core/http.py      |  20 ++++
 packages/python/chp_core/transport.py | 132 ++++++++++++++++++++++++++
 4 files changed, 157 insertions(+), 1 deletion(-)
 create mode 100644 packages/python/chp_core/transport.py

diff --git a/packages/python/chp_core/__init__.py b/packages/python/chp_core/__init__.py
index 3fbd09b..65ae587 100644
--- a/packages/python/chp_core/__init__.py
+++ b/packages/python/chp_core/__init__.py
@@ -20,6 +20,7 @@
 )
 from .host import CapabilityExecutionContext, LocalCapabilityHost
 from .http import CapabilityHostHTTPServer, RemoteCapabilityHost, create_http_server, serve_http
+from .transport import HttpTransport, LocalTransport, Transport
 from .store import SQLiteEvidenceStore
 from .decorators import capability
 from .codex import (
@@ -197,6 +198,9 @@
     "InvocationEnvelope",
     "InvocationResult",
     "LocalCapabilityHost",
+    "Transport",
+    "LocalTransport",
+    "HttpTransport",
     "PolicyDescriptor",
     "ReplayQuery",
     "ReplayResult",
diff --git a/packages/python/chp_core/cli/_host.py b/packages/python/chp_core/cli/_host.py
index 7217b2f..e598e75 100644
--- a/packages/python/chp_core/cli/_host.py
+++ b/packages/python/chp_core/cli/_host.py
@@ -85,7 +85,7 @@ def cmd_serve_http(args: argparse.Namespace) -> int:
     bind: str = args.bind
     port: int = args.port
     print(f"Serving CHP host {host.host_id!r} at http://{bind}:{port}")
-    print("Routes: GET /health, GET /host, GET /capabilities, POST /invoke, GET /replay/{id}")
+    print("Routes: GET /health, GET /host, GET /capabilities, POST /invoke, GET /replay/{id}, GET /verify/{id}")
     try:
         serve_http(host, bind=bind, port=port)
     except KeyboardInterrupt:
diff --git a/packages/python/chp_core/http.py b/packages/python/chp_core/http.py
index 43fb8b7..d0a4114 100644
--- a/packages/python/chp_core/http.py
+++ b/packages/python/chp_core/http.py
@@ -4,6 +4,7 @@
 
 import asyncio
 import json
+from dataclasses import asdict
 from http import HTTPStatus
 from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
 from typing import Any
@@ -57,6 +58,11 @@ def do_GET(self) -> None:
             correlation_id = unquote(path.removeprefix("/replay/"))
             self._write_json(self.server.chp_host.replay_result(correlation_id).to_dict())
             return
+        if path.startswith("/verify/"):
+            correlation_id = unquote(path.removeprefix("/verify/"))
+            result = self.server.chp_host.store.verify_chain(correlation_id)
+            self._write_json(asdict(result))
+            return
         self._write_error(HTTPStatus.NOT_FOUND, "not_found", f"Unknown route: {path}")
 
     def do_POST(self) -> None:
@@ -282,6 +288,11 @@ def invoke(
     ) -> InvocationResult:
         return asyncio.run(self.ainvoke(capability_id, payload, **kwargs))
 
+    def invoke_envelope(self, envelope: InvocationEnvelope) -> InvocationResult:
+        """Invoke from a pre-built envelope (synchronous; mirrors the server's /invoke)."""
+        data = self._post("/invoke", envelope.to_dict())
+        return self._parse_result(data)
+
     def discover(self, **filter_kwargs: Any) -> JSON:
         """Return the host descriptor, optionally filtering capabilities."""
         descriptor = self._get("/host")
@@ -308,3 +319,12 @@ def replay_result(self, query: "str | ReplayQuery | JSON") -> JSON:
     def health(self) -> JSON:
         """Return the /health response from the remote host."""
         return self._get("/health")
+
+    def verify(self, correlation_id: str) -> JSON:
+        """Return the SHA256 chain verification result for *correlation_id*.
+
+        Shape mirrors ``ChainVerificationResult``: ``correlation_id``,
+        ``event_count``, ``verified_count``, ``unverified_count``, ``valid``,
+        ``first_broken_sequence``.
+        """
+        return self._get(f"/verify/{correlation_id}")
diff --git a/packages/python/chp_core/transport.py b/packages/python/chp_core/transport.py
new file mode 100644
index 0000000..848d63a
--- /dev/null
+++ b/packages/python/chp_core/transport.py
@@ -0,0 +1,132 @@
+"""Transport seam for CHP hosts.
+
+A ``Transport`` is the uniform async surface a client uses to talk to *one* host —
+whether that host is in-process (``LocalTransport``) or reached over a wire
+(``HttpTransport``, and later Zenoh/gRPC). The router in ``chp-host`` composes a
+list of transports into a multi-host pool.
+
+The contract is deliberately thin — request/response plus discovery, replay, and
+health — so new transports are cheap to add. Streaming / evidence pub-sub are
+*optional* extensions advertised via ``supports(feature)`` and an optional
+``subscribe_evidence`` hook, so adding them later (e.g. for a Zenoh mesh) never
+breaks the core contract.
+
+This module depends only on the stdlib + chp-core internals: no transport here
+pulls a third-party dependency. Heavier transports (Zenoh, gRPC) live in
+downstream packages and implement this same ``Transport`` protocol.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from typing import Protocol, runtime_checkable
+
+from .host import LocalCapabilityHost
+from .http import RemoteCapabilityHost
+from .types import InvocationEnvelope, InvocationResult, JSON, ReplayQuery
+
+
+@runtime_checkable
+class Transport(Protocol):
+    """Uniform async surface for talking to a single CHP host.
+
+    Implementations: :class:`LocalTransport` (in-process),
+    :class:`HttpTransport` (stdlib HTTP). Downstream packages may add others
+    (Zenoh, gRPC) by satisfying this same protocol.
+    """
+
+    name: str
+
+    async def ainvoke_envelope(self, envelope: InvocationEnvelope) -> InvocationResult:
+        """Invoke a capability from a pre-built envelope and return its result."""
+        ...
+
+    async def discover(self) -> JSON:
+        """Return the host descriptor (id, capabilities, evidence policy, ...)."""
+        ...
+
+    async def replay_result(self, query: "str | ReplayQuery | JSON") -> JSON:
+        """Return a replay result (as a dict) for a correlation id or query."""
+        ...
+
+    async def health(self) -> JSON:
+        """Return a health snapshot: status, host_id, protocol, version, capability_count."""
+        ...
+
+    def supports(self, feature: str) -> bool:
+        """Report whether an optional feature (e.g. ``"streaming"``) is available."""
+        ...
+
+
+def _health_from_descriptor(descriptor: JSON) -> JSON:
+    """Build a /health-shaped snapshot from a host descriptor."""
+    return {
+        "status": "ok",
+        "host_id": descriptor.get("id", "unknown"),
+        "protocol": "chp",
+        "version": "0.1",
+        "capability_count": len(descriptor.get("capabilities", [])),
+    }
+
+
+class LocalTransport:
+    """Transport over an in-process :class:`LocalCapabilityHost`.
+
+    Lets a local host participate in a multi-host router alongside remote hosts
+    on exactly the same seam. Invocation is awaited directly; the synchronous
+    discover/replay calls run inline on the event loop (no worker thread).
+    """
+
+    def __init__(self, host: LocalCapabilityHost, *, name: str | None = None) -> None:
+        self._host = host
+        self.name = name or host.host_id
+
+    async def ainvoke_envelope(self, envelope: InvocationEnvelope) -> InvocationResult:
+        return await self._host.ainvoke_envelope(envelope)
+
+    async def discover(self) -> JSON:
+        return self._host.discover()
+
+    async def replay_result(self, query: "str | ReplayQuery | JSON") -> JSON:
+        return self._host.replay_result(query).to_dict()
+
+    async def health(self) -> JSON:
+        return _health_from_descriptor(self._host.discover())
+
+    def supports(self, feature: str) -> bool:
+        return False
+
+
+class HttpTransport:
+    """Transport over CHP's stdlib HTTP surface.
+
+    Wraps :class:`RemoteCapabilityHost` (blocking ``urllib``) and runs each call
+    in a worker thread so it never blocks the router's event loop. A
+    ``ConnectionError`` from the underlying client propagates unchanged so the
+    router can fail over to another host.
+    """
+
+    def __init__(
+        self,
+        base_url: str,
+        *,
+        name: str | None = None,
+        timeout: int = 30,
+    ) -> None:
+        self._remote = RemoteCapabilityHost(base_url, timeout=timeout)
+        self.name = name or base_url.rstrip("/")
+
+    async def ainvoke_envelope(self, envelope: InvocationEnvelope) -> InvocationResult:
+        return await asyncio.to_thread(self._remote.invoke_envelope, envelope)
+
+    async def discover(self) -> JSON:
+        return await asyncio.to_thread(self._remote.discover)
+
+    async def replay_result(self, query: "str | ReplayQuery | JSON") -> JSON:
+        return await asyncio.to_thread(self._remote.replay_result, query)
+
+    async def health(self) -> JSON:
+        return await asyncio.to_thread(self._remote.health)
+
+    def supports(self, feature: str) -> bool:
+        return False