Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 63 additions & 8 deletions backend/app/services/contract_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,26 @@ def canonical_path(self) -> str:

# Part F section header pattern — same regex as MCP precheck for behavior parity.
_PART_F_HEADER = re.compile(r"^#+\s*part\s*f\b", re.IGNORECASE | re.MULTILINE)
# Any "Part <letter>" heading — used to bound Part F to its own section instead of
# scanning to end-of-document (a later Part, or a summary line restating Part F,
# must not be read as the readiness declaration).
_PART_HEADER = re.compile(r"^#+\s*part\s*[a-z]\b", re.IGNORECASE | re.MULTILINE)
_INTEGRATION_READY_TOKEN = re.compile(
r"integration[_\s-]*tests?[_\s-]*ready", re.IGNORECASE
)
# The authoritative "**Integration Readiness:** <token>" declaration. Anchored to a
# single line (^...$, MULTILINE) so a prose sentence that merely mentions the label
# — before or after the real field — can never be captured instead of it.
_READINESS_FIELD = re.compile(
r"^\s*\**\s*integration\s+readiness[\s*:]{1,12}"
r"(?:(?P<ready>integration[_\s-]*tests?[_\s-]*ready)"
r"|(?P<not_ready>local[_\s-]*only|not[_\s-]*ready))"
r"[\s*.]{0,6}$",
re.IGNORECASE | re.MULTILINE,
)
# Detects an attempted (but unparseable) declaration — present so callers can refuse
# rather than guess, instead of silently falling back to a whole-section token scan.
_READINESS_LABEL = re.compile(r"integration\s+readiness", re.IGNORECASE)
# Heading match is intentionally lenient (any heading level h1–h6) so the
# deterministic preflight never rejects a plan the downstream LLM conversion agent
# would accept. The skill emits `## Phase N:`, but a hand-edited `# Phase N` must
Expand Down Expand Up @@ -286,6 +303,18 @@ def normalize_contract_files(
),
)

part_f = _part_f_section(analysis_text)
if part_f is not None and _readiness_field_is_ambiguous(part_f):
raise ContractRejection(
code=RejectionCode.ANALYSIS_UNREADABLE,
message=(
f"Couldn't read integration readiness from `{SPEC_COMPLETENESS_FILE}`. "
f"Part F declares an Integration Readiness field, but its value isn't "
f"`INTEGRATION_TESTS_READY` or `LOCAL_ONLY`. Re-run "
f"`check_specification_completeness`."
),
)

result: dict[str, Path] = {"analysis": analysis_path, "plan": plan_path}

if is_integration_tests_ready(analysis_text):
Expand Down Expand Up @@ -395,28 +424,54 @@ def validate_generation_contract_preflight(


def _part_f_section(analysis_text: str) -> str | None:
"""Return the text from the Part F header to end of document, or None if absent.
"""Return the text of the Part F section only, or None if absent.

Readiness must be read from Part F only — the integration-readiness token may
legitimately appear elsewhere (a Part B summary table, a preamble note) without
meaning the spec is integration-ready.
Bounded at the next "Part <letter>" heading (or end of document if there is
none) so a later section — or a summary line elsewhere that merely restates
"Part F (Integration Readiness): ..." — is never scanned as part of Part F.
"""
match = _PART_F_HEADER.search(analysis_text)
if match is None:
return None
return analysis_text[match.start():]
next_part = _PART_HEADER.search(analysis_text, match.end())
end = next_part.start() if next_part is not None else len(analysis_text)
return analysis_text[match.start():end]


def _readiness_field_is_ambiguous(part_f: str) -> bool:
"""True if Part F attempts an "Integration Readiness" declaration that doesn't
parse to a recognized token — e.g. a paraphrase or a value split across
decoration the field regex doesn't recognize.

Distinguishes "field missing entirely" (legitimate non-standard formatting,
left to the lenient fallback in ``is_integration_tests_ready``) from "field
present but unreadable" (refuse rather than guess, per callers below).
"""
return _READINESS_LABEL.search(part_f) is not None and _READINESS_FIELD.search(part_f) is None


def is_integration_tests_ready(analysis_text: str) -> bool:
"""Return True if the analysis file declares INTEGRATION_TESTS_READY in Part F.

Only the Part F section is scanned. A document with no Part F header is treated
as not-ready here; genuine "Part F missing" cases are rejected earlier in
``normalize_contract_files`` with a clearer ANALYSIS_UNREADABLE error.
Only the Part F section is scanned. Within it, the "**Integration Readiness:**"
field — anchored to its own line — is checked first, so neither a preamble
sentence nor a Rationale sentence that merely mentions the other token can be
captured in its place. If no such field is found at all (non-standard
formatting with no labeled declaration), falls back to scanning the whole
Part F section for the token. If a field IS attempted but doesn't parse, the
caller is expected to have already rejected via ``_readiness_field_is_ambiguous``
rather than reach this fallback.

A document with no Part F header is treated as not-ready here; genuine "Part F
missing" cases are rejected earlier in ``normalize_contract_files`` with a
clearer ANALYSIS_UNREADABLE error.
"""
part_f = _part_f_section(analysis_text)
if part_f is None:
return False
field_match = _READINESS_FIELD.search(part_f)
if field_match is not None:
return field_match.group("ready") is not None
return bool(_INTEGRATION_READY_TOKEN.search(part_f))


Expand Down
91 changes: 91 additions & 0 deletions backend/test/services/test_contract_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
_find_candidates,
_CanonicalFile,
_normalize_for_match,
_readiness_field_is_ambiguous,
)
from app.core.artifact_files import (
SPEC_COMPLETENESS_FILE,
Expand Down Expand Up @@ -205,6 +206,22 @@ def test_rejection_message_contains_mcp_tool_name(self, tmp_path):
normalize_contract_files(tmp_path, "docs", _logger())
assert "check_specification_completeness" in exc_info.value.message

def test_analysis_unreadable_when_readiness_field_ambiguous(self, tmp_path):
"""A Part F that attempts a declaration but doesn't parse must be refused,
not silently guessed via the whole-section fallback."""
_setup_outputs(tmp_path)
analysis = (
"## Part F: Integration & Deployment Readiness\n\n"
"**Integration Readiness:** Currently LOCAL_ONLY due to missing CI\n\n"
"**Rationale:** Meets neither criteria for INTEGRATION_TESTS_READY.\n"
)
_write(tmp_path, "docs", ANALYSIS_SUBDIR, SPEC_COMPLETENESS_FILE, analysis)
_write(tmp_path, "docs", PLANNING_SUBDIR, IMPLEMENTATION_PLAN_FILE, _MINIMAL_PLAN)
with pytest.raises(ContractRejection) as exc_info:
normalize_contract_files(tmp_path, "docs", _logger())
assert exc_info.value.code == RejectionCode.ANALYSIS_UNREADABLE
assert "check_specification_completeness" in exc_info.value.message


class TestGenerationContractPreflight:
def test_rejects_plan_with_no_phase_headings(self, tmp_path):
Expand Down Expand Up @@ -317,3 +334,77 @@ def test_no_part_f_returns_false(self):
# No Part F header at all → not ready here; the missing-Part-F case is
# rejected separately by normalize_contract_files with ANALYSIS_UNREADABLE.
assert is_integration_tests_ready("# Part A\n\nINTEGRATION_TESTS_READY\n") is False

def test_token_in_part_f_rationale_does_not_override_declared_field(self):
# Real-world regression: the field declares LOCAL_ONLY, but the Rationale
# prose explains the gap by naming the other token. Must not flip to ready.
analysis = (
"## Part F: Integration & Deployment Readiness\n\n"
"**Integration Readiness:** **LOCAL_ONLY**\n\n"
"**Rationale:** The deployment target is named, but there are no deploy "
"workflow files or IaC, and no acceptance/e2e test methodology. This meets "
"neither the deployment-methodology nor the acceptance-test criteria for "
"`INTEGRATION_TESTS_READY`.\n"
)
assert is_integration_tests_ready(analysis) is False

def test_declared_field_integration_ready_wins_over_rationale_wording(self):
analysis = (
"## Part F: Integration & Deployment Readiness\n\n"
"**Integration Readiness:** **INTEGRATION_TESTS_READY**\n\n"
"**Rationale:** All deploy workflows, IaC, and e2e methodology are present "
"and confirmed working, well beyond LOCAL_ONLY.\n"
)
assert is_integration_tests_ready(analysis) is True

def test_preamble_mention_does_not_hijack_the_declared_field(self):
# Regression: an earlier sentence naming "integration readiness" (before the
# real field) must not be captured in place of the actual declaration.
analysis = (
"## Part F: Integration & Deployment Readiness\n\n"
"Initial integration readiness: LOCAL_ONLY was assumed, but after "
"reviewing the CI workflows we upgraded the classification.\n\n"
"**Integration Readiness:** INTEGRATION_TESTS_READY\n\n"
"**Rationale:** All deploy workflows and e2e methodology confirmed working.\n"
)
assert is_integration_tests_ready(analysis) is True

def test_summary_line_after_part_f_does_not_hijack_the_declared_field(self):
# Regression: a later "- Part F (Integration Readiness): ..." summary line
# (emitted by the skill after Part F) must not be read as the declaration.
analysis = (
"## Part F: Integration & Deployment Readiness\n\n"
"**Integration Readiness:** **LOCAL_ONLY**\n\n"
"**Rationale:** No CI configured.\n\n"
"## DIMENSION STATUS\n"
"- Part F (Integration Readiness): INTEGRATION_TESTS_READY\n"
)
assert is_integration_tests_ready(analysis) is False


class TestReadinessFieldIsAmbiguous:
"""Malformed-but-attempted declarations must be flagged, not silently guessed."""

def test_paraphrased_value_is_ambiguous(self):
part_f = (
"## Part F: Integration & Deployment Readiness\n\n"
"**Integration Readiness:** Not ready yet\n\n"
"**Rationale:** Does not currently meet the bar for INTEGRATION_TESTS_READY.\n"
)
assert _readiness_field_is_ambiguous(part_f) is True

def test_qualifier_word_before_token_is_ambiguous(self):
part_f = (
"## Part F: Integration & Deployment Readiness\n\n"
"**Integration Readiness:** Currently LOCAL_ONLY due to missing CI\n\n"
"**Rationale:** Meets neither criteria for INTEGRATION_TESTS_READY.\n"
)
assert _readiness_field_is_ambiguous(part_f) is True

def test_no_label_at_all_is_not_ambiguous(self):
# Legitimate non-standard formatting (no labeled field at all) stays lenient.
assert _readiness_field_is_ambiguous("## Part F\n\nLOCAL_ONLY\n") is False

def test_well_formed_field_is_not_ambiguous(self):
part_f = "## Part F\n\n**Integration Readiness:** **LOCAL_ONLY**\n"
assert _readiness_field_is_ambiguous(part_f) is False
49 changes: 48 additions & 1 deletion mcp_server/services/run_generation_precheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,28 @@ class RejectionCode(StrEnum):

# Part F section header pattern (case-insensitive, allows for variations in formatting).
_PART_F_HEADER = re.compile(r"^#+\s*part\s*f\b", re.IGNORECASE | re.MULTILINE)
# Any "Part <letter>" heading — used to bound Part F to its own section instead of
# scanning to end-of-document (a later Part, or a summary line restating Part F,
# must not be read as the readiness declaration).
_PART_HEADER = re.compile(r"^#+\s*part\s*[a-z]\b", re.IGNORECASE | re.MULTILINE)
_INTEGRATION_READY_TOKEN = re.compile(
r"integration[_\s-]*tests?[_\s-]*ready", re.IGNORECASE
)

# The authoritative "**Integration Readiness:** <token>" declaration. Anchored to a
# single line (^...$, MULTILINE) so a prose sentence that merely mentions the label
# — before or after the real field — can never be captured instead of it.
_READINESS_FIELD = re.compile(
r"^\s*\**\s*integration\s+readiness[\s*:]{1,12}"
r"(?:(?P<ready>integration[_\s-]*tests?[_\s-]*ready)"
r"|(?P<not_ready>local[_\s-]*only|not[_\s-]*ready))"
r"[\s*.]{0,6}$",
re.IGNORECASE | re.MULTILINE,
)
# Detects an attempted (but unparseable) declaration — present so callers can refuse
# rather than guess, instead of silently falling back to a whole-section token scan.
_READINESS_LABEL = re.compile(r"integration\s+readiness", re.IGNORECASE)


def _normalize_stem(filename: str) -> str:
"""Match backend contract_validator stem normalization (case + separators)."""
Expand Down Expand Up @@ -95,16 +113,33 @@ def _find_required_md(


def _part_f_section(analysis_text: str) -> str | None:
"""Return the text of the Part F section only, or None if absent.

Bounded at the next "Part <letter>" heading (or end of document if there is
none) so a later section — or a summary line elsewhere that merely restates
"Part F (Integration Readiness): ..." — is never scanned as part of Part F.
"""
match = _PART_F_HEADER.search(analysis_text)
if match is None:
return None
return analysis_text[match.start() :]
next_part = _PART_HEADER.search(analysis_text, match.end())
end = next_part.start() if next_part is not None else len(analysis_text)
return analysis_text[match.start():end]


def _readiness_field_is_ambiguous(part_f: str) -> bool:
"""True if Part F attempts an "Integration Readiness" declaration that doesn't
parse to a recognized token — refuse rather than guess (see `precheck`)."""
return _READINESS_LABEL.search(part_f) is not None and _READINESS_FIELD.search(part_f) is None


def _is_integration_tests_ready(analysis_text: str) -> bool:
part_f = _part_f_section(analysis_text)
if part_f is None:
return False
field_match = _READINESS_FIELD.search(part_f)
if field_match is not None:
return field_match.group("ready") is not None
return bool(_INTEGRATION_READY_TOKEN.search(part_f))


Expand Down Expand Up @@ -196,6 +231,18 @@ def precheck(
),
)

part_f = _part_f_section(analysis_text)
if part_f is not None and _readiness_field_is_ambiguous(part_f):
return Rejection(
code=RejectionCode.ANALYSIS_UNREADABLE,
message=(
f"Couldn't read integration readiness from `{SPEC_COMPLETENESS_FILE}`. "
f"Part F declares an Integration Readiness field, but its value isn't "
f"`INTEGRATION_TESTS_READY` or `LOCAL_ONLY`. Re-run "
f"`check_specification_completeness`."
),
)

if _is_integration_tests_ready(analysis_text):
e2e_plan_file = _find_required_md(outputs_path, PLANNING_SUBDIR, E2E_TEST_PLAN_FILE)
if e2e_plan_file is None:
Expand Down
60 changes: 60 additions & 0 deletions mcp_server/tests/test_run_generation_precheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,63 @@ def test_integration_token_outside_part_f_does_not_require_e2e(self, tmp_path):
_write_analysis(tmp_path, content=analysis)
_write_plan(tmp_path)
assert precheck(tmp_path, "specs", "docs") is None

def test_integration_token_in_part_f_rationale_does_not_require_e2e(self, tmp_path):
"""Real-world regression: Rationale names the other token while explaining
why it does NOT apply. The declared field (LOCAL_ONLY) must still win."""
_write_spec(tmp_path)
analysis = (
"## Part F: Integration & Deployment Readiness\n\n"
"**Integration Readiness:** **LOCAL_ONLY**\n\n"
"**Rationale:** No deploy workflow files or IaC, and no acceptance/e2e "
"test methodology. This meets neither criteria for `INTEGRATION_TESTS_READY`.\n"
)
_write_analysis(tmp_path, content=analysis)
_write_plan(tmp_path)
assert precheck(tmp_path, "specs", "docs") is None

def test_preamble_mention_does_not_hijack_the_declared_field(self, tmp_path):
"""Regression: an earlier sentence naming 'integration readiness' (before the
real field) must not be captured in place of the actual declaration."""
_write_spec(tmp_path)
analysis = (
"## Part F: Integration & Deployment Readiness\n\n"
"Initial integration readiness: LOCAL_ONLY was assumed, but after "
"reviewing the CI workflows we upgraded the classification.\n\n"
"**Integration Readiness:** INTEGRATION_TESTS_READY\n\n"
"**Rationale:** All deploy workflows and e2e methodology confirmed working.\n"
)
_write_analysis(tmp_path, content=analysis)
_write_plan(tmp_path)
_write_e2e_plan(tmp_path)
assert precheck(tmp_path, "specs", "docs") is None

def test_summary_line_after_part_f_does_not_hijack_the_declared_field(self, tmp_path):
"""Regression: a later '- Part F (Integration Readiness): ...' summary line
(emitted by the skill after Part F) must not be read as the declaration."""
_write_spec(tmp_path)
analysis = (
"## Part F: Integration & Deployment Readiness\n\n"
"**Integration Readiness:** **LOCAL_ONLY**\n\n"
"**Rationale:** No CI configured.\n\n"
"## DIMENSION STATUS\n"
"- Part F (Integration Readiness): INTEGRATION_TESTS_READY\n"
)
_write_analysis(tmp_path, content=analysis)
_write_plan(tmp_path)
assert precheck(tmp_path, "specs", "docs") is None

def test_analysis_unreadable_when_readiness_field_ambiguous(self, tmp_path):
"""A Part F that attempts a declaration but doesn't parse must be refused,
not silently guessed via the whole-section fallback."""
_write_spec(tmp_path)
analysis = (
"## Part F: Integration & Deployment Readiness\n\n"
"**Integration Readiness:** Currently LOCAL_ONLY due to missing CI\n\n"
"**Rationale:** Meets neither criteria for INTEGRATION_TESTS_READY.\n"
)
_write_analysis(tmp_path, content=analysis)
_write_plan(tmp_path)
result = precheck(tmp_path, "specs", "docs")
assert result is not None
assert result.code == RejectionCode.ANALYSIS_UNREADABLE