diff --git a/backend/app/services/contract_validator.py b/backend/app/services/contract_validator.py index 5ef14c9..320dc18 100644 --- a/backend/app/services/contract_validator.py +++ b/backend/app/services/contract_validator.py @@ -122,9 +122,26 @@ def canonical_path(self) -> str: # Part F section header pattern — same regex as MCP precheck for behavior parity. _PART_F_HEADER = re.compile(r"^#+\s*part\s*f\b", re.IGNORECASE | re.MULTILINE) +# Any "Part " heading — used to bound Part F to its own section instead of +# scanning to end-of-document (a later Part, or a summary line restating Part F, +# must not be read as the readiness declaration). +_PART_HEADER = re.compile(r"^#+\s*part\s*[a-z]\b", re.IGNORECASE | re.MULTILINE) _INTEGRATION_READY_TOKEN = re.compile( r"integration[_\s-]*tests?[_\s-]*ready", re.IGNORECASE ) +# The authoritative "**Integration Readiness:** " declaration. Anchored to a +# single line (^...$, MULTILINE) so a prose sentence that merely mentions the label +# — before or after the real field — can never be captured instead of it. +_READINESS_FIELD = re.compile( + r"^\s*\**\s*integration\s+readiness[\s*:]{1,12}" + r"(?:(?Pintegration[_\s-]*tests?[_\s-]*ready)" + r"|(?Plocal[_\s-]*only|not[_\s-]*ready))" + r"[\s*.]{0,6}$", + re.IGNORECASE | re.MULTILINE, +) +# Detects an attempted (but unparseable) declaration — present so callers can refuse +# rather than guess, instead of silently falling back to a whole-section token scan. +_READINESS_LABEL = re.compile(r"integration\s+readiness", re.IGNORECASE) # Heading match is intentionally lenient (any heading level h1–h6) so the # deterministic preflight never rejects a plan the downstream LLM conversion agent # would accept. The skill emits `## Phase N:`, but a hand-edited `# Phase N` must @@ -286,6 +303,18 @@ def normalize_contract_files( ), ) + part_f = _part_f_section(analysis_text) + if part_f is not None and _readiness_field_is_ambiguous(part_f): + raise ContractRejection( + code=RejectionCode.ANALYSIS_UNREADABLE, + message=( + f"Couldn't read integration readiness from `{SPEC_COMPLETENESS_FILE}`. " + f"Part F declares an Integration Readiness field, but its value isn't " + f"`INTEGRATION_TESTS_READY` or `LOCAL_ONLY`. Re-run " + f"`check_specification_completeness`." + ), + ) + result: dict[str, Path] = {"analysis": analysis_path, "plan": plan_path} if is_integration_tests_ready(analysis_text): @@ -395,28 +424,54 @@ def validate_generation_contract_preflight( def _part_f_section(analysis_text: str) -> str | None: - """Return the text from the Part F header to end of document, or None if absent. + """Return the text of the Part F section only, or None if absent. - Readiness must be read from Part F only — the integration-readiness token may - legitimately appear elsewhere (a Part B summary table, a preamble note) without - meaning the spec is integration-ready. + Bounded at the next "Part " heading (or end of document if there is + none) so a later section — or a summary line elsewhere that merely restates + "Part F (Integration Readiness): ..." — is never scanned as part of Part F. """ match = _PART_F_HEADER.search(analysis_text) if match is None: return None - return analysis_text[match.start():] + next_part = _PART_HEADER.search(analysis_text, match.end()) + end = next_part.start() if next_part is not None else len(analysis_text) + return analysis_text[match.start():end] + + +def _readiness_field_is_ambiguous(part_f: str) -> bool: + """True if Part F attempts an "Integration Readiness" declaration that doesn't + parse to a recognized token — e.g. a paraphrase or a value split across + decoration the field regex doesn't recognize. + + Distinguishes "field missing entirely" (legitimate non-standard formatting, + left to the lenient fallback in ``is_integration_tests_ready``) from "field + present but unreadable" (refuse rather than guess, per callers below). + """ + return _READINESS_LABEL.search(part_f) is not None and _READINESS_FIELD.search(part_f) is None def is_integration_tests_ready(analysis_text: str) -> bool: """Return True if the analysis file declares INTEGRATION_TESTS_READY in Part F. - Only the Part F section is scanned. A document with no Part F header is treated - as not-ready here; genuine "Part F missing" cases are rejected earlier in - ``normalize_contract_files`` with a clearer ANALYSIS_UNREADABLE error. + Only the Part F section is scanned. Within it, the "**Integration Readiness:**" + field — anchored to its own line — is checked first, so neither a preamble + sentence nor a Rationale sentence that merely mentions the other token can be + captured in its place. If no such field is found at all (non-standard + formatting with no labeled declaration), falls back to scanning the whole + Part F section for the token. If a field IS attempted but doesn't parse, the + caller is expected to have already rejected via ``_readiness_field_is_ambiguous`` + rather than reach this fallback. + + A document with no Part F header is treated as not-ready here; genuine "Part F + missing" cases are rejected earlier in ``normalize_contract_files`` with a + clearer ANALYSIS_UNREADABLE error. """ part_f = _part_f_section(analysis_text) if part_f is None: return False + field_match = _READINESS_FIELD.search(part_f) + if field_match is not None: + return field_match.group("ready") is not None return bool(_INTEGRATION_READY_TOKEN.search(part_f)) diff --git a/backend/test/services/test_contract_validator.py b/backend/test/services/test_contract_validator.py index 9b57f09..88f5569 100644 --- a/backend/test/services/test_contract_validator.py +++ b/backend/test/services/test_contract_validator.py @@ -11,6 +11,7 @@ _find_candidates, _CanonicalFile, _normalize_for_match, + _readiness_field_is_ambiguous, ) from app.core.artifact_files import ( SPEC_COMPLETENESS_FILE, @@ -205,6 +206,22 @@ def test_rejection_message_contains_mcp_tool_name(self, tmp_path): normalize_contract_files(tmp_path, "docs", _logger()) assert "check_specification_completeness" in exc_info.value.message + def test_analysis_unreadable_when_readiness_field_ambiguous(self, tmp_path): + """A Part F that attempts a declaration but doesn't parse must be refused, + not silently guessed via the whole-section fallback.""" + _setup_outputs(tmp_path) + analysis = ( + "## Part F: Integration & Deployment Readiness\n\n" + "**Integration Readiness:** Currently LOCAL_ONLY due to missing CI\n\n" + "**Rationale:** Meets neither criteria for INTEGRATION_TESTS_READY.\n" + ) + _write(tmp_path, "docs", ANALYSIS_SUBDIR, SPEC_COMPLETENESS_FILE, analysis) + _write(tmp_path, "docs", PLANNING_SUBDIR, IMPLEMENTATION_PLAN_FILE, _MINIMAL_PLAN) + with pytest.raises(ContractRejection) as exc_info: + normalize_contract_files(tmp_path, "docs", _logger()) + assert exc_info.value.code == RejectionCode.ANALYSIS_UNREADABLE + assert "check_specification_completeness" in exc_info.value.message + class TestGenerationContractPreflight: def test_rejects_plan_with_no_phase_headings(self, tmp_path): @@ -317,3 +334,77 @@ def test_no_part_f_returns_false(self): # No Part F header at all → not ready here; the missing-Part-F case is # rejected separately by normalize_contract_files with ANALYSIS_UNREADABLE. assert is_integration_tests_ready("# Part A\n\nINTEGRATION_TESTS_READY\n") is False + + def test_token_in_part_f_rationale_does_not_override_declared_field(self): + # Real-world regression: the field declares LOCAL_ONLY, but the Rationale + # prose explains the gap by naming the other token. Must not flip to ready. + analysis = ( + "## Part F: Integration & Deployment Readiness\n\n" + "**Integration Readiness:** **LOCAL_ONLY**\n\n" + "**Rationale:** The deployment target is named, but there are no deploy " + "workflow files or IaC, and no acceptance/e2e test methodology. This meets " + "neither the deployment-methodology nor the acceptance-test criteria for " + "`INTEGRATION_TESTS_READY`.\n" + ) + assert is_integration_tests_ready(analysis) is False + + def test_declared_field_integration_ready_wins_over_rationale_wording(self): + analysis = ( + "## Part F: Integration & Deployment Readiness\n\n" + "**Integration Readiness:** **INTEGRATION_TESTS_READY**\n\n" + "**Rationale:** All deploy workflows, IaC, and e2e methodology are present " + "and confirmed working, well beyond LOCAL_ONLY.\n" + ) + assert is_integration_tests_ready(analysis) is True + + def test_preamble_mention_does_not_hijack_the_declared_field(self): + # Regression: an earlier sentence naming "integration readiness" (before the + # real field) must not be captured in place of the actual declaration. + analysis = ( + "## Part F: Integration & Deployment Readiness\n\n" + "Initial integration readiness: LOCAL_ONLY was assumed, but after " + "reviewing the CI workflows we upgraded the classification.\n\n" + "**Integration Readiness:** INTEGRATION_TESTS_READY\n\n" + "**Rationale:** All deploy workflows and e2e methodology confirmed working.\n" + ) + assert is_integration_tests_ready(analysis) is True + + def test_summary_line_after_part_f_does_not_hijack_the_declared_field(self): + # Regression: a later "- Part F (Integration Readiness): ..." summary line + # (emitted by the skill after Part F) must not be read as the declaration. + analysis = ( + "## Part F: Integration & Deployment Readiness\n\n" + "**Integration Readiness:** **LOCAL_ONLY**\n\n" + "**Rationale:** No CI configured.\n\n" + "## DIMENSION STATUS\n" + "- Part F (Integration Readiness): INTEGRATION_TESTS_READY\n" + ) + assert is_integration_tests_ready(analysis) is False + + +class TestReadinessFieldIsAmbiguous: + """Malformed-but-attempted declarations must be flagged, not silently guessed.""" + + def test_paraphrased_value_is_ambiguous(self): + part_f = ( + "## Part F: Integration & Deployment Readiness\n\n" + "**Integration Readiness:** Not ready yet\n\n" + "**Rationale:** Does not currently meet the bar for INTEGRATION_TESTS_READY.\n" + ) + assert _readiness_field_is_ambiguous(part_f) is True + + def test_qualifier_word_before_token_is_ambiguous(self): + part_f = ( + "## Part F: Integration & Deployment Readiness\n\n" + "**Integration Readiness:** Currently LOCAL_ONLY due to missing CI\n\n" + "**Rationale:** Meets neither criteria for INTEGRATION_TESTS_READY.\n" + ) + assert _readiness_field_is_ambiguous(part_f) is True + + def test_no_label_at_all_is_not_ambiguous(self): + # Legitimate non-standard formatting (no labeled field at all) stays lenient. + assert _readiness_field_is_ambiguous("## Part F\n\nLOCAL_ONLY\n") is False + + def test_well_formed_field_is_not_ambiguous(self): + part_f = "## Part F\n\n**Integration Readiness:** **LOCAL_ONLY**\n" + assert _readiness_field_is_ambiguous(part_f) is False diff --git a/mcp_server/services/run_generation_precheck.py b/mcp_server/services/run_generation_precheck.py index 54d05e7..0cb2e90 100644 --- a/mcp_server/services/run_generation_precheck.py +++ b/mcp_server/services/run_generation_precheck.py @@ -53,10 +53,28 @@ class RejectionCode(StrEnum): # Part F section header pattern (case-insensitive, allows for variations in formatting). _PART_F_HEADER = re.compile(r"^#+\s*part\s*f\b", re.IGNORECASE | re.MULTILINE) +# Any "Part " heading — used to bound Part F to its own section instead of +# scanning to end-of-document (a later Part, or a summary line restating Part F, +# must not be read as the readiness declaration). +_PART_HEADER = re.compile(r"^#+\s*part\s*[a-z]\b", re.IGNORECASE | re.MULTILINE) _INTEGRATION_READY_TOKEN = re.compile( r"integration[_\s-]*tests?[_\s-]*ready", re.IGNORECASE ) +# The authoritative "**Integration Readiness:** " declaration. Anchored to a +# single line (^...$, MULTILINE) so a prose sentence that merely mentions the label +# — before or after the real field — can never be captured instead of it. +_READINESS_FIELD = re.compile( + r"^\s*\**\s*integration\s+readiness[\s*:]{1,12}" + r"(?:(?Pintegration[_\s-]*tests?[_\s-]*ready)" + r"|(?Plocal[_\s-]*only|not[_\s-]*ready))" + r"[\s*.]{0,6}$", + re.IGNORECASE | re.MULTILINE, +) +# Detects an attempted (but unparseable) declaration — present so callers can refuse +# rather than guess, instead of silently falling back to a whole-section token scan. +_READINESS_LABEL = re.compile(r"integration\s+readiness", re.IGNORECASE) + def _normalize_stem(filename: str) -> str: """Match backend contract_validator stem normalization (case + separators).""" @@ -95,16 +113,33 @@ def _find_required_md( def _part_f_section(analysis_text: str) -> str | None: + """Return the text of the Part F section only, or None if absent. + + Bounded at the next "Part " heading (or end of document if there is + none) so a later section — or a summary line elsewhere that merely restates + "Part F (Integration Readiness): ..." — is never scanned as part of Part F. + """ match = _PART_F_HEADER.search(analysis_text) if match is None: return None - return analysis_text[match.start() :] + next_part = _PART_HEADER.search(analysis_text, match.end()) + end = next_part.start() if next_part is not None else len(analysis_text) + return analysis_text[match.start():end] + + +def _readiness_field_is_ambiguous(part_f: str) -> bool: + """True if Part F attempts an "Integration Readiness" declaration that doesn't + parse to a recognized token — refuse rather than guess (see `precheck`).""" + return _READINESS_LABEL.search(part_f) is not None and _READINESS_FIELD.search(part_f) is None def _is_integration_tests_ready(analysis_text: str) -> bool: part_f = _part_f_section(analysis_text) if part_f is None: return False + field_match = _READINESS_FIELD.search(part_f) + if field_match is not None: + return field_match.group("ready") is not None return bool(_INTEGRATION_READY_TOKEN.search(part_f)) @@ -196,6 +231,18 @@ def precheck( ), ) + part_f = _part_f_section(analysis_text) + if part_f is not None and _readiness_field_is_ambiguous(part_f): + return Rejection( + code=RejectionCode.ANALYSIS_UNREADABLE, + message=( + f"Couldn't read integration readiness from `{SPEC_COMPLETENESS_FILE}`. " + f"Part F declares an Integration Readiness field, but its value isn't " + f"`INTEGRATION_TESTS_READY` or `LOCAL_ONLY`. Re-run " + f"`check_specification_completeness`." + ), + ) + if _is_integration_tests_ready(analysis_text): e2e_plan_file = _find_required_md(outputs_path, PLANNING_SUBDIR, E2E_TEST_PLAN_FILE) if e2e_plan_file is None: diff --git a/mcp_server/tests/test_run_generation_precheck.py b/mcp_server/tests/test_run_generation_precheck.py index fed7165..3f892f3 100644 --- a/mcp_server/tests/test_run_generation_precheck.py +++ b/mcp_server/tests/test_run_generation_precheck.py @@ -153,3 +153,63 @@ def test_integration_token_outside_part_f_does_not_require_e2e(self, tmp_path): _write_analysis(tmp_path, content=analysis) _write_plan(tmp_path) assert precheck(tmp_path, "specs", "docs") is None + + def test_integration_token_in_part_f_rationale_does_not_require_e2e(self, tmp_path): + """Real-world regression: Rationale names the other token while explaining + why it does NOT apply. The declared field (LOCAL_ONLY) must still win.""" + _write_spec(tmp_path) + analysis = ( + "## Part F: Integration & Deployment Readiness\n\n" + "**Integration Readiness:** **LOCAL_ONLY**\n\n" + "**Rationale:** No deploy workflow files or IaC, and no acceptance/e2e " + "test methodology. This meets neither criteria for `INTEGRATION_TESTS_READY`.\n" + ) + _write_analysis(tmp_path, content=analysis) + _write_plan(tmp_path) + assert precheck(tmp_path, "specs", "docs") is None + + def test_preamble_mention_does_not_hijack_the_declared_field(self, tmp_path): + """Regression: an earlier sentence naming 'integration readiness' (before the + real field) must not be captured in place of the actual declaration.""" + _write_spec(tmp_path) + analysis = ( + "## Part F: Integration & Deployment Readiness\n\n" + "Initial integration readiness: LOCAL_ONLY was assumed, but after " + "reviewing the CI workflows we upgraded the classification.\n\n" + "**Integration Readiness:** INTEGRATION_TESTS_READY\n\n" + "**Rationale:** All deploy workflows and e2e methodology confirmed working.\n" + ) + _write_analysis(tmp_path, content=analysis) + _write_plan(tmp_path) + _write_e2e_plan(tmp_path) + assert precheck(tmp_path, "specs", "docs") is None + + def test_summary_line_after_part_f_does_not_hijack_the_declared_field(self, tmp_path): + """Regression: a later '- Part F (Integration Readiness): ...' summary line + (emitted by the skill after Part F) must not be read as the declaration.""" + _write_spec(tmp_path) + analysis = ( + "## Part F: Integration & Deployment Readiness\n\n" + "**Integration Readiness:** **LOCAL_ONLY**\n\n" + "**Rationale:** No CI configured.\n\n" + "## DIMENSION STATUS\n" + "- Part F (Integration Readiness): INTEGRATION_TESTS_READY\n" + ) + _write_analysis(tmp_path, content=analysis) + _write_plan(tmp_path) + assert precheck(tmp_path, "specs", "docs") is None + + def test_analysis_unreadable_when_readiness_field_ambiguous(self, tmp_path): + """A Part F that attempts a declaration but doesn't parse must be refused, + not silently guessed via the whole-section fallback.""" + _write_spec(tmp_path) + analysis = ( + "## Part F: Integration & Deployment Readiness\n\n" + "**Integration Readiness:** Currently LOCAL_ONLY due to missing CI\n\n" + "**Rationale:** Meets neither criteria for INTEGRATION_TESTS_READY.\n" + ) + _write_analysis(tmp_path, content=analysis) + _write_plan(tmp_path) + result = precheck(tmp_path, "specs", "docs") + assert result is not None + assert result.code == RejectionCode.ANALYSIS_UNREADABLE