diff --git a/backend/app/api/v1/generation_sessions.py b/backend/app/api/v1/generation_sessions.py index b1d6867..9b210da 100644 --- a/backend/app/api/v1/generation_sessions.py +++ b/backend/app/api/v1/generation_sessions.py @@ -71,8 +71,10 @@ _multi_workspace_result_to_store, _handle_workflow_exception, ) +from app.core.artifact_files import MULTI_WORKSPACE_REPORT_HTML_FILE +from app.core.artifact_subdirs import REPORT_SUBDIR from app.services.workspace_pool import WorkspacePoolService -from app.services.artifact_store import ArtifactStore +from app.services.artifact_store import ARTIFACTS_BASE, ArtifactStore from app.services.agent_stream_broker import get_agent_stream_broker from app.services.mcp_prune import resolve_enabled_mcps_and_set_telemetry from app.schemas.model_token_usage import ModelTokenUsage @@ -1662,3 +1664,29 @@ async def download_generation_session_outputs( detail=f"Failed to build outputs tarball: {e}", ) + +@router.get("/{generation_id}/report.html") +@track_event(event_name="download_report_html_triggered") +async def download_generation_session_report_html( + generation_id: str, + request: Request, + _: None = Depends(require_generation_session_owner), +): + """ + Serve the P10Y multi-workspace estimation report as raw HTML. + + Lightweight alternative to ``/outputs`` β€” a few KB rather than a full + tarball of workspace code, so it's fast enough to fetch on a single local + TUI keypress. Raises 404 when the report hasn't been written yet (P10Y + estimation not complete, or this is an older run predating the report). 
+ """ + report_path = ( + ARTIFACTS_BASE / generation_id / REPORT_SUBDIR / MULTI_WORKSPACE_REPORT_HTML_FILE + ) + if not report_path.is_file(): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"No HTML report found for generation session {generation_id}.", + ) + return Response(content=report_path.read_bytes(), media_type="text/html") + diff --git a/backend/app/core/artifact_files.py b/backend/app/core/artifact_files.py index 0b49919..2b9f957 100644 --- a/backend/app/core/artifact_files.py +++ b/backend/app/core/artifact_files.py @@ -11,3 +11,9 @@ SPEC_COMPLETENESS_FILE = "specification_completeness.md" IMPLEMENTATION_PLAN_FILE = "IMPLEMENTATION_PLAN.md" E2E_TEST_PLAN_FILE = "e2e-test-plan.md" + +# P10Y multi-workspace estimation report β€” written by the estimation workflow +# under ARTIFACTS_BASE/{generation_id}/report/, read back by the report-html +# API endpoint and the local TUI. +MULTI_WORKSPACE_REPORT_MD_FILE = "multi-workspace-estimation-report.md" +MULTI_WORKSPACE_REPORT_HTML_FILE = "multi-workspace-estimation-report.html" diff --git a/backend/app/core/notifications.py b/backend/app/core/notifications.py index 6b3378b..cd9e4de 100644 --- a/backend/app/core/notifications.py +++ b/backend/app/core/notifications.py @@ -11,7 +11,7 @@ from app.core.config import EmailConfig from app.core.config import settings -from app.database.interface import IDatabase +from app.database.interface import IDatabase, ReadOnlyDatabase from app.schemas.estimate import ( ComparativeAnalysis, EstimationMetrics, @@ -1060,412 +1060,438 @@ def _build_generation_session_email( db: Optional[IDatabase], notification_kind: GenerationSessionNotificationKind = GenerationSessionNotificationKind.COMPLETE, ) -> Tuple[str, str]: - """ - Build HTML and plain text email content for generation session completion. - + """Build HTML and plain text email content for generation session completion. 
+ Returns: Tuple of (html_content, plain_content) """ - pre_deploy = notification_kind == GenerationSessionNotificationKind.CODING_COMPLETE_PRE_DEPLOY - variance = _p10y_variance_summary(result, pre_deploy=pre_deploy) + return render_generation_session_report_html( + generation_id=generation_id, + workspace_ids=workspace_ids, + result=result, + spec_path=spec_path, + db=db, + notification_kind=notification_kind, + ) - try: - final_estimate = result.summary.risk_assessment.final_estimate if result.summary.risk_assessment else result.summary.average_hours - except (AttributeError, KeyError): - final_estimate = 0.0 - - try: - average_hours = result.summary.average_hours - except (AttributeError, KeyError): - average_hours = 0.0 - - try: - min_hours = result.summary.min_hours - except (AttributeError, KeyError): - min_hours = 0.0 - - try: - max_hours = result.summary.max_hours - except (AttributeError, KeyError): - max_hours = 0.0 - final_estimate_display = ( - "Pending (reported after P10Y phase)" if pre_deploy else f"{final_estimate:.1f} hours" - ) - average_display = ( - "Pending" if pre_deploy else f"{average_hours:.1f} hours" - ) - range_display = ( - "Pending (P10Y after deploy)" if pre_deploy else f"{min_hours:.1f} - {max_hours:.1f} hours" - ) - cost_display = _session_llm_cost_display(result) - - # Extract timestamp with safe fallback - try: - timestamp = result.timestamp if hasattr(result, 'timestamp') else "Unknown" - except (AttributeError, KeyError): - timestamp = "Unknown" - - # Extract workspace generations - try: - workspace_estimations = result.workspace_estimations if hasattr(result, 'workspace_estimations') else None - except (AttributeError, KeyError): - workspace_estimations = None - - # Build per-workspace model/token lookup from workspace_estimations - ws_est_by_name = {} - if workspace_estimations: - for ws_est in workspace_estimations: - try: - ws_name = getattr(ws_est, 'workspace_name', None) - if ws_name: - ws_est_by_name[ws_name] = ws_est - 
except (AttributeError, TypeError): - continue +def render_generation_session_report_html( + generation_id: str, + workspace_ids: List[str], + result: Any, + spec_path: str, + db: Optional[ReadOnlyDatabase], + notification_kind: GenerationSessionNotificationKind = GenerationSessionNotificationKind.COMPLETE, +) -> Tuple[str, str]: + """Build HTML and plain text P10Y report content, independent of any notifier. + + Shared by ``EmailNotifier`` (sent as an email body) and the P10Y workflow + (saved to disk next to the markdown reports, regardless of whether email + or Slack are configured). + + Returns: + Tuple of (html_content, plain_content) + """ + pre_deploy = notification_kind == GenerationSessionNotificationKind.CODING_COMPLETE_PRE_DEPLOY + variance = _p10y_variance_summary(result, pre_deploy=pre_deploy) - # Pre-compute codegen usage lines once per workspace (reused in HTML + plain-text) - ws_codegen_lines: dict[str, list[str]] = { - ws_id: _workspace_codegen_usage_lines(ws_est) - for ws_id, ws_est in ws_est_by_name.items() - } + try: + final_estimate = result.summary.risk_assessment.final_estimate if result.summary.risk_assessment else result.summary.average_hours + except (AttributeError, KeyError): + final_estimate = 0.0 - # Build repository links - repo_links = [] - if db: - for workspace_id in workspace_ids: - try: - ws_doc = db.get("workspaces", workspace_id) - if ws_doc and ws_doc.get("repo_url"): - repo_url = ws_doc["repo_url"] - # Remove .git suffix if present - if repo_url.endswith(".git"): - repo_url = repo_url[:-4] - branch_name = GitArchiveService.branch_name(generation_id) - branch_url = f"{repo_url}/tree/{branch_name}" - - # Get model/token info from matching WorkspaceEstimation - ws_est = ws_est_by_name.get(workspace_id) - mu_html = getattr(ws_est, "model_usage", None) if ws_est else None - model_name = (mu_html.model_name if mu_html else None) or None - input_tokens = mu_html.input_tokens if mu_html else None - output_tokens = 
mu_html.output_tokens if mu_html else None - agent_num_turns = mu_html.num_turns if mu_html else None - cache_write = mu_html.cache_write_tokens if mu_html else None - cache_read = mu_html.cache_read_tokens if mu_html else None - llm_cost = getattr(ws_est, "total_usd_cost", None) if ws_est else None - - repo_links.append({ - "workspace_id": workspace_id, - "repo_url": repo_url, - "branch_url": branch_url, - "model_name": model_name, - "input_tokens": input_tokens, - "output_tokens": output_tokens, - "agent_num_turns": agent_num_turns, - "cache_write_tokens": cache_write, - "cache_read_tokens": cache_read, - "total_usd_cost": llm_cost, - }) - except Exception as e: - self.logger.warning(f"Failed to retrieve workspace {workspace_id}: {e}") - - # Extract component breakdown - component_breakdown = {} - if workspace_estimations: - # Collect all unique components across all workspaces - all_components = set() + try: + average_hours = result.summary.average_hours + except (AttributeError, KeyError): + average_hours = 0.0 + + try: + min_hours = result.summary.min_hours + except (AttributeError, KeyError): + min_hours = 0.0 + + try: + max_hours = result.summary.max_hours + except (AttributeError, KeyError): + max_hours = 0.0 + + final_estimate_display = ( + "Pending (reported after P10Y phase)" if pre_deploy else f"{final_estimate:.1f} hours" + ) + average_display = ( + "Pending" if pre_deploy else f"{average_hours:.1f} hours" + ) + range_display = ( + "Pending (P10Y after deploy)" if pre_deploy else f"{min_hours:.1f} - {max_hours:.1f} hours" + ) + cost_display = _session_llm_cost_display(result) + + # Extract timestamp with safe fallback + try: + timestamp = result.timestamp if hasattr(result, 'timestamp') else "Unknown" + except (AttributeError, KeyError): + timestamp = "Unknown" + + # Extract workspace generations + try: + workspace_estimations = result.workspace_estimations if hasattr(result, 'workspace_estimations') else None + except (AttributeError, KeyError): + 
workspace_estimations = None + + # Build per-workspace model/token lookup from workspace_estimations + ws_est_by_name = {} + if workspace_estimations: + for ws_est in workspace_estimations: + try: + ws_name = getattr(ws_est, 'workspace_name', None) + if ws_name: + ws_est_by_name[ws_name] = ws_est + except (AttributeError, TypeError): + continue + + # Pre-compute codegen usage lines once per workspace (reused in HTML + plain-text) + ws_codegen_lines: dict[str, list[str]] = { + ws_id: _workspace_codegen_usage_lines(ws_est) + for ws_id, ws_est in ws_est_by_name.items() + } + + # Build repository links + repo_links = [] + if db: + for workspace_id in workspace_ids: + try: + ws_doc = db.get("workspaces", workspace_id) + if ws_doc and ws_doc.get("repo_url"): + repo_url = ws_doc["repo_url"] + # Remove .git suffix if present + if repo_url.endswith(".git"): + repo_url = repo_url[:-4] + branch_name = GitArchiveService.branch_name(generation_id) + branch_url = f"{repo_url}/tree/{branch_name}" + + # Get model/token info from matching WorkspaceEstimation + ws_est = ws_est_by_name.get(workspace_id) + mu_html = getattr(ws_est, "model_usage", None) if ws_est else None + model_name = (mu_html.model_name if mu_html else None) or None + input_tokens = mu_html.input_tokens if mu_html else None + output_tokens = mu_html.output_tokens if mu_html else None + agent_num_turns = mu_html.num_turns if mu_html else None + cache_write = mu_html.cache_write_tokens if mu_html else None + cache_read = mu_html.cache_read_tokens if mu_html else None + llm_cost = getattr(ws_est, "total_usd_cost", None) if ws_est else None + + repo_links.append({ + "workspace_id": workspace_id, + "repo_url": repo_url, + "branch_url": branch_url, + "model_name": model_name, + "input_tokens": input_tokens, + "output_tokens": output_tokens, + "agent_num_turns": agent_num_turns, + "cache_write_tokens": cache_write, + "cache_read_tokens": cache_read, + "total_usd_cost": llm_cost, + }) + except Exception as e: + 
logger.warning(f"Failed to retrieve workspace {workspace_id}: {e}") + + # Extract component breakdown + component_breakdown = {} + if workspace_estimations: + # Collect all unique components across all workspaces + all_components = set() + for ws_est in workspace_estimations: + try: + if hasattr(ws_est, 'component_breakdown') and ws_est.component_breakdown: + all_components.update(ws_est.component_breakdown.keys()) + except (AttributeError, TypeError): + continue # Skip this workspace if component_breakdown is missing or invalid + + # Build component breakdown with hours per workspace + for component_name in sorted(all_components): + component_data = { + "name": component_name, + "workspaces": {} + } for ws_est in workspace_estimations: try: - if hasattr(ws_est, 'component_breakdown') and ws_est.component_breakdown: - all_components.update(ws_est.component_breakdown.keys()) - except (AttributeError, TypeError): - continue # Skip this workspace if component_breakdown is missing or invalid - - # Build component breakdown with hours per workspace - for component_name in sorted(all_components): - component_data = { - "name": component_name, - "workspaces": {} - } - for ws_est in workspace_estimations: - try: - if (hasattr(ws_est, 'component_breakdown') and - component_name in ws_est.component_breakdown): - comp = ws_est.component_breakdown[component_name] - workspace_name = getattr(ws_est, 'workspace_name', 'unknown') - component_data["workspaces"][workspace_name] = { - "hours": getattr(comp, 'hours', 0.0), - "new_work": getattr(comp, 'new_work', 0.0), - "refactor": getattr(comp, 'refactor', 0.0), - "rework": getattr(comp, 'rework', 0.0), - "quality_score": getattr(comp, 'quality_score', 0.0) - } - except (AttributeError, TypeError, KeyError): - continue # Skip this workspace/component if data is missing - component_breakdown[component_name] = component_data - - # Build HTML content - html_parts = [] - html_parts.append(""" - - - - - - - """) - - html_parts.append('
') + if (hasattr(ws_est, 'component_breakdown') and + component_name in ws_est.component_breakdown): + comp = ws_est.component_breakdown[component_name] + workspace_name = getattr(ws_est, 'workspace_name', 'unknown') + component_data["workspaces"][workspace_name] = { + "hours": getattr(comp, 'hours', 0.0), + "new_work": getattr(comp, 'new_work', 0.0), + "refactor": getattr(comp, 'refactor', 0.0), + "rework": getattr(comp, 'rework', 0.0), + "quality_score": getattr(comp, 'quality_score', 0.0) + } + except (AttributeError, TypeError, KeyError): + continue # Skip this workspace/component if data is missing + component_breakdown[component_name] = component_data + + # Build HTML content + html_parts = [] + html_parts.append(""" + + + + + + + """) + + html_parts.append('
') + html_parts.append( + '

πŸš€ SpecFlow β€” Coding complete, starting deployment & QA

' + if pre_deploy + else '

βœ… SpecFlow Iteration Complete

' + ) + html_parts.append('
') + + html_parts.append('
') + + # Summary section + html_parts.append('
') + html_parts.append('

Summary

') + html_parts.append(f'
Specification: {spec_path}
') + html_parts.append(f'
Run ID: {generation_id}
') + html_parts.append( + f'
{variance.label}: ' + f"{variance.value}
" + ) + html_parts.append( + f'
Final Estimate: ' + f"{final_estimate_display}
" + ) + html_parts.append( + f'
Average: {average_display}
' + ) + html_parts.append( + f'
Range: {range_display}
' + ) + if cost_display: html_parts.append( - '

πŸš€ SpecFlow β€” Coding complete, starting deployment & QA

' - if pre_deploy - else '

βœ… SpecFlow Iteration Complete

' + '
Total LLM cost (cumulative): ' + f"{cost_display}
" ) - html_parts.append('
') - - html_parts.append('
') - - # Summary section + html_parts.append('
') + + # Variants section + if repo_links: html_parts.append('
') - html_parts.append('

Summary

') - html_parts.append(f'
Specification: {spec_path}
') - html_parts.append(f'
Run ID: {generation_id}
') - html_parts.append( - f'
{variance.label}: ' - f"{variance.value}
" - ) - html_parts.append( - f'
Final Estimate: ' - f"{final_estimate_display}
" - ) - html_parts.append( - f'
Average: {average_display}
' - ) - html_parts.append( - f'
Range: {range_display}
' - ) - if cost_display: + html_parts.append('

Variants

') + html_parts.append('

Click the links below to view the generation branches:

') + html_parts.append('') - - html_parts.append('') - - html_parts.append('') - - html_content = '\n'.join(html_parts) - - # Build plain text content - plain_parts = [] - plain_parts.append("=" * 60) - plain_parts.append( - "SpecFlow β€” CODING COMPLETE, STARTING DEPLOYMENT & QA" - if pre_deploy - else "SpecFlow ITERATION COMPLETE" - ) - plain_parts.append("=" * 60) - plain_parts.append("") - plain_parts.append(f"Specification: {spec_path}") - plain_parts.append(f"Run ID: {generation_id}") - plain_parts.append(f"{variance.label}: {variance.value}") - plain_parts.append(f"Final Estimate: {final_estimate_display}") - plain_parts.append(f"Average: {average_display}") - plain_parts.append(f"Range: {range_display}") - if cost_display: - plain_parts.append(f"Total LLM cost (cumulative): {cost_display}") - plain_parts.append("") - - if repo_links: - plain_parts.append("VARIANTS:") - plain_parts.append("-" * 60) - for repo_link in repo_links: - plain_parts.append(f"{repo_link['workspace_id']}: {repo_link['branch_url']}") - model = repo_link.get("model_name") - codegen_lines = ws_codegen_lines.get(repo_link["workspace_id"], []) - if codegen_lines: + + html_parts.append('
') + + html_parts.append('') + + html_parts.append('') + + html_content = '\n'.join(html_parts) + + # Build plain text content + plain_parts = [] + plain_parts.append("=" * 60) + plain_parts.append( + "SpecFlow β€” CODING COMPLETE, STARTING DEPLOYMENT & QA" + if pre_deploy + else "SpecFlow ITERATION COMPLETE" + ) + plain_parts.append("=" * 60) + plain_parts.append("") + plain_parts.append(f"Specification: {spec_path}") + plain_parts.append(f"Run ID: {generation_id}") + plain_parts.append(f"{variance.label}: {variance.value}") + plain_parts.append(f"Final Estimate: {final_estimate_display}") + plain_parts.append(f"Average: {average_display}") + plain_parts.append(f"Range: {range_display}") + if cost_display: + plain_parts.append(f"Total LLM cost (cumulative): {cost_display}") + plain_parts.append("") + + if repo_links: + plain_parts.append("VARIANTS:") + plain_parts.append("-" * 60) + for repo_link in repo_links: + plain_parts.append(f"{repo_link['workspace_id']}: {repo_link['branch_url']}") + model = repo_link.get("model_name") + codegen_lines = ws_codegen_lines.get(repo_link["workspace_id"], []) + if codegen_lines: + if model: + plain_parts.append(f" model: {model}") + for ln in codegen_lines: + plain_parts.append(f" {ln}") + else: + tokens_str = _format_tokens_millions( + repo_link.get("input_tokens"), repo_link.get("output_tokens") + ) + if model or tokens_str: + detail_parts = [] if model: - plain_parts.append(f" model: {model}") - for ln in codegen_lines: - plain_parts.append(f" {ln}") - else: - tokens_str = _format_tokens_millions( - repo_link.get("input_tokens"), repo_link.get("output_tokens") + detail_parts.append(f"model: {model}") + if tokens_str: + detail_parts.append(f"tokens used: {tokens_str}") + plain_parts.append(f" {' | '.join(detail_parts)}") + llm_c = repo_link.get("total_usd_cost") + if llm_c is not None: + try: + plain_parts.append( + f" LLM API cost (cumulative): USD {float(llm_c):,.2f}" ) - if model or tokens_str: - detail_parts = [] - if 
model: - detail_parts.append(f"model: {model}") - if tokens_str: - detail_parts.append(f"tokens used: {tokens_str}") - plain_parts.append(f" {' | '.join(detail_parts)}") - llm_c = repo_link.get("total_usd_cost") - if llm_c is not None: - try: - plain_parts.append( - f" LLM API cost (cumulative): USD {float(llm_c):,.2f}" - ) - except (TypeError, ValueError): - pass - plain_parts.append("") - - if component_breakdown: - plain_parts.append("COMPONENT BREAKDOWN:") - plain_parts.append("-" * 60) - # Build header - header = "Component" + except (TypeError, ValueError): + pass + plain_parts.append("") + + if component_breakdown: + plain_parts.append("COMPONENT BREAKDOWN:") + plain_parts.append("-" * 60) + # Build header + header = "Component" + if workspace_estimations: + for ws_est in workspace_estimations: + try: + ws_name = getattr(ws_est, 'workspace_name', 'unknown') + header += f" | {ws_name}" + except (AttributeError, TypeError): + continue + plain_parts.append(header) + plain_parts.append("-" * len(header)) + + for component_name, component_data in component_breakdown.items(): + row = component_name if workspace_estimations: for ws_est in workspace_estimations: try: ws_name = getattr(ws_est, 'workspace_name', 'unknown') - header += f" | {ws_name}" - except (AttributeError, TypeError): - continue - plain_parts.append(header) - plain_parts.append("-" * len(header)) - - for component_name, component_data in component_breakdown.items(): - row = component_name - if workspace_estimations: - for ws_est in workspace_estimations: - try: - ws_name = getattr(ws_est, 'workspace_name', 'unknown') - if ws_name in component_data["workspaces"]: - hours = component_data["workspaces"][ws_name]["hours"] - row += f" | {hours:.1f}" - else: - row += " | -" - except (AttributeError, TypeError, KeyError): + if ws_name in component_data["workspaces"]: + hours = component_data["workspaces"][ws_name]["hours"] + row += f" | {hours:.1f}" + else: row += " | -" - plain_parts.append(row) - 
plain_parts.append("") - - plain_parts.append(f"Generated at: {timestamp}") - plain_content = '\n'.join(plain_parts) - - return html_content, plain_content + except (AttributeError, TypeError, KeyError): + row += " | -" + plain_parts.append(row) + plain_parts.append("") + + plain_parts.append(f"Generated at: {timestamp}") + plain_content = '\n'.join(plain_parts) + + return html_content, plain_content # Initialize Notifications diff --git a/backend/app/database/interface.py b/backend/app/database/interface.py index cb1e091..3125c22 100644 --- a/backend/app/database/interface.py +++ b/backend/app/database/interface.py @@ -7,7 +7,7 @@ """ from abc import ABC, abstractmethod -from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar +from typing import Any, Callable, Dict, List, Optional, Protocol, Tuple, TypeVar, runtime_checkable # Type for query filter: (field, operator, value) FilterTuple = Tuple[str, str, Any] @@ -16,6 +16,22 @@ T = TypeVar("T") +@runtime_checkable +class ReadOnlyDatabase(Protocol): + """Read-only view over a database β€” the narrow subset a document reader needs. + + Consumers that must never write status/checkpoint/workspace_phases (e.g. the + notifications report renderer) depend on this protocol instead of the full + ``IDatabase``, so those writes are impossible through the handle they hold. + Any ``IDatabase`` satisfies it structurally; ``StateMachineDBAdapter`` hands + out a view that exposes *only* ``get`` β€” keeping Commandment VII enforced, + not merely documented. + """ + + def get(self, collection: str, doc_id: str) -> Optional[Dict[str, Any]]: + ... + + class ITransactionContext(ABC): """ Transaction context interface for atomic database operations. 
diff --git a/backend/app/state/db_adapter.py b/backend/app/state/db_adapter.py index 918e984..208e0cc 100644 --- a/backend/app/state/db_adapter.py +++ b/backend/app/state/db_adapter.py @@ -18,10 +18,10 @@ from __future__ import annotations import asyncio -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Dict, Optional if TYPE_CHECKING: - from app.database.interface import IDatabase + from app.database.interface import IDatabase, ReadOnlyDatabase # Firestore collection names β€” use these constants instead of repeating string literals COL_WORKSPACES = "workspaces" @@ -29,6 +29,24 @@ COL_API_KEYS = "api_keys" +class _ReadOnlyDatabaseView: + """Read-only faΓ§ade over ``IDatabase`` exposing only ``get``. + + Handed to read-only consumers (the notifications report renderer) so they + physically cannot perform status/checkpoint/workspace_phases writes β€” the + encapsulation of Commandment VII is enforced by construction, not by a + docstring warning. Satisfies the ``ReadOnlyDatabase`` protocol structurally. + """ + + __slots__ = ("_db",) + + def __init__(self, db: "IDatabase") -> None: + self._db = db + + def get(self, collection: str, doc_id: str) -> Optional[Dict[str, Any]]: + return self._db.get(collection, doc_id) + + class StateMachineDBAdapter: """ Adapts the synchronous IDatabase interface to the async interface @@ -45,6 +63,18 @@ class StateMachineDBAdapter: def __init__(self, db: "IDatabase") -> None: self._db = db + @property + def read_only_db(self) -> "ReadOnlyDatabase": + """A read-only view over the underlying database. + + For consumers that only read (e.g. the notifications report renderer, + which does a plain ``db.get("workspaces", id)``). The returned view + exposes *only* ``get`` β€” writes are unreachable through it, so + status/checkpoint/workspace_phases writes still have to go through this + adapter's async methods. 
+ """ + return _ReadOnlyDatabaseView(self._db) + # ------------------------------------------------------------------ # Generation methods # ------------------------------------------------------------------ diff --git a/backend/app/workflows/multi_workspace_estimation_p10y.py b/backend/app/workflows/multi_workspace_estimation_p10y.py index 787a390..e0d19fa 100644 --- a/backend/app/workflows/multi_workspace_estimation_p10y.py +++ b/backend/app/workflows/multi_workspace_estimation_p10y.py @@ -44,6 +44,8 @@ from app.schemas.specification import GenerationWorkflowRequest from app.schemas.telemetry_workflow import TelemetryWorkflowLabel from app.schemas.workspace import WorkspaceSettings +from app.core.artifact_files import MULTI_WORKSPACE_REPORT_HTML_FILE, MULTI_WORKSPACE_REPORT_MD_FILE +from app.core.notifications import render_generation_session_report_html from app.services.artifact_store import ArtifactStore from app.services.claude_code import agent_query from app.services.p10y.estimation_report_generator import format_multi_workspace_report @@ -642,7 +644,19 @@ async def multi_workspace_estimation_p10y_workflow( f"buffer={risk_assessment.total_buffer_pct*100:.1f}%, " f"final_estimate={risk_assessment.final_estimate:.1f}h" ) - + + # Build response now β€” every field is resolved, and both the markdown and + # HTML reports below render from it. 
+ response = MultiWorkspaceEstimationResponse( + summary=summary, + workspace_estimations=workspace_estimations, + comparative_analysis=comparative_analysis, + timestamp=datetime.now(timezone.utc).isoformat(), + skipped_workspaces=skipped_workspaces, + aggregate_p10y_commit_coverage_pct=aggregate_cov, + total_usd_cost=session_llm_cost_total, + ) + # Generate structured markdown report using report generator logger.info("Generating structured markdown report...") structured_report = format_multi_workspace_report( @@ -652,13 +666,37 @@ async def multi_workspace_estimation_p10y_workflow( skipped_workspaces=skipped_workspaces, aggregate_p10y_commit_coverage_pct=aggregate_cov, ) - + # Save structured report to file in primary workspace primary_workspace_path = workspaces[0].workspace_path full_outputs_dir = f"{primary_workspace_path}/{request.outputs_dir}" - structured_report_path = f"{full_outputs_dir}/multi-workspace-estimation-report.md" + structured_report_path = f"{full_outputs_dir}/{MULTI_WORKSPACE_REPORT_MD_FILE}" _write_structured_report(structured_report, structured_report_path, logger) - + + # Save the same report as HTML, regardless of whether email/Slack notifiers + # are configured β€” local quickstart users have neither, so this is the only + # place the HTML report is produced. Non-essential observability output: a + # failure here must never abort an estimation whose work is already done. + # The renderer only reads (a plain db.get("workspaces", id)); db_adapter is + # the async state-machine wrapper, so pass its read-only view β€” without a + # database handle the Variants section silently drops out. 
+ logger.info("Generating HTML report...") + try: + html_content, _plain_content = render_generation_session_report_html( + generation_id=request.generation_id, + workspace_ids=workspace_ids, + result=response, + spec_path=request.spec_path, + db=db_adapter.read_only_db if db_adapter else None, + ) + html_report_path = f"{full_outputs_dir}/{MULTI_WORKSPACE_REPORT_HTML_FILE}" + _write_structured_report(html_content, html_report_path, logger) + except Exception as e: + logger.warning( + "Failed to render/write HTML report β€” continuing without it: %s", + e, exc_info=True, + ) + # Generate AI-powered comprehensive report (optional, additional analysis) logger.info("Generating AI-powered comprehensive report...") report_summary = await _generate_ai_report( @@ -691,18 +729,7 @@ async def multi_workspace_estimation_p10y_workflow( f"Failed to archive combined report for estimation {generation_id}: {e}", exc_info=True, ) - - # Build response - response = MultiWorkspaceEstimationResponse( - summary=summary, - workspace_estimations=workspace_estimations, - comparative_analysis=comparative_analysis, - timestamp=datetime.now(timezone.utc).isoformat(), - skipped_workspaces=skipped_workspaces, - aggregate_p10y_commit_coverage_pct=aggregate_cov, - total_usd_cost=session_llm_cost_total, - ) - + logger.info("Multi-workspace estimation completed successfully") - + return response diff --git a/backend/scripts/example_test_session.py b/backend/scripts/example_test_session.py new file mode 100644 index 0000000..6f29afa --- /dev/null +++ b/backend/scripts/example_test_session.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python3 +""" +Seed one fake, already-COMPLETED generation session for local TUI testing. + +Writes a generation_sessions document (with a full P10Y result β€” summary, +per-workspace breakdown, component comparison) plus the markdown/HTML reports +on disk, so `specflow tui` can render a finished run without waiting for a +real generation. 
+ +Requires the local sentinel identity to already exist (api_keys/local) β€” +run `specflow init` (or `python scripts/init_firestore.py`) at least once +before this script. +""" + +import argparse +import os +import sys +from datetime import datetime, timezone +from pathlib import Path + +# Set DATABASE_TYPE early if FIRESTORE_EMULATOR_HOST is set (before importing settings) +if os.getenv("FIRESTORE_EMULATOR_HOST") and not os.getenv("DATABASE_TYPE"): + os.environ["DATABASE_TYPE"] = "emulator" + +from app.core.artifact_files import MULTI_WORKSPACE_REPORT_HTML_FILE, MULTI_WORKSPACE_REPORT_MD_FILE +from app.core.artifact_subdirs import REPORT_SUBDIR +from app.core.local_identity import LOCAL_API_KEY_DOC_ID +from app.core.notifications import render_generation_session_report_html +from app.core.workspace_pool_names import DEFAULT_WORKSPACE_POOL +from app.database.factory import get_database +from app.schemas.estimate import ( + ComparativeAnalysis, + ComponentComparison, + ComponentEstimation, + EstimationMetrics, + EstimationSummary, + MultiWorkspaceEstimationResponse, + RiskAssessment, + WorkspaceEstimation, +) +from app.schemas.generation_workflow_enums import GenerationCheckpoint, GenerationStatus +from app.services.artifact_store import ARTIFACTS_BASE +from app.services.p10y.estimation_report_generator import format_multi_workspace_report +from app.state.db_adapter import COL_GENERATION_SESSIONS + +# Matches tui.constants.LOCAL_ONLY_READINESS β€” kept as a literal since the TUI +# package isn't importable from the backend. 
LOCAL_ONLY_READINESS = "LOCAL_ONLY"

# Fake component mix used for every seeded workspace:
# (component name, share of the workspace's total hours, variance percentage
# reported for that component in the cross-workspace comparison).
COMPONENTS = [
    ("auth", 0.40, 6.0),
    ("billing", 0.35, 9.5),
    ("api", 0.25, 3.2),
]


def _workspace_estimation(workspace_name: str, total_hours: float) -> WorkspaceEstimation:
    """Build one fake per-workspace estimation from ``COMPONENTS``.

    Each component receives ``share * total_hours`` hours, split 70% new work,
    25% refactor and 5% rework; the workspace-level metrics use the same split
    applied to ``total_hours``.
    """
    component_breakdown = {
        name: ComponentEstimation(
            component_name=name,
            hours=round(total_hours * share, 1),
            new_work=round(total_hours * share * 0.7, 1),
            refactor=round(total_hours * share * 0.25, 1),
            rework=round(total_hours * share * 0.05, 1),
            quality_score=0.9,
        )
        for name, share, _variance in COMPONENTS
    }
    return WorkspaceEstimation(
        workspace_name=workspace_name,
        workspace_path=f"/workspaces/{workspace_name}",
        total_hours=total_hours,
        total_effective_output=round(total_hours * 0.9, 1),
        component_breakdown=component_breakdown,
        estimation_metrics=EstimationMetrics(
            new_work=round(total_hours * 0.7, 1),
            refactor=round(total_hours * 0.25, 1),
            rework=round(total_hours * 0.05, 1),
            removed_work=0.0,
            quality_score=0.9,
            effective_output=round(total_hours * 0.9, 1),
            total_output=total_hours,
        ),
        commits_count=12,
        total_usd_cost=4.25,
    )


def build_estimation_result(workspace_ids: list[str]) -> MultiWorkspaceEstimationResponse:
    """Fake P10Y result with a real component breakdown, built from the actual schemas.

    Workspace ``i`` is given ``110 + 5 * i`` total hours. Average/min/max are
    derived from those hours; the remaining summary statistics (standard
    deviation, coefficient of variation, risk components) are fixed
    placeholder values, not computed.

    Raises:
        ValueError: if ``workspace_ids`` is empty (there is nothing to average).
    """
    if not workspace_ids:
        # Fail with a clear message instead of a ZeroDivisionError further down.
        raise ValueError("workspace_ids must contain at least one workspace ID")

    hours_by_ws = [110.0 + 5.0 * i for i in range(len(workspace_ids))]
    workspace_estimations = [
        _workspace_estimation(ws_id, hours) for ws_id, hours in zip(workspace_ids, hours_by_ws)
    ]

    average_hours = sum(hours_by_ws) / len(hours_by_ws)
    summary = EstimationSummary(
        average_hours=average_hours,
        std_deviation=3.3,
        min_hours=min(hours_by_ws),
        max_hours=max(hours_by_ws),
        coefficient_of_variation=0.03,
        variance_assessment="low",
        risk_assessment=RiskAssessment(
            status="Approved",
            instability_ratio=0.03,
            rejection_threshold=0.15,
            base_component=0.10,
            var_component=0.02,
            size_component=0.01,
            total_buffer_pct=0.13,
            # Average plus the 13% total buffer declared just above.
            final_estimate=average_hours * 1.13,
        ),
    )

    component_comparison = {
        name: ComponentComparison(
            component_name=name,
            hours_by_workspace={
                ws.workspace_name: ws.component_breakdown[name].hours for ws in workspace_estimations
            },
            average=sum(ws.component_breakdown[name].hours for ws in workspace_estimations)
            / len(workspace_estimations),
            std_deviation=2.1,
            variance_percentage=variance,
        )
        for name, _share, variance in COMPONENTS
    }
    comparative_analysis = ComparativeAnalysis(
        component_comparison=component_comparison,
        high_variance_components=["billing"],
        insights=[
            "Low variance across workspaces",
            "Billing has the highest variance — consider a closer look",
        ],
    )

    return MultiWorkspaceEstimationResponse(
        summary=summary,
        workspace_estimations=workspace_estimations,
        comparative_analysis=comparative_analysis,
        timestamp=datetime.now(timezone.utc).isoformat(),
        total_usd_cost=sum(ws.total_usd_cost for ws in workspace_estimations),
    )


def write_reports(generation_id: str, result: MultiWorkspaceEstimationResponse, spec_path: str) -> Path:
    """Write the markdown + HTML reports where the report.html endpoint expects to find them.

    Both files are written explicitly as UTF-8: the reports contain non-ASCII
    text and the HTML file is later served as raw bytes, so relying on the
    platform's locale encoding could produce a file browsers mis-decode.

    Returns:
        The report directory (``ARTIFACTS_BASE / generation_id / REPORT_SUBDIR``).
    """
    report_dir = ARTIFACTS_BASE / generation_id / REPORT_SUBDIR
    report_dir.mkdir(parents=True, exist_ok=True)

    markdown = format_multi_workspace_report(
        workspace_estimations=result.workspace_estimations,
        summary=result.summary,
        comparative_analysis=result.comparative_analysis,
        skipped_workspaces=result.skipped_workspaces,
        aggregate_p10y_commit_coverage_pct=result.aggregate_p10y_commit_coverage_pct,
    )
    (report_dir / MULTI_WORKSPACE_REPORT_MD_FILE).write_text(markdown, encoding="utf-8")

    # db=None: the renderer is called without a database handle here.
    html_content, _plain = render_generation_session_report_html(
        generation_id=generation_id,
        workspace_ids=[ws.workspace_name for ws in result.workspace_estimations],
        result=result,
        spec_path=spec_path,
        db=None,
    )
    (report_dir / MULTI_WORKSPACE_REPORT_HTML_FILE).write_text(html_content, encoding="utf-8")

    return report_dir


def main() -> int:
    """Seed a completed generation session plus its reports for local TUI testing.

    Looks up the local sentinel API-key document to attribute the session,
    writes the fake reports to disk, then stores (overwriting) the session
    document under ``--generation-id``.

    Returns:
        Process exit code: 0 on success, 1 when the sentinel identity is missing.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--generation-id",
        default="est-example0001",
        help="Generation ID to seed (default: est-example0001). Overwrites if it already exists.",
    )
    parser.add_argument(
        "--spec-path",
        default="specs/example.md",
        help="Fake spec path stored on the session (cosmetic only).",
    )
    args = parser.parse_args()

    db = get_database()

    sentinel = db.get("api_keys", LOCAL_API_KEY_DOC_ID)
    if not sentinel:
        # Diagnostics go to stderr so stdout stays clean for the success output.
        print(
            f"ERROR: local sentinel identity (api_keys/{LOCAL_API_KEY_DOC_ID}) not found.\n"
            "Run `specflow init` (or `python scripts/init_firestore.py`) at least once first.",
            file=sys.stderr,
        )
        return 1
    user_email = (sentinel.get("user_id") or "").lower()
    key_uid = sentinel.get("key_uid")
    workspace_pool = sentinel.get("workspace_pool") or DEFAULT_WORKSPACE_POOL

    workspace_ids = ["ws-01-1", "ws-01-2", "ws-01-3"]
    result = build_estimation_result(workspace_ids)
    report_dir = write_reports(args.generation_id, result, args.spec_path)

    now = datetime.now(timezone.utc)
    workspace_phases = {
        ws_id: {"last_completed_phase": 9, "total_phases": 9, "phase_name": "Done"}
        for ws_id in workspace_ids
    }

    db.set(
        COL_GENERATION_SESSIONS,
        args.generation_id,
        {
            "generation_id": args.generation_id,
            "user_email": user_email,
            "key_uid": key_uid,
            "workspace_pool": workspace_pool,
            "status": GenerationStatus.COMPLETED.value,
            "checkpoint": GenerationCheckpoint.ESTIMATION_DONE.value,
            "created_at": now,
            "started_at": now,
            "completed_at": now,
            "workspace_ids": workspace_ids,
            "parameters": {
                "workspace_count": len(workspace_ids),
                "spec_path": args.spec_path,
                "outputs_dir": "docs",
            },
            "last_spec_readiness": LOCAL_ONLY_READINESS,
            "workspace_phases": workspace_phases,
            "progress": {},
            "result": result.model_dump(),
            "artifact_path": str(ARTIFACTS_BASE / args.generation_id),
            "code_archived": True,
            "retry_count": 0,
            "max_retries": 3,
            "error": None,
        },
    )

    print(f"Seeded completed generation session: {args.generation_id}")
    print(f"Reports written to: {report_dir}")
    print(f"\nOn your host machine, run:\n\n specflow tui --generation-id {args.generation_id}\n")
    return 0


if __name__ == "__main__":
    sys.exit(main())
client.get( + "/api/v1/generation-sessions/est-test-123/report.html", + headers={"X-API-Key": "test-key"}, + ) + + assert response.status_code == 404 + + class TestStreamWorkspaceMessages: """Tests for GET /{generation_id}/workspaces/{workspace_id}/messages/stream (SSE).""" diff --git a/backend/test/test_report_generation.py b/backend/test/test_report_generation.py index a8e1d8e..117c060 100644 --- a/backend/test/test_report_generation.py +++ b/backend/test/test_report_generation.py @@ -618,5 +618,205 @@ def test_open_failure_does_not_raise(self, tmp_path, caplog): ), "Expected WARNING log on open() failure" +# --------------------------------------------------------------------------- +# Phase 5: the workflow saves the HTML report next to the markdown report, +# regardless of whether any email/Slack notifier is configured (local +# quickstart users have neither). +# --------------------------------------------------------------------------- + + +class TestWorkflowSavesHtmlReportRegardlessOfNotifierConfig: + @pytest.mark.asyncio + async def test_html_report_written_next_to_markdown_report(self, tmp_path): + import logging + from unittest.mock import AsyncMock, patch + + from app.core.config import settings as global_settings + from app.schemas.model_token_usage import ModelTokenUsage + from app.schemas.specification import GenerationWorkflowRequest + from app.services.parallel_executor import ParallelGenerationResult + from app.schemas.workspace import WorkspaceSettings + from app.workflows.multi_workspace_estimation_p10y import ( + multi_workspace_estimation_p10y_workflow, + ) + + workspace_estimation = WorkspaceEstimation( + workspace_name="ws-01-1", + workspace_path=str(tmp_path / "ws-01-1"), + total_hours=100.0, + total_effective_output=90.0, + component_breakdown={ + "auth": ComponentEstimation( + component_name="auth", + hours=40.0, + new_work=30.0, + refactor=8.0, + rework=2.0, + quality_score=0.9, + ), + }, + estimation_metrics=EstimationMetrics( + 
new_work=30.0, + refactor=8.0, + rework=2.0, + removed_work=0.0, + quality_score=0.9, + effective_output=90.0, + total_output=100.0, + ), + commits_count=5, + model_usage=ModelTokenUsage(model_name="anthropic/claude-sonnet-4.5", num_turns=3), + ) + parallel_result = ParallelGenerationResult( + workspace_name="ws-01-1", + workspace_settings=WorkspaceSettings( + workspace_path=str(tmp_path / "ws-01-1"), + provider="anthropic", + model="claude-sonnet-4.5", + ), + estimation=workspace_estimation, + success=True, + ) + + request = GenerationWorkflowRequest( + spec_path="specs/test.md", + outputs_dir="outputs", + generation_id=None, # skips Firestore-backed usage/archive branches + ) + test_settings = global_settings.model_copy( + update={"WORKSPACE_BASE_PATH": str(tmp_path)} + ) + + with ( + patch( + "app.workflows.multi_workspace_estimation_p10y.execute_generation_parallel", + new=AsyncMock(return_value=[parallel_result]), + ), + patch( + "app.workflows.multi_workspace_estimation_p10y._generate_ai_report", + new=AsyncMock(return_value="skipped in test"), + ), + ): + response = await multi_workspace_estimation_p10y_workflow( + request=request, + settings=test_settings, + logger=logging.getLogger("test"), + workspace_ids=["ws-01-1"], + db_adapter=None, + ) + + outputs_dir = tmp_path / "ws-01-1" / "outputs" + md_report = outputs_dir / "multi-workspace-estimation-report.md" + html_report = outputs_dir / "multi-workspace-estimation-report.html" + + assert md_report.exists() + assert html_report.exists() + html_content = html_report.read_text() + assert " dict: return { @@ -802,6 +849,16 @@ def test_other_actions_always_enabled(self): screen._payload = {"status": "running"} assert screen.check_action("clear", ()) is True + def test_open_report_greyed_out_without_result(self): + screen = tui_app.DashboardScreen("gen_x") + screen._payload = {"status": "running"} + assert screen.check_action("open_report", ()) is None + + def test_open_report_enabled_when_result_present(self): 
+ screen = tui_app.DashboardScreen("gen_x") + screen._payload = {"status": "completed", "result": {"summary": {}}} + assert screen.check_action("open_report", ()) is True + class TestConfirmScreenCountdown: @pytest.mark.asyncio @@ -1001,6 +1058,100 @@ async def test_clear_unavailable_when_pool_status_missing(self): assert isinstance(psw.await_args.args[0], tui_app.MessageScreen) +class TestDashboardOpenReport: + """``h`` fetches the HTML report over HTTP (the backend runs in a container, + so there's no shared filesystem path to open directly) and caches it locally + before opening; a missing opener or missing report must never crash.""" + + @pytest.mark.asyncio + async def test_opens_report_via_platform_opener(self, tmp_path): + a, b, c = _gate_ready() + with a, b, c, patch("tui.app.poll_once", new=AsyncMock(return_value=_running_payload())): + app = tui_app.SpecFlowTUI(root=tmp_path, generation_id="gen_x", poll_interval=999) + async with app.run_test() as pilot: + await pilot.pause() + screen = app.screen + with ( + patch( + "tui.app.call_backend_endpoint_bytes", + new=AsyncMock(return_value=b"report"), + ), + patch("tui.app._platform_opener", return_value="open"), + patch("tui.app.shutil.which", return_value="/usr/bin/open"), + patch("tui.app.local_env.run_command", new=AsyncMock()) as run_cmd, + ): + await screen._open_report() + cache_path = tmp_path / ".specflow-local" / "reports" / "gen_x.html" + assert cache_path.read_bytes() == b"report" + run_cmd.assert_awaited_once_with( + ["open", str(cache_path)], cache_path.parent, timeout=10 + ) + + @pytest.mark.asyncio + async def test_falls_back_to_notify_when_no_opener_available(self, tmp_path): + a, b, c = _gate_ready() + with a, b, c, patch("tui.app.poll_once", new=AsyncMock(return_value=_running_payload())): + app = tui_app.SpecFlowTUI(root=tmp_path, generation_id="gen_x", poll_interval=999) + async with app.run_test() as pilot: + await pilot.pause() + screen = app.screen + with ( + patch( + 
"tui.app.call_backend_endpoint_bytes", + new=AsyncMock(return_value=b""), + ), + patch("tui.app._platform_opener", return_value=None), + patch("tui.app.local_env.run_command", new=AsyncMock()) as run_cmd, + patch.object(screen, "notify") as notify, + ): + await screen._open_report() + run_cmd.assert_not_awaited() + notify.assert_called_once() + + @pytest.mark.asyncio + async def test_404_shows_no_report_available_message(self, tmp_path): + request = httpx.Request("GET", "http://backend/api/v1/generation-sessions/gen_x/report.html") + response = httpx.Response(404, request=request) + not_found = httpx.HTTPStatusError("Not Found", request=request, response=response) + a, b, c = _gate_ready() + with a, b, c, patch("tui.app.poll_once", new=AsyncMock(return_value=_running_payload())): + app = tui_app.SpecFlowTUI(root=tmp_path, generation_id="gen_x", poll_interval=999) + async with app.run_test() as pilot: + await pilot.pause() + screen = app.screen + with ( + patch( + "tui.app.call_backend_endpoint_bytes", + new=AsyncMock(side_effect=not_found), + ), + patch("tui.app.local_env.run_command", new=AsyncMock()) as run_cmd, + patch.object(screen, "notify") as notify, + ): + await screen._open_report() + run_cmd.assert_not_awaited() + notify.assert_called_once() + assert "no html report" in notify.call_args.args[0].lower() + + @pytest.mark.asyncio + async def test_open_report_no_op_when_no_result_yet(self, tmp_path): + a, b, c = _gate_ready() + with a, b, c, patch("tui.app.poll_once", new=AsyncMock(return_value=_running_payload())): + app = tui_app.SpecFlowTUI(root=tmp_path, generation_id="gen_x", poll_interval=999) + async with app.run_test() as pilot: + await pilot.pause() + screen = app.screen + screen._payload = {"status": "running"} + with ( + patch( + "tui.app.call_backend_endpoint_bytes", new=AsyncMock() + ) as call_backend, + patch.object(screen, "notify") as notify, + ): + screen.action_open_report() + call_backend.assert_not_awaited() + notify.assert_called_once() + 
+ # --------------------------------------------------------------------------- # ClientSetupScreen β€” registration of the MCP server with AI clients # --------------------------------------------------------------------------- diff --git a/mcp_server/tests/test_tui_render.py b/mcp_server/tests/test_tui_render.py index 2901b83..179f750 100644 --- a/mcp_server/tests/test_tui_render.py +++ b/mcp_server/tests/test_tui_render.py @@ -183,18 +183,34 @@ def test_full_result(self): ], "total_usd_cost": 94.1, } - panel = render.estimate_panel(result) + panel = render.estimate_panel({"result": result}) assert panel.average_hours == 318 assert panel.risk_status == "Approved" assert panel.per_workspace == [("ws-01-1", 305.0), ("ws-01-2", 331.0)] assert panel.total_usd_cost == 94.1 + assert panel.component_comparison == [] def test_partial_result_is_tolerant(self): - panel = render.estimate_panel({"summary": {"average_hours": 100}}) + panel = render.estimate_panel({"result": {"summary": {"average_hours": 100}}}) assert panel.average_hours == 100 assert panel.risk_status is None assert panel.per_workspace == [] + def test_component_comparison_sorted_by_variance_descending(self): + result = { + "summary": {}, + "comparative_analysis": { + "component_comparison": { + "auth": {"component_name": "auth", "average": 40.0, "variance_percentage": 5.0}, + "billing": {"component_name": "billing", "average": 20.0, "variance_percentage": 25.0}, + } + }, + } + panel = render.estimate_panel({"result": result}) + assert [row.component_name for row in panel.component_comparison] == ["billing", "auth"] + assert panel.component_comparison[0].average_hours == 20.0 + assert panel.component_comparison[0].variance_percentage == 25.0 + class _Event: """Minimal attr-compatible stand-in for tui.stream.AgentStreamEvent.""" diff --git a/mcp_server/tui/app.py b/mcp_server/tui/app.py index 64244ef..c6159f7 100644 --- a/mcp_server/tui/app.py +++ b/mcp_server/tui/app.py @@ -34,6 +34,7 @@ from pathlib import 
Path from typing import Any +import httpx from rich.console import Group, RenderableType from rich.panel import Panel from rich.table import Table @@ -62,6 +63,7 @@ from cli import resolve_backend_config from services import local_env from services.session import resolve_generation_id, set_project_root +from services.specflow_backend import call_backend_endpoint_bytes from tui import actions, activity, mcp_clients, onboarding, render from tui.config import ( EDITABLE_KEYS, @@ -152,7 +154,7 @@ def _workspaces_panel(payload: dict[str, Any], selected_ws_id: str | None = None def _estimate_panel(payload: dict[str, Any]) -> Panel | None: - panel = render.estimate_panel(payload.get("result")) + panel = render.estimate_panel(payload) if panel is None: return None grid = Table.grid(padding=(0, 2)) @@ -177,7 +179,23 @@ def fmt_h(v: float | None) -> str: grid.add_row("Variants", variants) if panel.total_usd_cost is not None: grid.add_row("Total spend", f"${panel.total_usd_cost:.2f}") - return Panel(grid, title="P10Y estimate", border_style="green") + + renderables: list[RenderableType] = [grid] + if panel.component_comparison: + breakdown = Table(box=None, padding=(0, 2)) + breakdown.add_column("Component", justify="left") + breakdown.add_column("Avg hours", justify="right") + breakdown.add_column("Variance", justify="right") + for row in panel.component_comparison: + breakdown.add_row( + row.component_name, + f"{row.average_hours:.0f} h", + f"{row.variance_percentage:.0f}%", + ) + renderables.append(breakdown) + renderables.append(Text("HTML report available β€” press h to open", style="dim")) + + return Panel(Group(*renderables), title="P10Y estimate", border_style="green") def _activity_panel(root: Path, payload: dict[str, Any]) -> Panel | None: @@ -444,6 +462,7 @@ class DashboardScreen(_SpecFlowScreen): Binding("c", "connect_client", "Add MCP to AI tool"), Binding("o", "open_workspace", "open ws"), Binding("enter", "open_workspace", "open ws", show=False), + Binding("h", 
def action_open_report(self) -> None:
    """Key-binding handler: open the HTML report if the run has a result.

    When the current payload has no estimation result, shows a notification
    instead; otherwise the fetch-and-open work is started as a worker.
    """
    if render.estimate_panel(self._payload) is None:
        self.notify("No HTML report available yet.", severity="information")
        return
    self.run_worker(self._open_report(), exclusive=True)


async def _open_report(self) -> None:
    """Fetch the HTML report from the backend, cache it locally, then open it.

    The backend runs in a container (local quickstart); its ``artifact_path``
    is a container-internal filesystem path the TUI process can't read
    directly, so the report is fetched over HTTP rather than opened in place.

    Every failure (fetch error, unwritable cache directory, missing or failing
    opener) is reported through ``notify`` rather than raised, because this
    coroutine runs inside the worker started by ``action_open_report``.
    """
    try:
        html_bytes = await call_backend_endpoint_bytes(
            endpoint=f"/api/v1/generation-sessions/{self._generation_id}/report.html",
            timeout_seconds=30,
        )
    except httpx.HTTPStatusError as exc:
        # 404 is the expected "report not written yet / older run" answer.
        if exc.response.status_code == 404:
            self.notify("No HTML report available for this run.", severity="information")
        else:
            self.notify(f"Couldn't fetch report: {exc}", severity="warning")
        return
    except Exception as exc:
        self.notify(f"Couldn't fetch report: {exc}", severity="warning")
        return

    cache_path = self.app.root / ".specflow-local" / "reports" / f"{self._generation_id}.html"
    try:
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        cache_path.write_bytes(html_bytes)
    except OSError as exc:
        # Read-only or full project root: tell the user instead of letting the
        # error escape the worker.
        self.notify(f"Couldn't save report to {cache_path}: {exc}", severity="warning")
        return

    opener = _platform_opener()
    if opener is None or not shutil.which(opener):
        # No usable opener on this platform: leave the cached file for the user.
        self.notify(f"Report saved at {cache_path}", severity="information")
        return
    try:
        await local_env.run_command([opener, str(cache_path)], cache_path.parent, timeout=10)
    except OSError as exc:
        self.notify(f"Couldn't open report automatically: {exc}", severity="warning")
def _coerce_float(value: Any) -> float:
    """Return *value* as a float, treating missing or non-numeric values as 0.0."""
    try:
        return float(value or 0.0)
    except (TypeError, ValueError):
        return 0.0


def _component_comparison_rows(result: dict[str, Any]) -> list[ComponentBreakdownRow]:
    """Cross-workspace per-component breakdown, highest-variance first.

    Reads ``result["comparative_analysis"]["component_comparison"]`` (a mapping
    of component key to a dict with ``component_name``, ``average`` and
    ``variance_percentage``). Access is defensive: a missing or wrongly-typed
    section yields no rows, non-dict entries are skipped, and missing or
    non-numeric numbers are shown as 0.0, so a partial result never raises.

    Returns:
        Rows sorted by ``variance_percentage`` descending; ties keep the
        mapping's iteration order.
    """
    analysis = result.get("comparative_analysis")
    comparison = analysis.get("component_comparison") if isinstance(analysis, dict) else None
    if not isinstance(comparison, dict):
        return []

    rows = [
        ComponentBreakdownRow(
            # Fall back to the mapping key when the entry carries no name.
            component_name=data.get("component_name") or name,
            average_hours=_coerce_float(data.get("average")),
            variance_percentage=_coerce_float(data.get("variance_percentage")),
        )
        for name, data in comparison.items()
        if isinstance(data, dict)
    ]
    rows.sort(key=lambda row: row.variance_percentage, reverse=True)
    return rows