diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index d622bd2..e1c4ac5 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -3,9 +3,8 @@ name: Linting # Lint gate runs on every PR and push to main. # # - ruff format + ruff check are HARD gates (block the PR). -# - mypy is ADVISORY for now (continue-on-error): the package carries ~28 -# pre-existing type errors that predate CI enforcement. Tracked for burn-down -# in makegov/tango-python; flip `continue-on-error` off once that's clear. +# - mypy is a HARD gate: the package type-checks cleanly under strict mypy. +# (The earlier ~28-error burn-down is complete.) # - The SDK filter/shape conformance check needs the canonical manifest from the # private makegov/tango repo, which requires a TANGO_API_REPO_ACCESS_TOKEN # secret the public CI does not have. The conformance job SKIPS cleanly when @@ -42,8 +41,7 @@ jobs: - name: Lint with ruff run: uv run ruff check tango/ - - name: Type check with mypy (advisory) - continue-on-error: true + - name: Type check with mypy run: uv run mypy tango/ conformance: diff --git a/CHANGELOG.md b/CHANGELOG.md index 16c86ea..5f168c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 2026-06-02): `actions/checkout` v4→v6, `astral-sh/setup-uv` v4→v8.1.0 (pinned exact — no floating `v8` major tag is published yet), and `codecov/codecov-action` v3→v5 (with the renamed `files:` input). +- `mypy` is now a **hard gate** in `lint.yml` (no longer advisory). The + `tango/` package type-checks cleanly under strict mypy. + +### Changed +- Completed the strict-`mypy` burn-down across `tango/shapes/` (parser, + generator, factory, schema). All changes are type-annotation/typing + corrections with no runtime behavior change, except: + - `FieldSchema.nested_model` is now typed `type | str | None` (it always + accepted string model names from the explicit schemas; the annotation was + wrong). `ModelFactory.validate_data` and `ShapeParser._validate_field_spec` + likewise accept `type | str` for the model argument. + - Removed two dead `elif field_spec.is_wildcard:` branches (in + `TypeGenerator.generate_type` and `ModelFactory.create_instance`) and the + now-orphaned `_parse_nested_wildcard` helper. These were unreachable — + wildcard field specs are fully handled by the top-of-loop branch that + `continue`s before reaching them — so removal is behavior-preserving. ## [1.1.1] - 2026-05-29 diff --git a/tango/shapes/factory.py b/tango/shapes/factory.py index 1a33c76..92f1f58 100644 --- a/tango/shapes/factory.py +++ b/tango/shapes/factory.py @@ -23,7 +23,7 @@ from collections.abc import Callable from datetime import date, datetime from decimal import Decimal -from typing import Any +from typing import Any, cast from tango.exceptions import ModelInstantiationError from tango.shapes.generator import TypeGenerator @@ -542,38 +542,6 @@ def create_instance( # Value is not a dict - might be a primitive or None result[result_field_name] = value - elif field_spec.is_wildcard: - # Wildcard on nested field - use full model type - # This is handled at the top level, but we need to handle it here too - # for nested wildcards like recipient(*) - if field_schema.nested_model: - if field_schema.is_list: - if isinstance(value, list): - nested_instances = [] - for item in value: - if isinstance(item, dict): - # Parse all fields from the nested model - nested_instance = self._parse_nested_wildcard( - item, field_schema.nested_model - ) - nested_instances.append(nested_instance) - else: - nested_instances.append(item) - result[result_field_name] = nested_instances - else: - result[result_field_name] = value - else: - if isinstance(value, dict): - nested_instance = self._parse_nested_wildcard( - value, field_schema.nested_model - ) - result[result_field_name] = nested_instance - else: - result[result_field_name] = value - else: - # Not a nested model, just use the value - result[result_field_name] = value - else: # Simple field - parse using appropriate parser parsed_value = self._parse_field( @@ -661,7 +629,7 @@ def _resolve_nested_model(self, nested_model: type | str) -> type: raise ModelInstantiationError( f"Could not resolve nested model '{nested_model}'" ) - return model_class + return cast(type, model_class) except ImportError as err: raise ModelInstantiationError( f"Could not import models module to resolve '{nested_model}'" @@ -704,41 +672,6 @@ def _create_nested_instance( # Recursively create nested instance return self.create_instance(data, nested_shape, resolved_model, nested_type) - def _parse_nested_wildcard( - self, data: dict[str, Any], nested_model: type | str - ) -> dict[str, Any]: - """Parse nested object with wildcard (all fields) - - Args: - data: Nested object data - nested_model: Model class or string name for the nested object - - Returns: - Dictionary with all parsed fields - """ - # Resolve nested model if it's a string - resolved_model = self._resolve_nested_model(nested_model) - - # Ensure model is registered - if not self.schema_registry.is_registered(resolved_model): - self.schema_registry.register(resolved_model) - - # Get model schema - model_schema = self.schema_registry.get_schema(resolved_model) - - # Parse all fields - result: dict[str, Any] = {} - for field_name, value in data.items(): - if field_name in model_schema: - field_schema = model_schema[field_name] - parsed_value = self._parse_field(field_name, value, field_schema.type, field_schema) - result[field_name] = parsed_value - else: - # Field not in schema, include as-is - result[field_name] = value - - return result - def _parse_field(self, field_name: str, value: Any, field_type: type, field_schema: Any) -> Any: """Parse a single field value using appropriate parser @@ -778,7 +711,7 @@ def _parse_field(self, field_name: str, value: Any, field_type: type, field_sche return value def validate_data( - self, data: dict[str, Any], shape_spec: ShapeSpec, base_model: type + self, data: dict[str, Any], shape_spec: ShapeSpec, base_model: type | str ) -> list[str]: """Validate that data matches the shape specification @@ -803,11 +736,15 @@ def validate_data( errors: list[str] = [] if not isinstance(data, dict): - errors.append(f"Expected dictionary data, got {type(data).__name__}") + errors.append( # type: ignore[unreachable] + f"Expected dictionary data, got {type(data).__name__}" + ) return errors - # Ensure model is registered - if not self.schema_registry.is_registered(base_model): + # Ensure model is registered. String model names are expected to be + # pre-registered (explicit schemas); only concrete classes can be + # auto-registered via introspection. + if isinstance(base_model, type) and not self.schema_registry.is_registered(base_model): self.schema_registry.register(base_model) # Get model schema @@ -826,9 +763,8 @@ def validate_data( # Check if field exists in schema if field_spec.name not in model_schema: - errors.append( - f"Field '{field_spec.name}' does not exist in {base_model.__name__} schema" - ) + model_name = base_model.__name__ if isinstance(base_model, type) else base_model + errors.append(f"Field '{field_spec.name}' does not exist in {model_name} schema") continue field_schema = model_schema[field_spec.name] diff --git a/tango/shapes/generator.py b/tango/shapes/generator.py index 5d7e978..7fbad1b 100644 --- a/tango/shapes/generator.py +++ b/tango/shapes/generator.py @@ -20,7 +20,7 @@ import logging import threading from collections import OrderedDict -from typing import Any, get_args, get_origin, get_type_hints +from typing import Any, cast, get_args, get_origin, get_type_hints from tango.exceptions import TypeGenerationError from tango.shapes.models import ShapeSpec @@ -250,7 +250,10 @@ def generate_type( field_schema = model_schema[field_spec.name] - # Determine field type + # Determine field type. The value is a heterogeneous mix of type + # objects, parameterized generics (list[...]), and union objects, + # so it is intentionally typed as Any. + field_type: Any if field_spec.nested_fields: # Generate nested type if not field_schema.nested_model: @@ -275,25 +278,7 @@ def generate_type( # Handle optional types if field_schema.is_optional: - field_type = field_type | None # type: ignore - - annotations[field_name] = field_type - - elif field_spec.is_wildcard: - # Wildcard on nested field - use full model type - if field_schema.nested_model: - # Resolve nested model if it's a string - field_type = self._resolve_nested_model(field_schema.nested_model) - else: - field_type = field_schema.type - - # Handle list types - if field_schema.is_list: - field_type = list[field_type] # type: ignore - - # Handle optional types - if field_schema.is_optional: - field_type = field_type | None # type: ignore + field_type = field_type | None annotations[field_name] = field_type @@ -303,11 +288,11 @@ def generate_type( # Handle list types if field_schema.is_list: - field_type = list[field_type] # type: ignore + field_type = list[field_type] # Handle optional types if field_schema.is_optional: - field_type = field_type | None # type: ignore + field_type = field_type | None annotations[field_name] = field_type @@ -329,7 +314,7 @@ def generate_type( field_type = field_schema.type # Handle optional types if field_schema.is_optional: - field_type = field_type | None # type: ignore + field_type = field_type | None annotations[auto_field] = field_type # Create TypedDict dynamically @@ -414,7 +399,7 @@ def _resolve_nested_model(self, nested_model: type | str) -> type: model_class = getattr(models, nested_model, None) if model_class is None: raise TypeGenerationError(f"Could not resolve nested model '{nested_model}'") - return model_class + return cast(type, model_class) except ImportError as err: raise TypeGenerationError( f"Could not import models module to resolve '{nested_model}'" @@ -555,7 +540,7 @@ def _format_type_annotation(self, type_annotation: Any) -> str: # Handle basic types if hasattr(type_annotation, "__name__"): - type_name = type_annotation.__name__ + type_name = str(type_annotation.__name__) else: type_name = str(type_annotation) @@ -576,7 +561,7 @@ def _format_type_annotation(self, type_annotation: Any) -> str: if args: formatted_args = [self._format_type_annotation(arg) for arg in args] return f"{origin.__name__}[{', '.join(formatted_args)}]" - return origin.__name__ + return str(origin.__name__) return type_name diff --git a/tango/shapes/parser.py b/tango/shapes/parser.py index 5fd7187..8d2440f 100644 --- a/tango/shapes/parser.py +++ b/tango/shapes/parser.py @@ -110,7 +110,7 @@ def _suggest_field_correction(invalid_field: str, valid_fields: list[str]) -> st # Check for common prefix best_match = None - best_score = 0 + best_score = 0.0 for field in valid_fields: # Count common prefix length @@ -167,6 +167,13 @@ def __init__(self, cache_enabled: bool = True, schema_registry: SchemaRegistry | self._schema_registry = schema_registry self._schema_registry_initialized = schema_registry is not None + def _ensure_registry(self) -> SchemaRegistry: + """Return the schema registry, lazily creating it on first use.""" + if self._schema_registry is None: + self._schema_registry = SchemaRegistry() + self._schema_registry_initialized = True + return self._schema_registry + def parse(self, shape: str) -> ShapeSpec: """Parse a shape string into a ShapeSpec @@ -544,25 +551,22 @@ def validate(self, shape_spec: ShapeSpec, model_class: type) -> None: >>> spec = parser.parse("invalid_field") >>> parser.validate(spec, Contract) # Raises ShapeValidationError """ - # Lazy initialize schema registry - if not self._schema_registry_initialized: - self._schema_registry = SchemaRegistry() - self._schema_registry_initialized = True + registry = self._ensure_registry() # Ensure model is registered - if not self._schema_registry.is_registered(model_class): - self._schema_registry.register(model_class) + if not registry.is_registered(model_class): + registry.register(model_class) # Validate each field for field_spec in shape_spec.fields: self._validate_field_spec(field_spec, model_class) - def _validate_field_spec(self, field_spec: FieldSpec, model_class: type) -> None: + def _validate_field_spec(self, field_spec: FieldSpec, model_class: type | str) -> None: """Validate a single field specification against a model Args: field_spec: Field specification to validate - model_class: Model class to validate against + model_class: Model class (or registered model name) to validate against Raises: ShapeValidationError: If field is invalid @@ -571,20 +575,17 @@ def _validate_field_spec(self, field_spec: FieldSpec, model_class: type) -> None if field_spec.is_wildcard: return - # Lazy initialize schema registry if needed - if not self._schema_registry_initialized: - self._schema_registry = SchemaRegistry() - self._schema_registry_initialized = True + registry = self._ensure_registry() # Validate field exists in model try: - field_schema = self._schema_registry.validate_field(model_class, field_spec.name) + field_schema = registry.validate_field(model_class, field_spec.name) except ShapeValidationError as e: # Enhance error message with suggestions model_name = ( model_class.__name__ if hasattr(model_class, "__name__") else str(model_class) ) - model_schema = self._schema_registry.get_schema(model_class) + model_schema = registry.get_schema(model_class) valid_fields = list(model_schema.keys()) error_msg = f"Field '{field_spec.name}' does not exist in {model_name}." @@ -630,7 +631,7 @@ def _validate_field_spec(self, field_spec: FieldSpec, model_class: type) -> None error_msg += "\n\nNested selections are only valid for object fields like 'recipient', 'agency', 'location', etc." # Find some nested fields as examples - model_schema = self._schema_registry.get_schema(model_class) + model_schema = registry.get_schema(model_class) nested_examples = [ name for name, schema in model_schema.items() if schema.nested_model ] diff --git a/tango/shapes/schema.py b/tango/shapes/schema.py index a91caec..45a9278 100644 --- a/tango/shapes/schema.py +++ b/tango/shapes/schema.py @@ -8,6 +8,7 @@ list indicators independently of the dataclass definitions. """ +import builtins from dataclasses import dataclass from typing import Any, get_args, get_origin, get_type_hints @@ -33,10 +34,10 @@ class FieldSchema: """ name: str - type: type + type: builtins.type is_optional: bool is_list: bool - nested_model: type | None = None + nested_model: builtins.type | str | None = None def __repr__(self) -> str: """String representation for debugging"""