From 9c1641c6540fafa67f66e82aec424ad7745d7535 Mon Sep 17 00:00:00 2001 From: kraysent Date: Sun, 7 Jun 2026 18:38:23 +0200 Subject: [PATCH 1/2] #282: format endpoint for rules formatting --- app/commands/adminapi/command.py | 2 + app/commands/dataapi/command.py | 30 ++- app/data/repositories/__init__.py | 2 + app/data/repositories/designation_rules.py | 86 +++++++ app/domain/adminapi/actions.py | 10 +- app/domain/adminapi/designation_rules.py | 49 ++++ app/domain/adminapi/mock.py | 1 + app/domain/dataapi/actions.py | 14 ++ app/domain/designation/__init__.py | 23 ++ app/domain/designation/engine.py | 141 +++++++++++ app/domain/designation/formatter.py | 63 +++++ app/presentation/adminapi/interface.py | 35 +++ app/presentation/adminapi/server.py | 37 ++- app/presentation/dataapi/interface.py | 21 ++ app/presentation/dataapi/server.py | 19 ++ .../V049__designation_format_rules.sql | 91 +++++++ tests/integration/designation_format_test.py | 228 ++++++++++++++++++ tests/integration/metadata_api_test.py | 4 + tests/unit/domain/designation_engine_test.py | 80 ++++++ 19 files changed, 932 insertions(+), 4 deletions(-) create mode 100644 app/data/repositories/designation_rules.py create mode 100644 app/domain/adminapi/designation_rules.py create mode 100644 app/domain/designation/__init__.py create mode 100644 app/domain/designation/engine.py create mode 100644 app/domain/designation/formatter.py create mode 100644 postgres/migrations/V049__designation_format_rules.sql create mode 100644 tests/integration/designation_format_test.py create mode 100644 tests/unit/domain/designation_engine_test.py diff --git a/app/commands/adminapi/command.py b/app/commands/adminapi/command.py index dbf19137..a740345c 100644 --- a/app/commands/adminapi/command.py +++ b/app/commands/adminapi/command.py @@ -49,6 +49,7 @@ def prepare(self): ) layer0_repo = repositories.Layer0Repository(self.pg_storage, log) + designation_rules_repo = repositories.DesignationRulesRepository(self.pg_storage, log) refresh = table_stats.make_table_stats_refresh(layer0_repo) self.table_stats_cache = cache.BackgroundCache( "table_stats", @@ -64,6 +65,7 @@ def prepare(self): layer0_repo=layer0_repo, layer1_repo=repositories.Layer1Repository(self.pg_storage, log), layer2_repo=repositories.Layer2Repository(self.pg_storage, log), + designation_rules_repo=designation_rules_repo, authenticator=authenticator, clients=clients.Clients(cfg.clients.ads_token), table_stats_cache=self.table_stats_cache, diff --git a/app/commands/dataapi/command.py b/app/commands/dataapi/command.py index ac31335d..0865267d 100644 --- a/app/commands/dataapi/command.py +++ b/app/commands/dataapi/command.py @@ -1,3 +1,5 @@ +import threading +from datetime import timedelta from pathlib import Path from typing import Any, final @@ -9,7 +11,8 @@ from app.data import repositories from app.domain import dataapi as domain from app.domain import responders -from app.lib import auth, commands, config, tracing +from app.domain.designation import DesignationFormatter +from app.lib import auth, cache, commands, config, tracing from app.lib.storage import postgres from app.lib.tracing import TracingConfig from app.lib.web import server @@ -27,6 +30,11 @@ class DataAPICommand(commands.Command): def __init__(self, config_path: str) -> None: self.config_path = config_path + self.pg_auth: postgres.PgStorage | None = None + self.pg_main: postgres.PgStorage | None = None + self.designation_rules_cache: cache.BackgroundCache | None = None + self._designation_rules_thread: threading.Thread | None = None + self.app: presentation.Server | None = None def prepare(self): self.config = parse_config(self.config_path) @@ -43,10 +51,26 @@ def prepare(self): self.pg_auth.connect() self.pg_main.connect() + designation_rules_repo = repositories.DesignationRulesRepository(self.pg_main, log) + self.designation_rules_cache = cache.BackgroundCache( + "designation_rules", + designation_rules_repo.snapshot, + refresh_frequency=timedelta(seconds=30), + refresh_timeout=timedelta(seconds=10), + ) + self._designation_rules_thread = threading.Thread( + target=self.designation_rules_cache.run, + daemon=True, + ) + self._designation_rules_thread.start() + + designation_formatter = DesignationFormatter(self.designation_rules_cache.get) + actions = domain.Actions( layer2_repo=repositories.Layer2Repository(self.pg_main, log), catalog_cfg=self.config.catalogs, metadata_repo=repositories.MetadataRepository(self.pg_main), + designation_formatter=designation_formatter, ) self.app = presentation.Server( @@ -58,9 +82,13 @@ def prepare(self): ) def run(self): + if self.app is None: + raise RuntimeError("prepare() was not called") self.app.run() def cleanup(self): + if self.designation_rules_cache is not None: + self.designation_rules_cache.stop() if self.pg_auth: self.pg_auth.disconnect() if self.pg_main: diff --git a/app/data/repositories/__init__.py b/app/data/repositories/__init__.py index 1d76c847..d4b15a4b 100644 --- a/app/data/repositories/__init__.py +++ b/app/data/repositories/__init__.py @@ -1,4 +1,5 @@ from app.data.repositories.common import CommonRepository +from app.data.repositories.designation_rules import DesignationRulesRepository from app.data.repositories.layer0 import INTERNAL_ID_COLUMN_NAME, Layer0Repository from app.data.repositories.layer1 import Layer1Repository from app.data.repositories.layer2 import Layer2Repository @@ -6,6 +7,7 @@ __all__ = [ "CommonRepository", + "DesignationRulesRepository", "Layer0Repository", "INTERNAL_ID_COLUMN_NAME", "Layer1Repository", diff --git a/app/data/repositories/designation_rules.py b/app/data/repositories/designation_rules.py new file mode 100644 index 00000000..49ee1d00 --- /dev/null +++ b/app/data/repositories/designation_rules.py @@ -0,0 +1,86 @@ +import json +from typing import Any, final + +import structlog + +from app.domain.designation.formatter import RuleModel, RuleSetSnapshot +from app.lib.storage import postgres + + +def _row_to_model(row: dict[str, Any]) -> RuleModel: + transforms = row["transforms"] + if isinstance(transforms, str): + transforms = json.loads(transforms) + return RuleModel( + id=row["id"], + priority=int(row["priority"]), + pattern=row["pattern"], + template=row["template"], + transforms=transforms or {}, + enabled=bool(row["enabled"]), + ) + + +def _transforms_to_json(transforms: dict[str, list[dict[str, str | None]]]) -> str: + return json.dumps(transforms) + + +@final +class DesignationRulesRepository(postgres.TransactionalPGRepository): + def __init__(self, storage: postgres.PgStorage, logger: structlog.stdlib.BoundLogger) -> None: + self._logger = logger + super().__init__(storage) + + def snapshot(self) -> RuleSetSnapshot: + rows = self._storage.query( + """SELECT id, priority, pattern, template, transforms, enabled, + EXTRACT(EPOCH FROM MAX(modification_time) OVER ()) AS version_epoch + FROM designation.format_rules + WHERE enabled = true + ORDER BY priority ASC""", + ) + version = int(rows[0]["version_epoch"]) if rows else 0 + rules = [_row_to_model(r) for r in rows] + return RuleSetSnapshot(version=version, rules=rules) + + def list_rules(self) -> list[RuleModel]: + rows = self._storage.query( + """SELECT id, priority, pattern, template, transforms, enabled + FROM designation.format_rules + ORDER BY priority ASC""", + ) + return [_row_to_model(r) for r in rows] + + def get_rule(self, rule_id: str) -> RuleModel | None: + rows = self._storage.query( + """SELECT id, priority, pattern, template, transforms, enabled + FROM designation.format_rules WHERE id = %s""", + params=[rule_id], + ) + if not rows: + return None + return _row_to_model(rows[0]) + + def save_rule( + self, + rule_id: str, + priority: int, + pattern: str, + template: str, + transforms: dict[str, list[dict[str, str | None]]], + enabled: bool = True, + ) -> RuleModel: + rows = self._storage.query( + """INSERT INTO designation.format_rules (id, priority, pattern, template, transforms, enabled) + VALUES (%s, %s, %s, %s, %s::jsonb, %s) + ON CONFLICT (id) DO UPDATE SET + priority = EXCLUDED.priority, + pattern = EXCLUDED.pattern, + template = EXCLUDED.template, + transforms = EXCLUDED.transforms, + enabled = EXCLUDED.enabled, + modification_time = NOW() + RETURNING id, priority, pattern, template, transforms, enabled""", + params=[rule_id, priority, pattern, template, _transforms_to_json(transforms), enabled], + ) + return _row_to_model(rows[0]) diff --git a/app/domain/adminapi/actions.py b/app/domain/adminapi/actions.py index 27c89d78..db720e7b 100644 --- a/app/domain/adminapi/actions.py +++ b/app/domain/adminapi/actions.py @@ -1,7 +1,7 @@ from typing import final from app.data import repositories -from app.domain.adminapi import crossmatch, layer1_write, login, sources, table_upload +from app.domain.adminapi import crossmatch, designation_rules, layer1_write, login, sources, table_upload from app.lib import auth, cache, clients from app.presentation import adminapi @@ -14,6 +14,7 @@ def __init__( layer0_repo: repositories.Layer0Repository, layer1_repo: repositories.Layer1Repository, layer2_repo: repositories.Layer2Repository, + designation_rules_repo: repositories.DesignationRulesRepository, authenticator: auth.Authenticator, clients: clients.Clients, table_stats_cache: cache.BackgroundCache[adminapi.TableStatsSnapshot], @@ -29,6 +30,7 @@ def __init__( ) self.crossmatch_manager = crossmatch.CrossmatchManager(layer0_repo, layer1_repo, layer2_repo) self.layer1_writer = layer1_write.Layer1Writer(layer1_repo) + self.designation_rules_manager = designation_rules.DesignationRulesManager(designation_rules_repo) def create_source(self, r: adminapi.CreateSourceRequest) -> adminapi.CreateSourceResponse: return self.source_manager.create_source(r) @@ -68,3 +70,9 @@ def set_crossmatch_results(self, r: adminapi.SetCrossmatchResultsRequest) -> adm def assign_record_pgcs(self, r: adminapi.AssignRecordPgcsRequest) -> adminapi.AssignRecordPgcsResponse: return self.crossmatch_manager.assign_record_pgcs(r) + + def list_designation_rules(self) -> adminapi.ListRulesResponse: + return self.designation_rules_manager.list_rules() + + def save_designation_rule(self, r: adminapi.SaveRuleRequest) -> adminapi.SaveRuleResponse: + return self.designation_rules_manager.save_rule(r) diff --git a/app/domain/adminapi/designation_rules.py b/app/domain/adminapi/designation_rules.py new file mode 100644 index 00000000..d9dcf017 --- /dev/null +++ b/app/domain/adminapi/designation_rules.py @@ -0,0 +1,49 @@ +from typing import final + +from app.data import repositories +from app.domain.designation.engine import Rule, RuleEngine +from app.domain.designation.formatter import RuleModel +from app.lib.web.errors import RuleValidationError +from app.presentation import adminapi + + +def _validate_rule(rule: RuleModel, examples: list[tuple[str, str]] | None = None) -> None: + engine_rule = Rule( + id=rule.id, + priority=rule.priority, + pattern=rule.pattern, + template=rule.template, + transforms=rule.to_engine_rule().transforms, + examples=examples or [], + ) + try: + RuleEngine.compile([engine_rule]).validate_rule(engine_rule) + except ValueError as e: + raise RuleValidationError(str(e)) from e + + +@final +class DesignationRulesManager: + def __init__(self, rules_repo: repositories.DesignationRulesRepository) -> None: + self._repo = rules_repo + + def list_rules(self) -> adminapi.ListRulesResponse: + rules = self._repo.list_rules() + return adminapi.ListRulesResponse(rules=[adminapi.RuleModel(**r.model_dump()) for r in rules]) + + def save_rule(self, request: adminapi.SaveRuleRequest) -> adminapi.SaveRuleResponse: + rule = RuleModel(**request.rule.model_dump()) + if not rule.id.strip(): + raise RuleValidationError("rule.id is required") + if rule.enabled: + examples = [(e.input, e.expected) for e in request.examples] + _validate_rule(rule, examples) + saved = self._repo.save_rule( + rule_id=rule.id, + priority=rule.priority, + pattern=rule.pattern, + template=rule.template, + transforms=rule.transforms, + enabled=rule.enabled, + ) + return adminapi.SaveRuleResponse(rule=adminapi.RuleModel(**saved.model_dump())) diff --git a/app/domain/adminapi/mock.py b/app/domain/adminapi/mock.py index b7fede53..b96401d9 100644 --- a/app/domain/adminapi/mock.py +++ b/app/domain/adminapi/mock.py @@ -25,6 +25,7 @@ def get_mock_actions(): layer0_repo=mock.MagicMock(), layer1_repo=mock.MagicMock(), layer2_repo=mock.MagicMock(), + designation_rules_repo=mock.MagicMock(), authenticator=auth.NoopAuthenticator(), clients=c, table_stats_cache=get_mock_table_stats_cache(), diff --git a/app/domain/dataapi/actions.py b/app/domain/dataapi/actions.py index bcdb784f..04421dad 100644 --- a/app/domain/dataapi/actions.py +++ b/app/domain/dataapi/actions.py @@ -3,6 +3,7 @@ from app.data import model, repositories from app.domain import responders from app.domain.dataapi import parameterized_query, search_parsers, tap_types +from app.domain.designation import DesignationFormatter from app.presentation import dataapi ENABLED_CATALOGS = [ @@ -41,10 +42,12 @@ def __init__( layer2_repo: repositories.Layer2Repository, catalog_cfg: responders.CatalogConfig, metadata_repo: repositories.MetadataRepository, + designation_formatter: DesignationFormatter, ) -> None: self.layer2_repo = layer2_repo self.catalog_cfg = catalog_cfg self.metadata_repo = metadata_repo + self.designation_formatter = designation_formatter self.parameterized_query_manager = parameterized_query.ParameterizedQueryManager( layer2_repo, ENABLED_CATALOGS, catalog_cfg ) @@ -65,6 +68,17 @@ def query(self, query: dataapi.QueryRequest) -> dataapi.QueryResponse: def query_fits(self, query: dataapi.FITSRequest) -> bytes: return self.parameterized_query_manager.query_fits(query) + def format_designations(self, request: dataapi.FormatRequest) -> dataapi.FormatResponse: + batch = self.designation_formatter.format_batch(request.names) + results: list[dataapi.FormatResult] = [] + for original, match in batch: + raw = original.strip() if original else "" + if match is None: + results.append(dataapi.FormatResult(formatted=raw, rule_id=None)) + else: + results.append(dataapi.FormatResult(formatted=match.formatted, rule_id=match.rule_id)) + return dataapi.FormatResponse(results=results) + def query_simple(self, query: dataapi.QuerySimpleRequest) -> dataapi.QuerySimpleResponse: return self.parameterized_query_manager.query_simple(query) diff --git a/app/domain/designation/__init__.py b/app/domain/designation/__init__.py new file mode 100644 index 00000000..27704fc3 --- /dev/null +++ b/app/domain/designation/__init__.py @@ -0,0 +1,23 @@ +from app.domain.designation.engine import ( + CompiledRule, + FormatMatch, + Rule, + RuleEngine, + TransformOp, + TransformSpec, + apply_transform, +) +from app.domain.designation.formatter import DesignationFormatter, RuleModel, RuleSetSnapshot + +__all__ = [ + "CompiledRule", + "DesignationFormatter", + "FormatMatch", + "Rule", + "RuleEngine", + "RuleModel", + "RuleSetSnapshot", + "TransformOp", + "TransformSpec", + "apply_transform", +] diff --git a/app/domain/designation/engine.py b/app/domain/designation/engine.py new file mode 100644 index 00000000..fb736917 --- /dev/null +++ b/app/domain/designation/engine.py @@ -0,0 +1,141 @@ +import re +from dataclasses import dataclass, field +from enum import StrEnum +from typing import Any, Self, final + + +class TransformOp(StrEnum): + UPPER = "upper" + LOWER = "lower" + CAPITALIZE = "capitalize" + STRIP_ZEROS = "strip_zeros" + ZFILL = "zfill" + ROMAN_OR_INT = "roman_or_int" + DEFAULT = "default" + + +_ROMAN = {"I": 1, "V": 5, "X": 10, "L": 50, "C": 100, "D": 500, "M": 1000} + + +def _roman_to_int(s: str) -> int: + val = 0 + for i in range(len(s)): + if i + 1 < len(s) and _ROMAN[s[i]] < _ROMAN[s[i + 1]]: + val -= _ROMAN[s[i]] + else: + val += _ROMAN[s[i]] + return val + + +@dataclass(frozen=True) +class TransformSpec: + op: TransformOp + arg: str | None = None + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> Self: + return cls(op=TransformOp(data["op"]), arg=data.get("arg")) + + def to_dict(self) -> dict[str, Any]: + result: dict[str, Any] = {"op": self.op.value} + if self.arg is not None: + result["arg"] = self.arg + return result + + +def apply_transform(value: str, spec: TransformSpec) -> str: + match spec.op: + case TransformOp.UPPER: + return value.upper() + case TransformOp.LOWER: + return value.lower() + case TransformOp.CAPITALIZE: + return value.capitalize() + case TransformOp.STRIP_ZEROS: + return str(int(value)) if value.isdigit() else value + case TransformOp.ZFILL: + width = int(spec.arg or "0") + return value.zfill(width) + case TransformOp.ROMAN_OR_INT: + if value.isdigit(): + return str(int(value)) + return str(_roman_to_int(value.upper())) + case TransformOp.DEFAULT: + return spec.arg if not value else value + + +@dataclass(frozen=True) +class Rule: + id: str + priority: int + pattern: str + template: str + transforms: dict[int, list[TransformSpec]] = field(default_factory=dict) + examples: list[tuple[str, str]] = field(default_factory=list) + + +@dataclass(frozen=True) +class FormatMatch: + formatted: str + rule_id: str + + +@final +class CompiledRule: + def __init__(self, rule: Rule) -> None: + self.rule = rule + self._pattern = re.compile(rule.pattern, re.IGNORECASE) + + def match(self, value: str) -> str | None: + m = self._pattern.match(value) + if m is None: + return None + groups: list[str] = [] + for i in range(1, m.lastindex + 1 if m.lastindex else 0): + raw = m.group(i) or "" + transformed = raw + for spec in self.rule.transforms.get(i, []): + if spec.op == TransformOp.DEFAULT and transformed: + continue + transformed = apply_transform(transformed, spec) + groups.append(transformed) + return self.rule.template.format(*groups) + + +@final +class RuleEngine: + def __init__(self, compiled: list[CompiledRule]) -> None: + self._compiled = compiled + + @classmethod + def compile(cls, rules: list[Rule]) -> Self: + enabled = sorted(rules, key=lambda r: r.priority) + return cls([CompiledRule(r) for r in enabled]) + + def format(self, name: str) -> FormatMatch | None: + value = name.strip() if name else "" + if not value: + return None + for compiled in self._compiled: + formatted = compiled.match(value) + if formatted is not None: + return FormatMatch( + formatted=formatted, + rule_id=compiled.rule.id, + ) + return None + + def validate_rule(self, rule: Rule) -> None: + re.compile(rule.pattern, re.IGNORECASE) + for group_idx, specs in rule.transforms.items(): + for spec in specs: + if spec.op == TransformOp.ZFILL and spec.arg is None: + raise ValueError(f"zfill transform on group {group_idx} requires arg") + if spec.op == TransformOp.DEFAULT and spec.arg is None: + raise ValueError(f"default transform on group {group_idx} requires arg") + engine = RuleEngine.compile([rule]) + for raw, expected in rule.examples: + result = engine.format(raw) + if result is None or result.formatted != expected: + got = result.formatted if result else None + raise ValueError(f"example {raw!r}: expected {expected!r}, got {got!r}") diff --git a/app/domain/designation/formatter.py b/app/domain/designation/formatter.py new file mode 100644 index 00000000..c755698b --- /dev/null +++ b/app/domain/designation/formatter.py @@ -0,0 +1,63 @@ +from collections.abc import Callable +from typing import final + +import pydantic + +from app.domain.designation.engine import FormatMatch, Rule, RuleEngine, TransformSpec + + +class RuleModel(pydantic.BaseModel): + id: str + priority: int + pattern: str + template: str + transforms: dict[str, list[dict[str, str | None]]] = pydantic.Field(default_factory=dict) + enabled: bool = True + + def to_engine_rule(self) -> Rule: + transforms: dict[int, list[TransformSpec]] = {} + for key, ops in self.transforms.items(): + transforms[int(key)] = [TransformSpec.from_dict(op) for op in ops] + return Rule( + id=self.id, + priority=self.priority, + pattern=self.pattern, + template=self.template, + transforms=transforms, + ) + + +class RuleSetSnapshot(pydantic.BaseModel): + version: int + rules: list[RuleModel] + + +@final +class DesignationFormatter: + def __init__(self, get_snapshot: Callable[[], RuleSetSnapshot]) -> None: + self._get_snapshot = get_snapshot + self._cached_version: int | None = None + self._engine: RuleEngine | None = None + + def _ensure_engine(self) -> RuleEngine: + snapshot = self._get_snapshot() + if self._engine is None or self._cached_version != snapshot.version: + rules = [r.to_engine_rule() for r in snapshot.rules if r.enabled] + self._engine = RuleEngine.compile(rules) + self._cached_version = snapshot.version + return self._engine + + def format(self, name: str) -> FormatMatch | None: + return self._ensure_engine().format(name) + + def format_batch(self, names: list[str]) -> list[tuple[str, FormatMatch | None]]: + engine = self._ensure_engine() + results: list[tuple[str, FormatMatch | None]] = [] + for name in names: + raw = name.strip() if name else "" + if not raw: + results.append((name, None)) + continue + match = engine.format(name) + results.append((name, match)) + return results diff --git a/app/presentation/adminapi/interface.py b/app/presentation/adminapi/interface.py index 38d9880f..df04cc54 100644 --- a/app/presentation/adminapi/interface.py +++ b/app/presentation/adminapi/interface.py @@ -377,6 +377,33 @@ class AssignRecordPgcsResponse(pydantic.BaseModel): pass +class RuleExample(pydantic.BaseModel): + input: str + expected: str + + +class RuleModel(pydantic.BaseModel): + id: str + priority: int + pattern: str + template: str + transforms: dict[str, list[dict[str, str | None]]] = pydantic.Field(default_factory=dict) + enabled: bool = True + + +class ListRulesResponse(pydantic.BaseModel): + rules: list[RuleModel] + + +class SaveRuleRequest(WriteRequest): + rule: RuleModel + examples: list[RuleExample] = pydantic.Field(default_factory=list) + + +class SaveRuleResponse(pydantic.BaseModel): + rule: RuleModel + + class Actions(abc.ABC): @abc.abstractmethod def add_data(self, r: AddDataRequest) -> AddDataResponse: @@ -429,3 +456,11 @@ def set_crossmatch_results(self, r: SetCrossmatchResultsRequest) -> SetCrossmatc @abc.abstractmethod def assign_record_pgcs(self, r: AssignRecordPgcsRequest) -> AssignRecordPgcsResponse: pass + + @abc.abstractmethod + def list_designation_rules(self) -> ListRulesResponse: + pass + + @abc.abstractmethod + def save_designation_rule(self, r: SaveRuleRequest) -> SaveRuleResponse: + pass diff --git a/app/presentation/adminapi/server.py b/app/presentation/adminapi/server.py index 9e413fdc..43edc64b 100644 --- a/app/presentation/adminapi/server.py +++ b/app/presentation/adminapi/server.py @@ -112,6 +112,19 @@ def assign_record_pgcs( response = self.actions.assign_record_pgcs(request) return server.APIOkResponse(data=response) + def list_designation_rules( + self, + ) -> server.APIOkResponse[interface.ListRulesResponse]: + response = self.actions.list_designation_rules() + return server.APIOkResponse(data=response) + + def save_designation_rule( + self, + request: interface.SaveRuleRequest, + ) -> server.APIOkResponse[interface.SaveRuleResponse]: + response = self.actions.save_designation_rule(request) + return server.APIOkResponse(data=response) + class Server(server.WebServer): def __init__( @@ -327,10 +340,10 @@ def __init__( ```json { "catalog": "designation", - "columns": ["design"], + "columns": ["design", "raw", "rule_id"], "units": {}, "ids": ["uuid-1", "uuid-2"], - "data": [["NGC 1234"], ["NGC 5678"]] + "data": [["NGC 1234", "ngc 1234", "ngc"], ["NGC 5678", "NGC5678", "ngc"]] } ``` @@ -348,6 +361,26 @@ def __init__( allowed_roles=admin_only, audit_action=True, ), + server.Route( + "/v1/designation/rules", + http.HTTPMethod.GET, + api.list_designation_rules, + "List designation format rules", + "Returns all designation formatting rules ordered by priority.", + allowed_roles=admin_only, + ), + server.Route( + "/v1/designation/rule", + http.HTTPMethod.POST, + api.save_designation_rule, + "Save designation format rule", + """Creates or updates a designation formatting rule. +`rule.id` is the stable free-text identifier and primary key. +Set `rule.enabled` to false to disable a rule instead of deleting it. +When `rule.enabled` is true, optional `examples` are validated before save.""", + allowed_roles=admin_only, + audit_action=True, + ), ] super().__init__( diff --git a/app/presentation/dataapi/interface.py b/app/presentation/dataapi/interface.py index c2dde262..240d3384 100644 --- a/app/presentation/dataapi/interface.py +++ b/app/presentation/dataapi/interface.py @@ -210,6 +210,23 @@ class QueryResponse(pydantic.BaseModel): objects: list[PGCObject] +class FormatRequest(pydantic.BaseModel): + names: list[str] = pydantic.Field( + description="Designation names to format", + min_length=1, + max_length=10000, + ) + + +class FormatResult(pydantic.BaseModel): + formatted: str + rule_id: str | None = None + + +class FormatResponse(pydantic.BaseModel): + results: list[FormatResult] + + class FITSRequest(pydantic.BaseModel): pgcs: list[int] | None = pydantic.Field( default=None, @@ -262,6 +279,10 @@ def query(self, query: QueryRequest) -> QueryResponse: def query_fits(self, query: FITSRequest) -> bytes: pass + @abc.abstractmethod + def format_designations(self, request: FormatRequest) -> FormatResponse: + pass + @abc.abstractmethod def tap_tables(self, request: tap.ListTAPTablesRequest) -> tap.ListTAPTablesResponse: pass diff --git a/app/presentation/dataapi/server.py b/app/presentation/dataapi/server.py index 10402c98..3b838341 100644 --- a/app/presentation/dataapi/server.py +++ b/app/presentation/dataapi/server.py @@ -55,6 +55,15 @@ def query_fits( }, ) + def format_designations( + self, + request: fastapi.Request, + body: interface.FormatRequest, + ) -> server.APIOkResponse[interface.FormatResponse]: + _ = request + response = self.actions.format_designations(body) + return server.APIOkResponse(data=response) + def tap_tables( self, request: Annotated[tap.ListTAPTablesRequest, fastapi.Query()], @@ -123,6 +132,16 @@ def __init__( For example, if both coordinates and designation are specified, object must be in the specified area and have the specified designation.""", ), + server.Route( + "/v1/designation/format", + http.HTTPMethod.POST, + api.format_designations, + "Format designation names", + """Canonicalizes a batch of designation names using the active format rules. +Returns the formatted name and matched rule ID for each input. +Unmatched names are returned unchanged with rule_id omitted.""", + rate_limit="120/minute", + ), server.Route( "/v1/tap/tables", http.HTTPMethod.GET, diff --git a/postgres/migrations/V049__designation_format_rules.sql b/postgres/migrations/V049__designation_format_rules.sql new file mode 100644 index 00000000..4f2f09a7 --- /dev/null +++ b/postgres/migrations/V049__designation_format_rules.sql @@ -0,0 +1,91 @@ +/* pgmigrate-encoding: utf-8 */ + +CREATE TABLE designation.format_rules ( + id text PRIMARY KEY, + priority int NOT NULL, + pattern text NOT NULL, + template text NOT NULL, + transforms jsonb NOT NULL DEFAULT '{}', + enabled boolean NOT NULL DEFAULT true, + created_at timestamp without time zone NOT NULL DEFAULT NOW(), + modification_time timestamp without time zone NOT NULL DEFAULT NOW() +); + +CREATE INDEX format_rules_enabled_priority_idx ON designation.format_rules (enabled, priority); + +COMMENT ON TABLE designation.format_rules IS 'Declarative rules for canonicalizing object designations'; +COMMENT ON COLUMN designation.format_rules.id IS 'Stable free-text rule identifier'; +COMMENT ON COLUMN designation.format_rules.priority IS 'First-match-wins ordering; lower values are tried first'; +COMMENT ON COLUMN designation.format_rules.pattern IS 'Regex pattern (case-insensitive match applied at runtime)'; +COMMENT ON COLUMN designation.format_rules.template IS 'Output template with {0}, {1}, ... placeholders for capture groups'; +COMMENT ON COLUMN designation.format_rules.transforms IS 'Per-group transform ops, e.g. {"1": [{"op": "upper"}]}'; + +ALTER TABLE designation.data + ADD COLUMN raw text, + ADD COLUMN rule_id text REFERENCES designation.format_rules (id) ON DELETE SET NULL; + +CREATE INDEX designation_data_rule_id_idx ON designation.data (rule_id); +CREATE INDEX designation_data_rule_id_null_idx ON designation.data (rule_id) WHERE rule_id IS NULL; + +COMMENT ON COLUMN designation.data.raw IS '{"description": "Original designation before formatting", "ucd": "meta.id"}'; +COMMENT ON COLUMN designation.data.rule_id IS '{"description": "ID of the format rule that produced design, if any"}'; + +INSERT INTO designation.format_rules (id, priority, pattern, template, transforms) VALUES + ('pgc', 1, '^(?:LEDA|PGC|P|#)?\s*0*(\d+)$', 'PGC {0}', '{}'::jsonb), + ('sdss', 2, '^SDSS\s*J(\d{6}\.\d{2}[+-]\d{6}\.\d)$', 'SDSS J{0}', '{}'::jsonb), + ('2mass', 3, '^(2MAS[SX])\s*J\s*(\d{8}[+-]\d{7})$', '{0} J{1}', '{"1": [{"op": "upper"}]}'::jsonb), + ('m', 4, '^M(?:ESSIER)?\s*0*(\d+)$', 'M {0}', '{}'::jsonb), + ('ngc', 5, '^N(?:GC)?\s*0*(\d{1,4})\s*([A-Z]?)$', 'NGC {0}{1}', '{"1": [{"op": "strip_zeros"}], "2": [{"op": "upper"}]}'::jsonb), + ('ic', 6, '^IC?\s*0*(\d{1,4})\s*([A-Z]?)$', 'IC {0}{1}', '{"1": [{"op": "strip_zeros"}], "2": [{"op": "upper"}]}'::jsonb), + ('ugc', 7, '^U(?:GCG?)?\s*0*(\d{1,5})\s*([A-Z]?)$', 'UGC {0}{1}', '{"1": [{"op": "strip_zeros"}], "2": [{"op": "upper"}]}'::jsonb), + ('ugca', 8, '^U(?:GC)?A\s*0*(\d{1,3})$', 'UGCA {0}', '{}'::jsonb), + ('agc', 9, '^AGC\s*0*(\d+)$', 'AGC {0}', '{}'::jsonb), + ('eso', 10, '^ESO\s*0*(\d+)-\s*G?\s*0*(\d+)([a-z]?)$', 'ESO {0}-{1}{2}', '{"3": [{"op": "upper"}]}'::jsonb), + ('mcg', 11, '^MCG\s*([+-]?)(\d{1,2})-(\d{1,2})-(\d+)$', 'MCG {0}{1}-{2}-{3}', '{"1": [{"op": "default", "arg": "+"}], "2": [{"op": "zfill", "arg": "2"}], "3": [{"op": "zfill", "arg": "2"}], "4": [{"op": "zfill", "arg": "3"}]}'::jsonb), + ('abell', 12, '^(?:ABELL?|ABGC|ACO)\s*S0*(\d+)$', 'ACO S {0}', '{}'::jsonb), + ('abell-13', 13, '^(?:ABELL?|ABGC|ACO)\s*0*(\d+)$', 'ACO {0}', '{}'::jsonb), + ('andromeda', 14, '^And(?:romeda)?\s*(\d+|[IVXLCDM]+)$', 'And {0}', '{"1": [{"op": "roman_or_int"}]}'::jsonb), + ('cassiopeia', 15, '^Cas(?:siopeia)?\s*0*(\d+)$', 'Cas {0}', '{}'::jsonb), + ('triangulum', 16, '^Tri(?:angulum)?\s*(\d+|[IVXLCDM]+)$', 'Tri {0}', '{"1": [{"op": "roman_or_int"}]}'::jsonb), + ('eponym', 17, '^Arp\s*0*(\d{1,3})$', 'Arp {0}', '{}'::jsonb), + ('eponym-18', 18, '^(?:Frl|Fair|Fairall)\s*0*(\d{1,4})$', 'Frl {0}', '{}'::jsonb), + ('eponym-19', 19, '^(?:Mkn|Mrk|Markarian|Markarjan)\s*0*(\d{1,4})$', 'Mrk {0}', '{}'::jsonb), + ('eponym-20', 20, '^(?:Ho|Holm|Holmberg)\s*(\d|[IVX]+)$', 'Holmberg {0}', '{"1": [{"op": "roman_or_int"}]}'::jsonb), + ('eponym-21', 21, '^(Dwingeloo|Laevens|Maffei)\s*0*(\d)$', '{0} {1}', '{"1": [{"op": "capitalize"}]}'::jsonb), + ('lsbg', 22, '^(?:LSBG|\[MDS99\])?\s*F([1-5]\d{2})-(\d{1,3})$', '[MDS99] F{0}-{1}', '{"2": [{"op": "zfill", "arg": "3"}]}'::jsonb), + ('lsbg-23', 23, '^(?:LSBG|ISI96|\[ISI96\])\s*(\d{4}[+-]\d{4})([abx]?)$', '[ISI96] {0}{1}', '{"2": [{"op": "lower"}]}'::jsonb), + ('ref-j', 24, '^\[([A-Z]{1,3}(?:[6-9]\d|20\d{2}))\]\s*J(\d{6}(?:\.\d+)?[+-]\d{6}(?:\.\d+)?)$', '[{0}] J{1}', '{"1": [{"op": "upper"}]}'::jsonb), + ('ref-hhmm-ddmm', 25, '^\[([A-Z]{1,3}(?:[6-9]\d|20\d{2}))\]\s*(\d{4}[+-]\d{4})$', '[{0}] {1}', '{"1": [{"op": "upper"}]}'::jsonb), + ('ref-n', 26, '^\[([A-Z]{1,3}(?:[6-9]\d|20\d{2}))\]\s*0*(\d+)\s*([a-z]?)$', '[{0}] {1}{2}', '{"1": [{"op": "upper"}], "3": [{"op": "lower"}]}'::jsonb), + ('6df', 27, '^6dF\s*J\s*(\d{7}[+-]\d{6})\:?$', '6dF J{0}', '{}'::jsonb), + ('usgc', 28, '^USGC\s*([US])\s*(\d+)$', 'USGC {0}{1}', '{"1": [{"op": "upper"}]}'::jsonb), + ('3c', 29, '^3C\s*(\d+(?:\.\d)?)([a-z]?)$', '3C {0}{1}', '{"1": [{"op": "zfill", "arg": "3"}], "2": [{"op": "upper"}]}'::jsonb), + ('dw', 30, '^Dw\s*J?(\d{4}[+-]\d{2,4})([ab]?)$', 'dwJ{0}{1}', '{"2": [{"op": "lower"}]}'::jsonb), + ('cat-n', 31, '^([a-z0-9]{1,5}[a-z])\s*0*(\d+)([a-z]?)$', '{0} {1}{2}', '{"1": [{"op": "upper"}], "2": [{"op": "strip_zeros"}], "3": [{"op": "lower"}]}'::jsonb), + ('cat-hhmmssss-ddmmsss', 32, '^([a-z0-9]{2,6}?)\s*([JB]?)\s*(\d{8}[+-]\d{7})$', '{0} {1}{2}', '{"1": [{"op": "upper"}], "2": [{"op": "upper"}]}'::jsonb), + ('cat-hhmmss-ddmmss', 33, '^([a-z0-9]{2,6}?)\s*([JB]?)\s*(\d{7}[+-]\d{6})$', '{0} {1}{2}', '{"1": [{"op": "upper"}], "2": [{"op": "upper"}]}'::jsonb), + ('cat-hhmmss-sss-ddmmss-sss', 34, '^([a-z0-9]{2,6}?)\s*([JB]?)\s*(\d{6}(?:\.\d+)?[+-]\d{6}(?:\.\d+)?)$', '{0} {1}{2}', '{"1": [{"op": "upper"}], "2": [{"op": "upper"}]}'::jsonb), + ('cat-hhmm-ddmm', 35, '^([a-z0-9]{2,6}?)\s*([JB]?)\s*(\d{4}[+-]\d{4})$', '{0} {1}{2}', '{"1": [{"op": "upper"}], "2": [{"op": "upper"}]}'::jsonb), + ('cat-hhmm-dd', 36, '^([a-z0-9]{2,6})\s*([JB]?)\s*(\d{4}[+-]\d{2,3})([a-z]?)$', '{0} {1}{2}{3}', '{"1": [{"op": "upper"}], "2": [{"op": "upper"}]}'::jsonb), + ('cat-ddd-ddd-dd-ddd', 37, '^([a-z0-9]{2,6})\s*J\s*(\d{1,3}\.\d+[+-]\d{1,3}\.\d+)$', '{0} J{1}', '{"1": [{"op": "upper"}]}'::jsonb), + ('cat-hhmmss-ddmms', 38, '^([a-z0-9]{2,6})\s*([JB]?)\s*(\d{6}[+-]\d{5})$', '{0} {1}{2}', '{"1": [{"op": "upper"}], "2": [{"op": "upper"}]}'::jsonb), + ('cat-n-n-n', 39, '^([a-z]{2,6})\s*0*(\d{1,5})-0*(\d{1,5})-0*(\d{1,5})$', '{0} {1}-{2}-{3}', '{"1": [{"op": "upper"}], "2": [{"op": "strip_zeros"}], "3": [{"op": "strip_zeros"}], "4": [{"op": "strip_zeros"}]}'::jsonb), + ('cgcg', 40, '^CGCG\s*(\d{3})-0*(\d{2,3})$', 'CGCG {0}-{1}', '{}'::jsonb), + ('ksp-dw', 41, '^KSP-DW\s*0*(\d+)$', 'KSP-DW {0}', '{}'::jsonb), + ('dr8', 42, '^DR8-(\d{4})([pm])(\d{3})-(\d{1,5})$', 'DR8-{0}{1}{2}-{3}', '{}'::jsonb), + ('lsbc', 43, '^LSBC\s*D\s*0*(\d+)-0*(\d+)$', 'LSBC D{0}-{1}', '{}'::jsonb), + ('cnoc2', 44, '^CNOC2_(\d+)\.(\d+)$', 'CNOC2_{0}.{1}', '{}'::jsonb), + ('galfa', 45, '^GALFAJ(\d+(?:\.\d+)?)\+(\d+(?:\.\d+)?)\+(\d+)$', 'GALFA J{0}+{1}+{2}', '{}'::jsonb), + ('rxj', 46, '^RXJ(\d{2})(\d{2}(?:\.\d+)?)([+-])(\d{2})(\d{2}(?:\.\d+)?):\[([^\]]+)\](\d+)$', 'RX J{0}{1}{2}{3}{4} [{5}] {6}', '{}'::jsonb), + ('rxj-47', 47, '^RXJ(\d{2})(\d{2}(?:\.\d+)?)([+-])(\d{2})(\d{2}(?:\.\d+)?)$', 'RX J{0}{1}{2}{3}{4}', '{}'::jsonb), + ('ngc-48', 48, '^NGC\s*0*(\d+)([a-zA-Z]{0,3}):\[([^\]]+)\](\d+)$', 'NGC {0}{1} [{2}] {3}', '{"1": [{"op": "strip_zeros"}]}'::jsonb), + ('ngc-49', 49, '^N\s*0*(\d+)([a-zA-Z]{1,3})$', 'NGC {0}{1}', '{}'::jsonb), + ('2dfgrs', 50, '^2dfgrs\s*([NS]\d+Z\d+)$', '2dFGRS {0}', '{}'::jsonb), + ('j', 51, '^J\s*(\d{2})(\d{2})(\d{2}(?:\.\d+)?)([+-])(\d{2})(\d{2})(\d{2}(?:\.\d+)?)$', 'J{0}{1}{2}{3}{4}{5}{6}', '{}'::jsonb), + ('cat-hhmmss', 52, '^([A-Za-z]{2,5})\s*([+-])(\d{6})$', '{0} {1}{2}', '{"1": [{"op": "upper"}]}'::jsonb), + ('cat-dd-d', 53, '^((?:[A-Za-z][A-Za-z0-9]{1,4}|[0-9][A-Za-z][A-Za-z0-9]{0,3}|[0-9]{2}[A-Za-z][A-Za-z0-9]{0,2}|[0-9]{3}[A-Za-z][A-Za-z0-9]?))([+-])(\d{2}(?:\.\d+)?)$', '{0} {1}{2}', '{"1": [{"op": "upper"}]}'::jsonb), + ('cgmw', 54, '^CGMW([1-5])-0*(\d{5})$', 'CGMW {0}-{1}', '{"2": [{"op": "strip_zeros"}]}'::jsonb), + ('reformat', 55, '^.*?\[TKA2006\]\s*F\s*0*(\d)-\s*0*(\d{1,2})\s*([ab]?)$', '[TKA2006] F{0}-{1}{2}', '{"2": [{"op": "strip_zeros"}], "3": [{"op": "lower"}]}'::jsonb), + ('reformat-56', 56, '^ABELL.*\[M(?:CF)?2008\]\s*0*(\d+)$', '[MCF2008] {0}', '{}'::jsonb), + ('reformat-57', 57, '^ABELL.*\[([a-z]{1,3}(?:20\d\d|\d\d))\]\s*0*(\d+)$', '[{0}] {1}', '{"1": [{"op": "upper"}], "2": [{"op": "strip_zeros"}]}'::jsonb), + ('abell-58', 58, '^ABELL\s*0*(\d+)_(\d+):\[([^\]]+)\](\d+)$', 'ABELL {0}_{1} [{2}] {3}', '{"1": [{"op": "strip_zeros"}], "2": [{"op": "strip_zeros"}]}'::jsonb); diff --git a/tests/integration/designation_format_test.py b/tests/integration/designation_format_test.py new file mode 100644 index 00000000..ea49261d --- /dev/null +++ b/tests/integration/designation_format_test.py @@ -0,0 +1,228 @@ +import pathlib +import unittest + +import structlog +from starlette import testclient + +from app.commands.adminapi import command as adminapi_command +from app.commands.dataapi import command as dataapi_command +from app.data import repositories +from app.domain import adminapi as admin_domain +from app.domain import dataapi as dataapi_domain +from app.domain.adminapi import mock as admin_mock +from app.domain.designation import DesignationFormatter, RuleEngine +from app.lib import audit, auth, clients +from app.presentation.adminapi.server import Server as AdminServer +from app.presentation.dataapi.server import Server as DataAPIServer +from tests import lib + +GOLDEN_CASES: list[tuple[str, str, str]] = [ + ("pgc 1234", "PGC 1234", "pgc"), + ("LEDA 001", "PGC 1", "pgc"), + ("ngc 0224", "NGC 224", "ngc"), + ("NGC 1234A", "NGC 1234A", "ngc"), + ("ic 0342", "IC 342", "ic"), + ("messier 31", "M 31", "m"), + ("2mass j12345678+1234567", "2MASS J12345678+1234567", "2mass"), + ("3C 84", "3C 084", "3c"), + ("ACO S123", "ACO S 123", "abell"), + ("ACO 123", "ACO 123", "abell-13"), + ("And III", "And 3", "andromeda"), + ("MCG 1-2-3", "MCG +01-02-003", "mcg"), + ("totally unknown xyz", "totally unknown xyz", ""), +] + +IDEMPOTENT_CASES = [ + ("pgc 1234", "PGC 1234"), + ("ngc 0224", "NGC 224"), + ("messier 31", "M 31"), +] + +TEST_RULE_IDS = ("test", "bad", "disableme") + + +def _formatter_from_repo(pg: repositories.DesignationRulesRepository) -> DesignationFormatter: + return DesignationFormatter(pg.snapshot) + + +class DesignationFormatIntegrationTest(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + cls.pg_storage = lib.TestPostgresStorage.get() + cls.log = structlog.get_logger() + cfg_path = pathlib.Path(__file__).resolve().parents[2] / "configs" / "dev" / "dataapi.yaml" + cls.cfg = dataapi_command.parse_config(str(cfg_path)) + + def setUp(self) -> None: + self.pg = self.pg_storage.get_storage() + self.rules_repo = repositories.DesignationRulesRepository(self.pg, self.log) + self.engine = RuleEngine.compile([r.to_engine_rule() for r in self.rules_repo.snapshot().rules]) + self.data_client = testclient.TestClient( + DataAPIServer( + dataapi_domain.Actions( + layer2_repo=repositories.Layer2Repository(self.pg, self.log), + catalog_cfg=self.cfg.catalogs, + metadata_repo=repositories.MetadataRepository(self.pg), + designation_formatter=_formatter_from_repo(self.rules_repo), + ), + self.cfg.server, + self.log, + auth.NoopAuthenticator(), + ).app + ) + + def tearDown(self) -> None: + self.pg_storage.clear() + + def test_format_batch(self) -> None: + response = self.data_client.post( + "/api/v1/designation/format", + json={"names": ["ngc 224", "unknown name xyz"]}, + ) + self.assertEqual(response.status_code, 200) + results = response.json()["data"]["results"] + self.assertEqual(len(results), 2) + self.assertEqual(results[0]["formatted"], "NGC 224") + self.assertEqual(results[0]["rule_id"], "ngc") + self.assertIsNone(results[1]["rule_id"]) + self.assertEqual(results[1]["formatted"], "unknown name xyz") + + def test_golden_cases(self) -> None: + for raw, expected, rule_id in GOLDEN_CASES: + with self.subTest(raw=raw): + result = self.engine.format(raw) + if rule_id: + self.assertIsNotNone(result) + assert result is not None + self.assertEqual(result.formatted, expected) + self.assertEqual(result.rule_id, rule_id) + else: + self.assertIsNone(result) + + def test_idempotency(self) -> None: + for raw, expected in IDEMPOTENT_CASES: + with self.subTest(raw=raw): + first = self.engine.format(raw) + assert first is not None + second = self.engine.format(first.formatted) + self.assertIsNotNone(second) + assert second is not None + self.assertEqual(second.formatted, expected) + + +class DesignationRulesIntegrationTest(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + cls.pg_storage = lib.TestPostgresStorage.get() + cls.log = structlog.get_logger() + cfg_path = pathlib.Path(__file__).resolve().parents[2] / "configs" / "dev" / "adminapi.yaml" + cls.cfg = adminapi_command.parse_config(str(cfg_path)) + pg = cls.pg_storage.get_storage() + cls.migration_rule_count = len(repositories.DesignationRulesRepository(pg, cls.log).list_rules()) + + def setUp(self) -> None: + self.pg = self.pg_storage.get_storage() + self.rules_repo = repositories.DesignationRulesRepository(self.pg, self.log) + self.client = testclient.TestClient( + AdminServer( + admin_domain.Actions( + common_repo=repositories.CommonRepository(self.pg, self.log), + layer0_repo=repositories.Layer0Repository(self.pg, self.log), + layer1_repo=repositories.Layer1Repository(self.pg, self.log), + layer2_repo=repositories.Layer2Repository(self.pg, self.log), + designation_rules_repo=self.rules_repo, + authenticator=auth.NoopAuthenticator(), + clients=clients.Clients(ads_token="test"), + table_stats_cache=admin_mock.get_mock_table_stats_cache(), + ), + self.cfg.server, + self.log, + auth.NoopAuthenticator(), + action_recorder=audit.NoopActionRecorder(), + auth_enabled=False, + ).app + ) + + def tearDown(self) -> None: + for rule_id in TEST_RULE_IDS: + self.pg.exec("DELETE FROM designation.format_rules WHERE id = %s", params=[rule_id]) + self.pg_storage.clear() + + def test_list_rules_after_migration(self) -> None: + response = self.client.get("/admin/api/v1/designation/rules") + self.assertEqual(response.status_code, 200) + rules = response.json()["data"]["rules"] + self.assertEqual(len(rules), self.migration_rule_count) + self.assertGreater(self.migration_rule_count, 0) + + def test_save_new_rule(self) -> None: + response = self.client.post( + "/admin/api/v1/designation/rule", + json={ + "rule": { + "id": "test", + "priority": 9999, + "pattern": r"^TEST\s*(\d+)$", + "template": "TEST {0}", + "transforms": {}, + "enabled": True, + }, + "examples": [{"input": "TEST 42", "expected": "TEST 42"}], + }, + ) + self.assertEqual(response.status_code, 200) + rule = response.json()["data"]["rule"] + self.assertEqual(rule["id"], "test") + + def test_save_rejects_bad_example(self) -> None: + response = self.client.post( + "/admin/api/v1/designation/rule", + json={ + "rule": { + "id": "bad", + "priority": 9998, + "pattern": r"^BAD\s*(\d+)$", + "template": "BAD {0}", + "transforms": {}, + "enabled": True, + }, + "examples": [{"input": "BAD 1", "expected": "WRONG"}], + }, + ) + self.assertEqual(response.status_code, 400) + + def test_disable_rule(self) -> None: + create = self.client.post( + "/admin/api/v1/designation/rule", + json={ + "rule": { + "id": "disableme", + "priority": 9997, + "pattern": r"^DISABLEME\s*(\d+)$", + "template": "DISABLEME {0}", + "transforms": {}, + "enabled": True, + }, + "examples": [{"input": "DISABLEME 1", "expected": "DISABLEME 1"}], + }, + ) + self.assertEqual(create.status_code, 200) + rule = create.json()["data"]["rule"] + + disable = self.client.post( + "/admin/api/v1/designation/rule", + json={ + "rule": {**rule, "enabled": False}, + }, + ) + self.assertEqual(disable.status_code, 200) + disabled = disable.json()["data"]["rule"] + self.assertFalse(disabled["enabled"]) + + snapshot = self.rules_repo.snapshot() + self.assertTrue(all(r.id != rule["id"] for r in snapshot.rules)) + + def test_rules_repo_snapshot(self) -> None: + snapshot = self.rules_repo.snapshot() + self.assertEqual(len(snapshot.rules), self.migration_rule_count) + self.assertGreater(snapshot.version, 0) diff --git a/tests/integration/metadata_api_test.py b/tests/integration/metadata_api_test.py index a75ccdc6..cbbe6213 100644 --- a/tests/integration/metadata_api_test.py +++ b/tests/integration/metadata_api_test.py @@ -9,6 +9,7 @@ from app.data import repositories from app.domain import dataapi as domain from app.domain.dataapi import actions as dataapi_actions +from app.domain.designation import DesignationFormatter from app.lib import auth from app.presentation.dataapi.server import Server from tests import lib @@ -24,10 +25,13 @@ def setUpClass(cls) -> None: def setUp(self) -> None: self.pg = self.pg_storage.get_storage() + rules_repo = repositories.DesignationRulesRepository(self.pg, self.log) + formatter = DesignationFormatter(rules_repo.snapshot) self.actions = domain.Actions( layer2_repo=repositories.Layer2Repository(self.pg, self.log), catalog_cfg=self.cfg.catalogs, metadata_repo=repositories.MetadataRepository(self.pg), + designation_formatter=formatter, ) self.client = testclient.TestClient( Server(self.actions, self.cfg.server, self.log, auth.NoopAuthenticator()).app diff --git a/tests/unit/domain/designation_engine_test.py b/tests/unit/domain/designation_engine_test.py new file mode 100644 index 00000000..49eb2965 --- /dev/null +++ b/tests/unit/domain/designation_engine_test.py @@ -0,0 +1,80 @@ +import unittest + +from app.domain.designation.engine import Rule, RuleEngine, TransformOp, TransformSpec + + +class DesignationEngineTest(unittest.TestCase): + def test_empty_input(self) -> None: + engine = RuleEngine.compile([]) + self.assertIsNone(engine.format("")) + self.assertIsNone(engine.format(" ")) + + def test_simple_template(self) -> None: + engine = RuleEngine.compile( + [ + Rule( + id="pgc", + priority=1, + pattern=r"^pgc\s*(\d+)$", + template="PGC {0}", + ) + ] + ) + result = engine.format("pgc 1234") + self.assertIsNotNone(result) + assert result is not None + self.assertEqual(result.formatted, "PGC 1234") + self.assertEqual(result.rule_id, "pgc") + + def test_transforms(self) -> None: + engine = RuleEngine.compile( + [ + Rule( + id="ngc", + priority=1, + pattern=r"^ngc\s*0*(\d{1,4})\s*([A-Z]?)$", + template="NGC {0}{1}", + transforms={ + 1: [TransformSpec(TransformOp.STRIP_ZEROS)], + 2: [TransformSpec(TransformOp.UPPER)], + }, + ) + ] + ) + result = engine.format("ngc 0224") + self.assertIsNotNone(result) + assert result is not None + self.assertEqual(result.formatted, "NGC 224") + + def test_validate_rule_examples(self) -> None: + rule = Rule( + id="test", + priority=1, + pattern=r"^test\s*(\d+)$", + template="TEST {0}", + examples=[("test 1", "TEST 1")], + ) + RuleEngine.compile([rule]).validate_rule(rule) + + def test_validate_rule_rejects_bad_example(self) -> None: + rule = Rule( + id="test", + priority=1, + pattern=r"^test\s*(\d+)$", + template="TEST {0}", + examples=[("test 1", "WRONG")], + ) + with self.assertRaises(ValueError): + RuleEngine.compile([rule]).validate_rule(rule) + + def test_first_match_wins(self) -> None: + engine = RuleEngine.compile( + [ + Rule(id="a", priority=1, pattern=r"^X(\d+)$", template="A {0}"), + Rule(id="b", priority=2, pattern=r"^X(\d+)$", template="B {0}"), + ] + ) + result = engine.format("X1") + self.assertIsNotNone(result) + assert result is not None + self.assertEqual(result.rule_id, "a") From 57745548dafb96b87cb73cb01eb8cce4022b521c Mon Sep 17 00:00:00 2001 From: kraysent Date: Sun, 7 Jun 2026 18:42:08 +0200 Subject: [PATCH 2/2] fix test --- tests/integration/designation_format_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/designation_format_test.py b/tests/integration/designation_format_test.py index ea49261d..eb4ea6b1 100644 --- a/tests/integration/designation_format_test.py +++ b/tests/integration/designation_format_test.py @@ -84,7 +84,7 @@ def test_format_batch(self) -> None: self.assertEqual(len(results), 2) self.assertEqual(results[0]["formatted"], "NGC 224") self.assertEqual(results[0]["rule_id"], "ngc") - self.assertIsNone(results[1]["rule_id"]) + self.assertIsNone(results[1].get("rule_id")) self.assertEqual(results[1]["formatted"], "unknown name xyz") def test_golden_cases(self) -> None: