From 6066b92ec4bb5f8106d4840b13d458d626c8d039 Mon Sep 17 00:00:00 2001 From: Jammy2211 Date: Sun, 21 Jun 2026 22:22:32 +0100 Subject: [PATCH] ci: use PyAutoBuild reusable navigator_check workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace this workspace's navigator_check.yml with a thin caller of PyAutoLabs/PyAutoBuild/.github/workflows/navigator_check.yml@main (project: autofit), and delete the local .github/scripts/check_navigator.py and regenerate_navigator.py — both now live in PyAutoBuild/autobuild and are shared across all three workspaces. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/scripts/check_navigator.py | 212 ------------------------ .github/scripts/regenerate_navigator.py | 56 ------- .github/workflows/navigator_check.yml | 83 +--------- 3 files changed, 9 insertions(+), 342 deletions(-) delete mode 100644 .github/scripts/check_navigator.py delete mode 100644 .github/scripts/regenerate_navigator.py diff --git a/.github/scripts/check_navigator.py b/.github/scripts/check_navigator.py deleted file mode 100644 index d761b1cd..00000000 --- a/.github/scripts/check_navigator.py +++ /dev/null @@ -1,212 +0,0 @@ -#!/usr/bin/env python3 -""" -Navigator / workspace catalogue checker. - -Two independent checks, runnable locally or in CI: - - (a) Path existence (HARD FAIL) - Scan the workspace's navigator / instruction files for repo-relative - ``scripts/...`` and ``notebooks/...`` references and confirm each resolves - on disk. A reference may be a literal path (must exist) or a glob - containing ``*`` (must match at least one file). ``output/...`` and - ``dataset/...`` references are ignored (runtime / data, not example code). - In ``workspace_index.json`` only the authoritative ``path`` / ``notebook`` - fields are validated; ``cross_refs`` are best-effort docstring references - and are not required to resolve. - - (b) Banner-comment lint (warn or fail, see ``--banners``) - Scan ``scripts/**/*.py`` for banner-style separator comments — a comment - line that is ``#`` followed only by a run (>= 4) of ``-``, ``=``, ``#`` or - ``*``. The workspace style is ``\"\"\"__Section__\"\"\"`` docstrings, not - ``# -----`` banners. The ``===`` underline beneath a docstring title is not - a ``#`` comment and is never flagged. - -This script is intentionally repo-agnostic: it hardcodes no repository name and -operates on the current working directory, so it ports to the galaxy / fit -workspaces unchanged. Run it from the workspace root:: - - python .github/scripts/check_navigator.py - python .github/scripts/check_navigator.py --banners=warn - -An optional ignore file (default ``.navigator_check_ignore``) lists paths or -globs (one per line, ``#`` comments allowed) that are exempt from BOTH checks. -""" - -import argparse -import fnmatch -import re -import sys -from pathlib import Path - -# Files scanned for path references. ``scripts/**/README.md`` is expanded at -# runtime. Missing files are simply skipped (e.g. a workspace without one of -# these), so the same list ports across workspaces. -REFERENCE_FILES = [ - "AGENTS.md", - "CLAUDE.md", - ".github/copilot-instructions.md", - "llms.txt", - "llms-full.txt", - "workspace_index.json", -] - -# Path tokens we care about: example code / notebooks under the workspace. The -# negative lookbehind anchors the token at a path boundary so a longer path such -# as ".github/scripts/run_smoke.py" is NOT matched as "scripts/run_smoke.py". -_PATH_TOKEN_RE = re.compile( - r"(?=4) of -# the separator characters. Does not match '# ----- Section -----' (has text) or -# the docstring '===' underline (not a '#' comment). -_BANNER_RE = re.compile(r"^\s*#\s*[-=#*]{4,}\s*$") - -DEFAULT_IGNORE_FILE = ".navigator_check_ignore" - - -def load_ignore(root: Path, ignore_file: str): - """Load ignore patterns (paths / globs). Returns a list of POSIX strings.""" - path = root / ignore_file - patterns = [] - if path.exists(): - for line in path.read_text(encoding="utf-8").splitlines(): - line = line.strip() - if line and not line.startswith("#"): - patterns.append(line) - return patterns - - -def is_ignored(rel_path: str, patterns) -> bool: - """True if ``rel_path`` matches an ignore pattern (literal or glob).""" - return any( - rel_path == pat or fnmatch.fnmatch(rel_path, pat) for pat in patterns - ) - - -def reference_files(root: Path): - """Yield the existing reference files, including scripts/**/README.md.""" - for name in REFERENCE_FILES: - candidate = root / name - if candidate.exists(): - yield candidate - for readme in sorted((root / "scripts").rglob("README.md")): - yield readme - - -def extract_path_tokens(text: str, json_authoritative_only: bool = False): - """ - Yield (line_number, token) for every scripts/.. or notebooks/.. token in the - text. Works uniformly for markdown links and inline-code spans, since all we - need is the token substring on its line. - - When ``json_authoritative_only`` is set (for workspace_index.json), only - lines holding the authoritative ``"path"`` / ``"notebook"`` keys are scanned, - so best-effort ``cross_refs`` and prose ``summary`` values are skipped. - """ - for lineno, line in enumerate(text.splitlines(), start=1): - if json_authoritative_only and not _JSON_AUTHORITATIVE_KEY_RE.match(line): - continue - for match in _PATH_TOKEN_RE.findall(line): - # Strip trailing punctuation that commonly abuts a token in prose - # (e.g. "see scripts/foo.py." or a token closing a code span). - token = match.rstrip(".,;:)`\"'") - yield lineno, token - - -def check_paths(root: Path, ignore_patterns): - """Return a list of (file, line, token) misses. Empty list == all good.""" - misses = [] - for ref in reference_files(root): - rel_ref = ref.relative_to(root).as_posix() - try: - text = ref.read_text(encoding="utf-8") - except (OSError, UnicodeDecodeError) as exc: - print(f"WARNING: could not read {rel_ref}: {exc}", file=sys.stderr) - continue - json_only = ref.suffix == ".json" - for lineno, token in extract_path_tokens(text, json_only): - if is_ignored(token, ignore_patterns): - continue - if "*" in token: - if not any(root.glob(token)): - misses.append((rel_ref, lineno, token)) - else: - if not (root / token).exists(): - misses.append((rel_ref, lineno, token)) - return misses - - -def check_banners(root: Path, ignore_patterns): - """Return a list of (file, line) banner-comment hits.""" - hits = [] - for script in sorted((root / "scripts").rglob("*.py")): - rel = script.relative_to(root).as_posix() - if is_ignored(rel, ignore_patterns): - continue - try: - lines = script.read_text(encoding="utf-8").splitlines() - except (OSError, UnicodeDecodeError) as exc: - print(f"WARNING: could not read {rel}: {exc}", file=sys.stderr) - continue - for lineno, line in enumerate(lines, start=1): - if _BANNER_RE.match(line): - hits.append((rel, lineno)) - return hits - - -def main(argv=None): - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - "--root", - default=".", - help="Workspace root to check (default: current directory).", - ) - parser.add_argument( - "--banners", - choices=["warn", "fail"], - default="fail", - help="Banner-comment lint mode: 'fail' (nonzero exit) or 'warn'.", - ) - parser.add_argument( - "--ignore-file", - default=DEFAULT_IGNORE_FILE, - help=f"Ignore file of paths/globs (default: {DEFAULT_IGNORE_FILE}).", - ) - args = parser.parse_args(argv) - - root = Path(args.root).resolve() - ignore_patterns = load_ignore(root, args.ignore_file) - - # (a) Path existence — always a hard failure. - path_misses = check_paths(root, ignore_patterns) - if path_misses: - print(f"Path check: {len(path_misses)} missing reference(s):") - for ref, lineno, token in path_misses: - print(f" {ref}:{lineno} -> missing path: {token}") - else: - print("Path check: OK — every scripts/ and notebooks/ reference resolves.") - - # (b) Banner lint — warn or fail. - banner_hits = check_banners(root, ignore_patterns) - if banner_hits: - print(f"Banner lint: {len(banner_hits)} banner-style comment(s):") - for ref, lineno in banner_hits: - print(f" {ref}:{lineno}") - print( - " Use triple-quoted \"\"\"__Section__\"\"\" docstrings, not # ----- banners." - ) - else: - print("Banner lint: OK — no banner-style comments found.") - - failed = bool(path_misses) or (args.banners == "fail" and bool(banner_hits)) - return 1 if failed else 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/.github/scripts/regenerate_navigator.py b/.github/scripts/regenerate_navigator.py deleted file mode 100644 index 6998a5d7..00000000 --- a/.github/scripts/regenerate_navigator.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python3 -""" -Regenerate the workspace catalogue (Phase 2 only — no notebook rebuild). - -This is the lightweight entrypoint the ``navigator_check`` staleness job uses: -it calls PyAutoBuild's ``navigator.write_catalogue`` for the current workspace -checkout, producing ``llms-full.txt`` and ``workspace_index.json`` *without* -running the full notebook generation and *without* needing the science stack -(only ``pyyaml``). - -Repo-agnostic — no repository name is hardcoded. Configuration is via -environment variables so porting to another workspace is a one-line change in -the workflow: - - NAVIGATOR_PROJECT the generator project name (default: ``autolens``). - PYAUTOBUILD_DIR path to PyAutoBuild's ``autobuild`` package directory - (default: ``../PyAutoBuild/autobuild`` relative to CWD, - matching the local sibling-clone layout). In CI this is - set to the checked-out PyAutoBuild path. - -Run from the workspace root:: - - python .github/scripts/regenerate_navigator.py -""" - -import os -import sys -from pathlib import Path - - -def main(): - project = os.environ.get("NAVIGATOR_PROJECT", "autolens") - - pyautobuild_dir = os.environ.get( - "PYAUTOBUILD_DIR", str(Path.cwd().parent / "PyAutoBuild" / "autobuild") - ) - pyautobuild_dir = str(Path(pyautobuild_dir).resolve()) - if not Path(pyautobuild_dir).is_dir(): - sys.exit( - f"PyAutoBuild autobuild dir not found: {pyautobuild_dir}\n" - "Set PYAUTOBUILD_DIR to the checked-out PyAutoBuild/autobuild path." - ) - - sys.path.insert(0, pyautobuild_dir) - - # navigator imports generate.py lazily, whose module-level argparse expects - # a project positional; supply it via argv so the import resolves cleanly. - sys.argv = ["generate.py", project] - - import navigator - - navigator.write_catalogue(Path.cwd(), project) - - -if __name__ == "__main__": - main() diff --git a/.github/workflows/navigator_check.yml b/.github/workflows/navigator_check.yml index b0acd340..b8617ade 100644 --- a/.github/workflows/navigator_check.yml +++ b/.github/workflows/navigator_check.yml @@ -1,82 +1,17 @@ name: Navigator Check +# Thin caller for PyAutoBuild's reusable navigator-catalogue check. The check +# logic (path/banner lint + catalogue staleness) and its entrypoints live in +# PyAutoBuild/autobuild; this workspace only declares which generator project to +# run. See PyAutoLabs/PyAutoBuild/.github/workflows/navigator_check.yml. + on: [push, pull_request] permissions: contents: read -# Generator project target for the staleness job. Porting this workflow to -# another workspace is a one-word change here. -env: - PROJECT: autofit - jobs: - paths: - name: Navigator paths + banner lint - runs-on: ubuntu-latest - steps: - - name: Checkout repo - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - name: Install dependencies - run: pip install --quiet pyyaml - - name: Run navigator checker (paths hard, banners fail) - run: python .github/scripts/check_navigator.py --banners=fail - - staleness: - name: Catalogue staleness - runs-on: ubuntu-latest - steps: - - name: Checkout workspace - uses: actions/checkout@v4 - with: - path: workspace - - name: Checkout PyAutoBuild - uses: actions/checkout@v4 - with: - repository: PyAutoLabs/PyAutoBuild - path: PyAutoBuild - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - name: Extract branch name - id: extract_branch - shell: bash - run: | - cd workspace - echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> "$GITHUB_OUTPUT" - - name: Match PyAutoBuild branch - shell: bash - run: | - BRANCH="${{ steps.extract_branch.outputs.branch }}" - pushd PyAutoBuild - if [[ -n "$(git ls-remote --heads origin "$BRANCH")" ]]; then - echo "Branch $BRANCH exists in PyAutoBuild — checking out" - git fetch origin "$BRANCH" - git checkout "$BRANCH" - else - echo "Branch $BRANCH not in PyAutoBuild — staying on default branch" - fi - popd - - name: Install dependencies - run: pip install --quiet pyyaml - - name: Regenerate catalogue (Phase 2 only — no notebook rebuild) - env: - NAVIGATOR_PROJECT: ${{ env.PROJECT }} - PYAUTOBUILD_DIR: ${{ github.workspace }}/PyAutoBuild/autobuild - run: | - cd workspace - python .github/scripts/regenerate_navigator.py - - name: Fail if catalogue drifted from scripts - shell: bash - run: | - cd workspace - if ! git diff --exit-code llms-full.txt workspace_index.json; then - echo "::error::llms-full.txt / workspace_index.json are stale." - echo "Regenerate with 'generate.py ${{ env.PROJECT }}' and commit the result." - exit 1 - fi + navigator: + uses: PyAutoLabs/PyAutoBuild/.github/workflows/navigator_check.yml@main + with: + project: autofit