diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index f533a136..af2bda2c 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -140,40 +140,66 @@ jobs:
   check_integration_label:
     runs-on: ubuntu-latest
     needs: [build]
+    permissions:
+      pull-requests: read
     outputs:
       has_geos_integration_label: ${{ steps.set-label.outputs.has_label }}
     steps:
       - name: Check if PR has '${{ env.LABEL_TEST_GEOS_INTEGRATION }}' label
         id: set-label
+        # Fetch labels live from the GitHub REST API rather than reading
+        # github.event.pull_request.labels. The event payload is a snapshot
+        # frozen at the time the workflow was first triggered, so labels
+        # added after that first run are invisible to re-runs.
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+          REQUIRED_LABEL: ${{ env.LABEL_TEST_GEOS_INTEGRATION }}
         run: |
           echo "Checking for label..."
           LABEL_FOUND=false
-          LABELS='${{ toJson(github.event.pull_request.labels.*.name) }}'
+          PR_JSON=$(curl --fail --silent --show-error \
+            -H "Accept: application/vnd.github+json" \
+            -H "Authorization: Bearer ${GITHUB_TOKEN}" \
+            "https://api.github.com/repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}")
+          LABELS=$(echo "${PR_JSON}" | jq -crM '[.labels[].name]')
           echo "PR Labels: $LABELS"
-          if echo "$LABELS" | grep -q "${{ env.LABEL_TEST_GEOS_INTEGRATION }}"; then
+          if echo "$LABELS" | jq -e --arg label "${REQUIRED_LABEL}" 'index($label) != null' > /dev/null; then
             LABEL_FOUND=true
-            echo "Label '${{ env.LABEL_TEST_GEOS_INTEGRATION }}' found"
+            echo "Label '${REQUIRED_LABEL}' found"
           fi
-          echo "has_label=$LABEL_FOUND" >> $GITHUB_OUTPUT
+          echo "has_label=$LABEL_FOUND" >> "$GITHUB_OUTPUT"
 
   check_force_integration_label:
     runs-on: ubuntu-latest
     # needs: [build]
+    permissions:
+      pull-requests: read
     outputs:
       has_geos_integration_force_label: ${{ steps.set-label.outputs.has_label }}
     steps:
       - name: Check if PR has '${{ env.LABEL_FORCE_GEOS_INTEGRATION }}' label
         id: set-label
+        # Fetch labels live from the GitHub REST API rather than reading
+        # github.event.pull_request.labels. The event payload is a snapshot
+        # frozen at the time the workflow was first triggered, so labels
+        # added after that first run are invisible to re-runs.
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+          REQUIRED_LABEL: ${{ env.LABEL_FORCE_GEOS_INTEGRATION }}
         run: |
           echo "Checking for label..."
           LABEL_FOUND=false
-          LABELS='${{ toJson(github.event.pull_request.labels.*.name) }}'
+          PR_JSON=$(curl --fail --silent --show-error \
+            -H "Accept: application/vnd.github+json" \
+            -H "Authorization: Bearer ${GITHUB_TOKEN}" \
+            "https://api.github.com/repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}")
+          LABELS=$(echo "${PR_JSON}" | jq -crM '[.labels[].name]')
           echo "PR Labels: $LABELS"
-          if echo "$LABELS" | grep -q "${{ env.LABEL_FORCE_GEOS_INTEGRATION }}"; then
+          if echo "$LABELS" | jq -e --arg label "${REQUIRED_LABEL}" 'index($label) != null' > /dev/null; then
             LABEL_FOUND=true
-            echo "Label '${{ env.LABEL_FORCE_GEOS_INTEGRATION }}' found"
+            echo "Label '${REQUIRED_LABEL}' found"
           fi
-          echo "has_label=$LABEL_FOUND" >> $GITHUB_OUTPUT
+          echo "has_label=$LABEL_FOUND" >> "$GITHUB_OUTPUT"
 
   # Step 3: Check if GEOS integration is required based on changed files
   check_geos_integration_required:
diff --git a/geos-ats/src/geos/ats/baseline_io.py b/geos-ats/src/geos/ats/baseline_io.py
index 26b1920b..92e3d220 100644
--- a/geos-ats/src/geos/ats/baseline_io.py
+++ b/geos-ats/src/geos/ats/baseline_io.py
@@ -2,6 +2,7 @@
 import logging
 import tempfile
 import shutil
+import subprocess
 import yaml
 import time
 import requests
@@ -161,6 +162,83 @@ def collect_baselines( bucket_name: str,
         raise Exception( f'Could not find baseline files to unpack: expected={archive_name}' )
 
 
+def _available_cpu_count() -> int:
+    """
+    Get the number of CPUs available to the current process
+
+    Returns:
+        int: Size of the scheduler affinity set when the platform exposes it, else os.cpu_count() (at least 1)
+    """
+    if hasattr( os, 'sched_getaffinity' ):
+        return len( os.sched_getaffinity( 0 ) )
+    return os.cpu_count() or 1
+
+
+def _pack_baselines_with_parallel_gzip( archive_name: str, baseline_path: str ) -> bool:
+    """
+    Create the baseline archive by piping tar into pigz
+
+    Args:
+        archive_name (str): Archive path without the .tar.gz suffix
+        baseline_path (str): Directory whose contents are archived
+
+    Returns:
+        bool: True if the archive was written, False if the caller should fall back to the Python archiver
+    """
+    tar_bin = shutil.which( 'tar' )
+    pigz_bin = shutil.which( 'pigz' )
+    if not tar_bin or not pigz_bin:
+        logger.info( 'tar and pigz were not both found; using Python gztar archiver' )
+        return False
+
+    archive_path = f'{archive_name}.tar.gz'
+    threads = str( _available_cpu_count() )
+    logger.info( f'Archiving baseline files with tar and pigz -9 ({threads} threads)...' )
+
+    tar_process = None
+    pigz_process = None
+    archive_opened = False
+    try:
+        with open( archive_path, 'wb' ) as output:
+            archive_opened = True
+            tar_process = subprocess.Popen( [ tar_bin, '-C', baseline_path, '-cf', '-', '.' ], stdout=subprocess.PIPE )
+            if tar_process.stdout is None:
+                raise RuntimeError( 'failed to capture tar output' )
+            try:
+                pigz_process = subprocess.Popen( [ pigz_bin, '-9', '-p', threads ],
+                                                 stdin=tar_process.stdout,
+                                                 stdout=output )
+            finally:
+                # Close the parent's copy of the pipe even if pigz failed to start, so tar cannot block on it
+                tar_process.stdout.close()
+
+            pigz_status = pigz_process.wait()
+            tar_status = tar_process.wait()
+
+        if tar_status != 0 or pigz_status != 0:
+            raise RuntimeError( f'tar exited with {tar_status}; pigz exited with {pigz_status}' )
+
+    except Exception as e:
+        logger.warning( 'Parallel baseline archive creation failed; using Python gztar archiver' )
+        logger.warning( repr( e ) )
+        # Remove a partial archive on every failure path, not only on a non-zero exit status
+        if archive_opened:
+            try:
+                os.remove( archive_path )
+            except OSError:
+                pass
+        return False
+
+    finally:
+        # Reap children that are still running (e.g. tar when pigz could not be started)
+        for process in ( pigz_process, tar_process ):
+            if process is not None and process.poll() is None:
+                process.kill()
+                process.wait()
+
+    return True
+
+
 def pack_baselines( archive_name: str, baseline_path: str, log_path: str = '' ):
     """
     Pack and upload baselines to GCP
@@ -201,7 +279,8 @@ def pack_baselines( archive_name: str, baseline_path: str, log_path: str = '' ):
 
     try:
         logger.info( 'Archiving baseline files...' )
-        shutil.make_archive( archive_name, format='gztar', root_dir=baseline_path )
+        if not _pack_baselines_with_parallel_gzip( archive_name, baseline_path ):
+            shutil.make_archive( archive_name, format='gztar', root_dir=baseline_path )
         logger.info( f'Created {archive_name}.tar.gz' )
     except Exception as e:
         logger.error( 'Failed to create baseline archive' )
diff --git a/geos-ats/src/geos/ats/helpers/restart_check.py b/geos-ats/src/geos/ats/helpers/restart_check.py
index f4911073..8f44d478 100644
--- a/geos-ats/src/geos/ats/helpers/restart_check.py
+++ b/geos-ats/src/geos/ats/helpers/restart_check.py
@@ -17,7 +17,9 @@
 
 RTOL_DEFAULT = 0.0
 ATOL_DEFAULT = 0.0
-EXCLUDE_DEFAULT = [ ".*/commandLine", ".*/schema$", ".*/globalToLocalMap", ".*/timeHistoryOutput.*/restart" ]
+EXCLUDE_DEFAULT = [
+    ".*/commandLine", ".*/schema$", ".*/globalToLocalMap", ".*/timeHistoryOutput.*/restart", ".*/dNdX", ".*/detJ"
+]
 
 logger = logging.getLogger( 'geos-ats' )
 