diff --git a/.github/workflows/_decide.yml b/.github/workflows/_decide.yml
new file mode 100644
index 0000000000..bcfc32289b
--- /dev/null
+++ b/.github/workflows/_decide.yml
@@ -0,0 +1,86 @@
+name: _decide
+
+# Resolve the lane + backend from the triggering event. Shared by the
+# per-platform entry workflows (ci-linux-x86_64 / ci-windows / ci-sbsa) so the
+# rules live in exactly one place. `github.*` here refers to the caller's event.
+#
+#   lane:    fast (PR push) | full (approval / `ci: full` / main push) |
+#            nightly (schedule) | skip (non-approval review)
+#   backend: standard | rtx | both — from the `backend: TensorRT[-RTX]` labels
+#            on pull_request events (or the workflow_dispatch `backend` input);
+#            push / approval / schedule always resolve to `both`.
+#
+# Values outside those sets (possible only via workflow_dispatch inputs) fail
+# the job instead of being handed to the callers.
+
+on:
+  workflow_call:
+    outputs:
+      lane:
+        description: "fast | full | nightly | skip"
+        value: ${{ jobs.decide.outputs.lane }}
+      backend:
+        description: "standard | rtx | both"
+        value: ${{ jobs.decide.outputs.backend }}
+
+jobs:
+  decide:
+    runs-on: ubuntu-latest
+    # Shell-only job over event metadata (no checkout, no API calls): drop all token scopes.
+    permissions: {}
+    outputs:
+      lane: ${{ steps.pick.outputs.lane }}
+      backend: ${{ steps.pick.outputs.backend }}
+    steps:
+      - name: Resolve lane and backend
+        id: pick
+        # Event data is passed through env vars, never inlined into the script,
+        # so label names / dispatch inputs cannot be parsed as shell code.
+        env:
+          EVENT: ${{ github.event_name }}
+          REVIEW_STATE: ${{ github.event.review.state }}
+          HAS_FULL_LABEL: "${{ contains(github.event.pull_request.labels.*.name, 'ci: full') }}"
+          # Exact array-element match: 'backend: TensorRT' != 'backend: TensorRT-RTX'.
+          HAS_RTX_LABEL: "${{ contains(github.event.pull_request.labels.*.name, 'backend: TensorRT-RTX') }}"
+          HAS_STD_LABEL: "${{ contains(github.event.pull_request.labels.*.name, 'backend: TensorRT') }}"
+          DISPATCH_LANE: ${{ github.event.inputs.lane }}
+          DISPATCH_BACKEND: ${{ github.event.inputs.backend }}
+        run: |
+          set -euo pipefail
+
+          case "$EVENT" in
+            schedule) lane=nightly ;;
+            # A dispatch without a `lane` input defaults to the full lane.
+            workflow_dispatch) lane="${DISPATCH_LANE:-full}" ;;
+            push) lane=full ;; # main canary
+            pull_request_review)
+              if [ "$REVIEW_STATE" = "approved" ]; then lane=full; else lane=skip; fi ;;
+            pull_request)
+              if [ "$HAS_FULL_LABEL" = "true" ]; then lane=full; else lane=fast; fi ;;
+            *) lane=fast ;;
+          esac
+
+          case "$EVENT" in
+            # A dispatch without a `backend` input defaults to both backends.
+            workflow_dispatch) backend="${DISPATCH_BACKEND:-both}" ;;
+            pull_request)
+              if [ "$HAS_RTX_LABEL" = "true" ] && [ "$HAS_STD_LABEL" = "true" ]; then backend=both
+              elif [ "$HAS_RTX_LABEL" = "true" ]; then backend=rtx
+              else backend=standard; fi ;; # standard label only, or no backend label
+            *) backend=both ;; # push / approval / schedule
+          esac
+
+          # Validate before publishing: only the dispatch inputs can carry a value
+          # this script did not pick itself.
+          case "$lane" in
+            fast|full|nightly|skip) ;;
+            *) echo "::error::Unsupported lane '$lane' (expected fast|full|nightly|skip)."; exit 1 ;;
+          esac
+          case "$backend" in
+            standard|rtx|both) ;;
+            *) echo "::error::Unsupported backend '$backend' (expected standard|rtx|both)."; exit 1 ;;
+          esac
+
+          echo "lane=$lane" >> "$GITHUB_OUTPUT"
+          echo "backend=$backend" >> "$GITHUB_OUTPUT"
+          echo "Resolved lane='$lane' backend='$backend' (event=$EVENT)."
diff --git a/.github/workflows/_linux-x86_64-core.yml b/.github/workflows/_linux-x86_64-core.yml
deleted file mode 100644
index c0011470b2..0000000000
--- a/.github/workflows/_linux-x86_64-core.yml
+++ /dev/null
@@ -1,671 +0,0 @@
-# Reusable core for the Linux x86_64 build+test pipeline.
-#
-# Both build-test-linux-x86_64.yml (standard TensorRT) and
-# build-test-linux-x86_64_rtx.yml (TensorRT-RTX) call this with use-rtx
-# false/true. Keep ALL build/test logic here so a change lands once.
-#
-# RTX differences are expressed two ways:
-#   * Jobs that only exist for standard TRT (torchscript, distributed,
-#     executorch) are gated with ``if: ${{ !inputs.use-rtx && ... }}``.
-#   * Jobs whose pytest scope differs branch inside the script on the
-#     ``$USE_TRT_RTX`` env var that linux-test.yml exports.
-# -# The per-job pass/fail map is bubbled up via the ``results`` output so the -# thin caller workflows can render a single ``ci-rollup`` status check whose -# name (e.g. "CI / Linux x86_64") stays stable for branch protection. -name: _linux-x86_64-core - -on: - workflow_call: - inputs: - use-rtx: - description: "Build/test against TensorRT-RTX instead of standard TensorRT" - type: boolean - default: false - name-prefix: - description: "Display-name prefix for the build job (e.g. 'RTX - ')" - type: string - default: "" - outputs: - results: - description: "JSON map of {job-id: {result, outputs}} for every build/test job" - value: ${{ jobs.collect-results.outputs.results }} - -jobs: - generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: wheel - os: linux - test-infra-repository: pytorch/test-infra - test-infra-ref: main - with-rocm: false - with-cpu: false - - filter-matrix: - needs: [generate-matrix] - outputs: - matrix: ${{ steps.generate.outputs.matrix }} - runs-on: ubuntu-latest - steps: - - uses: actions/setup-python@v6 - with: - python-version: "3.11" - - uses: actions/checkout@v6 - with: - repository: pytorch/tensorrt - - name: Generate matrix - id: generate - run: | - set -eou pipefail - MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} - LIMIT_PR=${{ github.event_name == 'pull_request' && 'true' || 'false' }} - MATRIX_BLOB="$(python3 .github/scripts/filter-matrix.py --use-rtx ${{ inputs.use-rtx }} --limit-pr-builds "${LIMIT_PR}" --matrix "${MATRIX_BLOB}")" - echo "${MATRIX_BLOB}" - echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" - - build: - needs: filter-matrix - permissions: - id-token: write - contents: read - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - pre-script: packaging/pre_build_script.sh - env-var-script: packaging/env_vars.txt - post-script: packaging/post_build_script.sh - smoke-test-script: 
packaging/smoke_test_script.sh - package-name: torch_tensorrt - display-name: ${{ inputs.name-prefix }}Build Linux x86_64 torch-tensorrt whl package - name: ${{ matrix.display-name }} - uses: ./.github/workflows/build_linux.yml - with: - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - env-var-script: ${{ matrix.env-var-script }} - post-script: ${{ matrix.post-script }} - package-name: ${{ matrix.package-name }} - smoke-test-script: ${{ matrix.smoke-test-script }} - trigger-event: ${{ github.event_name }} - architecture: "x86_64" - use-rtx: ${{ inputs.use-rtx }} - pip-install-torch-extra-args: "--extra-index-url https://pypi.org/simple" - - # Standard-TRT only: ExecuTorch static build is not part of the RTX matrix. - executorch-static-build: - needs: [filter-matrix, build] - if: ${{ !inputs.use-rtx }} - uses: ./.github/workflows/executorch-static-linux.yml - with: - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - - L0-dynamo-converter-tests: - name: ${{ matrix.display-name }} - needs: [filter-matrix, build] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - display-name: L0 dynamo converter tests - uses: ./.github/workflows/linux-test.yml - with: - job-name: L0-dynamo-converter-tests - 
repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - use-rtx: ${{ inputs.use-rtx }} - script: | - set -euo pipefail - # Tier definition lives in tests/py/utils/ci_helpers.sh (single source of - # truth, shared with the local `just` recipes). USE_TRT_RTX is exported - # by this workflow's caller. - source tests/py/utils/ci_helpers.sh - trt_tier_l0_converter - L0-dynamo-core-tests: - name: ${{ matrix.display-name }} - needs: [filter-matrix, build] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - display-name: L0 dynamo core tests - uses: ./.github/workflows/linux-test.yml - with: - job-name: L0-dynamo-core-tests - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - use-rtx: ${{ inputs.use-rtx }} - script: | - set -euo pipefail - # Tier definition lives in tests/py/utils/ci_helpers.sh (single source of - # truth, shared with the local `just` recipes). USE_TRT_RTX is exported - # by this workflow's caller. 
- source tests/py/utils/ci_helpers.sh - trt_tier_l0_core - L0-py-core-tests: - name: ${{ matrix.display-name }} - needs: [filter-matrix, build] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - display-name: L0 core python tests - uses: ./.github/workflows/linux-test.yml - with: - job-name: L0-py-core-tests - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - use-rtx: ${{ inputs.use-rtx }} - script: | - set -euo pipefail - # Tier definition lives in tests/py/utils/ci_helpers.sh (single source of - # truth, shared with the local `just` recipes). USE_TRT_RTX is exported - # by this workflow's caller. - source tests/py/utils/ci_helpers.sh - trt_tier_l0_py_core - # Standard-TRT only: TorchScript frontend is not exercised on RTX. 
- L0-torchscript-tests: - name: ${{ matrix.display-name }} - needs: [filter-matrix, build] - if: ${{ !inputs.use-rtx && ((github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success()) }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - display-name: L0 torchscript tests - uses: ./.github/workflows/linux-test.yml - with: - job-name: L0-torchscript-tests - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - use-rtx: ${{ inputs.use-rtx }} - script: | - set -euo pipefail - # Tier definition lives in tests/py/utils/ci_helpers.sh (single source of - # truth, shared with the local `just` recipes). USE_TRT_RTX is exported - # by this workflow's caller. 
- source tests/py/utils/ci_helpers.sh - trt_tier_l0_torchscript - L1-dynamo-core-tests: - name: ${{ matrix.display-name }} - needs: - [ - filter-matrix, - build, - L0-dynamo-converter-tests, - L0-dynamo-core-tests, - L0-py-core-tests, - L0-torchscript-tests, - ] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - display-name: L1 dynamo core tests - uses: ./.github/workflows/linux-test.yml - with: - job-name: L1-dynamo-core-tests - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - use-rtx: ${{ inputs.use-rtx }} - script: | - set -euo pipefail - # Tier definition lives in tests/py/utils/ci_helpers.sh (single source of - # truth, shared with the local `just` recipes). USE_TRT_RTX is exported - # by this workflow's caller. 
- source tests/py/utils/ci_helpers.sh - trt_tier_l1_dynamo_core - L1-dynamo-compile-tests: - name: ${{ matrix.display-name }} - needs: - [ - filter-matrix, - build, - L0-dynamo-converter-tests, - L0-dynamo-core-tests, - L0-py-core-tests, - L0-torchscript-tests, - ] - if: "${{ !contains(github.event.pull_request.labels.*.name, 'ci: only-l0') && ((github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success()) }}" - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - display-name: L1 dynamo compile tests - uses: ./.github/workflows/linux-test.yml - with: - job-name: L1-dynamo-compile-tests - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - use-rtx: ${{ inputs.use-rtx }} - script: | - set -euo pipefail - # Tier definition lives in tests/py/utils/ci_helpers.sh (single source of - # truth, shared with the local `just` recipes). USE_TRT_RTX is exported - # by this workflow's caller. 
- source tests/py/utils/ci_helpers.sh - trt_tier_l1_dynamo_compile - L1-torch-compile-tests: - name: ${{ matrix.display-name }} - needs: - [ - filter-matrix, - build, - L0-dynamo-converter-tests, - L0-dynamo-core-tests, - L0-py-core-tests, - L0-torchscript-tests, - ] - if: "${{ !contains(github.event.pull_request.labels.*.name, 'ci: only-l0') && ((github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success()) }}" - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - display-name: L1 torch compile tests - uses: ./.github/workflows/linux-test.yml - with: - job-name: L1-torch-compile-tests - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - use-rtx: ${{ inputs.use-rtx }} - script: | - set -euo pipefail - # Tier definition lives in tests/py/utils/ci_helpers.sh (single source of - # truth, shared with the local `just` recipes). USE_TRT_RTX is exported - # by this workflow's caller. - source tests/py/utils/ci_helpers.sh - trt_tier_l1_torch_compile - # Standard-TRT only: TorchScript frontend is not exercised on RTX. 
- L1-torchscript-tests: - name: ${{ matrix.display-name }} - needs: - [ - filter-matrix, - build, - L0-dynamo-core-tests, - L0-dynamo-converter-tests, - L0-py-core-tests, - L0-torchscript-tests, - ] - if: "${{ !inputs.use-rtx && !contains(github.event.pull_request.labels.*.name, 'ci: only-l0') && ((github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success()) }}" - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - display-name: L1 torch script tests - uses: ./.github/workflows/linux-test.yml - with: - job-name: L1-torchscript-tests - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - use-rtx: ${{ inputs.use-rtx }} - script: | - set -euo pipefail - # Tier definition lives in tests/py/utils/ci_helpers.sh (single source of - # truth, shared with the local `just` recipes). USE_TRT_RTX is exported - # by this workflow's caller. 
- source tests/py/utils/ci_helpers.sh - trt_tier_l1_torchscript - L2-torch-compile-tests: - name: ${{ matrix.display-name }} - needs: - [ - filter-matrix, - build, - L1-torch-compile-tests, - L1-dynamo-compile-tests, - L1-dynamo-core-tests, - L1-torchscript-tests, - ] - if: "${{ !contains(github.event.pull_request.labels.*.name, 'ci: only-l0') && ((github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success()) }}" - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - display-name: L2 torch compile tests - uses: ./.github/workflows/linux-test.yml - with: - job-name: L2-torch-compile-tests - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - use-rtx: ${{ inputs.use-rtx }} - script: | - set -euo pipefail - # Tier definition lives in tests/py/utils/ci_helpers.sh (single source of - # truth, shared with the local `just` recipes). USE_TRT_RTX is exported - # by this workflow's caller. 
- source tests/py/utils/ci_helpers.sh - trt_tier_l2_torch_compile - L2-dynamo-compile-tests: - name: ${{ matrix.display-name }} - needs: - [ - filter-matrix, - build, - L1-dynamo-compile-tests, - L1-dynamo-core-tests, - L1-torch-compile-tests, - L1-torchscript-tests, - ] - if: "${{ !contains(github.event.pull_request.labels.*.name, 'ci: only-l0') && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci: run-l2') || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && ((github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success()) }}" - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - display-name: L2 dynamo compile tests - uses: ./.github/workflows/linux-test.yml - with: - job-name: L2-dynamo-compile-tests - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - use-rtx: ${{ inputs.use-rtx }} - script: | - set -euo pipefail - # Tier definition lives in tests/py/utils/ci_helpers.sh (single source of - # truth, shared with the local `just` recipes). USE_TRT_RTX is exported - # by this workflow's caller. 
- source tests/py/utils/ci_helpers.sh - trt_tier_l2_dynamo_compile - L2-dynamo-core-tests: - name: ${{ matrix.display-name }} - needs: - [ - filter-matrix, - build, - L1-dynamo-core-tests, - L1-dynamo-compile-tests, - L1-torch-compile-tests, - L1-torchscript-tests, - ] - if: "${{ !contains(github.event.pull_request.labels.*.name, 'ci: only-l0') && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci: run-l2') || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && ((github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success()) }}" - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - display-name: L2 dynamo core tests - uses: ./.github/workflows/linux-test.yml - with: - job-name: L2-dynamo-core-tests - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - use-rtx: ${{ inputs.use-rtx }} - script: | - set -euo pipefail - # Tier definition lives in tests/py/utils/ci_helpers.sh (single source of - # truth, shared with the local `just` recipes). USE_TRT_RTX is exported - # by this workflow's caller. 
- source tests/py/utils/ci_helpers.sh - trt_tier_l2_dynamo_core - L2-dynamo-plugin-tests: - name: ${{ matrix.display-name }} - needs: - [ - filter-matrix, - build, - L1-dynamo-core-tests, - L1-dynamo-compile-tests, - L1-torch-compile-tests, - L1-torchscript-tests, - ] - if: "${{ !contains(github.event.pull_request.labels.*.name, 'ci: only-l0') && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci: run-l2') || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && ((github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success()) }}" - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - display-name: L2 dynamo plugin tests - uses: ./.github/workflows/linux-test.yml - with: - job-name: L2-dynamo-plugin-tests - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - use-rtx: ${{ inputs.use-rtx }} - script: | - set -euo pipefail - # Tier definition lives in tests/py/utils/ci_helpers.sh (single source of - # truth, shared with the local `just` recipes). USE_TRT_RTX is exported - # by this workflow's caller. - source tests/py/utils/ci_helpers.sh - trt_tier_l2_plugin - # Standard-TRT only: TorchScript frontend is not exercised on RTX. 
- L2-torchscript-tests: - name: ${{ matrix.display-name }} - needs: - [ - filter-matrix, - build, - L1-dynamo-core-tests, - L1-dynamo-compile-tests, - L1-torch-compile-tests, - L1-torchscript-tests, - ] - if: "${{ !inputs.use-rtx && !contains(github.event.pull_request.labels.*.name, 'ci: only-l0') && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci: run-l2') || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && ((github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success()) }}" - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - display-name: L2 torch script tests - uses: ./.github/workflows/linux-test.yml - with: - job-name: L2-torchscript-tests - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - use-rtx: ${{ inputs.use-rtx }} - script: | - set -euo pipefail - # Tier definition lives in tests/py/utils/ci_helpers.sh (single source of - # truth, shared with the local `just` recipes). USE_TRT_RTX is exported - # by this workflow's caller. - source tests/py/utils/ci_helpers.sh - trt_tier_l2_torchscript - # Standard-TRT only: distributed tests need a multi-GPU runner not in the RTX matrix. 
- L2-dynamo-distributed-tests: - name: ${{ matrix.display-name }} - needs: - [ - filter-matrix, - build, - L1-dynamo-core-tests, - L1-dynamo-compile-tests, - L1-torch-compile-tests, - L1-torchscript-tests, - ] - if: "${{ !inputs.use-rtx && !contains(github.event.pull_request.labels.*.name, 'ci: only-l0') && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci: run-l2') || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && ((github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success()) }}" - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - display-name: L2 dynamo distributed tests - uses: ./.github/workflows/linux-test.yml - with: - job-name: L2-dynamo-distributed-tests - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - use-rtx: ${{ inputs.use-rtx }} - runner: linux.g4dn.12xlarge.nvidia.gpu - script: | - set -euo pipefail - # Tier definition lives in tests/py/utils/ci_helpers.sh (single source of - # truth, shared with the local `just` recipes). USE_TRT_RTX is exported - # by this workflow's caller. - source tests/py/utils/ci_helpers.sh - trt_tier_l2_distributed - # Gather every build/test job's result into a single JSON map and expose it - # as the ``results`` workflow output. The thin caller workflows render this - # into one ``ci-rollup`` status check. 
``if: always()`` so the map is - # produced even when upstream jobs failed / were skipped / were cancelled. - collect-results: - if: ${{ always() }} - permissions: {} - needs: - [ - build, - L0-dynamo-converter-tests, - L0-dynamo-core-tests, - L0-py-core-tests, - L0-torchscript-tests, - L1-dynamo-core-tests, - L1-dynamo-compile-tests, - L1-torch-compile-tests, - L1-torchscript-tests, - L2-torch-compile-tests, - L2-dynamo-compile-tests, - L2-dynamo-core-tests, - L2-dynamo-plugin-tests, - L2-torchscript-tests, - L2-dynamo-distributed-tests, - ] - runs-on: ubuntu-latest - outputs: - results: ${{ steps.collect.outputs.results }} - steps: - - name: Collect job results - id: collect - env: - RESULTS: ${{ toJSON(needs) }} - run: | - set -euo pipefail - { - echo "results<> "${GITHUB_OUTPUT}" diff --git a/.github/workflows/_test-linux.yml b/.github/workflows/_test-linux.yml new file mode 100644 index 0000000000..dd526d5b12 --- /dev/null +++ b/.github/workflows/_test-linux.yml @@ -0,0 +1,153 @@ +name: _test-linux + +# Manifest-driven build (+ optional test) for ONE Linux channel. Covers both: +# * x86_64 — os=linux, architecture=x86_64, run-tests=true (build + suites) +# * SBSA — os=linux-aarch64, architecture=aarch64, run-tests=false (build-only; +# there are no aarch64 GPU test runners) +# +# One reusable serves both because the build (build_linux.yml) and test +# (linux-test.yml) reusables are the same — only inputs differ. (GitHub requires +# `uses:` to be a literal, so Windows, which needs build_windows/windows-test, +# has its own _test-windows.yml.) + +on: + workflow_call: + inputs: + lane: + description: "fast | full | nightly" + required: true + type: string + use-rtx: + description: "Test against TensorRT-RTX (x86_64 only)" + type: boolean + default: false + name-prefix: + description: "Display-name prefix (e.g. 
'RTX - ', 'SBSA ')" + type: string + default: "" + os: + description: "generate_binary_build_matrix os: linux | linux-aarch64" + type: string + default: linux + architecture: + description: "x86_64 | aarch64" + type: string + default: x86_64 + run-tests: + description: "Run the suite matrix after building (false = build-only, e.g. SBSA)" + type: boolean + default: true + python-only: + description: "Build the PYTHON_ONLY=1 wheel (no C++ runtime) via env_vars_python_only.txt" + type: boolean + default: false + +jobs: + generate-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: wheel + os: ${{ inputs.os }} + test-infra-repository: pytorch/test-infra + test-infra-ref: main + with-rocm: false + with-cpu: false + + filter-matrix: + needs: [generate-matrix] + outputs: + matrix: ${{ steps.generate.outputs.matrix }} + runs-on: ubuntu-latest + steps: + - uses: actions/setup-python@v6 + with: + python-version: "3.11" + - uses: actions/checkout@v6 + with: + repository: pytorch/tensorrt + - name: Generate matrix + id: generate + run: | + set -eou pipefail + MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} + # Build one representative wheel for PR-triggered runs (any lane); + # the full python×cuda matrix only on push-to-main and nightly. 
+ LIMIT_PR=${{ (github.event_name == 'push' || github.event_name == 'schedule') && 'false' || 'true' }} + MATRIX_BLOB="$(python3 .github/scripts/filter-matrix.py --use-rtx ${{ inputs.use-rtx }} --limit-pr-builds "${LIMIT_PR}" --matrix "${MATRIX_BLOB}")" + echo "${MATRIX_BLOB}" + echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" + + build: + needs: filter-matrix + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + pre-script: packaging/pre_build_script.sh + env-var-script: packaging/env_vars.txt + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + package-name: torch_tensorrt + display-name: ${{ inputs.name-prefix }}Build Linux ${{ inputs.architecture }} torch-tensorrt whl package + name: ${{ matrix.display-name }} + uses: ./.github/workflows/build_linux.yml + with: + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + # python-only swaps in the env_vars_python_only.txt (sets PYTHON_ONLY=1). + env-var-script: ${{ inputs.python-only && 'packaging/env_vars_python_only.txt' || matrix.env-var-script }} + post-script: ${{ matrix.post-script }} + package-name: ${{ matrix.package-name }} + smoke-test-script: ${{ matrix.smoke-test-script }} + trigger-event: ${{ github.event_name }} + architecture: ${{ inputs.architecture }} + use-rtx: ${{ inputs.use-rtx }} + pip-install-torch-extra-args: "--extra-index-url https://pypi.org/simple" + + # Suite list from the manifest. Skipped for build-only channels (SBSA). 
+  suite-matrix:
+    if: ${{ inputs.run-tests }}
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.gen.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.11"
+      - id: gen
+        run: |
+          set -euo pipefail
+          variant=${{ inputs.use-rtx && 'rtx' || 'standard' }}
+          json="$(python -m tests.ci matrix --platform linux-x86_64 --lane '${{ inputs.lane }}' --variant "${variant}")"
+          echo "matrix=${json}" >> "$GITHUB_OUTPUT"
+          echo "Lane '${{ inputs.lane }}' (${variant}) suites:"; echo "${json}" | python -m json.tool
+
+  # One test job per suite (auto-skips when suite-matrix is skipped, i.e. SBSA).
+  test:
+    needs: [filter-matrix, build, suite-matrix]
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJson(needs.suite-matrix.outputs.matrix) }}
+    uses: ./.github/workflows/linux-test.yml
+    with:
+      job-name: ${{ matrix.suite }}-${{ matrix.variant }}
+      repository: "pytorch/tensorrt"
+      ref: ""
+      test-infra-repository: pytorch/test-infra
+      test-infra-ref: main
+      build-matrix: ${{ needs.filter-matrix.outputs.matrix }}
+      pre-script: packaging/pre_build_script.sh
+      use-rtx: ${{ inputs.use-rtx }}
+      fail-on-empty: true
+      script: |
+        set -euo pipefail
+        python -m tests.ci run "${{ matrix.suite }}" --variant "${{ matrix.variant }}"
diff --git a/.github/workflows/_test-windows.yml b/.github/workflows/_test-windows.yml
new file mode 100644
index 0000000000..9fd6a49437
--- /dev/null
+++ b/.github/workflows/_test-windows.yml
@@ -0,0 +1,155 @@
+name: _test-windows
+
+# Manifest-driven build + test for the Windows channel. Mirrors _test-linux.yml
+# but uses build_windows.yml + windows-test.yml, and wraps the suite run in
+# vc_env_helper.bat (the MSVC env). Suite SELECTION is identical to Linux — the
+# manifest is platform-agnostic (`ci matrix --platform windows`); only the build
+# reusable and the execution wrapper differ.
+
+on:
+  workflow_call:
+    inputs:
+      lane:
+        description: "fast | full | nightly"
+        required: true
+        type: string
+      use-rtx:
+        description: "Test against TensorRT-RTX"
+        type: boolean
+        default: false
+      name-prefix:
+        description: "Display-name prefix (e.g. 'RTX - ')"
+        type: string
+        default: ""
+
+jobs:
+  # Windows wheel build matrix from pytorch/test-infra, with the ROCm and CPU entries switched off.
+  generate-matrix:
+    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
+    with:
+      package-type: wheel
+      os: windows
+      test-infra-repository: pytorch/test-infra
+      test-infra-ref: main
+      with-rocm: false
+      with-cpu: false
+
+  # Run the generated matrix through .github/scripts/filter-matrix.py and publish the result.
+  filter-matrix:
+    needs: [generate-matrix]
+    outputs:
+      matrix: ${{ steps.generate.outputs.matrix }}
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.11"
+      - uses: actions/checkout@v6
+        with:
+          repository: pytorch/tensorrt
+      - name: Generate matrix
+        id: generate
+        run: |
+          set -euo pipefail
+          MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }}
+          # Build one representative wheel for PR-triggered runs (any lane);
+          # the full python×cuda matrix only on push-to-main and nightly.
+          LIMIT_PR=${{ (github.event_name == 'push' || github.event_name == 'schedule') && 'false' || 'true' }}
+          MATRIX_BLOB="$(python3 .github/scripts/filter-matrix.py --use-rtx ${{ inputs.use-rtx }} --limit-pr-builds "${LIMIT_PR}" --matrix "${MATRIX_BLOB}")"
+          echo "${MATRIX_BLOB}"
+          echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}"
+
+  # Swap the build runner label for a GPU test runner (same as the legacy windows flow).
+  substitute-runner:
+    needs: filter-matrix
+    outputs:
+      matrix: ${{ steps.substitute.outputs.matrix }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Substitute runner
+        id: substitute
+        env:
+          # Handed over through the environment so the JSON reaches the shell as data;
+          # spliced into the script text, a quote character in it would be read as shell syntax.
+          FILTERED_MATRIX: ${{ needs.filter-matrix.outputs.matrix }}
+        run: |
+          # Dots in the old label are escaped so sed matches them literally.
+          echo "matrix=$(printf '%s' "${FILTERED_MATRIX}" | sed -e 's/windows\.g4dn\.xlarge/windows.g5.4xlarge.nvidia.gpu/g')" >> "${GITHUB_OUTPUT}"
+
+  build:
+    needs: substitute-runner
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - repository: pytorch/tensorrt
+            pre-script: packaging/pre_build_script_windows.sh
+            env-script: packaging/vc_env_helper.bat
+            smoke-test-script: packaging/smoke_test_windows.py
+            package-name: torch_tensorrt
+            display-name: ${{ inputs.name-prefix }}Build Windows torch-tensorrt whl package
+    name: ${{ matrix.display-name }}
+    uses: ./.github/workflows/build_windows.yml
+    with:
+      repository: ${{ matrix.repository }}
+      ref: ""
+      test-infra-repository: pytorch/test-infra
+      test-infra-ref: main
+      build-matrix: ${{ needs.substitute-runner.outputs.matrix }}
+      pre-script: ${{ matrix.pre-script }}
+      env-script: ${{ matrix.env-script }}
+      smoke-test-script: ${{ matrix.smoke-test-script }}
+      package-name: ${{ matrix.package-name }}
+      trigger-event: ${{ github.event_name }}
+      # Build the RTX wheel when requested (matches the original RTX-windows
+      # entry); empty params + use-rtx=false for the standard build.
+      use-rtx: ${{ inputs.use-rtx }}
+      wheel-build-params: ${{ inputs.use-rtx && '--use-rtx' || '' }}
+      timeout: 120
+
+  # Suite list from the manifest (`tests.ci matrix`) for the requested lane and variant.
+  suite-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.gen.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.11"
+      - id: gen
+        env:
+          # `lane` is a free-form string input; reading it from the environment keeps
+          # its value out of the script text, where it could be parsed as shell.
+          LANE: ${{ inputs.lane }}
+        run: |
+          set -euo pipefail
+          variant=${{ inputs.use-rtx && 'rtx' || 'standard' }}
+          json="$(python -m tests.ci matrix --platform windows --lane "${LANE}" --variant "${variant}")"
+          echo "matrix=${json}" >> "$GITHUB_OUTPUT"
+          echo "Lane '${LANE}' (${variant}) windows suites:"; echo "${json}" | python -m json.tool
+
+  # One test job per entry of the suite-matrix output.
+  test:
+    needs: [substitute-runner, build, suite-matrix]
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJson(needs.suite-matrix.outputs.matrix) }}
+    uses: ./.github/workflows/windows-test.yml
+    with:
+      job-name: ${{ matrix.suite }}-${{ matrix.variant }}
+      repository: "pytorch/tensorrt"
+      ref: ""
+      test-infra-repository: pytorch/test-infra
+      test-infra-ref: main
+      build-matrix: ${{ needs.substitute-runner.outputs.matrix }}
+      pre-script: packaging/driver_upgrade.bat
+      use-rtx: ${{ inputs.use-rtx }}
+      # vc_env_helper.bat sets the MSVC env, then runs the manifest runner; the
+      # inner `python -m pytest` it spawns inherits that env.
+      script: |
+        set -euo pipefail
+        packaging/vc_env_helper.bat python -m tests.ci run "${{ matrix.suite }}" --variant "${{ matrix.variant }}"
diff --git a/.github/workflows/blossom-ci.yml b/.github/workflows/blossom-ci.yml
deleted file mode 100644
index d5bdf0ed95..0000000000
--- a/.github/workflows/blossom-ci.yml
+++ /dev/null
@@ -1,104 +0,0 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# A workflow to trigger ci on hybrid infra (github + self hosted runner) -name: Blossom-CI -on: - issue_comment: - types: [created] - workflow_dispatch: - inputs: - platform: - description: 'runs-on argument' - required: false - args: - description: 'argument' - required: false -jobs: - Authorization: - name: Authorization - runs-on: blossom - outputs: - args: ${{ env.args }} - - # This job only runs for pull request comments - if: | - contains( 'andi4191, narendasan, peri044, bowang007,', format('{0},', github.actor)) && - github.event.comment.body == '/blossom-ci' - steps: - - name: Check if comment is issued by authorized person - run: blossom-ci - env: - OPERATION: 'AUTH' - REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }} - - Vulnerability-scan: - name: Vulnerability scan - needs: [Authorization] - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v6 - with: - repository: ${{ fromJson(needs.Authorization.outputs.args).repo }} - ref: ${{ fromJson(needs.Authorization.outputs.args).ref }} - lfs: 'true' - - # repo specific steps - #- name: Setup java - # uses: actions/setup-java@v5 - # with: - # java-version: 1.8 - - # add blackduck properties https://synopsys.atlassian.net/wiki/spaces/INTDOCS/pages/631308372/Methods+for+Configuring+Analysis#Using-a-configuration-file - #- name: Setup blackduck properties - # run: | - # PROJECTS=$(mvn -am dependency:tree | grep maven-dependency-plugin | awk '{ out="com.nvidia:"$(NF-1);print out }' | grep rapids | xargs | sed -e 's/ /,/g') - # echo 
detect.maven.build.command="-pl=$PROJECTS -am" >> application.properties - # echo detect.maven.included.scopes=compile >> application.properties - - - name: Run blossom action - uses: NVIDIA/blossom-action@main - env: - REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }} - with: - args1: ${{ fromJson(needs.Authorization.outputs.args).args1 }} - args2: ${{ fromJson(needs.Authorization.outputs.args).args2 }} - args3: ${{ fromJson(needs.Authorization.outputs.args).args3 }} - - Job-trigger: - name: Start ci job - needs: [Vulnerability-scan] - runs-on: blossom - steps: - - name: Start ci job - run: blossom-ci - env: - OPERATION: 'START-CI-JOB' - CI_SERVER: ${{ secrets.CI_SERVER }} - REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - Upload-Log: - name: Upload log - runs-on: blossom - if : github.event_name == 'workflow_dispatch' - steps: - - name: Jenkins log for pull request ${{ fromJson(github.event.inputs.args).pr }} (click here) - run: blossom-ci - env: - OPERATION: 'POST-PROCESSING' - CI_SERVER: ${{ secrets.CI_SERVER }} - REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/build-tensorrt-linux.yml b/.github/workflows/build-tensorrt-linux.yml deleted file mode 100644 index 11555e9f8f..0000000000 --- a/.github/workflows/build-tensorrt-linux.yml +++ /dev/null @@ -1,222 +0,0 @@ -name: Build Torch-TensorRT wheel on Linux with Future TensorRT Versions - -on: - workflow_call: - inputs: - repository: - description: 'Repository to checkout, defaults to ""' - default: "" - type: string - ref: - description: 'Reference to checkout, defaults to "nightly"' - default: "nightly" - type: string - test-infra-repository: - description: "Test infra repository to use" - default: "pytorch/test-infra" - type: string - test-infra-ref: - description: "Test infra reference to use" - default: "" - type: string - build-matrix: - description: "Build matrix to utilize" - default: "" - type: string - pre-script: - description: "Pre script to run prior 
to build" - default: "" - type: string - post-script: - description: "Post script to run prior to build" - default: "" - type: string - smoke-test-script: - description: "Script for Smoke Test for a specific domain" - default: "" - type: string - env-var-script: - description: "Script that sets Domain-Specific Environment Variables" - default: "" - type: string - package-name: - description: "Name of the actual python package that is imported" - default: "" - type: string - trigger-event: - description: "Trigger Event in caller that determines whether or not to upload" - default: "" - type: string - cache-path: - description: "The path(s) on the runner to cache or restore. The path is relative to repository." - default: "" - type: string - cache-key: - description: "The key created when saving a cache and the key used to search for a cache." - default: "" - type: string - architecture: - description: Architecture to build for x86_64 for default Linux, or aarch64 for Linux aarch64 builds - required: false - type: string - default: x86_64 - submodules: - description: Works as stated in actions/checkout, but the default value is recursive - required: false - type: string - default: recursive - setup-miniconda: - description: Set to true if setup-miniconda is needed - required: false - type: boolean - default: true - -permissions: - id-token: write - contents: read - -jobs: - build: - strategy: - fail-fast: false - matrix: ${{ fromJSON(inputs.build-matrix) }} - env: - PYTHON_VERSION: ${{ matrix.python_version }} - PACKAGE_TYPE: wheel - REPOSITORY: ${{ inputs.repository }} - REF: ${{ inputs.ref }} - CU_VERSION: ${{ matrix.desired_cuda }} - UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }} - ARCH: ${{ inputs.architecture }} - TENSORRT_STRIP_PREFIX: ${{ matrix.tensorrt.strip_prefix }} - TENSORRT_VERSION: ${{ matrix.tensorrt.version }} - TENSORRT_URLS: ${{ matrix.tensorrt.urls }} - TENSORRT_SHA256: ${{ matrix.tensorrt.sha256 }} - UPLOAD_ARTIFACT_NAME: 
pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_${{ inputs.architecture }} - name: build_tensorrt${{ matrix.tensorrt.version }}_py${{matrix.python_version}}_${{matrix.desired_cuda}} - runs-on: ${{ matrix.validation_runner }} - container: - image: ${{ matrix.container_image }} - options: ${{ matrix.gpu_arch_type == 'cuda' && '--gpus all' || ' ' }} - # If a build is taking longer than 120 minutes on these runners we need - # to have a conversation - timeout-minutes: 120 - - steps: - - name: Clean workspace - shell: bash -l {0} - run: | - set -x - echo "::group::Cleanup debug output" - rm -rf "${GITHUB_WORKSPACE}" - mkdir -p "${GITHUB_WORKSPACE}" - if [[ "${{ inputs.architecture }}" = "aarch64" ]]; then - rm -rf "${RUNNER_TEMP}/*" - fi - echo "::endgroup::" - - uses: actions/checkout@v6 - with: - # Support the use case where we need to checkout someone's fork - repository: ${{ inputs.test-infra-repository }} - ref: ${{ inputs.test-infra-ref }} - path: test-infra - - uses: actions/checkout@v6 - if: ${{ env.ARCH == 'aarch64' }} - with: - # Support the use case where we need to checkout someone's fork - repository: "pytorch/builder" - ref: "main" - path: builder - - name: Set linux aarch64 CI - if: ${{ inputs.architecture == 'aarch64' }} - shell: bash -l {0} - env: - DESIRED_PYTHON: ${{ matrix.python_version }} - run: | - set +e - # TODO: This is temporary aarch64 setup script, this should be integrated into aarch64 docker. 
- ${GITHUB_WORKSPACE}/builder/aarch64_linux/aarch64_ci_setup.sh - echo "/opt/conda/bin" >> $GITHUB_PATH - set -e - - uses: ./test-infra/.github/actions/set-channel - - name: Set PYTORCH_VERSION - if: ${{ env.CHANNEL == 'test' }} - run: | - # When building RC, set the version to be the current candidate version, - # otherwise, leave it alone so nightly will pick up the latest - echo "PYTORCH_VERSION=${{ matrix.stable_version }}" >> "${GITHUB_ENV}" - - uses: ./test-infra/.github/actions/setup-binary-builds - env: - PLATFORM: ${{ inputs.architecture == 'aarch64' && 'linux-aarch64' || ''}} - with: - repository: ${{ inputs.repository }} - ref: ${{ inputs.ref }} - submodules: ${{ inputs.submodules }} - setup-miniconda: ${{ inputs.setup-miniconda }} - python-version: ${{ env.PYTHON_VERSION }} - cuda-version: ${{ env.CU_VERSION }} - arch: ${{ env.ARCH }} - - name: Combine Env Var and Build Env Files - if: ${{ inputs.env-var-script != '' }} - working-directory: ${{ inputs.repository }} - shell: bash -l {0} - run: | - cat "${{ inputs.env-var-script }}" >> "${BUILD_ENV_FILE}" - - name: Install torch dependency - shell: bash -l {0} - run: | - set -x - # shellcheck disable=SC1090 - source "${BUILD_ENV_FILE}" - # shellcheck disable=SC2086 - ${CONDA_RUN} ${PIP_INSTALL_TORCH} - - name: Run Pre-Script with Caching - if: ${{ inputs.pre-script != '' }} - uses: ./test-infra/.github/actions/run-script-with-cache - with: - cache-path: ${{ inputs.cache-path }} - cache-key: ${{ inputs.cache-key }} - repository: ${{ inputs.repository }} - script: ${{ inputs.pre-script }} - - name: Build clean - working-directory: ${{ inputs.repository }} - shell: bash -l {0} - run: | - set -x - source "${BUILD_ENV_FILE}" - ${CONDA_RUN} python setup.py clean - - name: Build the wheel (bdist_wheel) - working-directory: ${{ inputs.repository }} - shell: bash -l {0} - run: | - set -x - source "${BUILD_ENV_FILE}" - ${CONDA_RUN} python setup.py bdist_wheel - - - name: Run Post-Script - if: ${{ inputs.post-script 
!= '' }} - uses: ./test-infra/.github/actions/run-script-with-cache - with: - repository: ${{ inputs.repository }} - script: ${{ inputs.post-script }} - - name: Smoke Test - shell: bash -l {0} - env: - PACKAGE_NAME: ${{ inputs.package-name }} - SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} - run: | - set -x - source "${BUILD_ENV_FILE}" - # TODO: add smoke test for the auditwheel tarball built - - # NB: Only upload to GitHub after passing smoke tests - - name: Upload wheel to GitHub - continue-on-error: true - uses: actions/upload-artifact@v6 - with: - name: ${{ env.UPLOAD_ARTIFACT_NAME }} - path: ${{ inputs.repository }}/dist - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} - cancel-in-progress: true \ No newline at end of file diff --git a/.github/workflows/build-tensorrt-windows.yml b/.github/workflows/build-tensorrt-windows.yml deleted file mode 100644 index 3c69427a22..0000000000 --- a/.github/workflows/build-tensorrt-windows.yml +++ /dev/null @@ -1,232 +0,0 @@ -name: Build Torch-TensorRT wheel on Windows with Future TensorRT Versions - -on: - workflow_call: - inputs: - repository: - description: 'Repository to checkout, defaults to ""' - default: "" - type: string - ref: - description: 'Reference to checkout, defaults to "nightly"' - default: "nightly" - type: string - test-infra-repository: - description: "Test infra repository to use" - default: "pytorch/test-infra" - type: string - test-infra-ref: - description: "Test infra reference to use" - default: "" - type: string - build-matrix: - description: "Build matrix to utilize" - default: "" - type: string - pre-script: - description: "Pre script to run prior to build" - default: "" - type: string - env-script: - description: "Script to setup environment variables for the build" - default: "" - type: string - wheel-build-params: - description: 
"Additional parameters for bdist_wheel" - default: "" - type: string - post-script: - description: "Post script to run prior to build" - default: "" - type: string - smoke-test-script: - description: "Script for Smoke Test for a specific domain" - default: "" - type: string - package-name: - description: "Name of the actual python package that is imported" - default: "" - type: string - trigger-event: - description: "Trigger Event in caller that determines whether or not to upload" - default: "" - type: string - cache-path: - description: "The path(s) on the runner to cache or restore. The path is relative to repository." - default: "" - type: string - cache-key: - description: "The key created when saving a cache and the key used to search for a cache." - default: "" - type: string - submodules: - description: "Works as stated in actions/checkout, but the default value is recursive" - required: false - type: string - default: recursive - timeout: - description: 'Timeout for the job (in minutes)' - default: 60 - type: number - -permissions: - id-token: write - contents: read - -jobs: - build: - strategy: - fail-fast: false - matrix: ${{ fromJSON(inputs.build-matrix) }} - env: - PYTHON_VERSION: ${{ matrix.python_version }} - PACKAGE_TYPE: wheel - REPOSITORY: ${{ inputs.repository }} - REF: ${{ inputs.ref }} - CU_VERSION: ${{ matrix.desired_cuda }} - UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }} - ARCH: win_amd64 - TENSORRT_STRIP_PREFIX: ${{ matrix.tensorrt.strip_prefix }} - TENSORRT_VERSION: ${{ matrix.tensorrt.version }} - TENSORRT_URLS: ${{ matrix.tensorrt.urls }} - TENSORRT_SHA256: ${{ matrix.tensorrt.sha256 }} - UPLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_win_amd64 - name: build_tensorrt${{ matrix.tensorrt.version }}_py${{matrix.python_version}}_${{matrix.desired_cuda}} - runs-on: ${{ matrix.validation_runner }} - defaults: - run: - shell: bash -l {0} - # If a build is 
taking longer than 60 minutes on these runners we need - # to have a conversation - timeout-minutes: 120 - steps: - - uses: actions/checkout@v6 - with: - # Support the use case where we need to checkout someone's fork - repository: ${{ inputs.test-infra-repository }} - ref: ${{ inputs.test-infra-ref }} - path: test-infra - - uses: ./test-infra/.github/actions/setup-ssh - name: Setup SSH - with: - github-secret: ${{ secrets.GITHUB_TOKEN }} - activate-with-label: false - instructions: "SSH with rdesktop using ssh -L 3389:localhost:3389 %%username%%@%%hostname%%" - - name: Add Conda scripts to GitHub path - run: | - echo "C:/Jenkins/Miniconda3/Scripts" >> $GITHUB_PATH - - uses: ./test-infra/.github/actions/set-channel - - name: Set PYTORCH_VERSION - if: ${{ env.CHANNEL == 'test' }} - run: | - # When building RC, set the version to be the current candidate version, - # otherwise, leave it alone so nightly will pick up the latest - echo "PYTORCH_VERSION=${{ matrix.stable_version }}" >> "${GITHUB_ENV}" - - uses: ./test-infra/.github/actions/setup-binary-builds - with: - repository: ${{ inputs.repository }} - ref: ${{ inputs.ref }} - submodules: ${{ inputs.submodules }} - setup-miniconda: false - python-version: ${{ env.PYTHON_VERSION }} - cuda-version: ${{ env.CU_VERSION }} - arch: ${{ env.ARCH }} - - name: Shorten Conda environment path - run: | - bash "${REPOSITORY}/.github/scripts/shorten-conda-env-windows.sh" - - name: Install XPU support package - if: ${{ matrix.gpu_arch_type == 'xpu' }} - run: | - cmd //c .\\test-infra\\.github\\scripts\\install_xpu.bat - - name: Install torch dependency - run: | - source "${BUILD_ENV_FILE}" - # shellcheck disable=SC2086 - ${CONDA_RUN} ${PIP_INSTALL_TORCH} - - name: Run Pre-Script with Caching - if: ${{ inputs.pre-script != '' }} - uses: ./test-infra/.github/actions/run-script-with-cache - with: - cache-path: ${{ inputs.cache-path }} - cache-key: ${{ inputs.cache-key }} - repository: ${{ inputs.repository }} - script: ${{ 
inputs.pre-script }} - is_windows: 'enabled' - - name: Build clean - working-directory: ${{ inputs.repository }} - env: - ENV_SCRIPT: ${{ inputs.env-script }} - run: | - source "${BUILD_ENV_FILE}" - if [[ -z "${ENV_SCRIPT}" ]]; then - ${CONDA_RUN} python setup.py clean - else - if [[ ! -f ${ENV_SCRIPT} ]]; then - echo "::error::Specified env-script file (${ENV_SCRIPT}) not found" - exit 1 - else - ${CONDA_RUN} ${ENV_SCRIPT} python setup.py clean - fi - fi - - name: Build the wheel (bdist_wheel) - working-directory: ${{ inputs.repository }} - env: - ENV_SCRIPT: ${{ inputs.env-script }} - BUILD_PARAMS: ${{ inputs.wheel-build-params }} - run: | - source "${BUILD_ENV_FILE}" - - if [[ "$CU_VERSION" == "cpu" ]]; then - # CUDA and CPU are ABI compatible on the CPU-only parts, so strip - # in this case - export PYTORCH_VERSION="$(${CONDA_RUN} pip show torch | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" - else - export PYTORCH_VERSION="$(${CONDA_RUN} pip show torch | grep ^Version: | sed 's/Version: *//')" - fi - - if [[ -z "${ENV_SCRIPT}" ]]; then - ${CONDA_RUN} python setup.py bdist_wheel - else - ${CONDA_RUN} ${ENV_SCRIPT} python setup.py bdist_wheel ${BUILD_PARAMS} - fi - - name: Run post-script - working-directory: ${{ inputs.repository }} - env: - POST_SCRIPT: ${{ inputs.post-script }} - ENV_SCRIPT: ${{ inputs.env-script }} - if: ${{ inputs.post-script != '' }} - run: | - set -euxo pipefail - source "${BUILD_ENV_FILE}" - ${CONDA_RUN} ${ENV_SCRIPT} ${POST_SCRIPT} - - name: Smoke Test - env: - ENV_SCRIPT: ${{ inputs.env-script }} - PACKAGE_NAME: ${{ inputs.package-name }} - SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} - run: | - source "${BUILD_ENV_FILE}" - WHEEL_NAME=$(ls "${{ inputs.repository }}/dist/") - echo "$WHEEL_NAME" - ${CONDA_RUN} pip install "${{ inputs.repository }}/dist/$WHEEL_NAME" - if [[ ! 
-f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT} ]]; then - echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found" - ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)" - else - echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} found" - ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}" - fi - # NB: Only upload to GitHub after passing smoke tests - - name: Upload wheel to GitHub - continue-on-error: true - uses: actions/upload-artifact@v6 - with: - name: ${{ env.UPLOAD_ARTIFACT_NAME }} - path: ${{ inputs.repository }}/dist/ - - uses: ./test-infra/.github/actions/teardown-windows - if: always() - name: Teardown Windows - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} - cancel-in-progress: true \ No newline at end of file diff --git a/.github/workflows/build-test-linux-aarch64-jetpack.yml b/.github/workflows/build-test-linux-aarch64-jetpack.yml deleted file mode 100644 index 5cda6acec5..0000000000 --- a/.github/workflows/build-test-linux-aarch64-jetpack.yml +++ /dev/null @@ -1,89 +0,0 @@ -name: Build and test Linux aarch64 wheels for Jetpack - -on: - pull_request: - push: - branches: - - main - - nightly - - release/* - tags: - # NOTE: Binary build pipelines should only get triggered on release candidate builds - # Release candidate tags look like: v1.11.0-rc1 - - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ - workflow_dispatch: - -jobs: - generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: wheel - os: linux-aarch64 - test-infra-repository: pytorch/test-infra - test-infra-ref: main - with-rocm: false - with-cpu: false - - filter-matrix: - needs: [generate-matrix] - outputs: - matrix: ${{ 
steps.filter.outputs.matrix }} - runs-on: ubuntu-latest - steps: - - uses: actions/setup-python@v6 - with: - python-version: "3.11" - - uses: actions/checkout@v6 - with: - repository: pytorch/tensorrt - - name: Filter matrix - id: filter - env: - LIMIT_PR_BUILDS: ${{ github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'ciflow/binaries/all') }} - run: | - set -eou pipefail - echo "LIMIT_PR_BUILDS=${LIMIT_PR_BUILDS}" - echo '${{ github.event_name }}' - echo '${{ github.event.ref}}' - MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} - MATRIX_BLOB="$(python3 .github/scripts/filter-matrix.py --matrix "${MATRIX_BLOB}" --jetpack true --limit-pr-builds "${LIMIT_PR_BUILDS}")" - echo "${MATRIX_BLOB}" - echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" - - build: - needs: filter-matrix - permissions: - id-token: write - contents: read - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - pre-script: packaging/pre_build_script.sh - env-var-script: packaging/env_vars.txt - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - package-name: torch_tensorrt - display-name: Build Jetpack torch-tensorrt whl package - name: ${{ matrix.display-name }} - uses: ./.github/workflows/build_linux.yml - with: - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - env-var-script: ${{ matrix.env-var-script }} - post-script: ${{ matrix.post-script }} - package-name: ${{ matrix.package-name }} - smoke-test-script: ${{ matrix.smoke-test-script }} - trigger-event: ${{ github.event_name }} - architecture: "aarch64" - is-jetpack: true - - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' 
}}-${{ inputs.job-name }} - cancel-in-progress: true \ No newline at end of file diff --git a/.github/workflows/build-test-linux-aarch64-python-only.yml b/.github/workflows/build-test-linux-aarch64-python-only.yml deleted file mode 100644 index 9900877d82..0000000000 --- a/.github/workflows/build-test-linux-aarch64-python-only.yml +++ /dev/null @@ -1,89 +0,0 @@ -name: Python-only build Linux aarch64 wheels - -on: - pull_request: - push: - branches: - - main - - nightly - - release/* - tags: - # NOTE: Binary build pipelines should only get triggered on release candidate builds - # Release candidate tags look like: v1.11.0-rc1 - - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ - workflow_dispatch: - -permissions: - contents: read - -jobs: - generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: wheel - os: linux-aarch64 - test-infra-repository: pytorch/test-infra - test-infra-ref: main - with-rocm: false - with-cpu: false - - filter-matrix: - needs: [generate-matrix] - outputs: - matrix: ${{ steps.filter.outputs.matrix }} - runs-on: ubuntu-latest - steps: - - uses: actions/setup-python@v6 - with: - python-version: "3.11" - - uses: actions/checkout@v6 - with: - repository: pytorch/tensorrt - - name: Filter matrix - id: filter - env: - LIMIT_PR_BUILDS: ${{ github.event_name == 'pull_request' && !contains( github.event.pull_request.labels.*.name, 'ciflow/binaries/all') }} - run: | - set -eou pipefail - MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} - MATRIX_BLOB="$(python3 .github/scripts/filter-matrix.py --matrix "${MATRIX_BLOB}")" - echo "${MATRIX_BLOB}" - echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" - - build: - needs: filter-matrix - permissions: - id-token: write - contents: read - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - pre-script: packaging/pre_build_script.sh - env-var-script: packaging/env_vars_python_only.txt - post-script: 
packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - package-name: torch_tensorrt - display-name: Python-only build Linux aarch64 torch-tensorrt whl package - name: ${{ matrix.display-name }} - uses: ./.github/workflows/build_linux.yml - with: - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - env-var-script: ${{ matrix.env-var-script }} - post-script: ${{ matrix.post-script }} - package-name: ${{ matrix.package-name }} - smoke-test-script: ${{ matrix.smoke-test-script }} - trigger-event: ${{ github.event_name }} - architecture: "aarch64" - use-rtx: false - pip-install-torch-extra-args: "--extra-index-url https://pypi.org/simple" - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-python-only-${{ github.event_name == 'workflow_dispatch' }} - cancel-in-progress: true diff --git a/.github/workflows/build-test-linux-aarch64.yml b/.github/workflows/build-test-linux-aarch64.yml deleted file mode 100644 index eea7ec01fa..0000000000 --- a/.github/workflows/build-test-linux-aarch64.yml +++ /dev/null @@ -1,85 +0,0 @@ -name: Build and test Linux aarch64 wheels - -on: - pull_request: - push: - branches: - - main - - nightly - - release/* - tags: - # NOTE: Binary build pipelines should only get triggered on release candidate builds - # Release candidate tags look like: v1.11.0-rc1 - - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ - workflow_dispatch: - -jobs: - generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: wheel - os: linux-aarch64 - test-infra-repository: pytorch/test-infra - test-infra-ref: main - with-rocm: false - with-cpu: false - - filter-matrix: - needs: [generate-matrix] - outputs: - matrix: ${{ steps.filter.outputs.matrix }} - runs-on: ubuntu-latest - steps: - 
- uses: actions/setup-python@v6 - with: - python-version: "3.11" - - uses: actions/checkout@v6 - with: - repository: pytorch/tensorrt - - name: Filter matrix - id: filter - env: - LIMIT_PR_BUILDS: ${{ github.event_name == 'pull_request' && !contains( github.event.pull_request.labels.*.name, 'ciflow/binaries/all') }} - run: | - set -eou pipefail - MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} - MATRIX_BLOB="$(python3 .github/scripts/filter-matrix.py --matrix "${MATRIX_BLOB}")" - echo "${MATRIX_BLOB}" - echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" - - build: - needs: filter-matrix - permissions: - id-token: write - contents: read - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - pre-script: packaging/pre_build_script.sh - env-var-script: packaging/env_vars.txt - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - package-name: torch_tensorrt - display-name: Build SBSA torch-tensorrt whl package - name: ${{ matrix.display-name }} - uses: ./.github/workflows/build_linux.yml - with: - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - env-var-script: ${{ matrix.env-var-script }} - post-script: ${{ matrix.post-script }} - package-name: ${{ matrix.package-name }} - smoke-test-script: ${{ matrix.smoke-test-script }} - trigger-event: ${{ github.event_name }} - architecture: "aarch64" - pip-install-torch-extra-args: "--extra-index-url https://pypi.org/simple" - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} - cancel-in-progress: true \ No newline at end of file diff --git a/.github/workflows/build-test-linux-x86_64-python-only.yml 
b/.github/workflows/build-test-linux-x86_64-python-only.yml deleted file mode 100644 index c79d4157bb..0000000000 --- a/.github/workflows/build-test-linux-x86_64-python-only.yml +++ /dev/null @@ -1,117 +0,0 @@ -name: Python-only build and test Linux x86_64 wheels - -on: - pull_request: - push: - branches: - - main - - nightly - - release/* - tags: - # NOTE: Binary build pipelines should only get triggered on release candidate builds - # Release candidate tags look like: v1.11.0-rc1 - - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ - workflow_dispatch: - -permissions: - contents: read - -jobs: - generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: wheel - os: linux - test-infra-repository: pytorch/test-infra - test-infra-ref: main - with-rocm: false - with-cpu: false - - filter-matrix: - needs: [generate-matrix] - outputs: - matrix: ${{ steps.generate.outputs.matrix }} - runs-on: ubuntu-latest - steps: - - uses: actions/setup-python@v6 - with: - python-version: "3.11" - - uses: actions/checkout@v6 - with: - repository: pytorch/tensorrt - - name: Generate matrix - id: generate - run: | - set -eou pipefail - MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} - MATRIX_BLOB="$(python3 .github/scripts/filter-matrix.py --matrix "${MATRIX_BLOB}")" - echo "${MATRIX_BLOB}" - echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" - - build: - needs: filter-matrix - permissions: - id-token: write - contents: read - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - pre-script: packaging/pre_build_script.sh - env-var-script: packaging/env_vars_python_only.txt - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - package-name: torch_tensorrt - display-name: Python-only build Linux x86_64 torch-tensorrt whl package - name: ${{ matrix.display-name }} - uses: ./.github/workflows/build_linux.yml - with: - repository: ${{ matrix.repository 
}} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - env-var-script: ${{ matrix.env-var-script }} - post-script: ${{ matrix.post-script }} - package-name: ${{ matrix.package-name }} - smoke-test-script: ${{ matrix.smoke-test-script }} - trigger-event: ${{ github.event_name }} - architecture: "x86_64" - use-rtx: false - pip-install-torch-extra-args: "--extra-index-url https://pypi.org/simple" - - dynamo-runtime-tests: - name: ${{ matrix.display-name }} - needs: [filter-matrix, build] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - display-name: Python-only dynamo runtime tests - uses: ./.github/workflows/linux-test.yml - with: - job-name: python-only-dynamo-runtime-tests - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - script: | - set -euo pipefail - pushd . - cd tests/py/dynamo/runtime - python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/python_only_dynamo_runtime_tests_results.xml . 
- popd - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-tensorrt-python-only-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} - cancel-in-progress: true diff --git a/.github/workflows/build-test-linux-x86_64.yml b/.github/workflows/build-test-linux-x86_64.yml deleted file mode 100644 index 89b3ee3a32..0000000000 --- a/.github/workflows/build-test-linux-x86_64.yml +++ /dev/null @@ -1,123 +0,0 @@ -name: Build and test Linux x86_64 wheels - -on: - pull_request: - push: - branches: - - main - - nightly - - release/* - tags: - # NOTE: Binary build pipelines should only get triggered on release candidate builds - # Release candidate tags look like: v1.11.0-rc1 - - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ - workflow_dispatch: - -jobs: - # All build/test logic lives in the shared reusable core; this workflow - # just selects standard TensorRT (use-rtx: false). See - # .github/workflows/_linux-x86_64-core.yml. - core: - permissions: - id-token: write - contents: read - uses: ./.github/workflows/_linux-x86_64-core.yml - with: - use-rtx: false - - # Single rollup status that summarises every build/test job. Mark this one - # as the required check in branch protection — reviewers see a single - # ✅/❌ instead of 50 matrix entries. Click-through still surfaces the - # individual job logs. - # - # ``if: always()`` makes the rollup run even if the core jobs failed, - # were skipped, or were cancelled (so we always render a check). The body - # fails the rollup iff any build/test job ended in 'failure'; 'skipped' - # (label-gated) and 'success' both count as healthy. 
- ci-rollup: - name: CI / Linux x86_64 - if: ${{ always() }} - permissions: {} - needs: [core] - runs-on: ubuntu-latest - steps: - - name: Aggregate job results - env: - RESULTS: ${{ needs.core.outputs.results }} - # Safety net: if the core reusable call concluded - # failure/cancelled but its results output came back empty - # (a known edge case), still fail the rollup rather than - # silently reporting green. - CORE_RESULT: ${{ needs.core.result }} - WORKFLOW_LABEL: "Linux x86_64" - run: | - set -euo pipefail - # Emit two surfaces: - # * stdout / job exit code → drives the green/red rollup - # status that branch protection keys on. - # * $GITHUB_STEP_SUMMARY → the markdown that renders - # on the workflow run page, with a per-job result table. - python3 - <<'PY' - import json, os, sys - raw = os.environ.get("RESULTS") or "{}" - core_result = os.environ.get("CORE_RESULT", "") - try: - needs = json.loads(raw) - except json.JSONDecodeError: - needs = {} - label = os.environ.get("WORKFLOW_LABEL", "Linux x86_64") - by_result = {"success": [], "failure": [], "skipped": [], "cancelled": []} - for name, info in needs.items(): - by_result.setdefault(info.get("result") or "unknown", []).append(name) - failed = sorted(by_result["failure"]) - passed = sorted(by_result["success"]) - skipped = sorted(by_result["skipped"]) - cancelled = sorted(by_result["cancelled"]) - - # --- stdout: short pass/fail summary for the log tab --- - print(f"PASS: {len(passed)}") - print(f"FAIL: {len(failed)}") - print(f"SKIPPED: {len(skipped)} (label-gated or never started)") - print(f"CANCELLED: {len(cancelled)}") - if failed: - print() - print("Failed jobs:") - for name in failed: - print(f" - {name}") - - # --- step summary: markdown table for reviewers --- - summary_path = os.environ.get("GITHUB_STEP_SUMMARY") - if summary_path: - icon = {"success": "✅", "failure": "❌", "skipped": "⏭️", "cancelled": "🚫"} - with open(summary_path, "a", encoding="utf-8") as f: - f.write(f"# CI / {label} — 
rollup\n\n") - f.write( - f"**{len(passed)}** passed · " - f"**{len(failed)}** failed · " - f"**{len(skipped)}** skipped · " - f"**{len(cancelled)}** cancelled\n\n" - ) - f.write("| Result | Job |\n|---|---|\n") - for status in ("failure", "cancelled", "skipped", "success"): - for name in sorted(by_result.get(status, [])): - f.write(f"| {icon.get(status, '?')} {status} | `{name}` |\n") - if failed: - f.write( - "\n> Click into a failed job above to see " - "the rendered test table (via `pytest-results-action`) " - "and the `::warning::Reproduce locally with: ...` hint " - "near the bottom of the log.\n" - ) - - # Fail if any job failed, OR the core call did not succeed - # (covers an empty/missing results payload). - if failed or core_result not in ("success", "skipped", ""): - if core_result not in ("success", "skipped", "") and not failed: - print(f"\nCore reusable workflow concluded '{core_result}' " - f"with no per-job results; failing rollup defensively.") - sys.exit(1) - PY - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.event_name == 'workflow_dispatch' }} - cancel-in-progress: true diff --git a/.github/workflows/build-test-linux-x86_64_rtx-python-only.yml b/.github/workflows/build-test-linux-x86_64_rtx-python-only.yml deleted file mode 100644 index 27e6a21f69..0000000000 --- a/.github/workflows/build-test-linux-x86_64_rtx-python-only.yml +++ /dev/null @@ -1,118 +0,0 @@ -name: RTX - Python-only build and test Linux x86_64 wheels - -on: - pull_request: - push: - branches: - - main - - nightly - - release/* - tags: - # NOTE: Binary build pipelines should only get triggered on release candidate builds - # Release candidate tags look like: v1.11.0-rc1 - - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ - workflow_dispatch: - -permissions: - contents: read - -jobs: - generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: wheel - os: linux - 
test-infra-repository: pytorch/test-infra - test-infra-ref: main - with-rocm: false - with-cpu: false - - filter-matrix: - needs: [generate-matrix] - outputs: - matrix: ${{ steps.generate.outputs.matrix }} - runs-on: ubuntu-latest - steps: - - uses: actions/setup-python@v6 - with: - python-version: "3.11" - - uses: actions/checkout@v6 - with: - repository: pytorch/tensorrt - - name: Generate matrix - id: generate - run: | - set -eou pipefail - MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} - MATRIX_BLOB="$(python3 .github/scripts/filter-matrix.py --use-rtx true --matrix "${MATRIX_BLOB}")" - echo "${MATRIX_BLOB}" - echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" - - build: - needs: filter-matrix - permissions: - id-token: write - contents: read - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - pre-script: packaging/pre_build_script.sh - env-var-script: packaging/env_vars_python_only.txt - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - package-name: torch_tensorrt - display-name: RTX - Python-only build Linux x86_64 torch-tensorrt-rtx whl package - name: ${{ matrix.display-name }} - uses: ./.github/workflows/build_linux.yml - with: - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - env-var-script: ${{ matrix.env-var-script }} - post-script: ${{ matrix.post-script }} - package-name: ${{ matrix.package-name }} - smoke-test-script: ${{ matrix.smoke-test-script }} - trigger-event: ${{ github.event_name }} - architecture: "x86_64" - use-rtx: true - pip-install-torch-extra-args: "--extra-index-url https://pypi.org/simple" - - dynamo-runtime-tests: - name: ${{ matrix.display-name }} - needs: [filter-matrix, build] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || 
startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - display-name: RTX - Python-only dynamo runtime tests - uses: ./.github/workflows/linux-test.yml - with: - job-name: rtx-python-only-dynamo-runtime-tests - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.filter-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - use-rtx: true - script: | - set -euo pipefail - pushd . - cd tests/py/dynamo/runtime - python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/python_only_dynamo_runtime_tests_results.xml . - popd - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-tensorrt-rtx-python-only-${{ github.event_name == 'workflow_dispatch' }} - cancel-in-progress: true diff --git a/.github/workflows/build-test-linux-x86_64_rtx.yml b/.github/workflows/build-test-linux-x86_64_rtx.yml deleted file mode 100644 index aed838a50b..0000000000 --- a/.github/workflows/build-test-linux-x86_64_rtx.yml +++ /dev/null @@ -1,109 +0,0 @@ -name: RTX - Build and test Linux x86_64 wheels - -on: - pull_request: - push: - branches: - - main - - nightly - - release/* - tags: - # NOTE: Binary build pipelines should only get triggered on release candidate builds - # Release candidate tags look like: v1.11.0-rc1 - - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ - workflow_dispatch: - -jobs: - # All build/test logic lives in the shared reusable core; this workflow - # just selects TensorRT-RTX (use-rtx: true). 
See - # .github/workflows/_linux-x86_64-core.yml. - core: - permissions: - id-token: write - contents: read - uses: ./.github/workflows/_linux-x86_64-core.yml - with: - use-rtx: true - name-prefix: "RTX - " - - # Single rollup status for the RTX matrix; mirror the non-RTX workflow's - # ci-rollup so branch protection can require one check per workflow. - ci-rollup: - name: CI / Linux x86_64 (RTX) - if: ${{ always() }} - permissions: {} - needs: [core] - runs-on: ubuntu-latest - steps: - - name: Aggregate job results - env: - RESULTS: ${{ needs.core.outputs.results }} - # Safety net: fail the rollup if the core call concluded - # failure/cancelled even when its results output came back empty. - CORE_RESULT: ${{ needs.core.result }} - # Surface a label so the markdown summary disambiguates RTX vs standard. - WORKFLOW_LABEL: "Linux x86_64 (RTX)" - run: | - set -euo pipefail - # Same logic as the non-RTX rollup: stdout for the rollup status, - # $GITHUB_STEP_SUMMARY for the reviewer-facing markdown table. 
- python3 - <<'PY' - import json, os, sys - raw = os.environ.get("RESULTS") or "{}" - core_result = os.environ.get("CORE_RESULT", "") - try: - needs = json.loads(raw) - except json.JSONDecodeError: - needs = {} - label = os.environ.get("WORKFLOW_LABEL", "Linux x86_64") - by_result = {"success": [], "failure": [], "skipped": [], "cancelled": []} - for name, info in needs.items(): - by_result.setdefault(info.get("result") or "unknown", []).append(name) - failed = sorted(by_result["failure"]) - passed = sorted(by_result["success"]) - skipped = sorted(by_result["skipped"]) - cancelled = sorted(by_result["cancelled"]) - - print(f"PASS: {len(passed)}") - print(f"FAIL: {len(failed)}") - print(f"SKIPPED: {len(skipped)} (label-gated or never started)") - print(f"CANCELLED: {len(cancelled)}") - if failed: - print() - print("Failed jobs:") - for name in failed: - print(f" - {name}") - - summary_path = os.environ.get("GITHUB_STEP_SUMMARY") - if summary_path: - icon = {"success": "✅", "failure": "❌", "skipped": "⏭️", "cancelled": "🚫"} - with open(summary_path, "a", encoding="utf-8") as f: - f.write(f"# CI / {label} — rollup\n\n") - f.write( - f"**{len(passed)}** passed · " - f"**{len(failed)}** failed · " - f"**{len(skipped)}** skipped · " - f"**{len(cancelled)}** cancelled\n\n" - ) - f.write("| Result | Job |\n|---|---|\n") - for status in ("failure", "cancelled", "skipped", "success"): - for name in sorted(by_result.get(status, [])): - f.write(f"| {icon.get(status, '?')} {status} | `{name}` |\n") - if failed: - f.write( - "\n> Click into a failed job above to see the " - "rendered test table (via `pytest-results-action`) " - "and the `::warning::Reproduce locally with: ...` " - "hint near the bottom of the log.\n" - ) - - if failed or core_result not in ("success", "skipped", ""): - if core_result not in ("success", "skipped", "") and not failed: - print(f"\nCore reusable workflow concluded '{core_result}' " - f"with no per-job results; failing rollup defensively.") - 
sys.exit(1) - PY - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.event_name == 'workflow_dispatch' }} - cancel-in-progress: true diff --git a/.github/workflows/build-test-tensorrt-linux.yml b/.github/workflows/build-test-tensorrt-linux.yml deleted file mode 100644 index 2b1a08c45a..0000000000 --- a/.github/workflows/build-test-tensorrt-linux.yml +++ /dev/null @@ -1,336 +0,0 @@ -name: Build and Test Torch-TensorRT on Linux with Future TensorRT Versions - -on: - workflow_dispatch: - schedule: - - cron: '0 0 * * 0' # Runs at 00:00 UTC every Sunday (minute hour day-of-month month-of-year day-of-week) - -permissions: - id-token: write - contents: read - packages: write - -jobs: - generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: wheel - os: linux - test-infra-repository: pytorch/test-infra - test-infra-ref: main - with-rocm: false - with-cpu: false - python-versions: '["3.11"]' - - generate-tensorrt-matrix: - needs: [generate-matrix] - outputs: - matrix: ${{ steps.generate.outputs.matrix }} - runs-on: ubuntu-latest - steps: - - uses: actions/setup-python@v6 - with: - python-version: '3.11' - - uses: actions/checkout@v6 - with: - repository: pytorch/tensorrt - - name: Generate tensorrt matrix - id: generate - run: | - set -eou pipefail - python -m pip install --upgrade pip - pip install requests - MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} - MATRIX_BLOB="$(python3 .github/scripts/generate-tensorrt-test-matrix.py --matrix "${MATRIX_BLOB}")" - echo "${MATRIX_BLOB}" - echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" - - build: - needs: [generate-tensorrt-matrix] - name: Build torch-tensorrt whl package - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - pre-script: packaging/pre_build_script.sh - env-var-script: packaging/env_vars.txt - post-script: 
packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - package-name: torch_tensorrt - uses: ./.github/workflows/build-tensorrt-linux.yml - with: - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - env-var-script: ${{ matrix.env-var-script }} - post-script: ${{ matrix.post-script }} - package-name: ${{ matrix.package-name }} - smoke-test-script: ${{ matrix.smoke-test-script }} - trigger-event: ${{ github.event_name }} - - tests-py-torchscript-fe: - name: Test torchscript frontend [Python] - needs: [generate-tensorrt-matrix, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - uses: ./.github/workflows/linux-test.yml - with: - job-name: tests-py-torchscript-fe - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH - pushd . - cd tests/modules - python hub.py - popd - pushd . 
- cd tests/py/ts - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ - popd - - tests-py-dynamo-converters: - name: Test dynamo converters [Python] - needs: [generate-tensorrt-matrix, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - uses: ./.github/workflows/linux-test.yml - with: - job-name: tests-py-dynamo-converters - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py - cd dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ - popd - - tests-py-dynamo-fe: - name: Test dynamo frontend [Python] - needs: [generate-tensorrt-matrix, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - uses: ./.github/workflows/linux-test.yml - with: - job-name: tests-py-dynamo-fe - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . 
- cd tests/py - cd dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/ - popd - - tests-py-dynamo-serde: - name: Test dynamo export serde [Python] - needs: [generate-tensorrt-matrix, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - uses: ./.github/workflows/linux-test.yml - with: - job-name: tests-py-dynamo-serde - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py - cd dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py - popd - - tests-py-torch-compile-be: - name: Test torch compile backend [Python] - needs: [generate-tensorrt-matrix, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - uses: ./.github/workflows/linux-test.yml - with: - job-name: tests-py-torch-compile-be - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . 
- cd tests/py - cd dynamo - python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ - python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_complete_be_e2e_test_results.xml --ir torch_compile models/test_models.py - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py - popd - - tests-py-dynamo-core: - name: Test dynamo core [Python] - needs: [generate-tensorrt-matrix, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - uses: ./.github/workflows/linux-test.yml - with: - job-name: tests-py-dynamo-core - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . 
- cd tests/py - cd dynamo - python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/ - python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ - python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ - popd - - tests-py-dynamo-cudagraphs: - name: Test dynamo cudagraphs [Python] - needs: [generate-tensorrt-matrix, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - uses: ./.github/workflows/linux-test.yml - with: - job-name: tests-py-dynamo-cudagraphs - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . 
- cd tests/py - cd dynamo - nvidia-smi - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_cpp_test_results.xml runtime/test_002_cudagraphs_cpp.py || true - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_py_test_results.xml runtime/test_002_cudagraphs_py.py || true - popd - - tests-py-core: - name: Test core [Python] - needs: [generate-tensorrt-matrix, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - pre-script: packaging/pre_build_script.sh - post-script: packaging/post_build_script.sh - smoke-test-script: packaging/smoke_test_script.sh - uses: ./.github/workflows/linux-test.yml - with: - job-name: tests-py-core - repository: "pytorch/tensorrt" - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py/core - python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . 
- popd - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} - cancel-in-progress: true diff --git a/.github/workflows/build-test-tensorrt-windows.yml b/.github/workflows/build-test-tensorrt-windows.yml deleted file mode 100644 index 6c66c8d7c6..0000000000 --- a/.github/workflows/build-test-tensorrt-windows.yml +++ /dev/null @@ -1,320 +0,0 @@ -name: Build and Test Torch-TensorRT on Windows with Future TensorRT Versions - -on: - workflow_dispatch: - schedule: - - cron: '0 0 * * 0' # Runs at 00:00 UTC every Sunday (minute hour day-of-month month-of-year day-of-week) - -permissions: - id-token: write - contents: read - packages: write - -jobs: - generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: wheel - os: windows - test-infra-repository: pytorch/test-infra - test-infra-ref: main - with-rocm: false - with-cpu: false - python-versions: '["3.11"]' - - generate-tensorrt-matrix: - needs: [generate-matrix] - outputs: - matrix: ${{ steps.generate.outputs.matrix }} - runs-on: ubuntu-latest - steps: - - uses: actions/setup-python@v6 - with: - python-version: '3.11' - - uses: actions/checkout@v6 - with: - repository: pytorch/tensorrt - - name: Generate tensorrt matrix - id: generate - run: | - set -eou pipefail - python -m pip install --upgrade pip - pip install requests - MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} - MATRIX_BLOB="$(python3 .github/scripts/generate-tensorrt-test-matrix.py --matrix "${MATRIX_BLOB}")" - echo "${MATRIX_BLOB}" - echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" - - substitute-runner: - needs: generate-tensorrt-matrix - outputs: - matrix: ${{ steps.substitute.outputs.matrix }} - runs-on: ubuntu-latest - steps: - - name: Substitute runner - id: substitute - run: | - echo matrix="$(echo '${{ 
needs.generate-tensorrt-matrix.outputs.matrix }}' | sed -e 's/windows.g4dn.xlarge/windows.g5.4xlarge.nvidia.gpu/g')" >> ${GITHUB_OUTPUT} - - build: - needs: substitute-runner - name: Build torch-tensorrt whl package - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - pre-script: packaging/pre_build_script_windows.sh - env-script: packaging/vc_env_helper.bat - smoke-test-script: packaging/smoke_test_windows.py - package-name: torch_tensorrt - uses: ./.github/workflows/build-tensorrt-windows.yml - with: - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - env-script: ${{ matrix.env-script }} - smoke-test-script: ${{ matrix.smoke-test-script }} - package-name: ${{ matrix.package-name }} - trigger-event: ${{ github.event_name }} - timeout: 120 - - tests-py-torchscript-fe: - name: Test torchscript frontend [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-torchscript-fe - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/modules - python hub.py - popd - pushd . 
- cd tests/py/ts - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ - popd - - tests-py-dynamo-converters: - name: Test dynamo converters [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-dynamo-converters - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py - cd dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ - popd - - tests-py-dynamo-fe: - name: Test dynamo frontend [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-dynamo-fe - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . 
- cd tests/py - cd dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/ - popd - - tests-py-dynamo-serde: - name: Test dynamo export serde [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-dynamo-serde - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py - cd dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py - popd - - tests-py-torch-compile-be: - name: Test torch compile backend [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-torch-compile-be - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . 
- cd tests/py - cd dynamo - python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ - python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_complete_be_e2e_test_results.xml --ir torch_compile models/test_models.py - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py - popd - - tests-py-dynamo-core: - name: Test dynamo core [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-dynamo-core - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . 
- cd tests/py - cd dynamo - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/ - python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ - popd - - tests-py-dynamo-cudagraphs: - name: Test dynamo cudagraphs [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-dynamo-cudagraphs - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . 
- cd tests/py - cd dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_cpp_test_results.xml runtime/test_002_cudagraphs_cpp.py - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_py_test_results.xml runtime/test_002_cudagraphs_py.py - popd - - tests-py-core: - name: Test core [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-core - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py/core - python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . 
- popd - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} - cancel-in-progress: true diff --git a/.github/workflows/build-test-windows-python-only.yml b/.github/workflows/build-test-windows-python-only.yml deleted file mode 100644 index c48a054479..0000000000 --- a/.github/workflows/build-test-windows-python-only.yml +++ /dev/null @@ -1,122 +0,0 @@ -name: Python-only build and test Windows wheels - -on: - pull_request: - push: - branches: - - main - - nightly - - release/* - tags: - # NOTE: Binary build pipelines should only get triggered on release candidate builds - # Release candidate tags look like: v1.11.0-rc1 - - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ - workflow_dispatch: - -permissions: - contents: read - -jobs: - generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: wheel - os: windows - test-infra-repository: pytorch/test-infra - test-infra-ref: main - with-rocm: false - with-cpu: false - - filter-matrix: - needs: [generate-matrix] - outputs: - matrix: ${{ steps.generate.outputs.matrix }} - runs-on: ubuntu-latest - steps: - - uses: actions/setup-python@v6 - with: - python-version: "3.11" - - uses: actions/checkout@v6 - with: - repository: pytorch/tensorrt - - name: Generate matrix - id: generate - run: | - set -eou pipefail - MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} - MATRIX_BLOB="$(python3 .github/scripts/filter-matrix.py --matrix "${MATRIX_BLOB}")" - echo "${MATRIX_BLOB}" - echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" - - substitute-runner: - needs: filter-matrix - outputs: - matrix: ${{ steps.substitute.outputs.matrix }} - runs-on: ubuntu-latest - steps: - - name: Substitute runner - id: substitute - run: | - echo matrix="$(echo '${{ needs.filter-matrix.outputs.matrix }}' | sed -e 
's/windows.g4dn.xlarge/windows.g5.4xlarge.nvidia.gpu/g')" >> ${GITHUB_OUTPUT} - - build: - needs: substitute-runner - permissions: - id-token: write - contents: read - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - pre-script: packaging/pre_build_script_windows.sh - env-script: packaging/vc_env_helper.bat - smoke-test-script: packaging/smoke_test_windows.py - package-name: torch_tensorrt - display-name: Python-only build Windows torch-tensorrt whl package - name: ${{ matrix.display-name }} - uses: ./.github/workflows/build_windows.yml - with: - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - env-script: ${{ matrix.env-script }} - wheel-build-params: "--py-only" - smoke-test-script: ${{ matrix.smoke-test-script }} - package-name: ${{ matrix.package-name }} - trigger-event: ${{ github.event_name }} - timeout: 120 - - dynamo-runtime-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: Python-only dynamo runtime tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: python-only-dynamo-runtime-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - pushd . 
- cd tests/py/dynamo/runtime - ../../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/python_only_dynamo_runtime_tests_results.xml . - popd - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-python-only-${{ github.event_name == 'workflow_dispatch' }} - cancel-in-progress: true diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml deleted file mode 100644 index f092e5a7d0..0000000000 --- a/.github/workflows/build-test-windows.yml +++ /dev/null @@ -1,461 +0,0 @@ -name: Build and test Windows wheels - -on: - pull_request: - push: - branches: - - main - - nightly - - release/* - tags: - # NOTE: Binary build pipelines should only get triggered on release candidate builds - # Release candidate tags look like: v1.11.0-rc1 - - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ - workflow_dispatch: - -jobs: - generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: wheel - os: windows - test-infra-repository: pytorch/test-infra - test-infra-ref: main - with-rocm: false - with-cpu: false - - filter-matrix: - needs: [generate-matrix] - outputs: - matrix: ${{ steps.generate.outputs.matrix }} - runs-on: ubuntu-latest - steps: - - uses: actions/setup-python@v6 - with: - python-version: '3.11' - - uses: actions/checkout@v6 - with: - repository: pytorch/tensorrt - - name: Generate matrix - id: generate - run: | - set -eou pipefail - MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} - MATRIX_BLOB="$(python3 .github/scripts/filter-matrix.py --matrix "${MATRIX_BLOB}")" - echo "${MATRIX_BLOB}" - echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" - - substitute-runner: - needs: filter-matrix - outputs: - matrix: ${{ steps.substitute.outputs.matrix }} - runs-on: ubuntu-latest - steps: - - name: Substitute runner - id: substitute - run: | - echo matrix="$(echo '${{ 
needs.filter-matrix.outputs.matrix }}' | sed -e 's/windows.g4dn.xlarge/windows.g5.4xlarge.nvidia.gpu/g')" >> ${GITHUB_OUTPUT} - - build: - needs: substitute-runner - permissions: - id-token: write - contents: read - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - pre-script: packaging/pre_build_script_windows.sh - env-script: packaging/vc_env_helper.bat - smoke-test-script: packaging/smoke_test_windows.py - package-name: torch_tensorrt - display-name: Build Windows torch-tensorrt whl package - name: ${{ matrix.display-name }} - uses: ./.github/workflows/build_windows.yml - with: - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - env-script: ${{ matrix.env-script }} - smoke-test-script: ${{ matrix.smoke-test-script }} - package-name: ${{ matrix.package-name }} - trigger-event: ${{ github.event_name }} - timeout: 120 - - L0-dynamo-converter-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L0 dynamo converter tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L0-dynamo-converter-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - pushd . 
- cd tests/py/dynamo - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_dynamo_converter_tests_results.xml --dist=loadscope --maxfail=20 conversion/ - popd - - L0-dynamo-core-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L0 dynamo core tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L0-dynamo-core-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - pushd . 
- cd tests/py/dynamo - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_dynamo_core_runtime_tests_results.xml runtime/test_000_* - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_dynamo_core_partitioning_tests_results.xml partitioning/test_000_* - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_dynamo_core_lowering_tests_results.xml lowering/ - popd - - L0-py-core-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L0 core python tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L0-core-python-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - pushd . - cd tests/py/core - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_py_core_tests_results.xml . 
- popd - - L0-torchscript-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L0 torchscript tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L0-torchscript-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - pushd . - cd tests/modules - python hub.py - popd - pushd . - cd tests/py/ts - ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_ts_api_tests_results.xml api/ - popd - - L1-dynamo-core-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build, L0-dynamo-converter-tests, L0-dynamo-core-tests, L0-py-core-tests, L0-torchscript-tests] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L1 dynamo core tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L1-dynamo-core-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ 
needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - pushd . - cd tests/py/dynamo - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_dynamo_core_tests_results.xml runtime/test_001_* - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_dynamo_core_partitioning_tests_results.xml partitioning/test_001_* - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_dynamo_hlo_tests_results.xml hlo/ - popd - - L1-dynamo-compile-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build, L0-dynamo-converter-tests, L0-dynamo-core-tests, L0-py-core-tests, L0-torchscript-tests] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L1 dynamo compile tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L1-dynamo-compile-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - pushd . 
- cd tests/py/dynamo/ - ../../../packaging/vc_env_helper.bat python -m pytest -m critical -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_dynamo_compile_tests_results.xml models/ - popd - - L1-torch-compile-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build, L0-dynamo-converter-tests, L0-dynamo-core-tests, L0-py-core-tests, L0-torchscript-tests] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L1 torch compile tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L1-torch-compile-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - pushd . 
- cd tests/py/dynamo/ - ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_torch_compile_be_tests_results.xml backend/ - ../../../packaging/vc_env_helper.bat python -m pytest -m critical -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_torch_compile_models_tests_results.xml --ir torch_compile models/test_models.py - ../../../packaging/vc_env_helper.bat python -m pytest -m critical -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_torch_compile_dyn_models_tests_results.xml --ir torch_compile models/test_dyn_models.py - popd - - L1-torchscript-tests: - name: L1 torchscript tests - needs: [substitute-runner, build, L0-dynamo-converter-tests, L0-dynamo-core-tests, L0-py-core-tests, L0-torchscript-tests] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: L1-torchscript-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - pushd . - cd tests/modules - python hub.py - popd - pushd . 
- cd tests/py/ts - ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_ts_models_tests_results.xml models/ - popd - - L2-torch-compile-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build, L1-torch-compile-tests, L1-dynamo-compile-tests, L1-dynamo-core-tests, L1-torchscript-tests] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L2 torch compile tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L2-torch-compile-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - pushd . 
- cd tests/py/dynamo/ - ../../../packaging/vc_env_helper.bat python -m pytest -m "not critical" -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l2_torch_compile_models_tests_results.xml --ir torch_compile models/test_models.py - ../../../packaging/vc_env_helper.bat python -m pytest -m "not critical" -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l2_torch_compile_dyn_models_tests_results.xml --ir torch_compile models/test_dyn_models.py - popd - - L2-dynamo-compile-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build, L1-torch-compile-tests, L1-dynamo-compile-tests, L1-dynamo-core-tests, L1-torchscript-tests] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L2 dynamo compile tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L2-dynamo-compile-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - pushd . 
- cd tests/py/dynamo/ - ../../../packaging/vc_env_helper.bat python -m pytest -m "not critical" -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l2_dynamo_compile_tests_results.xml models/ - ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l2_dynamo_compile_llm_tests_results.xml llm/ - popd - - L2-dynamo-core-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build, L1-torch-compile-tests, L1-dynamo-compile-tests, L1-dynamo-core-tests, L1-torchscript-tests] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L2 dynamo core tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L2-dynamo-core-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - pushd . 
- cd tests/py/dynamo - ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l2_dynamo_core_tests_results.xml -k "not test_000_ and not test_001_" runtime/* - popd - - L2-dynamo-plugin-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build, L1-torch-compile-tests, L1-dynamo-compile-tests, L1-dynamo-core-tests, L1-torchscript-tests] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L2 dynamo plugin tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L2-dynamo-plugin-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - pushd . 
- cd tests/py/dynamo - ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l2_dynamo_plugins_tests_results.xml automatic_plugin/ - popd - - L2-torchscript-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build, L1-torch-compile-tests, L1-dynamo-compile-tests, L1-dynamo-core-tests, L1-torchscript-tests] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L2 torchscript tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L2-torchscript-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - pushd . - cd tests/modules - python hub.py - popd - pushd . 
- cd tests/py/ts - ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l2_ts_integrations_tests_results.xml integrations/ - popd - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} - cancel-in-progress: true diff --git a/.github/workflows/build-test-windows_rtx-python-only.yml b/.github/workflows/build-test-windows_rtx-python-only.yml deleted file mode 100644 index ab4678caa4..0000000000 --- a/.github/workflows/build-test-windows_rtx-python-only.yml +++ /dev/null @@ -1,124 +0,0 @@ -name: RTX - Python-only build and test Windows wheels - -on: - pull_request: - push: - branches: - - main - - nightly - - release/* - tags: - # NOTE: Binary build pipelines should only get triggered on release candidate builds - # Release candidate tags look like: v1.11.0-rc1 - - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ - workflow_dispatch: - -permissions: - contents: read - -jobs: - generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: wheel - os: windows - test-infra-repository: pytorch/test-infra - test-infra-ref: main - with-rocm: false - with-cpu: false - - filter-matrix: - needs: [generate-matrix] - outputs: - matrix: ${{ steps.generate.outputs.matrix }} - runs-on: ubuntu-latest - steps: - - uses: actions/setup-python@v6 - with: - python-version: "3.11" - - uses: actions/checkout@v6 - with: - repository: pytorch/tensorrt - - name: Generate matrix - id: generate - run: | - set -eou pipefail - MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} - MATRIX_BLOB="$(python3 .github/scripts/filter-matrix.py --use-rtx true --matrix "${MATRIX_BLOB}")" - echo "${MATRIX_BLOB}" - echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" - - substitute-runner: - needs: filter-matrix - outputs: - matrix: ${{ steps.substitute.outputs.matrix 
}} - runs-on: ubuntu-latest - steps: - - name: Substitute runner - id: substitute - run: | - echo matrix="$(echo '${{ needs.filter-matrix.outputs.matrix }}' | sed -e 's/windows.g4dn.xlarge/windows.g5.4xlarge.nvidia.gpu/g')" >> ${GITHUB_OUTPUT} - - build: - needs: substitute-runner - permissions: - id-token: write - contents: read - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - pre-script: packaging/pre_build_script_windows.sh - env-script: packaging/vc_env_helper.bat - smoke-test-script: packaging/smoke_test_windows.py - package-name: torch_tensorrt - display-name: RTX - Python-only build Windows torch-tensorrt-rtx whl package - name: ${{ matrix.display-name }} - uses: ./.github/workflows/build_windows.yml - with: - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - env-script: ${{ matrix.env-script }} - wheel-build-params: "--py-only --use-rtx" - smoke-test-script: ${{ matrix.smoke-test-script }} - package-name: ${{ matrix.package-name }} - trigger-event: ${{ github.event_name }} - timeout: 120 - use-rtx: true - - dynamo-runtime-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: RTX - Python-only dynamo runtime tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: rtx-python-only-dynamo-runtime-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - 
test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - pushd . - cd tests/py/dynamo/runtime - ../../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/python_only_dynamo_runtime_tests_results.xml . - popd - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-tensorrt-rtx-python-only-${{ github.event_name == 'workflow_dispatch' }} - cancel-in-progress: true diff --git a/.github/workflows/build-test-windows_rtx.yml b/.github/workflows/build-test-windows_rtx.yml deleted file mode 100644 index 53d2bd6a35..0000000000 --- a/.github/workflows/build-test-windows_rtx.yml +++ /dev/null @@ -1,381 +0,0 @@ -name: RTX - Build and test Windows wheels - -on: - pull_request: - push: - branches: - - main - - nightly - - release/* - tags: - # NOTE: Binary build pipelines should only get triggered on release candidate builds - # Release candidate tags look like: v1.11.0-rc1 - - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ - workflow_dispatch: - -jobs: - generate-matrix: - uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main - with: - package-type: wheel - os: windows - test-infra-repository: pytorch/test-infra - test-infra-ref: main - with-rocm: false - with-cpu: false - - filter-matrix: - needs: [generate-matrix] - outputs: - matrix: ${{ steps.generate.outputs.matrix }} - runs-on: ubuntu-latest - steps: - - uses: actions/setup-python@v6 - with: - python-version: '3.11' - - uses: actions/checkout@v6 - with: - repository: pytorch/tensorrt - - name: Generate matrix - id: generate - run: | - set -eou pipefail - MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} - MATRIX_BLOB="$(python3 .github/scripts/filter-matrix.py --use-rtx true --matrix "${MATRIX_BLOB}")" - echo "${MATRIX_BLOB}" - echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" - - 
substitute-runner: - needs: filter-matrix - outputs: - matrix: ${{ steps.substitute.outputs.matrix }} - runs-on: ubuntu-latest - steps: - - name: Substitute runner - id: substitute - run: | - echo matrix="$(echo '${{ needs.filter-matrix.outputs.matrix }}' | sed -e 's/windows.g4dn.xlarge/windows.g5.4xlarge.nvidia.gpu/g')" >> ${GITHUB_OUTPUT} - - build: - needs: substitute-runner - permissions: - id-token: write - contents: read - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - pre-script: packaging/pre_build_script_windows.sh - env-script: packaging/vc_env_helper.bat - smoke-test-script: packaging/smoke_test_windows.py - package-name: torch_tensorrt - display-name: RTX - Build Windows torch-tensorrt whl package - name: ${{ matrix.display-name }} - uses: ./.github/workflows/build_windows.yml - with: - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: ${{ matrix.pre-script }} - env-script: ${{ matrix.env-script }} - smoke-test-script: ${{ matrix.smoke-test-script }} - package-name: ${{ matrix.package-name }} - trigger-event: ${{ github.event_name }} - wheel-build-params: "--use-rtx" - use-rtx: true - timeout: 120 - - L0-dynamo-converter-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L0 dynamo converter tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L0-dynamo-converter-tests - repository: ${{ matrix.repository }} - 
ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - pushd . - cd tests/py/dynamo - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_dynamo_converter_tests_results.xml --dist=loadscope --maxfail=20 conversion/ - popd - - L0-dynamo-core-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L0 dynamo core tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L0-dynamo-core-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - pushd . 
- cd tests/py/dynamo - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_dynamo_core_runtime_tests_results.xml runtime/test_000_* - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_dynamo_core_partitioning_tests_results.xml partitioning/ - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_dynamo_core_lowering_tests_results.xml lowering/ - popd - - L0-py-core-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L0 core python tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L0-core-python-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - pushd . - cd tests/py/core - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l0_py_core_tests_results.xml . 
- popd - - L1-dynamo-core-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build, L0-dynamo-converter-tests, L0-dynamo-core-tests, L0-py-core-tests] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L1 dynamo core tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L1-dynamo-core-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - pushd . - cd tests/py/dynamo - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_dynamo_core_tests_results.xml runtime/test_001_* - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 8 --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_dynamo_hlo_tests_results.xml hlo/ - popd - - L1-dynamo-compile-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build, L0-dynamo-converter-tests, L0-dynamo-core-tests, L0-py-core-tests] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L1 dynamo compile tests - uses: ./.github/workflows/windows-test.yml - with: - 
job-name: L1-dynamo-compile-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - pushd . - cd tests/py/dynamo/ - ../../../packaging/vc_env_helper.bat python -m pytest -m critical -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_dynamo_compile_tests_results.xml models/ - popd - - L1-torch-compile-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build, L0-dynamo-converter-tests, L0-dynamo-core-tests, L0-py-core-tests] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L1 torch compile tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L1-torch-compile-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - pushd . 
- cd tests/py/dynamo/ - ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_torch_compile_be_tests_results.xml backend/ - ../../../packaging/vc_env_helper.bat python -m pytest -m critical -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_torch_compile_models_tests_results.xml --ir torch_compile models/test_models.py - ../../../packaging/vc_env_helper.bat python -m pytest -m critical -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l1_torch_compile_dyn_models_tests_results.xml --ir torch_compile models/test_dyn_models.py - popd - - L2-torch-compile-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build, L1-torch-compile-tests, L1-dynamo-compile-tests, L1-dynamo-core-tests] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L2 torch compile tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L2-torch-compile-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - pushd . 
- cd tests/py/dynamo/ - ../../../packaging/vc_env_helper.bat python -m pytest -m "not critical" -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l2_torch_compile_models_tests_results.xml --ir torch_compile models/test_models.py - ../../../packaging/vc_env_helper.bat python -m pytest -m "not critical" -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l2_torch_compile_dyn_models_tests_results.xml --ir torch_compile models/test_dyn_models.py - popd - - L2-dynamo-compile-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build, L1-torch-compile-tests, L1-dynamo-compile-tests, L1-dynamo-core-tests] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L2 dynamo compile tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L2-dynamo-compile-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - pushd . 
- cd tests/py/dynamo/ - ../../../packaging/vc_env_helper.bat python -m pytest -m "not critical" -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l2_dynamo_compile_tests_results.xml models/ - ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l2_dynamo_compile_llm_tests_results.xml llm/ - popd - - L2-dynamo-core-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build, L1-torch-compile-tests, L1-dynamo-compile-tests, L1-dynamo-core-tests] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L2 dynamo core tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L2-dynamo-core-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - pushd . 
- cd tests/py/dynamo - ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l2_dynamo_core_tests_results.xml -k "not test_000_ and not test_001_" runtime/* - popd - - L2-dynamo-plugin-tests: - name: ${{ matrix.display-name }} - needs: [substitute-runner, build, L1-torch-compile-tests, L1-dynamo-compile-tests, L1-dynamo-core-tests] - if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || startsWith(github.ref_name, 'release/') || (startsWith(github.ref, 'refs/tags/v') && contains(github.ref_name, '-rc')) || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }} - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - display-name: L2 dynamo plugin tests - uses: ./.github/workflows/windows-test.yml - with: - job-name: L2-dynamo-plugin-tests - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - pushd . - cd tests/py/dynamo - ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/l2_dynamo_plugins_tests_results.xml automatic_plugin/ - popd - - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-tensorrt-rtx-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} - cancel-in-progress: true diff --git a/.github/workflows/ci-linux-x86_64.yml b/.github/workflows/ci-linux-x86_64.yml new file mode 100644 index 0000000000..53985961d4 --- /dev/null +++ b/.github/workflows/ci-linux-x86_64.yml @@ -0,0 +1,117 @@ +name: CI Linux x86_64 + +# Per-platform entry (one small, independent run — not the old 8-channel mega-run). 
+# Runs the linux-x86_64 channels: standard, RTX, and python-only. Lane + backend +# come from the shared _decide reusable. + +on: + pull_request: + types: [opened, synchronize, reopened, labeled] + pull_request_review: + types: [submitted] + push: + branches: [main] + schedule: + - cron: "0 7 * * *" + workflow_dispatch: + inputs: + lane: + description: "Which lane to run" + type: choice + options: [fast, full, nightly] + default: full + backend: + description: "Which backend(s) to test" + type: choice + options: [standard, rtx, both] + default: both + +concurrency: + group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number || github.ref }} # per-event groups: a review comment (lane=skip) or the nightly must not cancel an in-flight PR/main run + cancel-in-progress: ${{ github.event_name == 'pull_request' }} # only a newer PR push supersedes; main / nightly / review / dispatch runs finish + +permissions: + id-token: write + contents: read + +jobs: + decide: + uses: ./.github/workflows/_decide.yml + + # Standard — the only channel on the fast lane (every PR push). + standard: + needs: decide + if: needs.decide.outputs.lane != 'skip' && needs.decide.outputs.backend != 'rtx' + uses: ./.github/workflows/_test-linux.yml + with: + lane: ${{ needs.decide.outputs.lane }} + use-rtx: false + + rtx: + needs: decide + if: needs.decide.outputs.lane != 'skip' && needs.decide.outputs.backend != 'standard' + uses: ./.github/workflows/_test-linux.yml + with: + lane: ${{ needs.decide.outputs.lane }} + use-rtx: true + name-prefix: "RTX - " + + python-only: + needs: decide + if: (needs.decide.outputs.lane == 'full' || needs.decide.outputs.lane == 'nightly') && needs.decide.outputs.backend != 'rtx' + uses: ./.github/workflows/_test-linux.yml + with: + lane: python-only + python-only: true + name-prefix: "Python-only " + + # python-only against TensorRT-RTX (so backend=both runs BOTH python-only variants).
+ python-only-rtx: + needs: decide + if: (needs.decide.outputs.lane == 'full' || needs.decide.outputs.lane == 'nightly') && needs.decide.outputs.backend != 'standard' + uses: ./.github/workflows/_test-linux.yml + with: + lane: python-only + python-only: true + use-rtx: true + name-prefix: "Python-only RTX " + + # Required check. Fails ONLY on a genuine failure; skipped/cancelled don't block. + gate: + needs: [decide, standard, rtx, python-only, python-only-rtx] + if: always() + runs-on: ubuntu-latest + steps: + - name: Gate + run: | + set -euo pipefail + if [ "${{ contains(needs.*.result, 'failure') }}" = "true" ]; then + echo "::error::a Linux x86_64 channel failed — see the channel jobs above" + exit 1 + fi + echo "Linux x86_64 gate OK (cancelled/skipped channels ignored)." + + # Consolidated, agent-friendly report over this run's suites (informational). + report: + needs: [standard, rtx, python-only, python-only-rtx] + if: always() + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 + with: + python-version: "3.11" + - uses: actions/download-artifact@v7 + with: + pattern: junit-* + path: all-results + merge-multiple: false # one sub-directory per artifact: same-named XMLs from different matrix cells/channels would overwrite each other if merged (NOTE(review): assumes junit_summary.py scans recursively — confirm) + - name: Consolidated report + run: | + set -uo pipefail + mkdir -p all-results + if [ -z "$(find all-results -name '*.xml' 2>/dev/null)" ]; then + echo "No JUnit results uploaded." >> "$GITHUB_STEP_SUMMARY"; exit 0 + fi + python tests/py/utils/junit_summary.py all-results --agent >> "$GITHUB_STEP_SUMMARY" || true + python tests/py/utils/junit_summary.py all-results || true diff --git a/.github/workflows/ci-sbsa.yml b/.github/workflows/ci-sbsa.yml new file mode 100644 index 0000000000..9b1ed78650 --- /dev/null +++ b/.github/workflows/ci-sbsa.yml @@ -0,0 +1,76 @@ +name: CI SBSA + +# Per-platform entry — SBSA (linux-aarch64) is BUILD-ONLY (no aarch64 GPU test +# runners), so this just validates the wheel builds (standard + python-only) on +# the full/nightly lanes.
No test/report jobs since nothing runs. + +on: + pull_request: + types: [opened, synchronize, reopened, labeled] + pull_request_review: + types: [submitted] + push: + branches: [main] + schedule: + - cron: "0 7 * * *" + workflow_dispatch: + inputs: + lane: + description: "Which lane to run" + type: choice + options: [fast, full, nightly] + default: full + backend: + description: "Which backend(s) to test" + type: choice + options: [standard, rtx, both] + default: both + +concurrency: + group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number || github.ref }} # per-event groups: a review comment (lane=skip) or the nightly must not cancel an in-flight PR/main run + cancel-in-progress: ${{ github.event_name == 'pull_request' }} # only a newer PR push supersedes; main / nightly / review / dispatch runs finish + +permissions: + id-token: write + contents: read + +jobs: + decide: + uses: ./.github/workflows/_decide.yml + + build: + needs: decide + if: (needs.decide.outputs.lane == 'full' || needs.decide.outputs.lane == 'nightly') && needs.decide.outputs.backend != 'rtx' + uses: ./.github/workflows/_test-linux.yml + with: + lane: ${{ needs.decide.outputs.lane }} + os: linux-aarch64 + architecture: aarch64 + run-tests: false + name-prefix: "SBSA " + + python-only: + needs: decide + if: (needs.decide.outputs.lane == 'full' || needs.decide.outputs.lane == 'nightly') && needs.decide.outputs.backend != 'rtx' + uses: ./.github/workflows/_test-linux.yml + with: + lane: python-only + python-only: true + os: linux-aarch64 + architecture: aarch64 + run-tests: false + name-prefix: "Python-only SBSA " + + gate: + needs: [decide, build, python-only] + if: always() + runs-on: ubuntu-latest + steps: + - name: Gate + run: | + set -euo pipefail + if [ "${{ contains(needs.*.result, 'failure') }}" = "true" ]; then + echo "::error::an SBSA build failed — see the build jobs above" + exit 1 + fi + echo "SBSA gate OK (cancelled/skipped channels ignored)."
diff --git a/.github/workflows/ci-windows.yml b/.github/workflows/ci-windows.yml new file mode 100644 index 0000000000..62d5553d9f --- /dev/null +++ b/.github/workflows/ci-windows.yml @@ -0,0 +1,112 @@ +name: CI Windows + +# Per-platform entry — runs the Windows channels (standard, RTX, python-only) as +# one small independent run. Windows is heavier, so it runs on the full/nightly +# lanes only (never the fast PR-push lane). + +on: + pull_request: + types: [opened, synchronize, reopened, labeled] + pull_request_review: + types: [submitted] + push: + branches: [main] + schedule: + - cron: "0 7 * * *" + workflow_dispatch: + inputs: + lane: + description: "Which lane to run" + type: choice + options: [fast, full, nightly] + default: full + backend: + description: "Which backend(s) to test" + type: choice + options: [standard, rtx, both] + default: both + +concurrency: + group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number || github.ref }} # per-event groups: a review comment (lane=skip) or the nightly must not cancel an in-flight PR/main run + cancel-in-progress: ${{ github.event_name == 'pull_request' }} # only a newer PR push supersedes; main / nightly / review / dispatch runs finish + +permissions: + id-token: write + contents: read + +jobs: + decide: + uses: ./.github/workflows/_decide.yml + + standard: + needs: decide + if: (needs.decide.outputs.lane == 'full' || needs.decide.outputs.lane == 'nightly') && needs.decide.outputs.backend != 'rtx' + uses: ./.github/workflows/_test-windows.yml + with: + lane: ${{ needs.decide.outputs.lane }} + use-rtx: false + + rtx: + needs: decide + if: (needs.decide.outputs.lane == 'full' || needs.decide.outputs.lane == 'nightly') && needs.decide.outputs.backend != 'standard' + uses: ./.github/workflows/_test-windows.yml + with: + lane: ${{ needs.decide.outputs.lane }} + use-rtx: true + name-prefix: "RTX - " + + python-only: + needs: decide + if: (needs.decide.outputs.lane == 'full' || needs.decide.outputs.lane == 'nightly') && needs.decide.outputs.backend != 'rtx' + uses: ./.github/workflows/_test-windows.yml + with: + lane: python-only + name-prefix: "Python-only " + + # python-only against TensorRT-RTX (so backend=both runs BOTH
python-only variants). + python-only-rtx: + needs: decide + if: (needs.decide.outputs.lane == 'full' || needs.decide.outputs.lane == 'nightly') && needs.decide.outputs.backend != 'standard' + uses: ./.github/workflows/_test-windows.yml + with: + lane: python-only + use-rtx: true + name-prefix: "Python-only RTX " + + gate: + needs: [decide, standard, rtx, python-only, python-only-rtx] + if: always() + runs-on: ubuntu-latest + steps: + - name: Gate + run: | + set -euo pipefail + if [ "${{ contains(needs.*.result, 'failure') }}" = "true" ]; then + echo "::error::a Windows channel failed — see the channel jobs above" + exit 1 + fi + echo "Windows gate OK (cancelled/skipped channels ignored)." + + report: + needs: [standard, rtx, python-only, python-only-rtx] + if: always() + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 + with: + python-version: "3.11" + - uses: actions/download-artifact@v7 + with: + pattern: junit-* + path: all-results + merge-multiple: false # one sub-directory per artifact: same-named XMLs from different matrix cells/channels would overwrite each other if merged (NOTE(review): assumes junit_summary.py scans recursively — confirm) + - name: Consolidated report + run: | + set -uo pipefail + mkdir -p all-results + if [ -z "$(find all-results -name '*.xml' 2>/dev/null)" ]; then + echo "No JUnit results uploaded." >> "$GITHUB_STEP_SUMMARY"; exit 0 + fi + python tests/py/utils/junit_summary.py all-results --agent >> "$GITHUB_STEP_SUMMARY" || true + python tests/py/utils/junit_summary.py all-results || true diff --git a/.github/workflows/linux-test.yml b/.github/workflows/linux-test.yml index d055276143..4537d4d010 100644 --- a/.github/workflows/linux-test.yml +++ b/.github/workflows/linux-test.yml @@ -174,6 +174,17 @@ jobs: display-options: fEs fail-on-empty: ${{ inputs.fail-on-empty }} + # Upload the raw JUnit so a downstream job can aggregate every suite's + # results into one consolidated report (junit_summary.py). Additive: the + # per-job pmeier summary above is unchanged.
+ - name: Upload JUnit results + if: always() + uses: actions/upload-artifact@v6 + with: + name: junit-${{ inputs.job-name }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }} + path: ${{ env.RUNNER_TEST_RESULTS_DIR }}/**/*.xml + if-no-files-found: ignore + - name: Prepare artifacts for upload working-directory: ${{ inputs.repository }} id: check-artifacts diff --git a/.github/workflows/windows-test.yml b/.github/workflows/windows-test.yml index 0d919ca3c3..89c2e83aab 100644 --- a/.github/workflows/windows-test.yml +++ b/.github/workflows/windows-test.yml @@ -153,6 +153,15 @@ jobs: summary: true display-options: fEs fail-on-empty: true + # Upload raw JUnit so the consolidated report (junit_summary.py) can + # aggregate Windows results alongside Linux. Additive. + - name: Upload JUnit results + if: always() + uses: actions/upload-artifact@v6 + with: + name: junit-${{ inputs.job-name }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }} + path: ${{ env.RUNNER_TEST_RESULTS_DIR }}/*.xml + if-no-files-found: ignore - name: Teardown Windows if: ${{ always() }} uses: ./test-infra/.github/actions/teardown-windows diff --git a/TESTING_AND_CI_DESIGN.md b/TESTING_AND_CI_DESIGN.md new file mode 100644 index 0000000000..a73eafaad1 --- /dev/null +++ b/TESTING_AND_CI_DESIGN.md @@ -0,0 +1,392 @@ +# Torch-TensorRT — Testing & CI Design + +> **Status:** proposal / north-star design. Some pieces already exist on `main` +> (marked **✓ exists**); the rest is the target end-state and a rollout plan. +> +> **One-line goal:** *a developer can run exactly what CI runs, locally, with one +> command — and when something fails, the failure tells them how to reproduce and +> fix it.* + +--- + +## 1. Goals & non-goals + +**Goals** +- **Full coverage** of torch-tensorrt: converters, runtime, lowering, dynamo, torch-compile, torchscript, plugins, models, distributed — across the platforms/CUDA/Python we ship. 
+- **Ergonomic for developers**: the local experience is the *primary* design target, not an afterthought bolted onto CI. +- **Zero local/CI drift**: the command that runs a tier in CI is the *same* command you run locally. +- **Fast feedback by default, full coverage on demand**: a PR gets a signal in ~15 min; the exhaustive matrix runs when it matters. +- **Failures are never hidden**: nothing gets clobbered, every failure prints its own reproduce command, and the aggregate is machine- and agent-readable. + +**Non-goals** +- Replacing the PyTorch test-infra build plumbing (we reuse it for wheels/runners). +- A bespoke remote-execution cluster (we explicitly assume **no owned cache/RBE infra**). +- 100% of the matrix on every push (that's what nightly + the full lane are for). + +--- + +## 2. Design principles — the "pleasant" contract + +These are the invariants. Every decision below serves one of them. + +1. **One command to run anything.** `just <suite>` — never a 200-char `pytest` incantation. +2. **Local == CI.** Suites are a declarative manifest + one runner; `just` and CI both call `ci run <suite>`, so there's nothing to drift. +3. **No rebuild to test.** Running tests never triggers a Bazel rebuild. +4. **Reproduce in one line.** Every CI failure prints `uv run --no-sync pytest … -n0 <nodeid>`. +5. **Build once, test many.** One wheel per (platform, CUDA, Python); every tier reuses it. +6. **Fail loud, aggregate everything.** One consolidated, agent-friendly report; no silent truncation; no fragile third-party glue. +7. **Cheap to retrigger, easy to discover.** Re-run via a PR comment; a menu tells you how. +8. **Pay for the signal you need.** Tiers + lanes + path filters; don't run GPU jobs for a docs typo. +9. **Flakes never mask real failures.** Retries are scoped to known signatures; everything else is quarantined with a tracking issue. + +--- + +## 3.
Mental model + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ LAYER 0 — Test logic (platform-agnostic, the single source) │ +│ tests/ci/suites.py SUITES = [Suite(...)] manifest (DATA)│ +│ tests/ci/__main__.py `ci run <suite>` · `ci matrix` one engine │ +│ @pytest.mark.{smoke,critical} lane membership on the test │ +│ tests/py/utils/junit_summary.py aggregate + --agent report ✓ │ +│ tests/py/utils/ci_helpers.sh env + trt_pytest wrapper only │ +│ pyproject.toml markers, optional dep groups │ +└───────────────┬───────────────────────────────┬───────────────────┘ + │ calls "ci run <suite>" │ matrix from "ci matrix" + ┌───────▼────────┐ ┌───────▼─────────────────┐ + │ LAYER 1 — LOCAL │ │ LAYER 1 — CI │ + │ justfile ✓ │ │ _build.yml / _test.yml│ + │ uv, jj fix │ │ ci.yml (lanes) │ + └────────────────┘ └───────┬─────────────────┘ + │ + ┌────────▼─────────┐ + │ LAYER 2 — STATUS │ + │ rollup check │ + │ PR comment(--agent)│ + │ /rerun, menu ✓ │ + └──────────────────┘ +``` + +The point: **Layer 0 is the contract — and it is _data + one engine_, not a pile of +shell functions.** A declarative **suite manifest** (`tests/ci/suites.py`) says *what* +each suite is; a single **runner** (`tests/ci/__main__.py`) is the *only* place that knows +*how* to turn a suite into a `pytest` command. `just` and CI are both thin callers of +`ci run <suite>`, and CI's job matrix is *generated* by `ci matrix` — so they cannot drift, +and adding a job is a one-line data change (§5.3). + +--- + +## 4. Test taxonomy + +### 4.1 Three orthogonal axes (retiring L0/L1/L2-by-filename) + +A job is a cell in **(subsystem × lane × variant)** — three *independent* axes. The old +`L0/L1/L2` tier numbers fused "what subsystem" with "how deep," and worse, encoded depth +in **filenames** (`runtime/test_000_*` = L0, `runtime/test_001_*` = L1, the rest = L2 via +`-k "not test_000_…"`). That's retired.
The axes are now: + +- **Subsystem** *(what — a directory)*: `converters`, `runtime`, `lowering`, `partitioning`, `dynamo-models`, `torch-compile`, `torchscript`, `plugins`, `kernels`, `quantization`, `llm`, `distributed`, `executorch`. +- **Lane** *(how deep — a marker)*: `fast` (= `-m smoke`, every push), `full` (default, the ready-signal lane), `nightly` (everything + perf), plus `python-only` (a build-mode lane: the `PYTHON_ONLY=1` wheel validated against the runtime suite). +- **Variant** *(where — a dimension)*: `standard` / `rtx` — applied centrally by the runner, **not** as `if [ "$USE_TRT_RTX" ]` branches scattered through shell. +- **Platform / channel** *(another dimension)*: `linux-x86_64` / `windows` for tests (SBSA/aarch64 is build-only — no GPU test runners). One reusable per OS family (`uses:` must be literal); the suite *selection* is platform-agnostic. + +| | fast (every push) | full (ready signal) | nightly | +|---|---|---|---| +| converters | smoke subset | all | + fuzz | +| runtime / lowering / partitioning | smoke subset | all | — | +| dynamo-models / torch-compile | — | `-m critical` | all + llm | +| torchscript | api smoke | models | integrations | +| plugins / kernels / quantization | — | — | all (+optional deps) | +| distributed | — | — | multi-GPU | + +Depth lives on the test (a marker), so **moving a test between lanes is editing a +decorator, never renaming a file.** Sharding a big subsystem is a manifest field +(`shards: N`), not a `test_000_*` filename convention. + +### 4.2 Lane membership = markers, uniformly (`pyproject.toml`) +Membership is expressed *one* way — a marker on the test — never filename prefixes or +`-k "not test_000_…"` strings (today three different mechanisms coexist for the same idea). +- `smoke` *(proposed)* — the fast-lane subset of any subsystem. +- `critical` ✓ — the `full`-lane core; nightly runs the complement (`-m "not critical"`). Clean partition, no overlap, no gap. +- `unit` ✓ — the bulk of the suite. 
+- `flaky` *(proposed)* — quarantined out of gating lanes, tracked by an issue, still run nightly for visibility. + +### 4.3 Optional dependency groups (`-ext`) +`test-ext`, `kernels`, `quantization` (uv groups) + `executorch` (plain package). Suites that need them **skip cleanly when absent** (e.g. `conftest.py` `skip_no_cuda_core`), so a base checkout never hard-fails. `just install-test-ext` pulls them all. + +### 4.4 Directory layout (`tests/py/`) +``` +tests/ + ci/ # ← NEW: the Layer-0 brain — suites.py (manifest) + __main__.py (runner) + py/ + dynamo/{conversion,lowering,runtime,partitioning,models,hlo,llm,executorch,…} + ts/ # torchscript frontend + kernels/ quantization/ # gated on optional deps + distributed/ # multi-GPU + utils/ # ci_helpers.sh (env + trt_pytest wrapper), junit_summary.py +``` + +--- + +## 5. Layer 0 — the suite manifest + one runner + +This replaces the bash `trt_tier_*` library as the source of truth. The bash version +*works*, but config-in-shell conflates data with mechanism and quietly hides errors. Today +it: tiers tests by **filename prefix**; mixes markers / `-k` strings / globs for the same +"depth" concept; drops the L2 suites out of the `trt_pytest` rerun+repro wrapper (so L2 +failures get no repro hint); and — really — points **four** pytest runs at one +`--junitxml` path so three suites' results vanish from the aggregate. Those are all +symptoms of the same root cause, fixed by separating data from engine. + +### 5.1 The manifest — `tests/ci/suites.py` *(data; typed, mypy-validated)* +```python +@dataclass(frozen=True) +class Suite: + name: str # "dynamo-runtime" + paths: tuple[str, ...] # ("dynamo/runtime",) under tests/py + lanes: tuple[Lane, ...] = ("full",) # fast | full | nightly + shards: int = 1 # split a big suite across N CI jobs + dist: str | None = None # "--dist=loadscope" + needs: tuple[str, ...] = () # optional dep groups: ("kernels", "cuda-core") + variants: tuple[Variant, ...] 
= ("standard", "rtx") + extra_args: tuple[str, ...] = () # ("--ir", "torch_compile"), ("--maxfail", "20") + +SUITES = [ + Suite("dynamo-converters", ("dynamo/conversion",), lanes=("fast", "full"), + dist="--dist=loadscope", shards=4), + Suite("dynamo-runtime", ("dynamo/runtime",), lanes=("fast", "full")), + Suite("dynamo-models", ("dynamo/models",), lanes=("full", "nightly")), + Suite("kernels", ("kernels",), lanes=("nightly",), + needs=("kernels", "cuda-core"), variants=("standard",)), + # … one line per subsystem +] +``` +A typo'd field or path is a **static error**, not a silently-empty glob. (YAML + a pydantic +schema is the friendlier-for-non-coders alternative; we pick Python for the static checking +and because the same module generates the CI matrix.) + +### 5.2 The runner — `tests/ci/__main__.py` *(the ONLY place pytest mechanics live)* +```bash +python -m tests.ci matrix --lane fast --variant standard # → JSON for the GH Actions matrix +python -m tests.ci run dynamo-runtime --shard 2/4 # builds + runs pytest +``` +The runner — and nothing else — knows how to: apply the lane's marker (`fast` → `-m smoke`), +derive a *unique* junit path from `name+shard` (no more collisions), wrap **every** suite in +the `trt_pytest` reruns+repro helper *uniformly*, apply `--dist`/`extra_args`, gate on `needs` +(skip cleanly if a dep group is absent), and resolve variants centrally. 
Its knobs: + +| Knob | Default | Meaning | +|---|---|---| +| `PYTHON` | `python` | CI: container python; local: `uv run --no-sync python` (no rebuild) | +| `TRT_JOBS` | per-suite | xdist `-n`; **GPU-memory-aware, not `-n auto`** (one GPU OOMs long before it runs out of cores) | +| `TRT_PYTEST_RERUNS` | `1` (CI) / `0` (local) | gated reruns for known flake signatures only | +| `RUNNER_TEST_RESULTS_DIR` | `$TMPDIR/trt_test_results` | where JUnit XMLs land | +| `TMPDIR` | per-user | engine/timing cache; per-user to dodge cross-user permission collisions | + +> **Lesson baked in:** rerun args are a Python list passed to `subprocess` (or, in the +> shrunken shell wrapper, a bash *array*) — **never** an unquoted string. A multi-word +> `--only-rerun "Stream capture invalidated"` expanded unquoted word-splits into phantom +> test paths and silently collects **0 tests** — this is exactly how a real CI run failed. + +### 5.3 Adding a job — one line +- **Before** (bash): write a `trt_tier_*` function in `ci_helpers.sh` + a `just` recipe + a matrix entry in `_test.yml` + a `tests-report` tier-list entry — *4 edits, 3 files, in bash.* +- **After**: add one `Suite(...)` to `SUITES`. `just` exposes it, CI's matrix includes it (it's generated), the report aggregates it — **automatically.** + +### 5.4 `tests/py/utils/ci_helpers.sh` — shrunk, not deleted ✓exists +Keeps only the genuinely-shell bits the runner shells out to: env setup and the +array-safe `trt_pytest` wrapper. No tier definitions, no selection logic. + +### 5.5 `tests/py/utils/junit_summary.py` ✓exists +Reads all JUnit XMLs and emits a **human** report (TTY colors, `NO_COLOR`/`FORCE_COLOR`) and +an **`--agent`** report: Markdown with node id, file, junit path, a copy-paste +`uv run --no-sync pytest -k -n0` repro, message, and capped detail — *paste it +to Claude and it can start fixing.* Exits non-zero on any failure **or empty result set** +(the XMLs are the source of truth for pass/fail). 
No third-party result actions that crash +on empty input. + +--- + +## 6. Local developer experience + +### 6.1 The golden path +```bash +just test tests/py/dynamo/conversion/test_foo.py # inner loop: one file, fast +just lane fast # the fast (smoke) lane, exactly as CI runs it +just report full --agent # run a whole lane past failures → paste-ready report +just lint # all pre-commit hooks (== the linter CI job) +``` +Everything is `uv run --no-sync` underneath — **tests never rebuild torch-tensorrt.** + +### 6.2 Recipes (justfile — a thin caller of the runner) +- `test *args` — raw pytest in the uv env (honors `pyproject` addopts). +- `suite NAME [-- pytest args]` — run one manifest suite, *exactly* as CI runs it (`ci run NAME`). +- `lane NAME` — run every suite in a lane (`fast` | `full` | `nightly`) locally. +- `report LANE [--agent]` — **run a lane past failures, then print one consolidated report** (run + report in one step). +- `summary [--agent]` — re-print the last run's report without re-running. +- `lint` / `lint-changed` — pre-commit over all / changed files. +- `build` *(proposed)* — wrap the clean-rebuild flow; auto-detect ABI staleness after a nightly bump. +- `jobs=N` knob — raise xdist parallelism when your GPU has headroom (`just jobs=8 lane full`). + +During migration the legacy `tests-l0/l1/l2` recipes become thin aliases for the matching lanes, then retire. + +### 6.3 Build ergonomics +- `uv pip install -e . --no-deps --no-build-isolation` for incremental; **clean rebuild after a torch-nightly bump** (libtorch ABI changes → `undefined symbol` otherwise). +- `PYTHON_ONLY=1` for pure-Python iteration (skips Bazel entirely). +- A `build` skill / recipe encodes the decision tree so nobody re-learns it. + +### 6.4 Formatting is automatic, not a chore +- `pre-commit` for the full gate (the checks: mypy, validate-pyproject, uv-lock, …).
+- **`jj fix`** wired to the *formatters* (black, ruff, isort, clang-format, buildifier) as stdin→stdout filters, version-pinned via `uvx` to the exact pre-commit revs → format an entire stack in one shot, no per-commit `pre-commit` dance. + +### 6.5 The failure→fix loop +Any failing lane → `just summary --agent` → the `analyze-test-report` skill triages (real bug vs torch-API change vs OOM/skip vs flake) and drives each to a fix using the printed repro commands. + +--- + +## 7. CI design + +### 7.1 Lanes — *without a merge queue* + +We **cannot** use GitHub's merge queue, so the full suite is gated by an explicit +"ready" signal instead of a queue: + +| Lane | Trigger | What runs | Required? | +|---|---|---|---| +| **Fast** | every PR push | lint + **1 representative build** (py-latest + newest CUDA, standard) + **L0** | informational | +| **Full** | `ci: full` label · `/ci full` comment *(proposed)* · **approval** (`pull_request_review`) | full matrix · L1/L2 · RTX · all platforms | **yes** (`CI / full` rollup) | +| **Main canary** | `push` to `main` | full lane | — (catches trunk breakage → fast revert) | +| **Nightly** | `schedule` | full + `-ext` model/kernels/quant + perf + exhaustive CUDA/Python | — | + +**Gating mechanics.** Branch protection requires `CI / full`. It only reports after +the full lane runs (on label/approval). Pair with **"dismiss stale approvals on new +commits"** so a post-approval push invalidates approval → re-approve → full re-runs on +the new HEAD. + +> **Honest gap.** Without a merge queue we lose the guarantee that the *actually-merged* +> tree (after other PRs land) was tested as a unit — two independently-green PRs can break +> `main` together. Mitigation: (a) require PRs rebased on fresh `main` before the full run, +> (b) the main canary + fast revert. This is the inherent cost of no queue; we accept it.
+ +### 7.2 Topology — build once, test many ✓(x86_64 today) + +``` +ci.yml (one entry: pull_request | pull_request_review | push | schedule | comment) + ├─ _build.yml matrix{platform, cuda, python, variant} → ONE wheel artifact each + └─ _test.yml download wheel → run tier(s) via ci_helpers.sh → JUnit artifact + └─ rollup job aggregate JUnit → single status/platform + PR comment +``` + +Platform / RTX / python-only stop being **separate workflow files** and become +**matrix inputs**. This collapses today's ~11 near-duplicate entry workflows (incl. the +461-line inline Windows files) into `_build.yml` + `_test.yml` + a thin `ci.yml`. + +### 7.3 Caching — *without owned infra* + +We assume **no remote cache/RBE server**. Use GitHub's own cache, two layers, both wired +into the **vendored** build workflow (it's a local copy, so we can edit its steps): + +1. **sccache with the GitHub Actions backend** (`SCCACHE_GHA_ENABLED=true`) — free, no infra; caches C++ object compiles. Biggest win for Bazel C++ recompiles. +2. **Bazel `--disk_cache=` persisted via `actions/cache@v4`** — key on hashes of `MODULE.bazel` / `.bazelversion` / **torch-nightly + CUDA version** + `restore-keys` for partial hits. + +**Container caveat:** the build runs inside a manylinux container; `actions/cache` runs on +the host. Either **bind-mount** a host dir as the Bazel disk-cache path (no tokens cross +the boundary — preferred), or forward `ACTIONS_CACHE_URL`/`ACTIONS_RUNTIME_TOKEN` into the +container for sccache. + +**Warming model (forks for free):** populate caches on `push` to `main` (trusted, can +write); PRs — **including forks** — restore **read-only**. Forks get no secrets and can't +write the base cache, but they *can* read main's warm cache, so a fork PR still builds +incrementally with zero secret/infra exposure. + +> Caveats: GHA cache is **10 GB/repo, LRU-evicted** — scope it (sccache's compact object +> cache fits better than a raw disk_cache). 
**Key on the nightly/CUDA version** so a bump +> busts it — a stale cache here is exactly the ABI-mismatch class of bug. Step-up option if +> limits bite: BuildBuddy's free *hosted* tier (just an API key; non-fork PRs + main). + +### 7.4 Status & reporting +- **One rollup check per platform** (`CI / Linux x86_64`, …) summarizes its child jobs → branch protection keys on a *stable* name; reviewers see one green/red + a Markdown table. ✓exists +- JUnit from every job is aggregated by `junit_summary.py` into **one artifact + an auto PR comment** (the `--agent` report). Nothing clobbered, nothing missed. +- `--fail-on-empty` so "0 tests collected" is a failure, not a silent green. + +### 7.5 Ergonomic CI commands +- **`/rerun`** / **`/rerun all`** — re-run failed/cancelled or everything, no new commit (write-access gated). ✓exists +- **PR command menu** — on PR open, a comment lists the commands + local-repro recipes (so contributors discover them). ✓built +- **`/ci full`** *(proposed)* — opt a PR into the full lane (the merge-queue substitute trigger). + +### 7.6 Flake handling +- **Gated reruns for known signatures only** (`--only-rerun cudaErrorStreamCaptureInvalidated`, "Stream capture invalidated"). Never a blanket retry — that masks real failures. +- **Quarantine** `@pytest.mark.flaky` out of gating lanes with a tracking issue; still run nightly for visibility. +- **Cache model weights** as an artifact → kills the corrupted-download flakes (e.g. the mobilenet `unexpected EOF` torchscript failure). + +### 7.7 Concurrency & path filters +- `concurrency: cancel-in-progress` keyed on PR ref — never burn runners on superseded commits. +- **Path filters**: docs-only PR → skip GPU/C++; `.py`-only → skip the C++ test tier. (The single biggest waste-cutter on trivial PRs.) +- Skip CI on draft PRs; fast lane on "ready for review". + +--- + +## 8. 
Target file layout + +``` +.github/workflows/ + ci.yml # the ONLY build+test entry: lanes by event + _build.yml # reusable: matrix → wheel artifact (all platforms/variants) + _test.yml # reusable: wheel → suite(s) → JUnit; matrix GENERATED by `ci matrix` + nightly.yml # exhaustive matrix + -ext + perf + retrigger-ci.yml # /rerun ✓exists + pr-command-menu.yml # discoverability comment on open ✓built + linter.yml # pre-commit gate + release-*.yml # publish on tags (unchanged) +tests/ci/ + suites.py # ← the manifest (DATA): SUITES = [Suite(...)] + __main__.py # ← the runner/engine: `ci run` · `ci matrix` +tests/py/utils/ + ci_helpers.sh # SHRUNK: env + array-safe trt_pytest wrapper only ✓exists + junit_summary.py # aggregation + --agent report ✓exists +justfile # local entry — thin caller of `ci run`/`ci matrix` ✓exists +pyproject.toml # markers + optional dep groups ✓exists +TESTING_AND_CI_DESIGN.md # this document +``` + +**Deletions this enables:** the ~11 per-platform entry workflows, the parallel +schedule/dispatch build-test set (the source of the old/new *double-run*), and the +inline 461/381-line Windows workflows — all collapse into `_build.yml` + `_test.yml`. + +--- + +## 9. Rollout plan (incremental, each step shippable + verified) + +Use a **byte-equivalence harness** at each step — diff `ci run `'s emitted `pytest` +command against the bash tier it replaces, and render the workflow's generated script — +before flipping it on. Prove we didn't change *what runs*, only *how it's wired*. + +0. **Kill the double-run.** Delete/disable the superseded schedule/dispatch build-test set so a PR runs *one* set of jobs. (Halves PR cost, removes the confusion that surfaced in #4352.) +1. **Stand up the manifest + runner.** Port the `trt_tier_*` functions into `tests/ci/suites.py` + `tests/ci/__main__.py` one suite at a time, each producing a byte-identical command to the bash tier it replaces. 
Add `smoke` markers to replace the `test_000_*`/`test_001_*` filename tiering; shrink `ci_helpers.sh` to env + wrapper. +2. **Generalize the core** → `_build.yml` + `_test.yml` with a `platform`/`variant` input; `_test.yml`'s matrix is generated by `ci matrix`. Fold RTX + python-only in as variants. +3. **Migrate aarch64 + Windows** onto the same reusables (deletes the inline Windows files). +4. **Lanes** — add `ci.yml` orchestrator + the fast/full split + approval/label/`/ci full` triggers + `concurrency`. +5. **Nightly** — `nightly.yml` for the exhaustive matrix + `-ext`; trim the PR lane to fast. +6. **Caching** — sccache + Bazel disk_cache via GHA cache, warmed on main. +7. **Polish** — path filters, weight-cache, flake quarantine, drop fragile result actions. + +--- + +## 10. Open decisions + +- **Runner availability** for GPU jobs (how many parallel full lanes can we afford?). +- **GHA cache budget** — is 10 GB enough, or do we want BuildBuddy's free hosted tier? +- **Required-check names** for branch protection (must stay stable across the migration). +- **Fork policy** — confirm fork PRs are read-only on cache and never see secrets. +- **`/ci full` vs approval-only** as the full-lane trigger (or both). + +--- + +## 11. What "pleasant" feels like, end-to-end + +> Open a PR → a comment greets you with the command menu → a **fast green check in ~15 min**. +> Iterate on the fast lane. When ready, a reviewer approves (or you comment `/ci full`) → the +> full suite runs once. A failure? **One PR comment** with the exact `uv run --no-sync pytest …` +> for each failing test — paste it to Claude, or run it locally via `just`. Flaky? `/rerun`. +> Formatting? `jj fix`. You never assembled a `pytest` command by hand, never waited on a job +> irrelevant to your change, and never had a real failure hidden behind a flaky one. 
diff --git a/justfile b/justfile index c0f31ed1f7..25a433674b 100644 --- a/justfile +++ b/justfile @@ -1,4 +1,4 @@ -# Recipes source tests/py/utils/ci_helpers.sh (a bash library), so run them in bash. +# Recipes use bash (shebang recipes + the _ci launcher), so run them in bash. set shell := ["bash", "-cu"] # List all available recipes @@ -11,19 +11,26 @@ default: # PermissionError. Giving each user their own TMPDIR sidesteps that entirely. export TMPDIR := env_var_or_default("TMPDIR", "/tmp/torch_tensorrt_" + env_var_or_default("USER", "local")) -# pytest xdist parallelism for the parallel suites. Defaults to 2: these tiers +# pytest xdist parallelism for the parallel suites. Defaults to 4: these suites # build TensorRT engines, which are GPU-memory-heavy, and a single local GPU # OOMs (CUDA out-of-memory + segfaulting workers) well before it runs out of -# CPU cores — so `-n auto` is the wrong default here. CI runs 8 on dedicated -# GPU runners. Raise it if your GPU has headroom: just jobs=8 l0 +# CPU cores — so `-n auto` is the wrong default here. CI runs more on dedicated +# GPU runners. Raise it if your GPU has headroom: just jobs=8 lane full jobs := "4" -# Launcher + env shared by every tier recipe. The tier definitions themselves -# live in tests/py/utils/ci_helpers.sh — the SAME functions CI calls — so there is a -# single source of truth for what each tier runs. We only set environment -# policy here: PYTHON runs pytest against the already-built .venv (uv --no-sync, -# no rebuild) and TRT_JOBS feeds the parallel suites. -_tier := 'mkdir -p "$TMPDIR" && source tests/py/utils/ci_helpers.sh && export PYTHON="uv run --no-sync python" TRT_JOBS="' + jobs + '" TRT_PYTEST_RERUNS=0 &&' +# Which backend variant to run: standard | rtx. This selects the test SELECTION +# (RTX drops --dist on converters, runs the whole partitioning dir, etc.) — it +# does NOT switch the installed build. 
To actually run RTX locally you must have +# a torch-tensorrt built with USE_TRT_RTX=1 installed in the .venv. +# just variant=rtx lane fast +variant := "standard" + +# Launcher + env for the suite recipes. Suite definitions live in the manifest +# (tests/ci/suites.py) — the SAME data CI uses — so local and CI cannot drift. +# We only set environment policy here: PYTHON runs pytest against the already-built +# .venv (uv --no-sync, no rebuild), TRT_JOBS feeds xdist, and reruns are off +# locally (the rerunfailures plugin may be absent and you want to SEE flakes). +_ci := 'mkdir -p "$TMPDIR" && PYTHON="uv run --no-sync python" TRT_JOBS="' + jobs + '" TRT_PYTEST_RERUNS=0 uv run --no-sync python -m tests.ci' # ── Testing ─────────────────────────────────────────────────────────────────── @@ -34,148 +41,52 @@ test *args: @mkdir -p "$TMPDIR" uv run --no-sync pytest {{args}} -# ── CI tier reproduction ────────────────────────────────────────────────────── +# ── Suite manifest (tests/ci) — the single source of truth ────────────────────── # -# Run exactly what a CI tier runs, before pushing. Each recipe calls the tier -# function from tests/py/utils/ci_helpers.sh — the same one .github/workflows/ -# _linux-x86_64-core.yml invokes — so local and CI cannot drift. Extra args are -# forwarded to pytest, e.g. `just tests-l0-core -x -k test_foo`. -# (Standard-TensorRT scope; export USE_TRT_RTX=true before running for RTX.) 
- -# Full L0 smoke tier -tests-l0: tests-l0-converter tests-l0-core tests-l0-py-core tests-l0-torchscript - -tests-l0-converter *args: - {{_tier}} trt_tier_l0_converter {{args}} - -tests-l0-core *args: - {{_tier}} trt_tier_l0_core {{args}} - -tests-l0-py-core *args: - {{_tier}} trt_tier_l0_py_core {{args}} - -tests-l0-torchscript *args: - {{_tier}} trt_tier_l0_torchscript {{args}} - -# Full L1 tier -tests-l1: tests-l1-dynamo-core tests-l1-dynamo-compile tests-l1-torch-compile tests-l1-torchscript - -tests-l1-dynamo-core *args: - {{_tier}} trt_tier_l1_dynamo_core {{args}} - -tests-l1-dynamo-compile *args: - {{_tier}} trt_tier_l1_dynamo_compile {{args}} - -tests-l1-torch-compile *args: - {{_tier}} trt_tier_l1_torch_compile {{args}} - -tests-l1-torchscript *args: - {{_tier}} trt_tier_l1_torchscript {{args}} +# CI and these recipes both call `python -m tests.ci`, so what runs locally is +# exactly what runs in CI. See TESTING_AND_CI_DESIGN.md. + +# Validate the suite manifest (unique names/junit paths, valid setup steps) +doctor: + uv run --no-sync python -m tests.ci doctor + +# List every suite with its tier, lanes, and variants +suites: + uv run --no-sync python -m tests.ci list + +# Run ONE suite exactly as CI runs it (uses the {{variant}} backend). 
Args after `--`: +# just suite dynamo-runtime -- -k test_foo -x just variant=rtx suite dynamo-converters +suite name *args: + {{_ci}} run {{name}} --variant {{variant}} {{args}} + +# Run every suite in a LANE (fast|full|nightly), continuing past failures: +# just lane fast just variant=rtx lane fast +lane name *args: + {{_ci}} run-lane --lane {{name}} --variant {{variant}} {{args}} + +# Run a lane past failures, then print one consolidated report (--agent for Claude): +# just report fast --agent +report lane *summary_args: + #!/usr/bin/env bash + set -uo pipefail + mkdir -p "$TMPDIR" + results="${RUNNER_TEST_RESULTS_DIR:-$TMPDIR/trt_test_results}" + rm -f "$results"/*.xml 2>/dev/null || true + export PYTHON="uv run --no-sync python" TRT_JOBS="{{jobs}}" TRT_PYTEST_RERUNS=0 + uv run --no-sync python -m tests.ci run-lane --lane {{lane}} --variant {{variant}} || true + uv run --no-sync python tests/py/utils/junit_summary.py "$results" {{summary_args}} -# Pulls every optional test-dep group so the model / kernels / quantization / -# executorch suites run instead of skipping. Uses `uv pip install` (not `uv -# sync`) so it ADDS the deps without rebuilding torch-tensorrt or tearing down -# the env — this also sidesteps the test_ext↔quantization `uv sync` conflict, -# which only applies to lockfile resolution. executorch has no group, so it is -# installed as a plain package. +# Re-print the LAST run's consolidated report without re-running (--agent for Claude) +summary *args: + uv run --no-sync python tests/py/utils/junit_summary.py "${RUNNER_TEST_RESULTS_DIR:-$TMPDIR/trt_test_results}" {{args}} -# Install all optional test deps (test-ext + kernels + quantization + executorch) +# Added without a rebuild via `uv pip install --group` (sidesteps the +# test-ext↔quantization `uv sync` lockfile conflict). Run before `just lane full`. 
+# Install optional test deps so model/kernels/quantization/executorch suites run install-test-ext: uv pip install --group test-ext --group kernels --group quantization uv pip install pyyaml "executorch>=1.3.1" -# Full L1 tier + test-ext deps, so model-level cases run instead of skipping -tests-l1-ext: install-test-ext tests-l1-dynamo-core tests-l1-dynamo-compile tests-l1-torch-compile tests-l1-torchscript - -# Excludes tests-l2-distributed (needs 2+ GPUs and system MPI). Most L2 suites -# are model-level, so run tests-l2-ext (or `just install-test-ext` first) so -# they don't skip. - -# Full L2 tier (locally runnable suites) -tests-l2: tests-l2-torch-compile tests-l2-dynamo-compile tests-l2-dynamo-core tests-l2-plugin tests-l2-torchscript - -tests-l2-torch-compile *args: - {{_tier}} trt_tier_l2_torch_compile {{args}} - -tests-l2-dynamo-compile *args: - {{_tier}} trt_tier_l2_dynamo_compile {{args}} - -# Also installs executorch (additively) for the executorch/ suite. -tests-l2-dynamo-core *args: - {{_tier}} trt_tier_l2_dynamo_core {{args}} - -# Also installs cuda-python/cuda-core (additively) for the kernels/ QDP suite. -tests-l2-plugin *args: - {{_tier}} trt_tier_l2_plugin {{args}} - -tests-l2-torchscript *args: - {{_tier}} trt_tier_l2_torchscript {{args}} - -# Installs mpich/openmpi via dnf (root-capable container) and runs -# --nproc_per_node=2. Not part of the `tests-l2` aggregate. 
- -# CI-only: needs 2+ GPUs and system MPI -tests-l2-distributed *args: - {{_tier}} trt_tier_l2_distributed {{args}} - -# Full L2 tier + test-ext deps, so model-level cases run instead of skipping -tests-l2-ext: install-test-ext tests-l2-torch-compile tests-l2-dynamo-compile tests-l2-dynamo-core tests-l2-plugin tests-l2-torchscript - -# ── Run-all + consolidated failure report ───────────────────────────────────── - -# Unlike `just tests-l` (which aborts on the first failing suite), this runs -# every suite and aggregates the JUnit XMLs, so a single consolidated report -# shows all failures — nothing gets clobbered or missed. Append `-ext` to also -# install the test-ext deps first so model-level cases run instead of skipping. -# Pass `--agent` to print the paste-to-Claude Markdown report instead of the -# terminal one — i.e. run + report in one step (e.g. `just tests-report l1-ext -# --agent`). Exits non-zero if anything failed/errored. - -# Run a whole tier (l0|l1|l2[-ext]) past failures, then print one report [--agent] -tests-report level *report_args: - #!/usr/bin/env bash - set -uo pipefail # deliberately no -e: keep going past failures - mkdir -p "$TMPDIR" - # Accept an optional `-ext` suffix: install the test-ext group first. - lvl="{{level}}" - ext=0 - if [[ "$lvl" == *-ext ]]; then ext=1; lvl="${lvl%-ext}"; fi - case "$lvl" in - l0) tiers=(l0_converter l0_core l0_py_core l0_torchscript) ;; - l1) tiers=(l1_dynamo_core l1_dynamo_compile l1_torch_compile l1_torchscript) ;; - l2) tiers=(l2_torch_compile l2_dynamo_compile l2_dynamo_core l2_plugin l2_torchscript) ;; - *) echo "unknown level '{{level}}' (use l0|l1|l2, optionally with -ext)" >&2; exit 2 ;; - esac - if [[ "$ext" == 1 ]]; then - # Same set as `just install-test-ext`: pull every optional test-dep group - # so the model / kernels / quantization / executorch suites run instead of - # skipping. 
cuda-core resolves via uv here (the plugin tier's vanilla - # `pip install cuda-core` cannot fetch it on all hosts). - echo "==> installing test-ext + kernels + quantization + executorch deps" - uv pip install --group test-ext --group kernels --group quantization \ - || { echo "test-ext group install failed" >&2; exit 1; } - uv pip install pyyaml "executorch>=1.3.1" || true - fi - results="${RUNNER_TEST_RESULTS_DIR:-$TMPDIR/trt_test_results}" - rm -f "$results"/*.xml 2>/dev/null || true # drop stale results from prior runs - source tests/py/utils/ci_helpers.sh - export PYTHON="uv run --no-sync python" TRT_JOBS="{{jobs}}" TRT_PYTEST_RERUNS=0 - for t in "${tiers[@]}"; do - echo "==> trt_tier_$t" - "trt_tier_$t" || echo "::: trt_tier_$t exited non-zero (continuing) :::" - done - # Source of truth is the JUnit XMLs, not exit codes; this sets the final - # status. Extra args (e.g. --agent) are forwarded to the summary. - python3 tests/py/utils/junit_summary.py "$results" {{report_args}} - -# Reads the JUnit XMLs from the previous run (does not re-run). Pass --agent for -# a plain Markdown report to hand to Claude (`just test-summary --agent`). Exits -# non-zero if that run had failures. 
- -# Print the consolidated failure summary from the last run's JUnit results -test-summary *args: - python3 tests/py/utils/junit_summary.py {{args}} - # ── Linting ─────────────────────────────────────────────────────────────────── # Run all pre-commit hooks across the repo (matches the linter CI job) diff --git a/pyproject.toml b/pyproject.toml index 5ba4838ca0..1cc2042d90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -143,6 +143,8 @@ addopts = "-n auto --dist=loadfile" markers = [ "unit: a focused unit test (the bulk of the suite)", "critical: a smoke/critical-path case run in the L1 tier (pytest -m critical); the L2 tier runs the complement (-m 'not critical')", + "smoke: the fast-lane subset of a subsystem (pytest -m smoke); run on every push by the `fast` lane", + "flaky: a known-flaky test, quarantined out of the gating lanes and tracked by an issue; still run in nightly", ] norecursedirs = [ "bazel-*", @@ -156,6 +158,7 @@ norecursedirs = [ "tools", "modules", "utils", + "ci", ] [tool.uv] diff --git a/tests/ci/__init__.py b/tests/ci/__init__.py new file mode 100644 index 0000000000..20cec47fd1 --- /dev/null +++ b/tests/ci/__init__.py @@ -0,0 +1,16 @@ +"""Torch-TensorRT test-suite manifest + runner (the Layer-0 "brain"). + +This package is the single source of truth for *what each CI/local test job runs*. +It replaces the ``trt_tier_*`` bash functions in ``tests/py/utils/ci_helpers.sh``. + + * ``suites.py`` — the manifest: ``SUITES = [Suite(...), ...]`` (pure data). + * ``runner.py`` — the one engine that turns a ``Suite`` into a ``pytest`` command. + * ``__main__.py`` — the CLI: ``python -m tests.ci {list,show,run,matrix}``. + +Both ``just`` (local) and CI call ``python -m tests.ci run ``; CI's job +matrix is generated by ``python -m tests.ci matrix``. There is nothing to drift. 
+"""
+
+from .suites import SUITES, Lane, Suite, Tier, Variant
+
+__all__ = ["SUITES", "Suite", "Lane", "Tier", "Variant"]
diff --git a/tests/ci/__main__.py b/tests/ci/__main__.py
new file mode 100644
index 0000000000..fc59e2ecbc
--- /dev/null
+++ b/tests/ci/__main__.py
@@ -0,0 +1,230 @@
+"""CLI for the test-suite manifest.
+
+Usage: python -m tests.ci {list,show,run,run-lane,matrix,doctor}
+
+list                      all suites, tiers, lanes, variants
+show NAME                 a suite's resolved command per variant
+run NAME [opts] [-- ...]  run one suite (the call CI + just both make)
+run-lane [--lane|--tier]  run every matching suite, continuing past failures
+matrix [--lane|--tier]    JSON matrix `include` for GitHub Actions
+doctor                    validate the manifest (CI lints this)
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from collections import Counter
+from collections.abc import Iterable
+
+from .runner import REPO_ROOT, describe, junit_path, matrix, run_suite, select
+from .suites import SUITES, by_name
+
+# One row layout shared by the `list` header and body so the columns cannot drift.
+_LIST_ROW = "{name:<{width}}  {tier:<4}  {lanes:<21}  {variants:<15}  {platforms}"
+
+# Setup steps a suite may declare; `doctor` reports anything else as a problem.
+_VALID_SETUP_STEPS = frozenset({"hub", "executorch", "cuda-core", "mpi"})
+
+
+def _cmd_list(_: argparse.Namespace) -> int:
+    """Print one aligned row per suite: tier, lanes, variants, platforms."""
+    # Seeding with the header label keeps max() defined for an empty manifest.
+    width = max([len("SUITE"), *(len(s.name) for s in SUITES)])
+    print(
+        _LIST_ROW.format(
+            name="SUITE",
+            width=width,
+            tier="TIER",
+            lanes="LANES",
+            variants="VARIANTS",
+            platforms="PLATFORMS",
+        )
+    )
+    for s in SUITES:
+        print(
+            _LIST_ROW.format(
+                name=s.name,
+                width=width,
+                tier=s.tier,
+                lanes=",".join(s.lanes),
+                variants=",".join(s.variants),
+                platforms=",".join(s.platforms),
+            )
+        )
+    print(
+        f"\n{len(SUITES)} suites. "
+        "Run one: python -m tests.ci run NAME (or `just suite NAME`)"
+    )
+    return 0
+
+
+def _cmd_show(args: argparse.Namespace) -> int:
+    """Print suite ``args.name``'s resolved command for each of its variants."""
+    s = by_name(args.name)
+    print(f"# {s.name} (tier={s.tier}, lanes={','.join(s.lanes)})")
+    for var in s.variants:
+        print(f"\n## variant: {var} junit: {junit_path(s).name}")
+        print(describe(s, var))
+    return 0
+
+
+def _cmd_run(args: argparse.Namespace) -> int:
+    """Run suite ``args.name`` on ``args.variant``, or on all of its variants.
+
+    Asking for a variant the suite does not declare is a warning plus exit 0,
+    not an error. Returns non-zero if any variant's run returned non-zero.
+    """
+    s = by_name(args.name)
+    if args.variant and args.variant not in s.variants:
+        print(
+            f"::warning::{s.name} does not run on variant {args.variant!r}; "
+            f"it runs on {s.variants}",
+            file=sys.stderr,
+        )
+        return 0
+    variants = [args.variant] if args.variant else list(s.variants)
+    rc = 0
+    for var in variants:
+        # Keep going after a failure; `x or rc` remembers a non-zero status.
+        rc = run_suite(s, var, dry_run=args.dry_run, extra=args.pytest_args) or rc
+    return rc
+
+
+def _cmd_run_lane(args: argparse.Namespace) -> int:
+    """Run every suite in a lane/tier, continuing past failures (so one consolidated
+    report sees them all). Returns non-zero if any suite failed."""
+    jobs = select(
+        lane=args.lane, tier=args.tier, variant=args.variant, platform=args.platform
+    )
+    if not jobs:
+        print("::warning::no suites match the given filters", file=sys.stderr)
+        return 0
+    rc = 0
+    for s, var in jobs:
+        rc = run_suite(s, var, dry_run=args.dry_run) or rc
+    return rc
+
+
+def _cmd_matrix(args: argparse.Namespace) -> int:
+    """Print the GitHub Actions matrix for the given filters as JSON on stdout.
+
+    An empty selection still prints ``{"include": []}`` and returns 0; the
+    warning goes to stderr so stdout stays parseable.
+    """
+    include = matrix(
+        lane=args.lane, tier=args.tier, variant=args.variant, platform=args.platform
+    )
+    if not include:
+        print("::warning::matrix is empty for the given filters", file=sys.stderr)
+    print(json.dumps({"include": include}))
+    return 0
+
+
+def _duplicates(values: Iterable[str]) -> list[str]:
+    """Return, sorted, every value that occurs more than once in ``values``."""
+    return sorted(v for v, n in Counter(values).items() if n > 1)
+
+
+def _cmd_doctor(_: argparse.Namespace) -> int:
+    """Static checks CI can gate on: unique names, unique junit paths, valid setup
+    steps, existing cwd dirs, and a lane, variant and platform for every suite.
+
+    Prints each problem to stderr and returns 1, or prints a summary and returns 0.
+    """
+    problems: list[str] = []
+    dupes = _duplicates(s.name for s in SUITES)
+    if dupes:
+        problems.append(f"duplicate suite names: {dupes}")
+
+    junits = [junit_path(s).name for s in SUITES]
+    jdupes = _duplicates(junits)
+    if jdupes:
+        problems.append(f"colliding junit paths: {jdupes}")
+
+    for s in SUITES:
+        for step in s.setup:
+            if step not in _VALID_SETUP_STEPS:
+                problems.append(f"{s.name}: unknown setup step {step!r}")
+        if not s.lanes:
+            problems.append(f"{s.name}: belongs to no lane")
+        if not s.variants:
+            problems.append(f"{s.name}: runs on no variant")
+        if not s.platforms:
+            problems.append(f"{s.name}: runs on no platform")
+        cwd = REPO_ROOT / s.cwd
+        if not cwd.is_dir():
+            problems.append(f"{s.name}: cwd {s.cwd} does not exist")
+        # NOTE(review): any key of s.overrides also passes as a variant — confirm.
+        for var in s.variants:
+            if var not in (s.overrides.keys() | {"standard", "rtx"}):
+                problems.append(f"{s.name}: bad variant {var!r}")
+
+    if problems:
+        for p in problems:
+            print(f"✗ {p}", file=sys.stderr)
+        print(f"\n{len(problems)} manifest problem(s).", file=sys.stderr)
+        return 1
+    print(
+        f"✓ manifest OK — {len(SUITES)} suites, "
+        f"{len(set(junits))} unique junit paths, no collisions."
+    )
+    return 0
+
+
+def _add_selection_args(sp: argparse.ArgumentParser) -> None:
+    """Add the suite filters shared by ``run-lane`` and ``matrix``."""
+    g = sp.add_mutually_exclusive_group()  # a lane or a tier, never both
+    g.add_argument("--lane", choices=("fast", "full", "nightly", "python-only"))
+    g.add_argument("--tier", choices=("l0", "l1", "l2"))
+    sp.add_argument("--variant", choices=("standard", "rtx"))
+    sp.add_argument("--platform", choices=("linux-x86_64", "windows"))
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Parse ``argv`` (``sys.argv[1:]`` when None) and dispatch to the subcommand.
+
+    Returns the subcommand's exit status.
+    """
+    p = argparse.ArgumentParser(prog="python -m tests.ci", description=__doc__)
+    sub = p.add_subparsers(dest="cmd", required=True)
+
+    sub.add_parser("list", help="list all suites").set_defaults(fn=_cmd_list)
+
+    sp = sub.add_parser("show", help="show a suite's resolved command")
+    sp.add_argument("name")
+    sp.set_defaults(fn=_cmd_show)
+
+    sp = sub.add_parser("run", help="run one suite")
+    sp.add_argument("name")
+    sp.add_argument("--variant", choices=("standard", "rtx"))
+    sp.add_argument(
+        "--dry-run", action="store_true", help="print the command, don't run"
+    )
+    sp.add_argument(
+        "pytest_args",
+        nargs="*",
+        help="extra args forwarded to pytest (use `-- -k foo`)",
+    )
+    sp.set_defaults(fn=_cmd_run)
+
+    sp = sub.add_parser(
+        "run-lane", help="run every suite in a lane/tier, past failures"
+    )
+    _add_selection_args(sp)
+    sp.add_argument("--dry-run", action="store_true")
+    sp.set_defaults(fn=_cmd_run_lane)
+
+    sp = sub.add_parser("matrix", help="emit a GitHub Actions matrix as JSON")
+    _add_selection_args(sp)
+    sp.set_defaults(fn=_cmd_matrix)
+
+    sub.add_parser("doctor", help="validate the manifest").set_defaults(fn=_cmd_doctor)
+
+    args = p.parse_args(argv)
+    return args.fn(args)
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/ci/runner.py b/tests/ci/runner.py new file mode 100644 index 0000000000..96fa3ddd69 ---
# Repo root: tests/ci/runner.py -> parents[2]. Honor TRT_REPO_ROOT like the bash.
REPO_ROOT = Path(
    os.environ.get("TRT_REPO_ROOT", str(Path(__file__).resolve().parents[2]))
)

# Known transient cudagraph/TRT-driver flake signatures. Expand ONLY with
# concrete evidence — a broad regex hides real bugs.
_RERUN_ARGS = [
    "--reruns",
    "1",
    "--reruns-delay",
    "5",
    "--only-rerun",
    "cudaErrorStreamCaptureInvalidated",
    "--only-rerun",
    "Stream capture invalidated",
]


def _launcher() -> list[str]:
    """The python/pytest launcher. CI leaves PYTHON unset (-> container python);
    the justfile sets PYTHON='uv run --no-sync python' to use the built venv."""
    return shlex.split(os.environ.get("PYTHON", "python"))


def _results_dir() -> Path:
    """Directory that receives junit XML files; created if missing.

    Resolution order: ``RUNNER_TEST_RESULTS_DIR``; else ``trt_test_results``
    under ``TMPDIR``; else ``trt_test_results`` under the platform temp
    directory. The last fallback uses ``tempfile.gettempdir()`` rather than a
    literal ``/tmp`` so the path is also valid on the windows channel, and so
    an empty ``TMPDIR`` does not produce a cwd-relative directory.
    """
    import tempfile

    d = os.environ.get("RUNNER_TEST_RESULTS_DIR")
    if not d:
        tmp = os.environ.get("TMPDIR") or tempfile.gettempdir()
        d = str(Path(tmp) / "trt_test_results")
    Path(d).mkdir(parents=True, exist_ok=True)
    return Path(d)


def junit_path(suite: Suite) -> Path:
    """Unique per suite (no more four-runs-one-file collisions)."""
    return _results_dir() / f"{suite.name}.xml"


def _nproc(jobs: str | None) -> list[str]:
    """``-n`` token. TRT_JOBS overrides the suite default; jobs=None -> serial."""
    if jobs is None:
        return []
    return ["-n", os.environ.get("TRT_JOBS") or jobs]


def _reruns_enabled(reruns: bool) -> bool:
    """True when the suite opts into reruns and ``TRT_PYTEST_RERUNS`` is not ``"0"``."""
    return reruns and os.environ.get("TRT_PYTEST_RERUNS", "1") != "0"


def _expand_paths(cwd: Path, paths: tuple[str, ...]) -> list[str]:
    """Shell-style glob expansion (the bash relied on the shell for this).

    A pattern with ``*`` is expanded relative to ``cwd`` and sorted; a pattern
    that matches nothing is kept literal (matches bash nullglob-off behavior, so
    pytest reports the missing path rather than silently collecting 0 tests).
    """
    out: list[str] = []
    for p in paths:
        if "*" in p:
            matches = sorted(
                str(Path(m).relative_to(cwd)) for m in glob.glob(str(cwd / p))
            )
            out.extend(matches or [p])
        else:
            out.append(p)
    return out


def build_pytest_args(suite: Suite, variant: Variant) -> list[str]:
    """The pytest args (everything after ``-m pytest``) for this suite+variant.

    Order: rerun flags, ``-m``, ``-ra``, ``-n``, ``--junitxml``, ``--dist``,
    ``--maxfail``, ``--ir``, ``-k``, ``-v``, then the expanded test paths.
    """
    v = suite.for_variant(variant)
    cwd = REPO_ROOT / v["cwd"]
    args: list[str] = []
    if _reruns_enabled(v["reruns"]):
        args += _RERUN_ARGS
    if v["markers"]:
        args += ["-m", v["markers"]]
    args += ["-ra"]
    args += _nproc(v["jobs"])
    args += ["--junitxml", str(junit_path(suite))]
    if v["dist"]:
        # ``dist`` already carries the full flag, e.g. ``--dist=loadscope``.
        args += [v["dist"]]
    if v["maxfail"] is not None:
        args += [f"--maxfail={v['maxfail']}"]
    if v["ir"]:
        args += ["--ir", v["ir"]]
    if v["keyword"]:
        args += ["-k", v["keyword"]]
    if v["verbose"]:
        args += ["-v"]
    args += _expand_paths(cwd, tuple(v["paths"]))
    return args


def _setup_commands(step: str) -> list[tuple[list[str], Path]]:
    """(argv, cwd) pairs for a named setup step.

    Raises:
        KeyError: ``step`` is not one of hub / executorch / cuda-core / mpi.
    """
    launcher = _launcher()
    if step == "hub":
        return [(launcher + ["hub.py"], REPO_ROOT / "tests/modules")]
    if step == "executorch":
        return [
            (
                launcher + ["-m", "pip", "install", "pyyaml", "executorch>=1.3.1"],
                REPO_ROOT,
            )
        ]
    if step == "cuda-core":
        return [
            (launcher + ["-m", "pip", "install", "cuda-python", "cuda-core"], REPO_ROOT)
        ]
    if step == "mpi":
        return [
            (
                [
                    "dnf",
                    "install",
                    "-y",
                    "mpich",
                    "mpich-devel",
                    "openmpi",
                    "openmpi-devel",
                ],
                REPO_ROOT,
            )
        ]
    raise KeyError(f"unknown setup step {step!r} in a suite definition")


def describe(suite: Suite, variant: Variant) -> str:
    """The full command line, for --dry-run / show (quoting-safe display).

    One line per setup command, then the pytest command, then one line per
    follow command, each rendered as ``(cd <dir> && <command>)``.
    """
    v = suite.for_variant(variant)
    pre = []
    for step in v["setup"]:
        for argv, cwd in _setup_commands(step):
            pre.append(f" (cd {cwd.relative_to(REPO_ROOT)} && {shlex.join(argv)})")
    cmd = shlex.join(_launcher() + ["-m", "pytest"] + build_pytest_args(suite, variant))
    lines = pre + [f" (cd {v['cwd']} && {cmd})"]
    for f in v["follow"]:
        lines.append(f" (cd {v['cwd']} && {shlex.join(_launcher() + list(f))})")
    return "\n".join(lines)


def run_suite(
    suite: Suite,
    variant: Variant,
    *,
    dry_run: bool = False,
    extra: list[str] | None = None,
) -> int:
    """Run setup steps, the pytest command, then any follow commands.

    Args:
        suite: the manifest entry to run.
        variant: which variant's resolved fields to use.
        dry_run: print the command lines instead of running anything.
        extra: additional args appended to the pytest command.

    Returns:
        0 on success or dry run; the pytest exit code when pytest fails (follow
        commands are then skipped); otherwise the exit code of the first
        failing follow command. A failing setup step is only warned about and
        does not stop the run.
    """
    v = suite.for_variant(variant)
    extra = extra or []
    # Suite env is layered over the inherited environment; values are stringified.
    env = {**os.environ, **{k: str(val) for k, val in v["env"].items()}}
    cwd = REPO_ROOT / v["cwd"]
    pytest_cmd = (
        _launcher() + ["-m", "pytest"] + build_pytest_args(suite, variant) + extra
    )

    if dry_run:
        print(describe(suite, variant))
        if extra:
            print(f" # + extra pytest args: {shlex.join(extra)}")
        return 0

    for step in v["setup"]:
        for argv, scwd in _setup_commands(step):
            print(f"==> setup[{step}]: {shlex.join(argv)}", flush=True)
            rc = subprocess.run(argv, cwd=scwd, env=env).returncode
            if rc != 0:
                # Best effort: pytest surfaces the real problem if setup mattered.
                print(f"::warning::setup step {step!r} exited {rc}", flush=True)

    print(f"==> {suite.name} [{variant}]: {shlex.join(pytest_cmd)}", flush=True)
    rc = subprocess.run(pytest_cmd, cwd=cwd, env=env).returncode
    if rc != 0:
        repro = shlex.join(
            ["uv", "run", "--no-sync", "pytest"]
            + build_pytest_args(suite, variant)
            + extra
        )
        print(
            f"::warning::{suite.name} failed. Reproduce: cd {v['cwd']} && {repro}",
            flush=True,
        )
        return rc

    for f in v["follow"]:
        fcmd = _launcher() + list(f)
        print(f"==> {suite.name} follow: {shlex.join(fcmd)}", flush=True)
        frc = subprocess.run(fcmd, cwd=cwd, env=env).returncode
        if frc != 0:
            return frc
    return 0


def select(
    *,
    lane: str | None = None,
    tier: str | None = None,
    variant: str | None = None,
    platform: str | None = None,
    names: list[str] | None = None,
) -> list[tuple[Suite, Variant]]:
    """All (suite, variant) jobs matching the filters. No filter on an axis = all.

    ``names`` (when non-empty) restricts the pool to those suites, in the given
    order; ``by_name`` raises ``KeyError`` for an unknown name.
    """
    jobs: list[tuple[Suite, Variant]] = []
    pool = [by_name(n) for n in names] if names else list(SUITES)
    for s in pool:
        if lane is not None and lane not in s.lanes:
            continue
        if tier is not None and s.tier != tier:
            continue
        if platform is not None and platform not in s.platforms:
            continue
        for var in s.variants:
            if variant is not None and var != variant:
                continue
            jobs.append((s, var))
    return jobs


def matrix(**filters: str | None) -> list[dict[str, str]]:
    """GitHub-Actions matrix ``include`` entries for the selected jobs.

    ``filters`` are forwarded unchanged to ``select``.
    """
    return [
        {
            "suite": s.name,
            "variant": var,
            "tier": s.tier,
            "cwd": s.for_variant(var)["cwd"],
        }
        for s, var in select(**filters)
    ]
Each suite here owns a + unique junit name (derived from ``name`` + shard), and the redundant + ``conversion/`` re-run is dropped. + * L2 suites used raw ``pytest`` (no rerun wrapper / no repro hint); every suite + now runs through the same wrapper uniformly (gated by ``TRT_PYTEST_RERUNS``). + +Validate the manifest: ``python -m tests.ci doctor`` +Inspect a command: ``python -m tests.ci run --dry-run`` +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Literal + +Tier = Literal["l0", "l1", "l2"] +# python-only validates the PYTHON_ONLY=1 wheel (no C++ runtime) against the +# dynamo runtime suite. It's its own lane because it pairs a distinct BUILD mode +# with a focused test set, orthogonal to fast/full/nightly depth. +Lane = Literal["fast", "full", "nightly", "python-only"] +Variant = Literal["standard", "rtx"] +# Test channels. SBSA (linux-aarch64) is build-only — no GPU test runners — so it +# is a build channel handled at the workflow level, not a suite platform here. +Platform = Literal["linux-x86_64", "windows"] + +ALL_VARIANTS: tuple[Variant, ...] = ("standard", "rtx") +ALL_PLATFORMS: tuple[Platform, ...] = ("linux-x86_64", "windows") + + +@dataclass(frozen=True) +class Suite: + """One ``pytest`` invocation against one subsystem. + + Fields map 1:1 onto the command the runner builds. ``overrides`` lets a + single suite differ per variant (e.g. RTX selects a different path set) + without splitting it into two entries with two names. + """ + + name: str + tier: Tier + lanes: tuple[Lane, ...] + cwd: str = "tests/py/dynamo" # relative to repo root + paths: tuple[str, ...] 
= () # pytest positionals (rel to cwd); globs ok + markers: str | None = None # -m EXPR + keyword: str | None = None # -k EXPR + dist: str | None = None # --dist=loadscope + maxfail: int | None = None # --maxfail=N + ir: str | None = None # --ir torch_compile + jobs: str | None = None # xdist default: None=serial, "8"/"auto"/"4" + reruns: bool = True # wrap in the flake-rerun helper + verbose: bool = False # -v + variants: tuple[Variant, ...] = ALL_VARIANTS + platforms: tuple[Platform, ...] = ALL_PLATFORMS # channels this suite runs on + setup: tuple[str, ...] = () # named pre-steps: hub|executorch|cuda-core|mpi + follow: tuple[tuple[str, ...], ...] = () # extra argv to run AFTER pytest + env: dict[str, str] = field(default_factory=dict) + overrides: dict[str, dict[str, Any]] = field(default_factory=dict) # per-variant + + def for_variant(self, variant: Variant) -> dict[str, Any]: + """This suite's effective fields for ``variant`` (applies overrides).""" + base = { + f: getattr(self, f) + for f in ( + "cwd", + "paths", + "markers", + "keyword", + "dist", + "maxfail", + "ir", + "jobs", + "reruns", + "verbose", + "setup", + "follow", + "env", + ) + } + base.update(self.overrides.get(variant, {})) + return base + + +# ── L0 — smoke / fast lane ──────────────────────────────────────────────────── +_L0: list[Suite] = [ + Suite( + "dynamo-converters", + tier="l0", + lanes=("fast", "full"), + paths=("conversion/",), + dist="--dist=loadscope", + maxfail=20, + jobs="8", + # RTX does not shard converters with loadscope. + overrides={"rtx": {"dist": None}}, + ), + Suite( + "dynamo-runtime-smoke", + tier="l0", + lanes=("fast", "full"), + paths=("runtime/test_000_*",), + jobs="8", + ), + Suite( + "dynamo-partitioning-smoke", + tier="l0", + lanes=("fast", "full"), + paths=("partitioning/test_000_*",), + jobs="8", + # RTX runs the whole partitioning suite (no smoke subset split). 
+ overrides={"rtx": {"paths": ("partitioning/",)}}, + ), + Suite( + "dynamo-lowering", + tier="l0", + lanes=("fast", "full"), + paths=("lowering/",), + jobs="8", + ), + Suite( + "py-core", + tier="l0", + lanes=("fast", "full"), + cwd="tests/py/core", + paths=(".",), + jobs="8", + ), + Suite( + "ts-api", + tier="l0", + lanes=("fast", "full"), + cwd="tests/py/ts", + paths=("api/",), + setup=("hub",), + variants=("standard",), + ), +] + +# ── L1 — critical-path / full lane ──────────────────────────────────────────── +_L1: list[Suite] = [ + Suite( + "dynamo-runtime", + tier="l1", + lanes=("full",), + paths=("runtime/test_001_*",), + jobs="8", + ), + Suite( + "dynamo-partitioning", + tier="l1", + lanes=("full",), + paths=("partitioning/test_001_*",), + jobs="8", + variants=("standard",), + ), + Suite( + # Was run in BOTH l0_core (std) and l1_dynamo_core (both) — deduped to once. + "dynamo-hlo", + tier="l1", + lanes=("full",), + paths=("hlo/",), + jobs="8", + ), + Suite( + "dynamo-models-critical", + tier="l1", + lanes=("full",), + paths=("models/",), + markers="critical", + ), + Suite( + "torch-compile-backend", + tier="l1", + lanes=("full",), + paths=("backend/",), + ), + Suite( + "torch-compile-models-critical", + tier="l1", + lanes=("full",), + paths=("models/test_models.py", "models/test_dyn_models.py"), + markers="critical", + ir="torch_compile", + ), + Suite( + "ts-models", + tier="l1", + lanes=("full",), + cwd="tests/py/ts", + paths=("models/",), + setup=("hub",), + variants=("standard",), + ), +] + +# ── L2 — exhaustive / full + nightly ────────────────────────────────────────── +_L2: list[Suite] = [ + Suite( + "torch-compile-models", + tier="l2", + lanes=("full", "nightly"), + paths=("models/test_models.py", "models/test_dyn_models.py"), + markers="not critical", + ir="torch_compile", + jobs="auto", + ), + Suite( + "dynamo-models", + tier="l2", + lanes=("full", "nightly"), + paths=("models/",), + markers="not critical", + jobs="auto", + ), + Suite( + 
"dynamo-llm", + tier="l2", + lanes=("nightly",), + paths=("llm/",), + jobs="auto", + ), + Suite( + "dynamo-runtime-full", + tier="l2", + lanes=("full", "nightly"), + paths=("runtime/",), + keyword="not test_000_ and not test_001_", + jobs="auto", + ), + Suite( + "executorch", + tier="l2", + lanes=("nightly",), + paths=("executorch/",), + setup=("executorch",), + jobs="auto", + variants=("standard",), + platforms=("linux-x86_64",), + ), + Suite( + # Standard: the automatic-plugin trio. RTX: the whole automatic_plugin dir. + # (The redundant conversion/ re-run from the old l2_plugin is dropped.) + "plugins-automatic", + tier="l2", + lanes=("nightly",), + jobs="auto", + paths=( + "automatic_plugin/test_automatic_plugin.py", + "automatic_plugin/test_automatic_plugin_with_attrs.py", + "automatic_plugin/test_flashinfer_rmsnorm.py", + ), + overrides={"rtx": {"paths": ("automatic_plugin/",)}}, + ), + Suite( + "kernels", + tier="l2", + lanes=("nightly",), + cwd="tests/py/kernels", + paths=(".",), + setup=("cuda-core",), + jobs="auto", + variants=("standard",), + platforms=("linux-x86_64",), + ), + Suite( + "ts-integrations", + tier="l2", + lanes=("nightly",), + cwd="tests/py/ts", + paths=("integrations/",), + setup=("hub",), + jobs="auto", + variants=("standard",), + ), + Suite( + "distributed", + tier="l2", + lanes=("nightly",), + paths=( + "distributed/test_nccl_ops.py", + "distributed/test_native_nccl.py", + "distributed/test_export_save_load.py", + ), + jobs="auto", + verbose=True, + reruns=False, + variants=("standard",), + platforms=("linux-x86_64",), + setup=("mpi",), + env={"USE_HOST_DEPS": "1", "CI_BUILD": "1", "USE_TRTLLM_PLUGINS": "1"}, + follow=( + ( + "-m", + "torch_tensorrt.distributed.run", + "--nproc_per_node=2", + "distributed/test_native_nccl.py", + "--multirank", + ), + ( + "-m", + "torch_tensorrt.distributed.run", + "--nproc_per_node=2", + "distributed/test_export_save_load.py", + "--multirank", + ), + ), + ), +] + +# ── python-only — validates the 
PYTHON_ONLY=1 wheel against the runtime suite ── +_PYTHON_ONLY: list[Suite] = [ + Suite( + "python-only-runtime", + tier="l1", + lanes=("python-only",), + paths=("runtime/",), + jobs="8", + # Runs for BOTH backends: the PYTHON_ONLY=1 wheel is validated against + # standard TensorRT and TensorRT-RTX (variants default to both). + ), +] + +SUITES: tuple[Suite, ...] = tuple(_L0 + _L1 + _L2 + _PYTHON_ONLY) + + +def by_name(name: str) -> Suite: + for s in SUITES: + if s.name == name: + return s + raise KeyError(f"no suite named {name!r}; try `python -m tests.ci list`")