Skip to content

[CI] Update runner configuration for setup and nightly tests #9005

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions .github/workflows/build_docker_images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ env:

jobs:
test-build-docker-images:
runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
runs-on:
group: aws-general-8-plus
if: github.event_name == 'pull_request'
steps:
- name: Set up Docker Buildx
Expand Down Expand Up @@ -50,7 +51,8 @@ jobs:
if: steps.file_changes.outputs.all != ''

build-and-push-docker-images:
runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
runs-on:
group: aws-general-8-plus
if: github.event_name != 'pull_request'

permissions:
Expand Down Expand Up @@ -98,4 +100,4 @@ jobs:
slack_channel: ${{ env.CI_SLACK_CHANNEL }}
title: "🤗 Results of the ${{ matrix.image-name }} Docker Image build"
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
12 changes: 8 additions & 4 deletions .github/workflows/nightly_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ env:
jobs:
setup_torch_cuda_pipeline_matrix:
name: Setup Torch Pipelines CUDA Slow Tests Matrix
runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
runs-on:
group: aws-general-8-plus
container:
image: diffusers/diffusers-pytorch-cpu
outputs:
Expand Down Expand Up @@ -55,7 +56,8 @@ jobs:
max-parallel: 8
matrix:
module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
runs-on: [single-gpu, nvidia-gpu, t4, ci]
runs-on:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus 0
Expand Down Expand Up @@ -105,7 +107,8 @@ jobs:

run_nightly_tests_for_other_torch_modules:
name: Nightly Torch CUDA Tests
runs-on: [single-gpu, nvidia-gpu, t4, ci]
runs-on:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus 0
Expand Down Expand Up @@ -234,7 +237,8 @@ jobs:

run_nightly_onnx_tests:
name: Nightly ONNXRuntime CUDA tests on Ubuntu
runs-on: [single-gpu, nvidia-gpu, t4, ci]
runs-on:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-onnxruntime-cuda
options: --gpus 0 --shm-size "16gb" --ipc host
Expand Down
11 changes: 7 additions & 4 deletions .github/workflows/pr_test_fetcher.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ concurrency:
jobs:
setup_pr_tests:
name: Setup PR Tests
runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
runs-on:
group: aws-general-8-plus
container:
image: diffusers/diffusers-pytorch-cpu
options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
Expand Down Expand Up @@ -73,7 +74,8 @@ jobs:
max-parallel: 2
matrix:
modules: ${{ fromJson(needs.setup_pr_tests.outputs.matrix) }}
runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
runs-on:
group: aws-general-8-plus
container:
image: diffusers/diffusers-pytorch-cpu
options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
Expand Down Expand Up @@ -123,12 +125,13 @@ jobs:
config:
- name: Hub tests for models, schedulers, and pipelines
framework: hub_tests_pytorch
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
runner: aws-general-8-plus
image: diffusers/diffusers-pytorch-cpu
report: torch_hub

name: ${{ matrix.config.name }}
runs-on: ${{ matrix.config.runner }}
runs-on:
group: ${{ matrix.config.runner }}
container:
image: ${{ matrix.config.image }}
options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/pr_test_peft_backend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ jobs:

name: LoRA - ${{ matrix.lib-versions }}

runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
runs-on:
group: aws-general-8-plus

container:
image: diffusers/diffusers-pytorch-cpu
Expand Down Expand Up @@ -128,4 +129,4 @@ jobs:
uses: actions/upload-artifact@v2
with:
name: pr_${{ matrix.config.report }}_test_reports
path: reports
path: reports
14 changes: 8 additions & 6 deletions .github/workflows/pr_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,28 +77,29 @@ jobs:
config:
- name: Fast PyTorch Pipeline CPU tests
framework: pytorch_pipelines
runner: [ self-hosted, intel-cpu, 32-cpu, 256-ram, ci ]
runner: aws-highmemory-32-plus
image: diffusers/diffusers-pytorch-cpu
report: torch_cpu_pipelines
- name: Fast PyTorch Models & Schedulers CPU tests
framework: pytorch_models
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
runner: aws-general-8-plus
image: diffusers/diffusers-pytorch-cpu
report: torch_cpu_models_schedulers
- name: Fast Flax CPU tests
framework: flax
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
runner: aws-general-8-plus
image: diffusers/diffusers-flax-cpu
report: flax_cpu
- name: PyTorch Example CPU tests
framework: pytorch_examples
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
runner: aws-general-8-plus
image: diffusers/diffusers-pytorch-cpu
report: torch_example_cpu

name: ${{ matrix.config.name }}

runs-on: ${{ matrix.config.runner }}
runs-on:
group: ${{ matrix.config.runner }}

container:
image: ${{ matrix.config.image }}
Expand Down Expand Up @@ -180,7 +181,8 @@ jobs:
config:
- name: Hub tests for models, schedulers, and pipelines
framework: hub_tests_pytorch
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
runner:
group: aws-general-8-plus
image: diffusers/diffusers-pytorch-cpu
report: torch_hub

Expand Down
21 changes: 14 additions & 7 deletions .github/workflows/push_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ env:
jobs:
setup_torch_cuda_pipeline_matrix:
name: Setup Torch Pipelines CUDA Slow Tests Matrix
runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
runs-on:
group: aws-general-8-plus
container:
image: diffusers/diffusers-pytorch-cpu
outputs:
Expand Down Expand Up @@ -57,7 +58,8 @@ jobs:
max-parallel: 8
matrix:
module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
runs-on: [single-gpu, nvidia-gpu, t4, ci]
runs-on:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus 0
Expand Down Expand Up @@ -101,7 +103,8 @@ jobs:

torch_cuda_tests:
name: Torch CUDA Tests
runs-on: [single-gpu, nvidia-gpu, t4, ci]
runs-on:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus 0
Expand Down Expand Up @@ -201,7 +204,8 @@ jobs:

onnx_cuda_tests:
name: ONNX CUDA Tests
runs-on: [single-gpu, nvidia-gpu, t4, ci]
runs-on:
group: aws-g4dn-2xlarge
container:
image: diffusers/diffusers-onnxruntime-cuda
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ --gpus 0
Expand Down Expand Up @@ -249,7 +253,8 @@ jobs:
run_torch_compile_tests:
name: PyTorch Compile CUDA tests

runs-on: [single-gpu, nvidia-gpu, t4, ci]
runs-on:
group: aws-g4dn-2xlarge

container:
image: diffusers/diffusers-pytorch-compile-cuda
Expand Down Expand Up @@ -291,7 +296,8 @@ jobs:
run_xformers_tests:
name: PyTorch xformers CUDA tests

runs-on: [single-gpu, nvidia-gpu, t4, ci]
runs-on:
group: aws-g4dn-2xlarge

container:
image: diffusers/diffusers-pytorch-xformers-cuda
Expand Down Expand Up @@ -332,7 +338,8 @@ jobs:
run_examples_tests:
name: Examples PyTorch CUDA tests on Ubuntu

runs-on: [single-gpu, nvidia-gpu, t4, ci]
runs-on:
group: aws-g4dn-2xlarge

container:
image: diffusers/diffusers-pytorch-cuda
Expand Down
11 changes: 6 additions & 5 deletions .github/workflows/push_tests_fast.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,28 +29,29 @@ jobs:
config:
- name: Fast PyTorch CPU tests on Ubuntu
framework: pytorch
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
runner: aws-general-8-plus
image: diffusers/diffusers-pytorch-cpu
report: torch_cpu
- name: Fast Flax CPU tests on Ubuntu
framework: flax
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
runner: aws-general-8-plus
image: diffusers/diffusers-flax-cpu
report: flax_cpu
- name: Fast ONNXRuntime CPU tests on Ubuntu
framework: onnxruntime
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
runner: aws-general-8-plus
image: diffusers/diffusers-onnxruntime-cpu
report: onnx_cpu
- name: PyTorch Example CPU tests on Ubuntu
framework: pytorch_examples
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
runner: aws-general-8-plus
image: diffusers/diffusers-pytorch-cpu
report: torch_example_cpu

name: ${{ matrix.config.name }}

runs-on: ${{ matrix.config.runner }}
runs-on:
group: ${{ matrix.config.runner }}

container:
image: ${{ matrix.config.image }}
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/run_tests_from_a_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ env:
jobs:
run_tests:
name: "Run a test on our runner from a PR"
runs-on: [single-gpu, nvidia-gpu, t4, ci]
runs-on:
group: aws-g4dn-2xlarge
container:
image: ${{ github.event.inputs.docker_image }}
options: --gpus 0 --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
Expand Down Expand Up @@ -70,4 +71,4 @@ jobs:
env:
PY_TEST: ${{ github.event.inputs.test }}
run: |
pytest "$PY_TEST"
pytest "$PY_TEST"
3 changes: 2 additions & 1 deletion .github/workflows/ssh-pr-runner.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ env:
jobs:
ssh_runner:
name: "SSH"
runs-on: [self-hosted, intel-cpu, 32-cpu, 256-ram, ci]
runs-on:
group: aws-highmemory-32-plus
container:
image: ${{ github.event.inputs.docker_image }}
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --privileged
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/ssh-runner.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ env:
jobs:
ssh_runner:
name: "SSH"
runs-on: [single-gpu, nvidia-gpu, "${{ github.event.inputs.runner_type }}", ci]
runs-on:
group: "${{ github.event.inputs.runner_type }}"
container:
image: ${{ github.event.inputs.docker_image }}
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0 --privileged
Expand Down
Loading