From ccc0132e98776cd2c0a8d708df1299118d7f72c3 Mon Sep 17 00:00:00 2001 From: anton-l Date: Mon, 3 Oct 2022 18:47:45 +0200 Subject: [PATCH 01/10] [CI] Localize the HF cache --- .github/workflows/pr_tests.yml | 2 +- .github/workflows/push_tests.yml | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml index f63d4ffda464..248876f97406 100644 --- a/.github/workflows/pr_tests.yml +++ b/.github/workflows/pr_tests.yml @@ -21,7 +21,7 @@ jobs: runs-on: [ self-hosted, docker-gpu ] container: image: python:3.7 - options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ -v /mnt/pip_cache:/root/.cache/pip steps: - name: Checkout diffusers diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml index 3db6814e071d..064620070c7a 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -4,6 +4,9 @@ on: push: branches: - main + pull_request: + branches: + - main env: HF_HOME: /mnt/cache @@ -22,7 +25,7 @@ jobs: runs-on: [ self-hosted, docker-gpu, '${{ matrix.machine_type }}' ] container: image: nvcr.io/nvidia/pytorch:22.07-py3 - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ -v /mnt/pip_cache:/root/.cache/pip steps: - name: Checkout diffusers @@ -73,7 +76,7 @@ jobs: runs-on: [ self-hosted, docker-gpu, '${{ matrix.machine_type }}' ] container: image: nvcr.io/nvidia/pytorch:22.07-py3 - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ -v /mnt/pip_cache:/root/.cache/pip steps: - name: Checkout diffusers From a50a0fcd24c97f1c5aae5952ca98973160b1358c Mon Sep 17 00:00:00 2001 From: anton-l Date: Mon, 3 Oct 2022 18:57:41 +0200 Subject: [PATCH 02/10] pip cache --- .github/workflows/push_tests.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml index 064620070c7a..38ef5416eead 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -10,6 +10,7 @@ on: env: HF_HOME: /mnt/cache + PIP_CACHE: /mnt/pip_cache OMP_NUM_THREADS: 8 MKL_NUM_THREADS: 8 PYTEST_TIMEOUT: 1000 @@ -25,7 +26,7 @@ jobs: runs-on: [ self-hosted, docker-gpu, '${{ matrix.machine_type }}' ] container: image: nvcr.io/nvidia/pytorch:22.07-py3 - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ -v /mnt/pip_cache:/root/.cache/pip + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:"$HF_HOME" -v /mnt/pip_cache:"$PIP_CACHE" steps: - name: Checkout diffusers @@ -41,8 +42,8 @@ jobs: run: | python -m pip install --upgrade pip python -m pip uninstall -y torch torchvision torchtext - python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu116 - python -m pip install -e .[quality,test] + python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu116 --cache-dir="$PIP_CACHE" + python -m pip install -e .[quality,test] --cache-dir="$PIP_CACHE" - name: Environment run: | @@ -76,7 +77,7 @@ jobs: runs-on: [ self-hosted, docker-gpu, '${{ matrix.machine_type }}' ] container: image: nvcr.io/nvidia/pytorch:22.07-py3 - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ -v /mnt/pip_cache:/root/.cache/pip + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:"$HF_HOME" -v /mnt/pip_cache:"$PIP_CACHE" steps: - name: Checkout diffusers @@ -92,8 +93,8 @@ jobs: run: | python -m pip install --upgrade pip python -m pip uninstall -y torch torchvision torchtext - python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu116 - python -m pip install -e .[quality,test,training] + python -m pip --cache-dir=/mnt/pip_cache install torch --extra-index-url https://download.pytorch.org/whl/cu116 --cache-dir="$PIP_CACHE" + python -m pip install -e .[quality,test,training] --cache-dir="$PIP_CACHE" - name: Environment run: | From e3e39279e4ba794916d79acd47e009560d4c2d99 Mon Sep 17 00:00:00 2001 From: anton-l Date: Mon, 3 Oct 2022 19:58:26 +0200 Subject: [PATCH 03/10] de-env --- .github/workflows/push_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml index 38ef5416eead..3c6f32c62a20 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -26,7 +26,7 @@ jobs: runs-on: [ self-hosted, docker-gpu, '${{ matrix.machine_type }}' ] container: image: nvcr.io/nvidia/pytorch:22.07-py3 - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:"$HF_HOME" -v /mnt/pip_cache:"$PIP_CACHE" + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache -v /mnt/pip_cache:/mnt/pip_cache steps: - name: Checkout diffusers @@ -77,7 +77,7 @@ jobs: runs-on: [ self-hosted, docker-gpu, '${{ matrix.machine_type }}' ] container: image: nvcr.io/nvidia/pytorch:22.07-py3 - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:"$HF_HOME" -v /mnt/pip_cache:"$PIP_CACHE" + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache -v /mnt/pip_cache:/mnt/pip_cache steps: - name: Checkout diffusers From 73d5f9864d04e59c8a94bd5bc1a62b79b19678d5 Mon Sep 17 00:00:00 2001 From: anton-l Date: Mon, 3 Oct 2022 20:16:26 +0200 Subject: [PATCH 04/10] refactor matrix --- .github/workflows/pr_tests.yml | 6 +++--- .github/workflows/push_tests.yml | 12 ++---------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml index 248876f97406..372f654d7d8f 100644 --- a/.github/workflows/pr_tests.yml +++ b/.github/workflows/pr_tests.yml @@ -21,7 +21,7 @@ jobs: runs-on: [ self-hosted, docker-gpu ] container: image: python:3.7 - options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ -v /mnt/pip_cache:/root/.cache/pip + options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ -v /mnt/pip_cache:/mnt/pip_cache steps: - name: Checkout diffusers @@ -32,8 +32,8 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cpu - python -m pip install -e .[quality,test] + python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cpu --cache-dir="$PIP_CACHE" + python -m pip install -e .[quality,test] --cache-dir="$PIP_CACHE" - name: Environment run: | diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml index 3c6f32c62a20..1909aba63462 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -19,11 +19,7 @@ env: jobs: run_tests_single_gpu: name: Diffusers tests - strategy: - fail-fast: false - matrix: - machine_type: [ single-gpu ] - runs-on: [ self-hosted, docker-gpu, '${{ matrix.machine_type }}' ] + runs-on: [ self-hosted, docker-gpu, single-gpu ] container: image: nvcr.io/nvidia/pytorch:22.07-py3 options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache -v /mnt/pip_cache:/mnt/pip_cache @@ -70,11 +66,7 @@ jobs: run_examples_single_gpu: name: Examples tests - strategy: - fail-fast: false - matrix: - machine_type: [ single-gpu ] - runs-on: [ self-hosted, docker-gpu, '${{ matrix.machine_type }}' ] + runs-on: [ self-hosted, docker-gpu, single-gpu ] container: image: nvcr.io/nvidia/pytorch:22.07-py3 options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache -v /mnt/pip_cache:/mnt/pip_cache From 11a8e979ce6be967bd698dfbf81135756f82b0b7 Mon Sep 17 00:00:00 2001 From: anton-l Date: Mon, 3 Oct 2022 20:18:10 +0200 Subject: [PATCH 05/10] fix fast cache --- .github/workflows/pr_tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml index 372f654d7d8f..198080d2c03a 100644 --- a/.github/workflows/pr_tests.yml +++ b/.github/workflows/pr_tests.yml @@ -11,6 +11,7 @@ concurrency: env: HF_HOME: /mnt/cache + PIP_CACHE: /mnt/pip_cache OMP_NUM_THREADS: 8 MKL_NUM_THREADS: 8 PYTEST_TIMEOUT: 60 From f9ab705f88c4eda8515a62c6ee5b413e4157cfa7 Mon Sep 17 00:00:00 2001 From: anton-l Date: Mon, 3 Oct 2022 21:29:59 +0200 Subject: [PATCH 06/10] less onnx steps --- .github/workflows/pr_tests.yml | 2 +- setup.py | 4 ++-- src/diffusers/dependency_versions_table.py | 2 +- tests/test_pipelines.py | 12 ++++++------ 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml index 198080d2c03a..cdfd9c24996b 100644 --- a/.github/workflows/pr_tests.yml +++ b/.github/workflows/pr_tests.yml @@ -18,7 +18,7 @@ env: jobs: run_tests_cpu: - name: Diffusers tests + name: PyTorch tests runs-on: [ self-hosted, docker-gpu ] container: image: python:3.7 diff --git a/setup.py b/setup.py index ff5f14564487..2e87d75960ca 100644 --- a/setup.py +++ b/setup.py @@ -92,7 +92,7 @@ "jaxlib>=0.1.65,<=0.3.6", "modelcards>=0.1.4", "numpy", - "onnxruntime-gpu", + "onnxruntime", "pytest", "pytest-timeout", "pytest-xdist", @@ -178,7 +178,7 @@ def run(self): extras["training"] = deps_list("accelerate", "datasets", "tensorboard", "modelcards") extras["test"] = deps_list( "datasets", - "onnxruntime-gpu", + "onnxruntime", "pytest", "pytest-timeout", "pytest-xdist", diff --git a/src/diffusers/dependency_versions_table.py b/src/diffusers/dependency_versions_table.py index 82ca5dbb6f56..367b5c57a262 100644 --- a/src/diffusers/dependency_versions_table.py +++ b/src/diffusers/dependency_versions_table.py @@ -17,7 +17,7 @@ "jaxlib": "jaxlib>=0.1.65,<=0.3.6", "modelcards": "modelcards>=0.1.4", "numpy": "numpy", - "onnxruntime-gpu": "onnxruntime-gpu", + "onnxruntime": "onnxruntime", "pytest": "pytest", "pytest-timeout": "pytest-timeout", "pytest-xdist": "pytest-xdist", diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index d0d78171378e..cde652f3b828 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -1422,18 +1422,18 @@ def test_stable_diffusion_inpaint_pipeline_k_lms(self): @slow def test_stable_diffusion_onnx(self): sd_pipe = StableDiffusionOnnxPipeline.from_pretrained( - "CompVis/stable-diffusion-v1-4", revision="onnx", provider="CUDAExecutionProvider", use_auth_token=True + "CompVis/stable-diffusion-v1-4", revision="onnx", provider="CPUExecutionProvider", use_auth_token=True ) prompt = "A painting of a squirrel eating a burger" np.random.seed(0) - output = sd_pipe([prompt], guidance_scale=6.0, num_inference_steps=20, output_type="np") + output = sd_pipe([prompt], guidance_scale=6.0, num_inference_steps=5, output_type="np") image = output.images image_slice = image[0, -3:, -3:, -1] assert image.shape == (1, 512, 512, 3) - expected_slice = np.array([0.0385, 0.0252, 0.0234, 0.0287, 0.0358, 0.0287, 0.0276, 0.0235, 0.0010]) + expected_slice = np.array([0.3602, 0.3688, 0.3652, 0.3895, 0.3782, 0.3747, 0.3927, 0.4241, 0.4327]) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 @slow @@ -1592,7 +1592,7 @@ def test_callback_fn(step: int, timestep: int, latents: np.ndarray) -> None: assert latents.shape == (1, 4, 64, 64) latents_slice = latents[0, -3:, -3:, -1] expected_slice = np.array( - [-0.6254, -0.2742, -1.0710, 0.2296, -1.1683, 0.6913, -2.0605, -0.0682, 0.9700] + [-0.5950, -0.3039, -1.1672, 0.1594, -1.1572, 0.6719, -1.9712, -0.0403, 0.9592] ) assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3 @@ -1606,6 +1606,6 @@ def test_callback_fn(step: int, timestep: int, latents: np.ndarray) -> None: prompt = "Andromeda galaxy in a bottle" np.random.seed(0) - pipe(prompt=prompt, num_inference_steps=50, guidance_scale=7.5, callback=test_callback_fn, callback_steps=1) + pipe(prompt=prompt, num_inference_steps=5, guidance_scale=7.5, callback=test_callback_fn, callback_steps=1) assert test_callback_fn.has_been_called - assert number_of_steps == 51 + assert number_of_steps == 6 From 1ef3ead5898fdffeb3519a5b5428f5329441f40e Mon Sep 17 00:00:00 2001 From: anton-l Date: Mon, 3 Oct 2022 21:30:48 +0200 Subject: [PATCH 07/10] revert --- .github/workflows/pr_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml index cdfd9c24996b..198080d2c03a 100644 --- a/.github/workflows/pr_tests.yml +++ b/.github/workflows/pr_tests.yml @@ -18,7 +18,7 @@ env: jobs: run_tests_cpu: - name: PyTorch tests + name: Diffusers tests runs-on: [ self-hosted, docker-gpu ] container: image: python:3.7 From 7134bbc96ac1998e9aa8d88605016b201a574711 Mon Sep 17 00:00:00 2001 From: anton-l Date: Mon, 3 Oct 2022 21:47:03 +0200 Subject: [PATCH 08/10] revert pip cache --- .github/workflows/pr_tests.yml | 5 ++--- .github/workflows/push_tests.yml | 13 ++++++------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml index 198080d2c03a..f031ae670cc1 100644 --- a/.github/workflows/pr_tests.yml +++ b/.github/workflows/pr_tests.yml @@ -11,7 +11,6 @@ concurrency: env: HF_HOME: /mnt/cache - PIP_CACHE: /mnt/pip_cache OMP_NUM_THREADS: 8 MKL_NUM_THREADS: 8 PYTEST_TIMEOUT: 60 @@ -22,7 +21,7 @@ jobs: runs-on: [ self-hosted, docker-gpu ] container: image: python:3.7 - options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ -v /mnt/pip_cache:/mnt/pip_cache + options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ steps: - name: Checkout diffusers @@ -34,7 +33,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cpu --cache-dir="$PIP_CACHE" - python -m pip install -e .[quality,test] --cache-dir="$PIP_CACHE" + python -m pip install -e .[quality,test] - name: Environment run: | diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml index 1909aba63462..20997da45ed7 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -10,7 +10,6 @@ on: env: HF_HOME: /mnt/cache - PIP_CACHE: /mnt/pip_cache OMP_NUM_THREADS: 8 MKL_NUM_THREADS: 8 PYTEST_TIMEOUT: 1000 @@ -22,7 +21,7 @@ jobs: runs-on: [ self-hosted, docker-gpu, single-gpu ] container: image: nvcr.io/nvidia/pytorch:22.07-py3 - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache -v /mnt/pip_cache:/mnt/pip_cache + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache steps: - name: Checkout diffusers @@ -38,8 +37,8 @@ jobs: run: | python -m pip install --upgrade pip python -m pip uninstall -y torch torchvision torchtext - python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu116 --cache-dir="$PIP_CACHE" - python -m pip install -e .[quality,test] --cache-dir="$PIP_CACHE" + python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu116 + python -m pip install -e .[quality,test] - name: Environment run: | @@ -69,7 +68,7 @@ jobs: runs-on: [ self-hosted, docker-gpu, single-gpu ] container: image: nvcr.io/nvidia/pytorch:22.07-py3 - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache -v /mnt/pip_cache:/mnt/pip_cache + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache steps: - name: Checkout diffusers @@ -85,8 +84,8 @@ jobs: run: | python -m pip install --upgrade pip python -m pip uninstall -y torch torchvision torchtext - python -m pip --cache-dir=/mnt/pip_cache install torch --extra-index-url https://download.pytorch.org/whl/cu116 --cache-dir="$PIP_CACHE" - python -m pip install -e .[quality,test,training] --cache-dir="$PIP_CACHE" + python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu116 + python -m pip install -e .[quality,test,training] - name: Environment run: | From 882bfd47d015aafcbdadecbf4753e8724de351ae Mon Sep 17 00:00:00 2001 From: anton-l Date: Mon, 3 Oct 2022 21:48:00 +0200 Subject: [PATCH 09/10] revert pip cache --- .github/workflows/pr_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml index f031ae670cc1..c25aa888f459 100644 --- a/.github/workflows/pr_tests.yml +++ b/.github/workflows/pr_tests.yml @@ -32,7 +32,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cpu --cache-dir="$PIP_CACHE" + python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cpu python -m pip install -e .[quality,test] - name: Environment From a158a60ea64efaf1514e1f1331e9bfbd59f639a3 Mon Sep 17 00:00:00 2001 From: anton-l Date: Mon, 3 Oct 2022 22:11:06 +0200 Subject: [PATCH 10/10] remove debugging trigger --- .github/workflows/push_tests.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml index 20997da45ed7..3e4a81c91c01 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -4,9 +4,6 @@ on: push: branches: - main - pull_request: - branches: - - main env: HF_HOME: /mnt/cache