From 995802c84e3abb66e2f79be462b7f9afc622f42c Mon Sep 17 00:00:00 2001
From: anton-l
Date: Thu, 1 Sep 2022 14:05:43 +0200
Subject: [PATCH 1/2] Fix nondeterministic tests for GPU runs

---
 tests/test_pipelines.py |  6 +++++-
 tests/test_training.py  | 17 ++++++++---------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
index 168fd58ff703..6d50eca153ff 100644
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -235,6 +235,7 @@ def test_ldm_text2img(self):
         expected_slice = np.array([0.5074, 0.5026, 0.4998, 0.4056, 0.3523, 0.4649, 0.5289, 0.5299, 0.4897])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

+    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_ddim(self):
         unet = self.dummy_cond_unet
         scheduler = DDIMScheduler(
@@ -276,6 +277,7 @@ def test_stable_diffusion_ddim(self):
         expected_slice = np.array([0.5112, 0.4692, 0.4715, 0.5206, 0.4894, 0.5114, 0.5096, 0.4932, 0.4755])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

+    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_pndm(self):
         unet = self.dummy_cond_unet
         scheduler = PNDMScheduler(tensor_format="pt", skip_prk_steps=True)
@@ -310,9 +312,9 @@ def test_stable_diffusion_pndm(self):
         expected_slice = np.array([0.4937, 0.4649, 0.4716, 0.5145, 0.4889, 0.513, 0.513, 0.4905, 0.4738])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

+    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_k_lms(self):
         unet = self.dummy_cond_unet
-        scheduler = PNDMScheduler(tensor_format="pt", skip_prk_steps=True)
         scheduler = LMSDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear")
         vae = self.dummy_vae
         bert = self.dummy_text_encoder
@@ -394,6 +396,7 @@ def test_karras_ve_pipeline(self):
         expected_slice = np.array([0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

+    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_img2img(self):
         unet = self.dummy_cond_unet
         scheduler = PNDMScheduler(tensor_format="pt", skip_prk_steps=True)
@@ -435,6 +438,7 @@ def test_stable_diffusion_img2img(self):
         expected_slice = np.array([0.4492, 0.3865, 0.4222, 0.5854, 0.5139, 0.4379, 0.4193, 0.48, 0.4218])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

+    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_inpaint(self):
         unet = self.dummy_cond_unet
         scheduler = PNDMScheduler(tensor_format="pt", skip_prk_steps=True)
diff --git a/tests/test_training.py b/tests/test_training.py
index 48903c37ce82..a9d330ef6af0 100644
--- a/tests/test_training.py
+++ b/tests/test_training.py
@@ -18,8 +18,8 @@
 import torch

 from diffusers import DDIMScheduler, DDPMScheduler, UNet2DModel
-from diffusers.testing_utils import slow, torch_device
-from diffusers.training_utils import enable_full_determinism, set_seed
+from diffusers.testing_utils import slow
+from diffusers.training_utils import set_seed


 torch.backends.cuda.matmul.allow_tf32 = False
@@ -34,8 +34,7 @@ def get_model_optimizer(self, resolution=32):

     @slow
     def test_training_step_equality(self):
-        enable_full_determinism(0)
-
+        device = "cpu"  # ensure full determinism without setting the CUBLAS_WORKSPACE_CONFIG env variable
         ddpm_scheduler = DDPMScheduler(
             num_train_timesteps=1000,
             beta_start=0.0001,
@@ -57,13 +56,13 @@ def test_training_step_equality(self):

         # shared batches for DDPM and DDIM
         set_seed(0)
-        clean_images = [torch.randn((4, 3, 32, 32)).clip(-1, 1).to(torch_device) for _ in range(4)]
-        noise = [torch.randn((4, 3, 32, 32)).to(torch_device) for _ in range(4)]
-        timesteps = [torch.randint(0, 1000, (4,)).long().to(torch_device) for _ in range(4)]
+        clean_images = [torch.randn((4, 3, 32, 32)).clip(-1, 1).to(device) for _ in range(4)]
+        noise = [torch.randn((4, 3, 32, 32)).to(device) for _ in range(4)]
+        timesteps = [torch.randint(0, 1000, (4,)).long().to(device) for _ in range(4)]

         # train with a DDPM scheduler
         model, optimizer = self.get_model_optimizer(resolution=32)
-        model.train().to(torch_device)
+        model.train().to(device)
         for i in range(4):
             optimizer.zero_grad()
             ddpm_noisy_images = ddpm_scheduler.add_noise(clean_images[i], noise[i], timesteps[i])
@@ -75,7 +74,7 @@ def test_training_step_equality(self):

         # recreate the model and optimizer, and retry with DDIM
         model, optimizer = self.get_model_optimizer(resolution=32)
-        model.train().to(torch_device)
+        model.train().to(device)
         for i in range(4):
             optimizer.zero_grad()
             ddim_noisy_images = ddim_scheduler.add_noise(clean_images[i], noise[i], timesteps[i])

From 8445bed62acac4b19189fbccf09fc7c5fec8da48 Mon Sep 17 00:00:00 2001
From: anton-l
Date: Thu, 1 Sep 2022 14:56:09 +0200
Subject: [PATCH 2/2] force SD fast tests to the CPU

---
 tests/test_pipelines.py | 85 ++++++++++++++++++-----------------------
 1 file changed, 37 insertions(+), 48 deletions(-)

diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
index 6d50eca153ff..011604775558 100644
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -235,8 +235,8 @@ def test_ldm_text2img(self):
         expected_slice = np.array([0.5074, 0.5026, 0.4998, 0.4056, 0.3523, 0.4649, 0.5289, 0.5299, 0.4897])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

-    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_ddim(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         unet = self.dummy_cond_unet
         scheduler = DDIMScheduler(
             beta_start=0.00085,
@@ -260,14 +260,11 @@ def test_stable_diffusion_ddim(self):
             safety_checker=self.dummy_safety_checker,
             feature_extractor=self.dummy_extractor,
         )
-        sd_pipe = sd_pipe.to(torch_device)
+        sd_pipe = sd_pipe.to(device)

         prompt = "A painting of a squirrel eating a burger"
-        generator = torch.Generator(device=torch_device).manual_seed(0)
-        with torch.autocast("cuda"):
-            output = sd_pipe(
-                [prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np"
-            )
+        generator = torch.Generator(device=device).manual_seed(0)
+        output = sd_pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np")

         image = output["sample"]
@@ -277,8 +274,8 @@ def test_stable_diffusion_ddim(self):
         expected_slice = np.array([0.5112, 0.4692, 0.4715, 0.5206, 0.4894, 0.5114, 0.5096, 0.4932, 0.4755])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

-    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_pndm(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         unet = self.dummy_cond_unet
         scheduler = PNDMScheduler(tensor_format="pt", skip_prk_steps=True)
         vae = self.dummy_vae
@@ -295,14 +292,11 @@ def test_stable_diffusion_pndm(self):
             safety_checker=self.dummy_safety_checker,
             feature_extractor=self.dummy_extractor,
         )
-        sd_pipe = sd_pipe.to(torch_device)
+        sd_pipe = sd_pipe.to(device)

         prompt = "A painting of a squirrel eating a burger"
-        generator = torch.Generator(device=torch_device).manual_seed(0)
-        with torch.autocast("cuda"):
-            output = sd_pipe(
-                [prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np"
-            )
+        generator = torch.Generator(device=device).manual_seed(0)
+        output = sd_pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np")

         image = output["sample"]
@@ -312,8 +306,8 @@ def test_stable_diffusion_pndm(self):
         expected_slice = np.array([0.4937, 0.4649, 0.4716, 0.5145, 0.4889, 0.513, 0.513, 0.4905, 0.4738])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

-    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_k_lms(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         unet = self.dummy_cond_unet
         scheduler = LMSDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear")
         vae = self.dummy_vae
@@ -330,14 +324,11 @@ def test_stable_diffusion_k_lms(self):
             safety_checker=self.dummy_safety_checker,
             feature_extractor=self.dummy_extractor,
         )
-        sd_pipe = sd_pipe.to(torch_device)
+        sd_pipe = sd_pipe.to(device)

         prompt = "A painting of a squirrel eating a burger"
-        generator = torch.Generator(device=torch_device).manual_seed(0)
-        with torch.autocast("cuda"):
-            output = sd_pipe(
-                [prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np"
-            )
+        generator = torch.Generator(device=device).manual_seed(0)
+        output = sd_pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np")

         image = output["sample"]
@@ -396,15 +387,15 @@ def test_karras_ve_pipeline(self):
         expected_slice = np.array([0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

-    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_img2img(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         unet = self.dummy_cond_unet
         scheduler = PNDMScheduler(tensor_format="pt", skip_prk_steps=True)
         vae = self.dummy_vae
         bert = self.dummy_text_encoder
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

-        init_image = self.dummy_image
+        init_image = self.dummy_image.to(device)

         # make sure here that pndm scheduler skips prk
         sd_pipe = StableDiffusionImg2ImgPipeline(
@@ -416,19 +407,18 @@ def test_stable_diffusion_img2img(self):
             safety_checker=self.dummy_safety_checker,
             feature_extractor=self.dummy_extractor,
         )
-        sd_pipe = sd_pipe.to(torch_device)
+        sd_pipe = sd_pipe.to(device)

         prompt = "A painting of a squirrel eating a burger"
-        generator = torch.Generator(device=torch_device).manual_seed(0)
-        with torch.autocast("cuda"):
-            output = sd_pipe(
-                [prompt],
-                generator=generator,
-                guidance_scale=6.0,
-                num_inference_steps=2,
-                output_type="np",
-                init_image=init_image,
-            )
+        generator = torch.Generator(device=device).manual_seed(0)
+        output = sd_pipe(
+            [prompt],
+            generator=generator,
+            guidance_scale=6.0,
+            num_inference_steps=2,
+            output_type="np",
+            init_image=init_image,
+        )

         image = output["sample"]
@@ -438,15 +428,15 @@ def test_stable_diffusion_img2img(self):
         expected_slice = np.array([0.4492, 0.3865, 0.4222, 0.5854, 0.5139, 0.4379, 0.4193, 0.48, 0.4218])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

-    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_inpaint(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         unet = self.dummy_cond_unet
         scheduler = PNDMScheduler(tensor_format="pt", skip_prk_steps=True)
         vae = self.dummy_vae
         bert = self.dummy_text_encoder
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

-        image = self.dummy_image.permute(0, 2, 3, 1)[0]
+        image = self.dummy_image.to(device).permute(0, 2, 3, 1)[0]
         init_image = Image.fromarray(np.uint8(image)).convert("RGB")
         mask_image = Image.fromarray(np.uint8(image + 4)).convert("RGB").resize((128, 128))
@@ -460,20 +450,19 @@ def test_stable_diffusion_inpaint(self):
             safety_checker=self.dummy_safety_checker,
             feature_extractor=self.dummy_extractor,
         )
-        sd_pipe = sd_pipe.to(torch_device)
+        sd_pipe = sd_pipe.to(device)

         prompt = "A painting of a squirrel eating a burger"
-        generator = torch.Generator(device=torch_device).manual_seed(0)
-        with torch.autocast("cuda"):
-            output = sd_pipe(
-                [prompt],
-                generator=generator,
-                guidance_scale=6.0,
-                num_inference_steps=2,
-                output_type="np",
-                init_image=init_image,
-                mask_image=mask_image,
-            )
+        generator = torch.Generator(device=device).manual_seed(0)
+        output = sd_pipe(
+            [prompt],
+            generator=generator,
+            guidance_scale=6.0,
+            num_inference_steps=2,
+            output_type="np",
+            init_image=init_image,
+            mask_image=mask_image,
+        )

         image = output["sample"]
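
Note (added for review, not part of either commit): both patches rest on the same fact, that torch.Generator noise streams are device-dependent, so the same seed yields different tensors on CUDA than on CPU. Pinning the pipeline, its inputs, and the generator to the CPU is what makes the hard-coded expected slices reproducible across machines. A minimal sketch of that pattern, with illustrative tensor shapes:

import torch

device = "cpu"  # same pinning as the patch: CPU noise streams are stable across machines

# Two generators seeded identically on the same device yield identical noise,
# which is why the tests can assert against hard-coded expected slices.
gen_a = torch.Generator(device=device).manual_seed(0)
a = torch.randn((1, 3, 8, 8), generator=gen_a, device=device)

gen_b = torch.Generator(device=device).manual_seed(0)
b = torch.randn((1, 3, 8, 8), generator=gen_b, device=device)

assert torch.equal(a, b)  # bit-identical: same seed, same device, same op order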