From 995802c84e3abb66e2f79be462b7f9afc622f42c Mon Sep 17 00:00:00 2001
From: anton-l
Date: Thu, 1 Sep 2022 14:05:43 +0200
Subject: [PATCH 1/2] Fix nondeterministic tests for GPU runs

---
 tests/test_pipelines.py |  6 +++++-
 tests/test_training.py  | 17 ++++++++---------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
index 168fd58ff703..6d50eca153ff 100644
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -235,6 +235,7 @@ def test_ldm_text2img(self):
         expected_slice = np.array([0.5074, 0.5026, 0.4998, 0.4056, 0.3523, 0.4649, 0.5289, 0.5299, 0.4897])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

+    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_ddim(self):
         unet = self.dummy_cond_unet
         scheduler = DDIMScheduler(
@@ -276,6 +277,7 @@ def test_stable_diffusion_ddim(self):
         expected_slice = np.array([0.5112, 0.4692, 0.4715, 0.5206, 0.4894, 0.5114, 0.5096, 0.4932, 0.4755])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

+    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_pndm(self):
         unet = self.dummy_cond_unet
         scheduler = PNDMScheduler(tensor_format="pt", skip_prk_steps=True)
@@ -310,9 +312,9 @@ def test_stable_diffusion_pndm(self):
         expected_slice = np.array([0.4937, 0.4649, 0.4716, 0.5145, 0.4889, 0.513, 0.513, 0.4905, 0.4738])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

+    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_k_lms(self):
         unet = self.dummy_cond_unet
-        scheduler = PNDMScheduler(tensor_format="pt", skip_prk_steps=True)
         scheduler = LMSDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear")
         vae = self.dummy_vae
         bert = self.dummy_text_encoder
@@ -394,6 +396,7 @@ def test_karras_ve_pipeline(self):
         expected_slice = np.array([0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

+    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_img2img(self):
         unet = self.dummy_cond_unet
         scheduler = PNDMScheduler(tensor_format="pt", skip_prk_steps=True)
@@ -435,6 +438,7 @@ def test_stable_diffusion_img2img(self):
         expected_slice = np.array([0.4492, 0.3865, 0.4222, 0.5854, 0.5139, 0.4379, 0.4193, 0.48, 0.4218])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

+    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_inpaint(self):
         unet = self.dummy_cond_unet
         scheduler = PNDMScheduler(tensor_format="pt", skip_prk_steps=True)
diff --git a/tests/test_training.py b/tests/test_training.py
index 48903c37ce82..a9d330ef6af0 100644
--- a/tests/test_training.py
+++ b/tests/test_training.py
@@ -18,8 +18,8 @@
 import torch

 from diffusers import DDIMScheduler, DDPMScheduler, UNet2DModel
-from diffusers.testing_utils import slow, torch_device
-from diffusers.training_utils import enable_full_determinism, set_seed
+from diffusers.testing_utils import slow
+from diffusers.training_utils import set_seed


 torch.backends.cuda.matmul.allow_tf32 = False
@@ -34,8 +34,7 @@ def get_model_optimizer(self, resolution=32):

     @slow
     def test_training_step_equality(self):
-        enable_full_determinism(0)
-
+        device = "cpu"  # ensure full determinism without setting the CUBLAS_WORKSPACE_CONFIG env variable
         ddpm_scheduler = DDPMScheduler(
             num_train_timesteps=1000,
             beta_start=0.0001,
@@ -57,13 +56,13 @@ def test_training_step_equality(self):

         # shared batches for DDPM and DDIM
         set_seed(0)
-        clean_images = [torch.randn((4, 3, 32, 32)).clip(-1, 1).to(torch_device) for _ in range(4)]
-        noise = [torch.randn((4, 3, 32, 32)).to(torch_device) for _ in range(4)]
-        timesteps = [torch.randint(0, 1000, (4,)).long().to(torch_device) for _ in range(4)]
+        clean_images = [torch.randn((4, 3, 32, 32)).clip(-1, 1).to(device) for _ in range(4)]
+        noise = [torch.randn((4, 3, 32, 32)).to(device) for _ in range(4)]
+        timesteps = [torch.randint(0, 1000, (4,)).long().to(device) for _ in range(4)]

         # train with a DDPM scheduler
         model, optimizer = self.get_model_optimizer(resolution=32)
-        model.train().to(torch_device)
+        model.train().to(device)
         for i in range(4):
             optimizer.zero_grad()
             ddpm_noisy_images = ddpm_scheduler.add_noise(clean_images[i], noise[i], timesteps[i])
@@ -75,7 +74,7 @@ def test_training_step_equality(self):

         # recreate the model and optimizer, and retry with DDIM
         model, optimizer = self.get_model_optimizer(resolution=32)
-        model.train().to(torch_device)
+        model.train().to(device)
         for i in range(4):
             optimizer.zero_grad()
             ddim_noisy_images = ddim_scheduler.add_noise(clean_images[i], noise[i], timesteps[i])

From 8445bed62acac4b19189fbccf09fc7c5fec8da48 Mon Sep 17 00:00:00 2001
From: anton-l
Date: Thu, 1 Sep 2022 14:56:09 +0200
Subject: [PATCH 2/2] force SD fast tests to the CPU

---
 tests/test_pipelines.py | 85 ++++++++++++++++++-----------------------
 1 file changed, 37 insertions(+), 48 deletions(-)

diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
index 6d50eca153ff..011604775558 100644
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -235,8 +235,8 @@ def test_ldm_text2img(self):
         expected_slice = np.array([0.5074, 0.5026, 0.4998, 0.4056, 0.3523, 0.4649, 0.5289, 0.5299, 0.4897])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

-    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_ddim(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         unet = self.dummy_cond_unet
         scheduler = DDIMScheduler(
             beta_start=0.00085,
@@ -260,14 +260,11 @@ def test_stable_diffusion_ddim(self):
             safety_checker=self.dummy_safety_checker,
             feature_extractor=self.dummy_extractor,
         )
-        sd_pipe = sd_pipe.to(torch_device)
+        sd_pipe = sd_pipe.to(device)

         prompt = "A painting of a squirrel eating a burger"
-        generator = torch.Generator(device=torch_device).manual_seed(0)
-        with torch.autocast("cuda"):
-            output = sd_pipe(
-                [prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np"
-            )
+        generator = torch.Generator(device=device).manual_seed(0)
+        output = sd_pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np")

         image = output["sample"]
@@ -277,8 +274,8 @@ def test_stable_diffusion_ddim(self):
         expected_slice = np.array([0.5112, 0.4692, 0.4715, 0.5206, 0.4894, 0.5114, 0.5096, 0.4932, 0.4755])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

-    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_pndm(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         unet = self.dummy_cond_unet
         scheduler = PNDMScheduler(tensor_format="pt", skip_prk_steps=True)
         vae = self.dummy_vae
@@ -295,14 +292,11 @@ def test_stable_diffusion_pndm(self):
             safety_checker=self.dummy_safety_checker,
             feature_extractor=self.dummy_extractor,
         )
-        sd_pipe = sd_pipe.to(torch_device)
+        sd_pipe = sd_pipe.to(device)

         prompt = "A painting of a squirrel eating a burger"
-        generator = torch.Generator(device=torch_device).manual_seed(0)
-        with torch.autocast("cuda"):
-            output = sd_pipe(
-                [prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np"
-            )
+        generator = torch.Generator(device=device).manual_seed(0)
+        output = sd_pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np")

         image = output["sample"]
@@ -312,8 +306,8 @@ def test_stable_diffusion_pndm(self):
         expected_slice = np.array([0.4937, 0.4649, 0.4716, 0.5145, 0.4889, 0.513, 0.513, 0.4905, 0.4738])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

-    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_k_lms(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         unet = self.dummy_cond_unet
         scheduler = LMSDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear")
         vae = self.dummy_vae
@@ -330,14 +324,11 @@ def test_stable_diffusion_k_lms(self):
             safety_checker=self.dummy_safety_checker,
             feature_extractor=self.dummy_extractor,
         )
-        sd_pipe = sd_pipe.to(torch_device)
+        sd_pipe = sd_pipe.to(device)

         prompt = "A painting of a squirrel eating a burger"
-        generator = torch.Generator(device=torch_device).manual_seed(0)
-        with torch.autocast("cuda"):
-            output = sd_pipe(
-                [prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np"
-            )
+        generator = torch.Generator(device=device).manual_seed(0)
+        output = sd_pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np")

         image = output["sample"]
@@ -396,15 +387,15 @@ def test_karras_ve_pipeline(self):
         expected_slice = np.array([0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

-    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_img2img(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         unet = self.dummy_cond_unet
         scheduler = PNDMScheduler(tensor_format="pt", skip_prk_steps=True)
         vae = self.dummy_vae
         bert = self.dummy_text_encoder
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

-        init_image = self.dummy_image
+        init_image = self.dummy_image.to(device)

         # make sure here that pndm scheduler skips prk
         sd_pipe = StableDiffusionImg2ImgPipeline(
@@ -416,19 +407,18 @@ def test_stable_diffusion_img2img(self):
             safety_checker=self.dummy_safety_checker,
             feature_extractor=self.dummy_extractor,
         )
-        sd_pipe = sd_pipe.to(torch_device)
+        sd_pipe = sd_pipe.to(device)

         prompt = "A painting of a squirrel eating a burger"
-        generator = torch.Generator(device=torch_device).manual_seed(0)
-        with torch.autocast("cuda"):
-            output = sd_pipe(
-                [prompt],
-                generator=generator,
-                guidance_scale=6.0,
-                num_inference_steps=2,
-                output_type="np",
-                init_image=init_image,
-            )
+        generator = torch.Generator(device=device).manual_seed(0)
+        output = sd_pipe(
+            [prompt],
+            generator=generator,
+            guidance_scale=6.0,
+            num_inference_steps=2,
+            output_type="np",
+            init_image=init_image,
+        )

         image = output["sample"]
@@ -438,15 +428,15 @@ def test_stable_diffusion_img2img(self):
         expected_slice = np.array([0.4492, 0.3865, 0.4222, 0.5854, 0.5139, 0.4379, 0.4193, 0.48, 0.4218])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

-    @unittest.skipIf(torch_device != "cpu", "Stable diffusion fast tests need to run on CPU for reproducibility")
     def test_stable_diffusion_inpaint(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         unet = self.dummy_cond_unet
         scheduler = PNDMScheduler(tensor_format="pt", skip_prk_steps=True)
         vae = self.dummy_vae
         bert = self.dummy_text_encoder
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

-        image = self.dummy_image.permute(0, 2, 3, 1)[0]
+        image = self.dummy_image.to(device).permute(0, 2, 3, 1)[0]
         init_image = Image.fromarray(np.uint8(image)).convert("RGB")
         mask_image = Image.fromarray(np.uint8(image + 4)).convert("RGB").resize((128, 128))
@@ -460,20 +450,19 @@ def test_stable_diffusion_inpaint(self):
             safety_checker=self.dummy_safety_checker,
             feature_extractor=self.dummy_extractor,
         )
-        sd_pipe = sd_pipe.to(torch_device)
+        sd_pipe = sd_pipe.to(device)

         prompt = "A painting of a squirrel eating a burger"
-        generator = torch.Generator(device=torch_device).manual_seed(0)
-        with torch.autocast("cuda"):
-            output = sd_pipe(
-                [prompt],
-                generator=generator,
-                guidance_scale=6.0,
-                num_inference_steps=2,
-                output_type="np",
-                init_image=init_image,
-                mask_image=mask_image,
-            )
+        generator = torch.Generator(device=device).manual_seed(0)
+        output = sd_pipe(
+            [prompt],
+            generator=generator,
+            guidance_scale=6.0,
+            num_inference_steps=2,
+            output_type="np",
+            init_image=init_image,
+            mask_image=mask_image,
+        )

         image = output["sample"]
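
Note (added for review, not part of either commit): both patches rest on the same fact, that torch.Generator noise streams are device-dependent, so the same seed yields different tensors on CUDA than on CPU. Pinning the pipeline, its inputs, and the generator to the CPU is what makes the hard-coded expected slices reproducible across machines. A minimal sketch of that pattern, with illustrative tensor shapes:

import torch

device = "cpu"  # same pinning as the patch: CPU noise streams are stable across machines

# Two generators seeded identically on the same device yield identical noise,
# which is why the tests can assert against hard-coded expected slices.
gen_a = torch.Generator(device=device).manual_seed(0)
a = torch.randn((1, 3, 8, 8), generator=gen_a, device=device)

gen_b = torch.Generator(device=device).manual_seed(0)
b = torch.randn((1, 3, 8, 8), generator=gen_b, device=device)

assert torch.equal(a, b)  # bit-identical: same seed, same device, same op order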