diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index ca6c580ffc55..c7aba95c4a50 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -234,8 +234,8 @@ def __call__(
             uncond_tokens = [""]
         elif type(prompt) is not type(negative_prompt):
             raise TypeError(
-                "`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
-                " {type(prompt)}."
+                f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
+                f" {type(prompt)}."
             )
         elif isinstance(negative_prompt, str):
             uncond_tokens = [negative_prompt]
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
index 3e5ac4b33582..3e359e59b181 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
@@ -195,7 +195,6 @@ def __call__(
         """
         if isinstance(prompt, str):
             batch_size = 1
-            prompt = [prompt]
         elif isinstance(prompt, list):
             batch_size = len(prompt)
         else:
@@ -250,8 +249,8 @@ def __call__(
             uncond_tokens = [""]
         elif type(prompt) is not type(negative_prompt):
             raise TypeError(
-                "`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
-                " {type(prompt)}."
+                f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
+                f" {type(prompt)}."
             )
         elif isinstance(negative_prompt, str):
             uncond_tokens = [negative_prompt]
@@ -285,6 +284,8 @@ def __call__(
         init_latents = init_latent_dist.sample(generator=generator)
         init_latents = 0.18215 * init_latents

+        if isinstance(prompt, str):
+            prompt = [prompt]
         if len(prompt) > init_latents.shape[0] and len(prompt) % init_latents.shape[0] == 0:
             # expand init_latents for batch_size
             deprecation_message = (
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
index 30a588e754b3..a6fb4a091c43 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
@@ -266,8 +266,8 @@ def __call__(
             uncond_tokens = [""]
         elif type(prompt) is not type(negative_prompt):
             raise TypeError(
-                "`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
-                " {type(prompt)}."
+                f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
+                f" {type(prompt)}."
             )
         elif isinstance(negative_prompt, str):
             uncond_tokens = [negative_prompt]
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_onnx.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_onnx.py
index 4bd6c2c8bb3e..acd6446af6fe 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_onnx.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_onnx.py
@@ -108,8 +108,8 @@ def __call__(
             uncond_tokens = [""] * batch_size
         elif type(prompt) is not type(negative_prompt):
             raise TypeError(
-                "`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
-                " {type(prompt)}."
+                f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
+                f" {type(prompt)}."
             )
         elif isinstance(negative_prompt, str):
             uncond_tokens = [negative_prompt] * batch_size
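Note on the pipeline hunks above: the original error messages were plain strings, so the `{type(...)}` placeholders were printed verbatim instead of being interpolated; adding the `f` prefix fixes that. In the img2img pipeline, the early `prompt = [prompt]` conversion is also deferred until after the negative-prompt handling, so a `str` negative prompt no longer trips the `type(prompt) is not type(negative_prompt)` check when `prompt` is a `str`. A minimal standalone sketch of the f-string issue (illustrative, not part of the patch):

```python
# Without the `f` prefix, the braces are literal text, not interpolation.
prompt = "a photo of an astronaut"
negative_prompt = ["low quality"]  # deliberately the wrong type

broken = (
    "`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
    " {type(prompt)}."
)
fixed = (
    f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
    f" {type(prompt)}."
)

print(broken)  # ... but got {type(negative_prompt)} != {type(prompt)}.
print(fixed)   # ... but got <class 'list'> != <class 'str'>.
```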
+ f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" + f" {type(prompt)}." ) elif isinstance(negative_prompt, str): uncond_tokens = [negative_prompt] * batch_size diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 8004241ac15e..75ed23b8a8c6 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -575,6 +575,46 @@ def test_stable_diffusion_attention_chunk(self): assert np.abs(output_2.images.flatten() - output_1.images.flatten()).max() < 1e-4 + def test_stable_diffusion_negative_prompt(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + unet = self.dummy_cond_unet + scheduler = PNDMScheduler(skip_prk_steps=True) + vae = self.dummy_vae + bert = self.dummy_text_encoder + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + # make sure here that pndm scheduler skips prk + sd_pipe = StableDiffusionPipeline( + unet=unet, + scheduler=scheduler, + vae=vae, + text_encoder=bert, + tokenizer=tokenizer, + safety_checker=self.dummy_safety_checker, + feature_extractor=self.dummy_extractor, + ) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + negative_prompt = "french fries" + generator = torch.Generator(device=device).manual_seed(0) + output = sd_pipe( + prompt, + negative_prompt=negative_prompt, + generator=generator, + guidance_scale=6.0, + num_inference_steps=2, + output_type="np", + ) + + image = output.images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 128, 128, 3) + expected_slice = np.array([0.4851, 0.4617, 0.4765, 0.5127, 0.4845, 0.5153, 0.5141, 0.4886, 0.4719]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + def test_score_sde_ve_pipeline(self): unet = self.dummy_uncond_unet scheduler = ScoreSdeVeScheduler() @@ -704,6 +744,48 @@ def test_stable_diffusion_img2img(self): assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 + def test_stable_diffusion_img2img_negative_prompt(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + unet = self.dummy_cond_unet + scheduler = PNDMScheduler(skip_prk_steps=True) + vae = self.dummy_vae + bert = self.dummy_text_encoder + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + init_image = self.dummy_image.to(device) + + # make sure here that pndm scheduler skips prk + sd_pipe = StableDiffusionImg2ImgPipeline( + unet=unet, + scheduler=scheduler, + vae=vae, + text_encoder=bert, + tokenizer=tokenizer, + safety_checker=self.dummy_safety_checker, + feature_extractor=self.dummy_extractor, + ) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + negative_prompt = "french fries" + generator = torch.Generator(device=device).manual_seed(0) + output = sd_pipe( + prompt, + negative_prompt=negative_prompt, + generator=generator, + guidance_scale=6.0, + num_inference_steps=2, + output_type="np", + init_image=init_image, + ) + image = output.images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + expected_slice = np.array([0.4065, 0.3783, 0.4050, 0.5266, 0.4781, 0.4252, 0.4203, 0.4692, 0.4365]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + def test_stable_diffusion_img2img_multiple_init_images(self): device 
= "cpu" # ensure determinism for the device-dependent torch.Generator unet = self.dummy_cond_unet @@ -861,6 +943,52 @@ def test_stable_diffusion_inpaint(self): assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 + def test_stable_diffusion_inpaint_negative_prompt(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + unet = self.dummy_cond_unet + scheduler = PNDMScheduler(skip_prk_steps=True) + vae = self.dummy_vae + bert = self.dummy_text_encoder + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + image = self.dummy_image.cpu().permute(0, 2, 3, 1)[0] + init_image = Image.fromarray(np.uint8(image)).convert("RGB") + mask_image = Image.fromarray(np.uint8(image + 4)).convert("RGB").resize((128, 128)) + + # make sure here that pndm scheduler skips prk + sd_pipe = StableDiffusionInpaintPipeline( + unet=unet, + scheduler=scheduler, + vae=vae, + text_encoder=bert, + tokenizer=tokenizer, + safety_checker=self.dummy_safety_checker, + feature_extractor=self.dummy_extractor, + ) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + negative_prompt = "french fries" + generator = torch.Generator(device=device).manual_seed(0) + output = sd_pipe( + prompt, + negative_prompt=negative_prompt, + generator=generator, + guidance_scale=6.0, + num_inference_steps=2, + output_type="np", + init_image=init_image, + mask_image=mask_image, + ) + + image = output.images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + expected_slice = np.array([0.4765, 0.5339, 0.4541, 0.6240, 0.5439, 0.4055, 0.5503, 0.5891, 0.5150]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + def test_stable_diffusion_num_images_per_prompt(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator unet = self.dummy_cond_unet