handle lora scale and clip skip in lpw sd and sdxl community pipelines #8988
```diff
@@ -25,21 +25,25 @@
 from diffusers.loaders import (
     FromSingleFileMixin,
     IPAdapterMixin,
-    StableDiffusionLoraLoaderMixin,
+    StableDiffusionXLLoraLoaderMixin,
     TextualInversionLoaderMixin,
 )
 from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel
 from diffusers.models.attention_processor import AttnProcessor2_0, XFormersAttnProcessor
+from diffusers.models.lora import adjust_lora_scale_text_encoder
 from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
 from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
 from diffusers.schedulers import KarrasDiffusionSchedulers
 from diffusers.utils import (
+    USE_PEFT_BACKEND,
     deprecate,
     is_accelerate_available,
     is_accelerate_version,
     is_invisible_watermark_available,
     logging,
     replace_example_docstring,
+    scale_lora_layers,
+    unscale_lora_layers,
 )
 from diffusers.utils.torch_utils import randn_tensor
 
```
```diff
@@ -261,6 +265,7 @@ def get_weighted_text_embeddings_sdxl(
     num_images_per_prompt: int = 1,
     device: Optional[torch.device] = None,
     clip_skip: Optional[int] = None,
+    lora_scale: Optional[int] = None,
 ):
     """
     This function can process long prompt with weights, no length limitation
```
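For illustration, a hedged sketch of calling the extended helper with both options. It assumes `get_weighted_text_embeddings_sdxl` is in scope (it is defined at module level in the community pipeline file) and that `pipe` is an already-loaded `SDXLLongPromptWeightingPipeline`; the prompt strings are placeholders:

```python
prompt_embeds, neg_embeds, pooled_embeds, neg_pooled_embeds = get_weighted_text_embeddings_sdxl(
    pipe,
    prompt="a (masterpiece:1.2) portrait of an astronaut",
    neg_prompt="lowres, (bad anatomy:1.3)",
    num_images_per_prompt=1,
    clip_skip=2,      # skip the final CLIP layers, as before
    lora_scale=0.8,   # newly threaded through to both text encoders
)
```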
```diff
@@ -281,6 +286,24 @@ def get_weighted_text_embeddings_sdxl(
     """
     device = device or pipe._execution_device
 
+    # set lora scale so that monkey patched LoRA
+    # function of text encoder can correctly access it
+    if lora_scale is not None and isinstance(pipe, StableDiffusionXLLoraLoaderMixin):
+        pipe._lora_scale = lora_scale
+
+        # dynamically adjust the LoRA scale
+        if pipe.text_encoder is not None:
+            if not USE_PEFT_BACKEND:
+                adjust_lora_scale_text_encoder(pipe.text_encoder, lora_scale)
+            else:
+                scale_lora_layers(pipe.text_encoder, lora_scale)
+
+        if pipe.text_encoder_2 is not None:
+            if not USE_PEFT_BACKEND:
+                adjust_lora_scale_text_encoder(pipe.text_encoder_2, lora_scale)
+            else:
+                scale_lora_layers(pipe.text_encoder_2, lora_scale)
```
Comment on lines +301 to +305

Reviewer: Same as above.

Author: I just copied these lines from pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py; should I just leave `scale_lora_layers(pipe.text_encoder_2, lora_scale)`?

Reviewer: Oh, then it's okay.
```diff
+
     if prompt_2:
         prompt = f"{prompt} {prompt_2}"
 
```
```diff
@@ -429,6 +452,16 @@ def get_weighted_text_embeddings_sdxl(
         bs_embed * num_images_per_prompt, -1
     )
 
+    if pipe.text_encoder is not None:
+        if isinstance(pipe, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
+            # Retrieve the original scale by scaling back the LoRA layers
+            unscale_lora_layers(pipe.text_encoder, lora_scale)
+
+    if pipe.text_encoder_2 is not None:
+        if isinstance(pipe, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
+            # Retrieve the original scale by scaling back the LoRA layers
+            unscale_lora_layers(pipe.text_encoder_2, lora_scale)
+
     return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
 
```
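Together with the scaling block earlier in the function, these hunks bracket the text-encoder forward passes: LoRA layers are scaled before encoding and unscaled afterwards, so a different scale on the next call starts from clean weights. A minimal sketch of that lifecycle, using a hypothetical helper name and assuming the PEFT backend is active:

```python
from diffusers.utils import USE_PEFT_BACKEND, scale_lora_layers, unscale_lora_layers

def encode_with_temporary_lora_scale(text_encoder, input_ids, lora_scale):
    # Hypothetical helper mirroring the pattern above: apply the LoRA
    # scale only for the duration of this forward pass.
    if lora_scale is not None and USE_PEFT_BACKEND:
        scale_lora_layers(text_encoder, lora_scale)
    try:
        return text_encoder(input_ids, output_hidden_states=True)
    finally:
        # Scaling back restores the original weights so the scale does
        # not leak into later encode calls.
        if lora_scale is not None and USE_PEFT_BACKEND:
            unscale_lora_layers(text_encoder, lora_scale)
```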
```diff
@@ -549,7 +582,7 @@ class SDXLLongPromptWeightingPipeline(
     StableDiffusionMixin,
     FromSingleFileMixin,
     IPAdapterMixin,
-    StableDiffusionLoraLoaderMixin,
+    StableDiffusionXLLoraLoaderMixin,
     TextualInversionLoaderMixin,
 ):
     r"""
```
```diff
@@ -561,8 +594,8 @@ class SDXLLongPromptWeightingPipeline(
     The pipeline also inherits the following loading methods:
         - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
         - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
-        - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
-        - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+        - [`~loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
         - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
 
     Args:
```
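The SDXL variant of the mixin matters because SDXL LoRA checkpoints can carry weights for the UNet and both text encoders. A hedged usage sketch against the patched pipeline (the repo id and weight name are placeholders):

```python
# StableDiffusionXLLoraLoaderMixin.load_lora_weights understands the
# unet / text_encoder / text_encoder_2 entries in SDXL LoRA checkpoints.
pipe.load_lora_weights(
    "some-user/some-sdxl-lora",                      # placeholder repo id
    weight_name="pytorch_lora_weights.safetensors",  # typical file name
)

# Alternatively, bake the LoRA into the weights at a fixed scale instead
# of passing the scale at call time.
pipe.fuse_lora(lora_scale=0.8)
```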
```diff
@@ -743,7 +776,7 @@ def encode_prompt(
 
         # set lora scale so that monkey patched LoRA
         # function of text encoder can correctly access it
-        if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
+        if lora_scale is not None and isinstance(self, StableDiffusionXLLoraLoaderMixin):
             self._lora_scale = lora_scale
 
         if prompt is not None and isinstance(prompt, str):
```
```diff
@@ -1612,7 +1645,9 @@ def __call__(
             image_embeds = torch.cat([negative_image_embeds, image_embeds])
 
         # 3. Encode input prompt
-        (self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None)
+        lora_scale = (
+            self._cross_attention_kwargs.get("scale", None) if self._cross_attention_kwargs is not None else None
+        )
 
         negative_prompt = negative_prompt if negative_prompt is not None else ""
 
```
```diff
@@ -1627,6 +1662,7 @@ def __call__(
             neg_prompt=negative_prompt,
             num_images_per_prompt=num_images_per_prompt,
             clip_skip=clip_skip,
+            lora_scale=lora_scale,
         )
         dtype = prompt_embeds.dtype
 
```
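End to end, the scale now flows from the caller's `cross_attention_kwargs` through `__call__` into `get_weighted_text_embeddings_sdxl` (previously the expression was evaluated but never assigned, so the scale was silently dropped). A hedged usage sketch of the patched community pipeline (model and LoRA ids are placeholders):

```python
import torch
from diffusers import DiffusionPipeline

# Load the LPW SDXL community pipeline this PR modifies.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    custom_pipeline="lpw_stable_diffusion_xl",
    torch_dtype=torch.float16,
).to("cuda")

pipe.load_lora_weights("some-user/some-sdxl-lora")  # placeholder LoRA

image = pipe(
    prompt="a (masterpiece:1.2), best quality, photo of a red fox in snow",
    negative_prompt="lowres, (bad anatomy:1.3)",
    clip_skip=2,                            # now forwarded to the prompt encoder
    cross_attention_kwargs={"scale": 0.7},  # picked up as lora_scale in __call__
).images[0]
```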
Reviewer: Do we need this? Because without the PEFT backend, you cannot really do LoRA inference in recent diffusers versions. No strong opinions either way.
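For reference, if the non-PEFT fallback were dropped as this comment suggests, the scaling block would collapse to something like the sketch below. This only illustrates the reviewer's point, not what the PR merged:

```python
# Hypothetical PEFT-only variant: the adjust_lora_scale_text_encoder
# fallback disappears and the USE_PEFT_BACKEND checks become unnecessary.
if lora_scale is not None and isinstance(pipe, StableDiffusionXLLoraLoaderMixin):
    pipe._lora_scale = lora_scale
    if pipe.text_encoder is not None:
        scale_lora_layers(pipe.text_encoder, lora_scale)
    if pipe.text_encoder_2 is not None:
        scale_lora_layers(pipe.text_encoder_2, lora_scale)
```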