Skip to content

Commit 4ab22fe

Browse files
Xiaobin-Lu authored and westfish committed
修复paddlenlp develop版本适配错误_10-11 (#735)
1 parent dc16221 commit 4ab22fe

File tree

8 files changed

+89
-58
lines changed

8 files changed

+89
-58
lines changed

ppdiffusers/deploy/controlnet/scripts/export.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,4 @@
1414

1515
export USE_PPXFORMERS=False
1616
export FLAGS_set_to_1d=1
17-
python export_model.py --pretrained_model_name_or_path runwayml/stable-diffusion-v1-5 --controlnet_pretrained_model_name_or_path lllyasviel/sd-controlnet-canny --output_path static_model/stable-diffusion-v1-5-canny
17+
python export_model.py --pretrained_model_name_or_path runwayml/stable-diffusion-v1-5 --controlnet_pretrained_model_name_or_path lllyasviel/sd-controlnet-canny --output_path static_model/stable-diffusion-v1-5-canny --width 512 --height 512

ppdiffusers/deploy/controlnet/scripts/tune_and_tensorrt.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
export USE_PPXFORMERS=False
2020
export FLAGS_set_to_1d=1
2121
# 1. export the model to static_model.
22-
python export_model.py --pretrained_model_name_or_path runwayml/stable-diffusion-v1-5 --controlnet_pretrained_model_name_or_path lllyasviel/sd-controlnet-canny --output_path static_model/stable-diffusion-v1-5-canny
22+
python export_model.py --pretrained_model_name_or_path runwayml/stable-diffusion-v1-5 --controlnet_pretrained_model_name_or_path lllyasviel/sd-controlnet-canny --output_path static_model/stable-diffusion-v1-5-canny --width 512 --height 512
2323

2424
# 2. tune the shapes of the model for tensorrt
2525
python infer.py --model_dir static_model/stable-diffusion-v1-5-canny/ --scheduler "ddim" --backend paddle --device gpu --task_name all --width 512 --height 512 --inference_steps 50 --tune True --use_fp16 False

ppdiffusers/ppdiffusers/models/embeddings.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -225,19 +225,20 @@ def forward(self, latent):
225225
latent = self.norm(latent)
226226

227227
# Interpolate or crop positional embeddings as needed
228-
if self.pos_embed_max_size:
229-
pos_embed = self.cropped_pos_embed(height, width)
230-
else:
231-
if self.height != height or self.width != width:
232-
pos_embed = get_2d_sincos_pos_embed(
233-
embed_dim=self.pos_embed.shape[-1],
234-
grid_size=(height, width),
235-
base_size=self.base_size,
236-
interpolation_scale=self.interpolation_scale,
237-
)
238-
pos_embed = paddle.to_tensor(pos_embed).astype(paddle.float32).unsqueeze(0)
228+
if self.add_pos_embed:
229+
if self.pos_embed_max_size:
230+
pos_embed = self.cropped_pos_embed(height, width)
239231
else:
240-
pos_embed = self.pos_embed
232+
if self.height != height or self.width != width:
233+
pos_embed = get_2d_sincos_pos_embed(
234+
embed_dim=self.pos_embed.shape[-1],
235+
grid_size=(height, width),
236+
base_size=self.base_size,
237+
interpolation_scale=self.interpolation_scale,
238+
)
239+
pos_embed = paddle.to_tensor(pos_embed).astype(paddle.float32).unsqueeze(0)
240+
else:
241+
pos_embed = self.pos_embed
241242

242243
# NOTE, new add for unidiffusers!
243244
if self.add_pos_embed:

ppdiffusers/ppdiffusers/models/transformer_2d.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ def forward(
355355
else:
356356
batch, height, width, _ = hidden_states.shape
357357
residual = hidden_states
358-
358+
shape = paddle.shape(hidden_states)
359359
hidden_states = self.norm(hidden_states)
360360
if not self.use_linear_projection:
361361
hidden_states = (
@@ -441,7 +441,10 @@ def custom_forward(*inputs):
441441
# 3. Output
442442
if self.is_input_continuous:
443443
if not self.use_linear_projection:
444-
hidden_states = hidden_states.reshape([batch, height, width, self.inner_dim])
444+
if self.data_format == "NCHW":
445+
hidden_states = hidden_states.reshape([shape[0], shape[2], shape[3], self.inner_dim])
446+
else:
447+
hidden_states = hidden_states.reshape([shape[0], shape[1], shape[2], self.inner_dim])
445448
if self.data_format == "NCHW":
446449
hidden_states = hidden_states.transpose([0, 3, 1, 2])
447450
hidden_states = (
@@ -455,7 +458,10 @@ def custom_forward(*inputs):
455458
if not USE_PEFT_BACKEND
456459
else self.proj_out(hidden_states)
457460
)
458-
hidden_states = hidden_states.reshape([batch, height, width, self.inner_dim])
461+
if self.data_format == "NCHW":
462+
hidden_states = hidden_states.reshape([shape[0], shape[2], shape[3], self.inner_dim])
463+
else:
464+
hidden_states = hidden_states.reshape([shape[0], shape[1], shape[2], self.inner_dim])
459465
if self.data_format == "NCHW":
460466
hidden_states = hidden_states.transpose([0, 3, 1, 2])
461467

ppdiffusers/ppdiffusers/pipelines/unidiffuser/modeling_text_decoder.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,8 @@ def generate_beam(
334334
logits[is_stopped] = -float(np.inf)
335335
logits[is_stopped, 0] = 0
336336
scores_sum = scores[:, None] + logits
337-
seq_lengths[~is_stopped] += 1
337+
is_stopped_tensor_int32 = paddle.cast(~is_stopped, dtype='int32')
338+
seq_lengths += is_stopped_tensor_int32
338339
scores_sum_average = scores_sum / seq_lengths[:, None].cast(scores_sum.dtype)
339340
scores_sum_average, next_tokens = scores_sum_average.reshape([-1]).topk(beam_size, -1)
340341
next_tokens_source = next_tokens // scores_sum.shape[1]

ppdiffusers/ppdiffusers/transformers/clip/configuration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,7 @@ def from_text_vision_configs(cls, text_config: CLIPTextConfig, vision_config: CL
448448

449449
return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs)
450450

451-
def to_dict(self):
451+
def to_dict(self, *args, ** kwargs):
452452
"""
453453
Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`].
454454

ppdiffusers/tests/pipelines/stable_video_diffusion/test_stable_video_diffusion.py

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -479,42 +479,3 @@ def test_xformers_attention_forwardGenerator_pass(self):
479479
self.assertLess(max_diff, expected_max_diff, "XFormers attention should not affect the inference results")
480480

481481
enable_full_determinism()
482-
483-
484-
@slow
485-
@require_paddle_gpu
486-
class StableVideoDiffusionPipelineSlowTests(unittest.TestCase):
487-
def tearDown(self):
488-
# clean up the VRAM after each test
489-
super().tearDown()
490-
gc.collect()
491-
paddle.device.cuda.empty_cache()
492-
493-
def test_sd_video(self):
494-
pipe = StableVideoDiffusionPipeline.from_pretrained(
495-
"stabilityai/stable-video-diffusion-img2vid-xt",
496-
variant="fp16",
497-
paddle_dtype=paddle.float16,
498-
)
499-
pipe.set_progress_bar_config(disable=None)
500-
image = load_image(
501-
"https://paddlenlp.bj.bcebos.com/models/community/hf-internal-testing/diffusers-images/cat_6.png"
502-
)
503-
504-
generator = paddle.Generator().manual_seed(0)
505-
num_frames = 3
506-
507-
output = pipe(
508-
image=image,
509-
num_frames=num_frames,
510-
generator=generator,
511-
num_inference_steps=3,
512-
output_type="np",
513-
)
514-
515-
image = output.frames[0]
516-
assert image.shape == (num_frames, 576, 1024, 3)
517-
518-
image_slice = image[0, -3:, -3:, -1]
519-
expected_slice = np.array([0.8592, 0.8645, 0.8499, 0.8722, 0.8769, 0.8421, 0.8557, 0.8528, 0.8285])
520-
assert numpy_cosine_similarity_distance(image_slice.flatten(), expected_slice.flatten()) < 1e-3
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import gc
2+
import random
3+
import tempfile
4+
import unittest
5+
6+
import numpy as np
7+
import paddle
8+
9+
import ppdiffusers
10+
from ppdiffusers import (
11+
StableVideoDiffusionPipeline,
12+
)
13+
from ppdiffusers.utils import (
14+
is_accelerate_available,
15+
is_accelerate_version,
16+
load_image,
17+
logging,
18+
)
19+
from ppdiffusers.utils.testing_utils import (
20+
paddle_device,
21+
slow,
22+
require_paddle_gpu,
23+
numpy_cosine_similarity_distance,
24+
)
25+
26+
@slow
27+
@require_paddle_gpu
28+
class StableVideoDiffusionPipelineSlowTests(unittest.TestCase):
29+
def tearDown(self):
30+
# clean up the VRAM after each test
31+
super().tearDown()
32+
gc.collect()
33+
paddle.device.cuda.empty_cache()
34+
35+
def test_sd_video(self):
36+
pipe = StableVideoDiffusionPipeline.from_pretrained(
37+
"stabilityai/stable-video-diffusion-img2vid-xt",
38+
variant="fp16",
39+
paddle_dtype=paddle.float16,
40+
)
41+
pipe.set_progress_bar_config(disable=None)
42+
image = load_image(
43+
"https://paddlenlp.bj.bcebos.com/models/community/hf-internal-testing/diffusers-images/cat_6.png"
44+
)
45+
46+
generator = paddle.Generator().manual_seed(0)
47+
num_frames = 3
48+
49+
output = pipe(
50+
image=image,
51+
num_frames=num_frames,
52+
generator=generator,
53+
num_inference_steps=25,
54+
output_type="np",
55+
)
56+
57+
image = output.frames[0]
58+
assert image.shape == (num_frames, 576, 1024, 3)
59+
60+
image_slice = image[0, -3:, -3:, -1]
61+
expected_slice = np.array([0.8592, 0.8645, 0.8499, 0.8722, 0.8769, 0.8421, 0.8557, 0.8528, 0.8285])
62+
assert numpy_cosine_similarity_distance(image_slice.flatten(), expected_slice.flatten()) < 1e-3

0 commit comments

Comments
 (0)