add sd3 test

westfish · westfish · commit 54b7d5afe42b · 2024-09-24T08:59:06.000Z
diff --git a/ppdiffusers/examples/dreambooth/README_sd3.md b/ppdiffusers/examples/dreambooth/README_sd3.md
@@ -55,7 +55,7 @@ python train_dreambooth_sd3.py \
   --checkpointing_steps=250
 ```
 
-fp16训练需要显存47000MiB，为了更好地跟踪我们的训练实验，我们在上面的命令中使用了以下标志：
+fp16训练需要显存67000MiB，为了更好地跟踪我们的训练实验，我们在上面的命令中使用了以下标志：
 
 * `report_to="wandb"` 将确保在 Weights and Biases 上跟踪训练运行。要使用它，请确保安装 `wandb`，使用 `pip install wandb`。
 * `validation_prompt` 和 `validation_epochs` 允许脚本进行几次验证推理运行。这可以让我们定性地检查训练是否按预期进行。
@@ -118,7 +118,7 @@ python train_dreambooth_lora_sd3.py \
   --checkpointing_steps=250
 ```
 
-训练完成后，我们可以通过以下python脚本执行推理：
+fp16训练需要显存47000MiB。训练完成后，我们可以通过以下python脚本执行推理：
 ```python
 from ppdiffusers import StableDiffusion3Pipeline
 from ppdiffusers import (
diff --git a/ppdiffusers/ppdiffusers/models/transformer_sd3.py b/ppdiffusers/ppdiffusers/models/transformer_sd3.py
@@ -145,7 +145,7 @@ def attn_processors(self) -> Dict[str, AttentionProcessor]:
         # set recursively
         processors = {}
 
-        def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
+        def fn_recursive_add_processors(name: str, module: paddle.nn.Layer, processors: Dict[str, AttentionProcessor]):
             if hasattr(module, "get_processor"):
                 processors[f"{name}.processor"] = module.get_processor(return_deprecated_lora=True)
 
@@ -178,7 +178,7 @@ def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, Atte
                 f" number of attention layers: {count}. Please make sure to pass {count} processor classes."
             )
 
-        def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
+        def fn_recursive_attn_processor(name: str, module: paddle.nn.Layer, processor):
             if hasattr(module, "set_processor"):
                 if not isinstance(processor, dict):
                     module.set_processor(processor)
@@ -267,7 +267,7 @@ def forward(
             # weight the lora layers by setting `lora_scale` for each PEFT layer
             scale_lora_layers(self, lora_scale)
         else:
-            logger.info(
+            logger.debug(
                 "Passing `scale` via `joint_attention_kwargs` when not using the PEFT backend is ineffective."
             )
 
diff --git a/ppdiffusers/tests/models/test_models_transformer_sd3.py b/ppdiffusers/tests/models/test_models_transformer_sd3.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import paddle
+from ppdiffusers import SD3Transformer2DModel
+from ppdiffusers.utils.testing_utils import (
+    enable_full_determinism,
+    paddle_device,
+)
+from .test_modeling_common import ModelTesterMixin
+
+enable_full_determinism()
+
+class SD3TransformerTests(ModelTesterMixin, unittest.TestCase):  # test case combining ModelTesterMixin with a small SD3Transformer2DModel config
+    model_class = SD3Transformer2DModel
+    main_input_name = "hidden_states"  # same name as the "hidden_states" key returned by dummy_input
+    @property
+    def dummy_input(self):  # builds a fresh dict of random keyword inputs on every access
+        batch_size = 2
+        num_channels = 4
+        height = width = embedding_dim = 32  # one value (32) shared by the spatial size and the text-embedding width
+        pooled_embedding_dim = embedding_dim * 2  # 64; equals "pooled_projection_dim" in the init args below
+        sequence_length = 154
+        hidden_states = paddle.randn((batch_size, num_channels, height, width))
+        encoder_hidden_states = paddle.randn((batch_size, sequence_length, embedding_dim))
+        pooled_prompt_embeds = paddle.randn((batch_size, pooled_embedding_dim))
+        timestep = paddle.randint(0, 1000, shape=(batch_size,))  # integer tensor of shape (batch_size,), low=0, high=1000
+        return {
+            "hidden_states": hidden_states,
+            "encoder_hidden_states": encoder_hidden_states,
+            "pooled_projections": pooled_prompt_embeds,
+            "timestep": timestep,
+        }
+    @property
+    def input_shape(self):
+        return (4, 32, 32)  # equals (num_channels, height, width) used in dummy_input
+    @property
+    def output_shape(self):
+        return (4, 32, 32)  # same tuple as input_shape
+    def prepare_init_args_and_inputs_for_common(self):  # returns (init_dict, inputs_dict); presumably consumed by ModelTesterMixin - confirm there
+        init_dict = {
+            "sample_size": 32,  # equals height/width in dummy_input
+            "patch_size": 1,
+            "in_channels": 4,  # equals num_channels in dummy_input
+            "num_layers": 1,
+            "attention_head_dim": 8,
+            "num_attention_heads": 4,
+            "caption_projection_dim": 32,
+            "joint_attention_dim": 32,  # equals embedding_dim in dummy_input
+            "pooled_projection_dim": 64,  # equals pooled_embedding_dim in dummy_input
+            "out_channels": 4,
+        }
+        inputs_dict = self.dummy_input
+        return init_dict, inputs_dict
+
+    @unittest.skip("SD3Transformer2DModel uses a dedicated attention processor. This test doesn't apply")
+    def test_from_save_pretrained(self):
+        pass  # body unused; the @unittest.skip decorator above disables this test
+
+    @unittest.skip("SD3Transformer2DModel uses a dedicated attention processor. This test doesn't apply")
+    def test_outputs_equivalence(self):
+        pass  # body unused; the @unittest.skip decorator above disables this test
+
+    @unittest.skip("SD3Transformer2DModel uses a dedicated attention processor. This test doesn't apply")
+    def test_set_attn_processor_for_determinism(self):
+        pass  # body unused; the @unittest.skip decorator above disables this test
+