
Commit cce0f0c

a-r-r-o-w authored and 蒋硕 committed
Remove CogVideoX mentions from single file docs; Test updates (huggingface#9444)
* remove mentions from single file
* update tests
* update
1 parent 08816ed commit cce0f0c

4 files changed: 9 additions, 18 deletions


docs/source/en/api/loaders/single_file.md

Lines changed: 0 additions & 4 deletions
@@ -22,9 +22,6 @@ The [`~loaders.FromSingleFileMixin.from_single_file`] method allows you to load:
 
 ## Supported pipelines
 
-- [`CogVideoXPipeline`]
-- [`CogVideoXImageToVideoPipeline`]
-- [`CogVideoXVideoToVideoPipeline`]
 - [`StableDiffusionPipeline`]
 - [`StableDiffusionImg2ImgPipeline`]
 - [`StableDiffusionInpaintPipeline`]
@@ -52,7 +49,6 @@ The [`~loaders.FromSingleFileMixin.from_single_file`] method allows you to load:
 - [`UNet2DConditionModel`]
 - [`StableCascadeUNet`]
 - [`AutoencoderKL`]
-- [`AutoencoderKLCogVideoX`]
 - [`ControlNetModel`]
 - [`SD3Transformer2DModel`]
 - [`FluxTransformer2DModel`]
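For context, the list above documents which pipelines can still be loaded from a single checkpoint file. A minimal usage sketch for one of the remaining supported pipelines (the checkpoint path is illustrative; from_single_file also accepts a Hugging Face Hub URL pointing at a .safetensors or .ckpt file):

from diffusers import StableDiffusionPipeline

# Illustrative local path; any single-file checkpoint of a supported
# pipeline is loaded the same way.
pipe = StableDiffusionPipeline.from_single_file("path/to/v1-5-pruned-emaonly.safetensors")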

tests/pipelines/cogvideo/test_cogvideox.py

Lines changed: 3 additions & 6 deletions
@@ -57,6 +57,7 @@ class CogVideoXPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             "callback_on_step_end_tensor_inputs",
         ]
     )
+    test_xformers_attention = False
 
     def get_dummy_components(self):
         torch.manual_seed(0)
@@ -71,8 +72,8 @@ def get_dummy_components(self):
             time_embed_dim=2,
             text_embed_dim=32,  # Must match with tiny-random-t5
             num_layers=1,
-            sample_width=16,  # latent width: 2 -> final width: 16
-            sample_height=16,  # latent height: 2 -> final height: 16
+            sample_width=2,  # latent width: 2 -> final width: 16
+            sample_height=2,  # latent height: 2 -> final height: 16
             sample_frames=9,  # latent frames: (9 - 1) / 4 + 1 = 3 -> final frames: 9
             patch_size=2,
             temporal_compression_ratio=4,
@@ -280,10 +281,6 @@ def test_vae_tiling(self, expected_diff_max: float = 0.2):
             "VAE tiling should not affect the inference results",
         )
 
-    @unittest.skip("xformers attention processor does not exist for CogVideoX")
-    def test_xformers_attention_forwardGenerator_pass(self):
-        pass
-
    def test_fused_qkv_projections(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
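The sample_width/sample_height change makes the dummy config consistent with its own comments: the transformer takes these values as latent-space sizes, which the VAE then upscales by its spatial compression factor (8 for CogVideoX), so a latent size of 2 yields a final size of 16. A quick check of the arithmetic from the test comments (a sketch; the factor names here are ours, not the library's):

# Compression factors referenced in the test comments.
vae_scale_factor_spatial = 8
temporal_compression_ratio = 4

sample_width = 2   # latent width
sample_height = 2  # latent height
latent_frames = 3  # from sample_frames=9: (9 - 1) / 4 + 1 = 3

final_width = sample_width * vae_scale_factor_spatial    # 2 * 8 = 16
final_height = sample_height * vae_scale_factor_spatial  # 2 * 8 = 16
final_frames = (latent_frames - 1) * temporal_compression_ratio + 1  # (3 - 1) * 4 + 1 = 9

assert (final_width, final_height, final_frames) == (16, 16, 9)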

tests/pipelines/cogvideo/test_cogvideox_image2video.py

Lines changed: 3 additions & 2 deletions
@@ -269,8 +269,9 @@ def test_vae_tiling(self, expected_diff_max: float = 0.3):
         generator_device = "cpu"
         components = self.get_dummy_components()
 
-        # The reason to modify it this way is because I2V Transformer limits the generation to resolutions.
-        # See the if-statement on "self.use_learned_positional_embeddings"
+        # The reason to modify it this way is because I2V Transformer limits the generation to resolutions used during initalization.
+        # This limitation comes from using learned positional embeddings which cannot be generated on-the-fly like sincos or RoPE embeddings.
+        # See the if-statement on "self.use_learned_positional_embeddings" in diffusers/models/embeddings.py
         components["transformer"] = CogVideoXTransformer3DModel.from_config(
             components["transformer"].config,
             sample_height=16,
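The distinction the new comment draws is the crux of the fix: a learned positional embedding is a fixed-size parameter table created at init, while sincos or RoPE embeddings are computed from positions at call time, so only the latter generalize to resolutions unseen during initialization. A minimal sketch of the difference (illustrative module, not the diffusers implementation):

import math

import torch
import torch.nn as nn

class LearnedPosEmbed(nn.Module):
    # The table is sized at init; it cannot serve a different patch count later.
    def __init__(self, num_patches: int, dim: int):
        super().__init__()
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Shape mismatch if x was patchified at another resolution.
        return x + self.pos_embed

def sincos_pos_embed(num_patches: int, dim: int) -> torch.Tensor:
    # Computed on the fly from positions, so any resolution works.
    pos = torch.arange(num_patches, dtype=torch.float32).unsqueeze(1)
    freqs = torch.exp(-math.log(10000.0) * torch.arange(0, dim, 2, dtype=torch.float32) / dim)
    angles = pos * freqs
    return torch.cat([angles.sin(), angles.cos()], dim=-1)  # (num_patches, dim)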

tests/pipelines/cogvideo/test_cogvideox_video2video.py

Lines changed: 3 additions & 6 deletions
@@ -51,6 +51,7 @@ class CogVideoXVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             "callback_on_step_end_tensor_inputs",
         ]
     )
+    test_xformers_attention = False
 
     def get_dummy_components(self):
         torch.manual_seed(0)
@@ -65,8 +66,8 @@ def get_dummy_components(self):
             time_embed_dim=2,
             text_embed_dim=32,  # Must match with tiny-random-t5
             num_layers=1,
-            sample_width=16,  # latent width: 2 -> final width: 16
-            sample_height=16,  # latent height: 2 -> final height: 16
+            sample_width=2,  # latent width: 2 -> final width: 16
+            sample_height=2,  # latent height: 2 -> final height: 16
             sample_frames=9,  # latent frames: (9 - 1) / 4 + 1 = 3 -> final frames: 9
             patch_size=2,
             temporal_compression_ratio=4,
@@ -285,10 +286,6 @@ def test_vae_tiling(self, expected_diff_max: float = 0.2):
             "VAE tiling should not affect the inference results",
         )
 
-    @unittest.skip("xformers attention processor does not exist for CogVideoX")
-    def test_xformers_attention_forwardGenerator_pass(self):
-        pass
-
    def test_fused_qkv_projections(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
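Across both test files, the per-class @unittest.skip override is replaced by the declarative test_xformers_attention = False flag, leaving the shared mixin to decide whether the xformers test runs. A minimal sketch of that pattern (illustrative mixin, not the actual PipelineTesterMixin code):

import unittest

class PipelineTesterMixinSketch:
    # Subclasses opt out by flipping this flag instead of overriding the test.
    test_xformers_attention = True

    def test_xformers_attention_forwardGenerator_pass(self):
        if not self.test_xformers_attention:
            self.skipTest("xformers attention processor is not available for this pipeline")
        ...  # exercise the xformers attention processor here

class CogVideoXLikeFastTests(PipelineTesterMixinSketch, unittest.TestCase):
    test_xformers_attention = False  # CogVideoX has no xformers attention processor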
