diff --git a/src/diffusers/models/attention.py b/src/diffusers/models/attention.py
index 25e1ea28dcf0..f963310f12eb 100644
--- a/src/diffusers/models/attention.py
+++ b/src/diffusers/models/attention.py
@@ -144,10 +144,11 @@ def forward(self, hidden_states, context=None):
         batch, channel, height, weight = hidden_states.shape
         residual = hidden_states
         hidden_states = self.norm(hidden_states)
         hidden_states = self.proj_in(hidden_states)
-        hidden_states = hidden_states.permute(0, 2, 3, 1).reshape(batch, height * weight, channel)
+        inner_dim = hidden_states.shape[1]
+        hidden_states = hidden_states.permute(0, 2, 3, 1).reshape(batch, height * weight, inner_dim)
         for block in self.transformer_blocks:
             hidden_states = block(hidden_states, context=context)
-        hidden_states = hidden_states.reshape(batch, height, weight, channel).permute(0, 3, 1, 2)
+        hidden_states = hidden_states.reshape(batch, height, weight, inner_dim).permute(0, 3, 1, 2)
         hidden_states = self.proj_out(hidden_states)
         return hidden_states + residual
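Why the fix is needed: after `proj_in`, the tensor's channel count is `inner_dim` (the attention width, typically `n_heads * d_head`), which need not equal the input `channel`, so reshaping with `channel` only worked when the two happened to coincide. Reading the width back from `hidden_states.shape[1]` makes the reshape correct for any projection size. The following is a minimal standalone sketch reproducing the failure, assuming `proj_in` is a 1x1 `nn.Conv2d` and using made-up shapes; it is not the diffusers module itself (and it names the spatial variable `width` where the upstream code uses `weight`):

```python
import torch
from torch import nn

# Hypothetical standalone reproduction, not the diffusers module itself.
# proj_in maps `channel` -> `inner_dim`; when the two differ, reshaping
# the projected tensor with `channel` is wrong.
batch, channel, height, width = 2, 320, 16, 16
inner_dim = 8 * 64  # e.g. n_heads=8, d_head=64 -> 512 != channel

proj_in = nn.Conv2d(channel, inner_dim, kernel_size=1)

hidden_states = torch.randn(batch, channel, height, width)
hidden_states = proj_in(hidden_states)  # shape: (batch, inner_dim, height, width)

# Old code: reshape with `channel` raises a RuntimeError, since the
# batch * height * width * inner_dim elements cannot be viewed as
# (batch, height * width, channel) when inner_dim != channel.
try:
    hidden_states.permute(0, 2, 3, 1).reshape(batch, height * width, channel)
except RuntimeError as e:
    print("old reshape fails:", e)

# Fixed code: read the projected channel count from the tensor itself.
inner = hidden_states.shape[1]
tokens = hidden_states.permute(0, 2, 3, 1).reshape(batch, height * width, inner)
print(tokens.shape)  # torch.Size([2, 256, 512])
```

The same reasoning applies to the second hunk: after the transformer blocks, the tokens are reshaped back to `(batch, height, weight, inner_dim)` before `proj_out` maps `inner_dim` back to the original channel count for the residual addition.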