LeoXing1996 committed on
Commit
9121982
1 Parent(s): 00a4e2b

update memory efficient attention

Browse files
animatediff/models/motion_module.py CHANGED
@@ -467,6 +467,14 @@ class CrossAttention(nn.Module):
467
  hidden_states = self.reshape_batch_dim_to_heads(hidden_states)
468
  return hidden_states
469
 
 
 
 
 
 
 
 
 
470
 
471
  class VersatileAttention(CrossAttention):
472
  def __init__(
@@ -532,7 +540,12 @@ class VersatileAttention(CrossAttention):
532
  attention_mask = attention_mask.repeat_interleave(self.heads, dim=0)
533
 
534
  # attention, what we cannot get enough of
535
- if self._use_memory_efficient_attention_xformers:
 
 
 
 
 
536
  hidden_states = self._memory_efficient_attention_xformers(query, key, value, attention_mask)
537
  # Some versions of xformers return output in fp32, cast it back to the dtype of the input
538
  hidden_states = hidden_states.to(query.dtype)
 
467
  hidden_states = self.reshape_batch_dim_to_heads(hidden_states)
468
  return hidden_states
469
 
470
+ def _memory_efficient_attention_pt20(self, query, key, value, attention_mask):
471
+ query = query.contiguous()
472
+ key = key.contiguous()
473
+ value = value.contiguous()
474
+ hidden_states = torch.nn.functional.scaled_dot_product_attention(query, key, value, attn_mask=attention_mask, dropout_p=0, is_causal=False)
475
+ hidden_states = self.reshape_batch_dim_to_heads(hidden_states)
476
+ return hidden_states
477
+
478
 
479
  class VersatileAttention(CrossAttention):
480
  def __init__(
 
540
  attention_mask = attention_mask.repeat_interleave(self.heads, dim=0)
541
 
542
  # attention, what we cannot get enough of
543
+ if hasattr(F, 'scaled_dot_product_attention'):
544
+ # NOTE: pt20's scaled_dot_product_attention seems more memory efficient than
545
+ # xformers' memory_efficient_attention, set it as the first class citizen
546
+ hidden_states = self._memory_efficient_attention_pt20(query, key, value, attention_mask)
547
+ hidden_states = hidden_states.to(query.dtype)
548
+ elif self._use_memory_efficient_attention_xformers:
549
  hidden_states = self._memory_efficient_attention_xformers(query, key, value, attention_mask)
550
  # Some versions of xformers return output in fp32, cast it back to the dtype of the input
551
  hidden_states = hidden_states.to(query.dtype)
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  opencv-python
 
2
  torchvision==0.14.1
3
  diffusers==0.24.0
4
  transformers==4.25.1
 
1
  opencv-python
2
+ torch>=2.0.0
3
  torchvision==0.14.1
4
  diffusers==0.24.0
5
  transformers==4.25.1