infgrad JesusCrist committed on
Commit
2aa5579
1 Parent(s): 1bb50bc

Update modeling.py (#19)


- Update modeling.py (cf77e3147c0869839c6e74a0fd473960ebe4c105)


Co-authored-by: Anoymous <JesusCrist@users.noreply.huggingface.co>

Files changed (1)
  1. modeling.py +2 -2
modeling.py CHANGED
@@ -897,11 +897,11 @@ class NewModel(NewPreTrainedModel):
 
         if unpad_inputs:
             assert self.config.use_memory_efficient_attention
-            attention_bias = xops.fmha.attn_bias.BlockDiagonalMask.from_seqlens(length)
+            attention_bias = xops.fmha.attn_bias.BlockDiagonalMask.from_seqlens(length, device=self.device)
         else:
             # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
             # ourselves in which case we just need to make it broadcastable to all heads.
-            attention_bias = self.get_extended_attention_mask(attention_mask, input_shape)
+            attention_bias = self.get_extended_attention_mask(attention_mask, input_shape, device=self.device)
         if self.config.use_memory_efficient_attention:
             # Invalid shape for attention bias: torch.Size([48, 1, 1, 512]) (expected (48, 12, 512, 512))
             attention_bias = attention_bias.expand(-1, self.config.num_attention_heads, seq_length, -1)