Jackmin108 committed on
Commit 5ee2c37
1 Parent(s): 4fa2261

Remove triton flash implementation

Files changed (1):
  1. modeling_bert.py +0 -18
modeling_bert.py CHANGED
@@ -63,12 +63,6 @@ try:
 except ImportError:
     scaled_dot_product_attention = None
 
-# Triton implementation
-try:
-    from .flash_attn_triton import flash_attn_func
-except Exception:
-    flash_attn_func = None
-
 # This is used by encode but user may not have it installed
 try:
     from tqdm.autonotebook import trange
@@ -324,18 +318,6 @@ class JinaBertSelfAttention(nn.Module):
         output_attentions: Optional[bool] = False,
         bias: Optional[torch.FloatTensor] = None,
     ) -> Tuple[torch.Tensor]:
-        if self.attn_implementation == 'triton':
-            b, s, h = hidden_states.shape
-            q = self.query(hidden_states)
-            k = self.key(hidden_states)
-            v = self.value(hidden_states)
-            # B x S x hidden_dim -> B x S x num_heads x head_dim
-            q = q.view(b, s, self.num_attention_heads, self.attention_head_size)
-            k = k.view(b, s, self.num_attention_heads, self.attention_head_size)
-            v = v.view(b, s, self.num_attention_heads, self.attention_head_size)
-            attn = flash_attn_func(q, k, v, bias)
-            return (attn.view(b, s, h),)
-
         mixed_query_layer = self.query(hidden_states)
 
         # If this is instantiated as a cross-attention module, the keys
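
Note: the deleted branch projected hidden_states into q/k/v of shape (batch, seq_len, num_heads, head_dim) and passed them, together with the attention bias, to the Triton flash_attn_func. With that path gone, attention is handled by the implementations the file still imports, such as torch's scaled_dot_product_attention kept in the first hunk. The snippet below is a minimal sketch of the equivalent computation with SDPA, not code from modeling_bert.py; the helper name and its arguments are illustrative, and note that SDPA expects the head dimension before the sequence dimension, unlike the Triton kernel.

import torch.nn.functional as F

def sdpa_attention(q, k, v, bias, num_heads, head_dim):
    # Illustrative stand-in for the removed Triton branch.
    # q, k, v: (batch, seq_len, hidden_dim) projections; bias: float attention
    # bias broadcastable to (batch, num_heads, seq_len, seq_len).
    b, s, h = q.shape
    # (B, S, hidden) -> (B, num_heads, S, head_dim); SDPA wants heads before seq,
    # whereas the Triton kernel consumed (B, S, num_heads, head_dim).
    q = q.view(b, s, num_heads, head_dim).transpose(1, 2)
    k = k.view(b, s, num_heads, head_dim).transpose(1, 2)
    v = v.view(b, s, num_heads, head_dim).transpose(1, 2)
    attn = F.scaled_dot_product_attention(q, k, v, attn_mask=bias)
    # Back to (B, S, hidden_dim), matching the (attn.view(b, s, h),) return
    # of the removed branch.
    return attn.transpose(1, 2).reshape(b, s, h)

On recent PyTorch versions, scaled_dot_product_attention already dispatches to fused flash or memory-efficient kernels when the inputs allow it, which is presumably why the separate Triton kernel could be dropped.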
 