fix: sinkformer
src/dalle_mini/model/modeling.py
CHANGED
@@ -211,7 +211,7 @@ def dot_product_attention_weights(
     dtype: Any = jnp.float32,
     precision: PrecisionLike = None,
     sinkhorn_iters: int = 1,
-
+    is_encoder: bool = False,
 ):
     """
     Computes dot-product attention weights given query and key.
@@ -239,7 +239,7 @@ def dot_product_attention_weights(
     attn_weights = attn_weights + embed_pos

     # normalize the attention weights
-    if
+    if not is_encoder or sinkhorn_iters == 1:
         # sinkhorn does not work for causal (leaks info of future tokens into past)
         attn_weights = jax.nn.softmax(attn_weights).astype(dtype)
     else:
@@ -461,7 +461,7 @@ class FlaxBartAttention(FlaxBartAttention):
                 dtype=self.dtype,
                 precision=None,
                 sinkhorn_iters=self.config.sinkhorn_iters,
-
+                is_encoder=self.is_encoder,
             )
             if self.config.use_cosine_attention:
                 # divide by tau
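The hunks above only show the softmax branch; the Sinkhorn else branch itself lies outside the diff context. As a rough orientation for what the new is_encoder gate protects, below is a minimal, self-contained sketch of Sinkformer-style attention normalization, assuming a log-space alternation of key-axis and query-axis softmaxes; the helper names (sinkhorn_attention_weights, normalize_attention) and the exact iteration scheme are illustrative assumptions, not the repository's actual code.

# Hypothetical sketch (not the repository's else branch): Sinkformer-style
# normalization of attention logits via alternating log-space softmaxes.
import jax
import jax.numpy as jnp


def sinkhorn_attention_weights(attn_logits, n_iters):
    # Alternate normalization over the key axis (-1) and the query axis (-2)
    # so the weights approach a doubly-stochastic matrix. With n_iters == 1
    # this reduces to a single softmax over keys.
    log_w = attn_logits
    for i in range(n_iters):
        log_w = jax.nn.log_softmax(log_w, axis=-1)      # rows sum to 1
        if i < n_iters - 1:
            log_w = jax.nn.log_softmax(log_w, axis=-2)  # columns sum to 1
    return jnp.exp(log_w)


def normalize_attention(attn_logits, sinkhorn_iters=1, is_encoder=False, dtype=jnp.float32):
    # Same gate as the patch: only non-causal (encoder) attention may run
    # more than one Sinkhorn iteration.
    if not is_encoder or sinkhorn_iters == 1:
        # sinkhorn does not work for causal (leaks info of future tokens into past)
        return jax.nn.softmax(attn_logits).astype(dtype)
    return sinkhorn_attention_weights(attn_logits, sinkhorn_iters).astype(dtype)

Gating on is_encoder matters because the query-axis normalization mixes weight mass across positions; under a causal mask this would let earlier positions absorb information about later tokens, which is exactly what the inline comment in the patch warns against.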