fix bug when fine-tuning without flash-attention
modeling_telechat.py  CHANGED  +1 -0
@@ -270,6 +270,7 @@ class TELECHATAttention(nn.Module):
         self.pruned_heads = set()

         self.use_flash_attn = False
+        self.is_cross_attention = False


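A plausible reading of the one-line fix (an assumption; the commit message only says fine-tuning breaks without flash-attention) is that the eager attention path reads self.is_cross_attention, which was never defined in __init__, so the first forward pass without flash-attention raised an AttributeError. The minimal sketch below illustrates that failure mode; AttentionSketch and its attribute names are hypothetical stand-ins, not the actual TELECHATAttention code.

    # Minimal sketch of the suspected failure mode (assumption, not the real TeleChat code).
    import torch
    from torch import nn


    class AttentionSketch(nn.Module):
        def __init__(self, hidden_size: int, use_flash_attn: bool = False):
            super().__init__()
            self.use_flash_attn = use_flash_attn
            # The fix: define the flag unconditionally so the eager (non-flash)
            # branch can read it without raising AttributeError.
            self.is_cross_attention = False
            self.qkv = nn.Linear(hidden_size, 3 * hidden_size)

        def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
            q, k, v = self.qkv(hidden_states).chunk(3, dim=-1)
            if self.use_flash_attn:
                # The flash-attention path never touches is_cross_attention,
                # which would explain why the bug only shows up without it.
                return nn.functional.scaled_dot_product_attention(q, k, v)
            # Eager path: hypothetical branch that consults the flag.
            if not self.is_cross_attention:
                scores = q @ k.transpose(-1, -2) / k.size(-1) ** 0.5
                return torch.softmax(scores, dim=-1) @ v
            raise NotImplementedError("cross-attention not sketched here")


    # Usage: without the is_cross_attention assignment in __init__, this call
    # would fail with AttributeError when use_flash_attn=False.
    layer = AttentionSketch(hidden_size=64)
    out = layer(torch.randn(1, 8, 64))
    print(out.shape)  # torch.Size([1, 8, 64])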