oweller2 committed • Commit 7561dc4
Parent(s): bfd1777
add
- modeling_flexbert.py +1 -2
- padding.py +0 -1
modeling_flexbert.py
CHANGED
@@ -1646,8 +1646,7 @@ class FlexBertForCausalLM(FlexBertPreTrainedModel):
         if attention_mask is None:
             # unpad expects a encoder-like mask where all non-padding are ones
             attention_mask = torch.ones_like(input_ids)
-            # zero out pad tokens
-            attention_mask[input_ids == 50283] = 0
+            attention_mask[input_ids == 50283] = 0  # zero out pad tokens
         input_ids, indices, cu_seqlens, max_seqlen, position_ids, labels = self.unpad_inputs(
             input_ids, attention_mask, position_ids, labels
         )
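This hunk keeps the pad-token zeroing inside the `attention_mask is None` branch, now folded into a single line: when no mask is supplied, one is built from `input_ids` alone and pad positions are set to 0 before unpadding. Below is a minimal sketch of that mask construction, assuming 50283 is the tokenizer's pad token id as hard-coded in the commit (reading it from `config.pad_token_id` would be more robust); the helper name is illustrative, not part of the repo.

import torch

PAD_TOKEN_ID = 50283  # assumed pad token id, mirroring the hard-coded value in the commit

def build_attention_mask(input_ids: torch.Tensor) -> torch.Tensor:
    # Encoder-style mask: 1 for real tokens, 0 for padding.
    attention_mask = torch.ones_like(input_ids)
    attention_mask[input_ids == PAD_TOKEN_ID] = 0
    return attention_mask

# Toy batch: the second sequence is right-padded with the pad token.
input_ids = torch.tensor([[10, 11, 12], [10, 11, PAD_TOKEN_ID]])
print(build_attention_mask(input_ids))
# tensor([[1, 1, 1],
#         [1, 1, 0]])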
padding.py
CHANGED
@@ -27,7 +27,6 @@ def unpad_input(
         unpadded_position_ids: (total_nnz) or None
         unpadded_labels: (total_nnz) or None
     """
-    breakpoint()
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
    max_seqlen_in_batch = int(seqlens_in_batch.max().item())
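The padding.py change only removes a leftover `breakpoint()` from `unpad_input`; the retained lines compute per-sequence lengths, the flat indices of non-pad tokens, and the batch maximum, which the caller pairs with `cu_seqlens` (as seen in the modeling diff above) for variable-length attention. A small sketch of those computations on a toy mask follows, assuming the standard cumulative-length construction used by FlashAttention-style unpadding; the diff itself does not show how `cu_seqlens` is built.

import torch
import torch.nn.functional as F

# Toy mask: batch of 2, seq len 4; the second sequence has one pad position.
attention_mask = torch.tensor([[1, 1, 1, 1],
                               [1, 1, 1, 0]])

# Same computations as the retained lines of unpad_input.
seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)             # tensor([4, 3])
indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()  # flat positions of real tokens
max_seqlen_in_batch = int(seqlens_in_batch.max().item())                     # 4

# Assumed companion step: cumulative sequence lengths with a leading zero,
# marking sequence boundaries in the flattened (total_nnz,) token stream.
cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
print(indices)     # tensor([0, 1, 2, 3, 4, 5, 6])
print(cu_seqlens)  # tensor([0, 4, 7], dtype=torch.int32)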