oweller2 committed on
Commit
7561dc4
1 Parent(s): bfd1777
Files changed (2)
  1. modeling_flexbert.py +1 -2
  2. padding.py +0 -1
modeling_flexbert.py CHANGED
@@ -1646,8 +1646,7 @@ class FlexBertForCausalLM(FlexBertPreTrainedModel):
         if attention_mask is None:
             # unpad expects a encoder-like mask where all non-padding are ones
             attention_mask = torch.ones_like(input_ids)
-            # zero out pad tokens
-            attention_mask[input_ids == 50283] = 0
+            attention_mask[input_ids == 50283] = 0  # zero out pad tokens
         input_ids, indices, cu_seqlens, max_seqlen, position_ids, labels = self.unpad_inputs(
             input_ids, attention_mask, position_ids, labels
         )
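
Net effect of this hunk: when the caller passes no attention mask, the model builds an all-ones mask and zeroes the positions holding the pad token (id 50283) before unpadding. A minimal sketch of that step, assuming 50283 is the tokenizer's pad id as the diff indicates; the tensor values are illustrative, not the repository's actual data:

import torch

PAD_TOKEN_ID = 50283  # pad id hard-coded in the diff above

# two sequences, the first padded with two pad tokens (ids illustrative)
input_ids = torch.tensor([[101, 7592, 2088, PAD_TOKEN_ID, PAD_TOKEN_ID],
                          [101, 2023, 2003, 2146, 102]])

# unpad expects an encoder-like mask: 1 for real tokens, 0 for padding
attention_mask = torch.ones_like(input_ids)
attention_mask[input_ids == PAD_TOKEN_ID] = 0  # zero out pad tokens

# attention_mask is now:
# tensor([[1, 1, 1, 0, 0],
#         [1, 1, 1, 1, 1]])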
padding.py CHANGED
@@ -27,7 +27,6 @@ def unpad_input(
     unpadded_position_ids: (total_nnz) or None
     unpadded_labels: (total_nnz) or None
     """
-    breakpoint()
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
     max_seqlen_in_batch = int(seqlens_in_batch.max().item())
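
For context, the three lines after the removed breakpoint() compute the bookkeeping that unpadding relies on: per-sequence token counts, the flat indices of non-padding tokens, and the batch's longest sequence. A sketch under the same illustrative mask as above (the real unpad_input returns further outputs not shown here):

import torch

attention_mask = torch.tensor([[1, 1, 1, 0, 0],
                               [1, 1, 1, 1, 1]])

# tokens per sequence: tensor([3, 5], dtype=torch.int32)
seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)

# flat positions of non-padding tokens: tensor([0, 1, 2, 5, 6, 7, 8, 9])
indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()

# longest sequence in the batch: 5
max_seqlen_in_batch = int(seqlens_in_batch.max().item())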