oweller2 committed • Commit 7561dc4
Parent(s): bfd1777
add
- modeling_flexbert.py +1 -2
- padding.py +0 -1
modeling_flexbert.py
CHANGED
@@ -1646,8 +1646,7 @@ class FlexBertForCausalLM(FlexBertPreTrainedModel):
         if attention_mask is None:
             # unpad expects a encoder-like mask where all non-padding are ones
             attention_mask = torch.ones_like(input_ids)
-            # zero out pad tokens
-            attention_mask[input_ids == 50283] = 0
+            attention_mask[input_ids == 50283] = 0  # zero out pad tokens
         input_ids, indices, cu_seqlens, max_seqlen, position_ids, labels = self.unpad_inputs(
             input_ids, attention_mask, position_ids, labels
         )
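This hunk keeps the pad-token zeroing inside the `attention_mask is None` branch, now folded into a single line: when no mask is supplied, one is built from `input_ids` alone and pad positions are set to 0 before unpadding. Below is a minimal sketch of that mask construction, assuming 50283 is the tokenizer's pad token id as hard-coded in the commit (reading it from `config.pad_token_id` would be more robust); the helper name is illustrative, not part of the repo.

import torch

PAD_TOKEN_ID = 50283  # assumed pad token id, mirroring the hard-coded value in the commit

def build_attention_mask(input_ids: torch.Tensor) -> torch.Tensor:
    # Encoder-style mask: 1 for real tokens, 0 for padding.
    attention_mask = torch.ones_like(input_ids)
    attention_mask[input_ids == PAD_TOKEN_ID] = 0
    return attention_mask

# Toy batch: the second sequence is right-padded with the pad token.
input_ids = torch.tensor([[10, 11, 12], [10, 11, PAD_TOKEN_ID]])
print(build_attention_mask(input_ids))
# tensor([[1, 1, 1],
#         [1, 1, 0]])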
padding.py
CHANGED
@@ -27,7 +27,6 @@ def unpad_input(
         unpadded_position_ids: (total_nnz) or None
         unpadded_labels: (total_nnz) or None
     """
-    breakpoint()
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
    max_seqlen_in_batch = int(seqlens_in_batch.max().item())
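The padding.py change only removes a leftover `breakpoint()` from `unpad_input`; the retained lines compute per-sequence lengths, the flat indices of non-pad tokens, and the batch maximum, which the caller pairs with `cu_seqlens` (as seen in the modeling diff above) for variable-length attention. A small sketch of those computations on a toy mask follows, assuming the standard cumulative-length construction used by FlashAttention-style unpadding; the diff itself does not show how `cu_seqlens` is built.

import torch
import torch.nn.functional as F

# Toy mask: batch of 2, seq len 4; the second sequence has one pad position.
attention_mask = torch.tensor([[1, 1, 1, 1],
                               [1, 1, 1, 0]])

# Same computations as the retained lines of unpad_input.
seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)             # tensor([4, 3])
indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()  # flat positions of real tokens
max_seqlen_in_batch = int(seqlens_in_batch.max().item())                     # 4

# Assumed companion step: cumulative sequence lengths with a leading zero,
# marking sequence boundaries in the flattened (total_nnz,) token stream.
cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
print(indices)     # tensor([0, 1, 2, 3, 4, 5, 6])
print(cu_seqlens)  # tensor([0, 4, 7], dtype=torch.int32)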