Ray2333
/

GRM-llama3-8B-sftreg

Text Classification

text-generation

text-generation-inference

Inference Endpoints

Model card Files and versions Community

Ray2333 committed on Jul 7

Commit

3bcb014

•

1 Parent(s): dc744d1

Update model.py

Files changed (1) hide show

model.py +6 -1

model.py CHANGED Viewed

@@ -132,7 +132,12 @@ class AutoModelForCausalLMWithValueHead(PreTrainedModelWrapper):
             last_hidden_state = last_hidden_state.to(self.v_head.summary[0].weight.device)
         # use the last token value as reward
-        last_index = attention_mask.sum(dim=-1) - 1
         value = self.v_head(last_hidden_state).squeeze(-1)[torch.arange(len(last_hidden_state)), last_index]
         # force upcast in fp32 if logits are in half-precision

             last_hidden_state = last_hidden_state.to(self.v_head.summary[0].weight.device)
         # use the last token value as reward
+        if torch.any(attention_mask[:, 0] == 0):
+            # left padding
+            last_index = attention_mask.shape[-1] - 1
+        else:
+            # right padding
+            last_index = attention_mask.sum(dim=-1) - 1
         value = self.v_head(last_hidden_state).squeeze(-1)[torch.arange(len(last_hidden_state)), last_index]
         # force upcast in fp32 if logits are in half-precision