Update model.py
Browse files
model.py
CHANGED
@@ -132,7 +132,12 @@ class AutoModelForCausalLMWithValueHead(PreTrainedModelWrapper):
|
|
132 |
last_hidden_state = last_hidden_state.to(self.v_head.summary[0].weight.device)
|
133 |
|
134 |
# use the last token value as reward
|
135 |
-
|
|
|
|
|
|
|
|
|
|
|
136 |
value = self.v_head(last_hidden_state).squeeze(-1)[torch.arange(len(last_hidden_state)), last_index]
|
137 |
|
138 |
# force upcast in fp32 if logits are in half-precision
|
|
|
132 |
last_hidden_state = last_hidden_state.to(self.v_head.summary[0].weight.device)
|
133 |
|
134 |
# use the last token value as reward
|
135 |
+
if torch.any(attention_mask[:, 0] == 0):
|
136 |
+
# left padding
|
137 |
+
last_index = attention_mask.shape[-1] - 1
|
138 |
+
else:
|
139 |
+
# right padding
|
140 |
+
last_index = attention_mask.sum(dim=-1) - 1
|
141 |
value = self.v_head(last_hidden_state).squeeze(-1)[torch.arange(len(last_hidden_state)), last_index]
|
142 |
|
143 |
# force upcast in fp32 if logits are in half-precision
|