Update modeling_decision_tree_reward_model.py
Browse files
modeling_decision_tree_reward_model.py
CHANGED
@@ -93,5 +93,7 @@ class LlamaForDecisionTreeRewardModel(LlamaForSequenceClassification):
|
|
93 |
rewards_2 = embedding_2 @ weight.T + bias
|
94 |
rewards_diff = rewards_2 - rewards_1
|
95 |
return {
|
96 |
-
"preference": self.tree.predict(rewards_diff)[0],
|
97 |
-
"
|
|
|
|
|
|
93 |
rewards_2 = embedding_2 @ weight.T + bias
|
94 |
rewards_diff = rewards_2 - rewards_1
|
95 |
return {
|
96 |
+
"preference": self.tree.predict(rewards_diff)[0],
|
97 |
+
"rewards": np.concatenate([rewards_1, rewards_2]),
|
98 |
+
"attributes": self.attributes
|
99 |
+
}
|