Wenboz committed on
Commit be98c76 · verified · 1 Parent(s): df3d920

Model save
Files changed (4)
  1. README.md +17 -18
  2. all_results.json +4 -17
  3. train_results.json +4 -4
  4. trainer_state.json +0 -0
README.md CHANGED
@@ -2,7 +2,6 @@
 base_model: princeton-nlp/Llama-3-Base-8B-SFT
 library_name: peft
 tags:
- - alignment-handbook
 - trl
 - dpo
 - generated_from_trainer
@@ -18,15 +17,15 @@ should probably proofread and complete it, then remove this comment. -->

 This model is a fine-tuned version of [princeton-nlp/Llama-3-Base-8B-SFT](https://huggingface.co/princeton-nlp/Llama-3-Base-8B-SFT) on the None dataset.
 It achieves the following results on the evaluation set:
- - Loss: 0.5199
- - Rewards/chosen: -0.1477
- - Rewards/rejected: -0.9502
+ - Loss: 0.5181
+ - Rewards/chosen: 0.0150
+ - Rewards/rejected: -0.7988
 - Rewards/accuracies: 0.7260
- - Rewards/margins: 0.8025
- - Logps/rejected: -283.9596
- - Logps/chosen: -291.2388
- - Logits/rejected: -0.3914
- - Logits/chosen: -0.4217
+ - Rewards/margins: 0.8139
+ - Logps/rejected: -284.6649
+ - Logps/chosen: -292.3968
+ - Logits/rejected: -0.3842
+ - Logits/chosen: -0.4151

 ## Model description

@@ -63,15 +62,15 @@ The following hyperparameters were used during training:

 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
- | 0.6297 | 0.1047 | 100 | 0.6140 | 0.1358 | -0.1277 | 0.6960 | 0.2634 | -275.7340 | -288.4034 | -0.5479 | -0.5526 |
- | 0.5676 | 0.2094 | 200 | 0.5569 | -0.1144 | -0.6599 | 0.7000 | 0.5455 | -281.0560 | -290.9051 | -0.4945 | -0.5116 |
- | 0.5414 | 0.3141 | 300 | 0.5403 | -0.3808 | -1.0461 | 0.7260 | 0.6652 | -284.9180 | -293.5698 | -0.4540 | -0.4775 |
- | 0.5124 | 0.4187 | 400 | 0.5341 | -0.2337 | -0.9896 | 0.7040 | 0.7559 | -284.3532 | -292.0986 | -0.4243 | -0.4516 |
- | 0.5529 | 0.5234 | 500 | 0.5260 | -0.2177 | -1.0037 | 0.7240 | 0.7861 | -284.4948 | -291.9380 | -0.3995 | -0.4290 |
- | 0.53 | 0.6281 | 600 | 0.5244 | -0.0687 | -0.8583 | 0.7200 | 0.7895 | -283.0403 | -290.4489 | -0.4028 | -0.4317 |
- | 0.5028 | 0.7328 | 700 | 0.5190 | -0.3357 | -1.1360 | 0.7320 | 0.8003 | -285.8177 | -293.1184 | -0.3874 | -0.4179 |
- | 0.5347 | 0.8375 | 800 | 0.5191 | -0.1404 | -0.9419 | 0.7320 | 0.8015 | -283.8760 | -291.1650 | -0.3924 | -0.4225 |
- | 0.4783 | 0.9422 | 900 | 0.5190 | -0.1399 | -0.9459 | 0.7260 | 0.8060 | -283.9163 | -291.1600 | -0.3917 | -0.4219 |
+ | 0.6275 | 0.1047 | 100 | 0.6122 | 0.2594 | -0.0099 | 0.6920 | 0.2693 | -276.7753 | -289.9533 | -0.5582 | -0.5619 |
+ | 0.5726 | 0.2094 | 200 | 0.5529 | -0.0787 | -0.6353 | 0.7040 | 0.5565 | -283.0293 | -293.3344 | -0.5103 | -0.5266 |
+ | 0.5429 | 0.3141 | 300 | 0.5380 | -0.1730 | -0.8455 | 0.7260 | 0.6725 | -285.1317 | -294.2773 | -0.4689 | -0.4910 |
+ | 0.5054 | 0.4187 | 400 | 0.5332 | -0.0870 | -0.8469 | 0.7240 | 0.7599 | -285.1459 | -293.4173 | -0.4261 | -0.4535 |
+ | 0.5508 | 0.5234 | 500 | 0.5267 | -0.0207 | -0.8088 | 0.7180 | 0.7881 | -284.7646 | -292.7540 | -0.4045 | -0.4335 |
+ | 0.5338 | 0.6281 | 600 | 0.5263 | 0.1981 | -0.5901 | 0.7300 | 0.7882 | -282.5771 | -290.5659 | -0.4002 | -0.4304 |
+ | 0.5064 | 0.7328 | 700 | 0.5175 | -0.2007 | -1.0076 | 0.7300 | 0.8068 | -286.7521 | -294.5546 | -0.3761 | -0.4080 |
+ | 0.5349 | 0.8375 | 800 | 0.5197 | 0.0149 | -0.7896 | 0.7200 | 0.8045 | -284.5727 | -292.3984 | -0.3853 | -0.4161 |
+ | 0.4775 | 0.9422 | 900 | 0.5181 | 0.0150 | -0.7988 | 0.7260 | 0.8139 | -284.6649 | -292.3968 | -0.3842 | -0.4151 |


 ### Framework versions
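
The card's front matter above (`library_name: peft`, `base_model: princeton-nlp/Llama-3-Base-8B-SFT`, `trl`/`dpo` tags) implies this repo holds a DPO-trained PEFT adapter rather than full model weights. A minimal loading sketch follows; the adapter repo id is a placeholder, since this diff does not name it.

```python
# Sketch only: attach the DPO-trained PEFT adapter to the SFT base model.
# ADAPTER_ID is a placeholder -- substitute this repository's id.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_ID = "princeton-nlp/Llama-3-Base-8B-SFT"
ADAPTER_ID = "<this-adapter-repo>"  # placeholder, not named in the diff

tokenizer = AutoTokenizer.from_pretrained(BASE_ID)
base = AutoModelForCausalLM.from_pretrained(BASE_ID, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base, ADAPTER_ID)  # loads adapter weights on top

inputs = tokenizer("What does DPO optimize?", return_tensors="pt")
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```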
all_results.json CHANGED
@@ -1,22 +1,9 @@
 {
     "epoch": 0.9997382884061764,
-    "eval_logits/chosen": -0.42172786593437195,
-    "eval_logits/rejected": -0.39140966534614563,
-    "eval_logps/chosen": -291.23876953125,
-    "eval_logps/rejected": -283.9595947265625,
-    "eval_loss": 0.5199193358421326,
-    "eval_rewards/accuracies": 0.7260000109672546,
-    "eval_rewards/chosen": -0.14773716032505035,
-    "eval_rewards/margins": 0.8024731278419495,
-    "eval_rewards/rejected": -0.9502103328704834,
-    "eval_runtime": 348.2733,
-    "eval_samples": 2000,
-    "eval_samples_per_second": 5.743,
-    "eval_steps_per_second": 0.359,
     "total_flos": 0.0,
-    "train_loss": 0.543355998318857,
-    "train_runtime": 19333.2554,
+    "train_loss": 0.5431846207973221,
+    "train_runtime": 19104.9346,
     "train_samples": 61135,
-    "train_samples_per_second": 3.162,
-    "train_steps_per_second": 0.049
+    "train_samples_per_second": 3.2,
+    "train_steps_per_second": 0.05
 }
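
The eval keys removed from `all_results.json` above follow TRL's DPO convention, under which `rewards/margins` is the mean of chosen-minus-rejected rewards; the logged values agree with that identity to float precision. A small check, using only the numbers visible in this diff:

```python
# Sketch: check the DPO margin identity on the eval values removed in this commit.
chosen = -0.14773716032505035   # eval_rewards/chosen
rejected = -0.9502103328704834  # eval_rewards/rejected
margin = 0.8024731278419495     # eval_rewards/margins as logged

print(chosen - rejected)  # ~0.8024732, matching the logged margin
assert abs((chosen - rejected) - margin) < 1e-6  # tiny gap from float32 accumulation
```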
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
     "epoch": 0.9997382884061764,
     "total_flos": 0.0,
-    "train_loss": 0.543355998318857,
-    "train_runtime": 19333.2554,
+    "train_loss": 0.5431846207973221,
+    "train_runtime": 19104.9346,
     "train_samples": 61135,
-    "train_samples_per_second": 3.162,
-    "train_steps_per_second": 0.049
+    "train_samples_per_second": 3.2,
+    "train_steps_per_second": 0.05
 }
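
The updated throughput figures in `train_results.json` are self-consistent: `train_samples_per_second` is simply `train_samples / train_runtime`, rounded the way the Trainer logs it. A quick recomputation from the values above:

```python
# Sketch: recompute the logged throughput from the new train_results.json values.
train_samples = 61135
train_runtime = 19104.9346  # seconds

samples_per_second = train_samples / train_runtime
print(round(samples_per_second, 3))  # 3.2, matching "train_samples_per_second"
```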
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff