Wenboz committed on
Commit be98c76 · verified · 1 Parent(s): df3d920

Model save
Files changed (4)
  1. README.md +17 -18
  2. all_results.json +4 -17
  3. train_results.json +4 -4
  4. trainer_state.json +0 -0
README.md CHANGED
@@ -2,7 +2,6 @@
 base_model: princeton-nlp/Llama-3-Base-8B-SFT
 library_name: peft
 tags:
- - alignment-handbook
 - trl
 - dpo
 - generated_from_trainer
@@ -18,15 +17,15 @@ should probably proofread and complete it, then remove this comment. -->

 This model is a fine-tuned version of [princeton-nlp/Llama-3-Base-8B-SFT](https://huggingface.co/princeton-nlp/Llama-3-Base-8B-SFT) on the None dataset.
 It achieves the following results on the evaluation set:
- - Loss: 0.5199
- - Rewards/chosen: -0.1477
- - Rewards/rejected: -0.9502
+ - Loss: 0.5181
+ - Rewards/chosen: 0.0150
+ - Rewards/rejected: -0.7988
 - Rewards/accuracies: 0.7260
- - Rewards/margins: 0.8025
- - Logps/rejected: -283.9596
- - Logps/chosen: -291.2388
- - Logits/rejected: -0.3914
- - Logits/chosen: -0.4217
+ - Rewards/margins: 0.8139
+ - Logps/rejected: -284.6649
+ - Logps/chosen: -292.3968
+ - Logits/rejected: -0.3842
+ - Logits/chosen: -0.4151

 ## Model description

@@ -63,15 +62,15 @@ The following hyperparameters were used during training:

 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
- | 0.6297 | 0.1047 | 100 | 0.6140 | 0.1358 | -0.1277 | 0.6960 | 0.2634 | -275.7340 | -288.4034 | -0.5479 | -0.5526 |
- | 0.5676 | 0.2094 | 200 | 0.5569 | -0.1144 | -0.6599 | 0.7000 | 0.5455 | -281.0560 | -290.9051 | -0.4945 | -0.5116 |
- | 0.5414 | 0.3141 | 300 | 0.5403 | -0.3808 | -1.0461 | 0.7260 | 0.6652 | -284.9180 | -293.5698 | -0.4540 | -0.4775 |
- | 0.5124 | 0.4187 | 400 | 0.5341 | -0.2337 | -0.9896 | 0.7040 | 0.7559 | -284.3532 | -292.0986 | -0.4243 | -0.4516 |
- | 0.5529 | 0.5234 | 500 | 0.5260 | -0.2177 | -1.0037 | 0.7240 | 0.7861 | -284.4948 | -291.9380 | -0.3995 | -0.4290 |
- | 0.53 | 0.6281 | 600 | 0.5244 | -0.0687 | -0.8583 | 0.7200 | 0.7895 | -283.0403 | -290.4489 | -0.4028 | -0.4317 |
- | 0.5028 | 0.7328 | 700 | 0.5190 | -0.3357 | -1.1360 | 0.7320 | 0.8003 | -285.8177 | -293.1184 | -0.3874 | -0.4179 |
- | 0.5347 | 0.8375 | 800 | 0.5191 | -0.1404 | -0.9419 | 0.7320 | 0.8015 | -283.8760 | -291.1650 | -0.3924 | -0.4225 |
- | 0.4783 | 0.9422 | 900 | 0.5190 | -0.1399 | -0.9459 | 0.7260 | 0.8060 | -283.9163 | -291.1600 | -0.3917 | -0.4219 |
+ | 0.6275 | 0.1047 | 100 | 0.6122 | 0.2594 | -0.0099 | 0.6920 | 0.2693 | -276.7753 | -289.9533 | -0.5582 | -0.5619 |
+ | 0.5726 | 0.2094 | 200 | 0.5529 | -0.0787 | -0.6353 | 0.7040 | 0.5565 | -283.0293 | -293.3344 | -0.5103 | -0.5266 |
+ | 0.5429 | 0.3141 | 300 | 0.5380 | -0.1730 | -0.8455 | 0.7260 | 0.6725 | -285.1317 | -294.2773 | -0.4689 | -0.4910 |
+ | 0.5054 | 0.4187 | 400 | 0.5332 | -0.0870 | -0.8469 | 0.7240 | 0.7599 | -285.1459 | -293.4173 | -0.4261 | -0.4535 |
+ | 0.5508 | 0.5234 | 500 | 0.5267 | -0.0207 | -0.8088 | 0.7180 | 0.7881 | -284.7646 | -292.7540 | -0.4045 | -0.4335 |
+ | 0.5338 | 0.6281 | 600 | 0.5263 | 0.1981 | -0.5901 | 0.7300 | 0.7882 | -282.5771 | -290.5659 | -0.4002 | -0.4304 |
+ | 0.5064 | 0.7328 | 700 | 0.5175 | -0.2007 | -1.0076 | 0.7300 | 0.8068 | -286.7521 | -294.5546 | -0.3761 | -0.4080 |
+ | 0.5349 | 0.8375 | 800 | 0.5197 | 0.0149 | -0.7896 | 0.7200 | 0.8045 | -284.5727 | -292.3984 | -0.3853 | -0.4161 |
+ | 0.4775 | 0.9422 | 900 | 0.5181 | 0.0150 | -0.7988 | 0.7260 | 0.8139 | -284.6649 | -292.3968 | -0.3842 | -0.4151 |


 ### Framework versions
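
The card's front matter above (`library_name: peft`, `base_model: princeton-nlp/Llama-3-Base-8B-SFT`, `trl`/`dpo` tags) implies this repo holds a DPO-trained PEFT adapter rather than full model weights. A minimal loading sketch follows; the adapter repo id is a placeholder, since this diff does not name it.

```python
# Sketch only: attach the DPO-trained PEFT adapter to the SFT base model.
# ADAPTER_ID is a placeholder -- substitute this repository's id.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_ID = "princeton-nlp/Llama-3-Base-8B-SFT"
ADAPTER_ID = "<this-adapter-repo>"  # placeholder, not named in the diff

tokenizer = AutoTokenizer.from_pretrained(BASE_ID)
base = AutoModelForCausalLM.from_pretrained(BASE_ID, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base, ADAPTER_ID)  # loads adapter weights on top

inputs = tokenizer("What does DPO optimize?", return_tensors="pt")
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```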
all_results.json CHANGED
@@ -1,22 +1,9 @@
 {
     "epoch": 0.9997382884061764,
-    "eval_logits/chosen": -0.42172786593437195,
-    "eval_logits/rejected": -0.39140966534614563,
-    "eval_logps/chosen": -291.23876953125,
-    "eval_logps/rejected": -283.9595947265625,
-    "eval_loss": 0.5199193358421326,
-    "eval_rewards/accuracies": 0.7260000109672546,
-    "eval_rewards/chosen": -0.14773716032505035,
-    "eval_rewards/margins": 0.8024731278419495,
-    "eval_rewards/rejected": -0.9502103328704834,
-    "eval_runtime": 348.2733,
-    "eval_samples": 2000,
-    "eval_samples_per_second": 5.743,
-    "eval_steps_per_second": 0.359,
     "total_flos": 0.0,
-    "train_loss": 0.543355998318857,
-    "train_runtime": 19333.2554,
+    "train_loss": 0.5431846207973221,
+    "train_runtime": 19104.9346,
     "train_samples": 61135,
-    "train_samples_per_second": 3.162,
-    "train_steps_per_second": 0.049
+    "train_samples_per_second": 3.2,
+    "train_steps_per_second": 0.05
 }
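
The eval keys removed from `all_results.json` above follow TRL's DPO convention, under which `rewards/margins` is the mean of chosen-minus-rejected rewards; the logged values agree with that identity to float precision. A small check, using only the numbers visible in this diff:

```python
# Sketch: check the DPO margin identity on the eval values removed in this commit.
chosen = -0.14773716032505035   # eval_rewards/chosen
rejected = -0.9502103328704834  # eval_rewards/rejected
margin = 0.8024731278419495     # eval_rewards/margins as logged

print(chosen - rejected)  # ~0.8024732, matching the logged margin
assert abs((chosen - rejected) - margin) < 1e-6  # tiny gap from float32 accumulation
```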
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
     "epoch": 0.9997382884061764,
     "total_flos": 0.0,
-    "train_loss": 0.543355998318857,
-    "train_runtime": 19333.2554,
+    "train_loss": 0.5431846207973221,
+    "train_runtime": 19104.9346,
     "train_samples": 61135,
-    "train_samples_per_second": 3.162,
-    "train_steps_per_second": 0.049
+    "train_samples_per_second": 3.2,
+    "train_steps_per_second": 0.05
 }
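
The updated throughput figures in `train_results.json` are self-consistent: `train_samples_per_second` is simply `train_samples / train_runtime`, rounded the way the Trainer logs it. A quick recomputation from the values above:

```python
# Sketch: recompute the logged throughput from the new train_results.json values.
train_samples = 61135
train_runtime = 19104.9346  # seconds

samples_per_second = train_samples / train_runtime
print(round(samples_per_second, 3))  # 3.2, matching "train_samples_per_second"
```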
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff