jikaixuan committed
Commit 1a3c608
1 Parent(s): f53b622

End of training

Files changed (3):
  1. README.md +16 -12
  2. all_results.json +15 -0
  3. eval_results.json +14 -14
README.md CHANGED
@@ -1,10 +1,14 @@
 ---
+license: apache-2.0
 library_name: peft
 tags:
+- alignment-handbook
 - trl
 - dpo
 - generated_from_trainer
 base_model: mistralai/Mistral-7B-v0.1
+datasets:
+- HuggingFaceH4/ultrafeedback_binarized
 model-index:
 - name: zephyr-7b
   results: []
@@ -15,19 +19,19 @@ should probably proofread and complete it, then remove this comment. -->
 
 # zephyr-7b
 
-This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
+This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-qlora](https://huggingface.co/alignment-handbook/zephyr-7b-sft-qlora) on the HuggingFaceH4/ultrafeedback_binarized dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6790
-- Rewards/chosen: -0.5476
-- Rewards/rejected: -0.8618
-- Rewards/accuracies: 0.3571
-- Rewards/margins: 0.3143
-- Logps/rejected: -161.5806
-- Logps/chosen: -123.6563
-- Logits/rejected: 1.4905
-- Logits/chosen: 1.3693
-- Use Label: 16436.9844
-- Pred Label: 2251.0159
+- Loss: 0.6789
+- Rewards/chosen: -0.5482
+- Rewards/rejected: -0.8623
+- Rewards/accuracies: 0.3591
+- Rewards/margins: 0.3141
+- Logps/rejected: -161.6313
+- Logps/chosen: -123.7209
+- Logits/rejected: 1.4916
+- Logits/chosen: 1.3712
+- Use Label: 17581.0469
+- Pred Label: 2490.9524
 
 ## Model description
 
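The updated card describes a QLoRA/PEFT adapter trained with DPO on top of an SFT checkpoint of Mistral-7B. As a usage illustration, here is a minimal loading sketch with transformers and peft; the adapter repo id `jikaixuan/zephyr-7b` is an assumption inferred from the commit author and model name, not something stated in the card.

```python
# Minimal sketch: apply the DPO-trained QLoRA adapter on top of the base model.
# NOTE: the adapter repo id below is an assumption, not taken from the card.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model = PeftModel.from_pretrained(base, "jikaixuan/zephyr-7b")  # hypothetical adapter repo id
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

inputs = tokenizer("Explain DPO in one sentence.", return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```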
all_results.json CHANGED
@@ -1,5 +1,20 @@
 {
     "epoch": 1.0,
+    "eval_logits/chosen": 1.3711830377578735,
+    "eval_logits/rejected": 1.4916048049926758,
+    "eval_logps/chosen": -123.7209243774414,
+    "eval_logps/rejected": -161.63131713867188,
+    "eval_loss": 0.6788680553436279,
+    "eval_pred_label": 2490.952392578125,
+    "eval_rewards/accuracies": 0.3591269850730896,
+    "eval_rewards/chosen": -0.548203706741333,
+    "eval_rewards/margins": 0.3141288757324219,
+    "eval_rewards/rejected": -0.8623325824737549,
+    "eval_runtime": 247.4536,
+    "eval_samples": 2000,
+    "eval_samples_per_second": 8.082,
+    "eval_steps_per_second": 0.255,
+    "eval_use_label": 17581.046875,
     "train_loss": 0.6760230718482851,
     "train_runtime": 20063.9235,
     "train_samples": 61135,
eval_results.json CHANGED
@@ -1,18 +1,18 @@
 {
     "epoch": 1.0,
-    "eval_logits/chosen": 13.463111877441406,
-    "eval_logits/rejected": 13.527791023254395,
-    "eval_logps/chosen": -15128.9541015625,
-    "eval_logps/rejected": -14704.759765625,
-    "eval_loss": 0.3621964752674103,
-    "eval_pred_label": 8140.01611328125,
-    "eval_rewards/accuracies": 0.2420634925365448,
-    "eval_rewards/chosen": -150.60052490234375,
-    "eval_rewards/margins": -4.306910991668701,
-    "eval_rewards/rejected": -146.29360961914062,
-    "eval_runtime": 245.5331,
+    "eval_logits/chosen": 1.3711830377578735,
+    "eval_logits/rejected": 1.4916048049926758,
+    "eval_logps/chosen": -123.7209243774414,
+    "eval_logps/rejected": -161.63131713867188,
+    "eval_loss": 0.6788680553436279,
+    "eval_pred_label": 2490.952392578125,
+    "eval_rewards/accuracies": 0.3591269850730896,
+    "eval_rewards/chosen": -0.548203706741333,
+    "eval_rewards/margins": 0.3141288757324219,
+    "eval_rewards/rejected": -0.8623325824737549,
+    "eval_runtime": 247.4536,
     "eval_samples": 2000,
-    "eval_samples_per_second": 8.146,
-    "eval_steps_per_second": 0.257,
-    "eval_use_label": 11931.984375
+    "eval_samples_per_second": 8.082,
+    "eval_steps_per_second": 0.255,
+    "eval_use_label": 17581.046875
 }
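As a quick consistency check on the new DPO metrics (a sketch, assuming eval_results.json is available locally): the reported reward margin equals the chosen reward minus the rejected reward, -0.5482 - (-0.8623) ≈ 0.3141.

```python
# Sketch: verify eval_rewards/margins == eval_rewards/chosen - eval_rewards/rejected.
import json

with open("eval_results.json") as f:
    metrics = json.load(f)

margin = metrics["eval_rewards/chosen"] - metrics["eval_rewards/rejected"]
assert abs(margin - metrics["eval_rewards/margins"]) < 1e-6
print(f"reward margin = {margin:.4f}")  # ~0.3141
```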