Jerry46 committed
Commit 9bd5dfb
Parent(s): 18bdd3d

Model save
README.md CHANGED
@@ -13,17 +13,17 @@ should probably proofread and complete it, then remove this comment. -->
 
 # zephyr-7b-dpo-lora
 
-This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on an unknown dataset.
+This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6931
-- Rewards/chosen: 0.0
-- Rewards/rejected: 0.0
-- Rewards/accuracies: 0.0
-- Rewards/margins: 0.0
-- Logps/rejected: -179.8250
-- Logps/chosen: -194.3670
-- Logits/rejected: -3.0430
-- Logits/chosen: -3.0079
+- Loss: -0.2038
+- Rewards/chosen: -1.1628
+- Rewards/rejected: -2.4457
+- Rewards/accuracies: 0.6840
+- Rewards/margins: 1.2829
+- Logps/rejected: -252.9479
+- Logps/chosen: -282.7848
+- Logits/rejected: -2.9400
+- Logits/chosen: -2.9655
 
 ## Model description
 
@@ -42,25 +42,27 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 0.05
+- learning_rate: 5e-07
 - train_batch_size: 8
 - eval_batch_size: 4
 - seed: 42
 - distributed_type: multi-GPU
-- num_devices: 2
+- num_devices: 4
 - gradient_accumulation_steps: 2
-- total_train_batch_size: 32
-- total_eval_batch_size: 8
+- total_train_batch_size: 64
+- total_eval_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.1
-- num_epochs: 1
+- num_epochs: 3
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.3466 | 1.0 | 1 | 0.6931 | 0.0 | 0.0 | 0.0 | 0.0 | -179.8250 | -194.3670 | -3.0430 | -3.0079 |
+| 0.6137 | 1.0 | 968 | 0.6277 | -0.0287 | -0.4191 | 0.7040 | 0.3905 | -232.6823 | -271.4433 | -2.9989 | -3.0154 |
+| 0.0705 | 2.0 | 1937 | 0.0570 | -0.6708 | -1.6676 | 0.6960 | 0.9968 | -245.1669 | -277.8647 | -2.9609 | -2.9830 |
+| -0.2602 | 3.0 | 2904 | -0.2038 | -1.1628 | -2.4457 | 0.6840 | 1.2829 | -252.9479 | -282.7848 | -2.9400 | -2.9655 |
 
 
 ### Framework versions
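The Rewards/* columns above follow standard DPO bookkeeping: each "reward" is the beta-scaled log-probability ratio between the policy and the frozen reference (SFT) model, and the margin is chosen minus rejected (the final row checks out: -1.1628 - (-2.4457) = 1.2829). Below is a minimal sketch of those definitions, not this commit's actual training code; `beta` and the per-sequence log-probs are illustrative placeholders. Note that the standard `-log sigmoid(margin)` loss is strictly non-negative, so the negative validation loss reported here suggests a variant objective was configured.

```python
import torch
import torch.nn.functional as F

def dpo_metrics(policy_chosen_logps: torch.Tensor,
                policy_rejected_logps: torch.Tensor,
                ref_chosen_logps: torch.Tensor,
                ref_rejected_logps: torch.Tensor,
                beta: float = 0.1):
    # Rewards/chosen and Rewards/rejected: beta-scaled log-prob ratios
    # of the policy against the frozen reference model.
    chosen_rewards = beta * (policy_chosen_logps - ref_chosen_logps)
    rejected_rewards = beta * (policy_rejected_logps - ref_rejected_logps)
    # Rewards/margins: how far the chosen response outscores the rejected one.
    margins = chosen_rewards - rejected_rewards
    # Rewards/accuracies: fraction of pairs where the chosen response wins.
    accuracies = (chosen_rewards > rejected_rewards).float().mean()
    # Standard sigmoid DPO loss: -log sigmoid(margin), always >= 0.
    loss = -F.logsigmoid(margins).mean()
    return loss, chosen_rewards.mean(), rejected_rewards.mean(), margins.mean(), accuracies
```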
all_results.json CHANGED
@@ -1,8 +1,21 @@
 {
-    "epoch": 0.86,
-    "train_loss": 1.0374332269032795,
-    "train_runtime": 116.0419,
+    "epoch": 3.0,
+    "eval_logits/chosen": -2.965512990951538,
+    "eval_logits/rejected": -2.9399757385253906,
+    "eval_logps/chosen": -282.7847900390625,
+    "eval_logps/rejected": -252.9479217529297,
+    "eval_loss": -0.203842431306839,
+    "eval_rewards/accuracies": 0.6840000152587891,
+    "eval_rewards/chosen": -1.1628247499465942,
+    "eval_rewards/margins": 1.2828813791275024,
+    "eval_rewards/rejected": -2.4457061290740967,
+    "eval_runtime": 444.1107,
+    "eval_samples": 2000,
+    "eval_samples_per_second": 4.503,
+    "eval_steps_per_second": 0.281,
+    "train_loss": 0.36701411283100355,
+    "train_runtime": 84636.1866,
     "train_samples": 61966,
-    "train_samples_per_second": 0.862,
-    "train_steps_per_second": 0.026
+    "train_samples_per_second": 2.196,
+    "train_steps_per_second": 0.034
 }
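As a quick sanity check, the throughput figures in the updated all_results.json are self-consistent with the hyperparameters in the card; the sketch below recomputes them from values visible in this diff (nothing here reads the actual file):

```python
# Recompute the reported throughput from values shown in the diff above.
train_samples = 61966
num_epochs = 3
train_runtime = 84636.1866  # seconds

# Effective batch size: 8 per device * 4 devices * 2 accumulation steps.
total_train_batch_size = 8 * 4 * 2  # = 64, matching the card

samples_per_sec = train_samples * num_epochs / train_runtime        # ~2.196
total_steps = train_samples * num_epochs // total_train_batch_size  # 2904
steps_per_sec = total_steps / train_runtime                         # ~0.034

print(round(samples_per_sec, 3), total_steps, round(steps_per_sec, 3))
# -> 2.196 2904 0.034, matching train_samples_per_second,
#    the final Step in the results table, and train_steps_per_second
```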
eval_results.json CHANGED
@@ -1,16 +1,16 @@
 {
-    "epoch": 1.0,
-    "eval_logits/chosen": -3.0078587532043457,
-    "eval_logits/rejected": -3.042999744415283,
-    "eval_logps/chosen": -194.36697387695312,
-    "eval_logps/rejected": -179.82501220703125,
-    "eval_loss": 0.6931471824645996,
-    "eval_rewards/accuracies": 0.0,
-    "eval_rewards/chosen": 0.0,
-    "eval_rewards/margins": 0.0,
-    "eval_rewards/rejected": 0.0,
-    "eval_runtime": 5.0964,
-    "eval_samples": 10,
-    "eval_samples_per_second": 1.962,
-    "eval_steps_per_second": 0.392
+    "epoch": 3.0,
+    "eval_logits/chosen": -2.965512990951538,
+    "eval_logits/rejected": -2.9399757385253906,
+    "eval_logps/chosen": -282.7847900390625,
+    "eval_logps/rejected": -252.9479217529297,
+    "eval_loss": -0.203842431306839,
+    "eval_rewards/accuracies": 0.6840000152587891,
+    "eval_rewards/chosen": -1.1628247499465942,
+    "eval_rewards/margins": 1.2828813791275024,
+    "eval_rewards/rejected": -2.4457061290740967,
+    "eval_runtime": 444.1107,
+    "eval_samples": 2000,
+    "eval_samples_per_second": 4.503,
+    "eval_steps_per_second": 0.281
 }
runs/Dec10_14-39-55_uclaml03.cs.ucla.edu/events.out.tfevents.1702333147.uclaml03.cs.ucla.edu.2921932.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:98c72e0be945c8e8579e85873da199ac30c8349d4cc8c0f46bc59098496e7072
+size 828
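The added TensorBoard event file is stored via Git LFS, so the diff shows only the three-line pointer (spec version, SHA-256 object id, and size in bytes) rather than the binary payload. A small illustrative parser for that pointer format, not part of this repo:

```python
def parse_lfs_pointer(text: str) -> dict:
    """Split a Git LFS pointer file into its `key value` fields."""
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:98c72e0be945c8e8579e85873da199ac30c8349d4cc8c0f46bc59098496e7072\n"
    "size 828\n"
)
print(parse_lfs_pointer(pointer)["size"])  # -> 828 (bytes of the real file)
```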
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch": 0.86,
-    "train_loss": 1.0374332269032795,
-    "train_runtime": 116.0419,
+    "epoch": 3.0,
+    "train_loss": 0.36701411283100355,
+    "train_runtime": 84636.1866,
     "train_samples": 61966,
-    "train_samples_per_second": 0.862,
-    "train_steps_per_second": 0.026
+    "train_samples_per_second": 2.196,
+    "train_steps_per_second": 0.034
 }
trainer_state.json CHANGED
The diff for this file is too large to render.
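Since the card describes a LoRA adapter trained on top of alignment-handbook/zephyr-7b-sft-full, inference would typically load the adapter with PEFT. A minimal sketch, assuming the adapter is published under the committer's namespace as Jerry46/zephyr-7b-dpo-lora (an assumed repo id, not confirmed by this diff) and that the repo includes tokenizer files:

```python
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

adapter_id = "Jerry46/zephyr-7b-dpo-lora"  # assumption: adapter repo id

# AutoPeftModelForCausalLM reads the adapter config, fetches the base model
# it references (zephyr-7b-sft-full), and attaches the LoRA weights to it.
model = AutoPeftModelForCausalLM.from_pretrained(adapter_id, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(adapter_id)

inputs = tokenizer("What does DPO optimize?", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```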