silviasapora committed on
Commit
3f81b20
1 Parent(s): 9dc97f6

Model save

Browse files
README.md CHANGED
@@ -5,6 +5,7 @@ base_model: google/gemma-7b
5
  tags:
6
  - trl
7
  - orpo
 
8
  - generated_from_trainer
9
  model-index:
10
  - name: gemma-7b-orpo-low-quality
@@ -16,20 +17,20 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # gemma-7b-orpo-low-quality
18
 
19
- This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 1.5398
22
- - Rewards/chosen: -0.0540
23
- - Rewards/rejected: -0.0625
24
- - Rewards/accuracies: 0.5396
25
- - Rewards/margins: 0.0085
26
- - Logps/rejected: -1.2503
27
- - Logps/chosen: -1.0803
28
- - Logits/rejected: 271.8756
29
- - Logits/chosen: 300.6891
30
- - Nll Loss: 1.4724
31
- - Log Odds Ratio: -0.6945
32
- - Log Odds Chosen: 0.2937
33
 
34
  ## Model description
35
 
@@ -66,9 +67,9 @@ The following hyperparameters were used during training:
66
 
67
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Nll Loss | Log Odds Ratio | Log Odds Chosen |
68
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:--------:|:--------------:|:---------------:|
69
- | 1.441 | 0.9955 | 167 | 1.4762 | -0.0510 | -0.0574 | 0.5324 | 0.0064 | -1.1485 | -1.0204 | 290.1581 | 318.9965 | 1.4310 | -0.6990 | 0.1934 |
70
- | 1.0908 | 1.9970 | 335 | 1.4250 | -0.0497 | -0.0576 | 0.5324 | 0.0079 | -1.1528 | -0.9950 | 285.8206 | 314.6779 | 1.3697 | -0.6970 | 0.2360 |
71
- | 0.5724 | 2.9866 | 501 | 1.5398 | -0.0540 | -0.0625 | 0.5396 | 0.0085 | -1.2503 | -1.0803 | 271.8756 | 300.6891 | 1.4724 | -0.6945 | 0.2937 |
72
 
73
 
74
  ### Framework versions
 
5
  tags:
6
  - trl
7
  - orpo
8
+ - alignment-handbook
9
  - generated_from_trainer
10
  model-index:
11
  - name: gemma-7b-orpo-low-quality
 
17
 
18
  # gemma-7b-orpo-low-quality
19
 
20
+ This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) on an unknown dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 1.5517
23
+ - Rewards/chosen: -0.0554
24
+ - Rewards/rejected: -0.0646
25
+ - Rewards/accuracies: 0.5612
26
+ - Rewards/margins: 0.0092
27
+ - Logps/rejected: -1.2920
28
+ - Logps/chosen: -1.1085
29
+ - Logits/rejected: 268.0282
30
+ - Logits/chosen: 297.1682
31
+ - Nll Loss: 1.4855
32
+ - Log Odds Ratio: -0.6970
33
+ - Log Odds Chosen: 0.2856
34
 
35
  ## Model description
36
 
 
67
 
68
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Nll Loss | Log Odds Ratio | Log Odds Chosen |
69
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:--------:|:--------------:|:---------------:|
70
+ | 1.436 | 0.9955 | 167 | 1.4679 | -0.0508 | -0.0571 | 0.5468 | 0.0063 | -1.1420 | -1.0158 | 288.9292 | 318.3812 | 1.4121 | -0.6895 | 0.1983 |
71
+ | 1.1098 | 1.9970 | 335 | 1.4451 | -0.0518 | -0.0579 | 0.5468 | 0.0061 | -1.1581 | -1.0353 | 286.4312 | 315.0296 | 1.3839 | -0.7228 | 0.2105 |
72
+ | 0.5921 | 2.9866 | 501 | 1.5517 | -0.0554 | -0.0646 | 0.5612 | 0.0092 | -1.2920 | -1.1085 | 268.0282 | 297.1682 | 1.4855 | -0.6970 | 0.2856 |
73
 
74
 
75
  ### Framework versions
all_results.json CHANGED
@@ -1,9 +1,25 @@
1
  {
2
  "epoch": 2.9865871833084947,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "total_flos": 0.0,
4
- "train_loss": 1.4594077459590402,
5
- "train_runtime": 13816.0738,
6
  "train_samples": 5364,
7
- "train_samples_per_second": 1.165,
8
- "train_steps_per_second": 0.036
9
  }
 
1
  {
2
  "epoch": 2.9865871833084947,
3
+ "eval_log_odds_chosen": 0.2937372922897339,
4
+ "eval_log_odds_ratio": -0.6945178508758545,
5
+ "eval_logits/chosen": 300.6891174316406,
6
+ "eval_logits/rejected": 271.8756103515625,
7
+ "eval_logps/chosen": -1.0802680253982544,
8
+ "eval_logps/rejected": -1.2502641677856445,
9
+ "eval_loss": 1.539820671081543,
10
+ "eval_nll_loss": 1.4724125862121582,
11
+ "eval_rewards/accuracies": 0.5395683646202087,
12
+ "eval_rewards/chosen": -0.05401340499520302,
13
+ "eval_rewards/margins": 0.00849980115890503,
14
+ "eval_rewards/rejected": -0.06251321732997894,
15
+ "eval_runtime": 112.2968,
16
+ "eval_samples": 553,
17
+ "eval_samples_per_second": 4.924,
18
+ "eval_steps_per_second": 1.238,
19
  "total_flos": 0.0,
20
+ "train_loss": 1.4570662823027956,
21
+ "train_runtime": 13599.7579,
22
  "train_samples": 5364,
23
+ "train_samples_per_second": 1.183,
24
+ "train_steps_per_second": 0.037
25
  }
eval_results.json CHANGED
@@ -1,19 +1,19 @@
1
  {
2
- "epoch": 2.9928514694201747,
3
- "eval_log_odds_chosen": 0.44380733370780945,
4
- "eval_log_odds_ratio": -0.6702221632003784,
5
- "eval_logits/chosen": 286.3763122558594,
6
- "eval_logits/rejected": 275.9735412597656,
7
- "eval_logps/chosen": -1.2025552988052368,
8
- "eval_logps/rejected": -1.5090675354003906,
9
- "eval_loss": 1.639459252357483,
10
- "eval_nll_loss": 1.5846672058105469,
11
- "eval_rewards/accuracies": 0.6028881072998047,
12
- "eval_rewards/chosen": -0.060127776116132736,
13
- "eval_rewards/margins": 0.015325604937970638,
14
- "eval_rewards/rejected": -0.07545337826013565,
15
- "eval_runtime": 278.5267,
16
  "eval_samples": 553,
17
- "eval_samples_per_second": 1.985,
18
- "eval_steps_per_second": 0.995
19
  }
 
1
  {
2
+ "epoch": 2.9865871833084947,
3
+ "eval_log_odds_chosen": 0.2937372922897339,
4
+ "eval_log_odds_ratio": -0.6945178508758545,
5
+ "eval_logits/chosen": 300.6891174316406,
6
+ "eval_logits/rejected": 271.8756103515625,
7
+ "eval_logps/chosen": -1.0802680253982544,
8
+ "eval_logps/rejected": -1.2502641677856445,
9
+ "eval_loss": 1.539820671081543,
10
+ "eval_nll_loss": 1.4724125862121582,
11
+ "eval_rewards/accuracies": 0.5395683646202087,
12
+ "eval_rewards/chosen": -0.05401340499520302,
13
+ "eval_rewards/margins": 0.00849980115890503,
14
+ "eval_rewards/rejected": -0.06251321732997894,
15
+ "eval_runtime": 112.2968,
16
  "eval_samples": 553,
17
+ "eval_samples_per_second": 4.924,
18
+ "eval_steps_per_second": 1.238
19
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dec52e755b8dc3619df2908f535b0555e2dc06427850ceaee163434c259b1fbd
3
  size 4995496656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06c0d9666d139358ff5554a34051d99111e3b97010f1883f0a6925ea2e38718e
3
  size 4995496656
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea059af29dbe1d13cd4663702981098306a719842c8b6daa22775a2d75c899b8
3
  size 4982953168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:643dc469fdeca59e9b5d64126415a7f81c4c4023c6fe9602df8d357561f23ca8
3
  size 4982953168
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b2900743ebfa120b1bd6c6aede29909b467ef078355948272c970ceeb044543
3
  size 4982953200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd109beace3c5c26c78fd106f86e2fdec798571b3fb4af30fe6e97cade6055ef
3
  size 4982953200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cac536ba4dd9bee8fa767d40d69a1aa96062853affb4442bc5f1f4549548ace
3
  size 2113988336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68c441bebbedad362fed1b7797d7a88cbe4194604bb8d0849cf39e090d388d5e
3
  size 2113988336
runs/Sep18_20-46-35_65ecb96dba42/events.out.tfevents.1726706643.65ecb96dba42.1160.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dcb674752307598161fde9ef2f1cc73bf63c20b269b17cec3f31c6d70e29389
3
+ size 997
runs/Sep20_22-18-14_65ecb96dba42/events.out.tfevents.1726870791.65ecb96dba42.231696.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0bb882f7aed954a1701345b71297539c9e7d30585f625e045f4df78796c93ce
3
+ size 5404
runs/Sep21_02-30-57_65ecb96dba42/events.out.tfevents.1726885944.65ecb96dba42.269402.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6e43bdcc7b933bf8176a870fcd82606e6648f4165cf3fcd77d084bf8de8461f
3
+ size 94110
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 2.9865871833084947,
3
  "total_flos": 0.0,
4
- "train_loss": 1.4594077459590402,
5
- "train_runtime": 13816.0738,
6
  "train_samples": 5364,
7
- "train_samples_per_second": 1.165,
8
- "train_steps_per_second": 0.036
9
  }
 
1
  {
2
  "epoch": 2.9865871833084947,
3
  "total_flos": 0.0,
4
+ "train_loss": 1.4570662823027956,
5
+ "train_runtime": 13599.7579,
6
  "train_samples": 5364,
7
+ "train_samples_per_second": 1.183,
8
+ "train_steps_per_second": 0.037
9
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7652e215bce2c4fe6a8a69c60dc1736d525ab7940382c277bdbc4dac5af83a39
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba2d73edee19e8e838afa6616d74780dc6e6b8b0472ea9ec06e39279b279612c
3
  size 6776