silviasapora
committed on
Commit
•
3f81b20
1
Parent(s):
9dc97f6
Model save
Browse files
- README.md +17 -16
- all_results.json +20 -4
- eval_results.json +16 -16
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- runs/Sep18_20-46-35_65ecb96dba42/events.out.tfevents.1726706643.65ecb96dba42.1160.1 +3 -0
- runs/Sep20_22-18-14_65ecb96dba42/events.out.tfevents.1726870791.65ecb96dba42.231696.0 +3 -0
- runs/Sep21_02-30-57_65ecb96dba42/events.out.tfevents.1726885944.65ecb96dba42.269402.0 +3 -0
- train_results.json +4 -4
- trainer_state.json +0 -0
- training_args.bin +1 -1
README.md
CHANGED
@@ -5,6 +5,7 @@ base_model: google/gemma-7b
|
|
5 |
tags:
|
6 |
- trl
|
7 |
- orpo
|
|
|
8 |
- generated_from_trainer
|
9 |
model-index:
|
10 |
- name: gemma-7b-orpo-low-quality
|
@@ -16,20 +17,20 @@ should probably proofread and complete it, then remove this comment. -->
|
|
16 |
|
17 |
# gemma-7b-orpo-low-quality
|
18 |
|
19 |
-
This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) on
|
20 |
It achieves the following results on the evaluation set:
|
21 |
-
- Loss: 1.
|
22 |
-
- Rewards/chosen: -0.
|
23 |
-
- Rewards/rejected: -0.
|
24 |
-
- Rewards/accuracies: 0.
|
25 |
-
- Rewards/margins: 0.
|
26 |
-
- Logps/rejected: -1.
|
27 |
-
- Logps/chosen: -1.
|
28 |
-
- Logits/rejected:
|
29 |
-
- Logits/chosen:
|
30 |
-
- Nll Loss: 1.
|
31 |
-
- Log Odds Ratio: -0.
|
32 |
-
- Log Odds Chosen: 0.
|
33 |
|
34 |
## Model description
|
35 |
|
@@ -66,9 +67,9 @@ The following hyperparameters were used during training:
|
|
66 |
|
67 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Nll Loss | Log Odds Ratio | Log Odds Chosen |
|
68 |
|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:--------:|:--------------:|:---------------:|
|
69 |
-
| 1.
|
70 |
-
| 1.
|
71 |
-
| 0.
|
72 |
|
73 |
|
74 |
### Framework versions
|
|
|
5 |
tags:
|
6 |
- trl
|
7 |
- orpo
|
8 |
+
- alignment-handbook
|
9 |
- generated_from_trainer
|
10 |
model-index:
|
11 |
- name: gemma-7b-orpo-low-quality
|
|
|
17 |
|
18 |
# gemma-7b-orpo-low-quality
|
19 |
|
20 |
+
This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) on an unknown dataset.
|
21 |
It achieves the following results on the evaluation set:
|
22 |
+
- Loss: 1.5517
|
23 |
+
- Rewards/chosen: -0.0554
|
24 |
+
- Rewards/rejected: -0.0646
|
25 |
+
- Rewards/accuracies: 0.5612
|
26 |
+
- Rewards/margins: 0.0092
|
27 |
+
- Logps/rejected: -1.2920
|
28 |
+
- Logps/chosen: -1.1085
|
29 |
+
- Logits/rejected: 268.0282
|
30 |
+
- Logits/chosen: 297.1682
|
31 |
+
- Nll Loss: 1.4855
|
32 |
+
- Log Odds Ratio: -0.6970
|
33 |
+
- Log Odds Chosen: 0.2856
|
34 |
|
35 |
## Model description
|
36 |
|
|
|
67 |
|
68 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Nll Loss | Log Odds Ratio | Log Odds Chosen |
|
69 |
|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:--------:|:--------------:|:---------------:|
|
70 |
+
| 1.436 | 0.9955 | 167 | 1.4679 | -0.0508 | -0.0571 | 0.5468 | 0.0063 | -1.1420 | -1.0158 | 288.9292 | 318.3812 | 1.4121 | -0.6895 | 0.1983 |
|
71 |
+
| 1.1098 | 1.9970 | 335 | 1.4451 | -0.0518 | -0.0579 | 0.5468 | 0.0061 | -1.1581 | -1.0353 | 286.4312 | 315.0296 | 1.3839 | -0.7228 | 0.2105 |
|
72 |
+
| 0.5921 | 2.9866 | 501 | 1.5517 | -0.0554 | -0.0646 | 0.5612 | 0.0092 | -1.2920 | -1.1085 | 268.0282 | 297.1682 | 1.4855 | -0.6970 | 0.2856 |
|
73 |
|
74 |
|
75 |
### Framework versions
|
all_results.json
CHANGED
@@ -1,9 +1,25 @@
|
|
1 |
{
|
2 |
"epoch": 2.9865871833084947,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
"total_flos": 0.0,
|
4 |
-
"train_loss": 1.
|
5 |
-
"train_runtime":
|
6 |
"train_samples": 5364,
|
7 |
-
"train_samples_per_second": 1.
|
8 |
-
"train_steps_per_second": 0.
|
9 |
}
|
|
|
1 |
{
|
2 |
"epoch": 2.9865871833084947,
|
3 |
+
"eval_log_odds_chosen": 0.2937372922897339,
|
4 |
+
"eval_log_odds_ratio": -0.6945178508758545,
|
5 |
+
"eval_logits/chosen": 300.6891174316406,
|
6 |
+
"eval_logits/rejected": 271.8756103515625,
|
7 |
+
"eval_logps/chosen": -1.0802680253982544,
|
8 |
+
"eval_logps/rejected": -1.2502641677856445,
|
9 |
+
"eval_loss": 1.539820671081543,
|
10 |
+
"eval_nll_loss": 1.4724125862121582,
|
11 |
+
"eval_rewards/accuracies": 0.5395683646202087,
|
12 |
+
"eval_rewards/chosen": -0.05401340499520302,
|
13 |
+
"eval_rewards/margins": 0.00849980115890503,
|
14 |
+
"eval_rewards/rejected": -0.06251321732997894,
|
15 |
+
"eval_runtime": 112.2968,
|
16 |
+
"eval_samples": 553,
|
17 |
+
"eval_samples_per_second": 4.924,
|
18 |
+
"eval_steps_per_second": 1.238,
|
19 |
"total_flos": 0.0,
|
20 |
+
"train_loss": 1.4570662823027956,
|
21 |
+
"train_runtime": 13599.7579,
|
22 |
"train_samples": 5364,
|
23 |
+
"train_samples_per_second": 1.183,
|
24 |
+
"train_steps_per_second": 0.037
|
25 |
}
|
eval_results.json
CHANGED
@@ -1,19 +1,19 @@
|
|
1 |
{
|
2 |
-
"epoch": 2.
|
3 |
-
"eval_log_odds_chosen": 0.
|
4 |
-
"eval_log_odds_ratio": -0.
|
5 |
-
"eval_logits/chosen":
|
6 |
-
"eval_logits/rejected":
|
7 |
-
"eval_logps/chosen": -1.
|
8 |
-
"eval_logps/rejected": -1.
|
9 |
-
"eval_loss": 1.
|
10 |
-
"eval_nll_loss": 1.
|
11 |
-
"eval_rewards/accuracies": 0.
|
12 |
-
"eval_rewards/chosen": -0.
|
13 |
-
"eval_rewards/margins": 0.
|
14 |
-
"eval_rewards/rejected": -0.
|
15 |
-
"eval_runtime":
|
16 |
"eval_samples": 553,
|
17 |
-
"eval_samples_per_second":
|
18 |
-
"eval_steps_per_second":
|
19 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 2.9865871833084947,
|
3 |
+
"eval_log_odds_chosen": 0.2937372922897339,
|
4 |
+
"eval_log_odds_ratio": -0.6945178508758545,
|
5 |
+
"eval_logits/chosen": 300.6891174316406,
|
6 |
+
"eval_logits/rejected": 271.8756103515625,
|
7 |
+
"eval_logps/chosen": -1.0802680253982544,
|
8 |
+
"eval_logps/rejected": -1.2502641677856445,
|
9 |
+
"eval_loss": 1.539820671081543,
|
10 |
+
"eval_nll_loss": 1.4724125862121582,
|
11 |
+
"eval_rewards/accuracies": 0.5395683646202087,
|
12 |
+
"eval_rewards/chosen": -0.05401340499520302,
|
13 |
+
"eval_rewards/margins": 0.00849980115890503,
|
14 |
+
"eval_rewards/rejected": -0.06251321732997894,
|
15 |
+
"eval_runtime": 112.2968,
|
16 |
"eval_samples": 553,
|
17 |
+
"eval_samples_per_second": 4.924,
|
18 |
+
"eval_steps_per_second": 1.238
|
19 |
}
|
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4995496656
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06c0d9666d139358ff5554a34051d99111e3b97010f1883f0a6925ea2e38718e
|
3 |
size 4995496656
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4982953168
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:643dc469fdeca59e9b5d64126415a7f81c4c4023c6fe9602df8d357561f23ca8
|
3 |
size 4982953168
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4982953200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd109beace3c5c26c78fd106f86e2fdec798571b3fb4af30fe6e97cade6055ef
|
3 |
size 4982953200
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2113988336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68c441bebbedad362fed1b7797d7a88cbe4194604bb8d0849cf39e090d388d5e
|
3 |
size 2113988336
|
runs/Sep18_20-46-35_65ecb96dba42/events.out.tfevents.1726706643.65ecb96dba42.1160.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5dcb674752307598161fde9ef2f1cc73bf63c20b269b17cec3f31c6d70e29389
|
3 |
+
size 997
|
runs/Sep20_22-18-14_65ecb96dba42/events.out.tfevents.1726870791.65ecb96dba42.231696.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b0bb882f7aed954a1701345b71297539c9e7d30585f625e045f4df78796c93ce
|
3 |
+
size 5404
|
runs/Sep21_02-30-57_65ecb96dba42/events.out.tfevents.1726885944.65ecb96dba42.269402.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6e43bdcc7b933bf8176a870fcd82606e6648f4165cf3fcd77d084bf8de8461f
|
3 |
+
size 94110
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"epoch": 2.9865871833084947,
|
3 |
"total_flos": 0.0,
|
4 |
-
"train_loss": 1.
|
5 |
-
"train_runtime":
|
6 |
"train_samples": 5364,
|
7 |
-
"train_samples_per_second": 1.
|
8 |
-
"train_steps_per_second": 0.
|
9 |
}
|
|
|
1 |
{
|
2 |
"epoch": 2.9865871833084947,
|
3 |
"total_flos": 0.0,
|
4 |
+
"train_loss": 1.4570662823027956,
|
5 |
+
"train_runtime": 13599.7579,
|
6 |
"train_samples": 5364,
|
7 |
+
"train_samples_per_second": 1.183,
|
8 |
+
"train_steps_per_second": 0.037
|
9 |
}
|
trainer_state.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6776
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba2d73edee19e8e838afa6616d74780dc6e6b8b0472ea9ec06e39279b279612c
|
3 |
size 6776
|