wxzhang committed on
Commit
59c7c4d
·
verified ·
1 Parent(s): 048bd6f

Model save

Browse files
README.md ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - trl
4
+ - dpo
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: dpo-selective-longerrun
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # dpo-selective-longerrun
15
+
16
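+ This model was trained with DPO using TRL (see the tags above). The Trainer did not record a base model or a dataset name, so the auto-generated card reports it as "trained from scratch" on the `None` dataset.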
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: 0.4916
19
+ - Rewards/chosen: -0.6959
20
+ - Rewards/rejected: -2.0431
21
+ - Rewards/accuracies: 0.7579
22
+ - Rewards/margins: 1.3472
23
+ - Logps/rejected: -312.5994
24
+ - Logps/chosen: -310.2374
25
+ - Logits/rejected: -2.3498
26
+ - Logits/chosen: -2.3901
27
+
28
+ ## Model description
29
+
30
+ More information needed
31
+
32
+ ## Intended uses & limitations
33
+
34
+ More information needed
35
+
36
+ ## Training and evaluation data
37
+
38
+ More information needed
39
+
40
+ ## Training procedure
41
+
42
+ ### Training hyperparameters
43
+
44
+ The following hyperparameters were used during training:
45
+ - learning_rate: 5e-07
46
+ - train_batch_size: 4
47
+ - eval_batch_size: 8
48
+ - seed: 42
49
+ - distributed_type: multi-GPU
50
+ - num_devices: 4
51
+ - gradient_accumulation_steps: 4
52
+ - total_train_batch_size: 64
53
+ - total_eval_batch_size: 32
54
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
55
+ - lr_scheduler_type: cosine
56
+ - lr_scheduler_warmup_ratio: 0.1
57
+ - training_steps: 1500
58
+
59
+ ### Training results
60
+
61
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
+ | 0.6163 | 0.1 | 100 | 0.6145 | 0.0147 | -0.2611 | 0.7024 | 0.2758 | -276.9589 | -296.0254 | -2.3069 | -2.3542 |
64
+ | 0.5608 | 0.21 | 200 | 0.5507 | -0.0898 | -0.8075 | 0.7401 | 0.7176 | -287.8870 | -298.1169 | -2.3806 | -2.4286 |
65
+ | 0.4934 | 0.31 | 300 | 0.5225 | -0.1646 | -1.0392 | 0.7579 | 0.8746 | -292.5221 | -299.6117 | -2.3416 | -2.3850 |
66
+ | 0.4812 | 0.42 | 400 | 0.5148 | -0.2130 | -1.1798 | 0.7599 | 0.9668 | -295.3333 | -300.5795 | -2.3285 | -2.3697 |
67
+ | 0.5217 | 0.52 | 500 | 0.5094 | -0.1747 | -1.1571 | 0.7599 | 0.9824 | -294.8788 | -299.8136 | -2.3074 | -2.3432 |
68
+ | 0.5069 | 0.63 | 600 | 0.5037 | -0.0404 | -1.0494 | 0.7659 | 1.0090 | -292.7251 | -297.1272 | -2.2444 | -2.2854 |
69
+ | 0.4582 | 0.73 | 700 | 0.5003 | -0.6338 | -1.7232 | 0.7599 | 1.0894 | -306.2008 | -308.9958 | -2.2469 | -2.2897 |
70
+ | 0.457 | 0.84 | 800 | 0.4907 | -0.4901 | -1.6054 | 0.7639 | 1.1153 | -303.8464 | -306.1228 | -2.2928 | -2.3342 |
71
+ | 0.4723 | 0.94 | 900 | 0.4933 | -0.4418 | -1.5567 | 0.7659 | 1.1149 | -302.8719 | -305.1562 | -2.3355 | -2.3762 |
72
+ | 0.3094 | 1.05 | 1000 | 0.4922 | -0.8030 | -2.0474 | 0.7639 | 1.2444 | -312.6856 | -312.3804 | -2.3698 | -2.4094 |
73
+ | 0.2725 | 1.15 | 1100 | 0.4921 | -0.5635 | -1.8640 | 0.7460 | 1.3005 | -309.0183 | -307.5903 | -2.3382 | -2.3785 |
74
+ | 0.2932 | 1.26 | 1200 | 0.4924 | -0.6522 | -2.0030 | 0.7579 | 1.3509 | -311.7977 | -309.3632 | -2.3511 | -2.3915 |
75
+ | 0.275 | 1.36 | 1300 | 0.4916 | -0.6366 | -1.9750 | 0.7599 | 1.3383 | -311.2369 | -309.0526 | -2.3531 | -2.3934 |
76
+ | 0.2768 | 1.47 | 1400 | 0.4922 | -0.7011 | -2.0464 | 0.7579 | 1.3453 | -312.6646 | -310.3419 | -2.3505 | -2.3908 |
77
+ | 0.2863 | 1.57 | 1500 | 0.4916 | -0.6959 | -2.0431 | 0.7579 | 1.3472 | -312.5994 | -310.2374 | -2.3498 | -2.3901 |
78
+
79
+
80
+ ### Framework versions
81
+
82
+ - Transformers 4.36.2
83
+ - Pytorch 2.1.2
84
+ - Datasets 2.14.6
85
+ - Tokenizers 0.15.0
all_results.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.57,
3
+ "eval_logits/chosen": -2.3900513648986816,
4
+ "eval_logits/rejected": -2.3497886657714844,
5
+ "eval_logps/chosen": -310.23736572265625,
6
+ "eval_logps/rejected": -312.59942626953125,
7
+ "eval_loss": 0.4916023015975952,
8
+ "eval_rewards/accuracies": 0.7579365372657776,
9
+ "eval_rewards/chosen": -0.6958636045455933,
10
+ "eval_rewards/margins": 1.3472286462783813,
11
+ "eval_rewards/rejected": -2.0430922508239746,
12
+ "eval_runtime": 406.1845,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 4.924,
15
+ "eval_steps_per_second": 0.155,
16
+ "train_loss": 0.4390208276112874,
17
+ "train_runtime": 42485.337,
18
+ "train_samples": 61135,
19
+ "train_samples_per_second": 2.26,
20
+ "train_steps_per_second": 0.035
21
+ }
eval_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.57,
3
+ "eval_logits/chosen": -2.3900513648986816,
4
+ "eval_logits/rejected": -2.3497886657714844,
5
+ "eval_logps/chosen": -310.23736572265625,
6
+ "eval_logps/rejected": -312.59942626953125,
7
+ "eval_loss": 0.4916023015975952,
8
+ "eval_rewards/accuracies": 0.7579365372657776,
9
+ "eval_rewards/chosen": -0.6958636045455933,
10
+ "eval_rewards/margins": 1.3472286462783813,
11
+ "eval_rewards/rejected": -2.0430922508239746,
12
+ "eval_runtime": 406.1845,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 4.924,
15
+ "eval_steps_per_second": 0.155
16
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.36.2"
6
+ }
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e2c7a1a307945ec96f407a102056e928bb30995af5faf8db45f1e8b6d068c13
3
+ size 4943162336
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b1679d2c6f5b18eedb6203de18af20f25509f6b25ad464d691f186a82f812e5
3
+ size 4999819336
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeec79b3d54f47e3bbcd0a4eaff350310c108129786496747314cdbcfbfd3269
3
+ size 4540516344
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 14483464192
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00003-of-00003.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
143
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
144
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
145
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
146
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
147
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
148
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
149
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
150
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
151
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
152
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
153
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
154
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
155
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
156
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
157
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
158
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
159
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
160
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
161
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
162
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
163
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
164
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
165
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
166
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
167
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
168
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
169
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
170
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
171
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
172
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
173
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
174
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
175
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
176
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
177
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
178
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
179
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
180
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
181
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
182
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
183
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
184
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
185
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
186
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
187
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
188
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
189
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
190
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
191
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
192
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
193
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
194
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
195
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
196
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
197
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
198
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
199
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
200
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
201
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
202
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
203
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
204
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
205
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
206
+ "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
207
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
208
+ "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
209
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
210
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
211
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
212
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
213
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
214
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
215
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
216
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
217
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
218
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
219
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
220
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
221
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
222
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
223
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
224
+ "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
225
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
226
+ "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
227
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
228
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
229
+ "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
230
+ "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
231
+ "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
232
+ "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
233
+ "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
234
+ "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
235
+ "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
236
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
237
+ "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
238
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
239
+ "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
240
+ "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
241
+ "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
242
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
243
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
244
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
245
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
246
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
247
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
248
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
249
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
250
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
251
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
252
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
253
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
254
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
255
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
256
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
257
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
258
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
259
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
260
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
261
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
262
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
263
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
264
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
265
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
266
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
267
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
268
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
269
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
270
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
271
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
272
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
273
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
274
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
275
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
276
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
277
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
278
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
279
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
280
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
281
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
282
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
283
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
284
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
285
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
286
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
287
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
288
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
289
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
290
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
291
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
292
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
293
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
294
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
295
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
296
+ "model.norm.weight": "model-00003-of-00003.safetensors"
297
+ }
298
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.57,
3
+ "train_loss": 0.4390208276112874,
4
+ "train_runtime": 42485.337,
5
+ "train_samples": 61135,
6
+ "train_samples_per_second": 2.26,
7
+ "train_steps_per_second": 0.035
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,2384 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.5702695629416383,
5
+ "eval_steps": 100,
6
+ "global_step": 1500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 3.3333333333333334e-09,
14
+ "logits/chosen": -2.606384754180908,
15
+ "logits/rejected": -2.4982504844665527,
16
+ "logps/chosen": -296.16644287109375,
17
+ "logps/rejected": -225.2230224609375,
18
+ "loss": 0.6931,
19
+ "rewards/accuracies": 0.0,
20
+ "rewards/chosen": 0.0,
21
+ "rewards/margins": 0.0,
22
+ "rewards/rejected": 0.0,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 0.01,
27
+ "learning_rate": 3.3333333333333334e-08,
28
+ "logits/chosen": -2.517352819442749,
29
+ "logits/rejected": -2.4637084007263184,
30
+ "logps/chosen": -342.76483154296875,
31
+ "logps/rejected": -293.0290222167969,
32
+ "loss": 0.6947,
33
+ "rewards/accuracies": 0.4305555522441864,
34
+ "rewards/chosen": 0.0013976963236927986,
35
+ "rewards/margins": -0.0040576523169875145,
36
+ "rewards/rejected": 0.005455348175019026,
37
+ "step": 10
38
+ },
39
+ {
40
+ "epoch": 0.02,
41
+ "learning_rate": 6.666666666666667e-08,
42
+ "logits/chosen": -2.396883726119995,
43
+ "logits/rejected": -2.3743655681610107,
44
+ "logps/chosen": -238.6938018798828,
45
+ "logps/rejected": -248.8633575439453,
46
+ "loss": 0.6943,
47
+ "rewards/accuracies": 0.48124998807907104,
48
+ "rewards/chosen": -0.0019892356358468533,
49
+ "rewards/margins": -0.0029293105471879244,
50
+ "rewards/rejected": 0.0009400752605870366,
51
+ "step": 20
52
+ },
53
+ {
54
+ "epoch": 0.03,
55
+ "learning_rate": 1e-07,
56
+ "logits/chosen": -2.490886688232422,
57
+ "logits/rejected": -2.408034563064575,
58
+ "logps/chosen": -289.0502014160156,
59
+ "logps/rejected": -257.93157958984375,
60
+ "loss": 0.6915,
61
+ "rewards/accuracies": 0.48750001192092896,
62
+ "rewards/chosen": 8.620424341643229e-05,
63
+ "rewards/margins": 0.0004866129602305591,
64
+ "rewards/rejected": -0.000400408694986254,
65
+ "step": 30
66
+ },
67
+ {
68
+ "epoch": 0.04,
69
+ "learning_rate": 1.3333333333333334e-07,
70
+ "logits/chosen": -2.479222536087036,
71
+ "logits/rejected": -2.4444398880004883,
72
+ "logps/chosen": -298.08795166015625,
73
+ "logps/rejected": -280.84173583984375,
74
+ "loss": 0.6898,
75
+ "rewards/accuracies": 0.5249999761581421,
76
+ "rewards/chosen": 0.010273179039359093,
77
+ "rewards/margins": 0.005869547836482525,
78
+ "rewards/rejected": 0.004403631202876568,
79
+ "step": 40
80
+ },
81
+ {
82
+ "epoch": 0.05,
83
+ "learning_rate": 1.6666666666666665e-07,
84
+ "logits/chosen": -2.505727767944336,
85
+ "logits/rejected": -2.4711194038391113,
86
+ "logps/chosen": -268.0919494628906,
87
+ "logps/rejected": -254.45358276367188,
88
+ "loss": 0.6838,
89
+ "rewards/accuracies": 0.6625000238418579,
90
+ "rewards/chosen": 0.024473872035741806,
91
+ "rewards/margins": 0.018633713945746422,
92
+ "rewards/rejected": 0.0058401599526405334,
93
+ "step": 50
94
+ },
95
+ {
96
+ "epoch": 0.06,
97
+ "learning_rate": 2e-07,
98
+ "logits/chosen": -2.456902265548706,
99
+ "logits/rejected": -2.411407947540283,
100
+ "logps/chosen": -293.38934326171875,
101
+ "logps/rejected": -269.47882080078125,
102
+ "loss": 0.677,
103
+ "rewards/accuracies": 0.6812499761581421,
104
+ "rewards/chosen": 0.04791393131017685,
105
+ "rewards/margins": 0.03436511009931564,
106
+ "rewards/rejected": 0.01354882400482893,
107
+ "step": 60
108
+ },
109
+ {
110
+ "epoch": 0.07,
111
+ "learning_rate": 2.3333333333333333e-07,
112
+ "logits/chosen": -2.463378429412842,
113
+ "logits/rejected": -2.385895013809204,
114
+ "logps/chosen": -290.9529724121094,
115
+ "logps/rejected": -276.04107666015625,
116
+ "loss": 0.6614,
117
+ "rewards/accuracies": 0.643750011920929,
118
+ "rewards/chosen": 0.09689084440469742,
119
+ "rewards/margins": 0.058380376547575,
120
+ "rewards/rejected": 0.03851046413183212,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 0.08,
125
+ "learning_rate": 2.6666666666666667e-07,
126
+ "logits/chosen": -2.4076247215270996,
127
+ "logits/rejected": -2.3244595527648926,
128
+ "logps/chosen": -321.92596435546875,
129
+ "logps/rejected": -284.5071105957031,
130
+ "loss": 0.6387,
131
+ "rewards/accuracies": 0.668749988079071,
132
+ "rewards/chosen": 0.15204539895057678,
133
+ "rewards/margins": 0.11996610462665558,
134
+ "rewards/rejected": 0.032079294323921204,
135
+ "step": 80
136
+ },
137
+ {
138
+ "epoch": 0.09,
139
+ "learning_rate": 3e-07,
140
+ "logits/chosen": -2.3717925548553467,
141
+ "logits/rejected": -2.362457752227783,
142
+ "logps/chosen": -314.3221435546875,
143
+ "logps/rejected": -272.3472595214844,
144
+ "loss": 0.6254,
145
+ "rewards/accuracies": 0.7437499761581421,
146
+ "rewards/chosen": 0.159285306930542,
147
+ "rewards/margins": 0.18115119636058807,
148
+ "rewards/rejected": -0.02186589315533638,
149
+ "step": 90
150
+ },
151
+ {
152
+ "epoch": 0.1,
153
+ "learning_rate": 3.333333333333333e-07,
154
+ "logits/chosen": -2.3045754432678223,
155
+ "logits/rejected": -2.326310634613037,
156
+ "logps/chosen": -290.2933349609375,
157
+ "logps/rejected": -319.2317199707031,
158
+ "loss": 0.6163,
159
+ "rewards/accuracies": 0.731249988079071,
160
+ "rewards/chosen": 0.04698774218559265,
161
+ "rewards/margins": 0.2375941276550293,
162
+ "rewards/rejected": -0.19060640037059784,
163
+ "step": 100
164
+ },
165
+ {
166
+ "epoch": 0.1,
167
+ "eval_logits/chosen": -2.3541910648345947,
168
+ "eval_logits/rejected": -2.306934118270874,
169
+ "eval_logps/chosen": -296.0254211425781,
170
+ "eval_logps/rejected": -276.9588623046875,
171
+ "eval_loss": 0.6145145297050476,
172
+ "eval_rewards/accuracies": 0.7023809552192688,
173
+ "eval_rewards/chosen": 0.01473198737949133,
174
+ "eval_rewards/margins": 0.2757962942123413,
175
+ "eval_rewards/rejected": -0.2610643208026886,
176
+ "eval_runtime": 413.5508,
177
+ "eval_samples_per_second": 4.836,
178
+ "eval_steps_per_second": 0.152,
179
+ "step": 100
180
+ },
181
+ {
182
+ "epoch": 0.12,
183
+ "learning_rate": 3.666666666666666e-07,
184
+ "logits/chosen": -2.4004225730895996,
185
+ "logits/rejected": -2.336871385574341,
186
+ "logps/chosen": -265.05352783203125,
187
+ "logps/rejected": -254.93212890625,
188
+ "loss": 0.6078,
189
+ "rewards/accuracies": 0.699999988079071,
190
+ "rewards/chosen": -0.014697420410811901,
191
+ "rewards/margins": 0.27927759289741516,
192
+ "rewards/rejected": -0.29397499561309814,
193
+ "step": 110
194
+ },
195
+ {
196
+ "epoch": 0.13,
197
+ "learning_rate": 4e-07,
198
+ "logits/chosen": -2.400813579559326,
199
+ "logits/rejected": -2.317667245864868,
200
+ "logps/chosen": -289.80938720703125,
201
+ "logps/rejected": -268.9970703125,
202
+ "loss": 0.6067,
203
+ "rewards/accuracies": 0.6812499761581421,
204
+ "rewards/chosen": 0.11111988872289658,
205
+ "rewards/margins": 0.3709365725517273,
206
+ "rewards/rejected": -0.2598166763782501,
207
+ "step": 120
208
+ },
209
+ {
210
+ "epoch": 0.14,
211
+ "learning_rate": 4.3333333333333335e-07,
212
+ "logits/chosen": -2.4633655548095703,
213
+ "logits/rejected": -2.370208263397217,
214
+ "logps/chosen": -321.96954345703125,
215
+ "logps/rejected": -289.915771484375,
216
+ "loss": 0.5958,
217
+ "rewards/accuracies": 0.699999988079071,
218
+ "rewards/chosen": 0.06524975597858429,
219
+ "rewards/margins": 0.3465055525302887,
220
+ "rewards/rejected": -0.2812557816505432,
221
+ "step": 130
222
+ },
223
+ {
224
+ "epoch": 0.15,
225
+ "learning_rate": 4.6666666666666666e-07,
226
+ "logits/chosen": -2.4572839736938477,
227
+ "logits/rejected": -2.402984857559204,
228
+ "logps/chosen": -267.6603088378906,
229
+ "logps/rejected": -240.7797088623047,
230
+ "loss": 0.5789,
231
+ "rewards/accuracies": 0.6875,
232
+ "rewards/chosen": 0.05029069632291794,
233
+ "rewards/margins": 0.3604304790496826,
234
+ "rewards/rejected": -0.3101397752761841,
235
+ "step": 140
236
+ },
237
+ {
238
+ "epoch": 0.16,
239
+ "learning_rate": 5e-07,
240
+ "logits/chosen": -2.3665835857391357,
241
+ "logits/rejected": -2.3545050621032715,
242
+ "logps/chosen": -304.1943054199219,
243
+ "logps/rejected": -298.508056640625,
244
+ "loss": 0.6084,
245
+ "rewards/accuracies": 0.6625000238418579,
246
+ "rewards/chosen": 0.032247237861156464,
247
+ "rewards/margins": 0.392182320356369,
248
+ "rewards/rejected": -0.35993510484695435,
249
+ "step": 150
250
+ },
251
+ {
252
+ "epoch": 0.17,
253
+ "learning_rate": 4.999323102948654e-07,
254
+ "logits/chosen": -2.453312397003174,
255
+ "logits/rejected": -2.3983371257781982,
256
+ "logps/chosen": -269.2843322753906,
257
+ "logps/rejected": -277.93682861328125,
258
+ "loss": 0.5419,
259
+ "rewards/accuracies": 0.699999988079071,
260
+ "rewards/chosen": 0.10002978146076202,
261
+ "rewards/margins": 0.553473174571991,
262
+ "rewards/rejected": -0.4534434676170349,
263
+ "step": 160
264
+ },
265
+ {
266
+ "epoch": 0.18,
267
+ "learning_rate": 4.997292778346312e-07,
268
+ "logits/chosen": -2.493710517883301,
269
+ "logits/rejected": -2.4315335750579834,
270
+ "logps/chosen": -300.1785888671875,
271
+ "logps/rejected": -257.55157470703125,
272
+ "loss": 0.562,
273
+ "rewards/accuracies": 0.6625000238418579,
274
+ "rewards/chosen": -0.0674915537238121,
275
+ "rewards/margins": 0.48717761039733887,
276
+ "rewards/rejected": -0.5546691417694092,
277
+ "step": 170
278
+ },
279
+ {
280
+ "epoch": 0.19,
281
+ "learning_rate": 4.99391012564956e-07,
282
+ "logits/chosen": -2.3834595680236816,
283
+ "logits/rejected": -2.3311541080474854,
284
+ "logps/chosen": -271.11114501953125,
285
+ "logps/rejected": -289.841552734375,
286
+ "loss": 0.5335,
287
+ "rewards/accuracies": 0.737500011920929,
288
+ "rewards/chosen": -0.27489790320396423,
289
+ "rewards/margins": 0.6237329244613647,
290
+ "rewards/rejected": -0.8986309170722961,
291
+ "step": 180
292
+ },
293
+ {
294
+ "epoch": 0.2,
295
+ "learning_rate": 4.989176976624511e-07,
296
+ "logits/chosen": -2.546043634414673,
297
+ "logits/rejected": -2.4504528045654297,
298
+ "logps/chosen": -307.6429138183594,
299
+ "logps/rejected": -274.8311462402344,
300
+ "loss": 0.5298,
301
+ "rewards/accuracies": 0.793749988079071,
302
+ "rewards/chosen": 0.14119036495685577,
303
+ "rewards/margins": 0.8371874094009399,
304
+ "rewards/rejected": -0.6959971189498901,
305
+ "step": 190
306
+ },
307
+ {
308
+ "epoch": 0.21,
309
+ "learning_rate": 4.983095894354857e-07,
310
+ "logits/chosen": -2.489591121673584,
311
+ "logits/rejected": -2.4581377506256104,
312
+ "logps/chosen": -268.5490417480469,
313
+ "logps/rejected": -291.10986328125,
314
+ "loss": 0.5608,
315
+ "rewards/accuracies": 0.6875,
316
+ "rewards/chosen": -0.1413675844669342,
317
+ "rewards/margins": 0.6023878455162048,
318
+ "rewards/rejected": -0.7437554597854614,
319
+ "step": 200
320
+ },
321
+ {
322
+ "epoch": 0.21,
323
+ "eval_logits/chosen": -2.4285523891448975,
324
+ "eval_logits/rejected": -2.3805854320526123,
325
+ "eval_logps/chosen": -298.1169128417969,
326
+ "eval_logps/rejected": -287.88702392578125,
327
+ "eval_loss": 0.5506500005722046,
328
+ "eval_rewards/accuracies": 0.7400793433189392,
329
+ "eval_rewards/chosen": -0.08984197676181793,
330
+ "eval_rewards/margins": 0.7176319360733032,
331
+ "eval_rewards/rejected": -0.80747389793396,
332
+ "eval_runtime": 409.3127,
333
+ "eval_samples_per_second": 4.886,
334
+ "eval_steps_per_second": 0.154,
335
+ "step": 200
336
+ },
337
+ {
338
+ "epoch": 0.22,
339
+ "learning_rate": 4.975670171853925e-07,
340
+ "logits/chosen": -2.48987078666687,
341
+ "logits/rejected": -2.4578893184661865,
342
+ "logps/chosen": -309.9327087402344,
343
+ "logps/rejected": -268.804443359375,
344
+ "loss": 0.5555,
345
+ "rewards/accuracies": 0.7250000238418579,
346
+ "rewards/chosen": 0.00010260194540023804,
347
+ "rewards/margins": 0.6477264165878296,
348
+ "rewards/rejected": -0.6476237773895264,
349
+ "step": 210
350
+ },
351
+ {
352
+ "epoch": 0.23,
353
+ "learning_rate": 4.966903830281448e-07,
354
+ "logits/chosen": -2.5101141929626465,
355
+ "logits/rejected": -2.5068182945251465,
356
+ "logps/chosen": -302.55804443359375,
357
+ "logps/rejected": -306.7073059082031,
358
+ "loss": 0.5371,
359
+ "rewards/accuracies": 0.71875,
360
+ "rewards/chosen": 0.022780220955610275,
361
+ "rewards/margins": 0.6337054371833801,
362
+ "rewards/rejected": -0.6109251379966736,
363
+ "step": 220
364
+ },
365
+ {
366
+ "epoch": 0.24,
367
+ "learning_rate": 4.956801616766034e-07,
368
+ "logits/chosen": -2.477936267852783,
369
+ "logits/rejected": -2.4627578258514404,
370
+ "logps/chosen": -334.6808776855469,
371
+ "logps/rejected": -300.4921569824219,
372
+ "loss": 0.5438,
373
+ "rewards/accuracies": 0.7437499761581421,
374
+ "rewards/chosen": -0.660649299621582,
375
+ "rewards/margins": 0.6745242476463318,
376
+ "rewards/rejected": -1.3351736068725586,
377
+ "step": 230
378
+ },
379
+ {
380
+ "epoch": 0.25,
381
+ "learning_rate": 4.945369001834514e-07,
382
+ "logits/chosen": -2.4449329376220703,
383
+ "logits/rejected": -2.3937153816223145,
384
+ "logps/chosen": -280.98468017578125,
385
+ "logps/rejected": -243.7540283203125,
386
+ "loss": 0.5362,
387
+ "rewards/accuracies": 0.6812499761581421,
388
+ "rewards/chosen": -0.5287132263183594,
389
+ "rewards/margins": 0.6548066139221191,
390
+ "rewards/rejected": -1.1835198402404785,
391
+ "step": 240
392
+ },
393
+ {
394
+ "epoch": 0.26,
395
+ "learning_rate": 4.932612176449559e-07,
396
+ "logits/chosen": -2.3831703662872314,
397
+ "logits/rejected": -2.3532094955444336,
398
+ "logps/chosen": -308.6506042480469,
399
+ "logps/rejected": -295.2610778808594,
400
+ "loss": 0.5517,
401
+ "rewards/accuracies": 0.7437499761581421,
402
+ "rewards/chosen": -0.20035383105278015,
403
+ "rewards/margins": 0.6892730593681335,
404
+ "rewards/rejected": -0.8896268606185913,
405
+ "step": 250
406
+ },
407
+ {
408
+ "epoch": 0.27,
409
+ "learning_rate": 4.918538048657159e-07,
410
+ "logits/chosen": -2.381070852279663,
411
+ "logits/rejected": -2.356685161590576,
412
+ "logps/chosen": -260.6903076171875,
413
+ "logps/rejected": -273.7911682128906,
414
+ "loss": 0.5333,
415
+ "rewards/accuracies": 0.6812499761581421,
416
+ "rewards/chosen": -0.3518093228340149,
417
+ "rewards/margins": 0.6164388060569763,
418
+ "rewards/rejected": -0.9682480692863464,
419
+ "step": 260
420
+ },
421
+ {
422
+ "epoch": 0.28,
423
+ "learning_rate": 4.903154239845797e-07,
424
+ "logits/chosen": -2.385469436645508,
425
+ "logits/rejected": -2.3523507118225098,
426
+ "logps/chosen": -312.2256774902344,
427
+ "logps/rejected": -312.83526611328125,
428
+ "loss": 0.5092,
429
+ "rewards/accuracies": 0.731249988079071,
430
+ "rewards/chosen": -0.35005810856819153,
431
+ "rewards/margins": 0.9718685150146484,
432
+ "rewards/rejected": -1.3219265937805176,
433
+ "step": 270
434
+ },
435
+ {
436
+ "epoch": 0.29,
437
+ "learning_rate": 4.88646908061933e-07,
438
+ "logits/chosen": -2.4627230167388916,
439
+ "logits/rejected": -2.418569564819336,
440
+ "logps/chosen": -317.95703125,
441
+ "logps/rejected": -289.7148742675781,
442
+ "loss": 0.5104,
443
+ "rewards/accuracies": 0.768750011920929,
444
+ "rewards/chosen": -0.29435399174690247,
445
+ "rewards/margins": 0.9088577032089233,
446
+ "rewards/rejected": -1.2032116651535034,
447
+ "step": 280
448
+ },
449
+ {
450
+ "epoch": 0.3,
451
+ "learning_rate": 4.868491606285823e-07,
452
+ "logits/chosen": -2.4506328105926514,
453
+ "logits/rejected": -2.3806400299072266,
454
+ "logps/chosen": -302.1065673828125,
455
+ "logps/rejected": -251.01498413085938,
456
+ "loss": 0.5353,
457
+ "rewards/accuracies": 0.706250011920929,
458
+ "rewards/chosen": -0.43174147605895996,
459
+ "rewards/margins": 0.6914370656013489,
460
+ "rewards/rejected": -1.123178482055664,
461
+ "step": 290
462
+ },
463
+ {
464
+ "epoch": 0.31,
465
+ "learning_rate": 4.849231551964771e-07,
466
+ "logits/chosen": -2.431826591491699,
467
+ "logits/rejected": -2.354094982147217,
468
+ "logps/chosen": -268.3138122558594,
469
+ "logps/rejected": -259.5442199707031,
470
+ "loss": 0.4934,
471
+ "rewards/accuracies": 0.737500011920929,
472
+ "rewards/chosen": -0.18001610040664673,
473
+ "rewards/margins": 0.8297437429428101,
474
+ "rewards/rejected": -1.009759783744812,
475
+ "step": 300
476
+ },
477
+ {
478
+ "epoch": 0.31,
479
+ "eval_logits/chosen": -2.3849596977233887,
480
+ "eval_logits/rejected": -2.3416402339935303,
481
+ "eval_logps/chosen": -299.6116638183594,
482
+ "eval_logps/rejected": -292.5220947265625,
483
+ "eval_loss": 0.5225438475608826,
484
+ "eval_rewards/accuracies": 0.7579365372657776,
485
+ "eval_rewards/chosen": -0.16457918286323547,
486
+ "eval_rewards/margins": 0.8746477365493774,
487
+ "eval_rewards/rejected": -1.03922700881958,
488
+ "eval_runtime": 407.614,
489
+ "eval_samples_per_second": 4.907,
490
+ "eval_steps_per_second": 0.155,
491
+ "step": 300
492
+ },
493
+ {
494
+ "epoch": 0.32,
495
+ "learning_rate": 4.828699347315356e-07,
496
+ "logits/chosen": -2.3300023078918457,
497
+ "logits/rejected": -2.291306972503662,
498
+ "logps/chosen": -281.8946228027344,
499
+ "logps/rejected": -328.680908203125,
500
+ "loss": 0.4757,
501
+ "rewards/accuracies": 0.8374999761581421,
502
+ "rewards/chosen": -0.23267221450805664,
503
+ "rewards/margins": 1.1307260990142822,
504
+ "rewards/rejected": -1.3633983135223389,
505
+ "step": 310
506
+ },
507
+ {
508
+ "epoch": 0.33,
509
+ "learning_rate": 4.806906110888606e-07,
510
+ "logits/chosen": -2.2734484672546387,
511
+ "logits/rejected": -2.2220654487609863,
512
+ "logps/chosen": -344.95184326171875,
513
+ "logps/rejected": -317.2240295410156,
514
+ "loss": 0.5251,
515
+ "rewards/accuracies": 0.737500011920929,
516
+ "rewards/chosen": -0.40918412804603577,
517
+ "rewards/margins": 1.0021181106567383,
518
+ "rewards/rejected": -1.4113022089004517,
519
+ "step": 320
520
+ },
521
+ {
522
+ "epoch": 0.35,
523
+ "learning_rate": 4.783863644106502e-07,
524
+ "logits/chosen": -2.2840628623962402,
525
+ "logits/rejected": -2.2298922538757324,
526
+ "logps/chosen": -281.0782775878906,
527
+ "logps/rejected": -242.35263061523438,
528
+ "loss": 0.553,
529
+ "rewards/accuracies": 0.7124999761581421,
530
+ "rewards/chosen": -0.017911773175001144,
531
+ "rewards/margins": 0.6968544721603394,
532
+ "rewards/rejected": -0.714766263961792,
533
+ "step": 330
534
+ },
535
+ {
536
+ "epoch": 0.36,
537
+ "learning_rate": 4.759584424871301e-07,
538
+ "logits/chosen": -2.270648717880249,
539
+ "logits/rejected": -2.2538352012634277,
540
+ "logps/chosen": -297.785888671875,
541
+ "logps/rejected": -256.94305419921875,
542
+ "loss": 0.5741,
543
+ "rewards/accuracies": 0.6625000238418579,
544
+ "rewards/chosen": 0.040852271020412445,
545
+ "rewards/margins": 0.6195092797279358,
546
+ "rewards/rejected": -0.5786570310592651,
547
+ "step": 340
548
+ },
549
+ {
550
+ "epoch": 0.37,
551
+ "learning_rate": 4.7340816008085305e-07,
552
+ "logits/chosen": -2.3321385383605957,
553
+ "logits/rejected": -2.301793336868286,
554
+ "logps/chosen": -321.775390625,
555
+ "logps/rejected": -314.6920471191406,
556
+ "loss": 0.5205,
557
+ "rewards/accuracies": 0.8187500238418579,
558
+ "rewards/chosen": -0.3098350465297699,
559
+ "rewards/margins": 0.9677619934082031,
560
+ "rewards/rejected": -1.2775970697402954,
561
+ "step": 350
562
+ },
563
+ {
564
+ "epoch": 0.38,
565
+ "learning_rate": 4.707368982147317e-07,
566
+ "logits/chosen": -2.460111379623413,
567
+ "logits/rejected": -2.429689884185791,
568
+ "logps/chosen": -289.2377014160156,
569
+ "logps/rejected": -296.45123291015625,
570
+ "loss": 0.5145,
571
+ "rewards/accuracies": 0.75,
572
+ "rewards/chosen": -0.8857641220092773,
573
+ "rewards/margins": 0.7435467839241028,
574
+ "rewards/rejected": -1.6293108463287354,
575
+ "step": 360
576
+ },
577
+ {
578
+ "epoch": 0.39,
579
+ "learning_rate": 4.6794610342419056e-07,
580
+ "logits/chosen": -2.403916120529175,
581
+ "logits/rejected": -2.338923931121826,
582
+ "logps/chosen": -287.01434326171875,
583
+ "logps/rejected": -307.77728271484375,
584
+ "loss": 0.4818,
585
+ "rewards/accuracies": 0.762499988079071,
586
+ "rewards/chosen": -0.28949111700057983,
587
+ "rewards/margins": 1.0141589641571045,
588
+ "rewards/rejected": -1.303650140762329,
589
+ "step": 370
590
+ },
591
+ {
592
+ "epoch": 0.4,
593
+ "learning_rate": 4.650372869738414e-07,
594
+ "logits/chosen": -2.4299778938293457,
595
+ "logits/rejected": -2.404153347015381,
596
+ "logps/chosen": -310.12152099609375,
597
+ "logps/rejected": -294.5881652832031,
598
+ "loss": 0.5046,
599
+ "rewards/accuracies": 0.762499988079071,
600
+ "rewards/chosen": -0.4136735796928406,
601
+ "rewards/margins": 0.9840124249458313,
602
+ "rewards/rejected": -1.3976860046386719,
603
+ "step": 380
604
+ },
605
+ {
606
+ "epoch": 0.41,
607
+ "learning_rate": 4.6201202403910643e-07,
608
+ "logits/chosen": -2.3376293182373047,
609
+ "logits/rejected": -2.289973020553589,
610
+ "logps/chosen": -299.35546875,
611
+ "logps/rejected": -284.88323974609375,
612
+ "loss": 0.5184,
613
+ "rewards/accuracies": 0.7562500238418579,
614
+ "rewards/chosen": -0.6497005224227905,
615
+ "rewards/margins": 1.0391151905059814,
616
+ "rewards/rejected": -1.688815712928772,
617
+ "step": 390
618
+ },
619
+ {
620
+ "epoch": 0.42,
621
+ "learning_rate": 4.588719528532341e-07,
622
+ "logits/chosen": -2.458130359649658,
623
+ "logits/rejected": -2.4228129386901855,
624
+ "logps/chosen": -315.5902404785156,
625
+ "logps/rejected": -292.9128112792969,
626
+ "loss": 0.4812,
627
+ "rewards/accuracies": 0.6875,
628
+ "rewards/chosen": -0.5732877254486084,
629
+ "rewards/margins": 0.7860868573188782,
630
+ "rewards/rejected": -1.359374761581421,
631
+ "step": 400
632
+ },
633
+ {
634
+ "epoch": 0.42,
635
+ "eval_logits/chosen": -2.369720458984375,
636
+ "eval_logits/rejected": -2.3284802436828613,
637
+ "eval_logps/chosen": -300.5794982910156,
638
+ "eval_logps/rejected": -295.33331298828125,
639
+ "eval_loss": 0.5147603154182434,
640
+ "eval_rewards/accuracies": 0.7599206566810608,
641
+ "eval_rewards/chosen": -0.21297024190425873,
642
+ "eval_rewards/margins": 0.9668172597885132,
643
+ "eval_rewards/rejected": -1.179787516593933,
644
+ "eval_runtime": 407.8456,
645
+ "eval_samples_per_second": 4.904,
646
+ "eval_steps_per_second": 0.154,
647
+ "step": 400
648
+ },
649
+ {
650
+ "epoch": 0.43,
651
+ "learning_rate": 4.5561877382016553e-07,
652
+ "logits/chosen": -2.4733567237854004,
653
+ "logits/rejected": -2.429727792739868,
654
+ "logps/chosen": -300.5076599121094,
655
+ "logps/rejected": -275.7524108886719,
656
+ "loss": 0.5144,
657
+ "rewards/accuracies": 0.6937500238418579,
658
+ "rewards/chosen": -0.15985186398029327,
659
+ "rewards/margins": 0.7756703495979309,
660
+ "rewards/rejected": -0.935522198677063,
661
+ "step": 410
662
+ },
663
+ {
664
+ "epoch": 0.44,
665
+ "learning_rate": 4.5225424859373684e-07,
666
+ "logits/chosen": -2.3798115253448486,
667
+ "logits/rejected": -2.3424534797668457,
668
+ "logps/chosen": -243.74472045898438,
669
+ "logps/rejected": -266.5513916015625,
670
+ "loss": 0.5302,
671
+ "rewards/accuracies": 0.6812499761581421,
672
+ "rewards/chosen": -0.005200001411139965,
673
+ "rewards/margins": 0.7427350878715515,
674
+ "rewards/rejected": -0.7479350566864014,
675
+ "step": 420
676
+ },
677
+ {
678
+ "epoch": 0.45,
679
+ "learning_rate": 4.487801991237119e-07,
680
+ "logits/chosen": -2.441993236541748,
681
+ "logits/rejected": -2.414069890975952,
682
+ "logps/chosen": -303.46893310546875,
683
+ "logps/rejected": -286.26470947265625,
684
+ "loss": 0.5387,
685
+ "rewards/accuracies": 0.7124999761581421,
686
+ "rewards/chosen": -0.12445823103189468,
687
+ "rewards/margins": 0.7546092867851257,
688
+ "rewards/rejected": -0.8790675401687622,
689
+ "step": 430
690
+ },
691
+ {
692
+ "epoch": 0.46,
693
+ "learning_rate": 4.451985066691648e-07,
694
+ "logits/chosen": -2.444979429244995,
695
+ "logits/rejected": -2.402029514312744,
696
+ "logps/chosen": -300.88037109375,
697
+ "logps/rejected": -275.1875915527344,
698
+ "loss": 0.4965,
699
+ "rewards/accuracies": 0.7124999761581421,
700
+ "rewards/chosen": -0.5577610731124878,
701
+ "rewards/margins": 0.7658098340034485,
702
+ "rewards/rejected": -1.323570966720581,
703
+ "step": 440
704
+ },
705
+ {
706
+ "epoch": 0.47,
707
+ "learning_rate": 4.415111107797445e-07,
708
+ "logits/chosen": -2.290574550628662,
709
+ "logits/rejected": -2.2670164108276367,
710
+ "logps/chosen": -236.2515106201172,
711
+ "logps/rejected": -255.0359344482422,
712
+ "loss": 0.5183,
713
+ "rewards/accuracies": 0.71875,
714
+ "rewards/chosen": -0.5639761090278625,
715
+ "rewards/margins": 0.8503490686416626,
716
+ "rewards/rejected": -1.4143251180648804,
717
+ "step": 450
718
+ },
719
+ {
720
+ "epoch": 0.48,
721
+ "learning_rate": 4.377200082453748e-07,
722
+ "logits/chosen": -2.467921495437622,
723
+ "logits/rejected": -2.422136068344116,
724
+ "logps/chosen": -329.4405822753906,
725
+ "logps/rejected": -305.3108825683594,
726
+ "loss": 0.4962,
727
+ "rewards/accuracies": 0.7124999761581421,
728
+ "rewards/chosen": -0.41245952248573303,
729
+ "rewards/margins": 0.9212630987167358,
730
+ "rewards/rejected": -1.333722710609436,
731
+ "step": 460
732
+ },
733
+ {
734
+ "epoch": 0.49,
735
+ "learning_rate": 4.3382725201495717e-07,
736
+ "logits/chosen": -2.3804707527160645,
737
+ "logits/rejected": -2.3205363750457764,
738
+ "logps/chosen": -282.99945068359375,
739
+ "logps/rejected": -292.9880065917969,
740
+ "loss": 0.4927,
741
+ "rewards/accuracies": 0.71875,
742
+ "rewards/chosen": -0.5624107718467712,
743
+ "rewards/margins": 0.8573344349861145,
744
+ "rewards/rejected": -1.4197450876235962,
745
+ "step": 470
746
+ },
747
+ {
748
+ "epoch": 0.5,
749
+ "learning_rate": 4.2983495008466273e-07,
750
+ "logits/chosen": -2.302927255630493,
751
+ "logits/rejected": -2.272263765335083,
752
+ "logps/chosen": -271.0894470214844,
753
+ "logps/rejected": -286.90118408203125,
754
+ "loss": 0.5345,
755
+ "rewards/accuracies": 0.6937500238418579,
756
+ "rewards/chosen": -0.5512509346008301,
757
+ "rewards/margins": 0.8141476511955261,
758
+ "rewards/rejected": -1.365398645401001,
759
+ "step": 480
760
+ },
761
+ {
762
+ "epoch": 0.51,
763
+ "learning_rate": 4.2574526435641546e-07,
764
+ "logits/chosen": -2.311328172683716,
765
+ "logits/rejected": -2.276010274887085,
766
+ "logps/chosen": -285.27960205078125,
767
+ "logps/rejected": -270.05401611328125,
768
+ "loss": 0.5116,
769
+ "rewards/accuracies": 0.71875,
770
+ "rewards/chosen": -0.49702826142311096,
771
+ "rewards/margins": 0.8841336369514465,
772
+ "rewards/rejected": -1.3811619281768799,
773
+ "step": 490
774
+ },
775
+ {
776
+ "epoch": 0.52,
777
+ "learning_rate": 4.2156040946718343e-07,
778
+ "logits/chosen": -2.3711423873901367,
779
+ "logits/rejected": -2.286292552947998,
780
+ "logps/chosen": -304.9607238769531,
781
+ "logps/rejected": -297.2706604003906,
782
+ "loss": 0.5217,
783
+ "rewards/accuracies": 0.762499988079071,
784
+ "rewards/chosen": -0.5594313740730286,
785
+ "rewards/margins": 1.1461693048477173,
786
+ "rewards/rejected": -1.7056007385253906,
787
+ "step": 500
788
+ },
789
+ {
790
+ "epoch": 0.52,
791
+ "eval_logits/chosen": -2.3431873321533203,
792
+ "eval_logits/rejected": -2.3073697090148926,
793
+ "eval_logps/chosen": -299.8136291503906,
794
+ "eval_logps/rejected": -294.8787841796875,
795
+ "eval_loss": 0.5093801617622375,
796
+ "eval_rewards/accuracies": 0.7599206566810608,
797
+ "eval_rewards/chosen": -0.17467793822288513,
798
+ "eval_rewards/margins": 0.982383131980896,
799
+ "eval_rewards/rejected": -1.157060980796814,
800
+ "eval_runtime": 409.4153,
801
+ "eval_samples_per_second": 4.885,
802
+ "eval_steps_per_second": 0.154,
803
+ "step": 500
804
+ },
805
+ {
806
+ "epoch": 0.53,
807
+ "learning_rate": 4.172826515897145e-07,
808
+ "logits/chosen": -2.380160331726074,
809
+ "logits/rejected": -2.3342180252075195,
810
+ "logps/chosen": -285.99005126953125,
811
+ "logps/rejected": -262.095703125,
812
+ "loss": 0.4819,
813
+ "rewards/accuracies": 0.8062499761581421,
814
+ "rewards/chosen": -0.1991649866104126,
815
+ "rewards/margins": 0.9838080406188965,
816
+ "rewards/rejected": -1.1829731464385986,
817
+ "step": 510
818
+ },
819
+ {
820
+ "epoch": 0.54,
821
+ "learning_rate": 4.129143072053638e-07,
822
+ "logits/chosen": -2.430504322052002,
823
+ "logits/rejected": -2.3967652320861816,
824
+ "logps/chosen": -295.1175842285156,
825
+ "logps/rejected": -298.79034423828125,
826
+ "loss": 0.4879,
827
+ "rewards/accuracies": 0.75,
828
+ "rewards/chosen": -0.2341586798429489,
829
+ "rewards/margins": 0.9701293110847473,
830
+ "rewards/rejected": -1.2042880058288574,
831
+ "step": 520
832
+ },
833
+ {
834
+ "epoch": 0.55,
835
+ "learning_rate": 4.084577418496775e-07,
836
+ "logits/chosen": -2.459068775177002,
837
+ "logits/rejected": -2.4153661727905273,
838
+ "logps/chosen": -303.06683349609375,
839
+ "logps/rejected": -333.80609130859375,
840
+ "loss": 0.4868,
841
+ "rewards/accuracies": 0.737500011920929,
842
+ "rewards/chosen": -0.4716320037841797,
843
+ "rewards/margins": 1.0053110122680664,
844
+ "rewards/rejected": -1.4769428968429565,
845
+ "step": 530
846
+ },
847
+ {
848
+ "epoch": 0.57,
849
+ "learning_rate": 4.039153688314145e-07,
850
+ "logits/chosen": -2.4637537002563477,
851
+ "logits/rejected": -2.4269044399261475,
852
+ "logps/chosen": -304.27166748046875,
853
+ "logps/rejected": -302.20477294921875,
854
+ "loss": 0.4986,
855
+ "rewards/accuracies": 0.768750011920929,
856
+ "rewards/chosen": -0.45887041091918945,
857
+ "rewards/margins": 1.0204750299453735,
858
+ "rewards/rejected": -1.4793453216552734,
859
+ "step": 540
860
+ },
861
+ {
862
+ "epoch": 0.58,
863
+ "learning_rate": 3.9928964792569654e-07,
864
+ "logits/chosen": -2.4167208671569824,
865
+ "logits/rejected": -2.3573527336120605,
866
+ "logps/chosen": -303.3789978027344,
867
+ "logps/rejected": -296.81060791015625,
868
+ "loss": 0.5343,
869
+ "rewards/accuracies": 0.731249988079071,
870
+ "rewards/chosen": -0.59644615650177,
871
+ "rewards/margins": 0.9994708895683289,
872
+ "rewards/rejected": -1.595916986465454,
873
+ "step": 550
874
+ },
875
+ {
876
+ "epoch": 0.59,
877
+ "learning_rate": 3.945830840419966e-07,
878
+ "logits/chosen": -2.3017094135284424,
879
+ "logits/rejected": -2.2486953735351562,
880
+ "logps/chosen": -298.501708984375,
881
+ "logps/rejected": -250.04556274414062,
882
+ "loss": 0.5037,
883
+ "rewards/accuracies": 0.7437499761581421,
884
+ "rewards/chosen": -0.4263156056404114,
885
+ "rewards/margins": 0.9166458249092102,
886
+ "rewards/rejected": -1.3429615497589111,
887
+ "step": 560
888
+ },
889
+ {
890
+ "epoch": 0.6,
891
+ "learning_rate": 3.8979822586768666e-07,
892
+ "logits/chosen": -2.308485507965088,
893
+ "logits/rejected": -2.259333610534668,
894
+ "logps/chosen": -274.44329833984375,
895
+ "logps/rejected": -297.6800537109375,
896
+ "loss": 0.5011,
897
+ "rewards/accuracies": 0.7749999761581421,
898
+ "rewards/chosen": -0.19708169996738434,
899
+ "rewards/margins": 0.9468144178390503,
900
+ "rewards/rejected": -1.1438961029052734,
901
+ "step": 570
902
+ },
903
+ {
904
+ "epoch": 0.61,
905
+ "learning_rate": 3.849376644878782e-07,
906
+ "logits/chosen": -2.3406989574432373,
907
+ "logits/rejected": -2.2872297763824463,
908
+ "logps/chosen": -293.8192443847656,
909
+ "logps/rejected": -292.8212585449219,
910
+ "loss": 0.5012,
911
+ "rewards/accuracies": 0.8062499761581421,
912
+ "rewards/chosen": -0.24491167068481445,
913
+ "rewards/margins": 1.1491771936416626,
914
+ "rewards/rejected": -1.394088864326477,
915
+ "step": 580
916
+ },
917
+ {
918
+ "epoch": 0.62,
919
+ "learning_rate": 3.800040319823038e-07,
920
+ "logits/chosen": -2.2375845909118652,
921
+ "logits/rejected": -2.21081280708313,
922
+ "logps/chosen": -281.64678955078125,
923
+ "logps/rejected": -281.1907958984375,
924
+ "loss": 0.5189,
925
+ "rewards/accuracies": 0.668749988079071,
926
+ "rewards/chosen": -0.33811599016189575,
927
+ "rewards/margins": 0.640630841255188,
928
+ "rewards/rejected": -0.9787468910217285,
929
+ "step": 590
930
+ },
931
+ {
932
+ "epoch": 0.63,
933
+ "learning_rate": 3.75e-07,
934
+ "logits/chosen": -2.336167812347412,
935
+ "logits/rejected": -2.3122599124908447,
936
+ "logps/chosen": -316.1159362792969,
937
+ "logps/rejected": -348.52728271484375,
938
+ "loss": 0.5069,
939
+ "rewards/accuracies": 0.762499988079071,
940
+ "rewards/chosen": 0.08971457183361053,
941
+ "rewards/margins": 1.1014090776443481,
942
+ "rewards/rejected": -1.0116945505142212,
943
+ "step": 600
944
+ },
945
+ {
946
+ "epoch": 0.63,
947
+ "eval_logits/chosen": -2.2853615283966064,
948
+ "eval_logits/rejected": -2.2443532943725586,
949
+ "eval_logps/chosen": -297.127197265625,
950
+ "eval_logps/rejected": -292.7251281738281,
951
+ "eval_loss": 0.5037240982055664,
952
+ "eval_rewards/accuracies": 0.7658730149269104,
953
+ "eval_rewards/chosen": -0.04035760462284088,
954
+ "eval_rewards/margins": 1.009021520614624,
955
+ "eval_rewards/rejected": -1.0493791103363037,
956
+ "eval_runtime": 411.3039,
957
+ "eval_samples_per_second": 4.863,
958
+ "eval_steps_per_second": 0.153,
959
+ "step": 600
960
+ },
961
+ {
962
+ "epoch": 0.64,
963
+ "learning_rate": 3.699282783125616e-07,
964
+ "logits/chosen": -2.29797625541687,
965
+ "logits/rejected": -2.2567286491394043,
966
+ "logps/chosen": -333.64312744140625,
967
+ "logps/rejected": -276.2323913574219,
968
+ "loss": 0.4939,
969
+ "rewards/accuracies": 0.768750011920929,
970
+ "rewards/chosen": -0.5196839570999146,
971
+ "rewards/margins": 0.9884433746337891,
972
+ "rewards/rejected": -1.508127212524414,
973
+ "step": 610
974
+ },
975
+ {
976
+ "epoch": 0.65,
977
+ "learning_rate": 3.647916133467529e-07,
978
+ "logits/chosen": -2.3571763038635254,
979
+ "logits/rejected": -2.2932493686676025,
980
+ "logps/chosen": -308.9863586425781,
981
+ "logps/rejected": -301.33367919921875,
982
+ "loss": 0.4919,
983
+ "rewards/accuracies": 0.7749999761581421,
984
+ "rewards/chosen": -1.0287619829177856,
985
+ "rewards/margins": 0.9334309697151184,
986
+ "rewards/rejected": -1.9621927738189697,
987
+ "step": 620
988
+ },
989
+ {
990
+ "epoch": 0.66,
991
+ "learning_rate": 3.595927866972693e-07,
992
+ "logits/chosen": -2.352038860321045,
993
+ "logits/rejected": -2.2862563133239746,
994
+ "logps/chosen": -293.9407958984375,
995
+ "logps/rejected": -288.50885009765625,
996
+ "loss": 0.4733,
997
+ "rewards/accuracies": 0.7875000238418579,
998
+ "rewards/chosen": -0.5753384828567505,
999
+ "rewards/margins": 1.169237494468689,
1000
+ "rewards/rejected": -1.744576096534729,
1001
+ "step": 630
1002
+ },
1003
+ {
1004
+ "epoch": 0.67,
1005
+ "learning_rate": 3.5433461362045447e-07,
1006
+ "logits/chosen": -2.3236405849456787,
1007
+ "logits/rejected": -2.302849054336548,
1008
+ "logps/chosen": -262.9497375488281,
1009
+ "logps/rejected": -284.64642333984375,
1010
+ "loss": 0.4962,
1011
+ "rewards/accuracies": 0.7562500238418579,
1012
+ "rewards/chosen": -0.15619780123233795,
1013
+ "rewards/margins": 1.1346657276153564,
1014
+ "rewards/rejected": -1.2908635139465332,
1015
+ "step": 640
1016
+ },
1017
+ {
1018
+ "epoch": 0.68,
1019
+ "learning_rate": 3.490199415097892e-07,
1020
+ "logits/chosen": -2.3758909702301025,
1021
+ "logits/rejected": -2.300546169281006,
1022
+ "logps/chosen": -324.71881103515625,
1023
+ "logps/rejected": -303.84881591796875,
1024
+ "loss": 0.494,
1025
+ "rewards/accuracies": 0.8062499761581421,
1026
+ "rewards/chosen": 0.004534644540399313,
1027
+ "rewards/margins": 1.049611210823059,
1028
+ "rewards/rejected": -1.045076608657837,
1029
+ "step": 650
1030
+ },
1031
+ {
1032
+ "epoch": 0.69,
1033
+ "learning_rate": 3.43651648353978e-07,
1034
+ "logits/chosen": -2.4268946647644043,
1035
+ "logits/rejected": -2.3779337406158447,
1036
+ "logps/chosen": -320.46429443359375,
1037
+ "logps/rejected": -281.7370910644531,
1038
+ "loss": 0.5142,
1039
+ "rewards/accuracies": 0.65625,
1040
+ "rewards/chosen": -0.28596049547195435,
1041
+ "rewards/margins": 0.7663468718528748,
1042
+ "rewards/rejected": -1.052307367324829,
1043
+ "step": 660
1044
+ },
1045
+ {
1046
+ "epoch": 0.7,
1047
+ "learning_rate": 3.3823264117846717e-07,
1048
+ "logits/chosen": -2.353480100631714,
1049
+ "logits/rejected": -2.3156867027282715,
1050
+ "logps/chosen": -294.9992980957031,
1051
+ "logps/rejected": -279.8269958496094,
1052
+ "loss": 0.5042,
1053
+ "rewards/accuracies": 0.8125,
1054
+ "rewards/chosen": -0.3519328236579895,
1055
+ "rewards/margins": 1.0733650922775269,
1056
+ "rewards/rejected": -1.4252979755401611,
1057
+ "step": 670
1058
+ },
1059
+ {
1060
+ "epoch": 0.71,
1061
+ "learning_rate": 3.327658544712395e-07,
1062
+ "logits/chosen": -2.349534273147583,
1063
+ "logits/rejected": -2.2964000701904297,
1064
+ "logps/chosen": -302.86798095703125,
1065
+ "logps/rejected": -278.29827880859375,
1066
+ "loss": 0.5219,
1067
+ "rewards/accuracies": 0.6625000238418579,
1068
+ "rewards/chosen": -0.47534093260765076,
1069
+ "rewards/margins": 0.663826584815979,
1070
+ "rewards/rejected": -1.1391674280166626,
1071
+ "step": 680
1072
+ },
1073
+ {
1074
+ "epoch": 0.72,
1075
+ "learning_rate": 3.272542485937368e-07,
1076
+ "logits/chosen": -2.3173270225524902,
1077
+ "logits/rejected": -2.2520382404327393,
1078
+ "logps/chosen": -266.728515625,
1079
+ "logps/rejected": -234.1517333984375,
1080
+ "loss": 0.5044,
1081
+ "rewards/accuracies": 0.762499988079071,
1082
+ "rewards/chosen": -0.2846493721008301,
1083
+ "rewards/margins": 0.9897111058235168,
1084
+ "rewards/rejected": -1.2743605375289917,
1085
+ "step": 690
1086
+ },
1087
+ {
1088
+ "epoch": 0.73,
1089
+ "learning_rate": 3.2170080817777257e-07,
1090
+ "logits/chosen": -2.3343617916107178,
1091
+ "logits/rejected": -2.2195441722869873,
1092
+ "logps/chosen": -297.25250244140625,
1093
+ "logps/rejected": -286.7340393066406,
1094
+ "loss": 0.4582,
1095
+ "rewards/accuracies": 0.762499988079071,
1096
+ "rewards/chosen": -0.5840951800346375,
1097
+ "rewards/margins": 1.0429940223693848,
1098
+ "rewards/rejected": -1.6270891427993774,
1099
+ "step": 700
1100
+ },
1101
+ {
1102
+ "epoch": 0.73,
1103
+ "eval_logits/chosen": -2.289731979370117,
1104
+ "eval_logits/rejected": -2.246857166290283,
1105
+ "eval_logps/chosen": -308.9957580566406,
1106
+ "eval_logps/rejected": -306.2008056640625,
1107
+ "eval_loss": 0.500346839427948,
1108
+ "eval_rewards/accuracies": 0.7599206566810608,
1109
+ "eval_rewards/chosen": -0.6337829232215881,
1110
+ "eval_rewards/margins": 1.0893793106079102,
1111
+ "eval_rewards/rejected": -1.7231621742248535,
1112
+ "eval_runtime": 438.5736,
1113
+ "eval_samples_per_second": 4.56,
1114
+ "eval_steps_per_second": 0.144,
1115
+ "step": 700
1116
+ },
1117
+ {
1118
+ "epoch": 0.74,
1119
+ "learning_rate": 3.1610854050930057e-07,
1120
+ "logits/chosen": -2.3422415256500244,
1121
+ "logits/rejected": -2.2789344787597656,
1122
+ "logps/chosen": -298.95867919921875,
1123
+ "logps/rejected": -271.91290283203125,
1124
+ "loss": 0.474,
1125
+ "rewards/accuracies": 0.71875,
1126
+ "rewards/chosen": -0.5943951606750488,
1127
+ "rewards/margins": 0.9698866009712219,
1128
+ "rewards/rejected": -1.5642818212509155,
1129
+ "step": 710
1130
+ },
1131
+ {
1132
+ "epoch": 0.75,
1133
+ "learning_rate": 3.104804738999169e-07,
1134
+ "logits/chosen": -2.296921968460083,
1135
+ "logits/rejected": -2.277675151824951,
1136
+ "logps/chosen": -288.68511962890625,
1137
+ "logps/rejected": -294.5284729003906,
1138
+ "loss": 0.4741,
1139
+ "rewards/accuracies": 0.7562500238418579,
1140
+ "rewards/chosen": -0.4995554983615875,
1141
+ "rewards/margins": 1.2392946481704712,
1142
+ "rewards/rejected": -1.7388503551483154,
1143
+ "step": 720
1144
+ },
1145
+ {
1146
+ "epoch": 0.76,
1147
+ "learning_rate": 3.048196560469758e-07,
1148
+ "logits/chosen": -2.320403575897217,
1149
+ "logits/rejected": -2.276542901992798,
1150
+ "logps/chosen": -317.94012451171875,
1151
+ "logps/rejected": -283.49920654296875,
1152
+ "loss": 0.4819,
1153
+ "rewards/accuracies": 0.7562500238418579,
1154
+ "rewards/chosen": -0.4403966963291168,
1155
+ "rewards/margins": 1.0142490863800049,
1156
+ "rewards/rejected": -1.4546458721160889,
1157
+ "step": 730
1158
+ },
1159
+ {
1160
+ "epoch": 0.77,
1161
+ "learning_rate": 2.991291523832075e-07,
1162
+ "logits/chosen": -2.3161144256591797,
1163
+ "logits/rejected": -2.2607274055480957,
1164
+ "logps/chosen": -326.93829345703125,
1165
+ "logps/rejected": -328.2031555175781,
1166
+ "loss": 0.4746,
1167
+ "rewards/accuracies": 0.8125,
1168
+ "rewards/chosen": -0.6947178840637207,
1169
+ "rewards/margins": 1.2727077007293701,
1170
+ "rewards/rejected": -1.9674255847930908,
1171
+ "step": 740
1172
+ },
1173
+ {
1174
+ "epoch": 0.79,
1175
+ "learning_rate": 2.934120444167326e-07,
1176
+ "logits/chosen": -2.292912006378174,
1177
+ "logits/rejected": -2.2443642616271973,
1178
+ "logps/chosen": -282.31536865234375,
1179
+ "logps/rejected": -312.34735107421875,
1180
+ "loss": 0.4777,
1181
+ "rewards/accuracies": 0.793749988079071,
1182
+ "rewards/chosen": -0.9387601017951965,
1183
+ "rewards/margins": 1.1481364965438843,
1184
+ "rewards/rejected": -2.0868964195251465,
1185
+ "step": 750
1186
+ },
1187
+ {
1188
+ "epoch": 0.8,
1189
+ "learning_rate": 2.8767142806237077e-07,
1190
+ "logits/chosen": -2.369844913482666,
1191
+ "logits/rejected": -2.368522882461548,
1192
+ "logps/chosen": -308.49700927734375,
1193
+ "logps/rejected": -300.10455322265625,
1194
+ "loss": 0.4665,
1195
+ "rewards/accuracies": 0.75,
1196
+ "rewards/chosen": -0.539741039276123,
1197
+ "rewards/margins": 1.0256460905075073,
1198
+ "rewards/rejected": -1.5653870105743408,
1199
+ "step": 760
1200
+ },
1201
+ {
1202
+ "epoch": 0.81,
1203
+ "learning_rate": 2.819104119651487e-07,
1204
+ "logits/chosen": -2.3911471366882324,
1205
+ "logits/rejected": -2.3795692920684814,
1206
+ "logps/chosen": -315.2966613769531,
1207
+ "logps/rejected": -321.0699157714844,
1208
+ "loss": 0.5096,
1209
+ "rewards/accuracies": 0.7437499761581421,
1210
+ "rewards/chosen": -0.39289337396621704,
1211
+ "rewards/margins": 1.0190023183822632,
1212
+ "rewards/rejected": -1.411895751953125,
1213
+ "step": 770
1214
+ },
1215
+ {
1216
+ "epoch": 0.82,
1217
+ "learning_rate": 2.761321158169134e-07,
1218
+ "logits/chosen": -2.3824098110198975,
1219
+ "logits/rejected": -2.333876132965088,
1220
+ "logps/chosen": -309.57403564453125,
1221
+ "logps/rejected": -271.635009765625,
1222
+ "loss": 0.4965,
1223
+ "rewards/accuracies": 0.762499988079071,
1224
+ "rewards/chosen": -0.31998205184936523,
1225
+ "rewards/margins": 1.0204508304595947,
1226
+ "rewards/rejected": -1.3404327630996704,
1227
+ "step": 780
1228
+ },
1229
+ {
1230
+ "epoch": 0.83,
1231
+ "learning_rate": 2.703396686669646e-07,
1232
+ "logits/chosen": -2.3626012802124023,
1233
+ "logits/rejected": -2.336550235748291,
1234
+ "logps/chosen": -282.4324035644531,
1235
+ "logps/rejected": -289.73907470703125,
1236
+ "loss": 0.4969,
1237
+ "rewards/accuracies": 0.768750011920929,
1238
+ "rewards/chosen": -0.40831103920936584,
1239
+ "rewards/margins": 0.9678645133972168,
1240
+ "rewards/rejected": -1.3761756420135498,
1241
+ "step": 790
1242
+ },
1243
+ {
1244
+ "epoch": 0.84,
1245
+ "learning_rate": 2.6453620722761895e-07,
1246
+ "logits/chosen": -2.2754549980163574,
1247
+ "logits/rejected": -2.227632999420166,
1248
+ "logps/chosen": -324.36065673828125,
1249
+ "logps/rejected": -315.53265380859375,
1250
+ "loss": 0.457,
1251
+ "rewards/accuracies": 0.768750011920929,
1252
+ "rewards/chosen": -0.40555134415626526,
1253
+ "rewards/margins": 1.2698910236358643,
1254
+ "rewards/rejected": -1.6754423379898071,
1255
+ "step": 800
1256
+ },
1257
+ {
1258
+ "epoch": 0.84,
1259
+ "eval_logits/chosen": -2.3341588973999023,
1260
+ "eval_logits/rejected": -2.292834520339966,
1261
+ "eval_logps/chosen": -306.1227722167969,
1262
+ "eval_logps/rejected": -303.84637451171875,
1263
+ "eval_loss": 0.4906502664089203,
1264
+ "eval_rewards/accuracies": 0.7638888955116272,
1265
+ "eval_rewards/chosen": -0.4901339113712311,
1266
+ "eval_rewards/margins": 1.1153074502944946,
1267
+ "eval_rewards/rejected": -1.6054412126541138,
1268
+ "eval_runtime": 442.4535,
1269
+ "eval_samples_per_second": 4.52,
1270
+ "eval_steps_per_second": 0.142,
1271
+ "step": 800
1272
+ },
1273
+ {
1274
+ "epoch": 0.85,
1275
+ "learning_rate": 2.5872487417562527e-07,
1276
+ "logits/chosen": -2.3688154220581055,
1277
+ "logits/rejected": -2.2756457328796387,
1278
+ "logps/chosen": -302.0091552734375,
1279
+ "logps/rejected": -267.61712646484375,
1280
+ "loss": 0.5048,
1281
+ "rewards/accuracies": 0.7124999761581421,
1282
+ "rewards/chosen": -0.739054799079895,
1283
+ "rewards/margins": 0.9176353216171265,
1284
+ "rewards/rejected": -1.656690001487732,
1285
+ "step": 810
1286
+ },
1287
+ {
1288
+ "epoch": 0.86,
1289
+ "learning_rate": 2.5290881645034926e-07,
1290
+ "logits/chosen": -2.371279239654541,
1291
+ "logits/rejected": -2.3216042518615723,
1292
+ "logps/chosen": -307.4602966308594,
1293
+ "logps/rejected": -276.8441162109375,
1294
+ "loss": 0.5156,
1295
+ "rewards/accuracies": 0.7749999761581421,
1296
+ "rewards/chosen": -0.4735354781150818,
1297
+ "rewards/margins": 1.1825616359710693,
1298
+ "rewards/rejected": -1.656097173690796,
1299
+ "step": 820
1300
+ },
1301
+ {
1302
+ "epoch": 0.87,
1303
+ "learning_rate": 2.4709118354965077e-07,
1304
+ "logits/chosen": -2.3698923587799072,
1305
+ "logits/rejected": -2.3812479972839355,
1306
+ "logps/chosen": -283.11395263671875,
1307
+ "logps/rejected": -314.9549865722656,
1308
+ "loss": 0.4728,
1309
+ "rewards/accuracies": 0.7562500238418579,
1310
+ "rewards/chosen": -0.5311114192008972,
1311
+ "rewards/margins": 0.8778692483901978,
1312
+ "rewards/rejected": -1.4089806079864502,
1313
+ "step": 830
1314
+ },
1315
+ {
1316
+ "epoch": 0.88,
1317
+ "learning_rate": 2.412751258243748e-07,
1318
+ "logits/chosen": -2.377016305923462,
1319
+ "logits/rejected": -2.3782734870910645,
1320
+ "logps/chosen": -294.67608642578125,
1321
+ "logps/rejected": -310.6152038574219,
1322
+ "loss": 0.4955,
1323
+ "rewards/accuracies": 0.6875,
1324
+ "rewards/chosen": -0.6087425351142883,
1325
+ "rewards/margins": 0.8135608434677124,
1326
+ "rewards/rejected": -1.4223034381866455,
1327
+ "step": 840
1328
+ },
1329
+ {
1330
+ "epoch": 0.89,
1331
+ "learning_rate": 2.3546379277238103e-07,
1332
+ "logits/chosen": -2.422614574432373,
1333
+ "logits/rejected": -2.39127779006958,
1334
+ "logps/chosen": -313.9159240722656,
1335
+ "logps/rejected": -270.3652648925781,
1336
+ "loss": 0.4874,
1337
+ "rewards/accuracies": 0.7875000238418579,
1338
+ "rewards/chosen": -0.7056568264961243,
1339
+ "rewards/margins": 0.9642340540885925,
1340
+ "rewards/rejected": -1.6698909997940063,
1341
+ "step": 850
1342
+ },
1343
+ {
1344
+ "epoch": 0.9,
1345
+ "learning_rate": 2.2966033133303545e-07,
1346
+ "logits/chosen": -2.378150463104248,
1347
+ "logits/rejected": -2.3483357429504395,
1348
+ "logps/chosen": -326.0942687988281,
1349
+ "logps/rejected": -330.057373046875,
1350
+ "loss": 0.4994,
1351
+ "rewards/accuracies": 0.731249988079071,
1352
+ "rewards/chosen": -0.8599470257759094,
1353
+ "rewards/margins": 0.9078429341316223,
1354
+ "rewards/rejected": -1.7677898406982422,
1355
+ "step": 860
1356
+ },
1357
+ {
1358
+ "epoch": 0.91,
1359
+ "learning_rate": 2.2386788418308665e-07,
1360
+ "logits/chosen": -2.4225101470947266,
1361
+ "logits/rejected": -2.4114060401916504,
1362
+ "logps/chosen": -266.55108642578125,
1363
+ "logps/rejected": -283.1296081542969,
1364
+ "loss": 0.501,
1365
+ "rewards/accuracies": 0.706250011920929,
1366
+ "rewards/chosen": -0.5755284428596497,
1367
+ "rewards/margins": 1.0859739780426025,
1368
+ "rewards/rejected": -1.6615022420883179,
1369
+ "step": 870
1370
+ },
1371
+ {
1372
+ "epoch": 0.92,
1373
+ "learning_rate": 2.1808958803485133e-07,
1374
+ "logits/chosen": -2.483870506286621,
1375
+ "logits/rejected": -2.426514148712158,
1376
+ "logps/chosen": -305.41119384765625,
1377
+ "logps/rejected": -296.4634704589844,
1378
+ "loss": 0.4761,
1379
+ "rewards/accuracies": 0.7562500238418579,
1380
+ "rewards/chosen": -0.4832102358341217,
1381
+ "rewards/margins": 0.9984723925590515,
1382
+ "rewards/rejected": -1.481682538986206,
1383
+ "step": 880
1384
+ },
1385
+ {
1386
+ "epoch": 0.93,
1387
+ "learning_rate": 2.123285719376292e-07,
1388
+ "logits/chosen": -2.4902679920196533,
1389
+ "logits/rejected": -2.4276280403137207,
1390
+ "logps/chosen": -306.55987548828125,
1391
+ "logps/rejected": -308.28216552734375,
1392
+ "loss": 0.4792,
1393
+ "rewards/accuracies": 0.831250011920929,
1394
+ "rewards/chosen": -0.5795844793319702,
1395
+ "rewards/margins": 1.21144700050354,
1396
+ "rewards/rejected": -1.7910315990447998,
1397
+ "step": 890
1398
+ },
1399
+ {
1400
+ "epoch": 0.94,
1401
+ "learning_rate": 2.065879555832674e-07,
1402
+ "logits/chosen": -2.3825860023498535,
1403
+ "logits/rejected": -2.32081937789917,
1404
+ "logps/chosen": -300.0443115234375,
1405
+ "logps/rejected": -298.1470642089844,
1406
+ "loss": 0.4723,
1407
+ "rewards/accuracies": 0.7875000238418579,
1408
+ "rewards/chosen": -0.6022388339042664,
1409
+ "rewards/margins": 1.1292412281036377,
1410
+ "rewards/rejected": -1.7314800024032593,
1411
+ "step": 900
1412
+ },
1413
+ {
1414
+ "epoch": 0.94,
1415
+ "eval_logits/chosen": -2.3762104511260986,
1416
+ "eval_logits/rejected": -2.3354876041412354,
1417
+ "eval_logps/chosen": -305.15618896484375,
1418
+ "eval_logps/rejected": -302.8718566894531,
1419
+ "eval_loss": 0.4933333098888397,
1420
+ "eval_rewards/accuracies": 0.7658730149269104,
1421
+ "eval_rewards/chosen": -0.4418059289455414,
1422
+ "eval_rewards/margins": 1.114911437034607,
1423
+ "eval_rewards/rejected": -1.5567171573638916,
1424
+ "eval_runtime": 427.8237,
1425
+ "eval_samples_per_second": 4.675,
1426
+ "eval_steps_per_second": 0.147,
1427
+ "step": 900
1428
+ },
1429
+ {
1430
+ "epoch": 0.95,
1431
+ "learning_rate": 2.0087084761679243e-07,
1432
+ "logits/chosen": -2.433973789215088,
1433
+ "logits/rejected": -2.3652501106262207,
1434
+ "logps/chosen": -312.04620361328125,
1435
+ "logps/rejected": -285.4141540527344,
1436
+ "loss": 0.4747,
1437
+ "rewards/accuracies": 0.731249988079071,
1438
+ "rewards/chosen": -0.3190104365348816,
1439
+ "rewards/margins": 1.0960520505905151,
1440
+ "rewards/rejected": -1.4150625467300415,
1441
+ "step": 910
1442
+ },
1443
+ {
1444
+ "epoch": 0.96,
1445
+ "learning_rate": 1.9518034395302412e-07,
1446
+ "logits/chosen": -2.3947741985321045,
1447
+ "logits/rejected": -2.349290370941162,
1448
+ "logps/chosen": -300.0207214355469,
1449
+ "logps/rejected": -293.3101501464844,
1450
+ "loss": 0.4644,
1451
+ "rewards/accuracies": 0.7749999761581421,
1452
+ "rewards/chosen": -0.2700619399547577,
1453
+ "rewards/margins": 1.2211670875549316,
1454
+ "rewards/rejected": -1.4912290573120117,
1455
+ "step": 920
1456
+ },
1457
+ {
1458
+ "epoch": 0.97,
1459
+ "learning_rate": 1.895195261000831e-07,
1460
+ "logits/chosen": -2.426356792449951,
1461
+ "logits/rejected": -2.363300323486328,
1462
+ "logps/chosen": -273.56988525390625,
1463
+ "logps/rejected": -281.79144287109375,
1464
+ "loss": 0.4792,
1465
+ "rewards/accuracies": 0.8125,
1466
+ "rewards/chosen": -0.19313231110572815,
1467
+ "rewards/margins": 1.6159725189208984,
1468
+ "rewards/rejected": -1.8091049194335938,
1469
+ "step": 930
1470
+ },
1471
+ {
1472
+ "epoch": 0.98,
1473
+ "learning_rate": 1.8389145949069951e-07,
1474
+ "logits/chosen": -2.504002094268799,
1475
+ "logits/rejected": -2.434199810028076,
1476
+ "logps/chosen": -312.13922119140625,
1477
+ "logps/rejected": -303.17279052734375,
1478
+ "loss": 0.4749,
1479
+ "rewards/accuracies": 0.75,
1480
+ "rewards/chosen": -0.8358801603317261,
1481
+ "rewards/margins": 1.0024197101593018,
1482
+ "rewards/rejected": -1.8382999897003174,
1483
+ "step": 940
1484
+ },
1485
+ {
1486
+ "epoch": 0.99,
1487
+ "learning_rate": 1.782991918222275e-07,
1488
+ "logits/chosen": -2.4335973262786865,
1489
+ "logits/rejected": -2.371967315673828,
1490
+ "logps/chosen": -297.8493957519531,
1491
+ "logps/rejected": -308.2081604003906,
1492
+ "loss": 0.5115,
1493
+ "rewards/accuracies": 0.7437499761581421,
1494
+ "rewards/chosen": -1.042378544807434,
1495
+ "rewards/margins": 0.9967982172966003,
1496
+ "rewards/rejected": -2.0391769409179688,
1497
+ "step": 950
1498
+ },
1499
+ {
1500
+ "epoch": 1.0,
1501
+ "learning_rate": 1.7274575140626315e-07,
1502
+ "logits/chosen": -2.4119739532470703,
1503
+ "logits/rejected": -2.3814332485198975,
1504
+ "logps/chosen": -291.4102478027344,
1505
+ "logps/rejected": -332.5351257324219,
1506
+ "loss": 0.3924,
1507
+ "rewards/accuracies": 0.824999988079071,
1508
+ "rewards/chosen": -0.6064465045928955,
1509
+ "rewards/margins": 1.3707728385925293,
1510
+ "rewards/rejected": -1.9772193431854248,
1511
+ "step": 960
1512
+ },
1513
+ {
1514
+ "epoch": 1.02,
1515
+ "learning_rate": 1.672341455287605e-07,
1516
+ "logits/chosen": -2.4356229305267334,
1517
+ "logits/rejected": -2.380049228668213,
1518
+ "logps/chosen": -317.14300537109375,
1519
+ "logps/rejected": -315.25616455078125,
1520
+ "loss": 0.2962,
1521
+ "rewards/accuracies": 0.9375,
1522
+ "rewards/chosen": -0.3146964907646179,
1523
+ "rewards/margins": 1.8185898065567017,
1524
+ "rewards/rejected": -2.133286237716675,
1525
+ "step": 970
1526
+ },
1527
+ {
1528
+ "epoch": 1.03,
1529
+ "learning_rate": 1.617673588215328e-07,
1530
+ "logits/chosen": -2.4142088890075684,
1531
+ "logits/rejected": -2.3693430423736572,
1532
+ "logps/chosen": -248.46224975585938,
1533
+ "logps/rejected": -285.1511535644531,
1534
+ "loss": 0.2886,
1535
+ "rewards/accuracies": 0.887499988079071,
1536
+ "rewards/chosen": -0.18761667609214783,
1537
+ "rewards/margins": 1.7726829051971436,
1538
+ "rewards/rejected": -1.9602997303009033,
1539
+ "step": 980
1540
+ },
1541
+ {
1542
+ "epoch": 1.04,
1543
+ "learning_rate": 1.5634835164602196e-07,
1544
+ "logits/chosen": -2.4898295402526855,
1545
+ "logits/rejected": -2.4521660804748535,
1546
+ "logps/chosen": -336.2506103515625,
1547
+ "logps/rejected": -302.1678161621094,
1548
+ "loss": 0.2961,
1549
+ "rewards/accuracies": 0.875,
1550
+ "rewards/chosen": -0.21727129817008972,
1551
+ "rewards/margins": 1.7004649639129639,
1552
+ "rewards/rejected": -1.917736291885376,
1553
+ "step": 990
1554
+ },
1555
+ {
1556
+ "epoch": 1.05,
1557
+ "learning_rate": 1.5098005849021078e-07,
1558
+ "logits/chosen": -2.4500224590301514,
1559
+ "logits/rejected": -2.4026894569396973,
1560
+ "logps/chosen": -296.7429504394531,
1561
+ "logps/rejected": -283.27789306640625,
1562
+ "loss": 0.3094,
1563
+ "rewards/accuracies": 0.8812500238418579,
1564
+ "rewards/chosen": -0.8134616017341614,
1565
+ "rewards/margins": 1.605277419090271,
1566
+ "rewards/rejected": -2.418738842010498,
1567
+ "step": 1000
1568
+ },
1569
+ {
1570
+ "epoch": 1.05,
1571
+ "eval_logits/chosen": -2.40937876701355,
1572
+ "eval_logits/rejected": -2.369751214981079,
1573
+ "eval_logps/chosen": -312.38043212890625,
1574
+ "eval_logps/rejected": -312.68560791015625,
1575
+ "eval_loss": 0.49218976497650146,
1576
+ "eval_rewards/accuracies": 0.7638888955116272,
1577
+ "eval_rewards/chosen": -0.803017795085907,
1578
+ "eval_rewards/margins": 1.2443830966949463,
1579
+ "eval_rewards/rejected": -2.04740047454834,
1580
+ "eval_runtime": 407.9979,
1581
+ "eval_samples_per_second": 4.902,
1582
+ "eval_steps_per_second": 0.154,
1583
+ "step": 1000
1584
+ },
1585
+ {
1586
+ "epoch": 1.06,
1587
+ "learning_rate": 1.4566538637954554e-07,
1588
+ "logits/chosen": -2.456105947494507,
1589
+ "logits/rejected": -2.391247272491455,
1590
+ "logps/chosen": -317.70330810546875,
1591
+ "logps/rejected": -292.7303771972656,
1592
+ "loss": 0.2867,
1593
+ "rewards/accuracies": 0.856249988079071,
1594
+ "rewards/chosen": -0.4770506024360657,
1595
+ "rewards/margins": 1.7201404571533203,
1596
+ "rewards/rejected": -2.1971912384033203,
1597
+ "step": 1010
1598
+ },
1599
+ {
1600
+ "epoch": 1.07,
1601
+ "learning_rate": 1.404072133027306e-07,
1602
+ "logits/chosen": -2.468541383743286,
1603
+ "logits/rejected": -2.4047131538391113,
1604
+ "logps/chosen": -324.1610107421875,
1605
+ "logps/rejected": -310.6138610839844,
1606
+ "loss": 0.3011,
1607
+ "rewards/accuracies": 0.862500011920929,
1608
+ "rewards/chosen": -0.3531934320926666,
1609
+ "rewards/margins": 1.7022035121917725,
1610
+ "rewards/rejected": -2.055396795272827,
1611
+ "step": 1020
1612
+ },
1613
+ {
1614
+ "epoch": 1.08,
1615
+ "learning_rate": 1.3520838665324703e-07,
1616
+ "logits/chosen": -2.451047658920288,
1617
+ "logits/rejected": -2.426084280014038,
1618
+ "logps/chosen": -289.8973083496094,
1619
+ "logps/rejected": -315.23968505859375,
1620
+ "loss": 0.2848,
1621
+ "rewards/accuracies": 0.9125000238418579,
1622
+ "rewards/chosen": -0.3617556095123291,
1623
+ "rewards/margins": 1.7169090509414673,
1624
+ "rewards/rejected": -2.078664779663086,
1625
+ "step": 1030
1626
+ },
1627
+ {
1628
+ "epoch": 1.09,
1629
+ "learning_rate": 1.3007172168743852e-07,
1630
+ "logits/chosen": -2.4811718463897705,
1631
+ "logits/rejected": -2.442286968231201,
1632
+ "logps/chosen": -315.90509033203125,
1633
+ "logps/rejected": -345.96820068359375,
1634
+ "loss": 0.2943,
1635
+ "rewards/accuracies": 0.875,
1636
+ "rewards/chosen": -0.2960956394672394,
1637
+ "rewards/margins": 1.6934324502944946,
1638
+ "rewards/rejected": -1.9895280599594116,
1639
+ "step": 1040
1640
+ },
1641
+ {
1642
+ "epoch": 1.1,
1643
+ "learning_rate": 1.2500000000000005e-07,
1644
+ "logits/chosen": -2.4405837059020996,
1645
+ "logits/rejected": -2.38596773147583,
1646
+ "logps/chosen": -287.0311279296875,
1647
+ "logps/rejected": -303.80535888671875,
1648
+ "loss": 0.2866,
1649
+ "rewards/accuracies": 0.893750011920929,
1650
+ "rewards/chosen": -0.2767558693885803,
1651
+ "rewards/margins": 1.9063446521759033,
1652
+ "rewards/rejected": -2.183100461959839,
1653
+ "step": 1050
1654
+ },
1655
+ {
1656
+ "epoch": 1.11,
1657
+ "learning_rate": 1.1999596801769616e-07,
1658
+ "logits/chosen": -2.440513849258423,
1659
+ "logits/rejected": -2.3295609951019287,
1660
+ "logps/chosen": -326.0518493652344,
1661
+ "logps/rejected": -333.1617431640625,
1662
+ "loss": 0.2744,
1663
+ "rewards/accuracies": 0.918749988079071,
1664
+ "rewards/chosen": -0.25197187066078186,
1665
+ "rewards/margins": 2.045137882232666,
1666
+ "rewards/rejected": -2.297109603881836,
1667
+ "step": 1060
1668
+ },
1669
+ {
1670
+ "epoch": 1.12,
1671
+ "learning_rate": 1.1506233551212185e-07,
1672
+ "logits/chosen": -2.414334535598755,
1673
+ "logits/rejected": -2.3740944862365723,
1674
+ "logps/chosen": -306.10992431640625,
1675
+ "logps/rejected": -327.01959228515625,
1676
+ "loss": 0.2711,
1677
+ "rewards/accuracies": 0.90625,
1678
+ "rewards/chosen": -0.261447936296463,
1679
+ "rewards/margins": 2.056182384490967,
1680
+ "rewards/rejected": -2.3176300525665283,
1681
+ "step": 1070
1682
+ },
1683
+ {
1684
+ "epoch": 1.13,
1685
+ "learning_rate": 1.1020177413231332e-07,
1686
+ "logits/chosen": -2.424375057220459,
1687
+ "logits/rejected": -2.3422465324401855,
1688
+ "logps/chosen": -267.51129150390625,
1689
+ "logps/rejected": -272.04876708984375,
1690
+ "loss": 0.2992,
1691
+ "rewards/accuracies": 0.84375,
1692
+ "rewards/chosen": -0.5566546320915222,
1693
+ "rewards/margins": 1.5810120105743408,
1694
+ "rewards/rejected": -2.1376664638519287,
1695
+ "step": 1080
1696
+ },
1697
+ {
1698
+ "epoch": 1.14,
1699
+ "learning_rate": 1.0541691595800336e-07,
1700
+ "logits/chosen": -2.4174036979675293,
1701
+ "logits/rejected": -2.417484760284424,
1702
+ "logps/chosen": -288.2060852050781,
1703
+ "logps/rejected": -316.9212341308594,
1704
+ "loss": 0.2776,
1705
+ "rewards/accuracies": 0.918749988079071,
1706
+ "rewards/chosen": -0.14382892847061157,
1707
+ "rewards/margins": 1.983428716659546,
1708
+ "rewards/rejected": -2.127257823944092,
1709
+ "step": 1090
1710
+ },
1711
+ {
1712
+ "epoch": 1.15,
1713
+ "learning_rate": 1.007103520743035e-07,
1714
+ "logits/chosen": -2.4386072158813477,
1715
+ "logits/rejected": -2.383044719696045,
1716
+ "logps/chosen": -316.8988342285156,
1717
+ "logps/rejected": -311.27642822265625,
1718
+ "loss": 0.2725,
1719
+ "rewards/accuracies": 0.887499988079071,
1720
+ "rewards/chosen": -0.27822011709213257,
1721
+ "rewards/margins": 1.802807092666626,
1722
+ "rewards/rejected": -2.081026792526245,
1723
+ "step": 1100
1724
+ },
1725
+ {
1726
+ "epoch": 1.15,
1727
+ "eval_logits/chosen": -2.3785088062286377,
1728
+ "eval_logits/rejected": -2.3381552696228027,
1729
+ "eval_logps/chosen": -307.5903015136719,
1730
+ "eval_logps/rejected": -309.0183410644531,
1731
+ "eval_loss": 0.49208447337150574,
1732
+ "eval_rewards/accuracies": 0.7460317611694336,
1733
+ "eval_rewards/chosen": -0.5635129809379578,
1734
+ "eval_rewards/margins": 1.3005269765853882,
1735
+ "eval_rewards/rejected": -1.8640400171279907,
1736
+ "eval_runtime": 431.8636,
1737
+ "eval_samples_per_second": 4.631,
1738
+ "eval_steps_per_second": 0.146,
1739
+ "step": 1100
1740
+ },
1741
+ {
1742
+ "epoch": 1.16,
1743
+ "learning_rate": 9.608463116858542e-08,
1744
+ "logits/chosen": -2.4096856117248535,
1745
+ "logits/rejected": -2.3269238471984863,
1746
+ "logps/chosen": -319.33123779296875,
1747
+ "logps/rejected": -314.37982177734375,
1748
+ "loss": 0.2832,
1749
+ "rewards/accuracies": 0.893750011920929,
1750
+ "rewards/chosen": -0.4104345738887787,
1751
+ "rewards/margins": 1.9642629623413086,
1752
+ "rewards/rejected": -2.37469744682312,
1753
+ "step": 1110
1754
+ },
1755
+ {
1756
+ "epoch": 1.17,
1757
+ "learning_rate": 9.15422581503224e-08,
1758
+ "logits/chosen": -2.4082019329071045,
1759
+ "logits/rejected": -2.3823094367980957,
1760
+ "logps/chosen": -281.8961181640625,
1761
+ "logps/rejected": -310.1343078613281,
1762
+ "loss": 0.2659,
1763
+ "rewards/accuracies": 0.925000011920929,
1764
+ "rewards/chosen": -0.4686800539493561,
1765
+ "rewards/margins": 1.9623409509658813,
1766
+ "rewards/rejected": -2.431021213531494,
1767
+ "step": 1120
1768
+ },
1769
+ {
1770
+ "epoch": 1.18,
1771
+ "learning_rate": 8.70856927946362e-08,
1772
+ "logits/chosen": -2.4076130390167236,
1773
+ "logits/rejected": -2.377281427383423,
1774
+ "logps/chosen": -313.17877197265625,
1775
+ "logps/rejected": -320.7528381347656,
1776
+ "loss": 0.2712,
1777
+ "rewards/accuracies": 0.925000011920929,
1778
+ "rewards/chosen": -0.3578185439109802,
1779
+ "rewards/margins": 1.8895679712295532,
1780
+ "rewards/rejected": -2.2473864555358887,
1781
+ "step": 1130
1782
+ },
1783
+ {
1784
+ "epoch": 1.19,
1785
+ "learning_rate": 8.271734841028552e-08,
1786
+ "logits/chosen": -2.4748058319091797,
1787
+ "logits/rejected": -2.3889963626861572,
1788
+ "logps/chosen": -317.75933837890625,
1789
+ "logps/rejected": -335.86553955078125,
1790
+ "loss": 0.2587,
1791
+ "rewards/accuracies": 0.925000011920929,
1792
+ "rewards/chosen": -0.460686057806015,
1793
+ "rewards/margins": 1.9483623504638672,
1794
+ "rewards/rejected": -2.409048557281494,
1795
+ "step": 1140
1796
+ },
1797
+ {
1798
+ "epoch": 1.2,
1799
+ "learning_rate": 7.843959053281663e-08,
1800
+ "logits/chosen": -2.419320583343506,
1801
+ "logits/rejected": -2.3716225624084473,
1802
+ "logps/chosen": -308.89141845703125,
1803
+ "logps/rejected": -291.35797119140625,
1804
+ "loss": 0.3091,
1805
+ "rewards/accuracies": 0.90625,
1806
+ "rewards/chosen": -0.3241550624370575,
1807
+ "rewards/margins": 1.9242489337921143,
1808
+ "rewards/rejected": -2.248404026031494,
1809
+ "step": 1150
1810
+ },
1811
+ {
1812
+ "epoch": 1.21,
1813
+ "learning_rate": 7.425473564358456e-08,
1814
+ "logits/chosen": -2.4155445098876953,
1815
+ "logits/rejected": -2.374995708465576,
1816
+ "logps/chosen": -285.75823974609375,
1817
+ "logps/rejected": -320.6084899902344,
1818
+ "loss": 0.2787,
1819
+ "rewards/accuracies": 0.893750011920929,
1820
+ "rewards/chosen": -0.321144163608551,
1821
+ "rewards/margins": 2.070187568664551,
1822
+ "rewards/rejected": -2.391331911087036,
1823
+ "step": 1160
1824
+ },
1825
+ {
1826
+ "epoch": 1.22,
1827
+ "learning_rate": 7.016504991533726e-08,
1828
+ "logits/chosen": -2.4452130794525146,
1829
+ "logits/rejected": -2.416900157928467,
1830
+ "logps/chosen": -333.56890869140625,
1831
+ "logps/rejected": -288.9085998535156,
1832
+ "loss": 0.2819,
1833
+ "rewards/accuracies": 0.893750011920929,
1834
+ "rewards/chosen": -0.34982940554618835,
1835
+ "rewards/margins": 1.8246349096298218,
1836
+ "rewards/rejected": -2.174464702606201,
1837
+ "step": 1170
1838
+ },
1839
+ {
1840
+ "epoch": 1.24,
1841
+ "learning_rate": 6.617274798504286e-08,
1842
+ "logits/chosen": -2.4035491943359375,
1843
+ "logits/rejected": -2.3629653453826904,
1844
+ "logps/chosen": -306.9756164550781,
1845
+ "logps/rejected": -289.35479736328125,
1846
+ "loss": 0.2805,
1847
+ "rewards/accuracies": 0.925000011920929,
1848
+ "rewards/chosen": -0.36044904589653015,
1849
+ "rewards/margins": 1.8948723077774048,
1850
+ "rewards/rejected": -2.2553212642669678,
1851
+ "step": 1180
1852
+ },
1853
+ {
1854
+ "epoch": 1.25,
1855
+ "learning_rate": 6.22799917546252e-08,
1856
+ "logits/chosen": -2.459360361099243,
1857
+ "logits/rejected": -2.391692638397217,
1858
+ "logps/chosen": -314.09185791015625,
1859
+ "logps/rejected": -300.3113098144531,
1860
+ "loss": 0.2546,
1861
+ "rewards/accuracies": 0.925000011920929,
1862
+ "rewards/chosen": -0.302379310131073,
1863
+ "rewards/margins": 2.0588433742523193,
1864
+ "rewards/rejected": -2.361222743988037,
1865
+ "step": 1190
1866
+ },
1867
+ {
1868
+ "epoch": 1.26,
1869
+ "learning_rate": 5.848888922025552e-08,
1870
+ "logits/chosen": -2.473045825958252,
1871
+ "logits/rejected": -2.456024408340454,
1872
+ "logps/chosen": -312.7765808105469,
1873
+ "logps/rejected": -322.79498291015625,
1874
+ "loss": 0.2932,
1875
+ "rewards/accuracies": 0.862500011920929,
1876
+ "rewards/chosen": -0.290397971868515,
1877
+ "rewards/margins": 1.813227653503418,
1878
+ "rewards/rejected": -2.103625774383545,
1879
+ "step": 1200
1880
+ },
1881
+ {
1882
+ "epoch": 1.26,
1883
+ "eval_logits/chosen": -2.391538381576538,
1884
+ "eval_logits/rejected": -2.351095199584961,
1885
+ "eval_logps/chosen": -309.3631591796875,
1886
+ "eval_logps/rejected": -311.79766845703125,
1887
+ "eval_loss": 0.49240604043006897,
1888
+ "eval_rewards/accuracies": 0.7579365372657776,
1889
+ "eval_rewards/chosen": -0.6521540880203247,
1890
+ "eval_rewards/margins": 1.3508530855178833,
1891
+ "eval_rewards/rejected": -2.003007173538208,
1892
+ "eval_runtime": 414.0649,
1893
+ "eval_samples_per_second": 4.83,
1894
+ "eval_steps_per_second": 0.152,
1895
+ "step": 1200
1896
+ },
1897
+ {
1898
+ "epoch": 1.27,
1899
+ "learning_rate": 5.48014933308352e-08,
1900
+ "logits/chosen": -2.407109022140503,
1901
+ "logits/rejected": -2.3259758949279785,
1902
+ "logps/chosen": -230.8815155029297,
1903
+ "logps/rejected": -267.571044921875,
1904
+ "loss": 0.3043,
1905
+ "rewards/accuracies": 0.8812500238418579,
1906
+ "rewards/chosen": -0.5537179112434387,
1907
+ "rewards/margins": 1.7985881567001343,
1908
+ "rewards/rejected": -2.3523058891296387,
1909
+ "step": 1210
1910
+ },
1911
+ {
1912
+ "epoch": 1.28,
1913
+ "learning_rate": 5.121980087628802e-08,
1914
+ "logits/chosen": -2.468632221221924,
1915
+ "logits/rejected": -2.407593250274658,
1916
+ "logps/chosen": -321.1299133300781,
1917
+ "logps/rejected": -319.76788330078125,
1918
+ "loss": 0.2968,
1919
+ "rewards/accuracies": 0.893750011920929,
1920
+ "rewards/chosen": -0.3099203407764435,
1921
+ "rewards/margins": 2.04097580909729,
1922
+ "rewards/rejected": -2.350896120071411,
1923
+ "step": 1220
1924
+ },
1925
+ {
1926
+ "epoch": 1.29,
1927
+ "learning_rate": 4.774575140626316e-08,
1928
+ "logits/chosen": -2.399498462677002,
1929
+ "logits/rejected": -2.317718982696533,
1930
+ "logps/chosen": -311.56378173828125,
1931
+ "logps/rejected": -322.6271057128906,
1932
+ "loss": 0.2885,
1933
+ "rewards/accuracies": 0.893750011920929,
1934
+ "rewards/chosen": -0.34509938955307007,
1935
+ "rewards/margins": 1.8977705240249634,
1936
+ "rewards/rejected": -2.2428698539733887,
1937
+ "step": 1230
1938
+ },
1939
+ {
1940
+ "epoch": 1.3,
1941
+ "learning_rate": 4.438122617983442e-08,
1942
+ "logits/chosen": -2.3652095794677734,
1943
+ "logits/rejected": -2.3344194889068604,
1944
+ "logps/chosen": -290.28790283203125,
1945
+ "logps/rejected": -294.1825866699219,
1946
+ "loss": 0.2989,
1947
+ "rewards/accuracies": 0.875,
1948
+ "rewards/chosen": -0.4549785256385803,
1949
+ "rewards/margins": 1.7224676609039307,
1950
+ "rewards/rejected": -2.177445888519287,
1951
+ "step": 1240
1952
+ },
1953
+ {
1954
+ "epoch": 1.31,
1955
+ "learning_rate": 4.112804714676593e-08,
1956
+ "logits/chosen": -2.459439754486084,
1957
+ "logits/rejected": -2.4127402305603027,
1958
+ "logps/chosen": -292.1894226074219,
1959
+ "logps/rejected": -307.1498107910156,
1960
+ "loss": 0.2864,
1961
+ "rewards/accuracies": 0.925000011920929,
1962
+ "rewards/chosen": -0.346891850233078,
1963
+ "rewards/margins": 1.9702911376953125,
1964
+ "rewards/rejected": -2.317183017730713,
1965
+ "step": 1250
1966
+ },
1967
+ {
1968
+ "epoch": 1.32,
1969
+ "learning_rate": 3.798797596089351e-08,
1970
+ "logits/chosen": -2.427961587905884,
1971
+ "logits/rejected": -2.4093945026397705,
1972
+ "logps/chosen": -316.4217224121094,
1973
+ "logps/rejected": -341.4559631347656,
1974
+ "loss": 0.2918,
1975
+ "rewards/accuracies": 0.90625,
1976
+ "rewards/chosen": -0.42415857315063477,
1977
+ "rewards/margins": 1.8887336254119873,
1978
+ "rewards/rejected": -2.312892198562622,
1979
+ "step": 1260
1980
+ },
1981
+ {
1982
+ "epoch": 1.33,
1983
+ "learning_rate": 3.496271302615869e-08,
1984
+ "logits/chosen": -2.4277498722076416,
1985
+ "logits/rejected": -2.35754656791687,
1986
+ "logps/chosen": -290.10235595703125,
1987
+ "logps/rejected": -304.0705261230469,
1988
+ "loss": 0.2807,
1989
+ "rewards/accuracies": 0.925000011920929,
1990
+ "rewards/chosen": -0.38870421051979065,
1991
+ "rewards/margins": 2.0075557231903076,
1992
+ "rewards/rejected": -2.3962597846984863,
1993
+ "step": 1270
1994
+ },
1995
+ {
1996
+ "epoch": 1.34,
1997
+ "learning_rate": 3.205389657580943e-08,
1998
+ "logits/chosen": -2.438321590423584,
1999
+ "logits/rejected": -2.3755178451538086,
2000
+ "logps/chosen": -283.269775390625,
2001
+ "logps/rejected": -337.3073425292969,
2002
+ "loss": 0.2519,
2003
+ "rewards/accuracies": 0.90625,
2004
+ "rewards/chosen": -0.2695261836051941,
2005
+ "rewards/margins": 2.0451509952545166,
2006
+ "rewards/rejected": -2.3146772384643555,
2007
+ "step": 1280
2008
+ },
2009
+ {
2010
+ "epoch": 1.35,
2011
+ "learning_rate": 2.9263101785268252e-08,
2012
+ "logits/chosen": -2.4288270473480225,
2013
+ "logits/rejected": -2.39312481880188,
2014
+ "logps/chosen": -325.607421875,
2015
+ "logps/rejected": -317.9021911621094,
2016
+ "loss": 0.2867,
2017
+ "rewards/accuracies": 0.925000011920929,
2018
+ "rewards/chosen": -0.3596641421318054,
2019
+ "rewards/margins": 1.9093034267425537,
2020
+ "rewards/rejected": -2.268967390060425,
2021
+ "step": 1290
2022
+ },
2023
+ {
2024
+ "epoch": 1.36,
2025
+ "learning_rate": 2.659183991914696e-08,
2026
+ "logits/chosen": -2.419738531112671,
2027
+ "logits/rejected": -2.362929105758667,
2028
+ "logps/chosen": -296.9184875488281,
2029
+ "logps/rejected": -324.33587646484375,
2030
+ "loss": 0.275,
2031
+ "rewards/accuracies": 0.918749988079071,
2032
+ "rewards/chosen": -0.3492244482040405,
2033
+ "rewards/margins": 2.0286529064178467,
2034
+ "rewards/rejected": -2.3778772354125977,
2035
+ "step": 1300
2036
+ },
2037
+ {
2038
+ "epoch": 1.36,
2039
+ "eval_logits/chosen": -2.39336895942688,
2040
+ "eval_logits/rejected": -2.353144407272339,
2041
+ "eval_logps/chosen": -309.0525817871094,
2042
+ "eval_logps/rejected": -311.2369079589844,
2043
+ "eval_loss": 0.4915713965892792,
2044
+ "eval_rewards/accuracies": 0.7599206566810608,
2045
+ "eval_rewards/chosen": -0.6366247534751892,
2046
+ "eval_rewards/margins": 1.3383426666259766,
2047
+ "eval_rewards/rejected": -1.9749674797058105,
2048
+ "eval_runtime": 412.9249,
2049
+ "eval_samples_per_second": 4.843,
2050
+ "eval_steps_per_second": 0.153,
2051
+ "step": 1300
2052
+ },
2053
+ {
2054
+ "epoch": 1.37,
2055
+ "learning_rate": 2.4041557512869876e-08,
2056
+ "logits/chosen": -2.3756937980651855,
2057
+ "logits/rejected": -2.3104584217071533,
2058
+ "logps/chosen": -307.74395751953125,
2059
+ "logps/rejected": -316.4643859863281,
2060
+ "loss": 0.301,
2061
+ "rewards/accuracies": 0.856249988079071,
2062
+ "rewards/chosen": -0.6529245972633362,
2063
+ "rewards/margins": 1.7977030277252197,
2064
+ "rewards/rejected": -2.4506278038024902,
2065
+ "step": 1310
2066
+ },
2067
+ {
2068
+ "epoch": 1.38,
2069
+ "learning_rate": 2.1613635589349756e-08,
2070
+ "logits/chosen": -2.4178683757781982,
2071
+ "logits/rejected": -2.3683836460113525,
2072
+ "logps/chosen": -293.5295715332031,
2073
+ "logps/rejected": -329.43756103515625,
2074
+ "loss": 0.2802,
2075
+ "rewards/accuracies": 0.918749988079071,
2076
+ "rewards/chosen": -0.4704504907131195,
2077
+ "rewards/margins": 1.998552680015564,
2078
+ "rewards/rejected": -2.469003200531006,
2079
+ "step": 1320
2080
+ },
2081
+ {
2082
+ "epoch": 1.39,
2083
+ "learning_rate": 1.9309388911139424e-08,
2084
+ "logits/chosen": -2.4530060291290283,
2085
+ "logits/rejected": -2.417515993118286,
2086
+ "logps/chosen": -303.9961853027344,
2087
+ "logps/rejected": -316.8196716308594,
2088
+ "loss": 0.2883,
2089
+ "rewards/accuracies": 0.8812500238418579,
2090
+ "rewards/chosen": -0.4491092562675476,
2091
+ "rewards/margins": 1.8476078510284424,
2092
+ "rewards/rejected": -2.2967171669006348,
2093
+ "step": 1330
2094
+ },
2095
+ {
2096
+ "epoch": 1.4,
2097
+ "learning_rate": 1.713006526846439e-08,
2098
+ "logits/chosen": -2.3352913856506348,
2099
+ "logits/rejected": -2.3332459926605225,
2100
+ "logps/chosen": -309.6740417480469,
2101
+ "logps/rejected": -316.5276184082031,
2102
+ "loss": 0.3034,
2103
+ "rewards/accuracies": 0.831250011920929,
2104
+ "rewards/chosen": -0.5785962343215942,
2105
+ "rewards/margins": 1.668442964553833,
2106
+ "rewards/rejected": -2.247039318084717,
2107
+ "step": 1340
2108
+ },
2109
+ {
2110
+ "epoch": 1.41,
2111
+ "learning_rate": 1.507684480352292e-08,
2112
+ "logits/chosen": -2.444365978240967,
2113
+ "logits/rejected": -2.3748979568481445,
2114
+ "logps/chosen": -273.01177978515625,
2115
+ "logps/rejected": -300.568115234375,
2116
+ "loss": 0.2704,
2117
+ "rewards/accuracies": 0.887499988079071,
2118
+ "rewards/chosen": -0.5734509229660034,
2119
+ "rewards/margins": 1.8402652740478516,
2120
+ "rewards/rejected": -2.4137163162231445,
2121
+ "step": 1350
2122
+ },
2123
+ {
2124
+ "epoch": 1.42,
2125
+ "learning_rate": 1.3150839371417699e-08,
2126
+ "logits/chosen": -2.3913302421569824,
2127
+ "logits/rejected": -2.3794515132904053,
2128
+ "logps/chosen": -270.5587463378906,
2129
+ "logps/rejected": -277.6205749511719,
2130
+ "loss": 0.3003,
2131
+ "rewards/accuracies": 0.893750011920929,
2132
+ "rewards/chosen": -0.3957940936088562,
2133
+ "rewards/margins": 1.739976167678833,
2134
+ "rewards/rejected": -2.135770320892334,
2135
+ "step": 1360
2136
+ },
2137
+ {
2138
+ "epoch": 1.43,
2139
+ "learning_rate": 1.1353091938067023e-08,
2140
+ "logits/chosen": -2.40122652053833,
2141
+ "logits/rejected": -2.317656993865967,
2142
+ "logps/chosen": -306.97454833984375,
2143
+ "logps/rejected": -288.46038818359375,
2144
+ "loss": 0.2884,
2145
+ "rewards/accuracies": 0.887499988079071,
2146
+ "rewards/chosen": -0.4694311022758484,
2147
+ "rewards/margins": 1.7464059591293335,
2148
+ "rewards/rejected": -2.215837240219116,
2149
+ "step": 1370
2150
+ },
2151
+ {
2152
+ "epoch": 1.44,
2153
+ "learning_rate": 9.684576015420275e-09,
2154
+ "logits/chosen": -2.429955005645752,
2155
+ "logits/rejected": -2.340874433517456,
2156
+ "logps/chosen": -284.2897033691406,
2157
+ "logps/rejected": -346.4473571777344,
2158
+ "loss": 0.259,
2159
+ "rewards/accuracies": 0.9312499761581421,
2160
+ "rewards/chosen": -0.4956478178501129,
2161
+ "rewards/margins": 2.128859043121338,
2162
+ "rewards/rejected": -2.624507188796997,
2163
+ "step": 1380
2164
+ },
2165
+ {
2166
+ "epoch": 1.46,
2167
+ "learning_rate": 8.14619513428405e-09,
2168
+ "logits/chosen": -2.41424822807312,
2169
+ "logits/rejected": -2.4188144207000732,
2170
+ "logps/chosen": -284.09356689453125,
2171
+ "logps/rejected": -301.265380859375,
2172
+ "loss": 0.3023,
2173
+ "rewards/accuracies": 0.8687499761581421,
2174
+ "rewards/chosen": -0.6043499708175659,
2175
+ "rewards/margins": 1.725403070449829,
2176
+ "rewards/rejected": -2.3297529220581055,
2177
+ "step": 1390
2178
+ },
2179
+ {
2180
+ "epoch": 1.47,
2181
+ "learning_rate": 6.738782355044048e-09,
2182
+ "logits/chosen": -2.420943260192871,
2183
+ "logits/rejected": -2.3737356662750244,
2184
+ "logps/chosen": -295.2271423339844,
2185
+ "logps/rejected": -300.8685607910156,
2186
+ "loss": 0.2768,
2187
+ "rewards/accuracies": 0.9125000238418579,
2188
+ "rewards/chosen": -0.4684749245643616,
2189
+ "rewards/margins": 2.138996124267578,
2190
+ "rewards/rejected": -2.607471227645874,
2191
+ "step": 1400
2192
+ },
2193
+ {
2194
+ "epoch": 1.47,
2195
+ "eval_logits/chosen": -2.3908376693725586,
2196
+ "eval_logits/rejected": -2.3505396842956543,
2197
+ "eval_logps/chosen": -310.34185791015625,
2198
+ "eval_logps/rejected": -312.6646423339844,
2199
+ "eval_loss": 0.49215617775917053,
2200
+ "eval_rewards/accuracies": 0.7579365372657776,
2201
+ "eval_rewards/chosen": -0.7010902166366577,
2202
+ "eval_rewards/margins": 1.345264196395874,
2203
+ "eval_rewards/rejected": -2.0463547706604004,
2204
+ "eval_runtime": 434.5194,
2205
+ "eval_samples_per_second": 4.603,
2206
+ "eval_steps_per_second": 0.145,
2207
+ "step": 1400
2208
+ },
2209
+ {
2210
+ "epoch": 1.48,
2211
+ "learning_rate": 5.463099816548577e-09,
2212
+ "logits/chosen": -2.435065269470215,
2213
+ "logits/rejected": -2.368187427520752,
2214
+ "logps/chosen": -305.482666015625,
2215
+ "logps/rejected": -319.5362548828125,
2216
+ "loss": 0.2907,
2217
+ "rewards/accuracies": 0.8999999761581421,
2218
+ "rewards/chosen": -0.512501060962677,
2219
+ "rewards/margins": 1.8626611232757568,
2220
+ "rewards/rejected": -2.375162124633789,
2221
+ "step": 1410
2222
+ },
2223
+ {
2224
+ "epoch": 1.49,
2225
+ "learning_rate": 4.319838323396691e-09,
2226
+ "logits/chosen": -2.425668239593506,
2227
+ "logits/rejected": -2.394260883331299,
2228
+ "logps/chosen": -285.38983154296875,
2229
+ "logps/rejected": -326.1392822265625,
2230
+ "loss": 0.28,
2231
+ "rewards/accuracies": 0.9312499761581421,
2232
+ "rewards/chosen": -0.43002137541770935,
2233
+ "rewards/margins": 1.9287302494049072,
2234
+ "rewards/rejected": -2.3587517738342285,
2235
+ "step": 1420
2236
+ },
2237
+ {
2238
+ "epoch": 1.5,
2239
+ "learning_rate": 3.309616971855195e-09,
2240
+ "logits/chosen": -2.3961329460144043,
2241
+ "logits/rejected": -2.3518338203430176,
2242
+ "logps/chosen": -313.55267333984375,
2243
+ "logps/rejected": -286.34320068359375,
2244
+ "loss": 0.2816,
2245
+ "rewards/accuracies": 0.887499988079071,
2246
+ "rewards/chosen": -0.41422533988952637,
2247
+ "rewards/margins": 1.944323182106018,
2248
+ "rewards/rejected": -2.358548402786255,
2249
+ "step": 1430
2250
+ },
2251
+ {
2252
+ "epoch": 1.51,
2253
+ "learning_rate": 2.4329828146074096e-09,
2254
+ "logits/chosen": -2.386317491531372,
2255
+ "logits/rejected": -2.3524794578552246,
2256
+ "logps/chosen": -313.0305480957031,
2257
+ "logps/rejected": -332.1868896484375,
2258
+ "loss": 0.2798,
2259
+ "rewards/accuracies": 0.8999999761581421,
2260
+ "rewards/chosen": -0.3209957182407379,
2261
+ "rewards/margins": 2.147188663482666,
2262
+ "rewards/rejected": -2.46818470954895,
2263
+ "step": 1440
2264
+ },
2265
+ {
2266
+ "epoch": 1.52,
2267
+ "learning_rate": 1.690410564514244e-09,
2268
+ "logits/chosen": -2.411320209503174,
2269
+ "logits/rejected": -2.3845181465148926,
2270
+ "logps/chosen": -296.78228759765625,
2271
+ "logps/rejected": -306.8433837890625,
2272
+ "loss": 0.2898,
2273
+ "rewards/accuracies": 0.893750011920929,
2274
+ "rewards/chosen": -0.4139639735221863,
2275
+ "rewards/margins": 1.8331149816513062,
2276
+ "rewards/rejected": -2.2470791339874268,
2277
+ "step": 1450
2278
+ },
2279
+ {
2280
+ "epoch": 1.53,
2281
+ "learning_rate": 1.0823023375489126e-09,
2282
+ "logits/chosen": -2.468296766281128,
2283
+ "logits/rejected": -2.4058051109313965,
2284
+ "logps/chosen": -282.4747619628906,
2285
+ "logps/rejected": -294.89923095703125,
2286
+ "loss": 0.2817,
2287
+ "rewards/accuracies": 0.887499988079071,
2288
+ "rewards/chosen": -0.3540436029434204,
2289
+ "rewards/margins": 1.8670495748519897,
2290
+ "rewards/rejected": -2.22109317779541,
2291
+ "step": 1460
2292
+ },
2293
+ {
2294
+ "epoch": 1.54,
2295
+ "learning_rate": 6.089874350439505e-10,
2296
+ "logits/chosen": -2.4278903007507324,
2297
+ "logits/rejected": -2.412611961364746,
2298
+ "logps/chosen": -309.91180419921875,
2299
+ "logps/rejected": -351.41607666015625,
2300
+ "loss": 0.2826,
2301
+ "rewards/accuracies": 0.8812500238418579,
2302
+ "rewards/chosen": -0.38388413190841675,
2303
+ "rewards/margins": 1.9360164403915405,
2304
+ "rewards/rejected": -2.3199005126953125,
2305
+ "step": 1470
2306
+ },
2307
+ {
2308
+ "epoch": 1.55,
2309
+ "learning_rate": 2.707221653688585e-10,
2310
+ "logits/chosen": -2.3936755657196045,
2311
+ "logits/rejected": -2.3537259101867676,
2312
+ "logps/chosen": -316.6966247558594,
2313
+ "logps/rejected": -323.77154541015625,
2314
+ "loss": 0.2674,
2315
+ "rewards/accuracies": 0.918749988079071,
2316
+ "rewards/chosen": -0.3255850076675415,
2317
+ "rewards/margins": 2.003681182861328,
2318
+ "rewards/rejected": -2.329266309738159,
2319
+ "step": 1480
2320
+ },
2321
+ {
2322
+ "epoch": 1.56,
2323
+ "learning_rate": 6.768970513457151e-11,
2324
+ "logits/chosen": -2.3720383644104004,
2325
+ "logits/rejected": -2.346740245819092,
2326
+ "logps/chosen": -301.3364562988281,
2327
+ "logps/rejected": -312.578125,
2328
+ "loss": 0.2749,
2329
+ "rewards/accuracies": 0.8687499761581421,
2330
+ "rewards/chosen": -0.46938952803611755,
2331
+ "rewards/margins": 1.996596097946167,
2332
+ "rewards/rejected": -2.4659857749938965,
2333
+ "step": 1490
2334
+ },
2335
+ {
2336
+ "epoch": 1.57,
2337
+ "learning_rate": 0.0,
2338
+ "logits/chosen": -2.345372200012207,
2339
+ "logits/rejected": -2.3051817417144775,
2340
+ "logps/chosen": -275.92779541015625,
2341
+ "logps/rejected": -335.74761962890625,
2342
+ "loss": 0.2863,
2343
+ "rewards/accuracies": 0.856249988079071,
2344
+ "rewards/chosen": -0.5277979969978333,
2345
+ "rewards/margins": 2.0437979698181152,
2346
+ "rewards/rejected": -2.5715959072113037,
2347
+ "step": 1500
2348
+ },
2349
+ {
2350
+ "epoch": 1.57,
2351
+ "eval_logits/chosen": -2.3900513648986816,
2352
+ "eval_logits/rejected": -2.3497886657714844,
2353
+ "eval_logps/chosen": -310.23736572265625,
2354
+ "eval_logps/rejected": -312.59942626953125,
2355
+ "eval_loss": 0.4916023015975952,
2356
+ "eval_rewards/accuracies": 0.7579365372657776,
2357
+ "eval_rewards/chosen": -0.6958636045455933,
2358
+ "eval_rewards/margins": 1.3472286462783813,
2359
+ "eval_rewards/rejected": -2.0430922508239746,
2360
+ "eval_runtime": 454.0528,
2361
+ "eval_samples_per_second": 4.405,
2362
+ "eval_steps_per_second": 0.139,
2363
+ "step": 1500
2364
+ },
2365
+ {
2366
+ "epoch": 1.57,
2367
+ "step": 1500,
2368
+ "total_flos": 0.0,
2369
+ "train_loss": 0.4390208276112874,
2370
+ "train_runtime": 42485.337,
2371
+ "train_samples_per_second": 2.26,
2372
+ "train_steps_per_second": 0.035
2373
+ }
2374
+ ],
2375
+ "logging_steps": 10,
2376
+ "max_steps": 1500,
2377
+ "num_input_tokens_seen": 0,
2378
+ "num_train_epochs": 2,
2379
+ "save_steps": 100,
2380
+ "total_flos": 0.0,
2381
+ "train_batch_size": 4,
2382
+ "trial_name": null,
2383
+ "trial_params": null
2384
+ }