nlee-208 committed on
Commit 6ec61e3 · verified · 1 Parent(s): ce250b4

Model save

README.md ADDED
@@ -0,0 +1,61 @@
1
+ ---
2
+ license: apache-2.0
3
+ base_model: mistralai/Mistral-7B-Instruct-v0.2
4
+ tags:
5
+ - trl
6
+ - dpo
7
+ - generated_from_trainer
8
+ model-index:
9
+ - name: uf-mistral-it-dpo-iopo-iter1-short
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/nlee28/lucky/runs/frp5enmo)
17
+ # uf-mistral-it-dpo-iopo-iter1-short
18
+
19
+ This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on an unknown dataset.
20
+
21
+ ## Model description
22
+
23
+ More information needed
24
+
25
+ ## Intended uses & limitations
26
+
27
+ More information needed
28
+
29
+ ## Training and evaluation data
30
+
31
+ More information needed
32
+
33
+ ## Training procedure
34
+
35
+ ### Training hyperparameters
36
+
37
+ The following hyperparameters were used during training:
38
+ - learning_rate: 5e-07
39
+ - train_batch_size: 4
40
+ - eval_batch_size: 8
41
+ - seed: 42
42
+ - distributed_type: multi-GPU
43
+ - num_devices: 2
44
+ - gradient_accumulation_steps: 4
45
+ - total_train_batch_size: 32
46
+ - total_eval_batch_size: 16
47
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
48
+ - lr_scheduler_type: cosine
49
+ - lr_scheduler_warmup_ratio: 0.1
50
+ - num_epochs: 1
51
+
52
+ ### Training results
53
+
54
+
55
+
56
+ ### Framework versions
57
+
58
+ - Transformers 4.42.4
59
+ - Pytorch 2.1.2.post303
60
+ - Datasets 2.18.0
61
+ - Tokenizers 0.19.1
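The hyperparameters listed in the card above correspond to a TRL DPO run (per the `trl`/`dpo` tags). As an illustrative sketch only — the training script itself is not part of this commit — here is how those values would map onto a `DPOConfig`. It assumes a trl release that ships `DPOConfig` (≥ 0.9) alongside Transformers 4.42.x; the DPO `beta` and the preference dataset are not recorded in the card, so they are omitted.

```python
# Minimal sketch (not the author's script): mapping the model card's
# hyperparameters onto a TRL DPOConfig. Assumes trl >= 0.9 with Transformers 4.42.x.
from trl import DPOConfig

training_args = DPOConfig(
    output_dir="uf-mistral-it-dpo-iopo-iter1-short",
    learning_rate=5e-7,
    per_device_train_batch_size=4,   # train_batch_size in the card
    per_device_eval_batch_size=8,    # eval_batch_size
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    seed=42,
)

# Effective train batch size: 4 per device x 2 GPUs x 4 accumulation steps = 32,
# matching total_train_batch_size above. DPO beta and the dataset are not recorded here.
assert 4 * 2 * 4 == 32
```

The optimizer line (Adam, betas 0.9/0.999, epsilon 1e-08) matches the Transformers `Trainer` defaults, so it needs no explicit setting in this sketch.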
all_results.json ADDED
@@ -0,0 +1,9 @@
1
+ {
2
+ "epoch": 0.9995320542817033,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.2609846996010913,
5
+ "train_runtime": 23133.9065,
6
+ "train_samples": 34189,
7
+ "train_samples_per_second": 1.478,
8
+ "train_steps_per_second": 0.046
9
+ }
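The throughput figures above are internally consistent; a quick sanity check using only the numbers reported in this file plus the 1068 optimizer steps recorded in trainer_state.json later in this commit:

```python
# Sanity check of the reported training throughput (values copied from
# all_results.json / trainer_state.json in this commit).
train_samples = 34189
train_runtime = 23133.9065           # seconds
global_steps = 1068                  # global_step from trainer_state.json
effective_batch = 4 * 2 * 4          # per-device batch x GPUs x grad accumulation

print(train_samples / train_runtime)    # ~1.478  -> train_samples_per_second
print(global_steps / train_runtime)     # ~0.046  -> train_steps_per_second
print(train_samples / effective_batch)  # ~1068.4 -> steps in one epoch
```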
generation_config.json ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.42.4"
6
+ }
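The generation config only pins the BOS/EOS token ids (1 and 2, the Mistral defaults); everything else falls back to `generate()` defaults. A hedged usage sketch follows — the repository id is a placeholder, not necessarily where these files are hosted:

```python
# Illustrative inference sketch; "<user>/uf-mistral-it-dpo-iopo-iter1-short"
# is a placeholder repo id.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "<user>/uf-mistral-it-dpo-iopo-iter1-short"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id, torch_dtype=torch.bfloat16, device_map="auto"
)

messages = [{"role": "user", "content": "Summarize what DPO training does."}]
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# generation_config.json supplies bos_token_id=1 / eos_token_id=2 automatically.
output = model.generate(inputs, max_new_tokens=256)
print(tokenizer.decode(output[0][inputs.shape[-1]:], skip_special_tokens=True))
```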
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26499eaf24f2ebc46e532888d40207ad8748943f8a491a34be8bf5dcacbc5849
3
+ size 4943162336
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcbcdc1c5c7bd4d3c1ae1e3b8838716a9b450b8be2f84d6bec21ac485beacdb8
3
+ size 4999819336
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a26858d843948b46f046762ba0e83e3e623d6326c98ecb6c995014ce11432f97
3
+ size 4540516344
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
1
+ {
2
+ "metadata": {
3
+ "total_size": 14483464192
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00003-of-00003.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
143
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
144
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
145
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
146
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
147
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
148
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
149
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
150
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
151
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
152
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
153
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
154
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
155
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
156
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
157
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
158
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
159
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
160
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
161
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
162
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
163
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
164
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
165
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
166
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
167
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
168
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
169
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
170
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
171
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
172
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
173
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
174
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
175
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
176
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
177
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
178
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
179
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
180
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
181
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
182
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
183
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
184
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
185
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
186
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
187
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
188
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
189
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
190
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
191
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
192
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
193
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
194
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
195
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
196
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
197
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
198
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
199
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
200
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
201
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
202
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
203
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
204
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
205
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
206
+ "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
207
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
208
+ "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
209
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
210
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
211
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
212
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
213
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
214
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
215
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
216
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
217
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
218
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
219
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
220
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
221
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
222
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
223
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
224
+ "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
225
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
226
+ "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
227
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
228
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
229
+ "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
230
+ "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
231
+ "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
232
+ "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
233
+ "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
234
+ "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
235
+ "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
236
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
237
+ "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
238
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
239
+ "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
240
+ "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
241
+ "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
242
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
243
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
244
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
245
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
246
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
247
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
248
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
249
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
250
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
251
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
252
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
253
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
254
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
255
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
256
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
257
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
258
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
259
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
260
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
261
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
262
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
263
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
264
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
265
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
266
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
267
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
268
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
269
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
270
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
271
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
272
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
273
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
274
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
275
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
276
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
277
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
278
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
279
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
280
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
281
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
282
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
283
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
284
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
285
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
286
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
287
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
288
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
289
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
290
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
291
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
292
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
293
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
294
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
295
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
296
+ "model.norm.weight": "model-00003-of-00003.safetensors"
297
+ }
298
+ }
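The index's `total_size` (14,483,464,192 bytes) counts raw tensor data — about 7.24B parameters at 2 bytes each (bf16/fp16) — while the three shard files added earlier are slightly larger because each .safetensors file also carries a JSON header. A quick cross-check with the sizes from this commit:

```python
# Cross-check of the sharded checkpoint sizes reported in this commit.
shard_sizes = [4943162336, 4999819336, 4540516344]  # model-0000{1,2,3}-of-00003.safetensors
total_size = 14483464192                            # metadata.total_size from the index

print(sum(shard_sizes))               # 14483498016 bytes on disk
print(sum(shard_sizes) - total_size)  # ~34 KB: per-shard safetensors headers
print(total_size / 2 / 1e9)           # ~7.24e9 parameters at 2 bytes each (bf16/fp16)
```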
runs/Jul21_18-45-33_gpu-1/events.out.tfevents.1721555530.gpu-1.731892.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d33207d27e4278e7464a0b9473b24440de9f67e85246c473d785ea9f1b722b2
3
- size 78912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:358fca42c8de6932f769209f0b0d741f1af856f17b010c1c3fdfaa66d9bac27a
3
+ size 79266
train_results.json ADDED
@@ -0,0 +1,9 @@
1
+ {
2
+ "epoch": 0.9995320542817033,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.2609846996010913,
5
+ "train_runtime": 23133.9065,
6
+ "train_samples": 34189,
7
+ "train_samples_per_second": 1.478,
8
+ "train_steps_per_second": 0.046
9
+ }
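train_results.json repeats the summary from all_results.json; the per-step detail lives in trainer_state.json below. Its `rewards/*` columns are, in TRL's DPOTrainer, the implicit DPO rewards (β-scaled log-probability ratios of the policy against the reference model), and `rewards/margins` is simply `rewards/chosen - rewards/rejected`, as the first logged step illustrates:

```python
# Relationship between the logged DPO reward columns, checked against the
# step-10 entry of trainer_state.json below (values copied verbatim).
rewards_chosen = -0.0006220912327989936
rewards_rejected = 0.0013886478263884783

margin = rewards_chosen - rewards_rejected
print(margin)  # -0.002010739... == the logged rewards/margins at step 10
```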
trainer_state.json ADDED
@@ -0,0 +1,1632 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9995320542817033,
5
+ "eval_steps": 500,
6
+ "global_step": 1068,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.009358914365933552,
13
+ "grad_norm": 438.15121357366934,
14
+ "learning_rate": 4.672897196261682e-08,
15
+ "logits/chosen": -2.27764630317688,
16
+ "logits/rejected": -2.233543634414673,
17
+ "logps/chosen": -181.85769653320312,
18
+ "logps/rejected": -160.63929748535156,
19
+ "loss": 0.6952,
20
+ "rewards/accuracies": 0.35624998807907104,
21
+ "rewards/chosen": -0.0006220912327989936,
22
+ "rewards/margins": -0.00201073894277215,
23
+ "rewards/rejected": 0.0013886478263884783,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.018717828731867104,
28
+ "grad_norm": 392.19190635385917,
29
+ "learning_rate": 9.345794392523364e-08,
30
+ "logits/chosen": -2.1865861415863037,
31
+ "logits/rejected": -2.144514560699463,
32
+ "logps/chosen": -186.0104217529297,
33
+ "logps/rejected": -161.47190856933594,
34
+ "loss": 0.6858,
35
+ "rewards/accuracies": 0.5249999761581421,
36
+ "rewards/chosen": 0.014014361426234245,
37
+ "rewards/margins": 0.011283891275525093,
38
+ "rewards/rejected": 0.002730469685047865,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.028076743097800654,
43
+ "grad_norm": 350.1425757201821,
44
+ "learning_rate": 1.4018691588785045e-07,
45
+ "logits/chosen": -2.2283122539520264,
46
+ "logits/rejected": -2.222783088684082,
47
+ "logps/chosen": -204.5108184814453,
48
+ "logps/rejected": -161.1767578125,
49
+ "loss": 0.6368,
50
+ "rewards/accuracies": 0.643750011920929,
51
+ "rewards/chosen": 0.14504937827587128,
52
+ "rewards/margins": 0.13457781076431274,
53
+ "rewards/rejected": 0.010471588931977749,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 0.03743565746373421,
58
+ "grad_norm": 345.4533132113738,
59
+ "learning_rate": 1.8691588785046729e-07,
60
+ "logits/chosen": -2.2137038707733154,
61
+ "logits/rejected": -2.2049481868743896,
62
+ "logps/chosen": -183.1804962158203,
63
+ "logps/rejected": -182.72348022460938,
64
+ "loss": 0.5836,
65
+ "rewards/accuracies": 0.7562500238418579,
66
+ "rewards/chosen": 0.3550775945186615,
67
+ "rewards/margins": 0.3591943383216858,
68
+ "rewards/rejected": -0.0041167521849274635,
69
+ "step": 40
70
+ },
71
+ {
72
+ "epoch": 0.04679457182966776,
73
+ "grad_norm": 288.97151092196236,
74
+ "learning_rate": 2.336448598130841e-07,
75
+ "logits/chosen": -2.23293137550354,
76
+ "logits/rejected": -2.2262890338897705,
77
+ "logps/chosen": -187.1265869140625,
78
+ "logps/rejected": -173.95443725585938,
79
+ "loss": 0.5215,
80
+ "rewards/accuracies": 0.7437499761581421,
81
+ "rewards/chosen": 0.6253132820129395,
82
+ "rewards/margins": 0.6594952940940857,
83
+ "rewards/rejected": -0.03418206050992012,
84
+ "step": 50
85
+ },
86
+ {
87
+ "epoch": 0.05615348619560131,
88
+ "grad_norm": 404.3974395498644,
89
+ "learning_rate": 2.803738317757009e-07,
90
+ "logits/chosen": -2.178109884262085,
91
+ "logits/rejected": -2.1433067321777344,
92
+ "logps/chosen": -194.1527099609375,
93
+ "logps/rejected": -167.05271911621094,
94
+ "loss": 0.495,
95
+ "rewards/accuracies": 0.7437499761581421,
96
+ "rewards/chosen": 0.975957989692688,
97
+ "rewards/margins": 1.1514160633087158,
98
+ "rewards/rejected": -0.17545820772647858,
99
+ "step": 60
100
+ },
101
+ {
102
+ "epoch": 0.06551240056153486,
103
+ "grad_norm": 489.67076672915556,
104
+ "learning_rate": 3.271028037383177e-07,
105
+ "logits/chosen": -2.2335753440856934,
106
+ "logits/rejected": -2.2199859619140625,
107
+ "logps/chosen": -176.63388061523438,
108
+ "logps/rejected": -168.95254516601562,
109
+ "loss": 0.4915,
110
+ "rewards/accuracies": 0.8062499761581421,
111
+ "rewards/chosen": 1.0813405513763428,
112
+ "rewards/margins": 1.4744064807891846,
113
+ "rewards/rejected": -0.39306575059890747,
114
+ "step": 70
115
+ },
116
+ {
117
+ "epoch": 0.07487131492746842,
118
+ "grad_norm": 268.7439784813086,
119
+ "learning_rate": 3.7383177570093457e-07,
120
+ "logits/chosen": -2.230722427368164,
121
+ "logits/rejected": -2.2134506702423096,
122
+ "logps/chosen": -179.94815063476562,
123
+ "logps/rejected": -163.42172241210938,
124
+ "loss": 0.4306,
125
+ "rewards/accuracies": 0.8187500238418579,
126
+ "rewards/chosen": 0.9366798400878906,
127
+ "rewards/margins": 1.555740475654602,
128
+ "rewards/rejected": -0.6190606355667114,
129
+ "step": 80
130
+ },
131
+ {
132
+ "epoch": 0.08423022929340196,
133
+ "grad_norm": 407.10417831930414,
134
+ "learning_rate": 4.205607476635514e-07,
135
+ "logits/chosen": -2.239989757537842,
136
+ "logits/rejected": -2.2285830974578857,
137
+ "logps/chosen": -185.32884216308594,
138
+ "logps/rejected": -180.8179931640625,
139
+ "loss": 0.3964,
140
+ "rewards/accuracies": 0.8374999761581421,
141
+ "rewards/chosen": 0.9721916317939758,
142
+ "rewards/margins": 1.7323249578475952,
143
+ "rewards/rejected": -0.7601334452629089,
144
+ "step": 90
145
+ },
146
+ {
147
+ "epoch": 0.09358914365933552,
148
+ "grad_norm": 285.7378646864507,
149
+ "learning_rate": 4.672897196261682e-07,
150
+ "logits/chosen": -2.2750823497772217,
151
+ "logits/rejected": -2.257164716720581,
152
+ "logps/chosen": -173.72262573242188,
153
+ "logps/rejected": -175.11956787109375,
154
+ "loss": 0.3628,
155
+ "rewards/accuracies": 0.84375,
156
+ "rewards/chosen": 0.8861462473869324,
157
+ "rewards/margins": 2.0354702472686768,
158
+ "rewards/rejected": -1.1493237018585205,
159
+ "step": 100
160
+ },
161
+ {
162
+ "epoch": 0.10294805802526907,
163
+ "grad_norm": 326.35954947901496,
164
+ "learning_rate": 4.999879772999679e-07,
165
+ "logits/chosen": -2.205427646636963,
166
+ "logits/rejected": -2.2005703449249268,
167
+ "logps/chosen": -173.0555419921875,
168
+ "logps/rejected": -176.62982177734375,
169
+ "loss": 0.4143,
170
+ "rewards/accuracies": 0.8187500238418579,
171
+ "rewards/chosen": 0.5173667073249817,
172
+ "rewards/margins": 1.9897832870483398,
173
+ "rewards/rejected": -1.472416639328003,
174
+ "step": 110
175
+ },
176
+ {
177
+ "epoch": 0.11230697239120262,
178
+ "grad_norm": 510.0144601970792,
179
+ "learning_rate": 4.997742725777528e-07,
180
+ "logits/chosen": -2.220973253250122,
181
+ "logits/rejected": -2.1889216899871826,
182
+ "logps/chosen": -182.09658813476562,
183
+ "logps/rejected": -182.0464324951172,
184
+ "loss": 0.3867,
185
+ "rewards/accuracies": 0.8125,
186
+ "rewards/chosen": 0.5381929874420166,
187
+ "rewards/margins": 1.9815146923065186,
188
+ "rewards/rejected": -1.4433214664459229,
189
+ "step": 120
190
+ },
191
+ {
192
+ "epoch": 0.12166588675713617,
193
+ "grad_norm": 296.75469977214414,
194
+ "learning_rate": 4.992936596071119e-07,
195
+ "logits/chosen": -2.277806043624878,
196
+ "logits/rejected": -2.266979455947876,
197
+ "logps/chosen": -184.37937927246094,
198
+ "logps/rejected": -196.90145874023438,
199
+ "loss": 0.3428,
200
+ "rewards/accuracies": 0.8687499761581421,
201
+ "rewards/chosen": 0.19669190049171448,
202
+ "rewards/margins": 2.241025447845459,
203
+ "rewards/rejected": -2.0443339347839355,
204
+ "step": 130
205
+ },
206
+ {
207
+ "epoch": 0.13102480112306972,
208
+ "grad_norm": 338.91893998964593,
209
+ "learning_rate": 4.985466519700276e-07,
210
+ "logits/chosen": -2.310959577560425,
211
+ "logits/rejected": -2.2651824951171875,
212
+ "logps/chosen": -206.59927368164062,
213
+ "logps/rejected": -198.52685546875,
214
+ "loss": 0.3218,
215
+ "rewards/accuracies": 0.856249988079071,
216
+ "rewards/chosen": -0.12562061846256256,
217
+ "rewards/margins": 2.7015974521636963,
218
+ "rewards/rejected": -2.8272180557250977,
219
+ "step": 140
220
+ },
221
+ {
222
+ "epoch": 0.14038371548900327,
223
+ "grad_norm": 212.25106910764887,
224
+ "learning_rate": 4.975340479172484e-07,
225
+ "logits/chosen": -2.2387795448303223,
226
+ "logits/rejected": -2.2182071208953857,
227
+ "logps/chosen": -173.45755004882812,
228
+ "logps/rejected": -192.9697723388672,
229
+ "loss": 0.261,
230
+ "rewards/accuracies": 0.887499988079071,
231
+ "rewards/chosen": -0.057323455810546875,
232
+ "rewards/margins": 2.911245584487915,
233
+ "rewards/rejected": -2.968569040298462,
234
+ "step": 150
235
+ },
236
+ {
237
+ "epoch": 0.14974262985493683,
238
+ "grad_norm": 449.1022472585512,
239
+ "learning_rate": 4.962569295152789e-07,
240
+ "logits/chosen": -2.269744634628296,
241
+ "logits/rejected": -2.2439522743225098,
242
+ "logps/chosen": -182.5315704345703,
243
+ "logps/rejected": -188.2720947265625,
244
+ "loss": 0.3652,
245
+ "rewards/accuracies": 0.856249988079071,
246
+ "rewards/chosen": -0.4403308928012848,
247
+ "rewards/margins": 2.6585566997528076,
248
+ "rewards/rejected": -3.0988876819610596,
249
+ "step": 160
250
+ },
251
+ {
252
+ "epoch": 0.1591015442208704,
253
+ "grad_norm": 221.2797843804336,
254
+ "learning_rate": 4.947166614900862e-07,
255
+ "logits/chosen": -2.241987466812134,
256
+ "logits/rejected": -2.240777015686035,
257
+ "logps/chosen": -176.78402709960938,
258
+ "logps/rejected": -189.9019317626953,
259
+ "loss": 0.3148,
260
+ "rewards/accuracies": 0.824999988079071,
261
+ "rewards/chosen": -0.5035785436630249,
262
+ "rewards/margins": 3.3230865001678467,
263
+ "rewards/rejected": -3.8266654014587402,
264
+ "step": 170
265
+ },
266
+ {
267
+ "epoch": 0.16846045858680392,
268
+ "grad_norm": 248.66687889155475,
269
+ "learning_rate": 4.929148897687566e-07,
270
+ "logits/chosen": -2.2612690925598145,
271
+ "logits/rejected": -2.2250888347625732,
272
+ "logps/chosen": -160.37701416015625,
273
+ "logps/rejected": -175.27114868164062,
274
+ "loss": 0.2945,
275
+ "rewards/accuracies": 0.856249988079071,
276
+ "rewards/chosen": -0.7433215379714966,
277
+ "rewards/margins": 3.024913787841797,
278
+ "rewards/rejected": -3.768234968185425,
279
+ "step": 180
280
+ },
281
+ {
282
+ "epoch": 0.17781937295273748,
283
+ "grad_norm": 201.5607938140453,
284
+ "learning_rate": 4.908535397206616e-07,
285
+ "logits/chosen": -2.211106061935425,
286
+ "logits/rejected": -2.213930130004883,
287
+ "logps/chosen": -172.2119903564453,
288
+ "logps/rejected": -193.7111358642578,
289
+ "loss": 0.3633,
290
+ "rewards/accuracies": 0.862500011920929,
291
+ "rewards/chosen": -0.5147860050201416,
292
+ "rewards/margins": 3.2517306804656982,
293
+ "rewards/rejected": -3.766516923904419,
294
+ "step": 190
295
+ },
296
+ {
297
+ "epoch": 0.18717828731867103,
298
+ "grad_norm": 242.24625564840164,
299
+ "learning_rate": 4.885348141000122e-07,
300
+ "logits/chosen": -2.246872901916504,
301
+ "logits/rejected": -2.2560267448425293,
302
+ "logps/chosen": -203.78329467773438,
303
+ "logps/rejected": -216.1337890625,
304
+ "loss": 0.2602,
305
+ "rewards/accuracies": 0.8812500238418579,
306
+ "rewards/chosen": -0.6025756001472473,
307
+ "rewards/margins": 3.9953460693359375,
308
+ "rewards/rejected": -4.597921371459961,
309
+ "step": 200
310
+ },
311
+ {
312
+ "epoch": 0.1965372016846046,
313
+ "grad_norm": 373.54438813525843,
314
+ "learning_rate": 4.859611906920007e-07,
315
+ "logits/chosen": -2.2831497192382812,
316
+ "logits/rejected": -2.243542194366455,
317
+ "logps/chosen": -197.22555541992188,
318
+ "logps/rejected": -209.253662109375,
319
+ "loss": 0.2325,
320
+ "rewards/accuracies": 0.90625,
321
+ "rewards/chosen": -0.8766697645187378,
322
+ "rewards/margins": 3.9367973804473877,
323
+ "rewards/rejected": -4.813466548919678,
324
+ "step": 210
325
+ },
326
+ {
327
+ "epoch": 0.20589611605053815,
328
+ "grad_norm": 335.83234499510337,
329
+ "learning_rate": 4.831354196650446e-07,
330
+ "logits/chosen": -2.219947338104248,
331
+ "logits/rejected": -2.2009224891662598,
332
+ "logps/chosen": -195.6641387939453,
333
+ "logps/rejected": -214.68386840820312,
334
+ "loss": 0.2758,
335
+ "rewards/accuracies": 0.856249988079071,
336
+ "rewards/chosen": -1.506487250328064,
337
+ "rewards/margins": 3.3951823711395264,
338
+ "rewards/rejected": -4.901669502258301,
339
+ "step": 220
340
+ },
341
+ {
342
+ "epoch": 0.21525503041647168,
343
+ "grad_norm": 217.04407322808672,
344
+ "learning_rate": 4.800605206319624e-07,
345
+ "logits/chosen": -2.290611743927002,
346
+ "logits/rejected": -2.2906360626220703,
347
+ "logps/chosen": -187.4936981201172,
348
+ "logps/rejected": -206.67166137695312,
349
+ "loss": 0.2448,
350
+ "rewards/accuracies": 0.8999999761581421,
351
+ "rewards/chosen": -0.8258201479911804,
352
+ "rewards/margins": 3.9309005737304688,
353
+ "rewards/rejected": -4.756721019744873,
354
+ "step": 230
355
+ },
356
+ {
357
+ "epoch": 0.22461394478240523,
358
+ "grad_norm": 268.14247446117065,
359
+ "learning_rate": 4.767397794232225e-07,
360
+ "logits/chosen": -2.2846646308898926,
361
+ "logits/rejected": -2.2862441539764404,
362
+ "logps/chosen": -204.64450073242188,
363
+ "logps/rejected": -207.92294311523438,
364
+ "loss": 0.227,
365
+ "rewards/accuracies": 0.893750011920929,
366
+ "rewards/chosen": -0.8587066531181335,
367
+ "rewards/margins": 3.832045316696167,
368
+ "rewards/rejected": -4.690752029418945,
369
+ "step": 240
370
+ },
371
+ {
372
+ "epoch": 0.2339728591483388,
373
+ "grad_norm": 138.21085068846622,
374
+ "learning_rate": 4.731767445757111e-07,
375
+ "logits/chosen": -2.2376067638397217,
376
+ "logits/rejected": -2.2099392414093018,
377
+ "logps/chosen": -191.3949737548828,
378
+ "logps/rejected": -211.0519561767578,
379
+ "loss": 0.2885,
380
+ "rewards/accuracies": 0.8812500238418579,
381
+ "rewards/chosen": -1.0055475234985352,
382
+ "rewards/margins": 4.678719997406006,
383
+ "rewards/rejected": -5.684267044067383,
384
+ "step": 250
385
+ },
386
+ {
387
+ "epoch": 0.24333177351427235,
388
+ "grad_norm": 220.922491224004,
389
+ "learning_rate": 4.6937522354077397e-07,
390
+ "logits/chosen": -2.2818140983581543,
391
+ "logits/rejected": -2.275104522705078,
392
+ "logps/chosen": -196.9884796142578,
393
+ "logps/rejected": -220.4885711669922,
394
+ "loss": 0.2789,
395
+ "rewards/accuracies": 0.875,
396
+ "rewards/chosen": -1.6425187587738037,
397
+ "rewards/margins": 4.461324214935303,
398
+ "rewards/rejected": -6.1038432121276855,
399
+ "step": 260
400
+ },
401
+ {
402
+ "epoch": 0.2526906878802059,
403
+ "grad_norm": 301.14297310445636,
404
+ "learning_rate": 4.6533927861558166e-07,
405
+ "logits/chosen": -2.2284903526306152,
406
+ "logits/rejected": -2.2001399993896484,
407
+ "logps/chosen": -200.37940979003906,
408
+ "logps/rejected": -221.6734619140625,
409
+ "loss": 0.2105,
410
+ "rewards/accuracies": 0.90625,
411
+ "rewards/chosen": -1.2222009897232056,
412
+ "rewards/margins": 4.569552421569824,
413
+ "rewards/rejected": -5.791752815246582,
414
+ "step": 270
415
+ },
416
+ {
417
+ "epoch": 0.26204960224613943,
418
+ "grad_norm": 173.76590512224945,
419
+ "learning_rate": 4.6107322260216787e-07,
420
+ "logits/chosen": -2.269810199737549,
421
+ "logits/rejected": -2.2810139656066895,
422
+ "logps/chosen": -185.00311279296875,
423
+ "logps/rejected": -212.89730834960938,
424
+ "loss": 0.3261,
425
+ "rewards/accuracies": 0.8812500238418579,
426
+ "rewards/chosen": -1.5367414951324463,
427
+ "rewards/margins": 4.153133869171143,
428
+ "rewards/rejected": -5.68987512588501,
429
+ "step": 280
430
+ },
431
+ {
432
+ "epoch": 0.271408516612073,
433
+ "grad_norm": 306.28569991504236,
434
+ "learning_rate": 4.565816141987782e-07,
435
+ "logits/chosen": -2.2453150749206543,
436
+ "logits/rejected": -2.249136209487915,
437
+ "logps/chosen": -205.7445068359375,
438
+ "logps/rejected": -240.56655883789062,
439
+ "loss": 0.2769,
440
+ "rewards/accuracies": 0.9312499761581421,
441
+ "rewards/chosen": -1.5990461111068726,
442
+ "rewards/margins": 4.568573951721191,
443
+ "rewards/rejected": -6.167620658874512,
444
+ "step": 290
445
+ },
446
+ {
447
+ "epoch": 0.28076743097800655,
448
+ "grad_norm": 338.2369819535066,
449
+ "learning_rate": 4.518692531284555e-07,
450
+ "logits/chosen": -2.245236873626709,
451
+ "logits/rejected": -2.235701560974121,
452
+ "logps/chosen": -195.2344970703125,
453
+ "logps/rejected": -242.3723907470703,
454
+ "loss": 0.3172,
455
+ "rewards/accuracies": 0.856249988079071,
456
+ "rewards/chosen": -1.8885822296142578,
457
+ "rewards/margins": 4.1689043045043945,
458
+ "rewards/rejected": -6.057486534118652,
459
+ "step": 300
460
+ },
461
+ {
462
+ "epoch": 0.2901263453439401,
463
+ "grad_norm": 259.98560817847823,
464
+ "learning_rate": 4.469411750100657e-07,
465
+ "logits/chosen": -2.271074056625366,
466
+ "logits/rejected": -2.259099245071411,
467
+ "logps/chosen": -177.983154296875,
468
+ "logps/rejected": -225.9912872314453,
469
+ "loss": 0.1695,
470
+ "rewards/accuracies": 0.9312499761581421,
471
+ "rewards/chosen": -1.4376739263534546,
472
+ "rewards/margins": 4.754927635192871,
473
+ "rewards/rejected": -6.192601680755615,
474
+ "step": 310
475
+ },
476
+ {
477
+ "epoch": 0.29948525970987366,
478
+ "grad_norm": 140.72410819610812,
479
+ "learning_rate": 4.418026459772465e-07,
480
+ "logits/chosen": -2.323652744293213,
481
+ "logits/rejected": -2.3101260662078857,
482
+ "logps/chosen": -190.6024169921875,
483
+ "logps/rejected": -238.22805786132812,
484
+ "loss": 0.211,
485
+ "rewards/accuracies": 0.9312499761581421,
486
+ "rewards/chosen": -1.3049207925796509,
487
+ "rewards/margins": 5.320730686187744,
488
+ "rewards/rejected": -6.6256513595581055,
489
+ "step": 320
490
+ },
491
+ {
492
+ "epoch": 0.3088441740758072,
493
+ "grad_norm": 170.38089922479452,
494
+ "learning_rate": 4.3645915705102875e-07,
495
+ "logits/chosen": -2.300119161605835,
496
+ "logits/rejected": -2.2830920219421387,
497
+ "logps/chosen": -193.13084411621094,
498
+ "logps/rejected": -223.0570831298828,
499
+ "loss": 0.244,
500
+ "rewards/accuracies": 0.887499988079071,
501
+ "rewards/chosen": -2.01973295211792,
502
+ "rewards/margins": 4.6583662033081055,
503
+ "rewards/rejected": -6.678099155426025,
504
+ "step": 330
505
+ },
506
+ {
507
+ "epoch": 0.3182030884417408,
508
+ "grad_norm": 288.1744910027074,
509
+ "learning_rate": 4.3091641827214255e-07,
510
+ "logits/chosen": -2.339986562728882,
511
+ "logits/rejected": -2.3027901649475098,
512
+ "logps/chosen": -215.3566436767578,
513
+ "logps/rejected": -223.7361297607422,
514
+ "loss": 0.2796,
515
+ "rewards/accuracies": 0.90625,
516
+ "rewards/chosen": -2.0394206047058105,
517
+ "rewards/margins": 4.69008731842041,
518
+ "rewards/rejected": -6.729508399963379,
519
+ "step": 340
520
+ },
521
+ {
522
+ "epoch": 0.32756200280767434,
523
+ "grad_norm": 266.350804676357,
524
+ "learning_rate": 4.251803525992806e-07,
525
+ "logits/chosen": -2.4055254459381104,
526
+ "logits/rejected": -2.377108573913574,
527
+ "logps/chosen": -186.85452270507812,
528
+ "logps/rejected": -215.7018585205078,
529
+ "loss": 0.2159,
530
+ "rewards/accuracies": 0.8999999761581421,
531
+ "rewards/chosen": -2.4017603397369385,
532
+ "rewards/margins": 4.537654876708984,
533
+ "rewards/rejected": -6.939416408538818,
534
+ "step": 350
535
+ },
536
+ {
537
+ "epoch": 0.33692091717360784,
538
+ "grad_norm": 430.59991122058335,
539
+ "learning_rate": 4.192570895798369e-07,
540
+ "logits/chosen": -2.3556883335113525,
541
+ "logits/rejected": -2.3484387397766113,
542
+ "logps/chosen": -202.12490844726562,
543
+ "logps/rejected": -228.5521697998047,
544
+ "loss": 0.2098,
545
+ "rewards/accuracies": 0.893750011920929,
546
+ "rewards/chosen": -2.2037243843078613,
547
+ "rewards/margins": 4.6899189949035645,
548
+ "rewards/rejected": -6.893643379211426,
549
+ "step": 360
550
+ },
551
+ {
552
+ "epoch": 0.3462798315395414,
553
+ "grad_norm": 208.29725176344635,
554
+ "learning_rate": 4.1315295879988603e-07,
555
+ "logits/chosen": -2.3661398887634277,
556
+ "logits/rejected": -2.3594870567321777,
557
+ "logps/chosen": -196.1697235107422,
558
+ "logps/rejected": -233.34750366210938,
559
+ "loss": 0.2062,
560
+ "rewards/accuracies": 0.9125000238418579,
561
+ "rewards/chosen": -2.0555663108825684,
562
+ "rewards/margins": 5.407596588134766,
563
+ "rewards/rejected": -7.463162899017334,
564
+ "step": 370
565
+ },
566
+ {
567
+ "epoch": 0.35563874590547495,
568
+ "grad_norm": 307.1229438949229,
569
+ "learning_rate": 4.06874483120401e-07,
570
+ "logits/chosen": -2.366969108581543,
571
+ "logits/rejected": -2.3364720344543457,
572
+ "logps/chosen": -191.20263671875,
573
+ "logps/rejected": -227.005615234375,
574
+ "loss": 0.1488,
575
+ "rewards/accuracies": 0.9437500238418579,
576
+ "rewards/chosen": -1.4671393632888794,
577
+ "rewards/margins": 5.521860599517822,
578
+ "rewards/rejected": -6.988999843597412,
579
+ "step": 380
580
+ },
581
+ {
582
+ "epoch": 0.3649976602714085,
583
+ "grad_norm": 343.77677311698415,
584
+ "learning_rate": 4.00428371706938e-07,
585
+ "logits/chosen": -2.4439613819122314,
586
+ "logits/rejected": -2.429218292236328,
587
+ "logps/chosen": -184.9285430908203,
588
+ "logps/rejected": -223.7628936767578,
589
+ "loss": 0.2388,
590
+ "rewards/accuracies": 0.925000011920929,
591
+ "rewards/chosen": -1.4543546438217163,
592
+ "rewards/margins": 4.948115348815918,
593
+ "rewards/rejected": -6.402470588684082,
594
+ "step": 390
595
+ },
596
+ {
597
+ "epoch": 0.37435657463734207,
598
+ "grad_norm": 396.61140085928014,
599
+ "learning_rate": 3.9382151286023644e-07,
600
+ "logits/chosen": -2.3459649085998535,
601
+ "logits/rejected": -2.3251359462738037,
602
+ "logps/chosen": -215.3402862548828,
603
+ "logps/rejected": -227.07998657226562,
604
+ "loss": 0.2852,
605
+ "rewards/accuracies": 0.8812500238418579,
606
+ "rewards/chosen": -1.916053056716919,
607
+ "rewards/margins": 4.319214820861816,
608
+ "rewards/rejected": -6.235267639160156,
609
+ "step": 400
610
+ },
611
+ {
612
+ "epoch": 0.3837154890032756,
613
+ "grad_norm": 438.08804622250545,
614
+ "learning_rate": 3.8706096665539617e-07,
615
+ "logits/chosen": -2.356903076171875,
616
+ "logits/rejected": -2.3512845039367676,
617
+ "logps/chosen": -200.12722778320312,
618
+ "logps/rejected": -226.90353393554688,
619
+ "loss": 0.3127,
620
+ "rewards/accuracies": 0.9125000238418579,
621
+ "rewards/chosen": -1.6871376037597656,
622
+ "rewards/margins": 4.592865943908691,
623
+ "rewards/rejected": -6.280003547668457,
624
+ "step": 410
625
+ },
626
+ {
627
+ "epoch": 0.3930744033692092,
628
+ "grad_norm": 120.04239302174234,
629
+ "learning_rate": 3.801539573974959e-07,
630
+ "logits/chosen": -2.3720829486846924,
631
+ "logits/rejected": -2.3591010570526123,
632
+ "logps/chosen": -189.6260528564453,
633
+ "logps/rejected": -217.00198364257812,
634
+ "loss": 0.1983,
635
+ "rewards/accuracies": 0.90625,
636
+ "rewards/chosen": -1.1001567840576172,
637
+ "rewards/margins": 4.759296894073486,
638
+ "rewards/rejected": -5.8594536781311035,
639
+ "step": 420
640
+ },
641
+ {
642
+ "epoch": 0.40243331773514274,
643
+ "grad_norm": 220.6642040395276,
644
+ "learning_rate": 3.7310786590171683e-07,
645
+ "logits/chosen": -2.3531527519226074,
646
+ "logits/rejected": -2.316413402557373,
647
+ "logps/chosen": -204.20730590820312,
648
+ "logps/rejected": -217.7911834716797,
649
+ "loss": 0.251,
650
+ "rewards/accuracies": 0.918749988079071,
651
+ "rewards/chosen": -1.7743408679962158,
652
+ "rewards/margins": 4.7499566078186035,
653
+ "rewards/rejected": -6.524298191070557,
654
+ "step": 430
655
+ },
656
+ {
657
+ "epoch": 0.4117922321010763,
658
+ "grad_norm": 231.72106552607505,
659
+ "learning_rate": 3.659302216062191e-07,
660
+ "logits/chosen": -2.304792642593384,
661
+ "logits/rejected": -2.3092713356018066,
662
+ "logps/chosen": -193.0000457763672,
663
+ "logps/rejected": -231.2361602783203,
664
+ "loss": 0.1942,
665
+ "rewards/accuracies": 0.925000011920929,
666
+ "rewards/chosen": -2.236583709716797,
667
+ "rewards/margins": 4.879889965057373,
668
+ "rewards/rejected": -7.116473197937012,
669
+ "step": 440
670
+ },
671
+ {
672
+ "epoch": 0.42115114646700985,
673
+ "grad_norm": 200.0155148050309,
674
+ "learning_rate": 3.586286945262007e-07,
675
+ "logits/chosen": -2.3446478843688965,
676
+ "logits/rejected": -2.346301555633545,
677
+ "logps/chosen": -202.21690368652344,
678
+ "logps/rejected": -237.6812744140625,
679
+ "loss": 0.1977,
680
+ "rewards/accuracies": 0.925000011920929,
681
+ "rewards/chosen": -1.7922999858856201,
682
+ "rewards/margins": 5.313002109527588,
683
+ "rewards/rejected": -7.105301856994629,
684
+ "step": 450
685
+ },
686
+ {
687
+ "epoch": 0.43051006083294335,
688
+ "grad_norm": 243.59457753599952,
689
+ "learning_rate": 3.512110870577357e-07,
690
+ "logits/chosen": -2.329233169555664,
691
+ "logits/rejected": -2.316497564315796,
692
+ "logps/chosen": -200.449951171875,
693
+ "logps/rejected": -232.7522735595703,
694
+ "loss": 0.3232,
695
+ "rewards/accuracies": 0.90625,
696
+ "rewards/chosen": -2.161752939224243,
697
+ "rewards/margins": 4.482771873474121,
698
+ "rewards/rejected": -6.644524574279785,
699
+ "step": 460
700
+ },
701
+ {
702
+ "epoch": 0.4398689751988769,
703
+ "grad_norm": 236.19588125075293,
704
+ "learning_rate": 3.436853256401506e-07,
705
+ "logits/chosen": -2.426886796951294,
706
+ "logits/rejected": -2.4074816703796387,
707
+ "logps/chosen": -200.6336212158203,
708
+ "logps/rejected": -222.5684051513672,
709
+ "loss": 0.2956,
710
+ "rewards/accuracies": 0.875,
711
+ "rewards/chosen": -2.3288497924804688,
712
+ "rewards/margins": 4.197469234466553,
713
+ "rewards/rejected": -6.5263190269470215,
714
+ "step": 470
715
+ },
716
+ {
717
+ "epoch": 0.44922788956481047,
718
+ "grad_norm": 261.36723919084227,
719
+ "learning_rate": 3.3605945228584866e-07,
720
+ "logits/chosen": -2.3828203678131104,
721
+ "logits/rejected": -2.3804872035980225,
722
+ "logps/chosen": -183.07763671875,
723
+ "logps/rejected": -214.15109252929688,
724
+ "loss": 0.2362,
725
+ "rewards/accuracies": 0.90625,
726
+ "rewards/chosen": -2.2689507007598877,
727
+ "rewards/margins": 4.707828998565674,
728
+ "rewards/rejected": -6.976778984069824,
729
+ "step": 480
730
+ },
731
+ {
732
+ "epoch": 0.458586803930744,
733
+ "grad_norm": 357.0231496583015,
734
+ "learning_rate": 3.283416159866321e-07,
735
+ "logits/chosen": -2.3662495613098145,
736
+ "logits/rejected": -2.356351613998413,
737
+ "logps/chosen": -213.04910278320312,
738
+ "logps/rejected": -235.6800994873047,
739
+ "loss": 0.2538,
740
+ "rewards/accuracies": 0.925000011920929,
741
+ "rewards/chosen": -1.9594087600708008,
742
+ "rewards/margins": 5.065524101257324,
743
+ "rewards/rejected": -7.024932861328125,
744
+ "step": 490
745
+ },
746
+ {
747
+ "epoch": 0.4679457182966776,
748
+ "grad_norm": 178.83525635594185,
749
+ "learning_rate": 3.205400640057081e-07,
750
+ "logits/chosen": -2.34678053855896,
751
+ "logits/rejected": -2.3390913009643555,
752
+ "logps/chosen": -200.38333129882812,
753
+ "logps/rejected": -236.71630859375,
754
+ "loss": 0.2016,
755
+ "rewards/accuracies": 0.925000011920929,
756
+ "rewards/chosen": -2.038482189178467,
757
+ "rewards/margins": 4.947923183441162,
758
+ "rewards/rejected": -6.986405849456787,
759
+ "step": 500
760
+ },
761
+ {
762
+ "epoch": 0.47730463266261114,
763
+ "grad_norm": 154.56461882085978,
764
+ "learning_rate": 3.126631330646801e-07,
765
+ "logits/chosen": -2.380345106124878,
766
+ "logits/rejected": -2.366577625274658,
767
+ "logps/chosen": -220.55148315429688,
768
+ "logps/rejected": -243.6335906982422,
769
+ "loss": 0.228,
770
+ "rewards/accuracies": 0.8999999761581421,
771
+ "rewards/chosen": -1.79801344871521,
772
+ "rewards/margins": 4.938762187957764,
773
+ "rewards/rejected": -6.736774444580078,
774
+ "step": 510
775
+ },
776
+ {
777
+ "epoch": 0.4866635470285447,
778
+ "grad_norm": 272.7303604765264,
779
+ "learning_rate": 3.0471924043494595e-07,
780
+ "logits/chosen": -2.34561824798584,
781
+ "logits/rejected": -2.343247890472412,
782
+ "logps/chosen": -188.9125213623047,
783
+ "logps/rejected": -234.4366912841797,
784
+ "loss": 0.2892,
785
+ "rewards/accuracies": 0.918749988079071,
786
+ "rewards/chosen": -2.0266098976135254,
787
+ "rewards/margins": 4.852522850036621,
788
+ "rewards/rejected": -6.8791327476501465,
789
+ "step": 520
790
+ },
791
+ {
792
+ "epoch": 0.49602246139447825,
793
+ "grad_norm": 188.04497446499659,
794
+ "learning_rate": 2.967168749430191e-07,
795
+ "logits/chosen": -2.376474618911743,
796
+ "logits/rejected": -2.3322389125823975,
797
+ "logps/chosen": -197.1897430419922,
798
+ "logps/rejected": -208.17019653320312,
799
+ "loss": 0.2509,
800
+ "rewards/accuracies": 0.918749988079071,
801
+ "rewards/chosen": -1.9680665731430054,
802
+ "rewards/margins": 4.385739326477051,
803
+ "rewards/rejected": -6.353806495666504,
804
+ "step": 530
805
+ },
806
+ {
807
+ "epoch": 0.5053813757604118,
808
+ "grad_norm": 141.3355114298549,
809
+ "learning_rate": 2.8866458789938774e-07,
810
+ "logits/chosen": -2.341230869293213,
811
+ "logits/rejected": -2.3447136878967285,
812
+ "logps/chosen": -192.13973999023438,
813
+ "logps/rejected": -221.07467651367188,
814
+ "loss": 0.1769,
815
+ "rewards/accuracies": 0.918749988079071,
816
+ "rewards/chosen": -1.8019930124282837,
817
+ "rewards/margins": 5.1995649337768555,
818
+ "rewards/rejected": -7.00155782699585,
819
+ "step": 540
820
+ },
821
+ {
822
+ "epoch": 0.5147402901263454,
823
+ "grad_norm": 353.0896169961274,
824
+ "learning_rate": 2.8057098396060196e-07,
825
+ "logits/chosen": -2.3936164379119873,
826
+ "logits/rejected": -2.3707709312438965,
827
+ "logps/chosen": -213.8645477294922,
828
+ "logps/rejected": -229.2753143310547,
829
+ "loss": 0.2376,
830
+ "rewards/accuracies": 0.918749988079071,
831
+ "rewards/chosen": -2.169435739517212,
832
+ "rewards/margins": 4.956913948059082,
833
+ "rewards/rejected": -7.126348972320557,
834
+ "step": 550
835
+ },
836
+ {
837
+ "epoch": 0.5240992044922789,
838
+ "grad_norm": 175.32011122486495,
839
+ "learning_rate": 2.724447119343572e-07,
840
+ "logits/chosen": -2.3987956047058105,
841
+ "logits/rejected": -2.3760597705841064,
842
+ "logps/chosen": -211.10073852539062,
843
+ "logps/rejected": -236.55307006835938,
844
+ "loss": 0.2425,
845
+ "rewards/accuracies": 0.8812500238418579,
846
+ "rewards/chosen": -2.3528664112091064,
847
+ "rewards/margins": 5.059988975524902,
848
+ "rewards/rejected": -7.412856101989746,
849
+ "step": 560
850
+ },
851
+ {
852
+ "epoch": 0.5334581188582125,
853
+ "grad_norm": 196.64668218353924,
854
+ "learning_rate": 2.642944555373965e-07,
855
+ "logits/chosen": -2.3882896900177,
856
+ "logits/rejected": -2.3592185974121094,
857
+ "logps/chosen": -217.615966796875,
858
+ "logps/rejected": -237.7814483642578,
859
+ "loss": 0.2914,
860
+ "rewards/accuracies": 0.90625,
861
+ "rewards/chosen": -2.401543378829956,
862
+ "rewards/margins": 4.712543964385986,
863
+ "rewards/rejected": -7.1140875816345215,
864
+ "step": 570
865
+ },
866
+ {
867
+ "epoch": 0.542817033224146,
868
+ "grad_norm": 275.5508915566092,
869
+ "learning_rate": 2.561289241161095e-07,
870
+ "logits/chosen": -2.42510986328125,
871
+ "logits/rejected": -2.3965210914611816,
872
+ "logps/chosen": -184.4060516357422,
873
+ "logps/rejected": -224.3748016357422,
874
+ "loss": 0.1981,
875
+ "rewards/accuracies": 0.9125000238418579,
876
+ "rewards/chosen": -2.069643020629883,
877
+ "rewards/margins": 4.64445686340332,
878
+ "rewards/rejected": -6.714099884033203,
879
+ "step": 580
880
+ },
881
+ {
882
+ "epoch": 0.5521759475900796,
883
+ "grad_norm": 122.43757714489868,
884
+ "learning_rate": 2.479568433397441e-07,
885
+ "logits/chosen": -2.441179037094116,
886
+ "logits/rejected": -2.418757200241089,
887
+ "logps/chosen": -178.72349548339844,
888
+ "logps/rejected": -230.2123565673828,
889
+ "loss": 0.2123,
890
+ "rewards/accuracies": 0.918749988079071,
891
+ "rewards/chosen": -1.8525559902191162,
892
+ "rewards/margins": 4.985072135925293,
893
+ "rewards/rejected": -6.837628364562988,
894
+ "step": 590
895
+ },
896
+ {
897
+ "epoch": 0.5615348619560131,
898
+ "grad_norm": 236.4733706875993,
899
+ "learning_rate": 2.3978694587617473e-07,
900
+ "logits/chosen": -2.5012760162353516,
901
+ "logits/rejected": -2.5047078132629395,
902
+ "logps/chosen": -190.4002685546875,
903
+ "logps/rejected": -224.8535614013672,
904
+ "loss": 0.2095,
905
+ "rewards/accuracies": 0.918749988079071,
906
+ "rewards/chosen": -1.8381601572036743,
907
+ "rewards/margins": 4.416454315185547,
908
+ "rewards/rejected": -6.25461483001709,
909
+ "step": 600
910
+ },
911
+ {
912
+ "epoch": 0.5708937763219466,
913
+ "grad_norm": 305.5301632445064,
914
+ "learning_rate": 2.3162796206019266e-07,
915
+ "logits/chosen": -2.513767957687378,
916
+ "logits/rejected": -2.4798645973205566,
917
+ "logps/chosen": -197.41751098632812,
918
+ "logps/rejected": -212.202392578125,
919
+ "loss": 0.2902,
920
+ "rewards/accuracies": 0.925000011920929,
921
+ "rewards/chosen": -1.4360084533691406,
922
+ "rewards/margins": 4.890429496765137,
923
+ "rewards/rejected": -6.326437950134277,
924
+ "step": 610
925
+ },
926
+ {
927
+ "epoch": 0.5802526906878802,
928
+ "grad_norm": 204.01758629819477,
929
+ "learning_rate": 2.2348861056428868e-07,
930
+ "logits/chosen": -2.417910575866699,
931
+ "logits/rejected": -2.396533727645874,
932
+ "logps/chosen": -198.9875030517578,
933
+ "logps/rejected": -234.6140899658203,
934
+ "loss": 0.1734,
935
+ "rewards/accuracies": 0.9312499761581421,
936
+ "rewards/chosen": -1.8775478601455688,
937
+ "rewards/margins": 5.081615924835205,
938
+ "rewards/rejected": -6.959162712097168,
939
+ "step": 620
940
+ },
941
+ {
942
+ "epoch": 0.5896116050538137,
943
+ "grad_norm": 249.61384281322438,
944
+ "learning_rate": 2.153775890818989e-07,
945
+ "logits/chosen": -2.372988700866699,
946
+ "logits/rejected": -2.374239444732666,
947
+ "logps/chosen": -199.1547088623047,
948
+ "logps/rejected": -237.0546112060547,
949
+ "loss": 0.1912,
950
+ "rewards/accuracies": 0.918749988079071,
951
+ "rewards/chosen": -2.228351593017578,
952
+ "rewards/margins": 5.300154685974121,
953
+ "rewards/rejected": -7.528506278991699,
954
+ "step": 630
955
+ },
956
+ {
957
+ "epoch": 0.5989705194197473,
958
+ "grad_norm": 233.18324352334685,
959
+ "learning_rate": 2.0730356503306806e-07,
960
+ "logits/chosen": -2.3942298889160156,
961
+ "logits/rejected": -2.3816661834716797,
962
+ "logps/chosen": -206.57546997070312,
963
+ "logps/rejected": -220.2229766845703,
964
+ "loss": 0.1882,
965
+ "rewards/accuracies": 0.9375,
966
+ "rewards/chosen": -1.8596980571746826,
967
+ "rewards/margins": 4.845181465148926,
968
+ "rewards/rejected": -6.7048797607421875,
969
+ "step": 640
970
+ },
971
+ {
972
+ "epoch": 0.6083294337856808,
973
+ "grad_norm": 284.0861886390374,
974
+ "learning_rate": 1.9927516630246335e-07,
975
+ "logits/chosen": -2.374399185180664,
976
+ "logits/rejected": -2.3482818603515625,
977
+ "logps/chosen": -202.29544067382812,
978
+ "logps/rejected": -216.13223266601562,
979
+ "loss": 0.1947,
980
+ "rewards/accuracies": 0.9624999761581421,
981
+ "rewards/chosen": -2.2385735511779785,
982
+ "rewards/margins": 5.253697395324707,
983
+ "rewards/rejected": -7.492271423339844,
984
+ "step": 650
985
+ },
986
+ {
987
+ "epoch": 0.6176883481516144,
988
+ "grad_norm": 122.40127647049943,
989
+ "learning_rate": 1.9130097201963545e-07,
990
+ "logits/chosen": -2.3976006507873535,
991
+ "logits/rejected": -2.373926877975464,
992
+ "logps/chosen": -202.98617553710938,
993
+ "logps/rejected": -230.7165069580078,
994
+ "loss": 0.1549,
995
+ "rewards/accuracies": 0.9312499761581421,
996
+ "rewards/chosen": -2.2640860080718994,
997
+ "rewards/margins": 5.643507957458496,
998
+ "rewards/rejected": -7.907593727111816,
999
+ "step": 660
1000
+ },
1001
+ {
1002
+ "epoch": 0.6270472625175479,
1003
+ "grad_norm": 253.97537353410252,
1004
+ "learning_rate": 1.833895033913789e-07,
1005
+ "logits/chosen": -2.401292324066162,
1006
+ "logits/rejected": -2.388359785079956,
1007
+ "logps/chosen": -204.93606567382812,
1008
+ "logps/rejected": -233.9709014892578,
1009
+ "loss": 0.2199,
1010
+ "rewards/accuracies": 0.893750011920929,
1011
+ "rewards/chosen": -2.0720014572143555,
1012
+ "rewards/margins": 5.02842903137207,
1013
+ "rewards/rejected": -7.100430488586426,
1014
+ "step": 670
1015
+ },
1016
+ {
1017
+ "epoch": 0.6364061768834816,
1018
+ "grad_norm": 160.3951062625682,
1019
+ "learning_rate": 1.755492145959896e-07,
1020
+ "logits/chosen": -2.4025282859802246,
1021
+ "logits/rejected": -2.397082567214966,
1022
+ "logps/chosen": -184.88265991210938,
1023
+ "logps/rejected": -211.2412872314453,
1024
+ "loss": 0.215,
1025
+ "rewards/accuracies": 0.9125000238418579,
1026
+ "rewards/chosen": -1.7722076177597046,
1027
+ "rewards/margins": 4.766100883483887,
1028
+ "rewards/rejected": -6.538309574127197,
1029
+ "step": 680
1030
+ },
1031
+ {
1032
+ "epoch": 0.6457650912494151,
1033
+ "grad_norm": 183.24580011494754,
1034
+ "learning_rate": 1.6778848374914728e-07,
1035
+ "logits/chosen": -2.356121778488159,
1036
+ "logits/rejected": -2.343468427658081,
1037
+ "logps/chosen": -202.14730834960938,
1038
+ "logps/rejected": -237.6175994873047,
1039
+ "loss": 0.1561,
1040
+ "rewards/accuracies": 0.893750011920929,
1041
+ "rewards/chosen": -2.2485437393188477,
1042
+ "rewards/margins": 4.805045127868652,
1043
+ "rewards/rejected": -7.0535888671875,
1044
+ "step": 690
1045
+ },
1046
+ {
1047
+ "epoch": 0.6551240056153487,
1048
+ "grad_norm": 331.5265540619454,
1049
+ "learning_rate": 1.6011560395107998e-07,
1050
+ "logits/chosen": -2.309534788131714,
1051
+ "logits/rejected": -2.313741445541382,
1052
+ "logps/chosen": -219.30227661132812,
1053
+ "logps/rejected": -265.6573486328125,
1054
+ "loss": 0.1999,
1055
+ "rewards/accuracies": 0.956250011920929,
1056
+ "rewards/chosen": -2.002448797225952,
1057
+ "rewards/margins": 5.976955890655518,
1058
+ "rewards/rejected": -7.979405403137207,
1059
+ "step": 700
1060
+ },
1061
+ {
1062
+ "epoch": 0.6644829199812822,
1063
+ "grad_norm": 230.35235648135276,
1064
+ "learning_rate": 1.5253877442457446e-07,
1065
+ "logits/chosen": -2.380941390991211,
1066
+ "logits/rejected": -2.3474934101104736,
1067
+ "logps/chosen": -195.14735412597656,
1068
+ "logps/rejected": -228.6056671142578,
1069
+ "loss": 0.1553,
1070
+ "rewards/accuracies": 0.9312499761581421,
1071
+ "rewards/chosen": -2.0050787925720215,
1072
+ "rewards/margins": 5.060803413391113,
1073
+ "rewards/rejected": -7.065882682800293,
1074
+ "step": 710
1075
+ },
1076
+ {
1077
+ "epoch": 0.6738418343472157,
1078
+ "grad_norm": 264.0638798597405,
1079
+ "learning_rate": 1.450660917533048e-07,
1080
+ "logits/chosen": -2.383948802947998,
1081
+ "logits/rejected": -2.370701313018799,
1082
+ "logps/chosen": -200.4253692626953,
1083
+ "logps/rejected": -228.43276977539062,
1084
+ "loss": 0.2231,
1085
+ "rewards/accuracies": 0.918749988079071,
1086
+ "rewards/chosen": -1.5557230710983276,
1087
+ "rewards/margins": 5.590066432952881,
1088
+ "rewards/rejected": -7.145790100097656,
1089
+ "step": 720
1090
+ },
1091
+ {
1092
+ "epoch": 0.6832007487131493,
1093
+ "grad_norm": 301.1690624380923,
1094
+ "learning_rate": 1.377055412298402e-07,
1095
+ "logits/chosen": -2.315328359603882,
1096
+ "logits/rejected": -2.3330445289611816,
1097
+ "logps/chosen": -184.9591827392578,
1098
+ "logps/rejected": -221.6781463623047,
1099
+ "loss": 0.2814,
1100
+ "rewards/accuracies": 0.8999999761581421,
1101
+ "rewards/chosen": -1.6768786907196045,
1102
+ "rewards/margins": 4.688675880432129,
1103
+ "rewards/rejected": -6.3655548095703125,
1104
+ "step": 730
1105
+ },
1106
+ {
1107
+ "epoch": 0.6925596630790828,
1108
+ "grad_norm": 138.29726172165815,
1109
+ "learning_rate": 1.3046498832257924e-07,
1110
+ "logits/chosen": -2.261472463607788,
1111
+ "logits/rejected": -2.2410614490509033,
1112
+ "logps/chosen": -180.821044921875,
1113
+ "logps/rejected": -227.9903564453125,
1114
+ "loss": 0.2016,
1115
+ "rewards/accuracies": 0.918749988079071,
1116
+ "rewards/chosen": -1.7726589441299438,
1117
+ "rewards/margins": 5.4179792404174805,
1118
+ "rewards/rejected": -7.190638542175293,
1119
+ "step": 740
1120
+ },
1121
+ {
1122
+ "epoch": 0.7019185774450164,
1123
+ "grad_norm": 159.55209169708692,
1124
+ "learning_rate": 1.233521702707264e-07,
1125
+ "logits/chosen": -2.3717551231384277,
1126
+ "logits/rejected": -2.3354125022888184,
1127
+ "logps/chosen": -218.03079223632812,
1128
+ "logps/rejected": -241.95547485351562,
1129
+ "loss": 0.1846,
1130
+ "rewards/accuracies": 0.956250011920929,
1131
+ "rewards/chosen": -1.688469648361206,
1132
+ "rewards/margins": 5.713479518890381,
1133
+ "rewards/rejected": -7.401949405670166,
1134
+ "step": 750
1135
+ },
1136
+ {
1137
+ "epoch": 0.7112774918109499,
1138
+ "grad_norm": 148.40838445395983,
1139
+ "learning_rate": 1.1637468781629567e-07,
1140
+ "logits/chosen": -2.3702948093414307,
1141
+ "logits/rejected": -2.3584303855895996,
1142
+ "logps/chosen": -200.7034912109375,
1143
+ "logps/rejected": -240.5320281982422,
1144
+ "loss": 0.2118,
1145
+ "rewards/accuracies": 0.925000011920929,
1146
+ "rewards/chosen": -2.420879602432251,
1147
+ "rewards/margins": 4.9278974533081055,
1148
+ "rewards/rejected": -7.348776817321777,
1149
+ "step": 760
1150
+ },
1151
+ {
1152
+ "epoch": 0.7206364061768835,
1153
+ "grad_norm": 117.9389721926925,
1154
+ "learning_rate": 1.0953999708197404e-07,
1155
+ "logits/chosen": -2.3842949867248535,
1156
+ "logits/rejected": -2.3798604011535645,
1157
+ "logps/chosen": -177.48667907714844,
1158
+ "logps/rejected": -225.3169403076172,
1159
+ "loss": 0.1628,
1160
+ "rewards/accuracies": 0.918749988079071,
1161
+ "rewards/chosen": -2.098729372024536,
1162
+ "rewards/margins": 5.115200996398926,
1163
+ "rewards/rejected": -7.213929653167725,
1164
+ "step": 770
1165
+ },
1166
+ {
1167
+ "epoch": 0.729995320542817,
1168
+ "grad_norm": 97.92207841158323,
1169
+ "learning_rate": 1.0285540160352404e-07,
1170
+ "logits/chosen": -2.386287212371826,
1171
+ "logits/rejected": -2.376877546310425,
1172
+ "logps/chosen": -207.9984893798828,
1173
+ "logps/rejected": -236.9729766845703,
1174
+ "loss": 0.1808,
1175
+ "rewards/accuracies": 0.9375,
1176
+ "rewards/chosen": -1.8446118831634521,
1177
+ "rewards/margins": 4.870266914367676,
1178
+ "rewards/rejected": -6.714879035949707,
1179
+ "step": 780
1180
+ },
1181
+ {
1182
+ "epoch": 0.7393542349087506,
1183
+ "grad_norm": 192.7463006493741,
1184
+ "learning_rate": 9.632804452524256e-08,
1185
+ "logits/chosen": -2.3924126625061035,
1186
+ "logits/rejected": -2.3695528507232666,
1187
+ "logps/chosen": -194.1177978515625,
1188
+ "logps/rejected": -225.58261108398438,
1189
+ "loss": 0.1798,
1190
+ "rewards/accuracies": 0.887499988079071,
1191
+ "rewards/chosen": -1.8828728199005127,
1192
+ "rewards/margins": 4.728604316711426,
1193
+ "rewards/rejected": -6.611476898193359,
1194
+ "step": 790
1195
+ },
1196
+ {
1197
+ "epoch": 0.7487131492746841,
1198
+ "grad_norm": 252.49293961822633,
1199
+ "learning_rate": 8.996490096681109e-08,
1200
+ "logits/chosen": -2.332010269165039,
1201
+ "logits/rejected": -2.331939935684204,
1202
+ "logps/chosen": -192.48745727539062,
1203
+ "logps/rejected": -239.68603515625,
1204
+ "loss": 0.2127,
1205
+ "rewards/accuracies": 0.9375,
1206
+ "rewards/chosen": -1.7579126358032227,
1207
+ "rewards/margins": 5.512737274169922,
1208
+ "rewards/rejected": -7.2706499099731445,
1209
+ "step": 800
1210
+ },
1211
+ {
1212
+ "epoch": 0.7580720636406177,
1213
+ "grad_norm": 286.82155890951486,
1214
+ "learning_rate": 8.377277056969842e-08,
1215
+ "logits/chosen": -2.3116650581359863,
1216
+ "logits/rejected": -2.294281482696533,
1217
+ "logps/chosen": -200.29153442382812,
1218
+ "logps/rejected": -229.8260040283203,
1219
+ "loss": 0.2065,
1220
+ "rewards/accuracies": 0.96875,
1221
+ "rewards/chosen": -2.109861135482788,
1222
+ "rewards/margins": 5.406586647033691,
1223
+ "rewards/rejected": -7.5164475440979,
1224
+ "step": 810
1225
+ },
1226
+ {
1227
+ "epoch": 0.7674309780065512,
1228
+ "grad_norm": 184.48830075035403,
1229
+ "learning_rate": 7.775827023107834e-08,
1230
+ "logits/chosen": -2.3696084022521973,
1231
+ "logits/rejected": -2.3516430854797363,
1232
+ "logps/chosen": -199.99896240234375,
1233
+ "logps/rejected": -222.9709014892578,
1234
+ "loss": 0.1882,
1235
+ "rewards/accuracies": 0.9312499761581421,
1236
+ "rewards/chosen": -1.6485363245010376,
1237
+ "rewards/margins": 5.333346843719482,
1238
+ "rewards/rejected": -6.981882572174072,
1239
+ "step": 820
1240
+ },
1241
+ {
1242
+ "epoch": 0.7767898923724847,
1243
+ "grad_norm": 132.0229952982291,
1244
+ "learning_rate": 7.192782703302785e-08,
1245
+ "logits/chosen": -2.4008779525756836,
1246
+ "logits/rejected": -2.4027934074401855,
1247
+ "logps/chosen": -204.44485473632812,
1248
+ "logps/rejected": -239.76693725585938,
1249
+ "loss": 0.1912,
1250
+ "rewards/accuracies": 0.925000011920929,
1251
+ "rewards/chosen": -2.0589280128479004,
1252
+ "rewards/margins": 4.891021251678467,
1253
+ "rewards/rejected": -6.949949741363525,
1254
+ "step": 830
1255
+ },
1256
+ {
1257
+ "epoch": 0.7861488067384184,
1258
+ "grad_norm": 235.57073722900367,
1259
+ "learning_rate": 6.628767137456067e-08,
1260
+ "logits/chosen": -2.3839497566223145,
1261
+ "logits/rejected": -2.3741023540496826,
1262
+ "logps/chosen": -214.9656219482422,
1263
+ "logps/rejected": -243.2672882080078,
1264
+ "loss": 0.2597,
1265
+ "rewards/accuracies": 0.90625,
1266
+ "rewards/chosen": -1.827707290649414,
1267
+ "rewards/margins": 5.181296348571777,
1268
+ "rewards/rejected": -7.009003639221191,
1269
+ "step": 840
1270
+ },
1271
+ {
1272
+ "epoch": 0.7955077211043519,
1273
+ "grad_norm": 158.98404160783085,
1274
+ "learning_rate": 6.08438303138365e-08,
1275
+ "logits/chosen": -2.3602254390716553,
1276
+ "logits/rejected": -2.350632429122925,
1277
+ "logps/chosen": -197.31517028808594,
1278
+ "logps/rejected": -231.42343139648438,
1279
+ "loss": 0.2479,
1280
+ "rewards/accuracies": 0.90625,
1281
+ "rewards/chosen": -2.3168513774871826,
1282
+ "rewards/margins": 4.414270877838135,
1283
+ "rewards/rejected": -6.731122016906738,
1284
+ "step": 850
1285
+ },
1286
+ {
1287
+ "epoch": 0.8048666354702855,
1288
+ "grad_norm": 177.35304007639718,
1289
+ "learning_rate": 5.560212112766011e-08,
1290
+ "logits/chosen": -2.3707616329193115,
1291
+ "logits/rejected": -2.3783464431762695,
1292
+ "logps/chosen": -198.06982421875,
1293
+ "logps/rejected": -237.296875,
1294
+ "loss": 0.2453,
1295
+ "rewards/accuracies": 0.893750011920929,
1296
+ "rewards/chosen": -1.8742965459823608,
1297
+ "rewards/margins": 5.264628887176514,
1298
+ "rewards/rejected": -7.138925075531006,
1299
+ "step": 860
1300
+ },
1301
+ {
1302
+ "epoch": 0.814225549836219,
1303
+ "grad_norm": 271.13283993407134,
1304
+ "learning_rate": 5.056814509515092e-08,
1305
+ "logits/chosen": -2.428950071334839,
1306
+ "logits/rejected": -2.3831164836883545,
1307
+ "logps/chosen": -200.1393585205078,
1308
+ "logps/rejected": -216.57424926757812,
1309
+ "loss": 0.1966,
1310
+ "rewards/accuracies": 0.918749988079071,
1311
+ "rewards/chosen": -1.7213060855865479,
1312
+ "rewards/margins": 5.350318431854248,
1313
+ "rewards/rejected": -7.071624755859375,
1314
+ "step": 870
1315
+ },
1316
+ {
1317
+ "epoch": 0.8235844642021526,
1318
+ "grad_norm": 203.86764722500251,
1319
+ "learning_rate": 4.57472815122294e-08,
1320
+ "logits/chosen": -2.366086483001709,
1321
+ "logits/rejected": -2.3685665130615234,
1322
+ "logps/chosen": -190.0081787109375,
1323
+ "logps/rejected": -232.5431365966797,
1324
+ "loss": 0.2183,
1325
+ "rewards/accuracies": 0.9437500238418579,
1326
+ "rewards/chosen": -1.7962220907211304,
1327
+ "rewards/margins": 5.570892333984375,
1328
+ "rewards/rejected": -7.367114067077637,
1329
+ "step": 880
1330
+ },
1331
+ {
1332
+ "epoch": 0.8329433785680861,
1333
+ "grad_norm": 271.41817915302744,
1334
+ "learning_rate": 4.1144681943312135e-08,
1335
+ "logits/chosen": -2.3388266563415527,
1336
+ "logits/rejected": -2.342984676361084,
1337
+ "logps/chosen": -193.9663848876953,
1338
+ "logps/rejected": -224.80859375,
1339
+ "loss": 0.262,
1340
+ "rewards/accuracies": 0.90625,
1341
+ "rewards/chosen": -2.0019173622131348,
1342
+ "rewards/margins": 4.8847246170043945,
1343
+ "rewards/rejected": -6.886641502380371,
1344
+ "step": 890
1345
+ },
1346
+ {
1347
+ "epoch": 0.8423022929340197,
1348
+ "grad_norm": 308.2778765171746,
1349
+ "learning_rate": 3.676526471636168e-08,
1350
+ "logits/chosen": -2.3499531745910645,
1351
+ "logits/rejected": -2.3488576412200928,
1352
+ "logps/chosen": -193.8671112060547,
1353
+ "logps/rejected": -219.2283172607422,
1354
+ "loss": 0.1793,
1355
+ "rewards/accuracies": 0.9125000238418579,
1356
+ "rewards/chosen": -1.8417354822158813,
1357
+ "rewards/margins": 4.747663974761963,
1358
+ "rewards/rejected": -6.5893988609313965,
1359
+ "step": 900
1360
+ },
1361
+ {
1362
+ "epoch": 0.8516612072999532,
1363
+ "grad_norm": 153.53030663881188,
1364
+ "learning_rate": 3.2613709667171893e-08,
1365
+ "logits/chosen": -2.388249635696411,
1366
+ "logits/rejected": -2.360759735107422,
1367
+ "logps/chosen": -199.05209350585938,
1368
+ "logps/rejected": -228.4853973388672,
1369
+ "loss": 0.1961,
1370
+ "rewards/accuracies": 0.90625,
1371
+ "rewards/chosen": -1.754990577697754,
1372
+ "rewards/margins": 5.208621978759766,
1373
+ "rewards/rejected": -6.9636125564575195,
1374
+ "step": 910
1375
+ },
1376
+ {
1377
+ "epoch": 0.8610201216658867,
1378
+ "grad_norm": 291.40469015284674,
1379
+ "learning_rate": 2.8694453138505296e-08,
1380
+ "logits/chosen": -2.370460033416748,
1381
+ "logits/rejected": -2.35982346534729,
1382
+ "logps/chosen": -189.83285522460938,
1383
+ "logps/rejected": -222.2431182861328,
1384
+ "loss": 0.2066,
1385
+ "rewards/accuracies": 0.9312499761581421,
1386
+ "rewards/chosen": -2.2980434894561768,
1387
+ "rewards/margins": 4.7502241134643555,
1388
+ "rewards/rejected": -7.048267364501953,
1389
+ "step": 920
1390
+ },
1391
+ {
1392
+ "epoch": 0.8703790360318203,
1393
+ "grad_norm": 290.7230352789667,
1394
+ "learning_rate": 2.5011683239426847e-08,
1395
+ "logits/chosen": -2.3978443145751953,
1396
+ "logits/rejected": -2.3811287879943848,
1397
+ "logps/chosen": -215.52938842773438,
1398
+ "logps/rejected": -239.8271026611328,
1399
+ "loss": 0.237,
1400
+ "rewards/accuracies": 0.9312499761581421,
1401
+ "rewards/chosen": -2.1587111949920654,
1402
+ "rewards/margins": 5.388065338134766,
1403
+ "rewards/rejected": -7.546775817871094,
1404
+ "step": 930
1405
+ },
1406
+ {
1407
+ "epoch": 0.8797379503977538,
1408
+ "grad_norm": 220.6414563408128,
1409
+ "learning_rate": 2.1569335369899884e-08,
1410
+ "logits/chosen": -2.418283700942993,
1411
+ "logits/rejected": -2.3877856731414795,
1412
+ "logps/chosen": -211.22250366210938,
1413
+ "logps/rejected": -232.1291046142578,
1414
+ "loss": 0.2113,
1415
+ "rewards/accuracies": 0.8999999761581421,
1416
+ "rewards/chosen": -1.8959633111953735,
1417
+ "rewards/margins": 5.136784553527832,
1418
+ "rewards/rejected": -7.032748222351074,
1419
+ "step": 940
1420
+ },
1421
+ {
1422
+ "epoch": 0.8890968647636874,
1423
+ "grad_norm": 143.48169448012388,
1424
+ "learning_rate": 1.837108801542589e-08,
1425
+ "logits/chosen": -2.411691904067993,
1426
+ "logits/rejected": -2.4168848991394043,
1427
+ "logps/chosen": -205.9098358154297,
1428
+ "logps/rejected": -232.7529296875,
1429
+ "loss": 0.1393,
1430
+ "rewards/accuracies": 0.949999988079071,
1431
+ "rewards/chosen": -1.89556086063385,
1432
+ "rewards/margins": 5.379176139831543,
1433
+ "rewards/rejected": -7.2747368812561035,
1434
+ "step": 950
1435
+ },
1436
+ {
1437
+ "epoch": 0.8984557791296209,
1438
+ "grad_norm": 276.0510358237357,
1439
+ "learning_rate": 1.5420358816223e-08,
1440
+ "logits/chosen": -2.351065158843994,
1441
+ "logits/rejected": -2.3105835914611816,
1442
+ "logps/chosen": -194.69497680664062,
1443
+ "logps/rejected": -240.65762329101562,
1444
+ "loss": 0.1762,
1445
+ "rewards/accuracies": 0.96875,
1446
+ "rewards/chosen": -1.7103493213653564,
1447
+ "rewards/margins": 5.52829122543335,
1448
+ "rewards/rejected": -7.238640785217285,
1449
+ "step": 960
1450
+ },
1451
+ {
1452
+ "epoch": 0.9078146934955545,
1453
+ "grad_norm": 286.7038886752733,
1454
+ "learning_rate": 1.2720300915142978e-08,
1455
+ "logits/chosen": -2.3931686878204346,
1456
+ "logits/rejected": -2.379987955093384,
1457
+ "logps/chosen": -212.43826293945312,
1458
+ "logps/rejected": -237.0255584716797,
1459
+ "loss": 0.2259,
1460
+ "rewards/accuracies": 0.9125000238418579,
1461
+ "rewards/chosen": -2.4316928386688232,
1462
+ "rewards/margins": 4.873816967010498,
1463
+ "rewards/rejected": -7.3055100440979,
1464
+ "step": 970
1465
+ },
1466
+ {
1467
+ "epoch": 0.917173607861488,
1468
+ "grad_norm": 227.75719345945672,
1469
+ "learning_rate": 1.0273799588229659e-08,
1470
+ "logits/chosen": -2.3514316082000732,
1471
+ "logits/rejected": -2.3695566654205322,
1472
+ "logps/chosen": -182.806884765625,
1473
+ "logps/rejected": -219.46005249023438,
1474
+ "loss": 0.1828,
1475
+ "rewards/accuracies": 0.9375,
1476
+ "rewards/chosen": -1.490412712097168,
1477
+ "rewards/margins": 5.141217231750488,
1478
+ "rewards/rejected": -6.631629943847656,
1479
+ "step": 980
1480
+ },
1481
+ {
1482
+ "epoch": 0.9265325222274217,
1483
+ "grad_norm": 109.72235164161634,
1484
+ "learning_rate": 8.08346916151903e-09,
1485
+ "logits/chosen": -2.309913396835327,
1486
+ "logits/rejected": -2.304335355758667,
1487
+ "logps/chosen": -185.7053985595703,
1488
+ "logps/rejected": -225.95895385742188,
1489
+ "loss": 0.2306,
1490
+ "rewards/accuracies": 0.9312499761581421,
1491
+ "rewards/chosen": -1.6385961771011353,
1492
+ "rewards/margins": 5.16446590423584,
1493
+ "rewards/rejected": -6.803062438964844,
1494
+ "step": 990
1495
+ },
1496
+ {
1497
+ "epoch": 0.9358914365933552,
1498
+ "grad_norm": 120.18799699014491,
1499
+ "learning_rate": 6.151650217376347e-09,
1500
+ "logits/chosen": -2.404142141342163,
1501
+ "logits/rejected": -2.379453420639038,
1502
+ "logps/chosen": -192.36676025390625,
1503
+ "logps/rejected": -228.187744140625,
1504
+ "loss": 0.2018,
1505
+ "rewards/accuracies": 0.949999988079071,
1506
+ "rewards/chosen": -1.6089038848876953,
1507
+ "rewards/margins": 5.643777370452881,
1508
+ "rewards/rejected": -7.252681732177734,
1509
+ "step": 1000
1510
+ },
1511
+ {
1512
+ "epoch": 0.9452503509592888,
1513
+ "grad_norm": 386.22845252678115,
1514
+ "learning_rate": 4.480407093354566e-09,
1515
+ "logits/chosen": -2.3859081268310547,
1516
+ "logits/rejected": -2.392927646636963,
1517
+ "logps/chosen": -207.25112915039062,
1518
+ "logps/rejected": -233.6548614501953,
1519
+ "loss": 0.204,
1520
+ "rewards/accuracies": 0.925000011920929,
1521
+ "rewards/chosen": -1.6337274312973022,
1522
+ "rewards/margins": 5.299923896789551,
1523
+ "rewards/rejected": -6.933651924133301,
1524
+ "step": 1010
1525
+ },
1526
+ {
1527
+ "epoch": 0.9546092653252223,
1528
+ "grad_norm": 186.14758486308767,
1529
+ "learning_rate": 3.0715256762478825e-09,
1530
+ "logits/chosen": -2.4298458099365234,
1531
+ "logits/rejected": -2.4073410034179688,
1532
+ "logps/chosen": -183.422607421875,
1533
+ "logps/rejected": -216.18246459960938,
1534
+ "loss": 0.216,
1535
+ "rewards/accuracies": 0.8999999761581421,
1536
+ "rewards/chosen": -1.9498409032821655,
1537
+ "rewards/margins": 4.482772350311279,
1538
+ "rewards/rejected": -6.432612419128418,
1539
+ "step": 1020
1540
+ },
1541
+ {
1542
+ "epoch": 0.9639681796911558,
1543
+ "grad_norm": 123.36716955534204,
1544
+ "learning_rate": 1.926511493696936e-09,
1545
+ "logits/chosen": -2.3117165565490723,
1546
+ "logits/rejected": -2.295056104660034,
1547
+ "logps/chosen": -188.56947326660156,
1548
+ "logps/rejected": -231.18417358398438,
1549
+ "loss": 0.1393,
1550
+ "rewards/accuracies": 0.949999988079071,
1551
+ "rewards/chosen": -1.7056289911270142,
1552
+ "rewards/margins": 5.4376044273376465,
1553
+ "rewards/rejected": -7.143233299255371,
1554
+ "step": 1030
1555
+ },
1556
+ {
1557
+ "epoch": 0.9733270940570894,
1558
+ "grad_norm": 255.97860575442272,
1559
+ "learning_rate": 1.04658810538516e-09,
1560
+ "logits/chosen": -2.280712127685547,
1561
+ "logits/rejected": -2.2452359199523926,
1562
+ "logps/chosen": -218.2684326171875,
1563
+ "logps/rejected": -241.2434539794922,
1564
+ "loss": 0.2111,
1565
+ "rewards/accuracies": 0.893750011920929,
1566
+ "rewards/chosen": -1.9387584924697876,
1567
+ "rewards/margins": 5.03714656829834,
1568
+ "rewards/rejected": -6.975905418395996,
1569
+ "step": 1040
1570
+ },
1571
+ {
1572
+ "epoch": 0.9826860084230229,
1573
+ "grad_norm": 278.59930069753267,
1574
+ "learning_rate": 4.3269579554558455e-10,
1575
+ "logits/chosen": -2.375357151031494,
1576
+ "logits/rejected": -2.3615028858184814,
1577
+ "logps/chosen": -197.4428253173828,
1578
+ "logps/rejected": -236.6564483642578,
1579
+ "loss": 0.2418,
1580
+ "rewards/accuracies": 0.887499988079071,
1581
+ "rewards/chosen": -2.3001036643981934,
1582
+ "rewards/margins": 4.696293354034424,
1583
+ "rewards/rejected": -6.996397495269775,
1584
+ "step": 1050
1585
+ },
1586
+ {
1587
+ "epoch": 0.9920449227889565,
1588
+ "grad_norm": 267.76889839293335,
1589
+ "learning_rate": 8.549056817513944e-11,
1590
+ "logits/chosen": -2.360644817352295,
1591
+ "logits/rejected": -2.354513645172119,
1592
+ "logps/chosen": -188.0636749267578,
1593
+ "logps/rejected": -226.8789520263672,
1594
+ "loss": 0.221,
1595
+ "rewards/accuracies": 0.90625,
1596
+ "rewards/chosen": -2.1550252437591553,
1597
+ "rewards/margins": 4.53070592880249,
1598
+ "rewards/rejected": -6.685731410980225,
1599
+ "step": 1060
1600
+ },
1601
+ {
1602
+ "epoch": 0.9995320542817033,
1603
+ "step": 1068,
1604
+ "total_flos": 0.0,
1605
+ "train_loss": 0.2609846996010913,
1606
+ "train_runtime": 23133.9065,
1607
+ "train_samples_per_second": 1.478,
1608
+ "train_steps_per_second": 0.046
1609
+ }
1610
+ ],
1611
+ "logging_steps": 10,
1612
+ "max_steps": 1068,
1613
+ "num_input_tokens_seen": 0,
1614
+ "num_train_epochs": 1,
1615
+ "save_steps": 500,
1616
+ "stateful_callbacks": {
1617
+ "TrainerControl": {
1618
+ "args": {
1619
+ "should_epoch_stop": false,
1620
+ "should_evaluate": false,
1621
+ "should_log": false,
1622
+ "should_save": true,
1623
+ "should_training_stop": true
1624
+ },
1625
+ "attributes": {}
1626
+ }
1627
+ },
1628
+ "total_flos": 0.0,
1629
+ "train_batch_size": 4,
1630
+ "trial_name": null,
1631
+ "trial_params": null
1632
+ }
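
The entries above are the per-step logging records stored in the checkpoint's `trainer_state.json` (loss, reward accuracies, chosen/rejected rewards and their margins, logged every 10 steps). As a convenience, the following is a minimal sketch for inspecting these curves locally; it assumes the standard `log_history` layout written by `transformers.Trainer`, and the directory path used here is only an illustrative placeholder, not part of this repository.

```python
import json
import matplotlib.pyplot as plt

# Hypothetical local path to the output directory containing trainer_state.json;
# adjust to wherever the checkpoint was downloaded or saved.
state_path = "./uf-mistral-it-dpo-iopo-iter1-short/trainer_state.json"

with open(state_path) as f:
    state = json.load(f)

# Keep only the per-step logging entries; the final summary entry
# (train_loss, train_runtime, ...) has no "loss" key and is skipped.
logs = [e for e in state["log_history"] if "loss" in e and "step" in e]

steps = [e["step"] for e in logs]
loss = [e["loss"] for e in logs]
margins = [e["rewards/margins"] for e in logs]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(steps, loss)
ax1.set_xlabel("step")
ax1.set_ylabel("DPO loss")
ax2.plot(steps, margins)
ax2.set_xlabel("step")
ax2.set_ylabel("rewards/margins")
fig.tight_layout()
plt.show()
```

Plotted this way, the logs show the training loss settling around 0.2 while the chosen-vs-rejected reward margin grows to roughly 5 by the end of the single epoch, consistent with the aggregate `train_loss` reported in the summary entry.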