bigheiniuJ commited on
Commit
308296a
·
verified ·
1 Parent(s): 6bcc84b

Training in progress, step 955

Browse files
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9994767137624281,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.15591317970715268,
5
+ "train_runtime": 13974.7976,
6
+ "train_samples": 122268,
7
+ "train_samples_per_second": 8.749,
8
+ "train_steps_per_second": 0.068
9
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.46.3"
6
+ }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ea021ae914136c3094c2ff96199c13cdf60edf8e2d3847a4437321665ea98bd
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b030ae9a61de1228d0be54c2700e14db8b35b81f95742f90a7bf10e63545337f
3
  size 4943162336
runs/Dec25_18-35-31_ip-10-0-27-237.ec2.internal/events.out.tfevents.1735151795.ip-10-0-27-237.ec2.internal.18852.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d02d968758128f0193281b1495f8dc3be1af68b5719521216c36e03fbab478c
3
- size 72291
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e69992077e35b1c0123d6bcd1db2b789047bad70743944481f2bc1ec6d84702
3
+ size 72645
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9994767137624281,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.15591317970715268,
5
+ "train_runtime": 13974.7976,
6
+ "train_samples": 122268,
7
+ "train_samples_per_second": 8.749,
8
+ "train_steps_per_second": 0.068
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,1482 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9994767137624281,
5
+ "eval_steps": 100,
6
+ "global_step": 955,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0010465724751439038,
13
+ "grad_norm": 9.516955534709263,
14
+ "learning_rate": 5.208333333333333e-09,
15
+ "logits/chosen": -3.21875,
16
+ "logits/rejected": -3.21875,
17
+ "logps/chosen": -250.0,
18
+ "logps/rejected": -364.0,
19
+ "loss": 0.6914,
20
+ "rewards/accuracies": 0.0,
21
+ "rewards/chosen": 0.0,
22
+ "rewards/margins": 0.0,
23
+ "rewards/rejected": 0.0,
24
+ "step": 1
25
+ },
26
+ {
27
+ "epoch": 0.010465724751439037,
28
+ "grad_norm": 9.605250953667445,
29
+ "learning_rate": 5.208333333333333e-08,
30
+ "logits/chosen": -3.25,
31
+ "logits/rejected": -3.28125,
32
+ "logps/chosen": -298.0,
33
+ "logps/rejected": -278.0,
34
+ "loss": 0.6921,
35
+ "rewards/accuracies": 0.1527777761220932,
36
+ "rewards/chosen": -0.00176239013671875,
37
+ "rewards/margins": -0.0012969970703125,
38
+ "rewards/rejected": -0.0004596710205078125,
39
+ "step": 10
40
+ },
41
+ {
42
+ "epoch": 0.020931449502878074,
43
+ "grad_norm": 9.292065125128957,
44
+ "learning_rate": 1.0416666666666667e-07,
45
+ "logits/chosen": -3.078125,
46
+ "logits/rejected": -3.171875,
47
+ "logps/chosen": -286.0,
48
+ "logps/rejected": -294.0,
49
+ "loss": 0.6915,
50
+ "rewards/accuracies": 0.22499999403953552,
51
+ "rewards/chosen": -0.001617431640625,
52
+ "rewards/margins": -0.000492095947265625,
53
+ "rewards/rejected": -0.00112152099609375,
54
+ "step": 20
55
+ },
56
+ {
57
+ "epoch": 0.03139717425431711,
58
+ "grad_norm": 8.81073696781217,
59
+ "learning_rate": 1.5624999999999999e-07,
60
+ "logits/chosen": -3.21875,
61
+ "logits/rejected": -3.25,
62
+ "logps/chosen": -294.0,
63
+ "logps/rejected": -264.0,
64
+ "loss": 0.69,
65
+ "rewards/accuracies": 0.39375001192092896,
66
+ "rewards/chosen": -0.00153350830078125,
67
+ "rewards/margins": 0.003387451171875,
68
+ "rewards/rejected": -0.004913330078125,
69
+ "step": 30
70
+ },
71
+ {
72
+ "epoch": 0.04186289900575615,
73
+ "grad_norm": 9.511442470624056,
74
+ "learning_rate": 2.0833333333333333e-07,
75
+ "logits/chosen": -3.1875,
76
+ "logits/rejected": -3.125,
77
+ "logps/chosen": -288.0,
78
+ "logps/rejected": -302.0,
79
+ "loss": 0.684,
80
+ "rewards/accuracies": 0.6499999761581421,
81
+ "rewards/chosen": -0.006072998046875,
82
+ "rewards/margins": 0.0186767578125,
83
+ "rewards/rejected": -0.0247802734375,
84
+ "step": 40
85
+ },
86
+ {
87
+ "epoch": 0.052328623757195186,
88
+ "grad_norm": 8.623245631146178,
89
+ "learning_rate": 2.604166666666667e-07,
90
+ "logits/chosen": -3.25,
91
+ "logits/rejected": -3.21875,
92
+ "logps/chosen": -318.0,
93
+ "logps/rejected": -302.0,
94
+ "loss": 0.6707,
95
+ "rewards/accuracies": 0.7875000238418579,
96
+ "rewards/chosen": -0.0244140625,
97
+ "rewards/margins": 0.0498046875,
98
+ "rewards/rejected": -0.07421875,
99
+ "step": 50
100
+ },
101
+ {
102
+ "epoch": 0.06279434850863422,
103
+ "grad_norm": 11.270983496881312,
104
+ "learning_rate": 3.1249999999999997e-07,
105
+ "logits/chosen": -3.15625,
106
+ "logits/rejected": -3.171875,
107
+ "logps/chosen": -296.0,
108
+ "logps/rejected": -316.0,
109
+ "loss": 0.6463,
110
+ "rewards/accuracies": 0.7562500238418579,
111
+ "rewards/chosen": -0.046875,
112
+ "rewards/margins": 0.09375,
113
+ "rewards/rejected": -0.140625,
114
+ "step": 60
115
+ },
116
+ {
117
+ "epoch": 0.07326007326007326,
118
+ "grad_norm": 11.953436287497002,
119
+ "learning_rate": 3.645833333333333e-07,
120
+ "logits/chosen": -3.109375,
121
+ "logits/rejected": -3.109375,
122
+ "logps/chosen": -310.0,
123
+ "logps/rejected": -304.0,
124
+ "loss": 0.5956,
125
+ "rewards/accuracies": 0.856249988079071,
126
+ "rewards/chosen": -0.0294189453125,
127
+ "rewards/margins": 0.26171875,
128
+ "rewards/rejected": -0.291015625,
129
+ "step": 70
130
+ },
131
+ {
132
+ "epoch": 0.0837257980115123,
133
+ "grad_norm": 24.459142709298142,
134
+ "learning_rate": 4.1666666666666667e-07,
135
+ "logits/chosen": -3.046875,
136
+ "logits/rejected": -3.078125,
137
+ "logps/chosen": -326.0,
138
+ "logps/rejected": -348.0,
139
+ "loss": 0.518,
140
+ "rewards/accuracies": 0.8500000238418579,
141
+ "rewards/chosen": -0.1044921875,
142
+ "rewards/margins": 0.57421875,
143
+ "rewards/rejected": -0.6796875,
144
+ "step": 80
145
+ },
146
+ {
147
+ "epoch": 0.09419152276295134,
148
+ "grad_norm": 24.49272478624807,
149
+ "learning_rate": 4.6874999999999996e-07,
150
+ "logits/chosen": -3.0625,
151
+ "logits/rejected": -3.046875,
152
+ "logps/chosen": -324.0,
153
+ "logps/rejected": -352.0,
154
+ "loss": 0.4382,
155
+ "rewards/accuracies": 0.856249988079071,
156
+ "rewards/chosen": -0.255859375,
157
+ "rewards/margins": 0.69921875,
158
+ "rewards/rejected": -0.953125,
159
+ "step": 90
160
+ },
161
+ {
162
+ "epoch": 0.10465724751439037,
163
+ "grad_norm": 32.563301578465556,
164
+ "learning_rate": 4.999732492681437e-07,
165
+ "logits/chosen": -2.96875,
166
+ "logits/rejected": -3.0,
167
+ "logps/chosen": -346.0,
168
+ "logps/rejected": -488.0,
169
+ "loss": 0.3249,
170
+ "rewards/accuracies": 0.8999999761581421,
171
+ "rewards/chosen": -0.46484375,
172
+ "rewards/margins": 1.4375,
173
+ "rewards/rejected": -1.90625,
174
+ "step": 100
175
+ },
176
+ {
177
+ "epoch": 0.1151229722658294,
178
+ "grad_norm": 29.67260704692337,
179
+ "learning_rate": 4.996723692767926e-07,
180
+ "logits/chosen": -2.984375,
181
+ "logits/rejected": -2.96875,
182
+ "logps/chosen": -338.0,
183
+ "logps/rejected": -528.0,
184
+ "loss": 0.2642,
185
+ "rewards/accuracies": 0.8999999761581421,
186
+ "rewards/chosen": -0.59375,
187
+ "rewards/margins": 1.9296875,
188
+ "rewards/rejected": -2.515625,
189
+ "step": 110
190
+ },
191
+ {
192
+ "epoch": 0.12558869701726844,
193
+ "grad_norm": 36.130980567601185,
194
+ "learning_rate": 4.990375746213598e-07,
195
+ "logits/chosen": -2.875,
196
+ "logits/rejected": -2.84375,
197
+ "logps/chosen": -368.0,
198
+ "logps/rejected": -608.0,
199
+ "loss": 0.2784,
200
+ "rewards/accuracies": 0.8687499761581421,
201
+ "rewards/chosen": -0.9765625,
202
+ "rewards/margins": 2.109375,
203
+ "rewards/rejected": -3.09375,
204
+ "step": 120
205
+ },
206
+ {
207
+ "epoch": 0.1360544217687075,
208
+ "grad_norm": 39.623525128446786,
209
+ "learning_rate": 4.980697142834314e-07,
210
+ "logits/chosen": -2.8125,
211
+ "logits/rejected": -2.78125,
212
+ "logps/chosen": -390.0,
213
+ "logps/rejected": -588.0,
214
+ "loss": 0.2208,
215
+ "rewards/accuracies": 0.925000011920929,
216
+ "rewards/chosen": -0.92578125,
217
+ "rewards/margins": 2.296875,
218
+ "rewards/rejected": -3.234375,
219
+ "step": 130
220
+ },
221
+ {
222
+ "epoch": 0.14652014652014653,
223
+ "grad_norm": 26.762330480495592,
224
+ "learning_rate": 4.967700826904229e-07,
225
+ "logits/chosen": -2.875,
226
+ "logits/rejected": -2.8125,
227
+ "logps/chosen": -360.0,
228
+ "logps/rejected": -664.0,
229
+ "loss": 0.2179,
230
+ "rewards/accuracies": 0.925000011920929,
231
+ "rewards/chosen": -0.859375,
232
+ "rewards/margins": 2.9375,
233
+ "rewards/rejected": -3.796875,
234
+ "step": 140
235
+ },
236
+ {
237
+ "epoch": 0.15698587127158556,
238
+ "grad_norm": 40.56285073504209,
239
+ "learning_rate": 4.951404179843962e-07,
240
+ "logits/chosen": -2.765625,
241
+ "logits/rejected": -2.65625,
242
+ "logps/chosen": -438.0,
243
+ "logps/rejected": -668.0,
244
+ "loss": 0.1995,
245
+ "rewards/accuracies": 0.8999999761581421,
246
+ "rewards/chosen": -1.2734375,
247
+ "rewards/margins": 2.828125,
248
+ "rewards/rejected": -4.09375,
249
+ "step": 150
250
+ },
251
+ {
252
+ "epoch": 0.1674515960230246,
253
+ "grad_norm": 19.47918869184068,
254
+ "learning_rate": 4.931828996974498e-07,
255
+ "logits/chosen": -2.765625,
256
+ "logits/rejected": -2.6875,
257
+ "logps/chosen": -452.0,
258
+ "logps/rejected": -704.0,
259
+ "loss": 0.2069,
260
+ "rewards/accuracies": 0.90625,
261
+ "rewards/chosen": -1.2734375,
262
+ "rewards/margins": 2.90625,
263
+ "rewards/rejected": -4.1875,
264
+ "step": 160
265
+ },
266
+ {
267
+ "epoch": 0.17791732077446362,
268
+ "grad_norm": 54.76579795757994,
269
+ "learning_rate": 4.909001458367866e-07,
270
+ "logits/chosen": -2.71875,
271
+ "logits/rejected": -2.640625,
272
+ "logps/chosen": -366.0,
273
+ "logps/rejected": -716.0,
274
+ "loss": 0.2041,
275
+ "rewards/accuracies": 0.9375,
276
+ "rewards/chosen": -1.0234375,
277
+ "rewards/margins": 3.140625,
278
+ "rewards/rejected": -4.15625,
279
+ "step": 170
280
+ },
281
+ {
282
+ "epoch": 0.18838304552590268,
283
+ "grad_norm": 26.167531224154025,
284
+ "learning_rate": 4.882952093833627e-07,
285
+ "logits/chosen": -2.71875,
286
+ "logits/rejected": -2.578125,
287
+ "logps/chosen": -402.0,
288
+ "logps/rejected": -736.0,
289
+ "loss": 0.2019,
290
+ "rewards/accuracies": 0.925000011920929,
291
+ "rewards/chosen": -1.28125,
292
+ "rewards/margins": 3.34375,
293
+ "rewards/rejected": -4.625,
294
+ "step": 180
295
+ },
296
+ {
297
+ "epoch": 0.1988487702773417,
298
+ "grad_norm": 19.72905224051703,
299
+ "learning_rate": 4.853715742087946e-07,
300
+ "logits/chosen": -2.515625,
301
+ "logits/rejected": -2.453125,
302
+ "logps/chosen": -434.0,
303
+ "logps/rejected": -768.0,
304
+ "loss": 0.1627,
305
+ "rewards/accuracies": 0.925000011920929,
306
+ "rewards/chosen": -1.5703125,
307
+ "rewards/margins": 3.609375,
308
+ "rewards/rejected": -5.1875,
309
+ "step": 190
310
+ },
311
+ {
312
+ "epoch": 0.20931449502878074,
313
+ "grad_norm": 18.779653204870545,
314
+ "learning_rate": 4.821331504159906e-07,
315
+ "logits/chosen": -2.65625,
316
+ "logits/rejected": -2.40625,
317
+ "logps/chosen": -482.0,
318
+ "logps/rejected": -828.0,
319
+ "loss": 0.172,
320
+ "rewards/accuracies": 0.925000011920929,
321
+ "rewards/chosen": -1.78125,
322
+ "rewards/margins": 3.734375,
323
+ "rewards/rejected": -5.5,
324
+ "step": 200
325
+ },
326
+ {
327
+ "epoch": 0.21978021978021978,
328
+ "grad_norm": 22.81262811076015,
329
+ "learning_rate": 4.785842691097342e-07,
330
+ "logits/chosen": -2.546875,
331
+ "logits/rejected": -2.40625,
332
+ "logps/chosen": -422.0,
333
+ "logps/rejected": -824.0,
334
+ "loss": 0.1704,
335
+ "rewards/accuracies": 0.9437500238418579,
336
+ "rewards/chosen": -1.34375,
337
+ "rewards/margins": 4.1875,
338
+ "rewards/rejected": -5.5,
339
+ "step": 210
340
+ },
341
+ {
342
+ "epoch": 0.2302459445316588,
343
+ "grad_norm": 44.85290714547607,
344
+ "learning_rate": 4.7472967660421603e-07,
345
+ "logits/chosen": -2.546875,
346
+ "logits/rejected": -2.4375,
347
+ "logps/chosen": -468.0,
348
+ "logps/rejected": -864.0,
349
+ "loss": 0.1787,
350
+ "rewards/accuracies": 0.9125000238418579,
351
+ "rewards/chosen": -1.703125,
352
+ "rewards/margins": 4.03125,
353
+ "rewards/rejected": -5.71875,
354
+ "step": 220
355
+ },
356
+ {
357
+ "epoch": 0.24071166928309787,
358
+ "grad_norm": 23.212078131512556,
359
+ "learning_rate": 4.705745280752585e-07,
360
+ "logits/chosen": -2.65625,
361
+ "logits/rejected": -2.46875,
362
+ "logps/chosen": -424.0,
363
+ "logps/rejected": -804.0,
364
+ "loss": 0.1676,
365
+ "rewards/accuracies": 0.9312499761581421,
366
+ "rewards/chosen": -1.2734375,
367
+ "rewards/margins": 3.984375,
368
+ "rewards/rejected": -5.25,
369
+ "step": 230
370
+ },
371
+ {
372
+ "epoch": 0.25117739403453687,
373
+ "grad_norm": 49.148200668176614,
374
+ "learning_rate": 4.6612438066572555e-07,
375
+ "logits/chosen": -2.34375,
376
+ "logits/rejected": -2.0625,
377
+ "logps/chosen": -496.0,
378
+ "logps/rejected": -976.0,
379
+ "loss": 0.1638,
380
+ "rewards/accuracies": 0.9437500238418579,
381
+ "rewards/chosen": -1.8515625,
382
+ "rewards/margins": 4.9375,
383
+ "rewards/rejected": -6.78125,
384
+ "step": 240
385
+ },
386
+ {
387
+ "epoch": 0.2616431187859759,
388
+ "grad_norm": 27.312656594695248,
389
+ "learning_rate": 4.6138518605333664e-07,
390
+ "logits/chosen": -2.484375,
391
+ "logits/rejected": -2.3125,
392
+ "logps/chosen": -444.0,
393
+ "logps/rejected": -812.0,
394
+ "loss": 0.158,
395
+ "rewards/accuracies": 0.9624999761581421,
396
+ "rewards/chosen": -1.6171875,
397
+ "rewards/margins": 3.96875,
398
+ "rewards/rejected": -5.59375,
399
+ "step": 250
400
+ },
401
+ {
402
+ "epoch": 0.272108843537415,
403
+ "grad_norm": 22.296118147448365,
404
+ "learning_rate": 4.5636328249082514e-07,
405
+ "logits/chosen": -2.359375,
406
+ "logits/rejected": -2.1875,
407
+ "logps/chosen": -494.0,
408
+ "logps/rejected": -960.0,
409
+ "loss": 0.1445,
410
+ "rewards/accuracies": 0.9312499761581421,
411
+ "rewards/chosen": -2.015625,
412
+ "rewards/margins": 4.53125,
413
+ "rewards/rejected": -6.5625,
414
+ "step": 260
415
+ },
416
+ {
417
+ "epoch": 0.282574568288854,
418
+ "grad_norm": 22.50903492692647,
419
+ "learning_rate": 4.510653863290871e-07,
420
+ "logits/chosen": -2.34375,
421
+ "logits/rejected": -2.0,
422
+ "logps/chosen": -448.0,
423
+ "logps/rejected": -904.0,
424
+ "loss": 0.1423,
425
+ "rewards/accuracies": 0.9750000238418579,
426
+ "rewards/chosen": -1.484375,
427
+ "rewards/margins": 4.84375,
428
+ "rewards/rejected": -6.34375,
429
+ "step": 270
430
+ },
431
+ {
432
+ "epoch": 0.29304029304029305,
433
+ "grad_norm": 28.21995400188032,
434
+ "learning_rate": 4.4549858303465737e-07,
435
+ "logits/chosen": -2.390625,
436
+ "logits/rejected": -2.171875,
437
+ "logps/chosen": -516.0,
438
+ "logps/rejected": -980.0,
439
+ "loss": 0.1437,
440
+ "rewards/accuracies": 0.90625,
441
+ "rewards/chosen": -2.265625,
442
+ "rewards/margins": 4.5625,
443
+ "rewards/rejected": -6.84375,
444
+ "step": 280
445
+ },
446
+ {
447
+ "epoch": 0.3035060177917321,
448
+ "grad_norm": 40.19255553854356,
449
+ "learning_rate": 4.396703177135261e-07,
450
+ "logits/chosen": -2.21875,
451
+ "logits/rejected": -1.984375,
452
+ "logps/chosen": -568.0,
453
+ "logps/rejected": -1096.0,
454
+ "loss": 0.1329,
455
+ "rewards/accuracies": 0.956250011920929,
456
+ "rewards/chosen": -2.953125,
457
+ "rewards/margins": 5.5,
458
+ "rewards/rejected": -8.4375,
459
+ "step": 290
460
+ },
461
+ {
462
+ "epoch": 0.3139717425431711,
463
+ "grad_norm": 28.430828005352467,
464
+ "learning_rate": 4.335883851539693e-07,
465
+ "logits/chosen": -2.296875,
466
+ "logits/rejected": -1.9375,
467
+ "logps/chosen": -648.0,
468
+ "logps/rejected": -1168.0,
469
+ "loss": 0.1374,
470
+ "rewards/accuracies": 0.918749988079071,
471
+ "rewards/chosen": -3.34375,
472
+ "rewards/margins": 5.4375,
473
+ "rewards/rejected": -8.75,
474
+ "step": 300
475
+ },
476
+ {
477
+ "epoch": 0.32443746729461015,
478
+ "grad_norm": 61.735019074562494,
479
+ "learning_rate": 4.272609194017105e-07,
480
+ "logits/chosen": -2.1875,
481
+ "logits/rejected": -1.6953125,
482
+ "logps/chosen": -676.0,
483
+ "logps/rejected": -1224.0,
484
+ "loss": 0.1284,
485
+ "rewards/accuracies": 0.918749988079071,
486
+ "rewards/chosen": -3.734375,
487
+ "rewards/margins": 5.5,
488
+ "rewards/rejected": -9.1875,
489
+ "step": 310
490
+ },
491
+ {
492
+ "epoch": 0.3349031920460492,
493
+ "grad_norm": 44.72666475922118,
494
+ "learning_rate": 4.2069638288135547e-07,
495
+ "logits/chosen": -2.109375,
496
+ "logits/rejected": -1.796875,
497
+ "logps/chosen": -736.0,
498
+ "logps/rejected": -1360.0,
499
+ "loss": 0.1419,
500
+ "rewards/accuracies": 0.9437500238418579,
501
+ "rewards/chosen": -4.3125,
502
+ "rewards/margins": 6.34375,
503
+ "rewards/rejected": -10.625,
504
+ "step": 320
505
+ },
506
+ {
507
+ "epoch": 0.3453689167974882,
508
+ "grad_norm": 25.59488492513282,
509
+ "learning_rate": 4.139035550786494e-07,
510
+ "logits/chosen": -2.328125,
511
+ "logits/rejected": -2.203125,
512
+ "logps/chosen": -684.0,
513
+ "logps/rejected": -1160.0,
514
+ "loss": 0.1288,
515
+ "rewards/accuracies": 0.893750011920929,
516
+ "rewards/chosen": -4.0625,
517
+ "rewards/margins": 4.6875,
518
+ "rewards/rejected": -8.75,
519
+ "step": 330
520
+ },
521
+ {
522
+ "epoch": 0.35583464154892724,
523
+ "grad_norm": 39.490355460521485,
524
+ "learning_rate": 4.0689152079869306e-07,
525
+ "logits/chosen": -2.109375,
526
+ "logits/rejected": -1.7109375,
527
+ "logps/chosen": -668.0,
528
+ "logps/rejected": -1336.0,
529
+ "loss": 0.1067,
530
+ "rewards/accuracies": 0.96875,
531
+ "rewards/chosen": -4.09375,
532
+ "rewards/margins": 6.84375,
533
+ "rewards/rejected": -10.9375,
534
+ "step": 340
535
+ },
536
+ {
537
+ "epoch": 0.3663003663003663,
538
+ "grad_norm": 40.05108849367682,
539
+ "learning_rate": 3.99669658015821e-07,
540
+ "logits/chosen": -2.0625,
541
+ "logits/rejected": -1.6796875,
542
+ "logps/chosen": -708.0,
543
+ "logps/rejected": -1344.0,
544
+ "loss": 0.1218,
545
+ "rewards/accuracies": 0.949999988079071,
546
+ "rewards/chosen": -4.5,
547
+ "rewards/margins": 6.6875,
548
+ "rewards/rejected": -11.1875,
549
+ "step": 350
550
+ },
551
+ {
552
+ "epoch": 0.37676609105180536,
553
+ "grad_norm": 29.496296075978837,
554
+ "learning_rate": 3.92247625331392e-07,
555
+ "logits/chosen": -2.109375,
556
+ "logits/rejected": -1.875,
557
+ "logps/chosen": -716.0,
558
+ "logps/rejected": -1296.0,
559
+ "loss": 0.1126,
560
+ "rewards/accuracies": 0.925000011920929,
561
+ "rewards/chosen": -4.25,
562
+ "rewards/margins": 6.0625,
563
+ "rewards/rejected": -10.3125,
564
+ "step": 360
565
+ },
566
+ {
567
+ "epoch": 0.3872318158032444,
568
+ "grad_norm": 25.08871689723511,
569
+ "learning_rate": 3.846353490562664e-07,
570
+ "logits/chosen": -2.109375,
571
+ "logits/rejected": -1.5,
572
+ "logps/chosen": -776.0,
573
+ "logps/rejected": -1496.0,
574
+ "loss": 0.1248,
575
+ "rewards/accuracies": 0.956250011920929,
576
+ "rewards/chosen": -5.125,
577
+ "rewards/margins": 7.3125,
578
+ "rewards/rejected": -12.4375,
579
+ "step": 370
580
+ },
581
+ {
582
+ "epoch": 0.3976975405546834,
583
+ "grad_norm": 26.78647304619011,
584
+ "learning_rate": 3.768430099352445e-07,
585
+ "logits/chosen": -2.234375,
586
+ "logits/rejected": -1.96875,
587
+ "logps/chosen": -756.0,
588
+ "logps/rejected": -1344.0,
589
+ "loss": 0.1186,
590
+ "rewards/accuracies": 0.9437500238418579,
591
+ "rewards/chosen": -4.46875,
592
+ "rewards/margins": 6.09375,
593
+ "rewards/rejected": -10.5625,
594
+ "step": 380
595
+ },
596
+ {
597
+ "epoch": 0.40816326530612246,
598
+ "grad_norm": 47.07672408947705,
599
+ "learning_rate": 3.6888102953122304e-07,
600
+ "logits/chosen": -2.265625,
601
+ "logits/rejected": -1.9765625,
602
+ "logps/chosen": -712.0,
603
+ "logps/rejected": -1336.0,
604
+ "loss": 0.103,
605
+ "rewards/accuracies": 0.9750000238418579,
606
+ "rewards/chosen": -4.21875,
607
+ "rewards/margins": 6.5,
608
+ "rewards/rejected": -10.75,
609
+ "step": 390
610
+ },
611
+ {
612
+ "epoch": 0.4186289900575615,
613
+ "grad_norm": 22.46877888992614,
614
+ "learning_rate": 3.607600562872785e-07,
615
+ "logits/chosen": -2.359375,
616
+ "logits/rejected": -1.953125,
617
+ "logps/chosen": -876.0,
618
+ "logps/rejected": -1592.0,
619
+ "loss": 0.0937,
620
+ "rewards/accuracies": 0.96875,
621
+ "rewards/chosen": -5.34375,
622
+ "rewards/margins": 7.5625,
623
+ "rewards/rejected": -12.875,
624
+ "step": 400
625
+ },
626
+ {
627
+ "epoch": 0.4290947148090005,
628
+ "grad_norm": 31.318574320005773,
629
+ "learning_rate": 3.5249095128531856e-07,
630
+ "logits/chosen": -2.40625,
631
+ "logits/rejected": -2.171875,
632
+ "logps/chosen": -796.0,
633
+ "logps/rejected": -1328.0,
634
+ "loss": 0.1072,
635
+ "rewards/accuracies": 0.9375,
636
+ "rewards/chosen": -4.59375,
637
+ "rewards/margins": 5.9375,
638
+ "rewards/rejected": -10.5,
639
+ "step": 410
640
+ },
641
+ {
642
+ "epoch": 0.43956043956043955,
643
+ "grad_norm": 23.57271093807849,
644
+ "learning_rate": 3.4408477372034736e-07,
645
+ "logits/chosen": -2.4375,
646
+ "logits/rejected": -2.09375,
647
+ "logps/chosen": -760.0,
648
+ "logps/rejected": -1368.0,
649
+ "loss": 0.1199,
650
+ "rewards/accuracies": 0.9624999761581421,
651
+ "rewards/chosen": -4.46875,
652
+ "rewards/margins": 6.625,
653
+ "rewards/rejected": -11.125,
654
+ "step": 420
655
+ },
656
+ {
657
+ "epoch": 0.4500261643118786,
658
+ "grad_norm": 20.257810507961672,
659
+ "learning_rate": 3.3555276610977276e-07,
660
+ "logits/chosen": -2.5,
661
+ "logits/rejected": -2.328125,
662
+ "logps/chosen": -568.0,
663
+ "logps/rejected": -1184.0,
664
+ "loss": 0.0937,
665
+ "rewards/accuracies": 0.9750000238418579,
666
+ "rewards/chosen": -2.8125,
667
+ "rewards/margins": 6.125,
668
+ "rewards/rejected": -8.9375,
669
+ "step": 430
670
+ },
671
+ {
672
+ "epoch": 0.4604918890633176,
673
+ "grad_norm": 29.069836347407232,
674
+ "learning_rate": 3.269063392575352e-07,
675
+ "logits/chosen": -2.4375,
676
+ "logits/rejected": -2.171875,
677
+ "logps/chosen": -628.0,
678
+ "logps/rejected": -1424.0,
679
+ "loss": 0.1044,
680
+ "rewards/accuracies": 0.96875,
681
+ "rewards/chosen": -3.5625,
682
+ "rewards/margins": 7.9375,
683
+ "rewards/rejected": -11.5,
684
+ "step": 440
685
+ },
686
+ {
687
+ "epoch": 0.47095761381475665,
688
+ "grad_norm": 28.99798773679313,
689
+ "learning_rate": 3.1815705699316964e-07,
690
+ "logits/chosen": -2.328125,
691
+ "logits/rejected": -2.03125,
692
+ "logps/chosen": -700.0,
693
+ "logps/rejected": -1472.0,
694
+ "loss": 0.1061,
695
+ "rewards/accuracies": 0.9624999761581421,
696
+ "rewards/chosen": -3.984375,
697
+ "rewards/margins": 7.8125,
698
+ "rewards/rejected": -11.8125,
699
+ "step": 450
700
+ },
701
+ {
702
+ "epoch": 0.48142333856619574,
703
+ "grad_norm": 20.545878743670112,
704
+ "learning_rate": 3.0931662070620794e-07,
705
+ "logits/chosen": -2.21875,
706
+ "logits/rejected": -2.03125,
707
+ "logps/chosen": -776.0,
708
+ "logps/rejected": -1464.0,
709
+ "loss": 0.097,
710
+ "rewards/accuracies": 0.96875,
711
+ "rewards/chosen": -5.03125,
712
+ "rewards/margins": 7.0625,
713
+ "rewards/rejected": -12.125,
714
+ "step": 460
715
+ },
716
+ {
717
+ "epoch": 0.49188906331763477,
718
+ "grad_norm": 22.557933835618037,
719
+ "learning_rate": 3.003968536966078e-07,
720
+ "logits/chosen": -2.34375,
721
+ "logits/rejected": -2.0625,
722
+ "logps/chosen": -760.0,
723
+ "logps/rejected": -1488.0,
724
+ "loss": 0.081,
725
+ "rewards/accuracies": 0.956250011920929,
726
+ "rewards/chosen": -4.46875,
727
+ "rewards/margins": 7.40625,
728
+ "rewards/rejected": -11.875,
729
+ "step": 470
730
+ },
731
+ {
732
+ "epoch": 0.5023547880690737,
733
+ "grad_norm": 28.131650321698604,
734
+ "learning_rate": 2.9140968536213693e-07,
735
+ "logits/chosen": -2.359375,
736
+ "logits/rejected": -1.9609375,
737
+ "logps/chosen": -840.0,
738
+ "logps/rejected": -1584.0,
739
+ "loss": 0.1038,
740
+ "rewards/accuracies": 0.9437500238418579,
741
+ "rewards/chosen": -5.34375,
742
+ "rewards/margins": 7.75,
743
+ "rewards/rejected": -13.125,
744
+ "step": 480
745
+ },
746
+ {
747
+ "epoch": 0.5128205128205128,
748
+ "grad_norm": 22.231590737155383,
749
+ "learning_rate": 2.823671352438608e-07,
750
+ "logits/chosen": -2.4375,
751
+ "logits/rejected": -2.1875,
752
+ "logps/chosen": -788.0,
753
+ "logps/rejected": -1432.0,
754
+ "loss": 0.099,
755
+ "rewards/accuracies": 0.9437500238418579,
756
+ "rewards/chosen": -5.0625,
757
+ "rewards/margins": 6.5625,
758
+ "rewards/rejected": -11.625,
759
+ "step": 490
760
+ },
761
+ {
762
+ "epoch": 0.5232862375719518,
763
+ "grad_norm": 32.66738645617045,
764
+ "learning_rate": 2.73281296951072e-07,
765
+ "logits/chosen": -2.421875,
766
+ "logits/rejected": -2.015625,
767
+ "logps/chosen": -880.0,
768
+ "logps/rejected": -1584.0,
769
+ "loss": 0.0906,
770
+ "rewards/accuracies": 0.96875,
771
+ "rewards/chosen": -5.84375,
772
+ "rewards/margins": 7.4375,
773
+ "rewards/rejected": -13.25,
774
+ "step": 500
775
+ },
776
+ {
777
+ "epoch": 0.533751962323391,
778
+ "grad_norm": 24.851145354651734,
779
+ "learning_rate": 2.641643219871597e-07,
780
+ "logits/chosen": -2.3125,
781
+ "logits/rejected": -1.9921875,
782
+ "logps/chosen": -868.0,
783
+ "logps/rejected": -1536.0,
784
+ "loss": 0.09,
785
+ "rewards/accuracies": 0.96875,
786
+ "rewards/chosen": -5.625,
787
+ "rewards/margins": 7.03125,
788
+ "rewards/rejected": -12.6875,
789
+ "step": 510
790
+ },
791
+ {
792
+ "epoch": 0.54421768707483,
793
+ "grad_norm": 30.824666380700695,
794
+ "learning_rate": 2.550284034980507e-07,
795
+ "logits/chosen": -2.375,
796
+ "logits/rejected": -2.171875,
797
+ "logps/chosen": -808.0,
798
+ "logps/rejected": -1504.0,
799
+ "loss": 0.11,
800
+ "rewards/accuracies": 0.96875,
801
+ "rewards/chosen": -5.0625,
802
+ "rewards/margins": 7.09375,
803
+ "rewards/rejected": -12.125,
804
+ "step": 520
805
+ },
806
+ {
807
+ "epoch": 0.554683411826269,
808
+ "grad_norm": 37.732613885821536,
809
+ "learning_rate": 2.4588575996495794e-07,
810
+ "logits/chosen": -2.578125,
811
+ "logits/rejected": -2.25,
812
+ "logps/chosen": -676.0,
813
+ "logps/rejected": -1352.0,
814
+ "loss": 0.1057,
815
+ "rewards/accuracies": 0.949999988079071,
816
+ "rewards/chosen": -4.0625,
817
+ "rewards/margins": 7.0,
818
+ "rewards/rejected": -11.0625,
819
+ "step": 530
820
+ },
821
+ {
822
+ "epoch": 0.565149136577708,
823
+ "grad_norm": 23.236138548824204,
824
+ "learning_rate": 2.367486188632446e-07,
825
+ "logits/chosen": -2.53125,
826
+ "logits/rejected": -2.25,
827
+ "logps/chosen": -704.0,
828
+ "logps/rejected": -1376.0,
829
+ "loss": 0.0968,
830
+ "rewards/accuracies": 0.9437500238418579,
831
+ "rewards/chosen": -4.21875,
832
+ "rewards/margins": 6.6875,
833
+ "rewards/rejected": -10.875,
834
+ "step": 540
835
+ },
836
+ {
837
+ "epoch": 0.5756148613291471,
838
+ "grad_norm": 18.62210206857554,
839
+ "learning_rate": 2.276292003092593e-07,
840
+ "logits/chosen": -2.421875,
841
+ "logits/rejected": -2.109375,
842
+ "logps/chosen": -836.0,
843
+ "logps/rejected": -1544.0,
844
+ "loss": 0.0824,
845
+ "rewards/accuracies": 0.9624999761581421,
846
+ "rewards/chosen": -5.5,
847
+ "rewards/margins": 7.28125,
848
+ "rewards/rejected": -12.75,
849
+ "step": 550
850
+ },
851
+ {
852
+ "epoch": 0.5860805860805861,
853
+ "grad_norm": 30.39956813704047,
854
+ "learning_rate": 2.185397007170141e-07,
855
+ "logits/chosen": -2.34375,
856
+ "logits/rejected": -1.9375,
857
+ "logps/chosen": -964.0,
858
+ "logps/rejected": -1720.0,
859
+ "loss": 0.0776,
860
+ "rewards/accuracies": 0.96875,
861
+ "rewards/chosen": -6.84375,
862
+ "rewards/margins": 8.0,
863
+ "rewards/rejected": -14.875,
864
+ "step": 560
865
+ },
866
+ {
867
+ "epoch": 0.5965463108320251,
868
+ "grad_norm": 31.863861823036746,
869
+ "learning_rate": 2.094922764865619e-07,
870
+ "logits/chosen": -2.265625,
871
+ "logits/rejected": -2.109375,
872
+ "logps/chosen": -932.0,
873
+ "logps/rejected": -1688.0,
874
+ "loss": 0.0778,
875
+ "rewards/accuracies": 0.9312499761581421,
876
+ "rewards/chosen": -6.3125,
877
+ "rewards/margins": 7.78125,
878
+ "rewards/rejected": -14.125,
879
+ "step": 570
880
+ },
881
+ {
882
+ "epoch": 0.6070120355834642,
883
+ "grad_norm": 22.78774525718653,
884
+ "learning_rate": 2.0049902774588797e-07,
885
+ "logits/chosen": -2.453125,
886
+ "logits/rejected": -2.15625,
887
+ "logps/chosen": -776.0,
888
+ "logps/rejected": -1528.0,
889
+ "loss": 0.0791,
890
+ "rewards/accuracies": 0.956250011920929,
891
+ "rewards/chosen": -5.03125,
892
+ "rewards/margins": 7.65625,
893
+ "rewards/rejected": -12.6875,
894
+ "step": 580
895
+ },
896
+ {
897
+ "epoch": 0.6174777603349032,
898
+ "grad_norm": 24.950505604608463,
899
+ "learning_rate": 1.9157198216806238e-07,
900
+ "logits/chosen": -2.453125,
901
+ "logits/rejected": -2.09375,
902
+ "logps/chosen": -776.0,
903
+ "logps/rejected": -1616.0,
904
+ "loss": 0.0779,
905
+ "rewards/accuracies": 0.9750000238418579,
906
+ "rewards/chosen": -5.03125,
907
+ "rewards/margins": 8.25,
908
+ "rewards/rejected": -13.3125,
909
+ "step": 590
910
+ },
911
+ {
912
+ "epoch": 0.6279434850863422,
913
+ "grad_norm": 39.70214338791234,
914
+ "learning_rate": 1.8272307888529274e-07,
915
+ "logits/chosen": -2.59375,
916
+ "logits/rejected": -2.375,
917
+ "logps/chosen": -824.0,
918
+ "logps/rejected": -1696.0,
919
+ "loss": 0.092,
920
+ "rewards/accuracies": 0.949999988079071,
921
+ "rewards/chosen": -5.125,
922
+ "rewards/margins": 8.5625,
923
+ "rewards/rejected": -13.6875,
924
+ "step": 600
925
+ },
926
+ {
927
+ "epoch": 0.6384092098377813,
928
+ "grad_norm": 24.678139590765365,
929
+ "learning_rate": 1.7396415252139288e-07,
930
+ "logits/chosen": -2.421875,
931
+ "logits/rejected": -2.109375,
932
+ "logps/chosen": -768.0,
933
+ "logps/rejected": -1544.0,
934
+ "loss": 0.0796,
935
+ "rewards/accuracies": 0.9375,
936
+ "rewards/chosen": -4.78125,
937
+ "rewards/margins": 7.9375,
938
+ "rewards/rejected": -12.75,
939
+ "step": 610
940
+ },
941
+ {
942
+ "epoch": 0.6488749345892203,
943
+ "grad_norm": 29.19695220969592,
944
+ "learning_rate": 1.6530691736402316e-07,
945
+ "logits/chosen": -2.484375,
946
+ "logits/rejected": -2.21875,
947
+ "logps/chosen": -768.0,
948
+ "logps/rejected": -1544.0,
949
+ "loss": 0.0724,
950
+ "rewards/accuracies": 0.9437500238418579,
951
+ "rewards/chosen": -4.78125,
952
+ "rewards/margins": 7.9375,
953
+ "rewards/rejected": -12.6875,
954
+ "step": 620
955
+ },
956
+ {
957
+ "epoch": 0.6593406593406593,
958
+ "grad_norm": 17.517613098387617,
959
+ "learning_rate": 1.5676295169786864e-07,
960
+ "logits/chosen": -2.5,
961
+ "logits/rejected": -2.203125,
962
+ "logps/chosen": -828.0,
963
+ "logps/rejected": -1544.0,
964
+ "loss": 0.0929,
965
+ "rewards/accuracies": 0.9437500238418579,
966
+ "rewards/chosen": -5.1875,
967
+ "rewards/margins": 7.5625,
968
+ "rewards/rejected": -12.75,
969
+ "step": 630
970
+ },
971
+ {
972
+ "epoch": 0.6698063840920984,
973
+ "grad_norm": 18.336302496269198,
974
+ "learning_rate": 1.483436823197092e-07,
975
+ "logits/chosen": -2.5,
976
+ "logits/rejected": -2.296875,
977
+ "logps/chosen": -804.0,
978
+ "logps/rejected": -1568.0,
979
+ "loss": 0.0861,
980
+ "rewards/accuracies": 0.9750000238418579,
981
+ "rewards/chosen": -5.28125,
982
+ "rewards/margins": 7.75,
983
+ "rewards/rejected": -13.0625,
984
+ "step": 640
985
+ },
986
+ {
987
+ "epoch": 0.6802721088435374,
988
+ "grad_norm": 19.399010068279768,
989
+ "learning_rate": 1.4006036925609243e-07,
990
+ "logits/chosen": -2.484375,
991
+ "logits/rejected": -2.3125,
992
+ "logps/chosen": -888.0,
993
+ "logps/rejected": -1568.0,
994
+ "loss": 0.0706,
995
+ "rewards/accuracies": 0.9437500238418579,
996
+ "rewards/chosen": -5.875,
997
+ "rewards/margins": 7.125,
998
+ "rewards/rejected": -13.0,
999
+ "step": 650
1000
+ },
1001
+ {
1002
+ "epoch": 0.6907378335949764,
1003
+ "grad_norm": 24.671794614204327,
1004
+ "learning_rate": 1.319240907040458e-07,
1005
+ "logits/chosen": -2.4375,
1006
+ "logits/rejected": -2.15625,
1007
+ "logps/chosen": -844.0,
1008
+ "logps/rejected": -1616.0,
1009
+ "loss": 0.0735,
1010
+ "rewards/accuracies": 0.9624999761581421,
1011
+ "rewards/chosen": -5.4375,
1012
+ "rewards/margins": 7.6875,
1013
+ "rewards/rejected": -13.125,
1014
+ "step": 660
1015
+ },
1016
+ {
1017
+ "epoch": 0.7012035583464155,
1018
+ "grad_norm": 21.656808943027293,
1019
+ "learning_rate": 1.2394572821496948e-07,
1020
+ "logits/chosen": -2.296875,
1021
+ "logits/rejected": -2.015625,
1022
+ "logps/chosen": -852.0,
1023
+ "logps/rejected": -1720.0,
1024
+ "loss": 0.0756,
1025
+ "rewards/accuracies": 0.987500011920929,
1026
+ "rewards/chosen": -5.78125,
1027
+ "rewards/margins": 8.75,
1028
+ "rewards/rejected": -14.5625,
1029
+ "step": 670
1030
+ },
1031
+ {
1032
+ "epoch": 0.7116692830978545,
1033
+ "grad_norm": 37.67983640549423,
1034
+ "learning_rate": 1.1613595214152711e-07,
1035
+ "logits/chosen": -2.34375,
1036
+ "logits/rejected": -2.046875,
1037
+ "logps/chosen": -944.0,
1038
+ "logps/rejected": -1800.0,
1039
+ "loss": 0.0717,
1040
+ "rewards/accuracies": 0.9624999761581421,
1041
+ "rewards/chosen": -6.375,
1042
+ "rewards/margins": 8.75,
1043
+ "rewards/rejected": -15.125,
1044
+ "step": 680
1045
+ },
1046
+ {
1047
+ "epoch": 0.7221350078492935,
1048
+ "grad_norm": 31.204304577384395,
1049
+ "learning_rate": 1.0850520736699362e-07,
1050
+ "logits/chosen": -2.46875,
1051
+ "logits/rejected": -2.203125,
1052
+ "logps/chosen": -960.0,
1053
+ "logps/rejected": -1752.0,
1054
+ "loss": 0.0799,
1055
+ "rewards/accuracies": 0.949999988079071,
1056
+ "rewards/chosen": -6.46875,
1057
+ "rewards/margins": 8.0625,
1058
+ "rewards/rejected": -14.5,
1059
+ "step": 690
1060
+ },
1061
+ {
1062
+ "epoch": 0.7326007326007326,
1063
+ "grad_norm": 14.65690246648118,
1064
+ "learning_rate": 1.0106369933615042e-07,
1065
+ "logits/chosen": -2.375,
1066
+ "logits/rejected": -2.21875,
1067
+ "logps/chosen": -892.0,
1068
+ "logps/rejected": -1760.0,
1069
+ "loss": 0.066,
1070
+ "rewards/accuracies": 0.96875,
1071
+ "rewards/chosen": -5.84375,
1072
+ "rewards/margins": 8.6875,
1073
+ "rewards/rejected": -14.5,
1074
+ "step": 700
1075
+ },
1076
+ {
1077
+ "epoch": 0.7430664573521716,
1078
+ "grad_norm": 10.13345133625024,
1079
+ "learning_rate": 9.382138040640714e-08,
1080
+ "logits/chosen": -2.515625,
1081
+ "logits/rejected": -2.15625,
1082
+ "logps/chosen": -832.0,
1083
+ "logps/rejected": -1752.0,
1084
+ "loss": 0.0628,
1085
+ "rewards/accuracies": 0.9750000238418579,
1086
+ "rewards/chosen": -5.46875,
1087
+ "rewards/margins": 9.0625,
1088
+ "rewards/rejected": -14.5,
1089
+ "step": 710
1090
+ },
1091
+ {
1092
+ "epoch": 0.7535321821036107,
1093
+ "grad_norm": 26.423649093017993,
1094
+ "learning_rate": 8.678793653740632e-08,
1095
+ "logits/chosen": -2.484375,
1096
+ "logits/rejected": -2.1875,
1097
+ "logps/chosen": -956.0,
1098
+ "logps/rejected": -1744.0,
1099
+ "loss": 0.059,
1100
+ "rewards/accuracies": 0.9750000238418579,
1101
+ "rewards/chosen": -6.21875,
1102
+ "rewards/margins": 8.375,
1103
+ "rewards/rejected": -14.5625,
1104
+ "step": 720
1105
+ },
1106
+ {
1107
+ "epoch": 0.7639979068550498,
1108
+ "grad_norm": 21.35868634461276,
1109
+ "learning_rate": 7.997277433690983e-08,
1110
+ "logits/chosen": -2.28125,
1111
+ "logits/rejected": -2.078125,
1112
+ "logps/chosen": -984.0,
1113
+ "logps/rejected": -1864.0,
1114
+ "loss": 0.0766,
1115
+ "rewards/accuracies": 0.9624999761581421,
1116
+ "rewards/chosen": -7.0625,
1117
+ "rewards/margins": 8.9375,
1118
+ "rewards/rejected": -16.0,
1119
+ "step": 730
1120
+ },
1121
+ {
1122
+ "epoch": 0.7744636316064888,
1123
+ "grad_norm": 19.914720577430103,
1124
+ "learning_rate": 7.338500848029602e-08,
1125
+ "logits/chosen": -2.3125,
1126
+ "logits/rejected": -1.984375,
1127
+ "logps/chosen": -1020.0,
1128
+ "logps/rejected": -1976.0,
1129
+ "loss": 0.0593,
1130
+ "rewards/accuracies": 0.9750000238418579,
1131
+ "rewards/chosen": -7.21875,
1132
+ "rewards/margins": 9.75,
1133
+ "rewards/rejected": -17.0,
1134
+ "step": 740
1135
+ },
1136
+ {
1137
+ "epoch": 0.7849293563579278,
1138
+ "grad_norm": 21.23435309177318,
1139
+ "learning_rate": 6.70334495204884e-08,
1140
+ "logits/chosen": -2.203125,
1141
+ "logits/rejected": -1.859375,
1142
+ "logps/chosen": -968.0,
1143
+ "logps/rejected": -1928.0,
1144
+ "loss": 0.048,
1145
+ "rewards/accuracies": 0.96875,
1146
+ "rewards/chosen": -7.0625,
1147
+ "rewards/margins": 9.75,
1148
+ "rewards/rejected": -16.75,
1149
+ "step": 750
1150
+ },
1151
+ {
1152
+ "epoch": 0.7953950811093669,
1153
+ "grad_norm": 22.491426007618863,
1154
+ "learning_rate": 6.092659210462231e-08,
1155
+ "logits/chosen": -2.328125,
1156
+ "logits/rejected": -1.890625,
1157
+ "logps/chosen": -1020.0,
1158
+ "logps/rejected": -1992.0,
1159
+ "loss": 0.0566,
1160
+ "rewards/accuracies": 0.9750000238418579,
1161
+ "rewards/chosen": -7.4375,
1162
+ "rewards/margins": 9.9375,
1163
+ "rewards/rejected": -17.375,
1164
+ "step": 760
1165
+ },
1166
+ {
1167
+ "epoch": 0.8058608058608059,
1168
+ "grad_norm": 7.849882679120514,
1169
+ "learning_rate": 5.507260361320737e-08,
1170
+ "logits/chosen": -2.203125,
1171
+ "logits/rejected": -1.8828125,
1172
+ "logps/chosen": -1012.0,
1173
+ "logps/rejected": -2064.0,
1174
+ "loss": 0.0432,
1175
+ "rewards/accuracies": 0.981249988079071,
1176
+ "rewards/chosen": -7.21875,
1177
+ "rewards/margins": 10.75,
1178
+ "rewards/rejected": -18.0,
1179
+ "step": 770
1180
+ },
1181
+ {
1182
+ "epoch": 0.8163265306122449,
1183
+ "grad_norm": 16.662472392288155,
1184
+ "learning_rate": 4.947931323697982e-08,
1185
+ "logits/chosen": -2.3125,
1186
+ "logits/rejected": -1.9375,
1187
+ "logps/chosen": -1064.0,
1188
+ "logps/rejected": -2064.0,
1189
+ "loss": 0.0665,
1190
+ "rewards/accuracies": 0.96875,
1191
+ "rewards/chosen": -7.625,
1192
+ "rewards/margins": 10.375,
1193
+ "rewards/rejected": -18.0,
1194
+ "step": 780
1195
+ },
1196
+ {
1197
+ "epoch": 0.826792255363684,
1198
+ "grad_norm": 18.85427926276285,
1199
+ "learning_rate": 4.415420150605398e-08,
1200
+ "logits/chosen": -2.1875,
1201
+ "logits/rejected": -1.90625,
1202
+ "logps/chosen": -932.0,
1203
+ "logps/rejected": -1920.0,
1204
+ "loss": 0.0661,
1205
+ "rewards/accuracies": 0.949999988079071,
1206
+ "rewards/chosen": -6.65625,
1207
+ "rewards/margins": 9.8125,
1208
+ "rewards/rejected": -16.5,
1209
+ "step": 790
1210
+ },
1211
+ {
1212
+ "epoch": 0.837257980115123,
1213
+ "grad_norm": 26.78574368890705,
1214
+ "learning_rate": 3.9104390285376374e-08,
1215
+ "logits/chosen": -2.3125,
1216
+ "logits/rejected": -2.0,
1217
+ "logps/chosen": -928.0,
1218
+ "logps/rejected": -1880.0,
1219
+ "loss": 0.053,
1220
+ "rewards/accuracies": 0.96875,
1221
+ "rewards/chosen": -6.40625,
1222
+ "rewards/margins": 9.4375,
1223
+ "rewards/rejected": -15.875,
1224
+ "step": 800
1225
+ },
1226
+ {
1227
+ "epoch": 0.847723704866562,
1228
+ "grad_norm": 16.63949668329954,
1229
+ "learning_rate": 3.433663324986208e-08,
1230
+ "logits/chosen": -2.328125,
1231
+ "logits/rejected": -2.046875,
1232
+ "logps/chosen": -928.0,
1233
+ "logps/rejected": -1816.0,
1234
+ "loss": 0.0625,
1235
+ "rewards/accuracies": 0.9624999761581421,
1236
+ "rewards/chosen": -6.34375,
1237
+ "rewards/margins": 8.8125,
1238
+ "rewards/rejected": -15.125,
1239
+ "step": 810
1240
+ },
1241
+ {
1242
+ "epoch": 0.858189429618001,
1243
+ "grad_norm": 32.084754893664645,
1244
+ "learning_rate": 2.9857306851953897e-08,
1245
+ "logits/chosen": -2.359375,
1246
+ "logits/rejected": -1.9140625,
1247
+ "logps/chosen": -964.0,
1248
+ "logps/rejected": -1888.0,
1249
+ "loss": 0.054,
1250
+ "rewards/accuracies": 0.96875,
1251
+ "rewards/chosen": -6.875,
1252
+ "rewards/margins": 9.4375,
1253
+ "rewards/rejected": -16.375,
1254
+ "step": 820
1255
+ },
1256
+ {
1257
+ "epoch": 0.8686551543694401,
1258
+ "grad_norm": 19.89404053118834,
1259
+ "learning_rate": 2.567240179368185e-08,
1260
+ "logits/chosen": -2.34375,
1261
+ "logits/rejected": -1.78125,
1262
+ "logps/chosen": -1012.0,
1263
+ "logps/rejected": -1976.0,
1264
+ "loss": 0.0466,
1265
+ "rewards/accuracies": 0.9750000238418579,
1266
+ "rewards/chosen": -7.09375,
1267
+ "rewards/margins": 10.0625,
1268
+ "rewards/rejected": -17.125,
1269
+ "step": 830
1270
+ },
1271
+ {
1272
+ "epoch": 0.8791208791208791,
1273
+ "grad_norm": 11.521868597924879,
1274
+ "learning_rate": 2.1787515014630357e-08,
1275
+ "logits/chosen": -2.1875,
1276
+ "logits/rejected": -1.890625,
1277
+ "logps/chosen": -984.0,
1278
+ "logps/rejected": -2080.0,
1279
+ "loss": 0.0533,
1280
+ "rewards/accuracies": 0.96875,
1281
+ "rewards/chosen": -7.21875,
1282
+ "rewards/margins": 10.75,
1283
+ "rewards/rejected": -18.0,
1284
+ "step": 840
1285
+ },
1286
+ {
1287
+ "epoch": 0.8895866038723181,
1288
+ "grad_norm": 20.478869887407328,
1289
+ "learning_rate": 1.820784220652766e-08,
1290
+ "logits/chosen": -2.25,
1291
+ "logits/rejected": -1.9609375,
1292
+ "logps/chosen": -1024.0,
1293
+ "logps/rejected": -2040.0,
1294
+ "loss": 0.0396,
1295
+ "rewards/accuracies": 0.987500011920929,
1296
+ "rewards/chosen": -7.1875,
1297
+ "rewards/margins": 10.25,
1298
+ "rewards/rejected": -17.5,
1299
+ "step": 850
1300
+ },
1301
+ {
1302
+ "epoch": 0.9000523286237572,
1303
+ "grad_norm": 27.37618448209703,
1304
+ "learning_rate": 1.4938170864468636e-08,
1305
+ "logits/chosen": -2.265625,
1306
+ "logits/rejected": -1.78125,
1307
+ "logps/chosen": -984.0,
1308
+ "logps/rejected": -2096.0,
1309
+ "loss": 0.0529,
1310
+ "rewards/accuracies": 0.9750000238418579,
1311
+ "rewards/chosen": -7.1875,
1312
+ "rewards/margins": 11.125,
1313
+ "rewards/rejected": -18.375,
1314
+ "step": 860
1315
+ },
1316
+ {
1317
+ "epoch": 0.9105180533751962,
1318
+ "grad_norm": 27.12820939331472,
1319
+ "learning_rate": 1.1982873884064465e-08,
1320
+ "logits/chosen": -2.3125,
1321
+ "logits/rejected": -1.90625,
1322
+ "logps/chosen": -1020.0,
1323
+ "logps/rejected": -1976.0,
1324
+ "loss": 0.0596,
1325
+ "rewards/accuracies": 0.9750000238418579,
1326
+ "rewards/chosen": -7.09375,
1327
+ "rewards/margins": 9.9375,
1328
+ "rewards/rejected": -17.0,
1329
+ "step": 870
1330
+ },
1331
+ {
1332
+ "epoch": 0.9209837781266352,
1333
+ "grad_norm": 22.490629878443173,
1334
+ "learning_rate": 9.345903713082304e-09,
1335
+ "logits/chosen": -2.234375,
1336
+ "logits/rejected": -1.734375,
1337
+ "logps/chosen": -1088.0,
1338
+ "logps/rejected": -2112.0,
1339
+ "loss": 0.056,
1340
+ "rewards/accuracies": 0.9750000238418579,
1341
+ "rewards/chosen": -7.625,
1342
+ "rewards/margins": 10.375,
1343
+ "rewards/rejected": -18.0,
1344
+ "step": 880
1345
+ },
1346
+ {
1347
+ "epoch": 0.9314495028780743,
1348
+ "grad_norm": 16.052382854562193,
1349
+ "learning_rate": 7.030787065396865e-09,
1350
+ "logits/chosen": -2.21875,
1351
+ "logits/rejected": -1.78125,
1352
+ "logps/chosen": -1004.0,
1353
+ "logps/rejected": -2112.0,
1354
+ "loss": 0.0556,
1355
+ "rewards/accuracies": 0.9937499761581421,
1356
+ "rewards/chosen": -7.09375,
1357
+ "rewards/margins": 11.3125,
1358
+ "rewards/rejected": -18.375,
1359
+ "step": 890
1360
+ },
1361
+ {
1362
+ "epoch": 0.9419152276295133,
1363
+ "grad_norm": 7.2739971268908326,
1364
+ "learning_rate": 5.04062020432286e-09,
1365
+ "logits/chosen": -2.390625,
1366
+ "logits/rejected": -1.8671875,
1367
+ "logps/chosen": -1048.0,
1368
+ "logps/rejected": -2080.0,
1369
+ "loss": 0.0627,
1370
+ "rewards/accuracies": 0.96875,
1371
+ "rewards/chosen": -7.34375,
1372
+ "rewards/margins": 10.5625,
1373
+ "rewards/rejected": -17.875,
1374
+ "step": 900
1375
+ },
1376
+ {
1377
+ "epoch": 0.9523809523809523,
1378
+ "grad_norm": 29.919042800555964,
1379
+ "learning_rate": 3.3780648016376866e-09,
1380
+ "logits/chosen": -2.234375,
1381
+ "logits/rejected": -1.8984375,
1382
+ "logps/chosen": -1048.0,
1383
+ "logps/rejected": -2000.0,
1384
+ "loss": 0.0575,
1385
+ "rewards/accuracies": 0.96875,
1386
+ "rewards/chosen": -7.34375,
1387
+ "rewards/margins": 9.875,
1388
+ "rewards/rejected": -17.25,
1389
+ "step": 910
1390
+ },
1391
+ {
1392
+ "epoch": 0.9628466771323915,
1393
+ "grad_norm": 50.810549611199676,
1394
+ "learning_rate": 2.0453443778310766e-09,
1395
+ "logits/chosen": -2.25,
1396
+ "logits/rejected": -1.8125,
1397
+ "logps/chosen": -992.0,
1398
+ "logps/rejected": -1984.0,
1399
+ "loss": 0.0484,
1400
+ "rewards/accuracies": 0.987500011920929,
1401
+ "rewards/chosen": -7.09375,
1402
+ "rewards/margins": 10.0625,
1403
+ "rewards/rejected": -17.125,
1404
+ "step": 920
1405
+ },
1406
+ {
1407
+ "epoch": 0.9733124018838305,
1408
+ "grad_norm": 20.362035094083513,
1409
+ "learning_rate": 1.0442413283435758e-09,
1410
+ "logits/chosen": -2.234375,
1411
+ "logits/rejected": -1.734375,
1412
+ "logps/chosen": -964.0,
1413
+ "logps/rejected": -1920.0,
1414
+ "loss": 0.0598,
1415
+ "rewards/accuracies": 0.9750000238418579,
1416
+ "rewards/chosen": -7.0625,
1417
+ "rewards/margins": 9.5625,
1418
+ "rewards/rejected": -16.625,
1419
+ "step": 930
1420
+ },
1421
+ {
1422
+ "epoch": 0.9837781266352695,
1423
+ "grad_norm": 22.983688568193077,
1424
+ "learning_rate": 3.760945397705828e-10,
1425
+ "logits/chosen": -2.234375,
1426
+ "logits/rejected": -1.96875,
1427
+ "logps/chosen": -988.0,
1428
+ "logps/rejected": -2016.0,
1429
+ "loss": 0.0503,
1430
+ "rewards/accuracies": 0.981249988079071,
1431
+ "rewards/chosen": -6.8125,
1432
+ "rewards/margins": 10.3125,
1433
+ "rewards/rejected": -17.125,
1434
+ "step": 940
1435
+ },
1436
+ {
1437
+ "epoch": 0.9942438513867086,
1438
+ "grad_norm": 18.89450295595607,
1439
+ "learning_rate": 4.17975992204056e-11,
1440
+ "logits/chosen": -2.21875,
1441
+ "logits/rejected": -1.828125,
1442
+ "logps/chosen": -1024.0,
1443
+ "logps/rejected": -2040.0,
1444
+ "loss": 0.0406,
1445
+ "rewards/accuracies": 0.981249988079071,
1446
+ "rewards/chosen": -7.125,
1447
+ "rewards/margins": 10.4375,
1448
+ "rewards/rejected": -17.5,
1449
+ "step": 950
1450
+ },
1451
+ {
1452
+ "epoch": 0.9994767137624281,
1453
+ "step": 955,
1454
+ "total_flos": 0.0,
1455
+ "train_loss": 0.15591317970715268,
1456
+ "train_runtime": 13974.7976,
1457
+ "train_samples_per_second": 8.749,
1458
+ "train_steps_per_second": 0.068
1459
+ }
1460
+ ],
1461
+ "logging_steps": 10,
1462
+ "max_steps": 955,
1463
+ "num_input_tokens_seen": 0,
1464
+ "num_train_epochs": 1,
1465
+ "save_steps": 100,
1466
+ "stateful_callbacks": {
1467
+ "TrainerControl": {
1468
+ "args": {
1469
+ "should_epoch_stop": false,
1470
+ "should_evaluate": false,
1471
+ "should_log": false,
1472
+ "should_save": true,
1473
+ "should_training_stop": true
1474
+ },
1475
+ "attributes": {}
1476
+ }
1477
+ },
1478
+ "total_flos": 0.0,
1479
+ "train_batch_size": 8,
1480
+ "trial_name": null,
1481
+ "trial_params": null
1482
+ }