navjordj commited on
Commit
65c4844
1 Parent(s): 142c87f

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 19.0,
3
+ "eval_gen_len": 45.94139194139194,
4
+ "eval_loss": 2.0570528507232666,
5
+ "eval_rouge1": 33.0932,
6
+ "eval_rouge2": 14.9633,
7
+ "eval_rougeL": 28.6426,
8
+ "eval_rougeLsum": 30.8639,
9
+ "eval_runtime": 69.1967,
10
+ "eval_samples": 819,
11
+ "eval_samples_per_second": 11.836,
12
+ "eval_steps_per_second": 0.751,
13
+ "predict_gen_len": 45.05230769230769,
14
+ "predict_loss": 2.070528030395508,
15
+ "predict_rouge1": 33.1232,
16
+ "predict_rouge2": 14.9087,
17
+ "predict_rougeL": 28.6061,
18
+ "predict_rougeLsum": 30.7046,
19
+ "predict_runtime": 102.9053,
20
+ "predict_samples": 1300,
21
+ "predict_samples_per_second": 12.633,
22
+ "predict_steps_per_second": 0.797,
23
+ "train_loss": 2.4905450729393737,
24
+ "train_runtime": 8327.4677,
25
+ "train_samples": 10874,
26
+ "train_samples_per_second": 26.116,
27
+ "train_steps_per_second": 0.408
28
+ }
eval_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 19.0,
3
+ "eval_gen_len": 45.94139194139194,
4
+ "eval_loss": 2.0570528507232666,
5
+ "eval_rouge1": 33.0932,
6
+ "eval_rouge2": 14.9633,
7
+ "eval_rougeL": 28.6426,
8
+ "eval_rougeLsum": 30.8639,
9
+ "eval_runtime": 69.1967,
10
+ "eval_samples": 819,
11
+ "eval_samples_per_second": 11.836,
12
+ "eval_steps_per_second": 0.751
13
+ }
generated_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
predict_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_gen_len": 45.05230769230769,
3
+ "predict_loss": 2.070528030395508,
4
+ "predict_rouge1": 33.1232,
5
+ "predict_rouge2": 14.9087,
6
+ "predict_rougeL": 28.6061,
7
+ "predict_rougeLsum": 30.7046,
8
+ "predict_runtime": 102.9053,
9
+ "predict_samples": 1300,
10
+ "predict_samples_per_second": 12.633,
11
+ "predict_steps_per_second": 0.797
12
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 19.0,
3
+ "train_loss": 2.4905450729393737,
4
+ "train_runtime": 8327.4677,
5
+ "train_samples": 10874,
6
+ "train_samples_per_second": 26.116,
7
+ "train_steps_per_second": 0.408
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1046 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.0570528507232666,
3
+ "best_model_checkpoint": "t5-base-snl/checkpoint-2890",
4
+ "epoch": 19.0,
5
+ "global_step": 3230,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.15,
12
+ "learning_rate": 4.9632352941176476e-05,
13
+ "loss": 4.9792,
14
+ "step": 25
15
+ },
16
+ {
17
+ "epoch": 0.29,
18
+ "learning_rate": 4.9264705882352944e-05,
19
+ "loss": 3.7166,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 0.44,
24
+ "learning_rate": 4.889705882352941e-05,
25
+ "loss": 3.2528,
26
+ "step": 75
27
+ },
28
+ {
29
+ "epoch": 0.59,
30
+ "learning_rate": 4.8529411764705885e-05,
31
+ "loss": 3.0823,
32
+ "step": 100
33
+ },
34
+ {
35
+ "epoch": 0.74,
36
+ "learning_rate": 4.816176470588236e-05,
37
+ "loss": 3.0381,
38
+ "step": 125
39
+ },
40
+ {
41
+ "epoch": 0.88,
42
+ "learning_rate": 4.7794117647058826e-05,
43
+ "loss": 2.9943,
44
+ "step": 150
45
+ },
46
+ {
47
+ "epoch": 1.0,
48
+ "eval_gen_len": 18.976800976800977,
49
+ "eval_loss": 2.2042253017425537,
50
+ "eval_rouge1": 28.1135,
51
+ "eval_rouge2": 13.7477,
52
+ "eval_rougeL": 25.4842,
53
+ "eval_rougeLsum": 26.6467,
54
+ "eval_runtime": 22.885,
55
+ "eval_samples_per_second": 35.788,
56
+ "eval_steps_per_second": 2.272,
57
+ "step": 170
58
+ },
59
+ {
60
+ "epoch": 1.03,
61
+ "learning_rate": 4.742647058823529e-05,
62
+ "loss": 2.9188,
63
+ "step": 175
64
+ },
65
+ {
66
+ "epoch": 1.18,
67
+ "learning_rate": 4.705882352941177e-05,
68
+ "loss": 2.8824,
69
+ "step": 200
70
+ },
71
+ {
72
+ "epoch": 1.32,
73
+ "learning_rate": 4.669117647058824e-05,
74
+ "loss": 2.8751,
75
+ "step": 225
76
+ },
77
+ {
78
+ "epoch": 1.47,
79
+ "learning_rate": 4.632352941176471e-05,
80
+ "loss": 2.8037,
81
+ "step": 250
82
+ },
83
+ {
84
+ "epoch": 1.62,
85
+ "learning_rate": 4.5955882352941176e-05,
86
+ "loss": 2.7824,
87
+ "step": 275
88
+ },
89
+ {
90
+ "epoch": 1.76,
91
+ "learning_rate": 4.558823529411765e-05,
92
+ "loss": 2.7789,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 1.91,
97
+ "learning_rate": 4.522058823529412e-05,
98
+ "loss": 2.7955,
99
+ "step": 325
100
+ },
101
+ {
102
+ "epoch": 2.0,
103
+ "eval_gen_len": 18.985347985347985,
104
+ "eval_loss": 2.1561412811279297,
105
+ "eval_rouge1": 28.5159,
106
+ "eval_rouge2": 14.3492,
107
+ "eval_rougeL": 26.0596,
108
+ "eval_rougeLsum": 27.2431,
109
+ "eval_runtime": 23.1378,
110
+ "eval_samples_per_second": 35.397,
111
+ "eval_steps_per_second": 2.247,
112
+ "step": 340
113
+ },
114
+ {
115
+ "epoch": 2.06,
116
+ "learning_rate": 4.485294117647059e-05,
117
+ "loss": 2.7598,
118
+ "step": 350
119
+ },
120
+ {
121
+ "epoch": 2.21,
122
+ "learning_rate": 4.448529411764706e-05,
123
+ "loss": 2.7091,
124
+ "step": 375
125
+ },
126
+ {
127
+ "epoch": 2.35,
128
+ "learning_rate": 4.411764705882353e-05,
129
+ "loss": 2.7055,
130
+ "step": 400
131
+ },
132
+ {
133
+ "epoch": 2.5,
134
+ "learning_rate": 4.375e-05,
135
+ "loss": 2.7163,
136
+ "step": 425
137
+ },
138
+ {
139
+ "epoch": 2.65,
140
+ "learning_rate": 4.3382352941176474e-05,
141
+ "loss": 2.6844,
142
+ "step": 450
143
+ },
144
+ {
145
+ "epoch": 2.79,
146
+ "learning_rate": 4.301470588235295e-05,
147
+ "loss": 2.686,
148
+ "step": 475
149
+ },
150
+ {
151
+ "epoch": 2.94,
152
+ "learning_rate": 4.2647058823529415e-05,
153
+ "loss": 2.6378,
154
+ "step": 500
155
+ },
156
+ {
157
+ "epoch": 3.0,
158
+ "eval_gen_len": 18.99145299145299,
159
+ "eval_loss": 2.130974531173706,
160
+ "eval_rouge1": 28.9554,
161
+ "eval_rouge2": 14.6901,
162
+ "eval_rougeL": 26.4208,
163
+ "eval_rougeLsum": 27.5523,
164
+ "eval_runtime": 23.1305,
165
+ "eval_samples_per_second": 35.408,
166
+ "eval_steps_per_second": 2.248,
167
+ "step": 510
168
+ },
169
+ {
170
+ "epoch": 3.09,
171
+ "learning_rate": 4.227941176470588e-05,
172
+ "loss": 2.6666,
173
+ "step": 525
174
+ },
175
+ {
176
+ "epoch": 3.24,
177
+ "learning_rate": 4.1911764705882356e-05,
178
+ "loss": 2.6372,
179
+ "step": 550
180
+ },
181
+ {
182
+ "epoch": 3.38,
183
+ "learning_rate": 4.154411764705883e-05,
184
+ "loss": 2.6506,
185
+ "step": 575
186
+ },
187
+ {
188
+ "epoch": 3.53,
189
+ "learning_rate": 4.11764705882353e-05,
190
+ "loss": 2.6104,
191
+ "step": 600
192
+ },
193
+ {
194
+ "epoch": 3.68,
195
+ "learning_rate": 4.0808823529411765e-05,
196
+ "loss": 2.5946,
197
+ "step": 625
198
+ },
199
+ {
200
+ "epoch": 3.82,
201
+ "learning_rate": 4.044117647058824e-05,
202
+ "loss": 2.6182,
203
+ "step": 650
204
+ },
205
+ {
206
+ "epoch": 3.97,
207
+ "learning_rate": 4.007352941176471e-05,
208
+ "loss": 2.5962,
209
+ "step": 675
210
+ },
211
+ {
212
+ "epoch": 4.0,
213
+ "eval_gen_len": 18.99145299145299,
214
+ "eval_loss": 2.1109659671783447,
215
+ "eval_rouge1": 29.381,
216
+ "eval_rouge2": 15.1503,
217
+ "eval_rougeL": 26.8406,
218
+ "eval_rougeLsum": 27.9653,
219
+ "eval_runtime": 23.0996,
220
+ "eval_samples_per_second": 35.455,
221
+ "eval_steps_per_second": 2.251,
222
+ "step": 680
223
+ },
224
+ {
225
+ "epoch": 4.12,
226
+ "learning_rate": 3.970588235294117e-05,
227
+ "loss": 2.5478,
228
+ "step": 700
229
+ },
230
+ {
231
+ "epoch": 4.26,
232
+ "learning_rate": 3.933823529411765e-05,
233
+ "loss": 2.5601,
234
+ "step": 725
235
+ },
236
+ {
237
+ "epoch": 4.41,
238
+ "learning_rate": 3.897058823529412e-05,
239
+ "loss": 2.5793,
240
+ "step": 750
241
+ },
242
+ {
243
+ "epoch": 4.56,
244
+ "learning_rate": 3.8602941176470595e-05,
245
+ "loss": 2.5655,
246
+ "step": 775
247
+ },
248
+ {
249
+ "epoch": 4.71,
250
+ "learning_rate": 3.8235294117647055e-05,
251
+ "loss": 2.5686,
252
+ "step": 800
253
+ },
254
+ {
255
+ "epoch": 4.85,
256
+ "learning_rate": 3.786764705882353e-05,
257
+ "loss": 2.5704,
258
+ "step": 825
259
+ },
260
+ {
261
+ "epoch": 5.0,
262
+ "learning_rate": 3.7500000000000003e-05,
263
+ "loss": 2.5369,
264
+ "step": 850
265
+ },
266
+ {
267
+ "epoch": 5.0,
268
+ "eval_gen_len": 18.996336996336996,
269
+ "eval_loss": 2.1019859313964844,
270
+ "eval_rouge1": 29.5767,
271
+ "eval_rouge2": 15.2692,
272
+ "eval_rougeL": 27.0113,
273
+ "eval_rougeLsum": 28.1849,
274
+ "eval_runtime": 22.9206,
275
+ "eval_samples_per_second": 35.732,
276
+ "eval_steps_per_second": 2.269,
277
+ "step": 850
278
+ },
279
+ {
280
+ "epoch": 5.15,
281
+ "learning_rate": 3.713235294117647e-05,
282
+ "loss": 2.5257,
283
+ "step": 875
284
+ },
285
+ {
286
+ "epoch": 5.29,
287
+ "learning_rate": 3.6764705882352945e-05,
288
+ "loss": 2.5294,
289
+ "step": 900
290
+ },
291
+ {
292
+ "epoch": 5.44,
293
+ "learning_rate": 3.639705882352941e-05,
294
+ "loss": 2.5188,
295
+ "step": 925
296
+ },
297
+ {
298
+ "epoch": 5.59,
299
+ "learning_rate": 3.6029411764705886e-05,
300
+ "loss": 2.5164,
301
+ "step": 950
302
+ },
303
+ {
304
+ "epoch": 5.74,
305
+ "learning_rate": 3.566176470588235e-05,
306
+ "loss": 2.4973,
307
+ "step": 975
308
+ },
309
+ {
310
+ "epoch": 5.88,
311
+ "learning_rate": 3.529411764705883e-05,
312
+ "loss": 2.5103,
313
+ "step": 1000
314
+ },
315
+ {
316
+ "epoch": 6.0,
317
+ "eval_gen_len": 18.996336996336996,
318
+ "eval_loss": 2.090707302093506,
319
+ "eval_rouge1": 29.6354,
320
+ "eval_rouge2": 15.434,
321
+ "eval_rougeL": 27.0893,
322
+ "eval_rougeLsum": 28.2703,
323
+ "eval_runtime": 22.9931,
324
+ "eval_samples_per_second": 35.619,
325
+ "eval_steps_per_second": 2.262,
326
+ "step": 1020
327
+ },
328
+ {
329
+ "epoch": 6.03,
330
+ "learning_rate": 3.4926470588235294e-05,
331
+ "loss": 2.4817,
332
+ "step": 1025
333
+ },
334
+ {
335
+ "epoch": 6.18,
336
+ "learning_rate": 3.455882352941177e-05,
337
+ "loss": 2.4662,
338
+ "step": 1050
339
+ },
340
+ {
341
+ "epoch": 6.32,
342
+ "learning_rate": 3.4191176470588236e-05,
343
+ "loss": 2.4879,
344
+ "step": 1075
345
+ },
346
+ {
347
+ "epoch": 6.47,
348
+ "learning_rate": 3.382352941176471e-05,
349
+ "loss": 2.4666,
350
+ "step": 1100
351
+ },
352
+ {
353
+ "epoch": 6.62,
354
+ "learning_rate": 3.345588235294118e-05,
355
+ "loss": 2.4908,
356
+ "step": 1125
357
+ },
358
+ {
359
+ "epoch": 6.76,
360
+ "learning_rate": 3.308823529411765e-05,
361
+ "loss": 2.4887,
362
+ "step": 1150
363
+ },
364
+ {
365
+ "epoch": 6.91,
366
+ "learning_rate": 3.272058823529412e-05,
367
+ "loss": 2.4524,
368
+ "step": 1175
369
+ },
370
+ {
371
+ "epoch": 7.0,
372
+ "eval_gen_len": 18.996336996336996,
373
+ "eval_loss": 2.0839579105377197,
374
+ "eval_rouge1": 29.7812,
375
+ "eval_rouge2": 15.4963,
376
+ "eval_rougeL": 27.2779,
377
+ "eval_rougeLsum": 28.385,
378
+ "eval_runtime": 23.0064,
379
+ "eval_samples_per_second": 35.599,
380
+ "eval_steps_per_second": 2.26,
381
+ "step": 1190
382
+ },
383
+ {
384
+ "epoch": 7.06,
385
+ "learning_rate": 3.235294117647059e-05,
386
+ "loss": 2.4526,
387
+ "step": 1200
388
+ },
389
+ {
390
+ "epoch": 7.21,
391
+ "learning_rate": 3.198529411764706e-05,
392
+ "loss": 2.4316,
393
+ "step": 1225
394
+ },
395
+ {
396
+ "epoch": 7.35,
397
+ "learning_rate": 3.161764705882353e-05,
398
+ "loss": 2.4511,
399
+ "step": 1250
400
+ },
401
+ {
402
+ "epoch": 7.5,
403
+ "learning_rate": 3.125e-05,
404
+ "loss": 2.4642,
405
+ "step": 1275
406
+ },
407
+ {
408
+ "epoch": 7.65,
409
+ "learning_rate": 3.0882352941176475e-05,
410
+ "loss": 2.4387,
411
+ "step": 1300
412
+ },
413
+ {
414
+ "epoch": 7.79,
415
+ "learning_rate": 3.0514705882352945e-05,
416
+ "loss": 2.477,
417
+ "step": 1325
418
+ },
419
+ {
420
+ "epoch": 7.94,
421
+ "learning_rate": 3.0147058823529413e-05,
422
+ "loss": 2.4472,
423
+ "step": 1350
424
+ },
425
+ {
426
+ "epoch": 8.0,
427
+ "eval_gen_len": 18.996336996336996,
428
+ "eval_loss": 2.0799622535705566,
429
+ "eval_rouge1": 29.6011,
430
+ "eval_rouge2": 15.5138,
431
+ "eval_rougeL": 27.1381,
432
+ "eval_rougeLsum": 28.2799,
433
+ "eval_runtime": 22.9827,
434
+ "eval_samples_per_second": 35.636,
435
+ "eval_steps_per_second": 2.263,
436
+ "step": 1360
437
+ },
438
+ {
439
+ "epoch": 8.09,
440
+ "learning_rate": 2.9779411764705883e-05,
441
+ "loss": 2.4296,
442
+ "step": 1375
443
+ },
444
+ {
445
+ "epoch": 8.24,
446
+ "learning_rate": 2.9411764705882354e-05,
447
+ "loss": 2.4109,
448
+ "step": 1400
449
+ },
450
+ {
451
+ "epoch": 8.38,
452
+ "learning_rate": 2.9044117647058828e-05,
453
+ "loss": 2.4181,
454
+ "step": 1425
455
+ },
456
+ {
457
+ "epoch": 8.53,
458
+ "learning_rate": 2.8676470588235295e-05,
459
+ "loss": 2.4089,
460
+ "step": 1450
461
+ },
462
+ {
463
+ "epoch": 8.68,
464
+ "learning_rate": 2.8308823529411766e-05,
465
+ "loss": 2.4518,
466
+ "step": 1475
467
+ },
468
+ {
469
+ "epoch": 8.82,
470
+ "learning_rate": 2.7941176470588236e-05,
471
+ "loss": 2.4271,
472
+ "step": 1500
473
+ },
474
+ {
475
+ "epoch": 8.97,
476
+ "learning_rate": 2.757352941176471e-05,
477
+ "loss": 2.4089,
478
+ "step": 1525
479
+ },
480
+ {
481
+ "epoch": 9.0,
482
+ "eval_gen_len": 18.996336996336996,
483
+ "eval_loss": 2.075223207473755,
484
+ "eval_rouge1": 29.7647,
485
+ "eval_rouge2": 15.6183,
486
+ "eval_rougeL": 27.318,
487
+ "eval_rougeLsum": 28.4747,
488
+ "eval_runtime": 22.8902,
489
+ "eval_samples_per_second": 35.779,
490
+ "eval_steps_per_second": 2.272,
491
+ "step": 1530
492
+ },
493
+ {
494
+ "epoch": 9.12,
495
+ "learning_rate": 2.7205882352941174e-05,
496
+ "loss": 2.4048,
497
+ "step": 1550
498
+ },
499
+ {
500
+ "epoch": 9.26,
501
+ "learning_rate": 2.6838235294117648e-05,
502
+ "loss": 2.4132,
503
+ "step": 1575
504
+ },
505
+ {
506
+ "epoch": 9.41,
507
+ "learning_rate": 2.647058823529412e-05,
508
+ "loss": 2.3885,
509
+ "step": 1600
510
+ },
511
+ {
512
+ "epoch": 9.56,
513
+ "learning_rate": 2.6102941176470593e-05,
514
+ "loss": 2.4007,
515
+ "step": 1625
516
+ },
517
+ {
518
+ "epoch": 9.71,
519
+ "learning_rate": 2.5735294117647057e-05,
520
+ "loss": 2.4089,
521
+ "step": 1650
522
+ },
523
+ {
524
+ "epoch": 9.85,
525
+ "learning_rate": 2.536764705882353e-05,
526
+ "loss": 2.3912,
527
+ "step": 1675
528
+ },
529
+ {
530
+ "epoch": 10.0,
531
+ "learning_rate": 2.5e-05,
532
+ "loss": 2.4011,
533
+ "step": 1700
534
+ },
535
+ {
536
+ "epoch": 10.0,
537
+ "eval_gen_len": 19.0,
538
+ "eval_loss": 2.071033239364624,
539
+ "eval_rouge1": 29.6533,
540
+ "eval_rouge2": 15.5536,
541
+ "eval_rougeL": 27.2687,
542
+ "eval_rougeLsum": 28.4457,
543
+ "eval_runtime": 23.0214,
544
+ "eval_samples_per_second": 35.576,
545
+ "eval_steps_per_second": 2.259,
546
+ "step": 1700
547
+ },
548
+ {
549
+ "epoch": 10.15,
550
+ "learning_rate": 2.4632352941176472e-05,
551
+ "loss": 2.4049,
552
+ "step": 1725
553
+ },
554
+ {
555
+ "epoch": 10.29,
556
+ "learning_rate": 2.4264705882352942e-05,
557
+ "loss": 2.3802,
558
+ "step": 1750
559
+ },
560
+ {
561
+ "epoch": 10.44,
562
+ "learning_rate": 2.3897058823529413e-05,
563
+ "loss": 2.3688,
564
+ "step": 1775
565
+ },
566
+ {
567
+ "epoch": 10.59,
568
+ "learning_rate": 2.3529411764705884e-05,
569
+ "loss": 2.3897,
570
+ "step": 1800
571
+ },
572
+ {
573
+ "epoch": 10.74,
574
+ "learning_rate": 2.3161764705882354e-05,
575
+ "loss": 2.3464,
576
+ "step": 1825
577
+ },
578
+ {
579
+ "epoch": 10.88,
580
+ "learning_rate": 2.2794117647058825e-05,
581
+ "loss": 2.3792,
582
+ "step": 1850
583
+ },
584
+ {
585
+ "epoch": 11.0,
586
+ "eval_gen_len": 19.0,
587
+ "eval_loss": 2.0655674934387207,
588
+ "eval_rouge1": 29.8668,
589
+ "eval_rouge2": 15.6931,
590
+ "eval_rougeL": 27.4208,
591
+ "eval_rougeLsum": 28.5477,
592
+ "eval_runtime": 21.951,
593
+ "eval_samples_per_second": 37.31,
594
+ "eval_steps_per_second": 2.369,
595
+ "step": 1870
596
+ },
597
+ {
598
+ "epoch": 11.03,
599
+ "learning_rate": 2.2426470588235296e-05,
600
+ "loss": 2.3783,
601
+ "step": 1875
602
+ },
603
+ {
604
+ "epoch": 11.18,
605
+ "learning_rate": 2.2058823529411766e-05,
606
+ "loss": 2.3446,
607
+ "step": 1900
608
+ },
609
+ {
610
+ "epoch": 11.32,
611
+ "learning_rate": 2.1691176470588237e-05,
612
+ "loss": 2.3929,
613
+ "step": 1925
614
+ },
615
+ {
616
+ "epoch": 11.47,
617
+ "learning_rate": 2.1323529411764707e-05,
618
+ "loss": 2.374,
619
+ "step": 1950
620
+ },
621
+ {
622
+ "epoch": 11.62,
623
+ "learning_rate": 2.0955882352941178e-05,
624
+ "loss": 2.3544,
625
+ "step": 1975
626
+ },
627
+ {
628
+ "epoch": 11.76,
629
+ "learning_rate": 2.058823529411765e-05,
630
+ "loss": 2.357,
631
+ "step": 2000
632
+ },
633
+ {
634
+ "epoch": 11.91,
635
+ "learning_rate": 2.022058823529412e-05,
636
+ "loss": 2.3588,
637
+ "step": 2025
638
+ },
639
+ {
640
+ "epoch": 12.0,
641
+ "eval_gen_len": 18.996336996336996,
642
+ "eval_loss": 2.0634803771972656,
643
+ "eval_rouge1": 29.8378,
644
+ "eval_rouge2": 15.682,
645
+ "eval_rougeL": 27.4635,
646
+ "eval_rougeLsum": 28.5803,
647
+ "eval_runtime": 22.98,
648
+ "eval_samples_per_second": 35.64,
649
+ "eval_steps_per_second": 2.263,
650
+ "step": 2040
651
+ },
652
+ {
653
+ "epoch": 12.06,
654
+ "learning_rate": 1.9852941176470586e-05,
655
+ "loss": 2.3503,
656
+ "step": 2050
657
+ },
658
+ {
659
+ "epoch": 12.21,
660
+ "learning_rate": 1.948529411764706e-05,
661
+ "loss": 2.3402,
662
+ "step": 2075
663
+ },
664
+ {
665
+ "epoch": 12.35,
666
+ "learning_rate": 1.9117647058823528e-05,
667
+ "loss": 2.3716,
668
+ "step": 2100
669
+ },
670
+ {
671
+ "epoch": 12.5,
672
+ "learning_rate": 1.8750000000000002e-05,
673
+ "loss": 2.3161,
674
+ "step": 2125
675
+ },
676
+ {
677
+ "epoch": 12.65,
678
+ "learning_rate": 1.8382352941176472e-05,
679
+ "loss": 2.3354,
680
+ "step": 2150
681
+ },
682
+ {
683
+ "epoch": 12.79,
684
+ "learning_rate": 1.8014705882352943e-05,
685
+ "loss": 2.3476,
686
+ "step": 2175
687
+ },
688
+ {
689
+ "epoch": 12.94,
690
+ "learning_rate": 1.7647058823529414e-05,
691
+ "loss": 2.3397,
692
+ "step": 2200
693
+ },
694
+ {
695
+ "epoch": 13.0,
696
+ "eval_gen_len": 19.0,
697
+ "eval_loss": 2.0630440711975098,
698
+ "eval_rouge1": 29.9043,
699
+ "eval_rouge2": 15.7535,
700
+ "eval_rougeL": 27.5065,
701
+ "eval_rougeLsum": 28.6539,
702
+ "eval_runtime": 22.9094,
703
+ "eval_samples_per_second": 35.75,
704
+ "eval_steps_per_second": 2.27,
705
+ "step": 2210
706
+ },
707
+ {
708
+ "epoch": 13.09,
709
+ "learning_rate": 1.7279411764705884e-05,
710
+ "loss": 2.3399,
711
+ "step": 2225
712
+ },
713
+ {
714
+ "epoch": 13.24,
715
+ "learning_rate": 1.6911764705882355e-05,
716
+ "loss": 2.3207,
717
+ "step": 2250
718
+ },
719
+ {
720
+ "epoch": 13.38,
721
+ "learning_rate": 1.6544117647058825e-05,
722
+ "loss": 2.3339,
723
+ "step": 2275
724
+ },
725
+ {
726
+ "epoch": 13.53,
727
+ "learning_rate": 1.6176470588235296e-05,
728
+ "loss": 2.3347,
729
+ "step": 2300
730
+ },
731
+ {
732
+ "epoch": 13.68,
733
+ "learning_rate": 1.5808823529411763e-05,
734
+ "loss": 2.3318,
735
+ "step": 2325
736
+ },
737
+ {
738
+ "epoch": 13.82,
739
+ "learning_rate": 1.5441176470588237e-05,
740
+ "loss": 2.3275,
741
+ "step": 2350
742
+ },
743
+ {
744
+ "epoch": 13.97,
745
+ "learning_rate": 1.5073529411764706e-05,
746
+ "loss": 2.3201,
747
+ "step": 2375
748
+ },
749
+ {
750
+ "epoch": 14.0,
751
+ "eval_gen_len": 18.996336996336996,
752
+ "eval_loss": 2.0599966049194336,
753
+ "eval_rouge1": 29.7926,
754
+ "eval_rouge2": 15.7077,
755
+ "eval_rougeL": 27.4066,
756
+ "eval_rougeLsum": 28.5302,
757
+ "eval_runtime": 23.1182,
758
+ "eval_samples_per_second": 35.427,
759
+ "eval_steps_per_second": 2.249,
760
+ "step": 2380
761
+ },
762
+ {
763
+ "epoch": 14.12,
764
+ "learning_rate": 1.4705882352941177e-05,
765
+ "loss": 2.3204,
766
+ "step": 2400
767
+ },
768
+ {
769
+ "epoch": 14.26,
770
+ "learning_rate": 1.4338235294117647e-05,
771
+ "loss": 2.3592,
772
+ "step": 2425
773
+ },
774
+ {
775
+ "epoch": 14.41,
776
+ "learning_rate": 1.3970588235294118e-05,
777
+ "loss": 2.3275,
778
+ "step": 2450
779
+ },
780
+ {
781
+ "epoch": 14.56,
782
+ "learning_rate": 1.3602941176470587e-05,
783
+ "loss": 2.2936,
784
+ "step": 2475
785
+ },
786
+ {
787
+ "epoch": 14.71,
788
+ "learning_rate": 1.323529411764706e-05,
789
+ "loss": 2.3013,
790
+ "step": 2500
791
+ },
792
+ {
793
+ "epoch": 14.85,
794
+ "learning_rate": 1.2867647058823528e-05,
795
+ "loss": 2.3007,
796
+ "step": 2525
797
+ },
798
+ {
799
+ "epoch": 15.0,
800
+ "learning_rate": 1.25e-05,
801
+ "loss": 2.3241,
802
+ "step": 2550
803
+ },
804
+ {
805
+ "epoch": 15.0,
806
+ "eval_gen_len": 19.0,
807
+ "eval_loss": 2.0615200996398926,
808
+ "eval_rouge1": 29.8536,
809
+ "eval_rouge2": 15.7929,
810
+ "eval_rougeL": 27.4572,
811
+ "eval_rougeLsum": 28.5704,
812
+ "eval_runtime": 22.9087,
813
+ "eval_samples_per_second": 35.751,
814
+ "eval_steps_per_second": 2.27,
815
+ "step": 2550
816
+ },
817
+ {
818
+ "epoch": 15.15,
819
+ "learning_rate": 1.2132352941176471e-05,
820
+ "loss": 2.326,
821
+ "step": 2575
822
+ },
823
+ {
824
+ "epoch": 15.29,
825
+ "learning_rate": 1.1764705882352942e-05,
826
+ "loss": 2.3004,
827
+ "step": 2600
828
+ },
829
+ {
830
+ "epoch": 15.44,
831
+ "learning_rate": 1.1397058823529412e-05,
832
+ "loss": 2.311,
833
+ "step": 2625
834
+ },
835
+ {
836
+ "epoch": 15.59,
837
+ "learning_rate": 1.1029411764705883e-05,
838
+ "loss": 2.3427,
839
+ "step": 2650
840
+ },
841
+ {
842
+ "epoch": 15.74,
843
+ "learning_rate": 1.0661764705882354e-05,
844
+ "loss": 2.2741,
845
+ "step": 2675
846
+ },
847
+ {
848
+ "epoch": 15.88,
849
+ "learning_rate": 1.0294117647058824e-05,
850
+ "loss": 2.3183,
851
+ "step": 2700
852
+ },
853
+ {
854
+ "epoch": 16.0,
855
+ "eval_gen_len": 19.0,
856
+ "eval_loss": 2.0573582649230957,
857
+ "eval_rouge1": 29.7529,
858
+ "eval_rouge2": 15.6729,
859
+ "eval_rougeL": 27.3388,
860
+ "eval_rougeLsum": 28.4678,
861
+ "eval_runtime": 23.1299,
862
+ "eval_samples_per_second": 35.409,
863
+ "eval_steps_per_second": 2.248,
864
+ "step": 2720
865
+ },
866
+ {
867
+ "epoch": 16.03,
868
+ "learning_rate": 9.926470588235293e-06,
869
+ "loss": 2.2934,
870
+ "step": 2725
871
+ },
872
+ {
873
+ "epoch": 16.18,
874
+ "learning_rate": 9.558823529411764e-06,
875
+ "loss": 2.2633,
876
+ "step": 2750
877
+ },
878
+ {
879
+ "epoch": 16.32,
880
+ "learning_rate": 9.191176470588236e-06,
881
+ "loss": 2.2957,
882
+ "step": 2775
883
+ },
884
+ {
885
+ "epoch": 16.47,
886
+ "learning_rate": 8.823529411764707e-06,
887
+ "loss": 2.3083,
888
+ "step": 2800
889
+ },
890
+ {
891
+ "epoch": 16.62,
892
+ "learning_rate": 8.455882352941177e-06,
893
+ "loss": 2.3246,
894
+ "step": 2825
895
+ },
896
+ {
897
+ "epoch": 16.76,
898
+ "learning_rate": 8.088235294117648e-06,
899
+ "loss": 2.2989,
900
+ "step": 2850
901
+ },
902
+ {
903
+ "epoch": 16.91,
904
+ "learning_rate": 7.720588235294119e-06,
905
+ "loss": 2.3346,
906
+ "step": 2875
907
+ },
908
+ {
909
+ "epoch": 17.0,
910
+ "eval_gen_len": 19.0,
911
+ "eval_loss": 2.0570528507232666,
912
+ "eval_rouge1": 29.7443,
913
+ "eval_rouge2": 15.6459,
914
+ "eval_rougeL": 27.3245,
915
+ "eval_rougeLsum": 28.4549,
916
+ "eval_runtime": 22.9331,
917
+ "eval_samples_per_second": 35.713,
918
+ "eval_steps_per_second": 2.267,
919
+ "step": 2890
920
+ },
921
+ {
922
+ "epoch": 17.06,
923
+ "learning_rate": 7.3529411764705884e-06,
924
+ "loss": 2.2887,
925
+ "step": 2900
926
+ },
927
+ {
928
+ "epoch": 17.21,
929
+ "learning_rate": 6.985294117647059e-06,
930
+ "loss": 2.2881,
931
+ "step": 2925
932
+ },
933
+ {
934
+ "epoch": 17.35,
935
+ "learning_rate": 6.61764705882353e-06,
936
+ "loss": 2.3062,
937
+ "step": 2950
938
+ },
939
+ {
940
+ "epoch": 17.5,
941
+ "learning_rate": 6.25e-06,
942
+ "loss": 2.2867,
943
+ "step": 2975
944
+ },
945
+ {
946
+ "epoch": 17.65,
947
+ "learning_rate": 5.882352941176471e-06,
948
+ "loss": 2.3056,
949
+ "step": 3000
950
+ },
951
+ {
952
+ "epoch": 17.79,
953
+ "learning_rate": 5.5147058823529415e-06,
954
+ "loss": 2.3098,
955
+ "step": 3025
956
+ },
957
+ {
958
+ "epoch": 17.94,
959
+ "learning_rate": 5.147058823529412e-06,
960
+ "loss": 2.2932,
961
+ "step": 3050
962
+ },
963
+ {
964
+ "epoch": 18.0,
965
+ "eval_gen_len": 19.0,
966
+ "eval_loss": 2.0577263832092285,
967
+ "eval_rouge1": 29.7467,
968
+ "eval_rouge2": 15.6717,
969
+ "eval_rougeL": 27.3391,
970
+ "eval_rougeLsum": 28.4541,
971
+ "eval_runtime": 23.0624,
972
+ "eval_samples_per_second": 35.512,
973
+ "eval_steps_per_second": 2.255,
974
+ "step": 3060
975
+ },
976
+ {
977
+ "epoch": 18.09,
978
+ "learning_rate": 4.779411764705882e-06,
979
+ "loss": 2.2832,
980
+ "step": 3075
981
+ },
982
+ {
983
+ "epoch": 18.24,
984
+ "learning_rate": 4.411764705882353e-06,
985
+ "loss": 2.289,
986
+ "step": 3100
987
+ },
988
+ {
989
+ "epoch": 18.38,
990
+ "learning_rate": 4.044117647058824e-06,
991
+ "loss": 2.2932,
992
+ "step": 3125
993
+ },
994
+ {
995
+ "epoch": 18.53,
996
+ "learning_rate": 3.6764705882352942e-06,
997
+ "loss": 2.3085,
998
+ "step": 3150
999
+ },
1000
+ {
1001
+ "epoch": 18.68,
1002
+ "learning_rate": 3.308823529411765e-06,
1003
+ "loss": 2.2884,
1004
+ "step": 3175
1005
+ },
1006
+ {
1007
+ "epoch": 18.82,
1008
+ "learning_rate": 2.9411764705882355e-06,
1009
+ "loss": 2.2877,
1010
+ "step": 3200
1011
+ },
1012
+ {
1013
+ "epoch": 18.97,
1014
+ "learning_rate": 2.573529411764706e-06,
1015
+ "loss": 2.2755,
1016
+ "step": 3225
1017
+ },
1018
+ {
1019
+ "epoch": 19.0,
1020
+ "eval_gen_len": 19.0,
1021
+ "eval_loss": 2.0573978424072266,
1022
+ "eval_rouge1": 29.7694,
1023
+ "eval_rouge2": 15.6776,
1024
+ "eval_rougeL": 27.3556,
1025
+ "eval_rougeLsum": 28.4819,
1026
+ "eval_runtime": 22.951,
1027
+ "eval_samples_per_second": 35.685,
1028
+ "eval_steps_per_second": 2.266,
1029
+ "step": 3230
1030
+ },
1031
+ {
1032
+ "epoch": 19.0,
1033
+ "step": 3230,
1034
+ "total_flos": 2.285170027491492e+17,
1035
+ "train_loss": 2.4905450729393737,
1036
+ "train_runtime": 8327.4677,
1037
+ "train_samples_per_second": 26.116,
1038
+ "train_steps_per_second": 0.408
1039
+ }
1040
+ ],
1041
+ "max_steps": 3400,
1042
+ "num_train_epochs": 20,
1043
+ "total_flos": 2.285170027491492e+17,
1044
+ "trial_name": null,
1045
+ "trial_params": null
1046
+ }