sjlee311 committed on
Commit
4f93f31
1 Parent(s): 6c61bf2

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,11 @@ license: mit
3
  base_model: facebook/bart-large-cnn
4
  tags:
5
  - generated_from_trainer
 
 
 
 
 
6
  model-index:
7
  - name: sjlee311bart-large-cnn-finetuned
8
  results: []
@@ -14,6 +19,24 @@ should probably proofread and complete it, then remove this comment. -->
14
  # sjlee311bart-large-cnn-finetuned
15
 
16
  This model is a fine-tuned version of [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn) on an unknown dataset.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  ## Model description
19
 
 
3
  base_model: facebook/bart-large-cnn
4
  tags:
5
  - generated_from_trainer
6
+ metrics:
7
+ - rouge
8
+ - precision
9
+ - recall
10
+ - f1
11
  model-index:
12
  - name: sjlee311bart-large-cnn-finetuned
13
  results: []
 
19
  # sjlee311bart-large-cnn-finetuned
20
 
21
  This model is a fine-tuned version of [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn) on an unknown dataset.
22
+ It achieves the following results on the evaluation set:
23
+ - Loss: 2.1557
24
+ - Rouge1: 49.9356
25
+ - Rouge2: 14.8574
26
+ - Rougel: 22.2849
27
+ - Precision: 86.7404
28
+ - Recall: 86.4333
29
+ - F1: 86.584
30
+ - Hashcode: roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.35.2)
31
+ - Fkgl: 10.01
32
+ - Cloze Score: 17.05
33
+ - Reading Level 13-15: 110
34
+ - Reading Level 11-12: 39
35
+ - Reading Level 16+: 85
36
+ - Reading Level 9-10: 7
37
+ - Reading Level Mode: 13-15
38
+ - Summac Val: 0.57
39
+ - Gen Len: 434.7842
40
 
41
  ## Model description
42
 
all_results.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.99,
3
+ "eval_cloze_score": 17.05,
4
+ "eval_f1": 86.584,
5
+ "eval_fkgl": 10.01,
6
+ "eval_gen_len": 434.7842323651452,
7
+ "eval_hashcode": "roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.35.2)",
8
+ "eval_loss": 2.155679941177368,
9
+ "eval_precision": 86.7404,
10
+ "eval_reading_level_11-12": 39,
11
+ "eval_reading_level_13-15": 110,
12
+ "eval_reading_level_16+": 85,
13
+ "eval_reading_level_9-10": 7,
14
+ "eval_reading_level_mode": "13-15",
15
+ "eval_recall": 86.4333,
16
+ "eval_rouge1": 49.9356,
17
+ "eval_rouge2": 14.8574,
18
+ "eval_rougeL": 22.2849,
19
+ "eval_runtime": 1122.9876,
20
+ "eval_samples": 241,
21
+ "eval_samples_per_second": 0.215,
22
+ "eval_steps_per_second": 0.054,
23
+ "eval_summac_val": 0.57,
24
+ "predict_runtime": 149.2151,
25
+ "predict_samples": 142,
26
+ "predict_samples_per_second": 0.952,
27
+ "predict_steps_per_second": 0.241,
28
+ "summac_predict": 0.56,
29
+ "train_loss": 2.124213267956273,
30
+ "train_runtime": 1043.2,
31
+ "train_samples": 4346,
32
+ "train_samples_per_second": 12.498,
33
+ "train_steps_per_second": 0.779
34
+ }
eval_results.json CHANGED
@@ -1,26 +1,24 @@
1
  {
2
  "epoch": 2.99,
3
- "eval_cloze_score": 17.01,
4
- "eval_f1": 86.7381,
5
  "eval_fkgl": 10.01,
6
- "eval_gen_len": 128.0,
7
  "eval_hashcode": "roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.35.2)",
8
- "eval_loss": 2.3937041759490967,
9
- "eval_precision": 86.95,
10
- "eval_reading_level_11-12": 46,
11
- "eval_reading_level_13-15": 83,
12
- "eval_reading_level_16+": 94,
13
- "eval_reading_level_5-6": 1,
14
- "eval_reading_level_7-8": 3,
15
- "eval_reading_level_9-10": 14,
16
- "eval_reading_level_mode": "16+",
17
- "eval_recall": 86.5305,
18
- "eval_rouge1": 40.8383,
19
- "eval_rouge2": 9.9101,
20
- "eval_rougeL": 21.2025,
21
- "eval_runtime": 355.8584,
22
  "eval_samples": 241,
23
- "eval_samples_per_second": 0.677,
24
- "eval_steps_per_second": 0.171,
25
- "eval_summac_val": 0.61
26
  }
 
1
  {
2
  "epoch": 2.99,
3
+ "eval_cloze_score": 17.05,
4
+ "eval_f1": 86.584,
5
  "eval_fkgl": 10.01,
6
+ "eval_gen_len": 434.7842323651452,
7
  "eval_hashcode": "roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.35.2)",
8
+ "eval_loss": 2.155679941177368,
9
+ "eval_precision": 86.7404,
10
+ "eval_reading_level_11-12": 39,
11
+ "eval_reading_level_13-15": 110,
12
+ "eval_reading_level_16+": 85,
13
+ "eval_reading_level_9-10": 7,
14
+ "eval_reading_level_mode": "13-15",
15
+ "eval_recall": 86.4333,
16
+ "eval_rouge1": 49.9356,
17
+ "eval_rouge2": 14.8574,
18
+ "eval_rougeL": 22.2849,
19
+ "eval_runtime": 1122.9876,
 
 
20
  "eval_samples": 241,
21
+ "eval_samples_per_second": 0.215,
22
+ "eval_steps_per_second": 0.054,
23
+ "eval_summac_val": 0.57
24
  }
predict_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "predict_runtime": 38.5172,
3
  "predict_samples": 142,
4
- "predict_samples_per_second": 3.687,
5
- "predict_steps_per_second": 0.935,
6
- "summac_predict": 0.61
7
  }
 
1
  {
2
+ "predict_runtime": 149.2151,
3
  "predict_samples": 142,
4
+ "predict_samples_per_second": 0.952,
5
+ "predict_steps_per_second": 0.241,
6
+ "summac_predict": 0.56
7
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.99,
3
- "train_loss": 1.5162586585182105,
4
- "train_runtime": 753.3958,
5
  "train_samples": 4346,
6
- "train_samples_per_second": 17.306,
7
- "train_steps_per_second": 1.079
8
  }
 
1
  {
2
  "epoch": 2.99,
3
+ "train_loss": 2.124213267956273,
4
+ "train_runtime": 1043.2,
5
  "train_samples": 4346,
6
+ "train_samples_per_second": 12.498,
7
+ "train_steps_per_second": 0.779
8
  }
trainer_state.json CHANGED
@@ -11,497 +11,497 @@
11
  {
12
  "epoch": 0.04,
13
  "learning_rate": 4.93849938499385e-05,
14
- "loss": 1.7671,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.07,
19
  "learning_rate": 4.8769987699877e-05,
20
- "loss": 1.7167,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.11,
25
  "learning_rate": 4.81549815498155e-05,
26
- "loss": 1.7802,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.15,
31
  "learning_rate": 4.7539975399754e-05,
32
- "loss": 1.7147,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.18,
37
  "learning_rate": 4.6924969249692496e-05,
38
- "loss": 1.7821,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.22,
43
  "learning_rate": 4.6309963099631e-05,
44
- "loss": 1.736,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.26,
49
  "learning_rate": 4.569495694956949e-05,
50
- "loss": 1.7452,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.29,
55
  "learning_rate": 4.5079950799507994e-05,
56
- "loss": 1.7133,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.33,
61
  "learning_rate": 4.4464944649446495e-05,
62
- "loss": 1.7226,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.37,
67
  "learning_rate": 4.3849938499385e-05,
68
- "loss": 1.7183,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.4,
73
  "learning_rate": 4.323493234932349e-05,
74
- "loss": 1.7213,
75
  "step": 110
76
  },
77
  {
78
  "epoch": 0.44,
79
  "learning_rate": 4.261992619926199e-05,
80
- "loss": 1.7129,
81
  "step": 120
82
  },
83
  {
84
  "epoch": 0.48,
85
  "learning_rate": 4.2004920049200495e-05,
86
- "loss": 1.7263,
87
  "step": 130
88
  },
89
  {
90
  "epoch": 0.52,
91
  "learning_rate": 4.1389913899138996e-05,
92
- "loss": 1.6906,
93
  "step": 140
94
  },
95
  {
96
  "epoch": 0.55,
97
  "learning_rate": 4.077490774907749e-05,
98
- "loss": 1.7081,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 0.59,
103
  "learning_rate": 4.015990159901599e-05,
104
- "loss": 1.6632,
105
  "step": 160
106
  },
107
  {
108
  "epoch": 0.63,
109
  "learning_rate": 3.954489544895449e-05,
110
- "loss": 1.7391,
111
  "step": 170
112
  },
113
  {
114
  "epoch": 0.66,
115
  "learning_rate": 3.892988929889299e-05,
116
- "loss": 1.7226,
117
  "step": 180
118
  },
119
  {
120
  "epoch": 0.7,
121
  "learning_rate": 3.831488314883149e-05,
122
- "loss": 1.7101,
123
  "step": 190
124
  },
125
  {
126
  "epoch": 0.74,
127
  "learning_rate": 3.769987699876999e-05,
128
- "loss": 1.7839,
129
  "step": 200
130
  },
131
  {
132
  "epoch": 0.77,
133
  "learning_rate": 3.7084870848708486e-05,
134
- "loss": 1.7438,
135
  "step": 210
136
  },
137
  {
138
  "epoch": 0.81,
139
  "learning_rate": 3.646986469864699e-05,
140
- "loss": 1.7511,
141
  "step": 220
142
  },
143
  {
144
  "epoch": 0.85,
145
  "learning_rate": 3.585485854858548e-05,
146
- "loss": 1.7076,
147
  "step": 230
148
  },
149
  {
150
  "epoch": 0.88,
151
  "learning_rate": 3.5239852398523984e-05,
152
- "loss": 1.6967,
153
  "step": 240
154
  },
155
  {
156
  "epoch": 0.92,
157
  "learning_rate": 3.4624846248462485e-05,
158
- "loss": 1.7217,
159
  "step": 250
160
  },
161
  {
162
  "epoch": 0.96,
163
  "learning_rate": 3.400984009840099e-05,
164
- "loss": 1.7106,
165
  "step": 260
166
  },
167
  {
168
  "epoch": 0.99,
169
  "learning_rate": 3.339483394833948e-05,
170
- "loss": 1.7438,
171
  "step": 270
172
  },
173
  {
174
  "epoch": 1.03,
175
  "learning_rate": 3.277982779827798e-05,
176
- "loss": 1.4464,
177
  "step": 280
178
  },
179
  {
180
  "epoch": 1.07,
181
  "learning_rate": 3.2164821648216484e-05,
182
- "loss": 1.3142,
183
  "step": 290
184
  },
185
  {
186
  "epoch": 1.1,
187
  "learning_rate": 3.1549815498154986e-05,
188
- "loss": 1.3559,
189
  "step": 300
190
  },
191
  {
192
  "epoch": 1.14,
193
  "learning_rate": 3.093480934809348e-05,
194
- "loss": 1.3662,
195
  "step": 310
196
  },
197
  {
198
  "epoch": 1.18,
199
  "learning_rate": 3.0319803198031982e-05,
200
- "loss": 1.3499,
201
  "step": 320
202
  },
203
  {
204
  "epoch": 1.21,
205
  "learning_rate": 2.970479704797048e-05,
206
- "loss": 1.3961,
207
  "step": 330
208
  },
209
  {
210
  "epoch": 1.25,
211
  "learning_rate": 2.908979089790898e-05,
212
- "loss": 1.3712,
213
  "step": 340
214
  },
215
  {
216
  "epoch": 1.29,
217
  "learning_rate": 2.8474784747847476e-05,
218
- "loss": 1.3994,
219
  "step": 350
220
  },
221
  {
222
  "epoch": 1.32,
223
  "learning_rate": 2.7859778597785978e-05,
224
- "loss": 1.3919,
225
  "step": 360
226
  },
227
  {
228
  "epoch": 1.36,
229
  "learning_rate": 2.7244772447724476e-05,
230
- "loss": 1.3564,
231
  "step": 370
232
  },
233
  {
234
  "epoch": 1.4,
235
  "learning_rate": 2.6629766297662977e-05,
236
- "loss": 1.3711,
237
  "step": 380
238
  },
239
  {
240
  "epoch": 1.44,
241
  "learning_rate": 2.6014760147601475e-05,
242
- "loss": 1.3818,
243
  "step": 390
244
  },
245
  {
246
  "epoch": 1.47,
247
  "learning_rate": 2.5399753997539977e-05,
248
- "loss": 1.4208,
249
  "step": 400
250
  },
251
  {
252
  "epoch": 1.51,
253
  "learning_rate": 2.4784747847478475e-05,
254
- "loss": 1.4105,
255
  "step": 410
256
  },
257
  {
258
  "epoch": 1.55,
259
  "learning_rate": 2.4169741697416977e-05,
260
- "loss": 1.4127,
261
  "step": 420
262
  },
263
  {
264
  "epoch": 1.58,
265
  "learning_rate": 2.3554735547355475e-05,
266
- "loss": 1.4413,
267
  "step": 430
268
  },
269
  {
270
  "epoch": 1.62,
271
  "learning_rate": 2.2939729397293973e-05,
272
- "loss": 1.4459,
273
  "step": 440
274
  },
275
  {
276
  "epoch": 1.66,
277
  "learning_rate": 2.2324723247232474e-05,
278
- "loss": 1.4432,
279
  "step": 450
280
  },
281
  {
282
  "epoch": 1.69,
283
  "learning_rate": 2.1709717097170972e-05,
284
- "loss": 1.4724,
285
  "step": 460
286
  },
287
  {
288
  "epoch": 1.73,
289
  "learning_rate": 2.1094710947109474e-05,
290
- "loss": 1.4482,
291
  "step": 470
292
  },
293
  {
294
  "epoch": 1.77,
295
  "learning_rate": 2.0479704797047972e-05,
296
- "loss": 1.4861,
297
  "step": 480
298
  },
299
  {
300
  "epoch": 1.8,
301
  "learning_rate": 1.986469864698647e-05,
302
- "loss": 1.4791,
303
  "step": 490
304
  },
305
  {
306
  "epoch": 1.84,
307
  "learning_rate": 1.924969249692497e-05,
308
- "loss": 1.5557,
309
  "step": 500
310
  },
311
  {
312
  "epoch": 1.88,
313
  "learning_rate": 1.863468634686347e-05,
314
- "loss": 1.7621,
315
  "step": 510
316
  },
317
  {
318
  "epoch": 1.91,
319
  "learning_rate": 1.8019680196801968e-05,
320
- "loss": 1.7283,
321
  "step": 520
322
  },
323
  {
324
  "epoch": 1.95,
325
  "learning_rate": 1.740467404674047e-05,
326
- "loss": 1.7163,
327
  "step": 530
328
  },
329
  {
330
  "epoch": 1.99,
331
  "learning_rate": 1.6789667896678967e-05,
332
- "loss": 1.6936,
333
  "step": 540
334
  },
335
  {
336
  "epoch": 2.02,
337
  "learning_rate": 1.617466174661747e-05,
338
- "loss": 1.4864,
339
  "step": 550
340
  },
341
  {
342
  "epoch": 2.06,
343
  "learning_rate": 1.5559655596555967e-05,
344
- "loss": 1.328,
345
  "step": 560
346
  },
347
  {
348
  "epoch": 2.1,
349
  "learning_rate": 1.4944649446494467e-05,
350
- "loss": 1.3673,
351
  "step": 570
352
  },
353
  {
354
  "epoch": 2.13,
355
  "learning_rate": 1.4329643296432965e-05,
356
- "loss": 1.3545,
357
  "step": 580
358
  },
359
  {
360
  "epoch": 2.17,
361
  "learning_rate": 1.3714637146371464e-05,
362
- "loss": 1.3736,
363
  "step": 590
364
  },
365
  {
366
  "epoch": 2.21,
367
  "learning_rate": 1.3099630996309964e-05,
368
- "loss": 1.3618,
369
  "step": 600
370
  },
371
  {
372
  "epoch": 2.24,
373
  "learning_rate": 1.2484624846248464e-05,
374
- "loss": 1.3407,
375
  "step": 610
376
  },
377
  {
378
  "epoch": 2.28,
379
  "learning_rate": 1.1869618696186962e-05,
380
- "loss": 1.3543,
381
  "step": 620
382
  },
383
  {
384
  "epoch": 2.32,
385
  "learning_rate": 1.1254612546125462e-05,
386
- "loss": 1.3831,
387
  "step": 630
388
  },
389
  {
390
  "epoch": 2.36,
391
  "learning_rate": 1.0639606396063962e-05,
392
- "loss": 1.3331,
393
  "step": 640
394
  },
395
  {
396
  "epoch": 2.39,
397
  "learning_rate": 1.0024600246002461e-05,
398
- "loss": 1.3872,
399
  "step": 650
400
  },
401
  {
402
  "epoch": 2.43,
403
  "learning_rate": 9.40959409594096e-06,
404
- "loss": 1.3579,
405
  "step": 660
406
  },
407
  {
408
  "epoch": 2.47,
409
  "learning_rate": 8.79458794587946e-06,
410
- "loss": 1.402,
411
  "step": 670
412
  },
413
  {
414
  "epoch": 2.5,
415
  "learning_rate": 8.179581795817959e-06,
416
- "loss": 1.3763,
417
  "step": 680
418
  },
419
  {
420
  "epoch": 2.54,
421
  "learning_rate": 7.564575645756458e-06,
422
- "loss": 1.3659,
423
  "step": 690
424
  },
425
  {
426
  "epoch": 2.58,
427
  "learning_rate": 6.949569495694958e-06,
428
- "loss": 1.3575,
429
  "step": 700
430
  },
431
  {
432
  "epoch": 2.61,
433
  "learning_rate": 6.334563345633457e-06,
434
- "loss": 1.3666,
435
  "step": 710
436
  },
437
  {
438
  "epoch": 2.65,
439
  "learning_rate": 5.7195571955719566e-06,
440
- "loss": 1.3642,
441
  "step": 720
442
  },
443
  {
444
  "epoch": 2.69,
445
  "learning_rate": 5.1045510455104555e-06,
446
- "loss": 1.3613,
447
  "step": 730
448
  },
449
  {
450
  "epoch": 2.72,
451
  "learning_rate": 4.489544895448955e-06,
452
- "loss": 1.3673,
453
  "step": 740
454
  },
455
  {
456
  "epoch": 2.76,
457
  "learning_rate": 3.874538745387454e-06,
458
- "loss": 1.3516,
459
  "step": 750
460
  },
461
  {
462
  "epoch": 2.8,
463
  "learning_rate": 3.2595325953259536e-06,
464
- "loss": 1.3662,
465
  "step": 760
466
  },
467
  {
468
  "epoch": 2.83,
469
  "learning_rate": 2.6445264452644525e-06,
470
- "loss": 1.3449,
471
  "step": 770
472
  },
473
  {
474
  "epoch": 2.87,
475
  "learning_rate": 2.029520295202952e-06,
476
- "loss": 1.3232,
477
  "step": 780
478
  },
479
  {
480
  "epoch": 2.91,
481
  "learning_rate": 1.4145141451414515e-06,
482
- "loss": 1.3536,
483
  "step": 790
484
  },
485
  {
486
  "epoch": 2.94,
487
  "learning_rate": 7.995079950799507e-07,
488
- "loss": 1.3384,
489
  "step": 800
490
  },
491
  {
492
  "epoch": 2.98,
493
  "learning_rate": 1.845018450184502e-07,
494
- "loss": 1.3277,
495
  "step": 810
496
  },
497
  {
498
  "epoch": 2.99,
499
  "step": 813,
500
  "total_flos": 2.818102824586445e+16,
501
- "train_loss": 1.5162586585182105,
502
- "train_runtime": 753.3958,
503
- "train_samples_per_second": 17.306,
504
- "train_steps_per_second": 1.079
505
  }
506
  ],
507
  "logging_steps": 10,
 
11
  {
12
  "epoch": 0.04,
13
  "learning_rate": 4.93849938499385e-05,
14
+ "loss": 3.0474,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.07,
19
  "learning_rate": 4.8769987699877e-05,
20
+ "loss": 2.5423,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.11,
25
  "learning_rate": 4.81549815498155e-05,
26
+ "loss": 2.494,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.15,
31
  "learning_rate": 4.7539975399754e-05,
32
+ "loss": 2.4624,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.18,
37
  "learning_rate": 4.6924969249692496e-05,
38
+ "loss": 2.4676,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.22,
43
  "learning_rate": 4.6309963099631e-05,
44
+ "loss": 2.4296,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.26,
49
  "learning_rate": 4.569495694956949e-05,
50
+ "loss": 2.4051,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.29,
55
  "learning_rate": 4.5079950799507994e-05,
56
+ "loss": 2.4153,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.33,
61
  "learning_rate": 4.4464944649446495e-05,
62
+ "loss": 2.416,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.37,
67
  "learning_rate": 4.3849938499385e-05,
68
+ "loss": 2.3806,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.4,
73
  "learning_rate": 4.323493234932349e-05,
74
+ "loss": 2.359,
75
  "step": 110
76
  },
77
  {
78
  "epoch": 0.44,
79
  "learning_rate": 4.261992619926199e-05,
80
+ "loss": 2.3396,
81
  "step": 120
82
  },
83
  {
84
  "epoch": 0.48,
85
  "learning_rate": 4.2004920049200495e-05,
86
+ "loss": 2.3664,
87
  "step": 130
88
  },
89
  {
90
  "epoch": 0.52,
91
  "learning_rate": 4.1389913899138996e-05,
92
+ "loss": 2.3306,
93
  "step": 140
94
  },
95
  {
96
  "epoch": 0.55,
97
  "learning_rate": 4.077490774907749e-05,
98
+ "loss": 2.3515,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 0.59,
103
  "learning_rate": 4.015990159901599e-05,
104
+ "loss": 2.2757,
105
  "step": 160
106
  },
107
  {
108
  "epoch": 0.63,
109
  "learning_rate": 3.954489544895449e-05,
110
+ "loss": 2.3511,
111
  "step": 170
112
  },
113
  {
114
  "epoch": 0.66,
115
  "learning_rate": 3.892988929889299e-05,
116
+ "loss": 2.3337,
117
  "step": 180
118
  },
119
  {
120
  "epoch": 0.7,
121
  "learning_rate": 3.831488314883149e-05,
122
+ "loss": 2.2724,
123
  "step": 190
124
  },
125
  {
126
  "epoch": 0.74,
127
  "learning_rate": 3.769987699876999e-05,
128
+ "loss": 2.3169,
129
  "step": 200
130
  },
131
  {
132
  "epoch": 0.77,
133
  "learning_rate": 3.7084870848708486e-05,
134
+ "loss": 2.2984,
135
  "step": 210
136
  },
137
  {
138
  "epoch": 0.81,
139
  "learning_rate": 3.646986469864699e-05,
140
+ "loss": 2.3052,
141
  "step": 220
142
  },
143
  {
144
  "epoch": 0.85,
145
  "learning_rate": 3.585485854858548e-05,
146
+ "loss": 2.2797,
147
  "step": 230
148
  },
149
  {
150
  "epoch": 0.88,
151
  "learning_rate": 3.5239852398523984e-05,
152
+ "loss": 2.2492,
153
  "step": 240
154
  },
155
  {
156
  "epoch": 0.92,
157
  "learning_rate": 3.4624846248462485e-05,
158
+ "loss": 2.2824,
159
  "step": 250
160
  },
161
  {
162
  "epoch": 0.96,
163
  "learning_rate": 3.400984009840099e-05,
164
+ "loss": 2.2565,
165
  "step": 260
166
  },
167
  {
168
  "epoch": 0.99,
169
  "learning_rate": 3.339483394833948e-05,
170
+ "loss": 2.2766,
171
  "step": 270
172
  },
173
  {
174
  "epoch": 1.03,
175
  "learning_rate": 3.277982779827798e-05,
176
+ "loss": 2.1366,
177
  "step": 280
178
  },
179
  {
180
  "epoch": 1.07,
181
  "learning_rate": 3.2164821648216484e-05,
182
+ "loss": 2.0625,
183
  "step": 290
184
  },
185
  {
186
  "epoch": 1.1,
187
  "learning_rate": 3.1549815498154986e-05,
188
+ "loss": 2.0812,
189
  "step": 300
190
  },
191
  {
192
  "epoch": 1.14,
193
  "learning_rate": 3.093480934809348e-05,
194
+ "loss": 2.0876,
195
  "step": 310
196
  },
197
  {
198
  "epoch": 1.18,
199
  "learning_rate": 3.0319803198031982e-05,
200
+ "loss": 2.0732,
201
  "step": 320
202
  },
203
  {
204
  "epoch": 1.21,
205
  "learning_rate": 2.970479704797048e-05,
206
+ "loss": 2.1055,
207
  "step": 330
208
  },
209
  {
210
  "epoch": 1.25,
211
  "learning_rate": 2.908979089790898e-05,
212
+ "loss": 2.0799,
213
  "step": 340
214
  },
215
  {
216
  "epoch": 1.29,
217
  "learning_rate": 2.8474784747847476e-05,
218
+ "loss": 2.0846,
219
  "step": 350
220
  },
221
  {
222
  "epoch": 1.32,
223
  "learning_rate": 2.7859778597785978e-05,
224
+ "loss": 2.0928,
225
  "step": 360
226
  },
227
  {
228
  "epoch": 1.36,
229
  "learning_rate": 2.7244772447724476e-05,
230
+ "loss": 2.0533,
231
  "step": 370
232
  },
233
  {
234
  "epoch": 1.4,
235
  "learning_rate": 2.6629766297662977e-05,
236
+ "loss": 2.0432,
237
  "step": 380
238
  },
239
  {
240
  "epoch": 1.44,
241
  "learning_rate": 2.6014760147601475e-05,
242
+ "loss": 2.0408,
243
  "step": 390
244
  },
245
  {
246
  "epoch": 1.47,
247
  "learning_rate": 2.5399753997539977e-05,
248
+ "loss": 2.1038,
249
  "step": 400
250
  },
251
  {
252
  "epoch": 1.51,
253
  "learning_rate": 2.4784747847478475e-05,
254
+ "loss": 2.0639,
255
  "step": 410
256
  },
257
  {
258
  "epoch": 1.55,
259
  "learning_rate": 2.4169741697416977e-05,
260
+ "loss": 2.0712,
261
  "step": 420
262
  },
263
  {
264
  "epoch": 1.58,
265
  "learning_rate": 2.3554735547355475e-05,
266
+ "loss": 2.0843,
267
  "step": 430
268
  },
269
  {
270
  "epoch": 1.62,
271
  "learning_rate": 2.2939729397293973e-05,
272
+ "loss": 2.0795,
273
  "step": 440
274
  },
275
  {
276
  "epoch": 1.66,
277
  "learning_rate": 2.2324723247232474e-05,
278
+ "loss": 2.0812,
279
  "step": 450
280
  },
281
  {
282
  "epoch": 1.69,
283
  "learning_rate": 2.1709717097170972e-05,
284
+ "loss": 2.0648,
285
  "step": 460
286
  },
287
  {
288
  "epoch": 1.73,
289
  "learning_rate": 2.1094710947109474e-05,
290
+ "loss": 2.0476,
291
  "step": 470
292
  },
293
  {
294
  "epoch": 1.77,
295
  "learning_rate": 2.0479704797047972e-05,
296
+ "loss": 2.0769,
297
  "step": 480
298
  },
299
  {
300
  "epoch": 1.8,
301
  "learning_rate": 1.986469864698647e-05,
302
+ "loss": 2.0753,
303
  "step": 490
304
  },
305
  {
306
  "epoch": 1.84,
307
  "learning_rate": 1.924969249692497e-05,
308
+ "loss": 2.0336,
309
  "step": 500
310
  },
311
  {
312
  "epoch": 1.88,
313
  "learning_rate": 1.863468634686347e-05,
314
+ "loss": 2.0888,
315
  "step": 510
316
  },
317
  {
318
  "epoch": 1.91,
319
  "learning_rate": 1.8019680196801968e-05,
320
+ "loss": 2.0748,
321
  "step": 520
322
  },
323
  {
324
  "epoch": 1.95,
325
  "learning_rate": 1.740467404674047e-05,
326
+ "loss": 2.062,
327
  "step": 530
328
  },
329
  {
330
  "epoch": 1.99,
331
  "learning_rate": 1.6789667896678967e-05,
332
+ "loss": 2.0263,
333
  "step": 540
334
  },
335
  {
336
  "epoch": 2.02,
337
  "learning_rate": 1.617466174661747e-05,
338
+ "loss": 1.9472,
339
  "step": 550
340
  },
341
  {
342
  "epoch": 2.06,
343
  "learning_rate": 1.5559655596555967e-05,
344
+ "loss": 1.9055,
345
  "step": 560
346
  },
347
  {
348
  "epoch": 2.1,
349
  "learning_rate": 1.4944649446494467e-05,
350
+ "loss": 1.9086,
351
  "step": 570
352
  },
353
  {
354
  "epoch": 2.13,
355
  "learning_rate": 1.4329643296432965e-05,
356
+ "loss": 1.9112,
357
  "step": 580
358
  },
359
  {
360
  "epoch": 2.17,
361
  "learning_rate": 1.3714637146371464e-05,
362
+ "loss": 1.9361,
363
  "step": 590
364
  },
365
  {
366
  "epoch": 2.21,
367
  "learning_rate": 1.3099630996309964e-05,
368
+ "loss": 1.9229,
369
  "step": 600
370
  },
371
  {
372
  "epoch": 2.24,
373
  "learning_rate": 1.2484624846248464e-05,
374
+ "loss": 1.9133,
375
  "step": 610
376
  },
377
  {
378
  "epoch": 2.28,
379
  "learning_rate": 1.1869618696186962e-05,
380
+ "loss": 1.9196,
381
  "step": 620
382
  },
383
  {
384
  "epoch": 2.32,
385
  "learning_rate": 1.1254612546125462e-05,
386
+ "loss": 1.9198,
387
  "step": 630
388
  },
389
  {
390
  "epoch": 2.36,
391
  "learning_rate": 1.0639606396063962e-05,
392
+ "loss": 1.8842,
393
  "step": 640
394
  },
395
  {
396
  "epoch": 2.39,
397
  "learning_rate": 1.0024600246002461e-05,
398
+ "loss": 1.948,
399
  "step": 650
400
  },
401
  {
402
  "epoch": 2.43,
403
  "learning_rate": 9.40959409594096e-06,
404
+ "loss": 1.9455,
405
  "step": 660
406
  },
407
  {
408
  "epoch": 2.47,
409
  "learning_rate": 8.79458794587946e-06,
410
+ "loss": 1.9538,
411
  "step": 670
412
  },
413
  {
414
  "epoch": 2.5,
415
  "learning_rate": 8.179581795817959e-06,
416
+ "loss": 1.9362,
417
  "step": 680
418
  },
419
  {
420
  "epoch": 2.54,
421
  "learning_rate": 7.564575645756458e-06,
422
+ "loss": 1.9044,
423
  "step": 690
424
  },
425
  {
426
  "epoch": 2.58,
427
  "learning_rate": 6.949569495694958e-06,
428
+ "loss": 1.9351,
429
  "step": 700
430
  },
431
  {
432
  "epoch": 2.61,
433
  "learning_rate": 6.334563345633457e-06,
434
+ "loss": 1.9374,
435
  "step": 710
436
  },
437
  {
438
  "epoch": 2.65,
439
  "learning_rate": 5.7195571955719566e-06,
440
+ "loss": 1.9474,
441
  "step": 720
442
  },
443
  {
444
  "epoch": 2.69,
445
  "learning_rate": 5.1045510455104555e-06,
446
+ "loss": 1.8852,
447
  "step": 730
448
  },
449
  {
450
  "epoch": 2.72,
451
  "learning_rate": 4.489544895448955e-06,
452
+ "loss": 1.9285,
453
  "step": 740
454
  },
455
  {
456
  "epoch": 2.76,
457
  "learning_rate": 3.874538745387454e-06,
458
+ "loss": 1.8788,
459
  "step": 750
460
  },
461
  {
462
  "epoch": 2.8,
463
  "learning_rate": 3.2595325953259536e-06,
464
+ "loss": 1.9447,
465
  "step": 760
466
  },
467
  {
468
  "epoch": 2.83,
469
  "learning_rate": 2.6445264452644525e-06,
470
+ "loss": 1.9196,
471
  "step": 770
472
  },
473
  {
474
  "epoch": 2.87,
475
  "learning_rate": 2.029520295202952e-06,
476
+ "loss": 1.9037,
477
  "step": 780
478
  },
479
  {
480
  "epoch": 2.91,
481
  "learning_rate": 1.4145141451414515e-06,
482
+ "loss": 1.8997,
483
  "step": 790
484
  },
485
  {
486
  "epoch": 2.94,
487
  "learning_rate": 7.995079950799507e-07,
488
+ "loss": 1.9194,
489
  "step": 800
490
  },
491
  {
492
  "epoch": 2.98,
493
  "learning_rate": 1.845018450184502e-07,
494
+ "loss": 1.8938,
495
  "step": 810
496
  },
497
  {
498
  "epoch": 2.99,
499
  "step": 813,
500
  "total_flos": 2.818102824586445e+16,
501
+ "train_loss": 2.124213267956273,
502
+ "train_runtime": 1043.2,
503
+ "train_samples_per_second": 12.498,
504
+ "train_steps_per_second": 0.779
505
  }
506
  ],
507
  "logging_steps": 10,