Rakhman16 commited on
Commit
abb5cfb
·
verified ·
1 Parent(s): 31263e2

Training in progress, step 2404, checkpoint

Browse files
Files changed (1) hide show
  1. last-checkpoint/trainer_state.json +72 -72
last-checkpoint/trainer_state.json CHANGED
@@ -25,9 +25,9 @@
25
  {
26
  "epoch": 0.1662510390689942,
27
  "eval_loss": 0.35283052921295166,
28
- "eval_runtime": 36.3275,
29
- "eval_samples_per_second": 13.598,
30
- "eval_steps_per_second": 1.707,
31
  "step": 100
32
  },
33
  {
@@ -47,9 +47,9 @@
47
  {
48
  "epoch": 0.3325020781379884,
49
  "eval_loss": 0.30865946412086487,
50
- "eval_runtime": 36.5834,
51
- "eval_samples_per_second": 13.503,
52
- "eval_steps_per_second": 1.695,
53
  "step": 200
54
  },
55
  {
@@ -69,9 +69,9 @@
69
  {
70
  "epoch": 0.49875311720698257,
71
  "eval_loss": 0.2891499996185303,
72
- "eval_runtime": 36.4356,
73
- "eval_samples_per_second": 13.558,
74
- "eval_steps_per_second": 1.702,
75
  "step": 300
76
  },
77
  {
@@ -91,9 +91,9 @@
91
  {
92
  "epoch": 0.6650041562759768,
93
  "eval_loss": 0.2757515609264374,
94
- "eval_runtime": 36.561,
95
- "eval_samples_per_second": 13.512,
96
- "eval_steps_per_second": 1.696,
97
  "step": 400
98
  },
99
  {
@@ -113,9 +113,9 @@
113
  {
114
  "epoch": 0.8312551953449709,
115
  "eval_loss": 0.2673098146915436,
116
- "eval_runtime": 36.4611,
117
- "eval_samples_per_second": 13.549,
118
- "eval_steps_per_second": 1.7,
119
  "step": 500
120
  },
121
  {
@@ -135,9 +135,9 @@
135
  {
136
  "epoch": 0.9975062344139651,
137
  "eval_loss": 0.26070085167884827,
138
- "eval_runtime": 36.5977,
139
- "eval_samples_per_second": 13.498,
140
- "eval_steps_per_second": 1.694,
141
  "step": 600
142
  },
143
  {
@@ -157,9 +157,9 @@
157
  {
158
  "epoch": 1.1637572734829593,
159
  "eval_loss": 0.256939560174942,
160
- "eval_runtime": 36.5571,
161
- "eval_samples_per_second": 13.513,
162
- "eval_steps_per_second": 1.696,
163
  "step": 700
164
  },
165
  {
@@ -179,9 +179,9 @@
179
  {
180
  "epoch": 1.3300083125519535,
181
  "eval_loss": 0.2525966763496399,
182
- "eval_runtime": 36.5366,
183
- "eval_samples_per_second": 13.521,
184
- "eval_steps_per_second": 1.697,
185
  "step": 800
186
  },
187
  {
@@ -201,9 +201,9 @@
201
  {
202
  "epoch": 1.4962593516209477,
203
  "eval_loss": 0.24994711577892303,
204
- "eval_runtime": 36.5909,
205
- "eval_samples_per_second": 13.501,
206
- "eval_steps_per_second": 1.694,
207
  "step": 900
208
  },
209
  {
@@ -223,9 +223,9 @@
223
  {
224
  "epoch": 1.6625103906899419,
225
  "eval_loss": 0.246443971991539,
226
- "eval_runtime": 36.514,
227
- "eval_samples_per_second": 13.529,
228
- "eval_steps_per_second": 1.698,
229
  "step": 1000
230
  },
231
  {
@@ -245,9 +245,9 @@
245
  {
246
  "epoch": 1.828761429758936,
247
  "eval_loss": 0.24409395456314087,
248
- "eval_runtime": 36.5322,
249
- "eval_samples_per_second": 13.522,
250
- "eval_steps_per_second": 1.697,
251
  "step": 1100
252
  },
253
  {
@@ -267,9 +267,9 @@
267
  {
268
  "epoch": 1.9950124688279303,
269
  "eval_loss": 0.2411041557788849,
270
- "eval_runtime": 36.4594,
271
- "eval_samples_per_second": 13.549,
272
- "eval_steps_per_second": 1.701,
273
  "step": 1200
274
  },
275
  {
@@ -289,9 +289,9 @@
289
  {
290
  "epoch": 2.1612635078969245,
291
  "eval_loss": 0.23963774740695953,
292
- "eval_runtime": 36.6761,
293
- "eval_samples_per_second": 13.469,
294
- "eval_steps_per_second": 1.69,
295
  "step": 1300
296
  },
297
  {
@@ -311,9 +311,9 @@
311
  {
312
  "epoch": 2.3275145469659186,
313
  "eval_loss": 0.23829442262649536,
314
- "eval_runtime": 36.5959,
315
- "eval_samples_per_second": 13.499,
316
- "eval_steps_per_second": 1.694,
317
  "step": 1400
318
  },
319
  {
@@ -333,9 +333,9 @@
333
  {
334
  "epoch": 2.493765586034913,
335
  "eval_loss": 0.2369847148656845,
336
- "eval_runtime": 36.5924,
337
- "eval_samples_per_second": 13.5,
338
- "eval_steps_per_second": 1.694,
339
  "step": 1500
340
  },
341
  {
@@ -355,9 +355,9 @@
355
  {
356
  "epoch": 2.660016625103907,
357
  "eval_loss": 0.23503336310386658,
358
- "eval_runtime": 36.5255,
359
- "eval_samples_per_second": 13.525,
360
- "eval_steps_per_second": 1.697,
361
  "step": 1600
362
  },
363
  {
@@ -377,9 +377,9 @@
377
  {
378
  "epoch": 2.826267664172901,
379
  "eval_loss": 0.23422521352767944,
380
- "eval_runtime": 36.5068,
381
- "eval_samples_per_second": 13.532,
382
- "eval_steps_per_second": 1.698,
383
  "step": 1700
384
  },
385
  {
@@ -399,9 +399,9 @@
399
  {
400
  "epoch": 2.9925187032418954,
401
  "eval_loss": 0.23330263793468475,
402
- "eval_runtime": 36.5181,
403
- "eval_samples_per_second": 13.528,
404
- "eval_steps_per_second": 1.698,
405
  "step": 1800
406
  },
407
  {
@@ -421,9 +421,9 @@
421
  {
422
  "epoch": 3.1587697423108896,
423
  "eval_loss": 0.23302872478961945,
424
- "eval_runtime": 36.6159,
425
- "eval_samples_per_second": 13.491,
426
- "eval_steps_per_second": 1.693,
427
  "step": 1900
428
  },
429
  {
@@ -443,9 +443,9 @@
443
  {
444
  "epoch": 3.3250207813798838,
445
  "eval_loss": 0.23246362805366516,
446
- "eval_runtime": 36.5215,
447
- "eval_samples_per_second": 13.526,
448
- "eval_steps_per_second": 1.698,
449
  "step": 2000
450
  },
451
  {
@@ -465,9 +465,9 @@
465
  {
466
  "epoch": 3.491271820448878,
467
  "eval_loss": 0.23180559277534485,
468
- "eval_runtime": 36.4965,
469
- "eval_samples_per_second": 13.536,
470
- "eval_steps_per_second": 1.699,
471
  "step": 2100
472
  },
473
  {
@@ -487,9 +487,9 @@
487
  {
488
  "epoch": 3.657522859517872,
489
  "eval_loss": 0.23114623129367828,
490
- "eval_runtime": 36.5506,
491
- "eval_samples_per_second": 13.516,
492
- "eval_steps_per_second": 1.696,
493
  "step": 2200
494
  },
495
  {
@@ -509,9 +509,9 @@
509
  {
510
  "epoch": 3.8237738985868663,
511
  "eval_loss": 0.23083852231502533,
512
- "eval_runtime": 36.3806,
513
- "eval_samples_per_second": 13.579,
514
- "eval_steps_per_second": 1.704,
515
  "step": 2300
516
  },
517
  {
@@ -531,9 +531,9 @@
531
  {
532
  "epoch": 3.9900249376558605,
533
  "eval_loss": 0.23079748451709747,
534
- "eval_runtime": 36.4645,
535
- "eval_samples_per_second": 13.547,
536
- "eval_steps_per_second": 1.7,
537
  "step": 2400
538
  }
539
  ],
 
25
  {
26
  "epoch": 0.1662510390689942,
27
  "eval_loss": 0.35283052921295166,
28
+ "eval_runtime": 38.0523,
29
+ "eval_samples_per_second": 12.982,
30
+ "eval_steps_per_second": 1.629,
31
  "step": 100
32
  },
33
  {
 
47
  {
48
  "epoch": 0.3325020781379884,
49
  "eval_loss": 0.30865946412086487,
50
+ "eval_runtime": 38.2162,
51
+ "eval_samples_per_second": 12.926,
52
+ "eval_steps_per_second": 1.622,
53
  "step": 200
54
  },
55
  {
 
69
  {
70
  "epoch": 0.49875311720698257,
71
  "eval_loss": 0.2891499996185303,
72
+ "eval_runtime": 38.1858,
73
+ "eval_samples_per_second": 12.937,
74
+ "eval_steps_per_second": 1.624,
75
  "step": 300
76
  },
77
  {
 
91
  {
92
  "epoch": 0.6650041562759768,
93
  "eval_loss": 0.2757515609264374,
94
+ "eval_runtime": 38.2247,
95
+ "eval_samples_per_second": 12.924,
96
+ "eval_steps_per_second": 1.622,
97
  "step": 400
98
  },
99
  {
 
113
  {
114
  "epoch": 0.8312551953449709,
115
  "eval_loss": 0.2673098146915436,
116
+ "eval_runtime": 38.0207,
117
+ "eval_samples_per_second": 12.993,
118
+ "eval_steps_per_second": 1.631,
119
  "step": 500
120
  },
121
  {
 
135
  {
136
  "epoch": 0.9975062344139651,
137
  "eval_loss": 0.26070085167884827,
138
+ "eval_runtime": 37.7694,
139
+ "eval_samples_per_second": 13.079,
140
+ "eval_steps_per_second": 1.642,
141
  "step": 600
142
  },
143
  {
 
157
  {
158
  "epoch": 1.1637572734829593,
159
  "eval_loss": 0.256939560174942,
160
+ "eval_runtime": 38.0385,
161
+ "eval_samples_per_second": 12.987,
162
+ "eval_steps_per_second": 1.63,
163
  "step": 700
164
  },
165
  {
 
179
  {
180
  "epoch": 1.3300083125519535,
181
  "eval_loss": 0.2525966763496399,
182
+ "eval_runtime": 38.1831,
183
+ "eval_samples_per_second": 12.938,
184
+ "eval_steps_per_second": 1.624,
185
  "step": 800
186
  },
187
  {
 
201
  {
202
  "epoch": 1.4962593516209477,
203
  "eval_loss": 0.24994711577892303,
204
+ "eval_runtime": 37.4678,
205
+ "eval_samples_per_second": 13.185,
206
+ "eval_steps_per_second": 1.655,
207
  "step": 900
208
  },
209
  {
 
223
  {
224
  "epoch": 1.6625103906899419,
225
  "eval_loss": 0.246443971991539,
226
+ "eval_runtime": 37.8324,
227
+ "eval_samples_per_second": 13.058,
228
+ "eval_steps_per_second": 1.639,
229
  "step": 1000
230
  },
231
  {
 
245
  {
246
  "epoch": 1.828761429758936,
247
  "eval_loss": 0.24409395456314087,
248
+ "eval_runtime": 37.7821,
249
+ "eval_samples_per_second": 13.075,
250
+ "eval_steps_per_second": 1.641,
251
  "step": 1100
252
  },
253
  {
 
267
  {
268
  "epoch": 1.9950124688279303,
269
  "eval_loss": 0.2411041557788849,
270
+ "eval_runtime": 37.3119,
271
+ "eval_samples_per_second": 13.24,
272
+ "eval_steps_per_second": 1.662,
273
  "step": 1200
274
  },
275
  {
 
289
  {
290
  "epoch": 2.1612635078969245,
291
  "eval_loss": 0.23963774740695953,
292
+ "eval_runtime": 37.7867,
293
+ "eval_samples_per_second": 13.073,
294
+ "eval_steps_per_second": 1.641,
295
  "step": 1300
296
  },
297
  {
 
311
  {
312
  "epoch": 2.3275145469659186,
313
  "eval_loss": 0.23829442262649536,
314
+ "eval_runtime": 37.7411,
315
+ "eval_samples_per_second": 13.089,
316
+ "eval_steps_per_second": 1.643,
317
  "step": 1400
318
  },
319
  {
 
333
  {
334
  "epoch": 2.493765586034913,
335
  "eval_loss": 0.2369847148656845,
336
+ "eval_runtime": 38.1397,
337
+ "eval_samples_per_second": 12.952,
338
+ "eval_steps_per_second": 1.626,
339
  "step": 1500
340
  },
341
  {
 
355
  {
356
  "epoch": 2.660016625103907,
357
  "eval_loss": 0.23503336310386658,
358
+ "eval_runtime": 37.7531,
359
+ "eval_samples_per_second": 13.085,
360
+ "eval_steps_per_second": 1.642,
361
  "step": 1600
362
  },
363
  {
 
377
  {
378
  "epoch": 2.826267664172901,
379
  "eval_loss": 0.23422521352767944,
380
+ "eval_runtime": 37.8729,
381
+ "eval_samples_per_second": 13.044,
382
+ "eval_steps_per_second": 1.637,
383
  "step": 1700
384
  },
385
  {
 
399
  {
400
  "epoch": 2.9925187032418954,
401
  "eval_loss": 0.23330263793468475,
402
+ "eval_runtime": 37.8678,
403
+ "eval_samples_per_second": 13.045,
404
+ "eval_steps_per_second": 1.637,
405
  "step": 1800
406
  },
407
  {
 
421
  {
422
  "epoch": 3.1587697423108896,
423
  "eval_loss": 0.23302872478961945,
424
+ "eval_runtime": 38.1436,
425
+ "eval_samples_per_second": 12.951,
426
+ "eval_steps_per_second": 1.625,
427
  "step": 1900
428
  },
429
  {
 
443
  {
444
  "epoch": 3.3250207813798838,
445
  "eval_loss": 0.23246362805366516,
446
+ "eval_runtime": 37.97,
447
+ "eval_samples_per_second": 13.01,
448
+ "eval_steps_per_second": 1.633,
449
  "step": 2000
450
  },
451
  {
 
465
  {
466
  "epoch": 3.491271820448878,
467
  "eval_loss": 0.23180559277534485,
468
+ "eval_runtime": 37.9308,
469
+ "eval_samples_per_second": 13.024,
470
+ "eval_steps_per_second": 1.635,
471
  "step": 2100
472
  },
473
  {
 
487
  {
488
  "epoch": 3.657522859517872,
489
  "eval_loss": 0.23114623129367828,
490
+ "eval_runtime": 38.1017,
491
+ "eval_samples_per_second": 12.965,
492
+ "eval_steps_per_second": 1.627,
493
  "step": 2200
494
  },
495
  {
 
509
  {
510
  "epoch": 3.8237738985868663,
511
  "eval_loss": 0.23083852231502533,
512
+ "eval_runtime": 38.0527,
513
+ "eval_samples_per_second": 12.982,
514
+ "eval_steps_per_second": 1.629,
515
  "step": 2300
516
  },
517
  {
 
531
  {
532
  "epoch": 3.9900249376558605,
533
  "eval_loss": 0.23079748451709747,
534
+ "eval_runtime": 37.7401,
535
+ "eval_samples_per_second": 13.09,
536
+ "eval_steps_per_second": 1.643,
537
  "step": 2400
538
  }
539
  ],