Melo1512 committed on
Commit
48c8e22
·
verified ·
1 Parent(s): efd4535

End of training

Browse files
README.md CHANGED
@@ -23,7 +23,7 @@ model-index:
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
- value: 0.8895732512098549
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -33,8 +33,8 @@ should probably proofread and complete it, then remove this comment. -->
33
 
34
  This model is a fine-tuned version of [facebook/vit-msn-small](https://huggingface.co/facebook/vit-msn-small) on the imagefolder dataset.
35
  It achieves the following results on the evaluation set:
36
- - Loss: 0.4142
37
- - Accuracy: 0.8896
38
 
39
  ## Model description
40
 
 
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
+ value: 0.893532776066872
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
33
 
34
  This model is a fine-tuned version of [facebook/vit-msn-small](https://huggingface.co/facebook/vit-msn-small) on the imagefolder dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 0.3538
37
+ - Accuracy: 0.8935
38
 
39
  ## Model description
40
 
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 25.0,
3
+ "eval_accuracy": 0.893532776066872,
4
+ "eval_loss": 0.35377147793769836,
5
+ "eval_runtime": 7.2946,
6
+ "eval_samples_per_second": 311.601,
7
+ "eval_steps_per_second": 4.935,
8
+ "total_flos": 2.0038784309526528e+18,
9
+ "train_loss": 0.3093862909078598,
10
+ "train_runtime": 987.5731,
11
+ "train_samples_per_second": 103.689,
12
+ "train_steps_per_second": 0.405
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 25.0,
3
+ "eval_accuracy": 0.893532776066872,
4
+ "eval_loss": 0.35377147793769836,
5
+ "eval_runtime": 7.2946,
6
+ "eval_samples_per_second": 311.601,
7
+ "eval_steps_per_second": 4.935
8
+ }
runs/Dec17_15-57-09_ae1aa77fe319/events.out.tfevents.1734454743.ae1aa77fe319.236.21 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6598b9426d45ed7ccb19cbee331379ad7e2e21a070c4378d46eef8439b6cb11c
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 25.0,
3
+ "total_flos": 2.0038784309526528e+18,
4
+ "train_loss": 0.3093862909078598,
5
+ "train_runtime": 987.5731,
6
+ "train_samples_per_second": 103.689,
7
+ "train_steps_per_second": 0.405
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,547 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.893532776066872,
3
+ "best_model_checkpoint": "vit-msn-small-wbc-classifier-0316-cleaned-dataset-10/checkpoint-288",
4
+ "epoch": 25.0,
5
+ "eval_steps": 500,
6
+ "global_step": 400,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.625,
13
+ "grad_norm": 5.568078994750977,
14
+ "learning_rate": 1.25e-05,
15
+ "loss": 1.5193,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.7945446546414431,
21
+ "eval_loss": 0.6822578310966492,
22
+ "eval_runtime": 7.3634,
23
+ "eval_samples_per_second": 308.688,
24
+ "eval_steps_per_second": 4.889,
25
+ "step": 16
26
+ },
27
+ {
28
+ "epoch": 1.25,
29
+ "grad_norm": 8.283280372619629,
30
+ "learning_rate": 2.5e-05,
31
+ "loss": 0.7595,
32
+ "step": 20
33
+ },
34
+ {
35
+ "epoch": 1.875,
36
+ "grad_norm": 17.42099952697754,
37
+ "learning_rate": 3.7500000000000003e-05,
38
+ "loss": 0.5339,
39
+ "step": 30
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "eval_accuracy": 0.8438187417509899,
44
+ "eval_loss": 0.45528778433799744,
45
+ "eval_runtime": 7.4331,
46
+ "eval_samples_per_second": 305.795,
47
+ "eval_steps_per_second": 4.843,
48
+ "step": 32
49
+ },
50
+ {
51
+ "epoch": 2.5,
52
+ "grad_norm": 13.5732421875,
53
+ "learning_rate": 5e-05,
54
+ "loss": 0.4778,
55
+ "step": 40
56
+ },
57
+ {
58
+ "epoch": 3.0,
59
+ "eval_accuracy": 0.847778266608007,
60
+ "eval_loss": 0.45250508189201355,
61
+ "eval_runtime": 7.3291,
62
+ "eval_samples_per_second": 310.135,
63
+ "eval_steps_per_second": 4.912,
64
+ "step": 48
65
+ },
66
+ {
67
+ "epoch": 3.125,
68
+ "grad_norm": 13.014479637145996,
69
+ "learning_rate": 4.8611111111111115e-05,
70
+ "loss": 0.4966,
71
+ "step": 50
72
+ },
73
+ {
74
+ "epoch": 3.75,
75
+ "grad_norm": 3.9621634483337402,
76
+ "learning_rate": 4.722222222222222e-05,
77
+ "loss": 0.4253,
78
+ "step": 60
79
+ },
80
+ {
81
+ "epoch": 4.0,
82
+ "eval_accuracy": 0.8473383194016718,
83
+ "eval_loss": 0.40767061710357666,
84
+ "eval_runtime": 7.3487,
85
+ "eval_samples_per_second": 309.305,
86
+ "eval_steps_per_second": 4.899,
87
+ "step": 64
88
+ },
89
+ {
90
+ "epoch": 4.375,
91
+ "grad_norm": 6.071442127227783,
92
+ "learning_rate": 4.5833333333333334e-05,
93
+ "loss": 0.3969,
94
+ "step": 70
95
+ },
96
+ {
97
+ "epoch": 5.0,
98
+ "grad_norm": 14.56364917755127,
99
+ "learning_rate": 4.4444444444444447e-05,
100
+ "loss": 0.4086,
101
+ "step": 80
102
+ },
103
+ {
104
+ "epoch": 5.0,
105
+ "eval_accuracy": 0.8574571051473823,
106
+ "eval_loss": 0.42175111174583435,
107
+ "eval_runtime": 7.398,
108
+ "eval_samples_per_second": 307.245,
109
+ "eval_steps_per_second": 4.866,
110
+ "step": 80
111
+ },
112
+ {
113
+ "epoch": 5.625,
114
+ "grad_norm": 6.646561145782471,
115
+ "learning_rate": 4.305555555555556e-05,
116
+ "loss": 0.3673,
117
+ "step": 90
118
+ },
119
+ {
120
+ "epoch": 6.0,
121
+ "eval_accuracy": 0.8693356797184338,
122
+ "eval_loss": 0.400217205286026,
123
+ "eval_runtime": 7.3916,
124
+ "eval_samples_per_second": 307.511,
125
+ "eval_steps_per_second": 4.87,
126
+ "step": 96
127
+ },
128
+ {
129
+ "epoch": 6.25,
130
+ "grad_norm": 5.845401287078857,
131
+ "learning_rate": 4.166666666666667e-05,
132
+ "loss": 0.3728,
133
+ "step": 100
134
+ },
135
+ {
136
+ "epoch": 6.875,
137
+ "grad_norm": 5.991316318511963,
138
+ "learning_rate": 4.027777777777778e-05,
139
+ "loss": 0.3275,
140
+ "step": 110
141
+ },
142
+ {
143
+ "epoch": 7.0,
144
+ "eval_accuracy": 0.8772547294324681,
145
+ "eval_loss": 0.33016717433929443,
146
+ "eval_runtime": 7.3331,
147
+ "eval_samples_per_second": 309.966,
148
+ "eval_steps_per_second": 4.909,
149
+ "step": 112
150
+ },
151
+ {
152
+ "epoch": 7.5,
153
+ "grad_norm": 5.736546993255615,
154
+ "learning_rate": 3.888888888888889e-05,
155
+ "loss": 0.3231,
156
+ "step": 120
157
+ },
158
+ {
159
+ "epoch": 8.0,
160
+ "eval_accuracy": 0.8803343598768147,
161
+ "eval_loss": 0.36715179681777954,
162
+ "eval_runtime": 7.3393,
163
+ "eval_samples_per_second": 309.704,
164
+ "eval_steps_per_second": 4.905,
165
+ "step": 128
166
+ },
167
+ {
168
+ "epoch": 8.125,
169
+ "grad_norm": 6.285913467407227,
170
+ "learning_rate": 3.7500000000000003e-05,
171
+ "loss": 0.3186,
172
+ "step": 130
173
+ },
174
+ {
175
+ "epoch": 8.75,
176
+ "grad_norm": 5.252928733825684,
177
+ "learning_rate": 3.611111111111111e-05,
178
+ "loss": 0.302,
179
+ "step": 140
180
+ },
181
+ {
182
+ "epoch": 9.0,
183
+ "eval_accuracy": 0.8900131984161901,
184
+ "eval_loss": 0.33626171946525574,
185
+ "eval_runtime": 7.373,
186
+ "eval_samples_per_second": 308.286,
187
+ "eval_steps_per_second": 4.883,
188
+ "step": 144
189
+ },
190
+ {
191
+ "epoch": 9.375,
192
+ "grad_norm": 6.006847381591797,
193
+ "learning_rate": 3.472222222222222e-05,
194
+ "loss": 0.3,
195
+ "step": 150
196
+ },
197
+ {
198
+ "epoch": 10.0,
199
+ "grad_norm": 6.0960493087768555,
200
+ "learning_rate": 3.3333333333333335e-05,
201
+ "loss": 0.3122,
202
+ "step": 160
203
+ },
204
+ {
205
+ "epoch": 10.0,
206
+ "eval_accuracy": 0.884293884733832,
207
+ "eval_loss": 0.32835376262664795,
208
+ "eval_runtime": 7.4834,
209
+ "eval_samples_per_second": 303.741,
210
+ "eval_steps_per_second": 4.811,
211
+ "step": 160
212
+ },
213
+ {
214
+ "epoch": 10.625,
215
+ "grad_norm": 6.523850440979004,
216
+ "learning_rate": 3.194444444444444e-05,
217
+ "loss": 0.2686,
218
+ "step": 170
219
+ },
220
+ {
221
+ "epoch": 11.0,
222
+ "eval_accuracy": 0.8873735151781786,
223
+ "eval_loss": 0.3317248523235321,
224
+ "eval_runtime": 7.541,
225
+ "eval_samples_per_second": 301.418,
226
+ "eval_steps_per_second": 4.774,
227
+ "step": 176
228
+ },
229
+ {
230
+ "epoch": 11.25,
231
+ "grad_norm": 4.121973514556885,
232
+ "learning_rate": 3.055555555555556e-05,
233
+ "loss": 0.2776,
234
+ "step": 180
235
+ },
236
+ {
237
+ "epoch": 11.875,
238
+ "grad_norm": 4.277699947357178,
239
+ "learning_rate": 2.916666666666667e-05,
240
+ "loss": 0.2786,
241
+ "step": 190
242
+ },
243
+ {
244
+ "epoch": 12.0,
245
+ "eval_accuracy": 0.8882534095908491,
246
+ "eval_loss": 0.3660268783569336,
247
+ "eval_runtime": 7.455,
248
+ "eval_samples_per_second": 304.896,
249
+ "eval_steps_per_second": 4.829,
250
+ "step": 192
251
+ },
252
+ {
253
+ "epoch": 12.5,
254
+ "grad_norm": 5.525660514831543,
255
+ "learning_rate": 2.777777777777778e-05,
256
+ "loss": 0.2338,
257
+ "step": 200
258
+ },
259
+ {
260
+ "epoch": 13.0,
261
+ "eval_accuracy": 0.8834139903211614,
262
+ "eval_loss": 0.35196566581726074,
263
+ "eval_runtime": 7.4309,
264
+ "eval_samples_per_second": 305.884,
265
+ "eval_steps_per_second": 4.845,
266
+ "step": 208
267
+ },
268
+ {
269
+ "epoch": 13.125,
270
+ "grad_norm": 3.3051507472991943,
271
+ "learning_rate": 2.6388888888888892e-05,
272
+ "loss": 0.2226,
273
+ "step": 210
274
+ },
275
+ {
276
+ "epoch": 13.75,
277
+ "grad_norm": 5.123415946960449,
278
+ "learning_rate": 2.5e-05,
279
+ "loss": 0.2466,
280
+ "step": 220
281
+ },
282
+ {
283
+ "epoch": 14.0,
284
+ "eval_accuracy": 0.8895732512098549,
285
+ "eval_loss": 0.34136760234832764,
286
+ "eval_runtime": 7.2795,
287
+ "eval_samples_per_second": 312.245,
288
+ "eval_steps_per_second": 4.945,
289
+ "step": 224
290
+ },
291
+ {
292
+ "epoch": 14.375,
293
+ "grad_norm": 7.256677627563477,
294
+ "learning_rate": 2.361111111111111e-05,
295
+ "loss": 0.2279,
296
+ "step": 230
297
+ },
298
+ {
299
+ "epoch": 15.0,
300
+ "grad_norm": 5.926828384399414,
301
+ "learning_rate": 2.2222222222222223e-05,
302
+ "loss": 0.2296,
303
+ "step": 240
304
+ },
305
+ {
306
+ "epoch": 15.0,
307
+ "eval_accuracy": 0.8873735151781786,
308
+ "eval_loss": 0.35307401418685913,
309
+ "eval_runtime": 7.3954,
310
+ "eval_samples_per_second": 307.352,
311
+ "eval_steps_per_second": 4.868,
312
+ "step": 240
313
+ },
314
+ {
315
+ "epoch": 15.625,
316
+ "grad_norm": 10.099089622497559,
317
+ "learning_rate": 2.0833333333333336e-05,
318
+ "loss": 0.1961,
319
+ "step": 250
320
+ },
321
+ {
322
+ "epoch": 16.0,
323
+ "eval_accuracy": 0.8847338319401672,
324
+ "eval_loss": 0.38436347246170044,
325
+ "eval_runtime": 7.4905,
326
+ "eval_samples_per_second": 303.453,
327
+ "eval_steps_per_second": 4.806,
328
+ "step": 256
329
+ },
330
+ {
331
+ "epoch": 16.25,
332
+ "grad_norm": 6.454414367675781,
333
+ "learning_rate": 1.9444444444444445e-05,
334
+ "loss": 0.2159,
335
+ "step": 260
336
+ },
337
+ {
338
+ "epoch": 16.875,
339
+ "grad_norm": 7.058192729949951,
340
+ "learning_rate": 1.8055555555555555e-05,
341
+ "loss": 0.2056,
342
+ "step": 270
343
+ },
344
+ {
345
+ "epoch": 17.0,
346
+ "eval_accuracy": 0.8900131984161901,
347
+ "eval_loss": 0.3704770803451538,
348
+ "eval_runtime": 7.5318,
349
+ "eval_samples_per_second": 301.788,
350
+ "eval_steps_per_second": 4.78,
351
+ "step": 272
352
+ },
353
+ {
354
+ "epoch": 17.5,
355
+ "grad_norm": 8.302024841308594,
356
+ "learning_rate": 1.6666666666666667e-05,
357
+ "loss": 0.197,
358
+ "step": 280
359
+ },
360
+ {
361
+ "epoch": 18.0,
362
+ "eval_accuracy": 0.893532776066872,
363
+ "eval_loss": 0.35377147793769836,
364
+ "eval_runtime": 7.2947,
365
+ "eval_samples_per_second": 311.598,
366
+ "eval_steps_per_second": 4.935,
367
+ "step": 288
368
+ },
369
+ {
370
+ "epoch": 18.125,
371
+ "grad_norm": 5.322263240814209,
372
+ "learning_rate": 1.527777777777778e-05,
373
+ "loss": 0.2136,
374
+ "step": 290
375
+ },
376
+ {
377
+ "epoch": 18.75,
378
+ "grad_norm": 5.138127326965332,
379
+ "learning_rate": 1.388888888888889e-05,
380
+ "loss": 0.1748,
381
+ "step": 300
382
+ },
383
+ {
384
+ "epoch": 19.0,
385
+ "eval_accuracy": 0.8886933567971843,
386
+ "eval_loss": 0.37168198823928833,
387
+ "eval_runtime": 7.471,
388
+ "eval_samples_per_second": 304.243,
389
+ "eval_steps_per_second": 4.819,
390
+ "step": 304
391
+ },
392
+ {
393
+ "epoch": 19.375,
394
+ "grad_norm": 5.1360249519348145,
395
+ "learning_rate": 1.25e-05,
396
+ "loss": 0.1676,
397
+ "step": 310
398
+ },
399
+ {
400
+ "epoch": 20.0,
401
+ "grad_norm": 3.917973041534424,
402
+ "learning_rate": 1.1111111111111112e-05,
403
+ "loss": 0.1807,
404
+ "step": 320
405
+ },
406
+ {
407
+ "epoch": 20.0,
408
+ "eval_accuracy": 0.884293884733832,
409
+ "eval_loss": 0.40747764706611633,
410
+ "eval_runtime": 7.4744,
411
+ "eval_samples_per_second": 304.105,
412
+ "eval_steps_per_second": 4.816,
413
+ "step": 320
414
+ },
415
+ {
416
+ "epoch": 20.625,
417
+ "grad_norm": 4.329443454742432,
418
+ "learning_rate": 9.722222222222223e-06,
419
+ "loss": 0.177,
420
+ "step": 330
421
+ },
422
+ {
423
+ "epoch": 21.0,
424
+ "eval_accuracy": 0.8829740431148262,
425
+ "eval_loss": 0.38811179995536804,
426
+ "eval_runtime": 7.4018,
427
+ "eval_samples_per_second": 307.088,
428
+ "eval_steps_per_second": 4.864,
429
+ "step": 336
430
+ },
431
+ {
432
+ "epoch": 21.25,
433
+ "grad_norm": 5.614075183868408,
434
+ "learning_rate": 8.333333333333334e-06,
435
+ "loss": 0.17,
436
+ "step": 340
437
+ },
438
+ {
439
+ "epoch": 21.875,
440
+ "grad_norm": 4.243983745574951,
441
+ "learning_rate": 6.944444444444445e-06,
442
+ "loss": 0.1433,
443
+ "step": 350
444
+ },
445
+ {
446
+ "epoch": 22.0,
447
+ "eval_accuracy": 0.8856137263528376,
448
+ "eval_loss": 0.40139684081077576,
449
+ "eval_runtime": 7.4287,
450
+ "eval_samples_per_second": 305.975,
451
+ "eval_steps_per_second": 4.846,
452
+ "step": 352
453
+ },
454
+ {
455
+ "epoch": 22.5,
456
+ "grad_norm": 3.7538909912109375,
457
+ "learning_rate": 5.555555555555556e-06,
458
+ "loss": 0.1522,
459
+ "step": 360
460
+ },
461
+ {
462
+ "epoch": 23.0,
463
+ "eval_accuracy": 0.8873735151781786,
464
+ "eval_loss": 0.3918473422527313,
465
+ "eval_runtime": 7.5833,
466
+ "eval_samples_per_second": 299.738,
467
+ "eval_steps_per_second": 4.747,
468
+ "step": 368
469
+ },
470
+ {
471
+ "epoch": 23.125,
472
+ "grad_norm": 5.43955659866333,
473
+ "learning_rate": 4.166666666666667e-06,
474
+ "loss": 0.1608,
475
+ "step": 370
476
+ },
477
+ {
478
+ "epoch": 23.75,
479
+ "grad_norm": 3.6251299381256104,
480
+ "learning_rate": 2.777777777777778e-06,
481
+ "loss": 0.1322,
482
+ "step": 380
483
+ },
484
+ {
485
+ "epoch": 24.0,
486
+ "eval_accuracy": 0.8904531456225253,
487
+ "eval_loss": 0.4198566973209381,
488
+ "eval_runtime": 7.4829,
489
+ "eval_samples_per_second": 303.759,
490
+ "eval_steps_per_second": 4.811,
491
+ "step": 384
492
+ },
493
+ {
494
+ "epoch": 24.375,
495
+ "grad_norm": 4.068058013916016,
496
+ "learning_rate": 1.388888888888889e-06,
497
+ "loss": 0.1224,
498
+ "step": 390
499
+ },
500
+ {
501
+ "epoch": 25.0,
502
+ "grad_norm": 4.5933332443237305,
503
+ "learning_rate": 0.0,
504
+ "loss": 0.1396,
505
+ "step": 400
506
+ },
507
+ {
508
+ "epoch": 25.0,
509
+ "eval_accuracy": 0.8895732512098549,
510
+ "eval_loss": 0.4142039120197296,
511
+ "eval_runtime": 7.6009,
512
+ "eval_samples_per_second": 299.044,
513
+ "eval_steps_per_second": 4.736,
514
+ "step": 400
515
+ },
516
+ {
517
+ "epoch": 25.0,
518
+ "step": 400,
519
+ "total_flos": 2.0038784309526528e+18,
520
+ "train_loss": 0.3093862909078598,
521
+ "train_runtime": 987.5731,
522
+ "train_samples_per_second": 103.689,
523
+ "train_steps_per_second": 0.405
524
+ }
525
+ ],
526
+ "logging_steps": 10,
527
+ "max_steps": 400,
528
+ "num_input_tokens_seen": 0,
529
+ "num_train_epochs": 25,
530
+ "save_steps": 500,
531
+ "stateful_callbacks": {
532
+ "TrainerControl": {
533
+ "args": {
534
+ "should_epoch_stop": false,
535
+ "should_evaluate": false,
536
+ "should_log": false,
537
+ "should_save": true,
538
+ "should_training_stop": true
539
+ },
540
+ "attributes": {}
541
+ }
542
+ },
543
+ "total_flos": 2.0038784309526528e+18,
544
+ "train_batch_size": 64,
545
+ "trial_name": null,
546
+ "trial_params": null
547
+ }