hanasim committed on
Commit
1566692
1 Parent(s): ebcc9ce

End of training

Browse files
README.md CHANGED
@@ -1,7 +1,12 @@
1
  ---
 
 
2
  license: cc-by-nc-4.0
3
  base_model: facebook/mms-1b-all
4
  tags:
 
 
 
5
  - generated_from_trainer
6
  datasets:
7
  - common_voice_16_0
@@ -14,15 +19,15 @@ model-index:
14
  name: Automatic Speech Recognition
15
  type: automatic-speech-recognition
16
  dataset:
17
- name: common_voice_16_0
18
  type: common_voice_16_0
19
  config: id
20
  split: test
21
- args: id
22
  metrics:
23
  - name: Wer
24
  type: wer
25
- value: 0.1455760839290688
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -30,10 +35,10 @@ should probably proofread and complete it, then remove this comment. -->
30
 
31
  # breeze-listen-w2v2-id
32
 
33
- This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the common_voice_16_0 dataset.
34
  It achieves the following results on the evaluation set:
35
  - Loss: 0.1253
36
- - Wer: 0.1456
37
 
38
  ## Model description
39
 
 
1
  ---
2
+ language:
3
+ - id
4
  license: cc-by-nc-4.0
5
  base_model: facebook/mms-1b-all
6
  tags:
7
+ - automatic-speech-recognition
8
+ - mozilla-foundation/common_voice_16_0
9
+ - mms
10
  - generated_from_trainer
11
  datasets:
12
  - common_voice_16_0
 
19
  name: Automatic Speech Recognition
20
  type: automatic-speech-recognition
21
  dataset:
22
+ name: MOZILLA-FOUNDATION/COMMON_VOICE_16_0 - ID
23
  type: common_voice_16_0
24
  config: id
25
  split: test
26
+ args: 'Config: id, Training split: train+validation, Eval split: test'
27
  metrics:
28
  - name: Wer
29
  type: wer
30
+ value: 0.145808188654721
31
  ---
32
 
33
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
35
 
36
  # breeze-listen-w2v2-id
37
 
38
+ This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the MOZILLA-FOUNDATION/COMMON_VOICE_16_0 - ID dataset.
39
  It achieves the following results on the evaluation set:
40
  - Loss: 0.1253
41
+ - Wer: 0.1458
42
 
43
  ## Model description
44
 
adapter.ind.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fabda2a21f8bfe6ecaf473ac7ed25810dbdf35a63ae88262d894ebb86c779902
3
+ size 8860028
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "eval_loss": 0.12526944279670715,
4
+ "eval_runtime": 688.3498,
5
+ "eval_samples": 3642,
6
+ "eval_samples_per_second": 5.291,
7
+ "eval_steps_per_second": 0.662,
8
+ "eval_wer": 0.145808188654721,
9
+ "train_loss": 0.7944976037459608,
10
+ "train_runtime": 46739.0877,
11
+ "train_samples": 8309,
12
+ "train_samples_per_second": 0.711,
13
+ "train_steps_per_second": 0.178
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "eval_loss": 0.12526944279670715,
4
+ "eval_runtime": 688.3498,
5
+ "eval_samples": 3642,
6
+ "eval_samples_per_second": 5.291,
7
+ "eval_steps_per_second": 0.662,
8
+ "eval_wer": 0.145808188654721
9
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "train_loss": 0.7944976037459608,
4
+ "train_runtime": 46739.0877,
5
+ "train_samples": 8309,
6
+ "train_samples_per_second": 0.711,
7
+ "train_steps_per_second": 0.178
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
+ "eval_steps": 200,
6
+ "global_step": 8312,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.1,
13
+ "eval_loss": 3.2670648097991943,
14
+ "eval_runtime": 692.0326,
15
+ "eval_samples_per_second": 5.263,
16
+ "eval_steps_per_second": 0.659,
17
+ "eval_wer": 1.0,
18
+ "step": 200
19
+ },
20
+ {
21
+ "epoch": 0.19,
22
+ "eval_loss": 2.8740603923797607,
23
+ "eval_runtime": 683.8046,
24
+ "eval_samples_per_second": 5.326,
25
+ "eval_steps_per_second": 0.667,
26
+ "eval_wer": 1.0006963141769567,
27
+ "step": 400
28
+ },
29
+ {
30
+ "epoch": 0.24,
31
+ "learning_rate": 0.0009517778860204579,
32
+ "loss": 3.8381,
33
+ "step": 500
34
+ },
35
+ {
36
+ "epoch": 0.29,
37
+ "eval_loss": 2.761221170425415,
38
+ "eval_runtime": 683.642,
39
+ "eval_samples_per_second": 5.327,
40
+ "eval_steps_per_second": 0.667,
41
+ "eval_wer": 0.9954507473772166,
42
+ "step": 600
43
+ },
44
+ {
45
+ "epoch": 0.38,
46
+ "eval_loss": 2.633348226547241,
47
+ "eval_runtime": 684.1815,
48
+ "eval_samples_per_second": 5.323,
49
+ "eval_steps_per_second": 0.666,
50
+ "eval_wer": 0.9981431621947823,
51
+ "step": 800
52
+ },
53
+ {
54
+ "epoch": 0.48,
55
+ "learning_rate": 0.000890891378470531,
56
+ "loss": 2.6996,
57
+ "step": 1000
58
+ },
59
+ {
60
+ "epoch": 0.48,
61
+ "eval_loss": 2.3073549270629883,
62
+ "eval_runtime": 686.3923,
63
+ "eval_samples_per_second": 5.306,
64
+ "eval_steps_per_second": 0.664,
65
+ "eval_wer": 0.9770680531055612,
66
+ "step": 1000
67
+ },
68
+ {
69
+ "epoch": 0.58,
70
+ "eval_loss": 2.0154612064361572,
71
+ "eval_runtime": 686.5478,
72
+ "eval_samples_per_second": 5.305,
73
+ "eval_steps_per_second": 0.664,
74
+ "eval_wer": 0.9286045863893789,
75
+ "step": 1200
76
+ },
77
+ {
78
+ "epoch": 0.67,
79
+ "eval_loss": 1.9155136346817017,
80
+ "eval_runtime": 689.547,
81
+ "eval_samples_per_second": 5.282,
82
+ "eval_steps_per_second": 0.661,
83
+ "eval_wer": 0.8947172964441557,
84
+ "step": 1400
85
+ },
86
+ {
87
+ "epoch": 0.72,
88
+ "learning_rate": 0.000830004870920604,
89
+ "loss": 2.2919,
90
+ "step": 1500
91
+ },
92
+ {
93
+ "epoch": 0.77,
94
+ "eval_loss": 1.641204595565796,
95
+ "eval_runtime": 685.3748,
96
+ "eval_samples_per_second": 5.314,
97
+ "eval_steps_per_second": 0.665,
98
+ "eval_wer": 0.8813944851917185,
99
+ "step": 1600
100
+ },
101
+ {
102
+ "epoch": 0.87,
103
+ "eval_loss": 1.4531193971633911,
104
+ "eval_runtime": 689.2035,
105
+ "eval_samples_per_second": 5.284,
106
+ "eval_steps_per_second": 0.662,
107
+ "eval_wer": 0.8285210286881441,
108
+ "step": 1800
109
+ },
110
+ {
111
+ "epoch": 0.96,
112
+ "learning_rate": 0.0007691183633706771,
113
+ "loss": 1.5872,
114
+ "step": 2000
115
+ },
116
+ {
117
+ "epoch": 0.96,
118
+ "eval_loss": 0.1812867820262909,
119
+ "eval_runtime": 685.9058,
120
+ "eval_samples_per_second": 5.31,
121
+ "eval_steps_per_second": 0.665,
122
+ "eval_wer": 0.2060161544889054,
123
+ "step": 2000
124
+ },
125
+ {
126
+ "epoch": 1.06,
127
+ "eval_loss": 0.1635832041501999,
128
+ "eval_runtime": 687.9409,
129
+ "eval_samples_per_second": 5.294,
130
+ "eval_steps_per_second": 0.663,
131
+ "eval_wer": 0.18062389750255314,
132
+ "step": 2200
133
+ },
134
+ {
135
+ "epoch": 1.15,
136
+ "eval_loss": 0.155806764960289,
137
+ "eval_runtime": 692.1735,
138
+ "eval_samples_per_second": 5.262,
139
+ "eval_steps_per_second": 0.659,
140
+ "eval_wer": 0.17444991180020425,
141
+ "step": 2400
142
+ },
143
+ {
144
+ "epoch": 1.2,
145
+ "learning_rate": 0.0007084754018509498,
146
+ "loss": 0.2659,
147
+ "step": 2500
148
+ },
149
+ {
150
+ "epoch": 1.25,
151
+ "eval_loss": 0.152183398604393,
152
+ "eval_runtime": 688.556,
153
+ "eval_samples_per_second": 5.289,
154
+ "eval_steps_per_second": 0.662,
155
+ "eval_wer": 0.1646550923776808,
156
+ "step": 2600
157
+ },
158
+ {
159
+ "epoch": 1.35,
160
+ "eval_loss": 0.15532232820987701,
161
+ "eval_runtime": 688.1144,
162
+ "eval_samples_per_second": 5.293,
163
+ "eval_steps_per_second": 0.663,
164
+ "eval_wer": 0.16641908829263763,
165
+ "step": 2800
166
+ },
167
+ {
168
+ "epoch": 1.44,
169
+ "learning_rate": 0.0006475888943010228,
170
+ "loss": 0.2436,
171
+ "step": 3000
172
+ },
173
+ {
174
+ "epoch": 1.44,
175
+ "eval_loss": 0.1840931922197342,
176
+ "eval_runtime": 692.811,
177
+ "eval_samples_per_second": 5.257,
178
+ "eval_steps_per_second": 0.658,
179
+ "eval_wer": 0.1960820722309906,
180
+ "step": 3000
181
+ },
182
+ {
183
+ "epoch": 1.54,
184
+ "eval_loss": 0.14190182089805603,
185
+ "eval_runtime": 690.3365,
186
+ "eval_samples_per_second": 5.276,
187
+ "eval_steps_per_second": 0.661,
188
+ "eval_wer": 0.1640051991458546,
189
+ "step": 3200
190
+ },
191
+ {
192
+ "epoch": 1.64,
193
+ "eval_loss": 0.14559713006019592,
194
+ "eval_runtime": 685.0999,
195
+ "eval_samples_per_second": 5.316,
196
+ "eval_steps_per_second": 0.666,
197
+ "eval_wer": 0.17143255036672547,
198
+ "step": 3400
199
+ },
200
+ {
201
+ "epoch": 1.68,
202
+ "learning_rate": 0.000586702386751096,
203
+ "loss": 0.2464,
204
+ "step": 3500
205
+ },
206
+ {
207
+ "epoch": 1.73,
208
+ "eval_loss": 0.14024095237255096,
209
+ "eval_runtime": 692.5402,
210
+ "eval_samples_per_second": 5.259,
211
+ "eval_steps_per_second": 0.658,
212
+ "eval_wer": 0.16070931204159317,
213
+ "step": 3600
214
+ },
215
+ {
216
+ "epoch": 1.83,
217
+ "eval_loss": 0.1345185786485672,
218
+ "eval_runtime": 694.4502,
219
+ "eval_samples_per_second": 5.244,
220
+ "eval_steps_per_second": 0.657,
221
+ "eval_wer": 0.1528177513694179,
222
+ "step": 3800
223
+ },
224
+ {
225
+ "epoch": 1.92,
226
+ "learning_rate": 0.000525815879201169,
227
+ "loss": 0.2292,
228
+ "step": 4000
229
+ },
230
+ {
231
+ "epoch": 1.92,
232
+ "eval_loss": 0.134234219789505,
233
+ "eval_runtime": 693.8578,
234
+ "eval_samples_per_second": 5.249,
235
+ "eval_steps_per_second": 0.657,
236
+ "eval_wer": 0.155556587132114,
237
+ "step": 4000
238
+ },
239
+ {
240
+ "epoch": 2.02,
241
+ "eval_loss": 0.13340923190116882,
242
+ "eval_runtime": 684.0209,
243
+ "eval_samples_per_second": 5.324,
244
+ "eval_steps_per_second": 0.667,
245
+ "eval_wer": 0.15518521957107045,
246
+ "step": 4200
247
+ },
248
+ {
249
+ "epoch": 2.12,
250
+ "eval_loss": 0.13518257439136505,
251
+ "eval_runtime": 687.2622,
252
+ "eval_samples_per_second": 5.299,
253
+ "eval_steps_per_second": 0.664,
254
+ "eval_wer": 0.1543496425587225,
255
+ "step": 4400
256
+ },
257
+ {
258
+ "epoch": 2.17,
259
+ "learning_rate": 0.0004649293716512421,
260
+ "loss": 0.2209,
261
+ "step": 4500
262
+ },
263
+ {
264
+ "epoch": 2.21,
265
+ "eval_loss": 0.13499902188777924,
266
+ "eval_runtime": 691.8679,
267
+ "eval_samples_per_second": 5.264,
268
+ "eval_steps_per_second": 0.659,
269
+ "eval_wer": 0.1537925912171572,
270
+ "step": 4600
271
+ },
272
+ {
273
+ "epoch": 2.31,
274
+ "eval_loss": 0.13418444991111755,
275
+ "eval_runtime": 685.3615,
276
+ "eval_samples_per_second": 5.314,
277
+ "eval_steps_per_second": 0.665,
278
+ "eval_wer": 0.1530498560950701,
279
+ "step": 4800
280
+ },
281
+ {
282
+ "epoch": 2.41,
283
+ "learning_rate": 0.00040404286410131515,
284
+ "loss": 0.2136,
285
+ "step": 5000
286
+ },
287
+ {
288
+ "epoch": 2.41,
289
+ "eval_loss": 0.1319747269153595,
290
+ "eval_runtime": 688.5799,
291
+ "eval_samples_per_second": 5.289,
292
+ "eval_steps_per_second": 0.662,
293
+ "eval_wer": 0.1540246959428094,
294
+ "step": 5000
295
+ },
296
+ {
297
+ "epoch": 2.5,
298
+ "eval_loss": 0.13689081370830536,
299
+ "eval_runtime": 691.8314,
300
+ "eval_samples_per_second": 5.264,
301
+ "eval_steps_per_second": 0.659,
302
+ "eval_wer": 0.15690279454089684,
303
+ "step": 5200
304
+ },
305
+ {
306
+ "epoch": 2.6,
307
+ "eval_loss": 0.13139554858207703,
308
+ "eval_runtime": 689.159,
309
+ "eval_samples_per_second": 5.285,
310
+ "eval_steps_per_second": 0.662,
311
+ "eval_wer": 0.1516572277411568,
312
+ "step": 5400
313
+ },
314
+ {
315
+ "epoch": 2.65,
316
+ "learning_rate": 0.0003431563565513882,
317
+ "loss": 0.2154,
318
+ "step": 5500
319
+ },
320
+ {
321
+ "epoch": 2.69,
322
+ "eval_loss": 0.1303856372833252,
323
+ "eval_runtime": 694.7157,
324
+ "eval_samples_per_second": 5.242,
325
+ "eval_steps_per_second": 0.656,
326
+ "eval_wer": 0.15063596694828707,
327
+ "step": 5600
328
+ },
329
+ {
330
+ "epoch": 2.79,
331
+ "eval_loss": 0.13201411068439484,
332
+ "eval_runtime": 691.5101,
333
+ "eval_samples_per_second": 5.267,
334
+ "eval_steps_per_second": 0.659,
335
+ "eval_wer": 0.15072880883854795,
336
+ "step": 5800
337
+ },
338
+ {
339
+ "epoch": 2.89,
340
+ "learning_rate": 0.0002822698490014613,
341
+ "loss": 0.2123,
342
+ "step": 6000
343
+ },
344
+ {
345
+ "epoch": 2.89,
346
+ "eval_loss": 0.13187964260578156,
347
+ "eval_runtime": 687.2712,
348
+ "eval_samples_per_second": 5.299,
349
+ "eval_steps_per_second": 0.663,
350
+ "eval_wer": 0.1523999628632439,
351
+ "step": 6000
352
+ },
353
+ {
354
+ "epoch": 2.98,
355
+ "eval_loss": 0.12917861342430115,
356
+ "eval_runtime": 691.2948,
357
+ "eval_samples_per_second": 5.268,
358
+ "eval_steps_per_second": 0.66,
359
+ "eval_wer": 0.1523999628632439,
360
+ "step": 6200
361
+ },
362
+ {
363
+ "epoch": 3.08,
364
+ "eval_loss": 0.12825024127960205,
365
+ "eval_runtime": 689.7813,
366
+ "eval_samples_per_second": 5.28,
367
+ "eval_steps_per_second": 0.661,
368
+ "eval_wer": 0.1488255500881998,
369
+ "step": 6400
370
+ },
371
+ {
372
+ "epoch": 3.13,
373
+ "learning_rate": 0.00022138334145153436,
374
+ "loss": 0.2109,
375
+ "step": 6500
376
+ },
377
+ {
378
+ "epoch": 3.18,
379
+ "eval_loss": 0.1257564276456833,
380
+ "eval_runtime": 687.694,
381
+ "eval_samples_per_second": 5.296,
382
+ "eval_steps_per_second": 0.663,
383
+ "eval_wer": 0.14919691764924334,
384
+ "step": 6600
385
+ },
386
+ {
387
+ "epoch": 3.27,
388
+ "eval_loss": 0.12906372547149658,
389
+ "eval_runtime": 687.8093,
390
+ "eval_samples_per_second": 5.295,
391
+ "eval_steps_per_second": 0.663,
392
+ "eval_wer": 0.1488255500881998,
393
+ "step": 6800
394
+ },
395
+ {
396
+ "epoch": 3.37,
397
+ "learning_rate": 0.0001604968339016074,
398
+ "loss": 0.2103,
399
+ "step": 7000
400
+ },
401
+ {
402
+ "epoch": 3.37,
403
+ "eval_loss": 0.12778830528259277,
404
+ "eval_runtime": 694.3257,
405
+ "eval_samples_per_second": 5.245,
406
+ "eval_steps_per_second": 0.657,
407
+ "eval_wer": 0.14840776158202582,
408
+ "step": 7000
409
+ },
410
+ {
411
+ "epoch": 3.46,
412
+ "eval_loss": 0.12501177191734314,
413
+ "eval_runtime": 679.1124,
414
+ "eval_samples_per_second": 5.363,
415
+ "eval_steps_per_second": 0.671,
416
+ "eval_wer": 0.14780428929533004,
417
+ "step": 7200
418
+ },
419
+ {
420
+ "epoch": 3.56,
421
+ "eval_loss": 0.12769711017608643,
422
+ "eval_runtime": 683.2755,
423
+ "eval_samples_per_second": 5.33,
424
+ "eval_steps_per_second": 0.667,
425
+ "eval_wer": 0.14822207780150404,
426
+ "step": 7400
427
+ },
428
+ {
429
+ "epoch": 3.61,
430
+ "learning_rate": 9.961032635168047e-05,
431
+ "loss": 0.1986,
432
+ "step": 7500
433
+ },
434
+ {
435
+ "epoch": 3.66,
436
+ "eval_loss": 0.1256353259086609,
437
+ "eval_runtime": 680.6384,
438
+ "eval_samples_per_second": 5.351,
439
+ "eval_steps_per_second": 0.67,
440
+ "eval_wer": 0.14757218456967783,
441
+ "step": 7600
442
+ },
443
+ {
444
+ "epoch": 3.75,
445
+ "eval_loss": 0.12579868733882904,
446
+ "eval_runtime": 683.2757,
447
+ "eval_samples_per_second": 5.33,
448
+ "eval_steps_per_second": 0.667,
449
+ "eval_wer": 0.14682944944759074,
450
+ "step": 7800
451
+ },
452
+ {
453
+ "epoch": 3.85,
454
+ "learning_rate": 3.884559181685338e-05,
455
+ "loss": 0.1954,
456
+ "step": 8000
457
+ },
458
+ {
459
+ "epoch": 3.85,
460
+ "eval_loss": 0.12557055056095123,
461
+ "eval_runtime": 690.701,
462
+ "eval_samples_per_second": 5.273,
463
+ "eval_steps_per_second": 0.66,
464
+ "eval_wer": 0.14645808188654721,
465
+ "step": 8000
466
+ },
467
+ {
468
+ "epoch": 3.95,
469
+ "eval_loss": 0.12530682981014252,
470
+ "eval_runtime": 692.3328,
471
+ "eval_samples_per_second": 5.26,
472
+ "eval_steps_per_second": 0.659,
473
+ "eval_wer": 0.1455760839290688,
474
+ "step": 8200
475
+ },
476
+ {
477
+ "epoch": 4.0,
478
+ "step": 8312,
479
+ "total_flos": 1.5580571693960135e+19,
480
+ "train_loss": 0.7944976037459608,
481
+ "train_runtime": 46739.0877,
482
+ "train_samples_per_second": 0.711,
483
+ "train_steps_per_second": 0.178
484
+ }
485
+ ],
486
+ "logging_steps": 500,
487
+ "max_steps": 8312,
488
+ "num_input_tokens_seen": 0,
489
+ "num_train_epochs": 4,
490
+ "save_steps": 200,
491
+ "total_flos": 1.5580571693960135e+19,
492
+ "train_batch_size": 4,
493
+ "trial_name": null,
494
+ "trial_params": null
495
+ }