DrishtiSharma committed on
Commit
6fdd07c
1 Parent(s): 3791502

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +14 -0
  2. eval_results.json +9 -0
  3. train_results.json +8 -0
  4. trainer_state.json +409 -0
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 200.0,
3
+ "eval_loss": 1.0355710983276367,
4
+ "eval_runtime": 14.0252,
5
+ "eval_samples": 294,
6
+ "eval_samples_per_second": 20.962,
7
+ "eval_steps_per_second": 0.713,
8
+ "eval_wer": 0.6523702031602708,
9
+ "train_loss": 0.8014375554598295,
10
+ "train_runtime": 12807.5215,
11
+ "train_samples": 828,
12
+ "train_samples_per_second": 12.93,
13
+ "train_steps_per_second": 0.812
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 200.0,
3
+ "eval_loss": 1.0355710983276367,
4
+ "eval_runtime": 14.0252,
5
+ "eval_samples": 294,
6
+ "eval_samples_per_second": 20.962,
7
+ "eval_steps_per_second": 0.713,
8
+ "eval_wer": 0.6523702031602708
9
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 200.0,
3
+ "train_loss": 0.8014375554598295,
4
+ "train_runtime": 12807.5215,
5
+ "train_samples": 828,
6
+ "train_samples_per_second": 12.93,
7
+ "train_steps_per_second": 0.812
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,409 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 200.0,
5
+ "global_step": 10400,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 5.77,
12
+ "learning_rate": 0.000148,
13
+ "loss": 5.649,
14
+ "step": 300
15
+ },
16
+ {
17
+ "epoch": 9.62,
18
+ "eval_loss": 3.0038444995880127,
19
+ "eval_runtime": 14.1536,
20
+ "eval_samples_per_second": 20.772,
21
+ "eval_steps_per_second": 0.707,
22
+ "eval_wer": 1.0,
23
+ "step": 500
24
+ },
25
+ {
26
+ "epoch": 11.54,
27
+ "learning_rate": 0.00029800000000000003,
28
+ "loss": 2.9978,
29
+ "step": 600
30
+ },
31
+ {
32
+ "epoch": 17.31,
33
+ "learning_rate": 0.00039600000000000003,
34
+ "loss": 1.6272,
35
+ "step": 900
36
+ },
37
+ {
38
+ "epoch": 19.23,
39
+ "eval_loss": 0.7362223267555237,
40
+ "eval_runtime": 13.8179,
41
+ "eval_samples_per_second": 21.277,
42
+ "eval_steps_per_second": 0.724,
43
+ "eval_wer": 0.781941309255079,
44
+ "step": 1000
45
+ },
46
+ {
47
+ "epoch": 23.08,
48
+ "learning_rate": 0.0003835,
49
+ "loss": 1.2822,
50
+ "step": 1200
51
+ },
52
+ {
53
+ "epoch": 28.85,
54
+ "learning_rate": 0.000371,
55
+ "loss": 1.1354,
56
+ "step": 1500
57
+ },
58
+ {
59
+ "epoch": 28.85,
60
+ "eval_loss": 0.6409761309623718,
61
+ "eval_runtime": 13.8178,
62
+ "eval_samples_per_second": 21.277,
63
+ "eval_steps_per_second": 0.724,
64
+ "eval_wer": 0.7110609480812641,
65
+ "step": 1500
66
+ },
67
+ {
68
+ "epoch": 34.62,
69
+ "learning_rate": 0.00035850000000000004,
70
+ "loss": 1.0424,
71
+ "step": 1800
72
+ },
73
+ {
74
+ "epoch": 38.46,
75
+ "eval_loss": 0.6907294392585754,
76
+ "eval_runtime": 13.9619,
77
+ "eval_samples_per_second": 21.057,
78
+ "eval_steps_per_second": 0.716,
79
+ "eval_wer": 0.7431151241534989,
80
+ "step": 2000
81
+ },
82
+ {
83
+ "epoch": 40.38,
84
+ "learning_rate": 0.000346,
85
+ "loss": 0.9872,
86
+ "step": 2100
87
+ },
88
+ {
89
+ "epoch": 46.15,
90
+ "learning_rate": 0.00033350000000000003,
91
+ "loss": 0.9293,
92
+ "step": 2400
93
+ },
94
+ {
95
+ "epoch": 48.08,
96
+ "eval_loss": 0.7248561978340149,
97
+ "eval_runtime": 14.1849,
98
+ "eval_samples_per_second": 20.726,
99
+ "eval_steps_per_second": 0.705,
100
+ "eval_wer": 0.7101580135440181,
101
+ "step": 2500
102
+ },
103
+ {
104
+ "epoch": 51.92,
105
+ "learning_rate": 0.000321,
106
+ "loss": 0.8747,
107
+ "step": 2700
108
+ },
109
+ {
110
+ "epoch": 57.69,
111
+ "learning_rate": 0.0003085,
112
+ "loss": 0.8246,
113
+ "step": 3000
114
+ },
115
+ {
116
+ "epoch": 57.69,
117
+ "eval_loss": 0.7421836853027344,
118
+ "eval_runtime": 14.4192,
119
+ "eval_samples_per_second": 20.39,
120
+ "eval_steps_per_second": 0.694,
121
+ "eval_wer": 0.6966139954853273,
122
+ "step": 3000
123
+ },
124
+ {
125
+ "epoch": 63.46,
126
+ "learning_rate": 0.000296,
127
+ "loss": 0.7837,
128
+ "step": 3300
129
+ },
130
+ {
131
+ "epoch": 67.31,
132
+ "eval_loss": 0.7412946820259094,
133
+ "eval_runtime": 14.1823,
134
+ "eval_samples_per_second": 20.73,
135
+ "eval_steps_per_second": 0.705,
136
+ "eval_wer": 0.6812641083521445,
137
+ "step": 3500
138
+ },
139
+ {
140
+ "epoch": 69.23,
141
+ "learning_rate": 0.0002835,
142
+ "loss": 0.7527,
143
+ "step": 3600
144
+ },
145
+ {
146
+ "epoch": 75.0,
147
+ "learning_rate": 0.00027100000000000003,
148
+ "loss": 0.7147,
149
+ "step": 3900
150
+ },
151
+ {
152
+ "epoch": 76.92,
153
+ "eval_loss": 0.7873469591140747,
154
+ "eval_runtime": 13.9067,
155
+ "eval_samples_per_second": 21.141,
156
+ "eval_steps_per_second": 0.719,
157
+ "eval_wer": 0.6930022573363431,
158
+ "step": 4000
159
+ },
160
+ {
161
+ "epoch": 80.77,
162
+ "learning_rate": 0.0002585,
163
+ "loss": 0.6779,
164
+ "step": 4200
165
+ },
166
+ {
167
+ "epoch": 86.54,
168
+ "learning_rate": 0.000246,
169
+ "loss": 0.6276,
170
+ "step": 4500
171
+ },
172
+ {
173
+ "epoch": 86.54,
174
+ "eval_loss": 0.8037810921669006,
175
+ "eval_runtime": 14.0837,
176
+ "eval_samples_per_second": 20.875,
177
+ "eval_steps_per_second": 0.71,
178
+ "eval_wer": 0.6677200902934537,
179
+ "step": 4500
180
+ },
181
+ {
182
+ "epoch": 92.31,
183
+ "learning_rate": 0.0002335,
184
+ "loss": 0.6041,
185
+ "step": 4800
186
+ },
187
+ {
188
+ "epoch": 96.15,
189
+ "eval_loss": 0.8240488767623901,
190
+ "eval_runtime": 13.9338,
191
+ "eval_samples_per_second": 21.1,
192
+ "eval_steps_per_second": 0.718,
193
+ "eval_wer": 0.6830699774266366,
194
+ "step": 5000
195
+ },
196
+ {
197
+ "epoch": 98.08,
198
+ "learning_rate": 0.000221,
199
+ "loss": 0.5588,
200
+ "step": 5100
201
+ },
202
+ {
203
+ "epoch": 103.85,
204
+ "learning_rate": 0.0002085,
205
+ "loss": 0.5336,
206
+ "step": 5400
207
+ },
208
+ {
209
+ "epoch": 105.77,
210
+ "eval_loss": 0.8747946619987488,
211
+ "eval_runtime": 13.9807,
212
+ "eval_samples_per_second": 21.029,
213
+ "eval_steps_per_second": 0.715,
214
+ "eval_wer": 0.6749435665914221,
215
+ "step": 5500
216
+ },
217
+ {
218
+ "epoch": 109.62,
219
+ "learning_rate": 0.000196,
220
+ "loss": 0.5,
221
+ "step": 5700
222
+ },
223
+ {
224
+ "epoch": 115.38,
225
+ "learning_rate": 0.00018350000000000002,
226
+ "loss": 0.4705,
227
+ "step": 6000
228
+ },
229
+ {
230
+ "epoch": 115.38,
231
+ "eval_loss": 0.9005643129348755,
232
+ "eval_runtime": 13.867,
233
+ "eval_samples_per_second": 21.201,
234
+ "eval_steps_per_second": 0.721,
235
+ "eval_wer": 0.6496613995485327,
236
+ "step": 6000
237
+ },
238
+ {
239
+ "epoch": 121.15,
240
+ "learning_rate": 0.00017104166666666667,
241
+ "loss": 0.43,
242
+ "step": 6300
243
+ },
244
+ {
245
+ "epoch": 125.0,
246
+ "eval_loss": 0.8953593969345093,
247
+ "eval_runtime": 13.9621,
248
+ "eval_samples_per_second": 21.057,
249
+ "eval_steps_per_second": 0.716,
250
+ "eval_wer": 0.655079006772009,
251
+ "step": 6500
252
+ },
253
+ {
254
+ "epoch": 126.92,
255
+ "learning_rate": 0.00015854166666666667,
256
+ "loss": 0.4068,
257
+ "step": 6600
258
+ },
259
+ {
260
+ "epoch": 132.69,
261
+ "learning_rate": 0.0001460416666666667,
262
+ "loss": 0.3859,
263
+ "step": 6900
264
+ },
265
+ {
266
+ "epoch": 134.62,
267
+ "eval_loss": 0.9073536396026611,
268
+ "eval_runtime": 13.9518,
269
+ "eval_samples_per_second": 21.072,
270
+ "eval_steps_per_second": 0.717,
271
+ "eval_wer": 0.6613995485327314,
272
+ "step": 7000
273
+ },
274
+ {
275
+ "epoch": 138.46,
276
+ "learning_rate": 0.00013354166666666668,
277
+ "loss": 0.3622,
278
+ "step": 7200
279
+ },
280
+ {
281
+ "epoch": 144.23,
282
+ "learning_rate": 0.00012104166666666668,
283
+ "loss": 0.3342,
284
+ "step": 7500
285
+ },
286
+ {
287
+ "epoch": 144.23,
288
+ "eval_loss": 0.9693499803543091,
289
+ "eval_runtime": 13.8467,
290
+ "eval_samples_per_second": 21.233,
291
+ "eval_steps_per_second": 0.722,
292
+ "eval_wer": 0.6559819413092551,
293
+ "step": 7500
294
+ },
295
+ {
296
+ "epoch": 150.0,
297
+ "learning_rate": 0.00010854166666666667,
298
+ "loss": 0.3155,
299
+ "step": 7800
300
+ },
301
+ {
302
+ "epoch": 153.85,
303
+ "eval_loss": 1.0072590112686157,
304
+ "eval_runtime": 13.7449,
305
+ "eval_samples_per_second": 21.39,
306
+ "eval_steps_per_second": 0.728,
307
+ "eval_wer": 0.6690744920993228,
308
+ "step": 8000
309
+ },
310
+ {
311
+ "epoch": 155.77,
312
+ "learning_rate": 9.604166666666668e-05,
313
+ "loss": 0.2894,
314
+ "step": 8100
315
+ },
316
+ {
317
+ "epoch": 161.54,
318
+ "learning_rate": 8.358333333333334e-05,
319
+ "loss": 0.2673,
320
+ "step": 8400
321
+ },
322
+ {
323
+ "epoch": 163.46,
324
+ "eval_loss": 1.0170269012451172,
325
+ "eval_runtime": 14.0595,
326
+ "eval_samples_per_second": 20.911,
327
+ "eval_steps_per_second": 0.711,
328
+ "eval_wer": 0.6632054176072235,
329
+ "step": 8500
330
+ },
331
+ {
332
+ "epoch": 167.31,
333
+ "learning_rate": 7.108333333333333e-05,
334
+ "loss": 0.2517,
335
+ "step": 8700
336
+ },
337
+ {
338
+ "epoch": 173.08,
339
+ "learning_rate": 5.858333333333333e-05,
340
+ "loss": 0.2409,
341
+ "step": 9000
342
+ },
343
+ {
344
+ "epoch": 173.08,
345
+ "eval_loss": 1.0304286479949951,
346
+ "eval_runtime": 13.8942,
347
+ "eval_samples_per_second": 21.16,
348
+ "eval_steps_per_second": 0.72,
349
+ "eval_wer": 0.6708803611738149,
350
+ "step": 9000
351
+ },
352
+ {
353
+ "epoch": 178.85,
354
+ "learning_rate": 4.608333333333333e-05,
355
+ "loss": 0.2189,
356
+ "step": 9300
357
+ },
358
+ {
359
+ "epoch": 182.69,
360
+ "eval_loss": 0.9965260624885559,
361
+ "eval_runtime": 14.2442,
362
+ "eval_samples_per_second": 20.64,
363
+ "eval_steps_per_second": 0.702,
364
+ "eval_wer": 0.654627539503386,
365
+ "step": 9500
366
+ },
367
+ {
368
+ "epoch": 184.62,
369
+ "learning_rate": 3.3625000000000004e-05,
370
+ "loss": 0.203,
371
+ "step": 9600
372
+ },
373
+ {
374
+ "epoch": 190.38,
375
+ "learning_rate": 2.1125000000000002e-05,
376
+ "loss": 0.1973,
377
+ "step": 9900
378
+ },
379
+ {
380
+ "epoch": 192.31,
381
+ "eval_loss": 1.0360474586486816,
382
+ "eval_runtime": 14.1087,
383
+ "eval_samples_per_second": 20.838,
384
+ "eval_steps_per_second": 0.709,
385
+ "eval_wer": 0.655079006772009,
386
+ "step": 10000
387
+ },
388
+ {
389
+ "epoch": 196.15,
390
+ "learning_rate": 8.625e-06,
391
+ "loss": 0.1881,
392
+ "step": 10200
393
+ },
394
+ {
395
+ "epoch": 200.0,
396
+ "step": 10400,
397
+ "total_flos": 3.0179570579437056e+19,
398
+ "train_loss": 0.8014375554598295,
399
+ "train_runtime": 12807.5215,
400
+ "train_samples_per_second": 12.93,
401
+ "train_steps_per_second": 0.812
402
+ }
403
+ ],
404
+ "max_steps": 10400,
405
+ "num_train_epochs": 200,
406
+ "total_flos": 3.0179570579437056e+19,
407
+ "trial_name": null,
408
+ "trial_params": null
409
+ }