ales commited on
Commit
ff837b7
1 Parent(s): 18a7a50

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 0.6388838291168213,
4
- "eval_runtime": 16.8521,
5
  "eval_samples": 64,
6
- "eval_samples_per_second": 3.798,
7
- "eval_steps_per_second": 0.119,
8
- "eval_wer": 60.07326007326007,
9
- "train_loss": 1.0103698587417602,
10
- "train_runtime": 873.4716,
11
- "train_samples_per_second": 3.664,
12
- "train_steps_per_second": 0.114
13
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 0.4685819447040558,
4
+ "eval_runtime": 16.4606,
5
  "eval_samples": 64,
6
+ "eval_samples_per_second": 3.888,
7
+ "eval_steps_per_second": 0.122,
8
+ "eval_wer": 51.28205128205128,
9
+ "train_loss": 0.8012711083889008,
10
+ "train_runtime": 2053.4009,
11
+ "train_samples_per_second": 3.117,
12
+ "train_steps_per_second": 0.097
13
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 0.6388838291168213,
4
- "eval_runtime": 16.8521,
5
  "eval_samples": 64,
6
- "eval_samples_per_second": 3.798,
7
- "eval_steps_per_second": 0.119,
8
- "eval_wer": 60.07326007326007
9
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 0.4685819447040558,
4
+ "eval_runtime": 16.4606,
5
  "eval_samples": 64,
6
+ "eval_samples_per_second": 3.888,
7
+ "eval_steps_per_second": 0.122,
8
+ "eval_wer": 51.28205128205128
9
  }
runs/Dec13_13-15-52_d7f040c448a8/events.out.tfevents.1670939846.d7f040c448a8.37644.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4affb727a71a89dc4efa84f8d94dc08a33f65a82e2929b7d98fc5214eb48f452
3
+ size 358
train.log CHANGED
@@ -209,3 +209,9 @@
209
  {'loss': 0.4952, 'learning_rate': 1.5789473684210528e-06, 'epoch': 1.0}
210
  {'eval_loss': 0.4624484181404114, 'eval_wer': 51.46520146520146, 'eval_runtime': 19.165, 'eval_samples_per_second': 3.339, 'eval_steps_per_second': 0.104, 'epoch': 1.0}
211
  {'train_runtime': 2053.4009, 'train_samples_per_second': 3.117, 'train_steps_per_second': 0.097, 'train_loss': 0.8012711083889008, 'epoch': 1.0}
 
 
 
 
 
 
 
209
  {'loss': 0.4952, 'learning_rate': 1.5789473684210528e-06, 'epoch': 1.0}
210
  {'eval_loss': 0.4624484181404114, 'eval_wer': 51.46520146520146, 'eval_runtime': 19.165, 'eval_samples_per_second': 3.339, 'eval_steps_per_second': 0.104, 'epoch': 1.0}
211
  {'train_runtime': 2053.4009, 'train_samples_per_second': 3.117, 'train_steps_per_second': 0.097, 'train_loss': 0.8012711083889008, 'epoch': 1.0}
212
+ ***** train metrics *****
213
+ epoch = 1.0
214
+ train_loss = 0.8013
215
+ train_runtime = 0:34:13.40
216
+ train_samples_per_second = 3.117
217
+ train_steps_per_second = 0.097
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 1.0103698587417602,
4
- "train_runtime": 873.4716,
5
- "train_samples_per_second": 3.664,
6
- "train_steps_per_second": 0.114
7
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.8012711083889008,
4
+ "train_runtime": 2053.4009,
5
+ "train_samples_per_second": 3.117,
6
+ "train_steps_per_second": 0.097
7
  }
trainer_state.json CHANGED
@@ -1,175 +1,331 @@
1
  {
2
- "best_metric": 60.07326007326007,
3
- "best_model_checkpoint": "./checkpoint-70",
4
  "epoch": 1.0,
5
- "global_step": 100,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.1,
 
 
 
 
 
 
12
  "learning_rate": 7e-05,
13
- "loss": 2.5622,
14
  "step": 10
15
  },
16
  {
17
- "epoch": 0.1,
18
  "eval_loss": 1.5401501655578613,
19
- "eval_runtime": 62.3188,
20
- "eval_samples_per_second": 1.027,
21
  "eval_steps_per_second": 0.032,
22
  "eval_wer": 94.5054945054945,
23
  "step": 10
24
  },
25
  {
26
- "epoch": 0.2,
27
- "learning_rate": 9.222222222222223e-05,
28
- "loss": 1.3719,
29
  "step": 20
30
  },
31
  {
32
- "epoch": 0.2,
33
- "eval_loss": 1.001212239265442,
34
- "eval_runtime": 18.9902,
35
- "eval_samples_per_second": 3.37,
36
- "eval_steps_per_second": 0.105,
37
- "eval_wer": 75.27472527472527,
38
  "step": 20
39
  },
40
  {
41
- "epoch": 0.3,
42
- "learning_rate": 8.111111111111112e-05,
43
- "loss": 0.9898,
44
  "step": 30
45
  },
46
  {
47
- "epoch": 0.3,
48
- "eval_loss": 0.8217034339904785,
49
- "eval_runtime": 17.7847,
50
- "eval_samples_per_second": 3.599,
51
- "eval_steps_per_second": 0.112,
52
- "eval_wer": 72.7106227106227,
53
  "step": 30
54
  },
55
  {
56
- "epoch": 0.4,
57
- "learning_rate": 7e-05,
58
- "loss": 0.9742,
59
  "step": 40
60
  },
61
  {
62
- "epoch": 0.4,
63
- "eval_loss": 0.7924289107322693,
64
- "eval_runtime": 19.063,
65
- "eval_samples_per_second": 3.357,
66
- "eval_steps_per_second": 0.105,
67
- "eval_wer": 72.52747252747253,
68
  "step": 40
69
  },
70
  {
71
- "epoch": 0.5,
72
- "learning_rate": 5.8888888888888896e-05,
73
- "loss": 0.6951,
74
  "step": 50
75
  },
76
  {
77
- "epoch": 0.5,
78
- "eval_loss": 0.7628086805343628,
79
- "eval_runtime": 17.8327,
80
- "eval_samples_per_second": 3.589,
81
- "eval_steps_per_second": 0.112,
82
- "eval_wer": 76.19047619047619,
83
  "step": 50
84
  },
85
  {
86
- "epoch": 0.6,
87
- "learning_rate": 4.7777777777777784e-05,
88
- "loss": 0.7824,
89
  "step": 60
90
  },
91
  {
92
- "epoch": 0.6,
93
- "eval_loss": 0.6737741231918335,
94
- "eval_runtime": 18.8876,
95
- "eval_samples_per_second": 3.388,
96
- "eval_steps_per_second": 0.106,
97
- "eval_wer": 65.38461538461539,
98
  "step": 60
99
  },
100
  {
101
- "epoch": 0.7,
102
- "learning_rate": 3.6666666666666666e-05,
103
- "loss": 0.6818,
104
  "step": 70
105
  },
106
  {
107
- "epoch": 0.7,
108
- "eval_loss": 0.6388838291168213,
109
- "eval_runtime": 18.9502,
110
- "eval_samples_per_second": 3.377,
111
- "eval_steps_per_second": 0.106,
112
- "eval_wer": 60.07326007326007,
113
  "step": 70
114
  },
115
  {
116
- "epoch": 0.8,
117
- "learning_rate": 2.5555555555555554e-05,
118
- "loss": 0.7823,
119
  "step": 80
120
  },
121
  {
122
- "epoch": 0.8,
123
- "eval_loss": 0.6208388805389404,
124
- "eval_runtime": 18.1881,
125
- "eval_samples_per_second": 3.519,
126
- "eval_steps_per_second": 0.11,
127
- "eval_wer": 65.75091575091575,
128
  "step": 80
129
  },
130
  {
131
- "epoch": 0.9,
132
- "learning_rate": 1.4444444444444444e-05,
133
- "loss": 0.5994,
134
  "step": 90
135
  },
136
  {
137
- "epoch": 0.9,
138
- "eval_loss": 0.5900620818138123,
139
- "eval_runtime": 17.489,
140
- "eval_samples_per_second": 3.659,
141
- "eval_steps_per_second": 0.114,
142
- "eval_wer": 61.904761904761905,
143
  "step": 90
144
  },
145
  {
146
- "epoch": 1.0,
147
- "learning_rate": 3.3333333333333333e-06,
148
- "loss": 0.6647,
149
  "step": 100
150
  },
151
  {
152
- "epoch": 1.0,
153
- "eval_loss": 0.5789934992790222,
154
- "eval_runtime": 18.4962,
155
- "eval_samples_per_second": 3.46,
156
- "eval_steps_per_second": 0.108,
157
- "eval_wer": 61.72161172161172,
158
  "step": 100
159
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  {
161
  "epoch": 1.0,
162
- "step": 100,
163
- "total_flos": 7.8780432384e+16,
164
- "train_loss": 1.0103698587417602,
165
- "train_runtime": 873.4716,
166
- "train_samples_per_second": 3.664,
167
- "train_steps_per_second": 0.114
168
  }
169
  ],
170
- "max_steps": 100,
171
  "num_train_epochs": 9223372036854775807,
172
- "total_flos": 7.8780432384e+16,
173
  "trial_name": null,
174
  "trial_params": null
175
  }
 
1
  {
2
+ "best_metric": 51.28205128205128,
3
+ "best_model_checkpoint": "./checkpoint-190",
4
  "epoch": 1.0,
5
+ "global_step": 200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.01,
12
+ "learning_rate": 0.0,
13
+ "loss": 2.7922,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 0.05,
18
  "learning_rate": 7e-05,
19
+ "loss": 2.5366,
20
  "step": 10
21
  },
22
  {
23
+ "epoch": 0.05,
24
  "eval_loss": 1.5401501655578613,
25
+ "eval_runtime": 61.7756,
26
+ "eval_samples_per_second": 1.036,
27
  "eval_steps_per_second": 0.032,
28
  "eval_wer": 94.5054945054945,
29
  "step": 10
30
  },
31
  {
32
+ "epoch": 0.1,
33
+ "learning_rate": 9.631578947368421e-05,
34
+ "loss": 1.3721,
35
  "step": 20
36
  },
37
  {
38
+ "epoch": 0.1,
39
+ "eval_loss": 1.0021113157272339,
40
+ "eval_runtime": 18.2189,
41
+ "eval_samples_per_second": 3.513,
42
+ "eval_steps_per_second": 0.11,
43
+ "eval_wer": 75.82417582417582,
44
  "step": 20
45
  },
46
  {
47
+ "epoch": 0.15,
48
+ "learning_rate": 9.105263157894738e-05,
49
+ "loss": 0.9921,
50
  "step": 30
51
  },
52
  {
53
+ "epoch": 0.15,
54
+ "eval_loss": 0.8322427868843079,
55
+ "eval_runtime": 18.5377,
56
+ "eval_samples_per_second": 3.452,
57
+ "eval_steps_per_second": 0.108,
58
+ "eval_wer": 75.0915750915751,
59
  "step": 30
60
  },
61
  {
62
+ "epoch": 0.2,
63
+ "learning_rate": 8.578947368421054e-05,
64
+ "loss": 0.9844,
65
  "step": 40
66
  },
67
  {
68
+ "epoch": 0.2,
69
+ "eval_loss": 0.8080323338508606,
70
+ "eval_runtime": 18.3811,
71
+ "eval_samples_per_second": 3.482,
72
+ "eval_steps_per_second": 0.109,
73
+ "eval_wer": 72.89377289377289,
74
  "step": 40
75
  },
76
  {
77
+ "epoch": 0.25,
78
+ "learning_rate": 8.052631578947368e-05,
79
+ "loss": 0.7071,
80
  "step": 50
81
  },
82
  {
83
+ "epoch": 0.25,
84
+ "eval_loss": 0.7861990332603455,
85
+ "eval_runtime": 18.6934,
86
+ "eval_samples_per_second": 3.424,
87
+ "eval_steps_per_second": 0.107,
88
+ "eval_wer": 77.2893772893773,
89
  "step": 50
90
  },
91
  {
92
+ "epoch": 0.3,
93
+ "learning_rate": 7.526315789473685e-05,
94
+ "loss": 0.7998,
95
  "step": 60
96
  },
97
  {
98
+ "epoch": 0.3,
99
+ "eval_loss": 0.7052078247070312,
100
+ "eval_runtime": 18.0962,
101
+ "eval_samples_per_second": 3.537,
102
+ "eval_steps_per_second": 0.111,
103
+ "eval_wer": 68.86446886446886,
104
  "step": 60
105
  },
106
  {
107
+ "epoch": 0.35,
108
+ "learning_rate": 7e-05,
109
+ "loss": 0.6935,
110
  "step": 70
111
  },
112
  {
113
+ "epoch": 0.35,
114
+ "eval_loss": 0.6780518889427185,
115
+ "eval_runtime": 18.1288,
116
+ "eval_samples_per_second": 3.53,
117
+ "eval_steps_per_second": 0.11,
118
+ "eval_wer": 64.28571428571429,
119
  "step": 70
120
  },
121
  {
122
+ "epoch": 0.4,
123
+ "learning_rate": 6.473684210526316e-05,
124
+ "loss": 0.81,
125
  "step": 80
126
  },
127
  {
128
+ "epoch": 0.4,
129
+ "eval_loss": 0.6341258883476257,
130
+ "eval_runtime": 17.5354,
131
+ "eval_samples_per_second": 3.65,
132
+ "eval_steps_per_second": 0.114,
133
+ "eval_wer": 63.55311355311355,
134
  "step": 80
135
  },
136
  {
137
+ "epoch": 0.45,
138
+ "learning_rate": 5.9473684210526315e-05,
139
+ "loss": 0.6133,
140
  "step": 90
141
  },
142
  {
143
+ "epoch": 0.45,
144
+ "eval_loss": 0.608344316482544,
145
+ "eval_runtime": 18.2697,
146
+ "eval_samples_per_second": 3.503,
147
+ "eval_steps_per_second": 0.109,
148
+ "eval_wer": 62.637362637362635,
149
  "step": 90
150
  },
151
  {
152
+ "epoch": 0.5,
153
+ "learning_rate": 5.421052631578948e-05,
154
+ "loss": 0.6675,
155
  "step": 100
156
  },
157
  {
158
+ "epoch": 0.5,
159
+ "eval_loss": 0.585149347782135,
160
+ "eval_runtime": 18.1531,
161
+ "eval_samples_per_second": 3.526,
162
+ "eval_steps_per_second": 0.11,
163
+ "eval_wer": 62.82051282051282,
164
  "step": 100
165
  },
166
+ {
167
+ "epoch": 0.55,
168
+ "learning_rate": 4.8947368421052635e-05,
169
+ "loss": 0.5577,
170
+ "step": 110
171
+ },
172
+ {
173
+ "epoch": 0.55,
174
+ "eval_loss": 0.5650949478149414,
175
+ "eval_runtime": 18.781,
176
+ "eval_samples_per_second": 3.408,
177
+ "eval_steps_per_second": 0.106,
178
+ "eval_wer": 59.34065934065934,
179
+ "step": 110
180
+ },
181
+ {
182
+ "epoch": 0.6,
183
+ "learning_rate": 4.368421052631579e-05,
184
+ "loss": 0.6473,
185
+ "step": 120
186
+ },
187
+ {
188
+ "epoch": 0.6,
189
+ "eval_loss": 0.5637935400009155,
190
+ "eval_runtime": 19.7936,
191
+ "eval_samples_per_second": 3.233,
192
+ "eval_steps_per_second": 0.101,
193
+ "eval_wer": 58.058608058608066,
194
+ "step": 120
195
+ },
196
+ {
197
+ "epoch": 0.65,
198
+ "learning_rate": 3.842105263157895e-05,
199
+ "loss": 0.6018,
200
+ "step": 130
201
+ },
202
+ {
203
+ "epoch": 0.65,
204
+ "eval_loss": 0.5434007048606873,
205
+ "eval_runtime": 18.5859,
206
+ "eval_samples_per_second": 3.443,
207
+ "eval_steps_per_second": 0.108,
208
+ "eval_wer": 53.84615384615385,
209
+ "step": 130
210
+ },
211
+ {
212
+ "epoch": 0.7,
213
+ "learning_rate": 3.3157894736842106e-05,
214
+ "loss": 0.5918,
215
+ "step": 140
216
+ },
217
+ {
218
+ "epoch": 0.7,
219
+ "eval_loss": 0.5384606719017029,
220
+ "eval_runtime": 17.6361,
221
+ "eval_samples_per_second": 3.629,
222
+ "eval_steps_per_second": 0.113,
223
+ "eval_wer": 54.94505494505495,
224
+ "step": 140
225
+ },
226
+ {
227
+ "epoch": 0.75,
228
+ "learning_rate": 2.7894736842105263e-05,
229
+ "loss": 0.5654,
230
+ "step": 150
231
+ },
232
+ {
233
+ "epoch": 0.75,
234
+ "eval_loss": 0.5200322866439819,
235
+ "eval_runtime": 19.3669,
236
+ "eval_samples_per_second": 3.305,
237
+ "eval_steps_per_second": 0.103,
238
+ "eval_wer": 58.058608058608066,
239
+ "step": 150
240
+ },
241
+ {
242
+ "epoch": 0.8,
243
+ "learning_rate": 2.2631578947368423e-05,
244
+ "loss": 0.587,
245
+ "step": 160
246
+ },
247
+ {
248
+ "epoch": 0.8,
249
+ "eval_loss": 0.4974484145641327,
250
+ "eval_runtime": 20.073,
251
+ "eval_samples_per_second": 3.188,
252
+ "eval_steps_per_second": 0.1,
253
+ "eval_wer": 57.14285714285714,
254
+ "step": 160
255
+ },
256
+ {
257
+ "epoch": 0.85,
258
+ "learning_rate": 1.736842105263158e-05,
259
+ "loss": 0.6157,
260
+ "step": 170
261
+ },
262
+ {
263
+ "epoch": 0.85,
264
+ "eval_loss": 0.483424574136734,
265
+ "eval_runtime": 20.2725,
266
+ "eval_samples_per_second": 3.157,
267
+ "eval_steps_per_second": 0.099,
268
+ "eval_wer": 53.2967032967033,
269
+ "step": 170
270
+ },
271
+ {
272
+ "epoch": 0.9,
273
+ "learning_rate": 1.2105263157894737e-05,
274
+ "loss": 0.6803,
275
+ "step": 180
276
+ },
277
+ {
278
+ "epoch": 0.9,
279
+ "eval_loss": 0.4851979613304138,
280
+ "eval_runtime": 17.9572,
281
+ "eval_samples_per_second": 3.564,
282
+ "eval_steps_per_second": 0.111,
283
+ "eval_wer": 55.86080586080586,
284
+ "step": 180
285
+ },
286
+ {
287
+ "epoch": 0.95,
288
+ "learning_rate": 6.842105263157896e-06,
289
+ "loss": 0.4813,
290
+ "step": 190
291
+ },
292
+ {
293
+ "epoch": 0.95,
294
+ "eval_loss": 0.4685819447040558,
295
+ "eval_runtime": 17.9367,
296
+ "eval_samples_per_second": 3.568,
297
+ "eval_steps_per_second": 0.112,
298
+ "eval_wer": 51.28205128205128,
299
+ "step": 190
300
+ },
301
+ {
302
+ "epoch": 1.0,
303
+ "learning_rate": 1.5789473684210528e-06,
304
+ "loss": 0.4952,
305
+ "step": 200
306
+ },
307
+ {
308
+ "epoch": 1.0,
309
+ "eval_loss": 0.4624484181404114,
310
+ "eval_runtime": 19.165,
311
+ "eval_samples_per_second": 3.339,
312
+ "eval_steps_per_second": 0.104,
313
+ "eval_wer": 51.46520146520146,
314
+ "step": 200
315
+ },
316
  {
317
  "epoch": 1.0,
318
+ "step": 200,
319
+ "total_flos": 1.57560864768e+17,
320
+ "train_loss": 0.8012711083889008,
321
+ "train_runtime": 2053.4009,
322
+ "train_samples_per_second": 3.117,
323
+ "train_steps_per_second": 0.097
324
  }
325
  ],
326
+ "max_steps": 200,
327
  "num_train_epochs": 9223372036854775807,
328
+ "total_flos": 1.57560864768e+17,
329
  "trial_name": null,
330
  "trial_params": null
331
  }