abuelnasr commited on
Commit
feff7b6
·
verified ·
1 Parent(s): d1de6b2

Upload folder using huggingface_hub

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d690a20589ae9870a61c03da7e42e40ae9f851b4ea79d84ff8753a297964bdc
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18cd7996ecd1dc1dbf420a0ab81cde0036b12228cd22ea965a010840f2764c2b
3
  size 966995080
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d03bf4c067b9597eac26dc4a2f0fc8834aad5468187f83f4a7dd2c2331eefe33
3
+ size 1925064044
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a52f081f4924293d8474c5485e0b13c875749c1e09e4a911fccb555cb30e0d5
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e90b02d24fe55c96080d5eba73dc857321a59f84e8aeacc796ac8510ea27424c
3
+ size 1064
special_tokens_map.json CHANGED
@@ -125,14 +125,14 @@
125
  "pad_token": {
126
  "content": "<|endoftext|>",
127
  "lstrip": false,
128
- "normalized": true,
129
  "rstrip": false,
130
  "single_word": false
131
  },
132
  "unk_token": {
133
  "content": "",
134
  "lstrip": false,
135
- "normalized": false,
136
  "rstrip": false,
137
  "single_word": false
138
  }
 
125
  "pad_token": {
126
  "content": "<|endoftext|>",
127
  "lstrip": false,
128
+ "normalized": false,
129
  "rstrip": false,
130
  "single_word": false
131
  },
132
  "unk_token": {
133
  "content": "",
134
  "lstrip": false,
135
+ "normalized": true,
136
  "rstrip": false,
137
  "single_word": false
138
  }
trainer_state.json ADDED
@@ -0,0 +1,424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 37.0919881305638,
3
+ "best_model_checkpoint": "abuelnasr/whisper-small-eg/checkpoint-1020",
4
+ "epoch": 4.2677824267782425,
5
+ "eval_steps": 60,
6
+ "global_step": 1020,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.12552301255230125,
13
+ "grad_norm": 22.29438018798828,
14
+ "learning_rate": 8.666666666666668e-07,
15
+ "loss": 2.7449,
16
+ "step": 30
17
+ },
18
+ {
19
+ "epoch": 0.2510460251046025,
20
+ "grad_norm": 13.858986854553223,
21
+ "learning_rate": 1.8666666666666669e-06,
22
+ "loss": 2.0487,
23
+ "step": 60
24
+ },
25
+ {
26
+ "epoch": 0.2510460251046025,
27
+ "eval_loss": 1.6403740644454956,
28
+ "eval_runtime": 248.9943,
29
+ "eval_samples_per_second": 1.627,
30
+ "eval_steps_per_second": 0.052,
31
+ "eval_wer": 72.87833827893175,
32
+ "step": 60
33
+ },
34
+ {
35
+ "epoch": 0.37656903765690375,
36
+ "grad_norm": 10.086219787597656,
37
+ "learning_rate": 2.866666666666667e-06,
38
+ "loss": 1.3133,
39
+ "step": 90
40
+ },
41
+ {
42
+ "epoch": 0.502092050209205,
43
+ "grad_norm": 10.840757369995117,
44
+ "learning_rate": 3.866666666666667e-06,
45
+ "loss": 1.1414,
46
+ "step": 120
47
+ },
48
+ {
49
+ "epoch": 0.502092050209205,
50
+ "eval_loss": 1.0603028535842896,
51
+ "eval_runtime": 279.3129,
52
+ "eval_samples_per_second": 1.45,
53
+ "eval_steps_per_second": 0.047,
54
+ "eval_wer": 73.33333333333333,
55
+ "step": 120
56
+ },
57
+ {
58
+ "epoch": 0.6276150627615062,
59
+ "grad_norm": 9.947965621948242,
60
+ "learning_rate": 4.866666666666667e-06,
61
+ "loss": 0.9772,
62
+ "step": 150
63
+ },
64
+ {
65
+ "epoch": 0.7531380753138075,
66
+ "grad_norm": 7.831133842468262,
67
+ "learning_rate": 4.926553672316385e-06,
68
+ "loss": 0.8749,
69
+ "step": 180
70
+ },
71
+ {
72
+ "epoch": 0.7531380753138075,
73
+ "eval_loss": 0.865332841873169,
74
+ "eval_runtime": 239.6319,
75
+ "eval_samples_per_second": 1.69,
76
+ "eval_steps_per_second": 0.054,
77
+ "eval_wer": 64.15430267062314,
78
+ "step": 180
79
+ },
80
+ {
81
+ "epoch": 0.8786610878661087,
82
+ "grad_norm": 7.919336318969727,
83
+ "learning_rate": 4.84180790960452e-06,
84
+ "loss": 0.8123,
85
+ "step": 210
86
+ },
87
+ {
88
+ "epoch": 1.00418410041841,
89
+ "grad_norm": 8.757369995117188,
90
+ "learning_rate": 4.757062146892656e-06,
91
+ "loss": 0.7374,
92
+ "step": 240
93
+ },
94
+ {
95
+ "epoch": 1.00418410041841,
96
+ "eval_loss": 0.6884974837303162,
97
+ "eval_runtime": 220.1085,
98
+ "eval_samples_per_second": 1.84,
99
+ "eval_steps_per_second": 0.059,
100
+ "eval_wer": 53.03659742828882,
101
+ "step": 240
102
+ },
103
+ {
104
+ "epoch": 1.1297071129707112,
105
+ "grad_norm": 6.005550861358643,
106
+ "learning_rate": 4.672316384180791e-06,
107
+ "loss": 0.5269,
108
+ "step": 270
109
+ },
110
+ {
111
+ "epoch": 1.2552301255230125,
112
+ "grad_norm": 5.889186382293701,
113
+ "learning_rate": 4.587570621468927e-06,
114
+ "loss": 0.5152,
115
+ "step": 300
116
+ },
117
+ {
118
+ "epoch": 1.2552301255230125,
119
+ "eval_loss": 0.5685195326805115,
120
+ "eval_runtime": 221.5589,
121
+ "eval_samples_per_second": 1.828,
122
+ "eval_steps_per_second": 0.059,
123
+ "eval_wer": 49.61424332344214,
124
+ "step": 300
125
+ },
126
+ {
127
+ "epoch": 1.3807531380753137,
128
+ "grad_norm": 5.900785446166992,
129
+ "learning_rate": 4.5028248587570625e-06,
130
+ "loss": 0.4884,
131
+ "step": 330
132
+ },
133
+ {
134
+ "epoch": 1.506276150627615,
135
+ "grad_norm": 5.92984676361084,
136
+ "learning_rate": 4.418079096045198e-06,
137
+ "loss": 0.4816,
138
+ "step": 360
139
+ },
140
+ {
141
+ "epoch": 1.506276150627615,
142
+ "eval_loss": 0.5440375804901123,
143
+ "eval_runtime": 268.521,
144
+ "eval_samples_per_second": 1.508,
145
+ "eval_steps_per_second": 0.048,
146
+ "eval_wer": 58.496538081107815,
147
+ "step": 360
148
+ },
149
+ {
150
+ "epoch": 1.6317991631799162,
151
+ "grad_norm": 5.4634318351745605,
152
+ "learning_rate": 4.333333333333334e-06,
153
+ "loss": 0.488,
154
+ "step": 390
155
+ },
156
+ {
157
+ "epoch": 1.7573221757322175,
158
+ "grad_norm": 6.412497043609619,
159
+ "learning_rate": 4.248587570621469e-06,
160
+ "loss": 0.4465,
161
+ "step": 420
162
+ },
163
+ {
164
+ "epoch": 1.7573221757322175,
165
+ "eval_loss": 0.5248781442642212,
166
+ "eval_runtime": 258.4515,
167
+ "eval_samples_per_second": 1.567,
168
+ "eval_steps_per_second": 0.05,
169
+ "eval_wer": 62.156280909990116,
170
+ "step": 420
171
+ },
172
+ {
173
+ "epoch": 1.8828451882845187,
174
+ "grad_norm": 6.152505874633789,
175
+ "learning_rate": 4.1638418079096045e-06,
176
+ "loss": 0.4736,
177
+ "step": 450
178
+ },
179
+ {
180
+ "epoch": 2.00836820083682,
181
+ "grad_norm": 5.672488689422607,
182
+ "learning_rate": 4.079096045197741e-06,
183
+ "loss": 0.4399,
184
+ "step": 480
185
+ },
186
+ {
187
+ "epoch": 2.00836820083682,
188
+ "eval_loss": 0.5114672183990479,
189
+ "eval_runtime": 279.1622,
190
+ "eval_samples_per_second": 1.451,
191
+ "eval_steps_per_second": 0.047,
192
+ "eval_wer": 67.24035608308606,
193
+ "step": 480
194
+ },
195
+ {
196
+ "epoch": 2.1338912133891212,
197
+ "grad_norm": 6.103506565093994,
198
+ "learning_rate": 3.994350282485876e-06,
199
+ "loss": 0.3451,
200
+ "step": 510
201
+ },
202
+ {
203
+ "epoch": 2.2594142259414225,
204
+ "grad_norm": 5.109236717224121,
205
+ "learning_rate": 3.909604519774012e-06,
206
+ "loss": 0.3324,
207
+ "step": 540
208
+ },
209
+ {
210
+ "epoch": 2.2594142259414225,
211
+ "eval_loss": 0.5136451721191406,
212
+ "eval_runtime": 268.029,
213
+ "eval_samples_per_second": 1.511,
214
+ "eval_steps_per_second": 0.049,
215
+ "eval_wer": 54.77744807121662,
216
+ "step": 540
217
+ },
218
+ {
219
+ "epoch": 2.3849372384937237,
220
+ "grad_norm": 4.577359199523926,
221
+ "learning_rate": 3.8248587570621474e-06,
222
+ "loss": 0.3343,
223
+ "step": 570
224
+ },
225
+ {
226
+ "epoch": 2.510460251046025,
227
+ "grad_norm": 6.362017631530762,
228
+ "learning_rate": 3.7401129943502828e-06,
229
+ "loss": 0.3366,
230
+ "step": 600
231
+ },
232
+ {
233
+ "epoch": 2.510460251046025,
234
+ "eval_loss": 0.5054488778114319,
235
+ "eval_runtime": 259.5993,
236
+ "eval_samples_per_second": 1.56,
237
+ "eval_steps_per_second": 0.05,
238
+ "eval_wer": 49.00098911968348,
239
+ "step": 600
240
+ },
241
+ {
242
+ "epoch": 2.6359832635983262,
243
+ "grad_norm": 4.350444316864014,
244
+ "learning_rate": 3.655367231638418e-06,
245
+ "loss": 0.3447,
246
+ "step": 630
247
+ },
248
+ {
249
+ "epoch": 2.7615062761506275,
250
+ "grad_norm": 4.399082183837891,
251
+ "learning_rate": 3.570621468926554e-06,
252
+ "loss": 0.3232,
253
+ "step": 660
254
+ },
255
+ {
256
+ "epoch": 2.7615062761506275,
257
+ "eval_loss": 0.4949069023132324,
258
+ "eval_runtime": 249.7373,
259
+ "eval_samples_per_second": 1.622,
260
+ "eval_steps_per_second": 0.052,
261
+ "eval_wer": 42.57171117705242,
262
+ "step": 660
263
+ },
264
+ {
265
+ "epoch": 2.8870292887029287,
266
+ "grad_norm": 4.29299259185791,
267
+ "learning_rate": 3.48587570621469e-06,
268
+ "loss": 0.3365,
269
+ "step": 690
270
+ },
271
+ {
272
+ "epoch": 3.01255230125523,
273
+ "grad_norm": 4.033880710601807,
274
+ "learning_rate": 3.401129943502825e-06,
275
+ "loss": 0.3374,
276
+ "step": 720
277
+ },
278
+ {
279
+ "epoch": 3.01255230125523,
280
+ "eval_loss": 0.487780898809433,
281
+ "eval_runtime": 280.4517,
282
+ "eval_samples_per_second": 1.444,
283
+ "eval_steps_per_second": 0.046,
284
+ "eval_wer": 43.48170128585559,
285
+ "step": 720
286
+ },
287
+ {
288
+ "epoch": 3.1380753138075312,
289
+ "grad_norm": 3.836930274963379,
290
+ "learning_rate": 3.316384180790961e-06,
291
+ "loss": 0.2821,
292
+ "step": 750
293
+ },
294
+ {
295
+ "epoch": 3.2635983263598325,
296
+ "grad_norm": 4.796186447143555,
297
+ "learning_rate": 3.231638418079096e-06,
298
+ "loss": 0.2295,
299
+ "step": 780
300
+ },
301
+ {
302
+ "epoch": 3.2635983263598325,
303
+ "eval_loss": 0.4930148124694824,
304
+ "eval_runtime": 270.3473,
305
+ "eval_samples_per_second": 1.498,
306
+ "eval_steps_per_second": 0.048,
307
+ "eval_wer": 46.70623145400594,
308
+ "step": 780
309
+ },
310
+ {
311
+ "epoch": 3.3891213389121337,
312
+ "grad_norm": 4.248520851135254,
313
+ "learning_rate": 3.146892655367232e-06,
314
+ "loss": 0.2681,
315
+ "step": 810
316
+ },
317
+ {
318
+ "epoch": 3.514644351464435,
319
+ "grad_norm": 4.470262050628662,
320
+ "learning_rate": 3.0621468926553677e-06,
321
+ "loss": 0.2479,
322
+ "step": 840
323
+ },
324
+ {
325
+ "epoch": 3.514644351464435,
326
+ "eval_loss": 0.4894804358482361,
327
+ "eval_runtime": 250.2043,
328
+ "eval_samples_per_second": 1.619,
329
+ "eval_steps_per_second": 0.052,
330
+ "eval_wer": 41.66172106824926,
331
+ "step": 840
332
+ },
333
+ {
334
+ "epoch": 3.6401673640167362,
335
+ "grad_norm": 4.155009746551514,
336
+ "learning_rate": 2.977401129943503e-06,
337
+ "loss": 0.2438,
338
+ "step": 870
339
+ },
340
+ {
341
+ "epoch": 3.7656903765690375,
342
+ "grad_norm": 4.195906639099121,
343
+ "learning_rate": 2.8926553672316388e-06,
344
+ "loss": 0.2419,
345
+ "step": 900
346
+ },
347
+ {
348
+ "epoch": 3.7656903765690375,
349
+ "eval_loss": 0.4896373152732849,
350
+ "eval_runtime": 258.2624,
351
+ "eval_samples_per_second": 1.568,
352
+ "eval_steps_per_second": 0.05,
353
+ "eval_wer": 46.36993076162216,
354
+ "step": 900
355
+ },
356
+ {
357
+ "epoch": 3.891213389121339,
358
+ "grad_norm": 4.5145487785339355,
359
+ "learning_rate": 2.807909604519774e-06,
360
+ "loss": 0.2525,
361
+ "step": 930
362
+ },
363
+ {
364
+ "epoch": 4.01673640167364,
365
+ "grad_norm": 3.6618359088897705,
366
+ "learning_rate": 2.72316384180791e-06,
367
+ "loss": 0.2373,
368
+ "step": 960
369
+ },
370
+ {
371
+ "epoch": 4.01673640167364,
372
+ "eval_loss": 0.4873057007789612,
373
+ "eval_runtime": 238.693,
374
+ "eval_samples_per_second": 1.697,
375
+ "eval_steps_per_second": 0.054,
376
+ "eval_wer": 39.58456973293769,
377
+ "step": 960
378
+ },
379
+ {
380
+ "epoch": 4.142259414225942,
381
+ "grad_norm": 3.126338481903076,
382
+ "learning_rate": 2.6384180790960455e-06,
383
+ "loss": 0.1846,
384
+ "step": 990
385
+ },
386
+ {
387
+ "epoch": 4.2677824267782425,
388
+ "grad_norm": 3.1524906158447266,
389
+ "learning_rate": 2.553672316384181e-06,
390
+ "loss": 0.1887,
391
+ "step": 1020
392
+ },
393
+ {
394
+ "epoch": 4.2677824267782425,
395
+ "eval_loss": 0.49608033895492554,
396
+ "eval_runtime": 222.7087,
397
+ "eval_samples_per_second": 1.819,
398
+ "eval_steps_per_second": 0.058,
399
+ "eval_wer": 37.0919881305638,
400
+ "step": 1020
401
+ }
402
+ ],
403
+ "logging_steps": 30,
404
+ "max_steps": 1920,
405
+ "num_input_tokens_seen": 0,
406
+ "num_train_epochs": 9,
407
+ "save_steps": 60,
408
+ "stateful_callbacks": {
409
+ "TrainerControl": {
410
+ "args": {
411
+ "should_epoch_stop": false,
412
+ "should_evaluate": false,
413
+ "should_log": false,
414
+ "should_save": true,
415
+ "should_training_stop": false
416
+ },
417
+ "attributes": {}
418
+ }
419
+ },
420
+ "total_flos": 9.3963406344192e+18,
421
+ "train_batch_size": 32,
422
+ "trial_name": null,
423
+ "trial_params": null
424
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5da4c034eeb232905005518b17d56280354b5b29bce74a33770a676322c5b4a2
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adf550daf7a5c2b9708f6b489eacc1d897e3f311426c9c3af9d27457b030a65e
3
  size 5368