boumehdi committed on
Commit
8392698
1 Parent(s): f88d431

Upload 9 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +2 -2
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +34 -274
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:046b4511d9979e59fda0c2409d7b9e4f9f2601e7bb0ebb6bcea6e849db0e7b2e
3
  size 2490594117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55b8ba47c1a14520b9fcd038ff6a20628b01fa4771cb5fb74f72e5ab51d8b1b7
3
  size 2490594117
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d29291b649665d371303e9cc78b4ce656fb51bbaf143585dfdc11a28235e36ac
3
  size 1262168365
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e0529c7648c521db89e680a68a1d95dc911c30720659cb12acf61bef32a3ad7
3
  size 1262168365
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98fb9274470e6654c92f2ab92267cb78fced569e3ecb8cb98d8445df5730abdb
3
- size 14639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1eb86c4b86a8f8830681aee2bf3f61b818687a4378530eef63dc1a0ab6dba4f3
3
+ size 14575
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf733a520785e2a26377c7407c8a5281b756e1f9a937911fafbff2630fa6a6d9
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92bad7b4b1284d748ee0f1976606170e2e31c52706571e4ec2ced54d5e289b2f
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f98553c888b9ecffc2af30085063b88a8b95684949d549fa8f97627e4abd087
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e926484f096b98b97b945304545e37eda9109685e9310eb8bbddc2a3c7471e8b
3
  size 627
trainer_state.json CHANGED
@@ -1,304 +1,64 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 31.64505928853755,
5
- "global_step": 5000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.95,
12
- "learning_rate": 9.99968352427369e-06,
13
- "loss": 0.0334,
14
  "step": 150
15
  },
16
  {
17
- "epoch": 1.9,
18
- "learning_rate": 9.998734097094752e-06,
19
- "loss": 0.0325,
20
  "step": 300
21
  },
22
  {
23
- "epoch": 2.85,
24
- "learning_rate": 9.997790999430345e-06,
25
- "loss": 0.0318,
26
- "step": 450
 
 
 
27
  },
28
  {
29
- "epoch": 3.16,
30
- "eval_loss": 0.2169833928346634,
31
- "eval_runtime": 414.4566,
32
- "eval_samples_per_second": 10.899,
33
- "eval_steps_per_second": 1.363,
34
- "eval_wer": 0.16305210261835493,
35
- "step": 500
36
  },
37
  {
38
- "epoch": 3.8,
39
- "learning_rate": 9.99684157225141e-06,
40
- "loss": 0.0312,
41
  "step": 600
42
  },
43
  {
44
- "epoch": 4.75,
45
- "learning_rate": 9.995892145072473e-06,
46
- "loss": 0.0312,
47
  "step": 750
48
  },
49
  {
50
- "epoch": 5.7,
51
- "learning_rate": 9.994942717893538e-06,
52
- "loss": 0.0303,
53
- "step": 900
54
- },
55
- {
56
- "epoch": 6.33,
57
- "eval_loss": 0.21791410446166992,
58
- "eval_runtime": 241.3484,
59
- "eval_samples_per_second": 18.716,
60
- "eval_steps_per_second": 2.341,
61
- "eval_wer": 0.16155338093978666,
62
- "step": 1000
63
- },
64
- {
65
- "epoch": 6.65,
66
- "learning_rate": 9.993993290714603e-06,
67
- "loss": 0.0294,
68
- "step": 1050
69
- },
70
- {
71
- "epoch": 7.59,
72
- "learning_rate": 9.993043863535668e-06,
73
- "loss": 0.0297,
74
- "step": 1200
75
- },
76
- {
77
- "epoch": 8.54,
78
- "learning_rate": 9.992094436356731e-06,
79
- "loss": 0.0301,
80
- "step": 1350
81
- },
82
- {
83
- "epoch": 9.49,
84
- "learning_rate": 9.991145009177796e-06,
85
- "loss": 0.0283,
86
- "step": 1500
87
- },
88
- {
89
- "epoch": 9.49,
90
- "eval_loss": 0.22529518604278564,
91
- "eval_runtime": 246.1055,
92
- "eval_samples_per_second": 18.354,
93
- "eval_steps_per_second": 2.296,
94
- "eval_wer": 0.16005465926121837,
95
- "step": 1500
96
- },
97
- {
98
- "epoch": 10.44,
99
- "learning_rate": 9.990195581998862e-06,
100
- "loss": 0.0302,
101
- "step": 1650
102
- },
103
- {
104
- "epoch": 11.39,
105
- "learning_rate": 9.989246154819927e-06,
106
- "loss": 0.0278,
107
- "step": 1800
108
- },
109
- {
110
- "epoch": 12.34,
111
- "learning_rate": 9.98829672764099e-06,
112
- "loss": 0.0282,
113
- "step": 1950
114
- },
115
- {
116
- "epoch": 12.66,
117
- "eval_loss": 0.22862234711647034,
118
- "eval_runtime": 246.4099,
119
- "eval_samples_per_second": 18.331,
120
- "eval_steps_per_second": 2.293,
121
- "eval_wer": 0.16058361985365424,
122
- "step": 2000
123
- },
124
- {
125
- "epoch": 13.29,
126
- "learning_rate": 9.987353629976582e-06,
127
- "loss": 0.0277,
128
- "step": 2100
129
- },
130
- {
131
- "epoch": 14.24,
132
- "learning_rate": 9.986404202797645e-06,
133
- "loss": 0.028,
134
- "step": 2250
135
- },
136
- {
137
- "epoch": 15.19,
138
- "learning_rate": 9.98545477561871e-06,
139
- "loss": 0.0277,
140
- "step": 2400
141
- },
142
- {
143
- "epoch": 15.82,
144
- "eval_loss": 0.22946597635746002,
145
- "eval_runtime": 264.3977,
146
- "eval_samples_per_second": 17.084,
147
- "eval_steps_per_second": 2.137,
148
- "eval_wer": 0.1613329806929384,
149
- "step": 2500
150
- },
151
- {
152
- "epoch": 16.14,
153
- "learning_rate": 9.984505348439775e-06,
154
- "loss": 0.0271,
155
- "step": 2550
156
- },
157
- {
158
- "epoch": 17.09,
159
- "learning_rate": 9.98355592126084e-06,
160
- "loss": 0.0273,
161
- "step": 2700
162
- },
163
- {
164
- "epoch": 18.04,
165
- "learning_rate": 9.982606494081904e-06,
166
- "loss": 0.0268,
167
- "step": 2850
168
- },
169
- {
170
- "epoch": 18.99,
171
- "learning_rate": 9.981657066902969e-06,
172
- "loss": 0.0268,
173
- "step": 3000
174
- },
175
- {
176
- "epoch": 18.99,
177
- "eval_loss": 0.23895005881786346,
178
- "eval_runtime": 263.6411,
179
- "eval_samples_per_second": 17.133,
180
- "eval_steps_per_second": 2.143,
181
- "eval_wer": 0.16243498192717976,
182
- "step": 3000
183
- },
184
- {
185
- "epoch": 19.94,
186
- "learning_rate": 9.980707639724034e-06,
187
- "loss": 0.0256,
188
- "step": 3150
189
- },
190
- {
191
- "epoch": 20.89,
192
- "learning_rate": 9.979758212545099e-06,
193
- "loss": 0.0262,
194
- "step": 3300
195
- },
196
- {
197
- "epoch": 21.83,
198
- "learning_rate": 9.978808785366162e-06,
199
- "loss": 0.0255,
200
- "step": 3450
201
- },
202
- {
203
- "epoch": 22.15,
204
- "eval_loss": 0.23805132508277893,
205
- "eval_runtime": 265.2207,
206
- "eval_samples_per_second": 17.031,
207
- "eval_steps_per_second": 2.13,
208
- "eval_wer": 0.1612007405448294,
209
- "step": 3500
210
- },
211
- {
212
- "epoch": 22.78,
213
- "learning_rate": 9.977859358187227e-06,
214
- "loss": 0.0258,
215
- "step": 3600
216
- },
217
- {
218
- "epoch": 23.73,
219
- "learning_rate": 9.976909931008292e-06,
220
- "loss": 0.0259,
221
- "step": 3750
222
- },
223
- {
224
- "epoch": 24.68,
225
- "learning_rate": 9.975960503829357e-06,
226
- "loss": 0.0259,
227
- "step": 3900
228
- },
229
- {
230
- "epoch": 25.32,
231
- "eval_loss": 0.24025069177150726,
232
- "eval_runtime": 251.4279,
233
- "eval_samples_per_second": 17.965,
234
- "eval_steps_per_second": 2.247,
235
- "eval_wer": 0.1611125804460901,
236
- "step": 4000
237
- },
238
- {
239
- "epoch": 25.63,
240
- "learning_rate": 9.975011076650422e-06,
241
- "loss": 0.0249,
242
- "step": 4050
243
- },
244
- {
245
- "epoch": 26.58,
246
- "learning_rate": 9.974061649471486e-06,
247
- "loss": 0.0257,
248
- "step": 4200
249
- },
250
- {
251
- "epoch": 27.53,
252
- "learning_rate": 9.97311222229255e-06,
253
- "loss": 0.0246,
254
- "step": 4350
255
- },
256
- {
257
- "epoch": 28.48,
258
- "learning_rate": 9.972162795113616e-06,
259
- "loss": 0.0253,
260
- "step": 4500
261
- },
262
- {
263
- "epoch": 28.48,
264
- "eval_loss": 0.24363693594932556,
265
- "eval_runtime": 260.0433,
266
- "eval_samples_per_second": 17.37,
267
- "eval_steps_per_second": 2.173,
268
- "eval_wer": 0.16137706074230804,
269
- "step": 4500
270
- },
271
- {
272
- "epoch": 29.43,
273
- "learning_rate": 9.97121336793468e-06,
274
- "loss": 0.0242,
275
- "step": 4650
276
- },
277
- {
278
- "epoch": 30.38,
279
- "learning_rate": 9.970263940755744e-06,
280
- "loss": 0.0239,
281
- "step": 4800
282
- },
283
- {
284
- "epoch": 31.33,
285
- "learning_rate": 9.969314513576809e-06,
286
- "loss": 0.0235,
287
- "step": 4950
288
- },
289
- {
290
- "epoch": 31.65,
291
- "eval_loss": 0.24291476607322693,
292
- "eval_runtime": 232.435,
293
- "eval_samples_per_second": 19.433,
294
- "eval_steps_per_second": 2.431,
295
- "eval_wer": 0.16115666049545976,
296
- "step": 5000
297
  }
298
  ],
299
- "max_steps": 1580000,
300
  "num_train_epochs": 10000,
301
- "total_flos": 1.1341132572485426e+20,
302
  "trial_name": null,
303
  "trial_params": null
304
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.90727969348659,
5
+ "global_step": 800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.92,
12
+ "learning_rate": 9.999693232713664e-06,
13
+ "loss": 0.0295,
14
  "step": 150
15
  },
16
  {
17
+ "epoch": 1.84,
18
+ "learning_rate": 9.998772930854655e-06,
19
+ "loss": 0.0272,
20
  "step": 300
21
  },
22
  {
23
+ "epoch": 2.45,
24
+ "eval_loss": 0.23613713681697845,
25
+ "eval_runtime": 411.8281,
26
+ "eval_samples_per_second": 11.311,
27
+ "eval_steps_per_second": 1.416,
28
+ "eval_wer": 0.15606540535913022,
29
+ "step": 400
30
  },
31
  {
32
+ "epoch": 2.76,
33
+ "learning_rate": 9.997852628995645e-06,
34
+ "loss": 0.0272,
35
+ "step": 450
 
 
 
36
  },
37
  {
38
+ "epoch": 3.68,
39
+ "learning_rate": 9.996932327136635e-06,
40
+ "loss": 0.0268,
41
  "step": 600
42
  },
43
  {
44
+ "epoch": 4.6,
45
+ "learning_rate": 9.996012025277625e-06,
46
+ "loss": 0.0261,
47
  "step": 750
48
  },
49
  {
50
+ "epoch": 4.91,
51
+ "eval_loss": 0.2315920889377594,
52
+ "eval_runtime": 257.9698,
53
+ "eval_samples_per_second": 18.056,
54
+ "eval_steps_per_second": 2.26,
55
+ "eval_wer": 0.15478126872699255,
56
+ "step": 800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  }
58
  ],
59
+ "max_steps": 1630000,
60
  "num_train_epochs": 10000,
61
+ "total_flos": 1.8155123522141764e+19,
62
  "trial_name": null,
63
  "trial_params": null
64
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8359e96308fe840448b6443af6dc6fe6a79f6d1d40f9795ba838e7fc0849eec8
3
  size 3323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b94603c07847669ce63931a32b056beea3d6cc76af1c8d5dd099819ea13702bd
3
  size 3323