fsicoli committed on
Commit
98ccb58
1 Parent(s): 7bc2f20

Upload 11 files

Browse files
config.json CHANGED
@@ -43,7 +43,7 @@
43
  "num_mel_bins": 128,
44
  "pad_token_id": 50256,
45
  "scale_embedding": false,
46
- "torch_dtype": "float16",
47
  "transformers_version": "4.37.0.dev0",
48
  "use_cache": true,
49
  "use_weighted_layer_sum": false,
 
43
  "num_mel_bins": 128,
44
  "pad_token_id": 50256,
45
  "scale_embedding": false,
46
+ "torch_dtype": "float32",
47
  "transformers_version": "4.37.0.dev0",
48
  "use_cache": true,
49
  "use_weighted_layer_sum": false,
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08e0005225b3dbaf55dd13ac62926cc7e02c1025d66fa375e6fb305ff79cd4f9
3
  size 4993448880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcba8cb26f5ac1b26555b07a6ae8873581075aaf0702c9f393e0a2f53f9c3eb2
3
  size 4993448880
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:630ca774672856d2e0e39a702e590f635a1cfc5726a64b6578ab46dd367369a9
3
  size 1180663192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbc291761a468b6236109acedacb3b0017f00895057c15820b0027e778fc5fb6
3
  size 1180663192
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e8bac954a7913d692fd50a482d47de48625e368c8e5f3d3f3302acd1428ff12
3
  size 12333625252
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c1c504281f4a5cf99152635854cf2e5735ca337bddd2ea93b8da624f6203ed2
3
  size 12333625252
trainer_state.json CHANGED
@@ -11,250 +11,250 @@
11
  {
12
  "epoch": 0.01,
13
  "learning_rate": 1.25e-08,
14
- "loss": 0.6602,
15
  "step": 25
16
  },
17
  {
18
  "epoch": 0.01,
19
  "learning_rate": 2.5e-08,
20
- "loss": 1.0487,
21
  "step": 50
22
  },
23
  {
24
  "epoch": 0.02,
25
  "learning_rate": 3.75e-08,
26
- "loss": 0.6436,
27
  "step": 75
28
  },
29
  {
30
  "epoch": 0.02,
31
  "learning_rate": 5e-08,
32
- "loss": 1.0184,
33
  "step": 100
34
  },
35
  {
36
  "epoch": 0.03,
37
  "learning_rate": 6.25e-08,
38
- "loss": 0.6427,
39
  "step": 125
40
  },
41
  {
42
  "epoch": 0.03,
43
  "learning_rate": 7.5e-08,
44
- "loss": 0.9576,
45
  "step": 150
46
  },
47
  {
48
  "epoch": 0.04,
49
  "learning_rate": 8.75e-08,
50
- "loss": 0.5878,
51
  "step": 175
52
  },
53
  {
54
  "epoch": 0.04,
55
  "learning_rate": 1e-07,
56
- "loss": 0.9261,
57
  "step": 200
58
  },
59
  {
60
  "epoch": 0.05,
61
  "learning_rate": 1.125e-07,
62
- "loss": 0.5172,
63
  "step": 225
64
  },
65
  {
66
  "epoch": 0.05,
67
  "learning_rate": 1.25e-07,
68
- "loss": 0.7859,
69
  "step": 250
70
  },
71
  {
72
  "epoch": 0.06,
73
  "learning_rate": 1.375e-07,
74
- "loss": 0.4913,
75
  "step": 275
76
  },
77
  {
78
  "epoch": 0.06,
79
  "learning_rate": 1.5e-07,
80
- "loss": 0.6184,
81
  "step": 300
82
  },
83
  {
84
  "epoch": 0.07,
85
  "learning_rate": 1.625e-07,
86
- "loss": 0.3088,
87
  "step": 325
88
  },
89
  {
90
  "epoch": 0.07,
91
  "learning_rate": 1.75e-07,
92
- "loss": 0.3973,
93
  "step": 350
94
  },
95
  {
96
  "epoch": 0.08,
97
  "learning_rate": 1.875e-07,
98
- "loss": 0.2386,
99
  "step": 375
100
  },
101
  {
102
  "epoch": 0.08,
103
  "learning_rate": 2e-07,
104
- "loss": 0.2516,
105
  "step": 400
106
  },
107
  {
108
  "epoch": 0.09,
109
  "learning_rate": 2.1249999999999998e-07,
110
- "loss": 0.168,
111
  "step": 425
112
  },
113
  {
114
  "epoch": 0.1,
115
  "learning_rate": 2.25e-07,
116
- "loss": 0.2036,
117
  "step": 450
118
  },
119
  {
120
  "epoch": 0.1,
121
  "learning_rate": 2.3749999999999998e-07,
122
- "loss": 0.1564,
123
  "step": 475
124
  },
125
  {
126
  "epoch": 0.11,
127
  "learning_rate": 2.5e-07,
128
- "loss": 0.2491,
129
  "step": 500
130
  },
131
  {
132
  "epoch": 0.11,
133
  "learning_rate": 2.625e-07,
134
- "loss": 0.1817,
135
  "step": 525
136
  },
137
  {
138
  "epoch": 0.12,
139
  "learning_rate": 2.75e-07,
140
- "loss": 0.236,
141
  "step": 550
142
  },
143
  {
144
  "epoch": 0.12,
145
  "learning_rate": 2.8749999999999995e-07,
146
- "loss": 0.1117,
147
  "step": 575
148
  },
149
  {
150
  "epoch": 0.13,
151
  "learning_rate": 3e-07,
152
- "loss": 0.1665,
153
  "step": 600
154
  },
155
  {
156
  "epoch": 0.13,
157
  "learning_rate": 3.1249999999999997e-07,
158
- "loss": 0.1533,
159
  "step": 625
160
  },
161
  {
162
  "epoch": 0.14,
163
  "learning_rate": 3.25e-07,
164
- "loss": 0.2006,
165
  "step": 650
166
  },
167
  {
168
  "epoch": 0.14,
169
  "learning_rate": 3.375e-07,
170
- "loss": 0.1425,
171
  "step": 675
172
  },
173
  {
174
  "epoch": 0.15,
175
  "learning_rate": 3.5e-07,
176
- "loss": 0.1769,
177
  "step": 700
178
  },
179
  {
180
  "epoch": 0.15,
181
  "learning_rate": 3.6249999999999997e-07,
182
- "loss": 0.1259,
183
  "step": 725
184
  },
185
  {
186
  "epoch": 0.16,
187
  "learning_rate": 3.75e-07,
188
- "loss": 0.2038,
189
  "step": 750
190
  },
191
  {
192
  "epoch": 0.16,
193
  "learning_rate": 3.875e-07,
194
- "loss": 0.1431,
195
  "step": 775
196
  },
197
  {
198
  "epoch": 0.17,
199
  "learning_rate": 4e-07,
200
- "loss": 0.1459,
201
  "step": 800
202
  },
203
  {
204
  "epoch": 0.17,
205
  "learning_rate": 4.1249999999999997e-07,
206
- "loss": 0.1219,
207
  "step": 825
208
  },
209
  {
210
  "epoch": 0.18,
211
  "learning_rate": 4.2499999999999995e-07,
212
- "loss": 0.2224,
213
  "step": 850
214
  },
215
  {
216
  "epoch": 0.19,
217
  "learning_rate": 4.375e-07,
218
- "loss": 0.1409,
219
  "step": 875
220
  },
221
  {
222
  "epoch": 0.19,
223
  "learning_rate": 4.5e-07,
224
- "loss": 0.1993,
225
  "step": 900
226
  },
227
  {
228
  "epoch": 0.2,
229
  "learning_rate": 4.625e-07,
230
- "loss": 0.1486,
231
  "step": 925
232
  },
233
  {
234
  "epoch": 0.2,
235
  "learning_rate": 4.7499999999999995e-07,
236
- "loss": 0.1924,
237
  "step": 950
238
  },
239
  {
240
  "epoch": 0.21,
241
  "learning_rate": 4.875e-07,
242
- "loss": 0.107,
243
  "step": 975
244
  },
245
  {
246
  "epoch": 0.21,
247
  "learning_rate": 5e-07,
248
- "loss": 0.1534,
249
  "step": 1000
250
  },
251
  {
252
  "epoch": 0.21,
253
- "eval_loss": 0.1283227652311325,
254
- "eval_runtime": 271898.2411,
255
- "eval_samples_per_second": 0.035,
256
- "eval_steps_per_second": 0.004,
257
- "eval_wer": 0.08386096256684492,
258
  "step": 1000
259
  }
260
  ],
 
11
  {
12
  "epoch": 0.01,
13
  "learning_rate": 1.25e-08,
14
+ "loss": 0.5815,
15
  "step": 25
16
  },
17
  {
18
  "epoch": 0.01,
19
  "learning_rate": 2.5e-08,
20
+ "loss": 0.9607,
21
  "step": 50
22
  },
23
  {
24
  "epoch": 0.02,
25
  "learning_rate": 3.75e-08,
26
+ "loss": 0.5678,
27
  "step": 75
28
  },
29
  {
30
  "epoch": 0.02,
31
  "learning_rate": 5e-08,
32
+ "loss": 0.9329,
33
  "step": 100
34
  },
35
  {
36
  "epoch": 0.03,
37
  "learning_rate": 6.25e-08,
38
+ "loss": 0.5937,
39
  "step": 125
40
  },
41
  {
42
  "epoch": 0.03,
43
  "learning_rate": 7.5e-08,
44
+ "loss": 0.9153,
45
  "step": 150
46
  },
47
  {
48
  "epoch": 0.04,
49
  "learning_rate": 8.75e-08,
50
+ "loss": 0.5232,
51
  "step": 175
52
  },
53
  {
54
  "epoch": 0.04,
55
  "learning_rate": 1e-07,
56
+ "loss": 0.8711,
57
  "step": 200
58
  },
59
  {
60
  "epoch": 0.05,
61
  "learning_rate": 1.125e-07,
62
+ "loss": 0.4636,
63
  "step": 225
64
  },
65
  {
66
  "epoch": 0.05,
67
  "learning_rate": 1.25e-07,
68
+ "loss": 0.6875,
69
  "step": 250
70
  },
71
  {
72
  "epoch": 0.06,
73
  "learning_rate": 1.375e-07,
74
+ "loss": 0.3447,
75
  "step": 275
76
  },
77
  {
78
  "epoch": 0.06,
79
  "learning_rate": 1.5e-07,
80
+ "loss": 0.4347,
81
  "step": 300
82
  },
83
  {
84
  "epoch": 0.07,
85
  "learning_rate": 1.625e-07,
86
+ "loss": 0.2261,
87
  "step": 325
88
  },
89
  {
90
  "epoch": 0.07,
91
  "learning_rate": 1.75e-07,
92
+ "loss": 0.3323,
93
  "step": 350
94
  },
95
  {
96
  "epoch": 0.08,
97
  "learning_rate": 1.875e-07,
98
+ "loss": 0.2093,
99
  "step": 375
100
  },
101
  {
102
  "epoch": 0.08,
103
  "learning_rate": 2e-07,
104
+ "loss": 0.2357,
105
  "step": 400
106
  },
107
  {
108
  "epoch": 0.09,
109
  "learning_rate": 2.1249999999999998e-07,
110
+ "loss": 0.1632,
111
  "step": 425
112
  },
113
  {
114
  "epoch": 0.1,
115
  "learning_rate": 2.25e-07,
116
+ "loss": 0.2114,
117
  "step": 450
118
  },
119
  {
120
  "epoch": 0.1,
121
  "learning_rate": 2.3749999999999998e-07,
122
+ "loss": 0.1587,
123
  "step": 475
124
  },
125
  {
126
  "epoch": 0.11,
127
  "learning_rate": 2.5e-07,
128
+ "loss": 0.2634,
129
  "step": 500
130
  },
131
  {
132
  "epoch": 0.11,
133
  "learning_rate": 2.625e-07,
134
+ "loss": 0.1897,
135
  "step": 525
136
  },
137
  {
138
  "epoch": 0.12,
139
  "learning_rate": 2.75e-07,
140
+ "loss": 0.2373,
141
  "step": 550
142
  },
143
  {
144
  "epoch": 0.12,
145
  "learning_rate": 2.8749999999999995e-07,
146
+ "loss": 0.1154,
147
  "step": 575
148
  },
149
  {
150
  "epoch": 0.13,
151
  "learning_rate": 3e-07,
152
+ "loss": 0.1733,
153
  "step": 600
154
  },
155
  {
156
  "epoch": 0.13,
157
  "learning_rate": 3.1249999999999997e-07,
158
+ "loss": 0.1558,
159
  "step": 625
160
  },
161
  {
162
  "epoch": 0.14,
163
  "learning_rate": 3.25e-07,
164
+ "loss": 0.2124,
165
  "step": 650
166
  },
167
  {
168
  "epoch": 0.14,
169
  "learning_rate": 3.375e-07,
170
+ "loss": 0.1478,
171
  "step": 675
172
  },
173
  {
174
  "epoch": 0.15,
175
  "learning_rate": 3.5e-07,
176
+ "loss": 0.1925,
177
  "step": 700
178
  },
179
  {
180
  "epoch": 0.15,
181
  "learning_rate": 3.6249999999999997e-07,
182
+ "loss": 0.1349,
183
  "step": 725
184
  },
185
  {
186
  "epoch": 0.16,
187
  "learning_rate": 3.75e-07,
188
+ "loss": 0.2101,
189
  "step": 750
190
  },
191
  {
192
  "epoch": 0.16,
193
  "learning_rate": 3.875e-07,
194
+ "loss": 0.1556,
195
  "step": 775
196
  },
197
  {
198
  "epoch": 0.17,
199
  "learning_rate": 4e-07,
200
+ "loss": 0.1572,
201
  "step": 800
202
  },
203
  {
204
  "epoch": 0.17,
205
  "learning_rate": 4.1249999999999997e-07,
206
+ "loss": 0.1339,
207
  "step": 825
208
  },
209
  {
210
  "epoch": 0.18,
211
  "learning_rate": 4.2499999999999995e-07,
212
+ "loss": 0.2275,
213
  "step": 850
214
  },
215
  {
216
  "epoch": 0.19,
217
  "learning_rate": 4.375e-07,
218
+ "loss": 0.1529,
219
  "step": 875
220
  },
221
  {
222
  "epoch": 0.19,
223
  "learning_rate": 4.5e-07,
224
+ "loss": 0.2102,
225
  "step": 900
226
  },
227
  {
228
  "epoch": 0.2,
229
  "learning_rate": 4.625e-07,
230
+ "loss": 0.1567,
231
  "step": 925
232
  },
233
  {
234
  "epoch": 0.2,
235
  "learning_rate": 4.7499999999999995e-07,
236
+ "loss": 0.2087,
237
  "step": 950
238
  },
239
  {
240
  "epoch": 0.21,
241
  "learning_rate": 4.875e-07,
242
+ "loss": 0.1155,
243
  "step": 975
244
  },
245
  {
246
  "epoch": 0.21,
247
  "learning_rate": 5e-07,
248
+ "loss": 0.163,
249
  "step": 1000
250
  },
251
  {
252
  "epoch": 0.21,
253
+ "eval_loss": 0.13766534626483917,
254
+ "eval_runtime": 257216.5334,
255
+ "eval_samples_per_second": 0.037,
256
+ "eval_steps_per_second": 0.005,
257
+ "eval_wer": 0.0962566844919786,
258
  "step": 1000
259
  }
260
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a1e93ebb1bba8a2558c7f365f093b9b295c6a74c892c2fd0cc31018e0c5f485
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b160490c443b161f18bd788ec020faaa217e6d7db5a6b0cf1b1be6e6ea1a968
3
  size 4856