kimbochen commited on
Commit
fd27e65
1 Parent(s): 95d183d

Training in progress, step 1000

Browse files
.ipynb_checkpoints/fine-tune-whisper-streaming-checkpoint.ipynb CHANGED
@@ -1066,8 +1066,8 @@
1066
  "\n",
1067
  " <div>\n",
1068
  " \n",
1069
- " <progress value='29' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1070
- " [ 29/5000 02:50 < 8:42:35, 0.16 it/s, Epoch 0.01/9223372036854775807]\n",
1071
  " </div>\n",
1072
  " <table border=\"1\" class=\"dataframe\">\n",
1073
  " <thead>\n",
@@ -1115,7 +1115,7 @@
1115
  },
1116
  {
1117
  "cell_type": "code",
1118
- "execution_count": 22,
1119
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
1120
  "metadata": {},
1121
  "outputs": [],
@@ -1123,7 +1123,7 @@
1123
  "kwargs = {\n",
1124
  " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
1125
  " \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
1126
- " \"language\": \"zh-TW\",\n",
1127
  " \"model_name\": \"Whisper Small Chinese - Kimbo Chen\", # a 'pretty' name for your model\n",
1128
  " \"finetuned_from\": \"openai/whisper-small\",\n",
1129
  " \"tasks\": \"automatic-speech-recognition\",\n",
@@ -1141,7 +1141,7 @@
1141
  },
1142
  {
1143
  "cell_type": "code",
1144
- "execution_count": 23,
1145
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
1146
  "metadata": {},
1147
  "outputs": [
@@ -1155,82 +1155,21 @@
1155
  "Feature extractor saved in ./preprocessor_config.json\n",
1156
  "tokenizer config file saved in ./tokenizer_config.json\n",
1157
  "Special tokens file saved in ./special_tokens_map.json\n",
1158
- "added tokens file saved in ./added_tokens.json\n"
1159
- ]
1160
- },
1161
- {
1162
- "data": {
1163
- "application/vnd.jupyter.widget-view+json": {
1164
- "model_id": "dc59052a3b7f45b2b896c03763c79f57",
1165
- "version_major": 2,
1166
- "version_minor": 0
1167
- },
1168
- "text/plain": [
1169
- "Upload file pytorch_model.bin: 0%| | 32.0k/922M [00:00<?, ?B/s]"
1170
- ]
1171
- },
1172
- "metadata": {},
1173
- "output_type": "display_data"
1174
- },
1175
- {
1176
- "data": {
1177
- "application/vnd.jupyter.widget-view+json": {
1178
- "model_id": "1c58442a44e84af9a6dff915e036de83",
1179
- "version_major": 2,
1180
- "version_minor": 0
1181
- },
1182
- "text/plain": [
1183
- "Upload file training_args.bin: 100%|##########| 3.50k/3.50k [00:00<?, ?B/s]"
1184
- ]
1185
- },
1186
- "metadata": {},
1187
- "output_type": "display_data"
1188
- },
1189
- {
1190
- "name": "stderr",
1191
- "output_type": "stream",
1192
- "text": [
1193
- "remote: Scanning LFS files for validity, may be slow... \n",
1194
- "remote: LFS file scan complete. \n",
1195
- "To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
1196
- " 2ee4cf3..214645d main -> main\n",
1197
- "\n",
1198
- "Dropping the following result as it does not have all the necessary fields:\n",
1199
- "{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'dataset': {'name': 'Common Voice 11.0', 'type': 'mozilla-foundation/common_voice_11_0', 'config': 'zh-TW', 'split': 'test', 'args': 'zh-TW'}}\n",
1200
- "remote: ----------------------------------------------------------\u001b[0;31m \n",
1201
- "remote: Sorry, your push was rejected during YAML metadata verification: \n",
1202
- "remote: - Error: \"language[0]\" must only contain lowercase characters \n",
1203
- "remote: - Error: \"language[0]\" with value \"zh-TW\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m \n",
1204
- "remote: ---------------------------------------------------------- \n",
1205
- "remote: Please find the documentation at: \n",
1206
- "remote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m \n",
1207
- "remote: ---------------------------------------------------------- \n",
1208
- "To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
1209
- " ! [remote rejected] main -> main (pre-receive hook declined)\n",
1210
- "error: failed to push some refs to 'https://huggingface.co/kimbochen/whisper-small-zh-tw'\n",
1211
- "\n",
1212
- "Error pushing update to the model card. Please read logs and retry.\n",
1213
- "$remote: ----------------------------------------------------------\u001b[0;31m \n",
1214
- "remote: Sorry, your push was rejected during YAML metadata verification: \n",
1215
- "remote: - Error: \"language[0]\" must only contain lowercase characters \n",
1216
- "remote: - Error: \"language[0]\" with value \"zh-TW\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m \n",
1217
- "remote: ---------------------------------------------------------- \n",
1218
- "remote: Please find the documentation at: \n",
1219
- "remote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m \n",
1220
- "remote: ---------------------------------------------------------- \n",
1221
  "To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
1222
- " ! [remote rejected] main -> main (pre-receive hook declined)\n",
1223
- "error: failed to push some refs to 'https://huggingface.co/kimbochen/whisper-small-zh-tw'\n",
1224
  "\n"
1225
  ]
1226
  },
1227
  {
1228
  "data": {
1229
  "text/plain": [
1230
- "'https://huggingface.co/kimbochen/whisper-small-zh-tw/commit/214645d6cd1f0e7ab6a65a854eec2e349529961c'"
1231
  ]
1232
  },
1233
- "execution_count": 23,
1234
  "metadata": {},
1235
  "output_type": "execute_result"
1236
  }
 
1066
  "\n",
1067
  " <div>\n",
1068
  " \n",
1069
+ " <progress value='35' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1070
+ " [ 35/5000 03:29 < 8:46:02, 0.16 it/s, Epoch 0.01/9223372036854775807]\n",
1071
  " </div>\n",
1072
  " <table border=\"1\" class=\"dataframe\">\n",
1073
  " <thead>\n",
 
1115
  },
1116
  {
1117
  "cell_type": "code",
1118
+ "execution_count": 40,
1119
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
1120
  "metadata": {},
1121
  "outputs": [],
 
1123
  "kwargs = {\n",
1124
  " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
1125
  " \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
1126
+ " \"language\": \"zh\",\n",
1127
  " \"model_name\": \"Whisper Small Chinese - Kimbo Chen\", # a 'pretty' name for your model\n",
1128
  " \"finetuned_from\": \"openai/whisper-small\",\n",
1129
  " \"tasks\": \"automatic-speech-recognition\",\n",
 
1141
  },
1142
  {
1143
  "cell_type": "code",
1144
+ "execution_count": 41,
1145
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
1146
  "metadata": {},
1147
  "outputs": [
 
1155
  "Feature extractor saved in ./preprocessor_config.json\n",
1156
  "tokenizer config file saved in ./tokenizer_config.json\n",
1157
  "Special tokens file saved in ./special_tokens_map.json\n",
1158
+ "added tokens file saved in ./added_tokens.json\n",
1159
+ "Several commits (2) will be pushed upstream.\n",
1160
+ "The progress bars may be unreliable.\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1161
  "To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
1162
+ " 61dfe27..a94bfc3 main -> main\n",
 
1163
  "\n"
1164
  ]
1165
  },
1166
  {
1167
  "data": {
1168
  "text/plain": [
1169
+ "'https://huggingface.co/kimbochen/whisper-small-zh-tw/commit/a94bfc3cbb71b83e10525899df53ee0d4db96e32'"
1170
  ]
1171
  },
1172
+ "execution_count": 41,
1173
  "metadata": {},
1174
  "output_type": "execute_result"
1175
  }
fine-tune-whisper-streaming.ipynb CHANGED
@@ -145,70 +145,15 @@
145
  "execution_count": 2,
146
  "id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
147
  "metadata": {},
148
- "outputs": [
149
- {
150
- "data": {
151
- "application/vnd.jupyter.widget-view+json": {
152
- "model_id": "ecce3a630cdb4ebab217a88a0163b257",
153
- "version_major": 2,
154
- "version_minor": 0
155
- },
156
- "text/plain": [
157
- "Downloading builder script: 0%| | 0.00/8.30k [00:00<?, ?B/s]"
158
- ]
159
- },
160
- "metadata": {},
161
- "output_type": "display_data"
162
- },
163
- {
164
- "data": {
165
- "application/vnd.jupyter.widget-view+json": {
166
- "model_id": "b0141b068f944775867034bc494f88d7",
167
- "version_major": 2,
168
- "version_minor": 0
169
- },
170
- "text/plain": [
171
- "Downloading readme: 0%| | 0.00/12.2k [00:00<?, ?B/s]"
172
- ]
173
- },
174
- "metadata": {},
175
- "output_type": "display_data"
176
- },
177
- {
178
- "data": {
179
- "application/vnd.jupyter.widget-view+json": {
180
- "model_id": "9dd1f4ded47c4160b55f1bcedce2694f",
181
- "version_major": 2,
182
- "version_minor": 0
183
- },
184
- "text/plain": [
185
- "Downloading extra modules: 0%| | 0.00/3.44k [00:00<?, ?B/s]"
186
- ]
187
- },
188
- "metadata": {},
189
- "output_type": "display_data"
190
- },
191
- {
192
- "data": {
193
- "application/vnd.jupyter.widget-view+json": {
194
- "model_id": "a442da1e2a6b4271bae8ae0c655594b6",
195
- "version_major": 2,
196
- "version_minor": 0
197
- },
198
- "text/plain": [
199
- "Downloading extra modules: 0%| | 0.00/60.9k [00:00<?, ?B/s]"
200
- ]
201
- },
202
- "metadata": {},
203
- "output_type": "display_data"
204
- }
205
- ],
206
  "source": [
207
  "from datasets import IterableDatasetDict\n",
208
  "\n",
209
  "raw_datasets = IterableDatasetDict()\n",
210
  "\n",
211
- "raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"train\", use_auth_token=True) # set split=\"train+validation\" for low-resource\n",
 
 
212
  "raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"test\", use_auth_token=True)"
213
  ]
214
  },
@@ -244,107 +189,10 @@
244
  "cell_type": "code",
245
  "execution_count": 3,
246
  "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
247
- "metadata": {},
248
- "outputs": [
249
- {
250
- "data": {
251
- "application/vnd.jupyter.widget-view+json": {
252
- "model_id": "0d0c17f582474beebea009f021515946",
253
- "version_major": 2,
254
- "version_minor": 0
255
- },
256
- "text/plain": [
257
- "Downloading: 0%| | 0.00/185k [00:00<?, ?B/s]"
258
- ]
259
- },
260
- "metadata": {},
261
- "output_type": "display_data"
262
- },
263
- {
264
- "data": {
265
- "application/vnd.jupyter.widget-view+json": {
266
- "model_id": "9f48049fe65c4045ba74c6fac892945e",
267
- "version_major": 2,
268
- "version_minor": 0
269
- },
270
- "text/plain": [
271
- "Downloading: 0%| | 0.00/829 [00:00<?, ?B/s]"
272
- ]
273
- },
274
- "metadata": {},
275
- "output_type": "display_data"
276
- },
277
- {
278
- "data": {
279
- "application/vnd.jupyter.widget-view+json": {
280
- "model_id": "25615259dd364494bc5782b4e8231b05",
281
- "version_major": 2,
282
- "version_minor": 0
283
- },
284
- "text/plain": [
285
- "Downloading: 0%| | 0.00/1.04M [00:00<?, ?B/s]"
286
- ]
287
- },
288
- "metadata": {},
289
- "output_type": "display_data"
290
- },
291
- {
292
- "data": {
293
- "application/vnd.jupyter.widget-view+json": {
294
- "model_id": "6867564094bf4c7d82d0046dccb173fe",
295
- "version_major": 2,
296
- "version_minor": 0
297
- },
298
- "text/plain": [
299
- "Downloading: 0%| | 0.00/494k [00:00<?, ?B/s]"
300
- ]
301
- },
302
- "metadata": {},
303
- "output_type": "display_data"
304
- },
305
- {
306
- "data": {
307
- "application/vnd.jupyter.widget-view+json": {
308
- "model_id": "2cb3be77451542868602317c4d7eff85",
309
- "version_major": 2,
310
- "version_minor": 0
311
- },
312
- "text/plain": [
313
- "Downloading: 0%| | 0.00/52.7k [00:00<?, ?B/s]"
314
- ]
315
- },
316
- "metadata": {},
317
- "output_type": "display_data"
318
- },
319
- {
320
- "data": {
321
- "application/vnd.jupyter.widget-view+json": {
322
- "model_id": "6dfc5dedce13459bbac6f2d695695ae0",
323
- "version_major": 2,
324
- "version_minor": 0
325
- },
326
- "text/plain": [
327
- "Downloading: 0%| | 0.00/2.11k [00:00<?, ?B/s]"
328
- ]
329
- },
330
- "metadata": {},
331
- "output_type": "display_data"
332
- },
333
- {
334
- "data": {
335
- "application/vnd.jupyter.widget-view+json": {
336
- "model_id": "944cb945f9dd47178ab22d418aa2934b",
337
- "version_major": 2,
338
- "version_minor": 0
339
- },
340
- "text/plain": [
341
- "Downloading: 0%| | 0.00/2.06k [00:00<?, ?B/s]"
342
- ]
343
- },
344
- "metadata": {},
345
- "output_type": "display_data"
346
- }
347
- ],
348
  "source": [
349
  "from transformers import WhisperProcessor\n",
350
  "\n",
@@ -706,22 +554,7 @@
706
  "execution_count": 14,
707
  "id": "b22b4011-f31f-4b57-b684-c52332f92890",
708
  "metadata": {},
709
- "outputs": [
710
- {
711
- "data": {
712
- "application/vnd.jupyter.widget-view+json": {
713
- "model_id": "bafc0b31fe9a4d239eedc348d5521dfc",
714
- "version_major": 2,
715
- "version_minor": 0
716
- },
717
- "text/plain": [
718
- "Downloading builder script: 0%| | 0.00/4.49k [00:00<?, ?B/s]"
719
- ]
720
- },
721
- "metadata": {},
722
- "output_type": "display_data"
723
- }
724
- ],
725
  "source": [
726
  "import evaluate\n",
727
  "\n",
@@ -800,36 +633,7 @@
800
  "execution_count": 16,
801
  "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
802
  "metadata": {},
803
- "outputs": [
804
- {
805
- "data": {
806
- "application/vnd.jupyter.widget-view+json": {
807
- "model_id": "e1d5d79e596a416aa96bde21be6fb551",
808
- "version_major": 2,
809
- "version_minor": 0
810
- },
811
- "text/plain": [
812
- "Downloading: 0%| | 0.00/1.97k [00:00<?, ?B/s]"
813
- ]
814
- },
815
- "metadata": {},
816
- "output_type": "display_data"
817
- },
818
- {
819
- "data": {
820
- "application/vnd.jupyter.widget-view+json": {
821
- "model_id": "3d722a61d7a440479d0f5497a6200345",
822
- "version_major": 2,
823
- "version_minor": 0
824
- },
825
- "text/plain": [
826
- "Downloading: 0%| | 0.00/967M [00:00<?, ?B/s]"
827
- ]
828
- },
829
- "metadata": {},
830
- "output_type": "display_data"
831
- }
832
- ],
833
  "source": [
834
  "from transformers import WhisperForConditionalGeneration\n",
835
  "\n",
@@ -874,10 +678,18 @@
874
  },
875
  {
876
  "cell_type": "code",
877
- "execution_count": 18,
878
  "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
879
  "metadata": {},
880
- "outputs": [],
 
 
 
 
 
 
 
 
881
  "source": [
882
  "from transformers import Seq2SeqTrainingArguments\n",
883
  "\n",
@@ -953,7 +765,7 @@
953
  },
954
  {
955
  "cell_type": "code",
956
- "execution_count": 20,
957
  "id": "d546d7fe-0543-479a-b708-2ebabec19493",
958
  "metadata": {},
959
  "outputs": [
@@ -992,7 +804,7 @@
992
  },
993
  {
994
  "cell_type": "code",
995
- "execution_count": 21,
996
  "id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
997
  "metadata": {},
998
  "outputs": [
@@ -1056,7 +868,8 @@
1056
  " Gradient Accumulation steps = 1\n",
1057
  " Total optimization steps = 5000\n",
1058
  " Number of trainable parameters = 241734912\n",
1059
- "Reading metadata...: 6568it [00:00, 41540.60it/s]\n",
 
1060
  "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
1061
  ]
1062
  },
@@ -1066,8 +879,8 @@
1066
  "\n",
1067
  " <div>\n",
1068
  " \n",
1069
- " <progress value='35' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1070
- " [ 35/5000 03:29 < 8:46:02, 0.16 it/s, Epoch 0.01/9223372036854775807]\n",
1071
  " </div>\n",
1072
  " <table border=\"1\" class=\"dataframe\">\n",
1073
  " <thead>\n",
@@ -1075,9 +888,16 @@
1075
  " <th>Step</th>\n",
1076
  " <th>Training Loss</th>\n",
1077
  " <th>Validation Loss</th>\n",
 
1078
  " </tr>\n",
1079
  " </thead>\n",
1080
  " <tbody>\n",
 
 
 
 
 
 
1081
  " </tbody>\n",
1082
  "</table><p>"
1083
  ],
@@ -1087,6 +907,40 @@
1087
  },
1088
  "metadata": {},
1089
  "output_type": "display_data"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1090
  }
1091
  ],
1092
  "source": [
@@ -1115,7 +969,7 @@
1115
  },
1116
  {
1117
  "cell_type": "code",
1118
- "execution_count": 37,
1119
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
1120
  "metadata": {},
1121
  "outputs": [],
@@ -1123,7 +977,7 @@
1123
  "kwargs = {\n",
1124
  " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
1125
  " \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
1126
- " \"language\": \"zh-TW\",\n",
1127
  " \"model_name\": \"Whisper Small Chinese - Kimbo Chen\", # a 'pretty' name for your model\n",
1128
  " \"finetuned_from\": \"openai/whisper-small\",\n",
1129
  " \"tasks\": \"automatic-speech-recognition\",\n",
@@ -1141,7 +995,7 @@
1141
  },
1142
  {
1143
  "cell_type": "code",
1144
- "execution_count": null,
1145
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
1146
  "metadata": {},
1147
  "outputs": [
@@ -1156,14 +1010,22 @@
1156
  "tokenizer config file saved in ./tokenizer_config.json\n",
1157
  "Special tokens file saved in ./special_tokens_map.json\n",
1158
  "added tokens file saved in ./added_tokens.json\n",
1159
- "Several commits (13) will be pushed upstream.\n",
1160
  "The progress bars may be unreliable.\n",
1161
- "remote: Scanning LFS files for validity, may be slow... \n",
1162
- "remote: LFS file scan complete. \n",
1163
  "To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
1164
- " 214645d..6f314a7 main -> main\n",
1165
  "\n"
1166
  ]
 
 
 
 
 
 
 
 
 
 
1167
  }
1168
  ],
1169
  "source": [
 
145
  "execution_count": 2,
146
  "id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
147
  "metadata": {},
148
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  "source": [
150
  "from datasets import IterableDatasetDict\n",
151
  "\n",
152
  "raw_datasets = IterableDatasetDict()\n",
153
  "\n",
154
+ "# raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-CN\", split=\"train\", use_auth_token=True) # set split=\"train+validation\" for low-resource\n",
155
+ "# raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-CN\", split=\"test\", use_auth_token=True)\n",
156
+ "raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"train+validation\", use_auth_token=True) # set split=\"train+validation\" for low-resource\n",
157
  "raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"test\", use_auth_token=True)"
158
  ]
159
  },
 
189
  "cell_type": "code",
190
  "execution_count": 3,
191
  "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
192
+ "metadata": {
193
+ "tags": []
194
+ },
195
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  "source": [
197
  "from transformers import WhisperProcessor\n",
198
  "\n",
 
554
  "execution_count": 14,
555
  "id": "b22b4011-f31f-4b57-b684-c52332f92890",
556
  "metadata": {},
557
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
558
  "source": [
559
  "import evaluate\n",
560
  "\n",
 
633
  "execution_count": 16,
634
  "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
635
  "metadata": {},
636
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
637
  "source": [
638
  "from transformers import WhisperForConditionalGeneration\n",
639
  "\n",
 
678
  },
679
  {
680
  "cell_type": "code",
681
+ "execution_count": 22,
682
  "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
683
  "metadata": {},
684
+ "outputs": [
685
+ {
686
+ "name": "stderr",
687
+ "output_type": "stream",
688
+ "text": [
689
+ "PyTorch: setting up devices\n"
690
+ ]
691
+ }
692
+ ],
693
  "source": [
694
  "from transformers import Seq2SeqTrainingArguments\n",
695
  "\n",
 
765
  },
766
  {
767
  "cell_type": "code",
768
+ "execution_count": 23,
769
  "id": "d546d7fe-0543-479a-b708-2ebabec19493",
770
  "metadata": {},
771
  "outputs": [
 
804
  },
805
  {
806
  "cell_type": "code",
807
+ "execution_count": 24,
808
  "id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
809
  "metadata": {},
810
  "outputs": [
 
868
  " Gradient Accumulation steps = 1\n",
869
  " Total optimization steps = 5000\n",
870
  " Number of trainable parameters = 241734912\n",
871
+ "Reading metadata...: 6568it [00:00, 82605.00it/s]\n",
872
+ "Reading metadata...: 4709it [00:00, 34808.15it/s]\n",
873
  "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
874
  ]
875
  },
 
879
  "\n",
880
  " <div>\n",
881
  " \n",
882
+ " <progress value='1001' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
883
+ " [1001/5000 1:48:25 < 7:14:02, 0.15 it/s, Epoch 6.02/9223372036854775807]\n",
884
  " </div>\n",
885
  " <table border=\"1\" class=\"dataframe\">\n",
886
  " <thead>\n",
 
888
  " <th>Step</th>\n",
889
  " <th>Training Loss</th>\n",
890
  " <th>Validation Loss</th>\n",
891
+ " <th>Wer</th>\n",
892
  " </tr>\n",
893
  " </thead>\n",
894
  " <tbody>\n",
895
+ " <tr>\n",
896
+ " <td>1000</td>\n",
897
+ " <td>0.004600</td>\n",
898
+ " <td>0.210189</td>\n",
899
+ " <td>41.801433</td>\n",
900
+ " </tr>\n",
901
  " </tbody>\n",
902
  "</table><p>"
903
  ],
 
907
  },
908
  "metadata": {},
909
  "output_type": "display_data"
910
+ },
911
+ {
912
+ "name": "stderr",
913
+ "output_type": "stream",
914
+ "text": [
915
+ "Reading metadata...: 6568it [00:00, 59821.30it/s]\n",
916
+ "Reading metadata...: 4709it [00:00, 76452.83it/s]\n",
917
+ "Reading metadata...: 6568it [00:00, 88722.02it/s]\n",
918
+ "Reading metadata...: 4709it [00:00, 33936.10it/s]\n",
919
+ "Reading metadata...: 6568it [00:00, 20936.31it/s]\n",
920
+ "Reading metadata...: 4709it [00:00, 20573.38it/s]\n",
921
+ "Reading metadata...: 6568it [00:00, 67954.78it/s]\n",
922
+ "Reading metadata...: 4709it [00:00, 58312.87it/s]\n",
923
+ "Reading metadata...: 6568it [00:00, 89351.95it/s]\n",
924
+ "Reading metadata...: 4709it [00:00, 26579.06it/s]\n",
925
+ "Reading metadata...: 6568it [00:00, 56758.03it/s]\n",
926
+ "Reading metadata...: 4709it [00:00, 80017.25it/s]\n",
927
+ "***** Running Evaluation *****\n",
928
+ " Num examples: Unknown\n",
929
+ " Batch size = 8\n",
930
+ "Reading metadata...: 4709it [00:00, 31641.56it/s]\n",
931
+ "The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
932
+ "Saving model checkpoint to ./checkpoint-1000\n",
933
+ "Configuration saved in ./checkpoint-1000/config.json\n",
934
+ "Model weights saved in ./checkpoint-1000/pytorch_model.bin\n",
935
+ "Feature extractor saved in ./checkpoint-1000/preprocessor_config.json\n",
936
+ "tokenizer config file saved in ./checkpoint-1000/tokenizer_config.json\n",
937
+ "Special tokens file saved in ./checkpoint-1000/special_tokens_map.json\n",
938
+ "added tokens file saved in ./checkpoint-1000/added_tokens.json\n",
939
+ "Feature extractor saved in ./preprocessor_config.json\n",
940
+ "tokenizer config file saved in ./tokenizer_config.json\n",
941
+ "Special tokens file saved in ./special_tokens_map.json\n",
942
+ "added tokens file saved in ./added_tokens.json\n"
943
+ ]
944
  }
945
  ],
946
  "source": [
 
969
  },
970
  {
971
  "cell_type": "code",
972
+ "execution_count": 40,
973
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
974
  "metadata": {},
975
  "outputs": [],
 
977
  "kwargs = {\n",
978
  " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
979
  " \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
980
+ " \"language\": \"zh\",\n",
981
  " \"model_name\": \"Whisper Small Chinese - Kimbo Chen\", # a 'pretty' name for your model\n",
982
  " \"finetuned_from\": \"openai/whisper-small\",\n",
983
  " \"tasks\": \"automatic-speech-recognition\",\n",
 
995
  },
996
  {
997
  "cell_type": "code",
998
+ "execution_count": 41,
999
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
1000
  "metadata": {},
1001
  "outputs": [
 
1010
  "tokenizer config file saved in ./tokenizer_config.json\n",
1011
  "Special tokens file saved in ./special_tokens_map.json\n",
1012
  "added tokens file saved in ./added_tokens.json\n",
1013
+ "Several commits (2) will be pushed upstream.\n",
1014
  "The progress bars may be unreliable.\n",
 
 
1015
  "To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
1016
+ " 61dfe27..a94bfc3 main -> main\n",
1017
  "\n"
1018
  ]
1019
+ },
1020
+ {
1021
+ "data": {
1022
+ "text/plain": [
1023
+ "'https://huggingface.co/kimbochen/whisper-small-zh-tw/commit/a94bfc3cbb71b83e10525899df53ee0d4db96e32'"
1024
+ ]
1025
+ },
1026
+ "execution_count": 41,
1027
+ "metadata": {},
1028
+ "output_type": "execute_result"
1029
  }
1030
  ],
1031
  "source": [
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68c37aa36016265b630dfcf67b6593ca65cefa6c6e939ab9dd790e2b04c9b56f
3
  size 967102601
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a55e1c436f4a3d45cf396032864b1e41df706d4ce41f7465c29c839b34b723fb
3
  size 967102601
runs/Dec10_16-51-23_129-213-89-27/1670691100.0045364/events.out.tfevents.1670691100.129-213-89-27.2038299.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:578751db7f962743d233f63d1b02e18773213fe5a7d8a669ae5aedba6e024fad
3
+ size 5863
runs/Dec10_16-51-23_129-213-89-27/events.out.tfevents.1670691099.129-213-89-27.2038299.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e5d4ec692e90a8cee704b0209ee2f2b3f07105c8d547ebacb9a6166b3a7ab7f
3
+ size 4286
runs/Dec10_16-52-02_129-213-89-27/1670691146.0308955/events.out.tfevents.1670691146.129-213-89-27.2038299.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99f1f7fb944bdd7c159c5be085a33c0b40536b0e3c87b4d3b17b747b14f50a50
3
+ size 5863
runs/Dec10_16-52-02_129-213-89-27/events.out.tfevents.1670691146.129-213-89-27.2038299.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:285e20376617c173f79f8cf671b83dc10034ddeead5195847ff160c50958462b
3
+ size 4593
runs/Dec10_17-00-05_129-213-89-27/1670691616.7069395/events.out.tfevents.1670691616.129-213-89-27.2038299.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c04abfabe9b86260bd7f6852ff64646ea61afb4c32a1f538a24b7bc1044b7b98
3
+ size 5863
runs/Dec10_17-00-05_129-213-89-27/events.out.tfevents.1670691616.129-213-89-27.2038299.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:070da7fa0ee0458c1a9fdff98dfae6c66ef3740e9a1be96a7e53464e908305e5
3
+ size 4285
runs/Dec10_17-02-04_129-213-89-27/1670691734.0866246/events.out.tfevents.1670691734.129-213-89-27.2038299.7 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7550e83d4a3fc05d7bdb2366917a5eef42a336652aee0eb90437a55e49155be3
3
+ size 5863
runs/Dec10_17-02-04_129-213-89-27/events.out.tfevents.1670691734.129-213-89-27.2038299.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0734aba51b0916f0b56bf72bc4f0d005b3bd3e23a6e1e5c2c22927b8da7dce2
3
+ size 4284
runs/Dec10_17-03-50_129-213-89-27/1670691839.2134347/events.out.tfevents.1670691839.129-213-89-27.2181513.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7888ed253cf7fa582a17034b7ed233827b92fbf03fea215affa25d85c205079e
3
+ size 5863
runs/Dec10_17-03-50_129-213-89-27/events.out.tfevents.1670691839.129-213-89-27.2181513.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfb0ec8513eb41db733ee3b6699345f1d1c595b8d3c079e96919b33438189525
3
+ size 10869
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6834fe9c5dcdc42bb0457b42c442c46c7d296a603b5ab0bb581485a55ab356fb
3
  size 3579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dea523f572893e3c4ca713d3731c8194b372c3af7337897cc7ca69fa8dc28498
3
  size 3579