Training in progress, step 1000
Browse files- .ipynb_checkpoints/fine-tune-whisper-streaming-checkpoint.ipynb +419 -38
- fine-tune-whisper-streaming.ipynb +142 -5
- pytorch_model.bin +1 -1
- runs/Dec10_02-58-52_129-213-89-27/1670641248.2035987/events.out.tfevents.1670641248.129-213-89-27.128858.1 +3 -0
- runs/Dec10_02-58-52_129-213-89-27/events.out.tfevents.1670641248.129-213-89-27.128858.0 +3 -0
.ipynb_checkpoints/fine-tune-whisper-streaming-checkpoint.ipynb
CHANGED
@@ -108,7 +108,7 @@
|
|
108 |
},
|
109 |
{
|
110 |
"cell_type": "code",
|
111 |
-
"execution_count":
|
112 |
"id": "065a8cf7-e54f-4ac3-900e-609c80714fca",
|
113 |
"metadata": {},
|
114 |
"outputs": [],
|
@@ -142,17 +142,74 @@
|
|
142 |
},
|
143 |
{
|
144 |
"cell_type": "code",
|
145 |
-
"execution_count":
|
146 |
"id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
|
147 |
"metadata": {},
|
148 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
"source": [
|
150 |
"from datasets import IterableDatasetDict\n",
|
151 |
"\n",
|
152 |
"raw_datasets = IterableDatasetDict()\n",
|
153 |
"\n",
|
154 |
-
"raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"
|
155 |
-
"raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"
|
156 |
]
|
157 |
},
|
158 |
{
|
@@ -185,14 +242,113 @@
|
|
185 |
},
|
186 |
{
|
187 |
"cell_type": "code",
|
188 |
-
"execution_count":
|
189 |
"id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
|
190 |
"metadata": {},
|
191 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
"source": [
|
193 |
"from transformers import WhisperProcessor\n",
|
194 |
"\n",
|
195 |
-
"processor = WhisperProcessor.from_pretrained(\"openai/whisper-small\", language=\"
|
196 |
]
|
197 |
},
|
198 |
{
|
@@ -213,10 +369,31 @@
|
|
213 |
},
|
214 |
{
|
215 |
"cell_type": "code",
|
216 |
-
"execution_count":
|
217 |
"id": "ab5a13b4-9bd4-4aa0-aef2-b3de9b762988",
|
218 |
"metadata": {},
|
219 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
"source": [
|
221 |
"raw_datasets[\"train\"].features"
|
222 |
]
|
@@ -238,7 +415,7 @@
|
|
238 |
},
|
239 |
{
|
240 |
"cell_type": "code",
|
241 |
-
"execution_count":
|
242 |
"id": "3ab6a724-3d1e-478b-a9e9-d2f85feb6c39",
|
243 |
"metadata": {},
|
244 |
"outputs": [],
|
@@ -258,7 +435,7 @@
|
|
258 |
},
|
259 |
{
|
260 |
"cell_type": "code",
|
261 |
-
"execution_count":
|
262 |
"id": "d041650e-1c48-4439-87b3-5b6f4a514107",
|
263 |
"metadata": {},
|
264 |
"outputs": [],
|
@@ -285,7 +462,7 @@
|
|
285 |
},
|
286 |
{
|
287 |
"cell_type": "code",
|
288 |
-
"execution_count":
|
289 |
"id": "c085911c-a10a-41ef-8874-306e0503e9bb",
|
290 |
"metadata": {},
|
291 |
"outputs": [],
|
@@ -321,7 +498,7 @@
|
|
321 |
},
|
322 |
{
|
323 |
"cell_type": "code",
|
324 |
-
"execution_count":
|
325 |
"id": "a37a7cdb-9013-427f-8de9-6a8d0e9dc684",
|
326 |
"metadata": {},
|
327 |
"outputs": [],
|
@@ -339,7 +516,7 @@
|
|
339 |
},
|
340 |
{
|
341 |
"cell_type": "code",
|
342 |
-
"execution_count":
|
343 |
"id": "1b145699-acfc-4b1d-93a2-a2ad3d62674c",
|
344 |
"metadata": {},
|
345 |
"outputs": [],
|
@@ -360,7 +537,7 @@
|
|
360 |
},
|
361 |
{
|
362 |
"cell_type": "code",
|
363 |
-
"execution_count":
|
364 |
"id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
|
365 |
"metadata": {},
|
366 |
"outputs": [],
|
@@ -381,7 +558,7 @@
|
|
381 |
},
|
382 |
{
|
383 |
"cell_type": "code",
|
384 |
-
"execution_count":
|
385 |
"id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
|
386 |
"metadata": {},
|
387 |
"outputs": [],
|
@@ -451,7 +628,7 @@
|
|
451 |
},
|
452 |
{
|
453 |
"cell_type": "code",
|
454 |
-
"execution_count":
|
455 |
"id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
|
456 |
"metadata": {},
|
457 |
"outputs": [],
|
@@ -499,7 +676,7 @@
|
|
499 |
},
|
500 |
{
|
501 |
"cell_type": "code",
|
502 |
-
"execution_count":
|
503 |
"id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
|
504 |
"metadata": {},
|
505 |
"outputs": [],
|
@@ -526,10 +703,25 @@
|
|
526 |
},
|
527 |
{
|
528 |
"cell_type": "code",
|
529 |
-
"execution_count":
|
530 |
"id": "b22b4011-f31f-4b57-b684-c52332f92890",
|
531 |
"metadata": {},
|
532 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
533 |
"source": [
|
534 |
"import evaluate\n",
|
535 |
"\n",
|
@@ -555,7 +747,7 @@
|
|
555 |
},
|
556 |
{
|
557 |
"cell_type": "code",
|
558 |
-
"execution_count":
|
559 |
"id": "a11d1bfc-9e28-460f-a287-72d8f7bc1acb",
|
560 |
"metadata": {},
|
561 |
"outputs": [],
|
@@ -605,10 +797,39 @@
|
|
605 |
},
|
606 |
{
|
607 |
"cell_type": "code",
|
608 |
-
"execution_count":
|
609 |
"id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
|
610 |
"metadata": {},
|
611 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
612 |
"source": [
|
613 |
"from transformers import WhisperForConditionalGeneration\n",
|
614 |
"\n",
|
@@ -625,7 +846,7 @@
|
|
625 |
},
|
626 |
{
|
627 |
"cell_type": "code",
|
628 |
-
"execution_count":
|
629 |
"id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
|
630 |
"metadata": {},
|
631 |
"outputs": [],
|
@@ -653,7 +874,7 @@
|
|
653 |
},
|
654 |
{
|
655 |
"cell_type": "code",
|
656 |
-
"execution_count":
|
657 |
"id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
|
658 |
"metadata": {},
|
659 |
"outputs": [],
|
@@ -703,7 +924,7 @@
|
|
703 |
},
|
704 |
{
|
705 |
"cell_type": "code",
|
706 |
-
"execution_count":
|
707 |
"id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
|
708 |
"metadata": {},
|
709 |
"outputs": [],
|
@@ -732,10 +953,20 @@
|
|
732 |
},
|
733 |
{
|
734 |
"cell_type": "code",
|
735 |
-
"execution_count":
|
736 |
"id": "d546d7fe-0543-479a-b708-2ebabec19493",
|
737 |
"metadata": {},
|
738 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
739 |
"source": [
|
740 |
"from transformers import Seq2SeqTrainer\n",
|
741 |
"\n",
|
@@ -761,10 +992,23 @@
|
|
761 |
},
|
762 |
{
|
763 |
"cell_type": "code",
|
764 |
-
"execution_count":
|
765 |
"id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
|
766 |
"metadata": {},
|
767 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
768 |
"source": [
|
769 |
"model.save_pretrained(training_args.output_dir)\n",
|
770 |
"processor.save_pretrained(training_args.output_dir)"
|
@@ -797,7 +1041,54 @@
|
|
797 |
"execution_count": null,
|
798 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
|
799 |
"metadata": {},
|
800 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
801 |
"source": [
|
802 |
"trainer.train()"
|
803 |
]
|
@@ -824,7 +1115,7 @@
|
|
824 |
},
|
825 |
{
|
826 |
"cell_type": "code",
|
827 |
-
"execution_count":
|
828 |
"id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
|
829 |
"metadata": {},
|
830 |
"outputs": [],
|
@@ -832,8 +1123,8 @@
|
|
832 |
"kwargs = {\n",
|
833 |
" \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
|
834 |
" \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
|
835 |
-
" \"language\": \"
|
836 |
-
" \"model_name\": \"Whisper Small
|
837 |
" \"finetuned_from\": \"openai/whisper-small\",\n",
|
838 |
" \"tasks\": \"automatic-speech-recognition\",\n",
|
839 |
" \"tags\": \"whisper-event\",\n",
|
@@ -850,10 +1141,100 @@
|
|
850 |
},
|
851 |
{
|
852 |
"cell_type": "code",
|
853 |
-
"execution_count":
|
854 |
"id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
|
855 |
"metadata": {},
|
856 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
857 |
"source": [
|
858 |
"trainer.push_to_hub(**kwargs)"
|
859 |
]
|
@@ -875,7 +1256,7 @@
|
|
875 |
"name": "python",
|
876 |
"nbconvert_exporter": "python",
|
877 |
"pygments_lexer": "ipython3",
|
878 |
-
"version": "3.8.
|
879 |
}
|
880 |
},
|
881 |
"nbformat": 4,
|
|
|
108 |
},
|
109 |
{
|
110 |
"cell_type": "code",
|
111 |
+
"execution_count": 1,
|
112 |
"id": "065a8cf7-e54f-4ac3-900e-609c80714fca",
|
113 |
"metadata": {},
|
114 |
"outputs": [],
|
|
|
142 |
},
|
143 |
{
|
144 |
"cell_type": "code",
|
145 |
+
"execution_count": 2,
|
146 |
"id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
|
147 |
"metadata": {},
|
148 |
+
"outputs": [
|
149 |
+
{
|
150 |
+
"data": {
|
151 |
+
"application/vnd.jupyter.widget-view+json": {
|
152 |
+
"model_id": "ecce3a630cdb4ebab217a88a0163b257",
|
153 |
+
"version_major": 2,
|
154 |
+
"version_minor": 0
|
155 |
+
},
|
156 |
+
"text/plain": [
|
157 |
+
"Downloading builder script: 0%| | 0.00/8.30k [00:00<?, ?B/s]"
|
158 |
+
]
|
159 |
+
},
|
160 |
+
"metadata": {},
|
161 |
+
"output_type": "display_data"
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"data": {
|
165 |
+
"application/vnd.jupyter.widget-view+json": {
|
166 |
+
"model_id": "b0141b068f944775867034bc494f88d7",
|
167 |
+
"version_major": 2,
|
168 |
+
"version_minor": 0
|
169 |
+
},
|
170 |
+
"text/plain": [
|
171 |
+
"Downloading readme: 0%| | 0.00/12.2k [00:00<?, ?B/s]"
|
172 |
+
]
|
173 |
+
},
|
174 |
+
"metadata": {},
|
175 |
+
"output_type": "display_data"
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"data": {
|
179 |
+
"application/vnd.jupyter.widget-view+json": {
|
180 |
+
"model_id": "9dd1f4ded47c4160b55f1bcedce2694f",
|
181 |
+
"version_major": 2,
|
182 |
+
"version_minor": 0
|
183 |
+
},
|
184 |
+
"text/plain": [
|
185 |
+
"Downloading extra modules: 0%| | 0.00/3.44k [00:00<?, ?B/s]"
|
186 |
+
]
|
187 |
+
},
|
188 |
+
"metadata": {},
|
189 |
+
"output_type": "display_data"
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"data": {
|
193 |
+
"application/vnd.jupyter.widget-view+json": {
|
194 |
+
"model_id": "a442da1e2a6b4271bae8ae0c655594b6",
|
195 |
+
"version_major": 2,
|
196 |
+
"version_minor": 0
|
197 |
+
},
|
198 |
+
"text/plain": [
|
199 |
+
"Downloading extra modules: 0%| | 0.00/60.9k [00:00<?, ?B/s]"
|
200 |
+
]
|
201 |
+
},
|
202 |
+
"metadata": {},
|
203 |
+
"output_type": "display_data"
|
204 |
+
}
|
205 |
+
],
|
206 |
"source": [
|
207 |
"from datasets import IterableDatasetDict\n",
|
208 |
"\n",
|
209 |
"raw_datasets = IterableDatasetDict()\n",
|
210 |
"\n",
|
211 |
+
"raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"train\", use_auth_token=True) # set split=\"train+validation\" for low-resource\n",
|
212 |
+
"raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"test\", use_auth_token=True)"
|
213 |
]
|
214 |
},
|
215 |
{
|
|
|
242 |
},
|
243 |
{
|
244 |
"cell_type": "code",
|
245 |
+
"execution_count": 3,
|
246 |
"id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
|
247 |
"metadata": {},
|
248 |
+
"outputs": [
|
249 |
+
{
|
250 |
+
"data": {
|
251 |
+
"application/vnd.jupyter.widget-view+json": {
|
252 |
+
"model_id": "0d0c17f582474beebea009f021515946",
|
253 |
+
"version_major": 2,
|
254 |
+
"version_minor": 0
|
255 |
+
},
|
256 |
+
"text/plain": [
|
257 |
+
"Downloading: 0%| | 0.00/185k [00:00<?, ?B/s]"
|
258 |
+
]
|
259 |
+
},
|
260 |
+
"metadata": {},
|
261 |
+
"output_type": "display_data"
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"data": {
|
265 |
+
"application/vnd.jupyter.widget-view+json": {
|
266 |
+
"model_id": "9f48049fe65c4045ba74c6fac892945e",
|
267 |
+
"version_major": 2,
|
268 |
+
"version_minor": 0
|
269 |
+
},
|
270 |
+
"text/plain": [
|
271 |
+
"Downloading: 0%| | 0.00/829 [00:00<?, ?B/s]"
|
272 |
+
]
|
273 |
+
},
|
274 |
+
"metadata": {},
|
275 |
+
"output_type": "display_data"
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"data": {
|
279 |
+
"application/vnd.jupyter.widget-view+json": {
|
280 |
+
"model_id": "25615259dd364494bc5782b4e8231b05",
|
281 |
+
"version_major": 2,
|
282 |
+
"version_minor": 0
|
283 |
+
},
|
284 |
+
"text/plain": [
|
285 |
+
"Downloading: 0%| | 0.00/1.04M [00:00<?, ?B/s]"
|
286 |
+
]
|
287 |
+
},
|
288 |
+
"metadata": {},
|
289 |
+
"output_type": "display_data"
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"data": {
|
293 |
+
"application/vnd.jupyter.widget-view+json": {
|
294 |
+
"model_id": "6867564094bf4c7d82d0046dccb173fe",
|
295 |
+
"version_major": 2,
|
296 |
+
"version_minor": 0
|
297 |
+
},
|
298 |
+
"text/plain": [
|
299 |
+
"Downloading: 0%| | 0.00/494k [00:00<?, ?B/s]"
|
300 |
+
]
|
301 |
+
},
|
302 |
+
"metadata": {},
|
303 |
+
"output_type": "display_data"
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"data": {
|
307 |
+
"application/vnd.jupyter.widget-view+json": {
|
308 |
+
"model_id": "2cb3be77451542868602317c4d7eff85",
|
309 |
+
"version_major": 2,
|
310 |
+
"version_minor": 0
|
311 |
+
},
|
312 |
+
"text/plain": [
|
313 |
+
"Downloading: 0%| | 0.00/52.7k [00:00<?, ?B/s]"
|
314 |
+
]
|
315 |
+
},
|
316 |
+
"metadata": {},
|
317 |
+
"output_type": "display_data"
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"data": {
|
321 |
+
"application/vnd.jupyter.widget-view+json": {
|
322 |
+
"model_id": "6dfc5dedce13459bbac6f2d695695ae0",
|
323 |
+
"version_major": 2,
|
324 |
+
"version_minor": 0
|
325 |
+
},
|
326 |
+
"text/plain": [
|
327 |
+
"Downloading: 0%| | 0.00/2.11k [00:00<?, ?B/s]"
|
328 |
+
]
|
329 |
+
},
|
330 |
+
"metadata": {},
|
331 |
+
"output_type": "display_data"
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"data": {
|
335 |
+
"application/vnd.jupyter.widget-view+json": {
|
336 |
+
"model_id": "944cb945f9dd47178ab22d418aa2934b",
|
337 |
+
"version_major": 2,
|
338 |
+
"version_minor": 0
|
339 |
+
},
|
340 |
+
"text/plain": [
|
341 |
+
"Downloading: 0%| | 0.00/2.06k [00:00<?, ?B/s]"
|
342 |
+
]
|
343 |
+
},
|
344 |
+
"metadata": {},
|
345 |
+
"output_type": "display_data"
|
346 |
+
}
|
347 |
+
],
|
348 |
"source": [
|
349 |
"from transformers import WhisperProcessor\n",
|
350 |
"\n",
|
351 |
+
"processor = WhisperProcessor.from_pretrained(\"openai/whisper-small\", language=\"chinese\", task=\"transcribe\")"
|
352 |
]
|
353 |
},
|
354 |
{
|
|
|
369 |
},
|
370 |
{
|
371 |
"cell_type": "code",
|
372 |
+
"execution_count": 4,
|
373 |
"id": "ab5a13b4-9bd4-4aa0-aef2-b3de9b762988",
|
374 |
"metadata": {},
|
375 |
+
"outputs": [
|
376 |
+
{
|
377 |
+
"data": {
|
378 |
+
"text/plain": [
|
379 |
+
"{'client_id': Value(dtype='string', id=None),\n",
|
380 |
+
" 'path': Value(dtype='string', id=None),\n",
|
381 |
+
" 'audio': Audio(sampling_rate=48000, mono=True, decode=True, id=None),\n",
|
382 |
+
" 'sentence': Value(dtype='string', id=None),\n",
|
383 |
+
" 'up_votes': Value(dtype='int64', id=None),\n",
|
384 |
+
" 'down_votes': Value(dtype='int64', id=None),\n",
|
385 |
+
" 'age': Value(dtype='string', id=None),\n",
|
386 |
+
" 'gender': Value(dtype='string', id=None),\n",
|
387 |
+
" 'accent': Value(dtype='string', id=None),\n",
|
388 |
+
" 'locale': Value(dtype='string', id=None),\n",
|
389 |
+
" 'segment': Value(dtype='string', id=None)}"
|
390 |
+
]
|
391 |
+
},
|
392 |
+
"execution_count": 4,
|
393 |
+
"metadata": {},
|
394 |
+
"output_type": "execute_result"
|
395 |
+
}
|
396 |
+
],
|
397 |
"source": [
|
398 |
"raw_datasets[\"train\"].features"
|
399 |
]
|
|
|
415 |
},
|
416 |
{
|
417 |
"cell_type": "code",
|
418 |
+
"execution_count": 5,
|
419 |
"id": "3ab6a724-3d1e-478b-a9e9-d2f85feb6c39",
|
420 |
"metadata": {},
|
421 |
"outputs": [],
|
|
|
435 |
},
|
436 |
{
|
437 |
"cell_type": "code",
|
438 |
+
"execution_count": 6,
|
439 |
"id": "d041650e-1c48-4439-87b3-5b6f4a514107",
|
440 |
"metadata": {},
|
441 |
"outputs": [],
|
|
|
462 |
},
|
463 |
{
|
464 |
"cell_type": "code",
|
465 |
+
"execution_count": 7,
|
466 |
"id": "c085911c-a10a-41ef-8874-306e0503e9bb",
|
467 |
"metadata": {},
|
468 |
"outputs": [],
|
|
|
498 |
},
|
499 |
{
|
500 |
"cell_type": "code",
|
501 |
+
"execution_count": 8,
|
502 |
"id": "a37a7cdb-9013-427f-8de9-6a8d0e9dc684",
|
503 |
"metadata": {},
|
504 |
"outputs": [],
|
|
|
516 |
},
|
517 |
{
|
518 |
"cell_type": "code",
|
519 |
+
"execution_count": 9,
|
520 |
"id": "1b145699-acfc-4b1d-93a2-a2ad3d62674c",
|
521 |
"metadata": {},
|
522 |
"outputs": [],
|
|
|
537 |
},
|
538 |
{
|
539 |
"cell_type": "code",
|
540 |
+
"execution_count": 10,
|
541 |
"id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
|
542 |
"metadata": {},
|
543 |
"outputs": [],
|
|
|
558 |
},
|
559 |
{
|
560 |
"cell_type": "code",
|
561 |
+
"execution_count": 11,
|
562 |
"id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
|
563 |
"metadata": {},
|
564 |
"outputs": [],
|
|
|
628 |
},
|
629 |
{
|
630 |
"cell_type": "code",
|
631 |
+
"execution_count": 12,
|
632 |
"id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
|
633 |
"metadata": {},
|
634 |
"outputs": [],
|
|
|
676 |
},
|
677 |
{
|
678 |
"cell_type": "code",
|
679 |
+
"execution_count": 13,
|
680 |
"id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
|
681 |
"metadata": {},
|
682 |
"outputs": [],
|
|
|
703 |
},
|
704 |
{
|
705 |
"cell_type": "code",
|
706 |
+
"execution_count": 14,
|
707 |
"id": "b22b4011-f31f-4b57-b684-c52332f92890",
|
708 |
"metadata": {},
|
709 |
+
"outputs": [
|
710 |
+
{
|
711 |
+
"data": {
|
712 |
+
"application/vnd.jupyter.widget-view+json": {
|
713 |
+
"model_id": "bafc0b31fe9a4d239eedc348d5521dfc",
|
714 |
+
"version_major": 2,
|
715 |
+
"version_minor": 0
|
716 |
+
},
|
717 |
+
"text/plain": [
|
718 |
+
"Downloading builder script: 0%| | 0.00/4.49k [00:00<?, ?B/s]"
|
719 |
+
]
|
720 |
+
},
|
721 |
+
"metadata": {},
|
722 |
+
"output_type": "display_data"
|
723 |
+
}
|
724 |
+
],
|
725 |
"source": [
|
726 |
"import evaluate\n",
|
727 |
"\n",
|
|
|
747 |
},
|
748 |
{
|
749 |
"cell_type": "code",
|
750 |
+
"execution_count": 15,
|
751 |
"id": "a11d1bfc-9e28-460f-a287-72d8f7bc1acb",
|
752 |
"metadata": {},
|
753 |
"outputs": [],
|
|
|
797 |
},
|
798 |
{
|
799 |
"cell_type": "code",
|
800 |
+
"execution_count": 16,
|
801 |
"id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
|
802 |
"metadata": {},
|
803 |
+
"outputs": [
|
804 |
+
{
|
805 |
+
"data": {
|
806 |
+
"application/vnd.jupyter.widget-view+json": {
|
807 |
+
"model_id": "e1d5d79e596a416aa96bde21be6fb551",
|
808 |
+
"version_major": 2,
|
809 |
+
"version_minor": 0
|
810 |
+
},
|
811 |
+
"text/plain": [
|
812 |
+
"Downloading: 0%| | 0.00/1.97k [00:00<?, ?B/s]"
|
813 |
+
]
|
814 |
+
},
|
815 |
+
"metadata": {},
|
816 |
+
"output_type": "display_data"
|
817 |
+
},
|
818 |
+
{
|
819 |
+
"data": {
|
820 |
+
"application/vnd.jupyter.widget-view+json": {
|
821 |
+
"model_id": "3d722a61d7a440479d0f5497a6200345",
|
822 |
+
"version_major": 2,
|
823 |
+
"version_minor": 0
|
824 |
+
},
|
825 |
+
"text/plain": [
|
826 |
+
"Downloading: 0%| | 0.00/967M [00:00<?, ?B/s]"
|
827 |
+
]
|
828 |
+
},
|
829 |
+
"metadata": {},
|
830 |
+
"output_type": "display_data"
|
831 |
+
}
|
832 |
+
],
|
833 |
"source": [
|
834 |
"from transformers import WhisperForConditionalGeneration\n",
|
835 |
"\n",
|
|
|
846 |
},
|
847 |
{
|
848 |
"cell_type": "code",
|
849 |
+
"execution_count": 17,
|
850 |
"id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
|
851 |
"metadata": {},
|
852 |
"outputs": [],
|
|
|
874 |
},
|
875 |
{
|
876 |
"cell_type": "code",
|
877 |
+
"execution_count": 18,
|
878 |
"id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
|
879 |
"metadata": {},
|
880 |
"outputs": [],
|
|
|
924 |
},
|
925 |
{
|
926 |
"cell_type": "code",
|
927 |
+
"execution_count": 19,
|
928 |
"id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
|
929 |
"metadata": {},
|
930 |
"outputs": [],
|
|
|
953 |
},
|
954 |
{
|
955 |
"cell_type": "code",
|
956 |
+
"execution_count": 20,
|
957 |
"id": "d546d7fe-0543-479a-b708-2ebabec19493",
|
958 |
"metadata": {},
|
959 |
+
"outputs": [
|
960 |
+
{
|
961 |
+
"name": "stderr",
|
962 |
+
"output_type": "stream",
|
963 |
+
"text": [
|
964 |
+
"/home/ubuntu/whisper-small-zh-tw/./ is already a clone of https://huggingface.co/kimbochen/whisper-small-zh-tw. Make sure you pull the latest changes with `repo.git_pull()`.\n",
|
965 |
+
"max_steps is given, it will override any value given in num_train_epochs\n",
|
966 |
+
"Using cuda_amp half precision backend\n"
|
967 |
+
]
|
968 |
+
}
|
969 |
+
],
|
970 |
"source": [
|
971 |
"from transformers import Seq2SeqTrainer\n",
|
972 |
"\n",
|
|
|
992 |
},
|
993 |
{
|
994 |
"cell_type": "code",
|
995 |
+
"execution_count": 21,
|
996 |
"id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
|
997 |
"metadata": {},
|
998 |
+
"outputs": [
|
999 |
+
{
|
1000 |
+
"name": "stderr",
|
1001 |
+
"output_type": "stream",
|
1002 |
+
"text": [
|
1003 |
+
"Configuration saved in ./config.json\n",
|
1004 |
+
"Model weights saved in ./pytorch_model.bin\n",
|
1005 |
+
"Feature extractor saved in ./preprocessor_config.json\n",
|
1006 |
+
"tokenizer config file saved in ./tokenizer_config.json\n",
|
1007 |
+
"Special tokens file saved in ./special_tokens_map.json\n",
|
1008 |
+
"added tokens file saved in ./added_tokens.json\n"
|
1009 |
+
]
|
1010 |
+
}
|
1011 |
+
],
|
1012 |
"source": [
|
1013 |
"model.save_pretrained(training_args.output_dir)\n",
|
1014 |
"processor.save_pretrained(training_args.output_dir)"
|
|
|
1041 |
"execution_count": null,
|
1042 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
|
1043 |
"metadata": {},
|
1044 |
+
"outputs": [
|
1045 |
+
{
|
1046 |
+
"name": "stderr",
|
1047 |
+
"output_type": "stream",
|
1048 |
+
"text": [
|
1049 |
+
"/home/ubuntu/.venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
|
1050 |
+
" warnings.warn(\n",
|
1051 |
+
"***** Running training *****\n",
|
1052 |
+
" Num examples = 320000\n",
|
1053 |
+
" Num Epochs = 9223372036854775807\n",
|
1054 |
+
" Instantaneous batch size per device = 64\n",
|
1055 |
+
" Total train batch size (w. parallel, distributed & accumulation) = 64\n",
|
1056 |
+
" Gradient Accumulation steps = 1\n",
|
1057 |
+
" Total optimization steps = 5000\n",
|
1058 |
+
" Number of trainable parameters = 241734912\n",
|
1059 |
+
"Reading metadata...: 6568it [00:00, 41540.60it/s]\n",
|
1060 |
+
"The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
|
1061 |
+
]
|
1062 |
+
},
|
1063 |
+
{
|
1064 |
+
"data": {
|
1065 |
+
"text/html": [
|
1066 |
+
"\n",
|
1067 |
+
" <div>\n",
|
1068 |
+
" \n",
|
1069 |
+
" <progress value='29' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
1070 |
+
" [ 29/5000 02:50 < 8:42:35, 0.16 it/s, Epoch 0.01/9223372036854775807]\n",
|
1071 |
+
" </div>\n",
|
1072 |
+
" <table border=\"1\" class=\"dataframe\">\n",
|
1073 |
+
" <thead>\n",
|
1074 |
+
" <tr style=\"text-align: left;\">\n",
|
1075 |
+
" <th>Step</th>\n",
|
1076 |
+
" <th>Training Loss</th>\n",
|
1077 |
+
" <th>Validation Loss</th>\n",
|
1078 |
+
" </tr>\n",
|
1079 |
+
" </thead>\n",
|
1080 |
+
" <tbody>\n",
|
1081 |
+
" </tbody>\n",
|
1082 |
+
"</table><p>"
|
1083 |
+
],
|
1084 |
+
"text/plain": [
|
1085 |
+
"<IPython.core.display.HTML object>"
|
1086 |
+
]
|
1087 |
+
},
|
1088 |
+
"metadata": {},
|
1089 |
+
"output_type": "display_data"
|
1090 |
+
}
|
1091 |
+
],
|
1092 |
"source": [
|
1093 |
"trainer.train()"
|
1094 |
]
|
|
|
1115 |
},
|
1116 |
{
|
1117 |
"cell_type": "code",
|
1118 |
+
"execution_count": 22,
|
1119 |
"id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
|
1120 |
"metadata": {},
|
1121 |
"outputs": [],
|
|
|
1123 |
"kwargs = {\n",
|
1124 |
" \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
|
1125 |
" \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
|
1126 |
+
" \"language\": \"zh-TW\",\n",
|
1127 |
+
" \"model_name\": \"Whisper Small Chinese - Kimbo Chen\", # a 'pretty' name for your model\n",
|
1128 |
" \"finetuned_from\": \"openai/whisper-small\",\n",
|
1129 |
" \"tasks\": \"automatic-speech-recognition\",\n",
|
1130 |
" \"tags\": \"whisper-event\",\n",
|
|
|
1141 |
},
|
1142 |
{
|
1143 |
"cell_type": "code",
|
1144 |
+
"execution_count": 23,
|
1145 |
"id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
|
1146 |
"metadata": {},
|
1147 |
+
"outputs": [
|
1148 |
+
{
|
1149 |
+
"name": "stderr",
|
1150 |
+
"output_type": "stream",
|
1151 |
+
"text": [
|
1152 |
+
"Saving model checkpoint to ./\n",
|
1153 |
+
"Configuration saved in ./config.json\n",
|
1154 |
+
"Model weights saved in ./pytorch_model.bin\n",
|
1155 |
+
"Feature extractor saved in ./preprocessor_config.json\n",
|
1156 |
+
"tokenizer config file saved in ./tokenizer_config.json\n",
|
1157 |
+
"Special tokens file saved in ./special_tokens_map.json\n",
|
1158 |
+
"added tokens file saved in ./added_tokens.json\n"
|
1159 |
+
]
|
1160 |
+
},
|
1161 |
+
{
|
1162 |
+
"data": {
|
1163 |
+
"application/vnd.jupyter.widget-view+json": {
|
1164 |
+
"model_id": "dc59052a3b7f45b2b896c03763c79f57",
|
1165 |
+
"version_major": 2,
|
1166 |
+
"version_minor": 0
|
1167 |
+
},
|
1168 |
+
"text/plain": [
|
1169 |
+
"Upload file pytorch_model.bin: 0%| | 32.0k/922M [00:00<?, ?B/s]"
|
1170 |
+
]
|
1171 |
+
},
|
1172 |
+
"metadata": {},
|
1173 |
+
"output_type": "display_data"
|
1174 |
+
},
|
1175 |
+
{
|
1176 |
+
"data": {
|
1177 |
+
"application/vnd.jupyter.widget-view+json": {
|
1178 |
+
"model_id": "1c58442a44e84af9a6dff915e036de83",
|
1179 |
+
"version_major": 2,
|
1180 |
+
"version_minor": 0
|
1181 |
+
},
|
1182 |
+
"text/plain": [
|
1183 |
+
"Upload file training_args.bin: 100%|##########| 3.50k/3.50k [00:00<?, ?B/s]"
|
1184 |
+
]
|
1185 |
+
},
|
1186 |
+
"metadata": {},
|
1187 |
+
"output_type": "display_data"
|
1188 |
+
},
|
1189 |
+
{
|
1190 |
+
"name": "stderr",
|
1191 |
+
"output_type": "stream",
|
1192 |
+
"text": [
|
1193 |
+
"remote: Scanning LFS files for validity, may be slow... \n",
|
1194 |
+
"remote: LFS file scan complete. \n",
|
1195 |
+
"To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
|
1196 |
+
" 2ee4cf3..214645d main -> main\n",
|
1197 |
+
"\n",
|
1198 |
+
"Dropping the following result as it does not have all the necessary fields:\n",
|
1199 |
+
"{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'dataset': {'name': 'Common Voice 11.0', 'type': 'mozilla-foundation/common_voice_11_0', 'config': 'zh-TW', 'split': 'test', 'args': 'zh-TW'}}\n",
|
1200 |
+
"remote: ----------------------------------------------------------\u001b[0;31m \n",
|
1201 |
+
"remote: Sorry, your push was rejected during YAML metadata verification: \n",
|
1202 |
+
"remote: - Error: \"language[0]\" must only contain lowercase characters \n",
|
1203 |
+
"remote: - Error: \"language[0]\" with value \"zh-TW\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m \n",
|
1204 |
+
"remote: ---------------------------------------------------------- \n",
|
1205 |
+
"remote: Please find the documentation at: \n",
|
1206 |
+
"remote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m \n",
|
1207 |
+
"remote: ---------------------------------------------------------- \n",
|
1208 |
+
"To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
|
1209 |
+
" ! [remote rejected] main -> main (pre-receive hook declined)\n",
|
1210 |
+
"error: failed to push some refs to 'https://huggingface.co/kimbochen/whisper-small-zh-tw'\n",
|
1211 |
+
"\n",
|
1212 |
+
"Error pushing update to the model card. Please read logs and retry.\n",
|
1213 |
+
"$remote: ----------------------------------------------------------\u001b[0;31m \n",
|
1214 |
+
"remote: Sorry, your push was rejected during YAML metadata verification: \n",
|
1215 |
+
"remote: - Error: \"language[0]\" must only contain lowercase characters \n",
|
1216 |
+
"remote: - Error: \"language[0]\" with value \"zh-TW\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m \n",
|
1217 |
+
"remote: ---------------------------------------------------------- \n",
|
1218 |
+
"remote: Please find the documentation at: \n",
|
1219 |
+
"remote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m \n",
|
1220 |
+
"remote: ---------------------------------------------------------- \n",
|
1221 |
+
"To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
|
1222 |
+
" ! [remote rejected] main -> main (pre-receive hook declined)\n",
|
1223 |
+
"error: failed to push some refs to 'https://huggingface.co/kimbochen/whisper-small-zh-tw'\n",
|
1224 |
+
"\n"
|
1225 |
+
]
|
1226 |
+
},
|
1227 |
+
{
|
1228 |
+
"data": {
|
1229 |
+
"text/plain": [
|
1230 |
+
"'https://huggingface.co/kimbochen/whisper-small-zh-tw/commit/214645d6cd1f0e7ab6a65a854eec2e349529961c'"
|
1231 |
+
]
|
1232 |
+
},
|
1233 |
+
"execution_count": 23,
|
1234 |
+
"metadata": {},
|
1235 |
+
"output_type": "execute_result"
|
1236 |
+
}
|
1237 |
+
],
|
1238 |
"source": [
|
1239 |
"trainer.push_to_hub(**kwargs)"
|
1240 |
]
|
|
|
1256 |
"name": "python",
|
1257 |
"nbconvert_exporter": "python",
|
1258 |
"pygments_lexer": "ipython3",
|
1259 |
+
"version": "3.8.10"
|
1260 |
}
|
1261 |
},
|
1262 |
"nbformat": 4,
|
fine-tune-whisper-streaming.ipynb
CHANGED
@@ -1041,7 +1041,54 @@
|
|
1041 |
"execution_count": null,
|
1042 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
|
1043 |
"metadata": {},
|
1044 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1045 |
"source": [
|
1046 |
"trainer.train()"
|
1047 |
]
|
@@ -1068,7 +1115,7 @@
|
|
1068 |
},
|
1069 |
{
|
1070 |
"cell_type": "code",
|
1071 |
-
"execution_count":
|
1072 |
"id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
|
1073 |
"metadata": {},
|
1074 |
"outputs": [],
|
@@ -1076,7 +1123,7 @@
|
|
1076 |
"kwargs = {\n",
|
1077 |
" \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
|
1078 |
" \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
|
1079 |
-
" \"language\": \"\",\n",
|
1080 |
" \"model_name\": \"Whisper Small Chinese - Kimbo Chen\", # a 'pretty' name for your model\n",
|
1081 |
" \"finetuned_from\": \"openai/whisper-small\",\n",
|
1082 |
" \"tasks\": \"automatic-speech-recognition\",\n",
|
@@ -1094,10 +1141,100 @@
|
|
1094 |
},
|
1095 |
{
|
1096 |
"cell_type": "code",
|
1097 |
-
"execution_count":
|
1098 |
"id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
|
1099 |
"metadata": {},
|
1100 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1101 |
"source": [
|
1102 |
"trainer.push_to_hub(**kwargs)"
|
1103 |
]
|
|
|
1041 |
"execution_count": null,
|
1042 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
|
1043 |
"metadata": {},
|
1044 |
+
"outputs": [
|
1045 |
+
{
|
1046 |
+
"name": "stderr",
|
1047 |
+
"output_type": "stream",
|
1048 |
+
"text": [
|
1049 |
+
"/home/ubuntu/.venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
|
1050 |
+
" warnings.warn(\n",
|
1051 |
+
"***** Running training *****\n",
|
1052 |
+
" Num examples = 320000\n",
|
1053 |
+
" Num Epochs = 9223372036854775807\n",
|
1054 |
+
" Instantaneous batch size per device = 64\n",
|
1055 |
+
" Total train batch size (w. parallel, distributed & accumulation) = 64\n",
|
1056 |
+
" Gradient Accumulation steps = 1\n",
|
1057 |
+
" Total optimization steps = 5000\n",
|
1058 |
+
" Number of trainable parameters = 241734912\n",
|
1059 |
+
"Reading metadata...: 6568it [00:00, 41540.60it/s]\n",
|
1060 |
+
"The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
|
1061 |
+
]
|
1062 |
+
},
|
1063 |
+
{
|
1064 |
+
"data": {
|
1065 |
+
"text/html": [
|
1066 |
+
"\n",
|
1067 |
+
" <div>\n",
|
1068 |
+
" \n",
|
1069 |
+
" <progress value='35' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
1070 |
+
" [ 35/5000 03:29 < 8:46:02, 0.16 it/s, Epoch 0.01/9223372036854775807]\n",
|
1071 |
+
" </div>\n",
|
1072 |
+
" <table border=\"1\" class=\"dataframe\">\n",
|
1073 |
+
" <thead>\n",
|
1074 |
+
" <tr style=\"text-align: left;\">\n",
|
1075 |
+
" <th>Step</th>\n",
|
1076 |
+
" <th>Training Loss</th>\n",
|
1077 |
+
" <th>Validation Loss</th>\n",
|
1078 |
+
" </tr>\n",
|
1079 |
+
" </thead>\n",
|
1080 |
+
" <tbody>\n",
|
1081 |
+
" </tbody>\n",
|
1082 |
+
"</table><p>"
|
1083 |
+
],
|
1084 |
+
"text/plain": [
|
1085 |
+
"<IPython.core.display.HTML object>"
|
1086 |
+
]
|
1087 |
+
},
|
1088 |
+
"metadata": {},
|
1089 |
+
"output_type": "display_data"
|
1090 |
+
}
|
1091 |
+
],
|
1092 |
"source": [
|
1093 |
"trainer.train()"
|
1094 |
]
|
|
|
1115 |
},
|
1116 |
{
|
1117 |
"cell_type": "code",
|
1118 |
+
"execution_count": 22,
|
1119 |
"id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
|
1120 |
"metadata": {},
|
1121 |
"outputs": [],
|
|
|
1123 |
"kwargs = {\n",
|
1124 |
" \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
|
1125 |
" \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
|
1126 |
+
" \"language\": \"zh-TW\",\n",
|
1127 |
" \"model_name\": \"Whisper Small Chinese - Kimbo Chen\", # a 'pretty' name for your model\n",
|
1128 |
" \"finetuned_from\": \"openai/whisper-small\",\n",
|
1129 |
" \"tasks\": \"automatic-speech-recognition\",\n",
|
|
|
1141 |
},
|
1142 |
{
|
1143 |
"cell_type": "code",
|
1144 |
+
"execution_count": 23,
|
1145 |
"id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
|
1146 |
"metadata": {},
|
1147 |
+
"outputs": [
|
1148 |
+
{
|
1149 |
+
"name": "stderr",
|
1150 |
+
"output_type": "stream",
|
1151 |
+
"text": [
|
1152 |
+
"Saving model checkpoint to ./\n",
|
1153 |
+
"Configuration saved in ./config.json\n",
|
1154 |
+
"Model weights saved in ./pytorch_model.bin\n",
|
1155 |
+
"Feature extractor saved in ./preprocessor_config.json\n",
|
1156 |
+
"tokenizer config file saved in ./tokenizer_config.json\n",
|
1157 |
+
"Special tokens file saved in ./special_tokens_map.json\n",
|
1158 |
+
"added tokens file saved in ./added_tokens.json\n"
|
1159 |
+
]
|
1160 |
+
},
|
1161 |
+
{
|
1162 |
+
"data": {
|
1163 |
+
"application/vnd.jupyter.widget-view+json": {
|
1164 |
+
"model_id": "dc59052a3b7f45b2b896c03763c79f57",
|
1165 |
+
"version_major": 2,
|
1166 |
+
"version_minor": 0
|
1167 |
+
},
|
1168 |
+
"text/plain": [
|
1169 |
+
"Upload file pytorch_model.bin: 0%| | 32.0k/922M [00:00<?, ?B/s]"
|
1170 |
+
]
|
1171 |
+
},
|
1172 |
+
"metadata": {},
|
1173 |
+
"output_type": "display_data"
|
1174 |
+
},
|
1175 |
+
{
|
1176 |
+
"data": {
|
1177 |
+
"application/vnd.jupyter.widget-view+json": {
|
1178 |
+
"model_id": "1c58442a44e84af9a6dff915e036de83",
|
1179 |
+
"version_major": 2,
|
1180 |
+
"version_minor": 0
|
1181 |
+
},
|
1182 |
+
"text/plain": [
|
1183 |
+
"Upload file training_args.bin: 100%|##########| 3.50k/3.50k [00:00<?, ?B/s]"
|
1184 |
+
]
|
1185 |
+
},
|
1186 |
+
"metadata": {},
|
1187 |
+
"output_type": "display_data"
|
1188 |
+
},
|
1189 |
+
{
|
1190 |
+
"name": "stderr",
|
1191 |
+
"output_type": "stream",
|
1192 |
+
"text": [
|
1193 |
+
"remote: Scanning LFS files for validity, may be slow... \n",
|
1194 |
+
"remote: LFS file scan complete. \n",
|
1195 |
+
"To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
|
1196 |
+
" 2ee4cf3..214645d main -> main\n",
|
1197 |
+
"\n",
|
1198 |
+
"Dropping the following result as it does not have all the necessary fields:\n",
|
1199 |
+
"{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'dataset': {'name': 'Common Voice 11.0', 'type': 'mozilla-foundation/common_voice_11_0', 'config': 'zh-TW', 'split': 'test', 'args': 'zh-TW'}}\n",
|
1200 |
+
"remote: ----------------------------------------------------------\u001b[0;31m \n",
|
1201 |
+
"remote: Sorry, your push was rejected during YAML metadata verification: \n",
|
1202 |
+
"remote: - Error: \"language[0]\" must only contain lowercase characters \n",
|
1203 |
+
"remote: - Error: \"language[0]\" with value \"zh-TW\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m \n",
|
1204 |
+
"remote: ---------------------------------------------------------- \n",
|
1205 |
+
"remote: Please find the documentation at: \n",
|
1206 |
+
"remote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m \n",
|
1207 |
+
"remote: ---------------------------------------------------------- \n",
|
1208 |
+
"To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
|
1209 |
+
" ! [remote rejected] main -> main (pre-receive hook declined)\n",
|
1210 |
+
"error: failed to push some refs to 'https://huggingface.co/kimbochen/whisper-small-zh-tw'\n",
|
1211 |
+
"\n",
|
1212 |
+
"Error pushing update to the model card. Please read logs and retry.\n",
|
1213 |
+
"$remote: ----------------------------------------------------------\u001b[0;31m \n",
|
1214 |
+
"remote: Sorry, your push was rejected during YAML metadata verification: \n",
|
1215 |
+
"remote: - Error: \"language[0]\" must only contain lowercase characters \n",
|
1216 |
+
"remote: - Error: \"language[0]\" with value \"zh-TW\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m \n",
|
1217 |
+
"remote: ---------------------------------------------------------- \n",
|
1218 |
+
"remote: Please find the documentation at: \n",
|
1219 |
+
"remote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m \n",
|
1220 |
+
"remote: ---------------------------------------------------------- \n",
|
1221 |
+
"To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
|
1222 |
+
" ! [remote rejected] main -> main (pre-receive hook declined)\n",
|
1223 |
+
"error: failed to push some refs to 'https://huggingface.co/kimbochen/whisper-small-zh-tw'\n",
|
1224 |
+
"\n"
|
1225 |
+
]
|
1226 |
+
},
|
1227 |
+
{
|
1228 |
+
"data": {
|
1229 |
+
"text/plain": [
|
1230 |
+
"'https://huggingface.co/kimbochen/whisper-small-zh-tw/commit/214645d6cd1f0e7ab6a65a854eec2e349529961c'"
|
1231 |
+
]
|
1232 |
+
},
|
1233 |
+
"execution_count": 23,
|
1234 |
+
"metadata": {},
|
1235 |
+
"output_type": "execute_result"
|
1236 |
+
}
|
1237 |
+
],
|
1238 |
"source": [
|
1239 |
"trainer.push_to_hub(**kwargs)"
|
1240 |
]
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 967102601
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68c37aa36016265b630dfcf67b6593ca65cefa6c6e939ab9dd790e2b04c9b56f
|
3 |
size 967102601
|
runs/Dec10_02-58-52_129-213-89-27/1670641248.2035987/events.out.tfevents.1670641248.129-213-89-27.128858.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d7d176987bf05d49e50c322906f78e49182290133c788cbef513dd25194be99
|
3 |
+
size 5863
|
runs/Dec10_02-58-52_129-213-89-27/events.out.tfevents.1670641248.129-213-89-27.128858.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57031bef3a05b71c381e0b0d76e9378fdb1bb7a416a15f96b5296653a4f5bb53
|
3 |
+
size 10869
|