End of training
fine-tune-whisper-streaming.ipynb
CHANGED
@@ -1082,7 +1082,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
  "metadata": {},
  "outputs": [],
@@ -1108,13 +1108,263 @@
  },
  {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 25,
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
  "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to ./\n",
+ "Configuration saved in ./config.json\n",
+ "Model weights saved in ./pytorch_model.bin\n",
+ "Feature extractor saved in ./preprocessor_config.json\n",
+ "tokenizer config file saved in ./tokenizer_config.json\n",
+ "Special tokens file saved in ./special_tokens_map.json\n",
+ "added tokens file saved in ./added_tokens.json\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "16739dc58bd048408e8154a39dca4590",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Upload file pytorch_model.bin: 0%| | 32.0k/922M [00:00<?, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "df61c3286393482e9084f4f78f661525",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Upload file runs/Dec10_01-56-07_129-213-27-84/events.out.tfevents.1670637380.129-213-27-84.69598.0: 95%|#####…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "remote: Scanning LFS files for validity, may be slow... \n",
+ "remote: LFS file scan complete. \n",
+ "To https://huggingface.co/kimbochen/whisper-small-jp\n",
+ "   f4b374d..e216c5d  main -> main\n",
+ "\n",
+ "Dropping the following result as it does not have all the necessary fields:\n",
+ "{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'dataset': {'name': 'Common Voice 11.0', 'type': 'mozilla-foundation/common_voice_11_0', 'config': 'ja', 'split': 'test', 'args': 'ja'}}\n",
+ "To https://huggingface.co/kimbochen/whisper-small-jp\n",
+ "   e216c5d..3a44fa5  main -> main\n",
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'https://huggingface.co/kimbochen/whisper-small-jp/commit/e216c5dfdb8e05855b7f8c0cb2778c7731a46633'"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
  "source": [
  "trainer.push_to_hub(**kwargs)"
  ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "4df1603c-ef35-40f1-ae57-3214441073c8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "PyTorch: setting up devices\n"
+ ]
+ }
+ ],
+ "source": [
+ "training_args = Seq2SeqTrainingArguments(\n",
+ "    output_dir=\"./\",\n",
+ "    per_device_train_batch_size=64,\n",
+ "    gradient_accumulation_steps=1,  # increase by 2x for every 2x decrease in batch size\n",
+ "    learning_rate=1e-5,\n",
+ "    max_steps=1000,\n",
+ "    num_train_epochs=-1,\n",
+ "    gradient_checkpointing=True,\n",
+ "    fp16=True,\n",
+ "    evaluation_strategy=\"steps\",\n",
+ "    per_device_eval_batch_size=8,\n",
+ "    predict_with_generate=True,\n",
+ "    generation_max_length=225,\n",
+ "    save_steps=1000,\n",
+ "    eval_steps=1000,\n",
+ "    logging_steps=25,\n",
+ "    report_to=[\"tensorboard\"],\n",
+ "    load_best_model_at_end=True,\n",
+ "    metric_for_best_model=\"wer\",\n",
+ "    greater_is_better=False,\n",
+ "    push_to_hub=True,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "afc2b554-7171-48c7-95aa-b7e61b70ab20",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/ubuntu/whisper-small-jp/./ is already a clone of https://huggingface.co/kimbochen/whisper-small-jp. Make sure you pull the latest changes with `repo.git_pull()`.\n",
+ "max_steps is given, it will override any value given in num_train_epochs\n",
+ "Using cuda_amp half precision backend\n"
+ ]
+ }
+ ],
+ "source": [
+ "trainer = Seq2SeqTrainer(\n",
+ "    args=training_args,\n",
+ "    model=model,\n",
+ "    train_dataset=vectorized_datasets[\"train\"],\n",
+ "    eval_dataset=vectorized_datasets[\"test\"],\n",
+ "    data_collator=data_collator,\n",
+ "    compute_metrics=compute_metrics,\n",
+ "    tokenizer=processor,\n",
+ "    callbacks=[ShuffleCallback()],\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "b029a1d8-24de-46e7-b067-0f900b1db342",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Loading model from checkpoint-4000.\n",
+ "/home/ubuntu/.venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ "  warnings.warn(\n",
+ "***** Running training *****\n",
+ "  Num examples = 64000\n",
+ "  Num Epochs = 9223372036854775807\n",
+ "  Instantaneous batch size per device = 64\n",
+ "  Total train batch size (w. parallel, distributed & accumulation) = 64\n",
+ "  Gradient Accumulation steps = 1\n",
+ "  Total optimization steps = 1000\n",
+ "  Number of trainable parameters = 241734912\n",
+ "  Continuing training from checkpoint, will skip to saved global_step\n",
+ "  Continuing training from epoch 4\n",
+ "  Continuing training from global step 4000\n",
+ "  Will skip the first 4 epochs then the first 0 batches in the first epoch. If this takes a lot of time, you can add the `--ignore_data_skip` flag to your launch command, but you will resume the training on data already seen by your model.\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "01337298313740d98d3cc75b6d5e3ff7",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "0it [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Reading metadata...: 0it [00:00, ?it/s]\u001b[A\n",
+ "Reading metadata...: 6505it [00:00, 34246.80it/s]\n",
+ "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
+ "\n",
+ "Reading metadata...: 6505it [00:00, 84823.64it/s]\n",
+ "\n",
+ "Reading metadata...: 6505it [00:00, 88617.62it/s]\n",
+ "\n",
+ "Reading metadata...: 6505it [00:00, 90289.78it/s]\n",
+ "\n",
+ "Reading metadata...: 6505it [00:00, 91816.92it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "    <div>\n",
+ "      \n",
+ "      <progress value='4001' max='1000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+ "      [1000/1000 00:00, Epoch 4/9223372036854775807]\n",
+ "    </div>\n",
+ "    <table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: left;\">\n",
+ "      <th>Step</th>\n",
+ "      <th>Training Loss</th>\n",
+ "      <th>Validation Loss</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "  </tbody>\n",
+ "</table><p>"
+ ],
+ "text/plain": [
+ "<IPython.core.display.HTML object>"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n",
+ "Loading best model from ./checkpoint-4000 (score: 88.31039863810469).\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "TrainOutput(global_step=4001, training_loss=8.343380785802548e-08, metrics={'train_runtime': 169.0541, 'train_samples_per_second': 378.577, 'train_steps_per_second': 5.915, 'total_flos': 7.363747084345344e+19, 'train_loss': 8.343380785802548e-08, 'epoch': 4.0})"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "trainer.train(\"checkpoint-4000\")"
+ ]
  }
  ],
  "metadata": {
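Worth noting about the resumed-training cell above: `trainer.train("checkpoint-4000")` restores a checkpoint whose global step (4000) already exceeds `max_steps=1000`, so the run ends almost immediately after the data skip — the progress bar reads 4001/1000, `train_runtime` is about 169 s, and no new optimizer steps are taken. Below is a minimal sketch of how one might genuinely continue past that checkpoint, assuming the notebook's earlier definitions (`model`, `data_collator`, `compute_metrics`, `processor`, `vectorized_datasets`, `ShuffleCallback`); the step budget of 5000 is illustrative, not from the commit.

```python
# Sketch: continue fine-tuning past checkpoint-4000 instead of stopping at once.
# max_steps must exceed the checkpoint's saved global step (4000 here),
# otherwise the Trainer halts as soon as the state is restored.
training_args.max_steps = 5000  # illustrative: 1000 additional optimizer steps

trainer = Seq2SeqTrainer(
    args=training_args,
    model=model,
    train_dataset=vectorized_datasets["train"],
    eval_dataset=vectorized_datasets["test"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    tokenizer=processor,
    callbacks=[ShuffleCallback()],
)
trainer.train(resume_from_checkpoint="checkpoint-4000")
```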
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:b3246529f086b22124c7901ea81e50c3e83cfe22009b2ee44ddc94f5bea88d86
  size 967102601
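The weights file is tracked with Git LFS, so the repository stores only this pointer: a sha256 `oid` plus a byte `size`. As a sketch, a locally downloaded copy can be checked against the pointer values above (the path assumes the file sits in the working directory):

```python
# Sketch: verify a downloaded LFS file against its pointer (oid + size).
import hashlib
import os

def matches_pointer(path: str, oid: str, size: int) -> bool:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == oid and os.path.getsize(path) == size

# Values taken from the pointer above.
print(matches_pointer(
    "pytorch_model.bin",
    "b3246529f086b22124c7901ea81e50c3e83cfe22009b2ee44ddc94f5bea88d86",
    967102601,
))
```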
runs/Dec10_16-23-25_129-213-27-84/1670689420.7830398/events.out.tfevents.1670689420.129-213-27-84.69598.3
ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:919f991c2b7b827b7bbfa43f46161f5f173d21c892703c4c7a1722f696dedfbb
+ size 5863
runs/Dec10_16-23-25_129-213-27-84/events.out.tfevents.1670689420.129-213-27-84.69598.2
ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:945311114191fd94d4d98afee3982d6e6ada989a6b38f2442c3c6e0217f1644d
+ size 4637
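The two added pointers under `runs/` are TensorBoard event files from the resumed run (`report_to=["tensorboard"]` in the training arguments). A sketch for listing what they log, assuming the `tensorboard` package is installed and the LFS files have been pulled:

```python
# Sketch: list the scalar tags recorded in the new tfevents files.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/Dec10_16-23-25_129-213-27-84")
acc.Reload()
print(acc.Tags()["scalars"])  # e.g. train/loss and eval metrics, if logged
```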
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:004a4cf781ce4e3549410cee708eb390c3b675a56f1d039eff79f582955c901a
  size 3579
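`training_args.bin` is the pickled `Seq2SeqTrainingArguments` object that the `Trainer` writes next to the weights; only its hash changes in this commit. A sketch for inspecting it, assuming a `transformers` version compatible with the pickle:

```python
# Sketch: load and inspect the saved training arguments.
# On recent PyTorch releases, torch.load needs weights_only=False
# to unpickle arbitrary objects such as TrainingArguments.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.max_steps, args.learning_rate, args.metric_for_best_model)
```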