End of training
fine-tune-whisper-streaming.ipynb
CHANGED
@@ -1082,7 +1082,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
  "metadata": {},
  "outputs": [],
@@ -1108,13 +1108,263 @@
  },
  {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 25,
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
  "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to ./\n",
+ "Configuration saved in ./config.json\n",
+ "Model weights saved in ./pytorch_model.bin\n",
+ "Feature extractor saved in ./preprocessor_config.json\n",
+ "tokenizer config file saved in ./tokenizer_config.json\n",
+ "Special tokens file saved in ./special_tokens_map.json\n",
+ "added tokens file saved in ./added_tokens.json\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "16739dc58bd048408e8154a39dca4590",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Upload file pytorch_model.bin: 0%| | 32.0k/922M [00:00<?, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "df61c3286393482e9084f4f78f661525",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Upload file runs/Dec10_01-56-07_129-213-27-84/events.out.tfevents.1670637380.129-213-27-84.69598.0: 95%|#####…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "remote: Scanning LFS files for validity, may be slow... \n",
+ "remote: LFS file scan complete. \n",
+ "To https://huggingface.co/kimbochen/whisper-small-jp\n",
+ "   f4b374d..e216c5d  main -> main\n",
+ "\n",
+ "Dropping the following result as it does not have all the necessary fields:\n",
+ "{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'dataset': {'name': 'Common Voice 11.0', 'type': 'mozilla-foundation/common_voice_11_0', 'config': 'ja', 'split': 'test', 'args': 'ja'}}\n",
+ "To https://huggingface.co/kimbochen/whisper-small-jp\n",
+ "   e216c5d..3a44fa5  main -> main\n",
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'https://huggingface.co/kimbochen/whisper-small-jp/commit/e216c5dfdb8e05855b7f8c0cb2778c7731a46633'"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
  "source": [
  "trainer.push_to_hub(**kwargs)"
  ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "4df1603c-ef35-40f1-ae57-3214441073c8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "PyTorch: setting up devices\n"
+ ]
+ }
+ ],
+ "source": [
+ "training_args = Seq2SeqTrainingArguments(\n",
+ "    output_dir=\"./\",\n",
+ "    per_device_train_batch_size=64,\n",
+ "    gradient_accumulation_steps=1,  # increase by 2x for every 2x decrease in batch size\n",
+ "    learning_rate=1e-5,\n",
+ "    max_steps=1000,\n",
+ "    num_train_epochs=-1,\n",
+ "    gradient_checkpointing=True,\n",
+ "    fp16=True,\n",
+ "    evaluation_strategy=\"steps\",\n",
+ "    per_device_eval_batch_size=8,\n",
+ "    predict_with_generate=True,\n",
+ "    generation_max_length=225,\n",
+ "    save_steps=1000,\n",
+ "    eval_steps=1000,\n",
+ "    logging_steps=25,\n",
+ "    report_to=[\"tensorboard\"],\n",
+ "    load_best_model_at_end=True,\n",
+ "    metric_for_best_model=\"wer\",\n",
+ "    greater_is_better=False,\n",
+ "    push_to_hub=True,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "afc2b554-7171-48c7-95aa-b7e61b70ab20",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/ubuntu/whisper-small-jp/./ is already a clone of https://huggingface.co/kimbochen/whisper-small-jp. Make sure you pull the latest changes with `repo.git_pull()`.\n",
+ "max_steps is given, it will override any value given in num_train_epochs\n",
+ "Using cuda_amp half precision backend\n"
+ ]
+ }
+ ],
+ "source": [
+ "trainer = Seq2SeqTrainer(\n",
+ "    args=training_args,\n",
+ "    model=model,\n",
+ "    train_dataset=vectorized_datasets[\"train\"],\n",
+ "    eval_dataset=vectorized_datasets[\"test\"],\n",
+ "    data_collator=data_collator,\n",
+ "    compute_metrics=compute_metrics,\n",
+ "    tokenizer=processor,\n",
+ "    callbacks=[ShuffleCallback()],\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "b029a1d8-24de-46e7-b067-0f900b1db342",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Loading model from checkpoint-4000.\n",
+ "/home/ubuntu/.venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ "  warnings.warn(\n",
+ "***** Running training *****\n",
+ "  Num examples = 64000\n",
+ "  Num Epochs = 9223372036854775807\n",
+ "  Instantaneous batch size per device = 64\n",
+ "  Total train batch size (w. parallel, distributed & accumulation) = 64\n",
+ "  Gradient Accumulation steps = 1\n",
+ "  Total optimization steps = 1000\n",
+ "  Number of trainable parameters = 241734912\n",
+ "  Continuing training from checkpoint, will skip to saved global_step\n",
+ "  Continuing training from epoch 4\n",
+ "  Continuing training from global step 4000\n",
+ "  Will skip the first 4 epochs then the first 0 batches in the first epoch. If this takes a lot of time, you can add the `--ignore_data_skip` flag to your launch command, but you will resume the training on data already seen by your model.\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "01337298313740d98d3cc75b6d5e3ff7",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "0it [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Reading metadata...: 0it [00:00, ?it/s]\u001b[A\n",
+ "Reading metadata...: 6505it [00:00, 34246.80it/s]\n",
+ "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
+ "\n",
+ "Reading metadata...: 6505it [00:00, 84823.64it/s]\n",
+ "\n",
+ "Reading metadata...: 6505it [00:00, 88617.62it/s]\n",
+ "\n",
+ "Reading metadata...: 6505it [00:00, 90289.78it/s]\n",
+ "\n",
+ "Reading metadata...: 6505it [00:00, 91816.92it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "    <div>\n",
+ "      \n",
+ "      <progress value='4001' max='1000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+ "      [1000/1000 00:00, Epoch 4/9223372036854775807]\n",
+ "    </div>\n",
+ "    <table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: left;\">\n",
+ "      <th>Step</th>\n",
+ "      <th>Training Loss</th>\n",
+ "      <th>Validation Loss</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "  </tbody>\n",
+ "</table><p>"
+ ],
+ "text/plain": [
+ "<IPython.core.display.HTML object>"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n",
+ "Loading best model from ./checkpoint-4000 (score: 88.31039863810469).\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "TrainOutput(global_step=4001, training_loss=8.343380785802548e-08, metrics={'train_runtime': 169.0541, 'train_samples_per_second': 378.577, 'train_steps_per_second': 5.915, 'total_flos': 7.363747084345344e+19, 'train_loss': 8.343380785802548e-08, 'epoch': 4.0})"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "trainer.train(\"checkpoint-4000\")"
+ ]
  }
  ],
  "metadata": {
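Worth noting about the resumed-training cell above: `trainer.train("checkpoint-4000")` restores a checkpoint whose global step (4000) already exceeds `max_steps=1000`, so the run ends almost immediately after the data skip — the progress bar reads 4001/1000, `train_runtime` is about 169 s, and no new optimizer steps are taken. Below is a minimal sketch of how one might genuinely continue past that checkpoint, assuming the notebook's earlier definitions (`model`, `data_collator`, `compute_metrics`, `processor`, `vectorized_datasets`, `ShuffleCallback`); the step budget of 5000 is illustrative, not from the commit.

```python
# Sketch: continue fine-tuning past checkpoint-4000 instead of stopping at once.
# max_steps must exceed the checkpoint's saved global step (4000 here),
# otherwise the Trainer halts as soon as the state is restored.
training_args.max_steps = 5000  # illustrative: 1000 additional optimizer steps

trainer = Seq2SeqTrainer(
    args=training_args,
    model=model,
    train_dataset=vectorized_datasets["train"],
    eval_dataset=vectorized_datasets["test"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    tokenizer=processor,
    callbacks=[ShuffleCallback()],
)
trainer.train(resume_from_checkpoint="checkpoint-4000")
```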
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:b3246529f086b22124c7901ea81e50c3e83cfe22009b2ee44ddc94f5bea88d86
  size 967102601
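The weights file is tracked with Git LFS, so the repository stores only this pointer: a sha256 `oid` plus a byte `size`. As a sketch, a locally downloaded copy can be checked against the pointer values above (the path assumes the file sits in the working directory):

```python
# Sketch: verify a downloaded LFS file against its pointer (oid + size).
import hashlib
import os

def matches_pointer(path: str, oid: str, size: int) -> bool:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == oid and os.path.getsize(path) == size

# Values taken from the pointer above.
print(matches_pointer(
    "pytorch_model.bin",
    "b3246529f086b22124c7901ea81e50c3e83cfe22009b2ee44ddc94f5bea88d86",
    967102601,
))
```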
runs/Dec10_16-23-25_129-213-27-84/1670689420.7830398/events.out.tfevents.1670689420.129-213-27-84.69598.3
ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:919f991c2b7b827b7bbfa43f46161f5f173d21c892703c4c7a1722f696dedfbb
+ size 5863
runs/Dec10_16-23-25_129-213-27-84/events.out.tfevents.1670689420.129-213-27-84.69598.2
ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:945311114191fd94d4d98afee3982d6e6ada989a6b38f2442c3c6e0217f1644d
+ size 4637
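The two added pointers under `runs/` are TensorBoard event files from the resumed run (`report_to=["tensorboard"]` in the training arguments). A sketch for listing what they log, assuming the `tensorboard` package is installed and the LFS files have been pulled:

```python
# Sketch: list the scalar tags recorded in the new tfevents files.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/Dec10_16-23-25_129-213-27-84")
acc.Reload()
print(acc.Tags()["scalars"])  # e.g. train/loss and eval metrics, if logged
```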
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:004a4cf781ce4e3549410cee708eb390c3b675a56f1d039eff79f582955c901a
  size 3579
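`training_args.bin` is the pickled `Seq2SeqTrainingArguments` object that the `Trainer` writes next to the weights; only its hash changes in this commit. A sketch for inspecting it, assuming a `transformers` version compatible with the pickle:

```python
# Sketch: load and inspect the saved training arguments.
# On recent PyTorch releases, torch.load needs weights_only=False
# to unpickle arbitrary objects such as TrainingArguments.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.max_steps, args.learning_rate, args.metric_for_best_model)
```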