Mark-X committed on
Commit 40fd564
1 Parent(s): c6c8628

Training in progress, step 5000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cf43a03afae66bb3c4d399a71b5078f30e906b4274ce36c920ba84caf179845b
+ oid sha256:fc2f5c96cac3818f65cfd5c9cbbc938181590aa22a40157a415da73638fe8d56
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6bbd16135b2949d2dab3029905a4689d6db7f208fb6ff5e68520830cf1038c06
+ oid sha256:eb30c79faba064019e635c6d628696fb39cb276fc2d4ab8f2a4fd879b16ae848
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a3a62e95bda658608d0b3d1a8c439b47c4e9f6762126dca6bd05f7169f1ce4f0
+ oid sha256:c46cde2c439b7b077df619cfbbcd27c19f63a4087af7587647d8e592b96a4377
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:80deb1a4a6b859e82ecbde26a5ea3f76d46bacb4e73c5bf8a314599f95868431
+ oid sha256:baa6eb8a5862db08b04e41465494b9c605adfbffdf8261b4c134e0597e77bdcd
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 52.628043660789245,
- "best_model_checkpoint": "./whisper-small-or/checkpoint-4000",
- "epoch": 31.25,
+ "best_metric": 52.426532325776655,
+ "best_model_checkpoint": "./whisper-small-or/checkpoint-5000",
+ "epoch": 39.0625,
  "eval_steps": 1000,
- "global_step": 4000,
+ "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1163,6 +1163,295 @@
  "eval_steps_per_second": 0.116,
  "eval_wer": 52.628043660789245,
  "step": 4000
+ },
+ {
+ "epoch": 31.4453125,
+ "grad_norm": 0.0038889453280717134,
+ "learning_rate": 2.1755555555555556e-06,
+ "loss": 0.0,
+ "step": 4025
+ },
+ {
+ "epoch": 31.640625,
+ "grad_norm": 0.003946984186768532,
+ "learning_rate": 2.12e-06,
+ "loss": 0.0,
+ "step": 4050
+ },
+ {
+ "epoch": 31.8359375,
+ "grad_norm": 0.003195575438439846,
+ "learning_rate": 2.064444444444445e-06,
+ "loss": 0.0,
+ "step": 4075
+ },
+ {
+ "epoch": 32.03125,
+ "grad_norm": 0.004425502847880125,
+ "learning_rate": 2.008888888888889e-06,
+ "loss": 0.0,
+ "step": 4100
+ },
+ {
+ "epoch": 32.2265625,
+ "grad_norm": 0.003221447579562664,
+ "learning_rate": 1.9533333333333334e-06,
+ "loss": 0.0,
+ "step": 4125
+ },
+ {
+ "epoch": 32.421875,
+ "grad_norm": 0.004509610123932362,
+ "learning_rate": 1.8977777777777779e-06,
+ "loss": 0.0,
+ "step": 4150
+ },
+ {
+ "epoch": 32.6171875,
+ "grad_norm": 0.003536405274644494,
+ "learning_rate": 1.8422222222222225e-06,
+ "loss": 0.0,
+ "step": 4175
+ },
+ {
+ "epoch": 32.8125,
+ "grad_norm": 0.004426420200616121,
+ "learning_rate": 1.7866666666666668e-06,
+ "loss": 0.0,
+ "step": 4200
+ },
+ {
+ "epoch": 33.0078125,
+ "grad_norm": 0.003728843992576003,
+ "learning_rate": 1.7311111111111112e-06,
+ "loss": 0.0,
+ "step": 4225
+ },
+ {
+ "epoch": 33.203125,
+ "grad_norm": 0.0036569128278642893,
+ "learning_rate": 1.675555555555556e-06,
+ "loss": 0.0,
+ "step": 4250
+ },
+ {
+ "epoch": 33.3984375,
+ "grad_norm": 0.004421570338308811,
+ "learning_rate": 1.6200000000000002e-06,
+ "loss": 0.0,
+ "step": 4275
+ },
+ {
+ "epoch": 33.59375,
+ "grad_norm": 0.003550709690898657,
+ "learning_rate": 1.5644444444444446e-06,
+ "loss": 0.0,
+ "step": 4300
+ },
+ {
+ "epoch": 33.7890625,
+ "grad_norm": 0.003777899779379368,
+ "learning_rate": 1.5088888888888889e-06,
+ "loss": 0.0,
+ "step": 4325
+ },
+ {
+ "epoch": 33.984375,
+ "grad_norm": 0.002803810639306903,
+ "learning_rate": 1.4533333333333335e-06,
+ "loss": 0.0,
+ "step": 4350
+ },
+ {
+ "epoch": 34.1796875,
+ "grad_norm": 0.0037676438223570585,
+ "learning_rate": 1.397777777777778e-06,
+ "loss": 0.0,
+ "step": 4375
+ },
+ {
+ "epoch": 34.375,
+ "grad_norm": 0.00398740591481328,
+ "learning_rate": 1.3422222222222222e-06,
+ "loss": 0.0,
+ "step": 4400
+ },
+ {
+ "epoch": 34.5703125,
+ "grad_norm": 0.003252105787396431,
+ "learning_rate": 1.286666666666667e-06,
+ "loss": 0.0,
+ "step": 4425
+ },
+ {
+ "epoch": 34.765625,
+ "grad_norm": 0.004870784468948841,
+ "learning_rate": 1.2311111111111112e-06,
+ "loss": 0.0,
+ "step": 4450
+ },
+ {
+ "epoch": 34.9609375,
+ "grad_norm": 0.0036819297820329666,
+ "learning_rate": 1.1755555555555556e-06,
+ "loss": 0.0,
+ "step": 4475
+ },
+ {
+ "epoch": 35.15625,
+ "grad_norm": 0.003786075860261917,
+ "learning_rate": 1.12e-06,
+ "loss": 0.0,
+ "step": 4500
+ },
+ {
+ "epoch": 35.3515625,
+ "grad_norm": 0.004635694436728954,
+ "learning_rate": 1.0644444444444445e-06,
+ "loss": 0.0,
+ "step": 4525
+ },
+ {
+ "epoch": 35.546875,
+ "grad_norm": 0.003975734580308199,
+ "learning_rate": 1.008888888888889e-06,
+ "loss": 0.0,
+ "step": 4550
+ },
+ {
+ "epoch": 35.7421875,
+ "grad_norm": 0.0033323836978524923,
+ "learning_rate": 9.533333333333335e-07,
+ "loss": 0.0,
+ "step": 4575
+ },
+ {
+ "epoch": 35.9375,
+ "grad_norm": 0.003018638351932168,
+ "learning_rate": 8.977777777777778e-07,
+ "loss": 0.0,
+ "step": 4600
+ },
+ {
+ "epoch": 36.1328125,
+ "grad_norm": 0.0025316779501736164,
+ "learning_rate": 8.422222222222224e-07,
+ "loss": 0.0,
+ "step": 4625
+ },
+ {
+ "epoch": 36.328125,
+ "grad_norm": 0.0029895787592977285,
+ "learning_rate": 7.866666666666667e-07,
+ "loss": 0.0,
+ "step": 4650
+ },
+ {
+ "epoch": 36.5234375,
+ "grad_norm": 0.0027492698282003403,
+ "learning_rate": 7.311111111111112e-07,
+ "loss": 0.0,
+ "step": 4675
+ },
+ {
+ "epoch": 36.71875,
+ "grad_norm": 0.002066458808258176,
+ "learning_rate": 6.755555555555555e-07,
+ "loss": 0.0,
+ "step": 4700
+ },
+ {
+ "epoch": 36.9140625,
+ "grad_norm": 0.003311043605208397,
+ "learning_rate": 6.200000000000001e-07,
+ "loss": 0.0,
+ "step": 4725
+ },
+ {
+ "epoch": 37.109375,
+ "grad_norm": 0.002838843734934926,
+ "learning_rate": 5.644444444444445e-07,
+ "loss": 0.0,
+ "step": 4750
+ },
+ {
+ "epoch": 37.3046875,
+ "grad_norm": 0.0030876589007675648,
+ "learning_rate": 5.088888888888889e-07,
+ "loss": 0.0,
+ "step": 4775
+ },
+ {
+ "epoch": 37.5,
+ "grad_norm": 0.002636523451656103,
+ "learning_rate": 4.533333333333334e-07,
+ "loss": 0.0,
+ "step": 4800
+ },
+ {
+ "epoch": 37.6953125,
+ "grad_norm": 0.0028531572315841913,
+ "learning_rate": 3.9777777777777783e-07,
+ "loss": 0.0,
+ "step": 4825
+ },
+ {
+ "epoch": 37.890625,
+ "grad_norm": 0.0033609780948609114,
+ "learning_rate": 3.422222222222223e-07,
+ "loss": 0.0,
+ "step": 4850
+ },
+ {
+ "epoch": 38.0859375,
+ "grad_norm": 0.0024441394489258528,
+ "learning_rate": 2.866666666666667e-07,
+ "loss": 0.0,
+ "step": 4875
+ },
+ {
+ "epoch": 38.28125,
+ "grad_norm": 0.002934487536549568,
+ "learning_rate": 2.3111111111111112e-07,
+ "loss": 0.0,
+ "step": 4900
+ },
+ {
+ "epoch": 38.4765625,
+ "grad_norm": 0.002934112912043929,
+ "learning_rate": 1.7555555555555558e-07,
+ "loss": 0.0,
+ "step": 4925
+ },
+ {
+ "epoch": 38.671875,
+ "grad_norm": 0.00293625146150589,
+ "learning_rate": 1.2000000000000002e-07,
+ "loss": 0.0,
+ "step": 4950
+ },
+ {
+ "epoch": 38.8671875,
+ "grad_norm": 0.003216015174984932,
+ "learning_rate": 6.444444444444445e-08,
+ "loss": 0.0,
+ "step": 4975
+ },
+ {
+ "epoch": 39.0625,
+ "grad_norm": 0.003249627770856023,
+ "learning_rate": 8.88888888888889e-09,
+ "loss": 0.0,
+ "step": 5000
+ },
+ {
+ "epoch": 39.0625,
+ "eval_loss": 0.36558446288108826,
+ "eval_runtime": 777.8187,
+ "eval_samples_per_second": 0.895,
+ "eval_steps_per_second": 0.112,
+ "eval_wer": 52.426532325776655,
+ "step": 5000
  }
  ],
  "logging_steps": 25,
@@ -1177,12 +1466,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 1.846946562048e+19,
+ "total_flos": 2.30868320256e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b0d694af995cc0b8902f76df9a2d674f5e6b3422ed5aa3a344b068840f647b99
+ oid sha256:698b2142451bdf12f18341959c86e35236ca3ee806d70ff10cda10798130acfb
  size 5368