{
  "best_metric": 42.71378740060809,
  "best_model_checkpoint": "./whisper-small-ka/checkpoint-14500",
  "epoch": 3.7697914048755967,
  "eval_steps": 500,
  "global_step": 15000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008451656524678837,
      "grad_norm": 7.251798629760742,
      "learning_rate": 4.6000000000000004e-07,
      "loss": 1.7697,
      "step": 25
    },
    {
      "epoch": 0.016903313049357674,
      "grad_norm": 6.12908935546875,
      "learning_rate": 9.600000000000001e-07,
      "loss": 1.6874,
      "step": 50
    },
    {
      "epoch": 0.02535496957403651,
      "grad_norm": 6.023613452911377,
      "learning_rate": 1.46e-06,
      "loss": 1.5915,
      "step": 75
    },
    {
      "epoch": 0.03380662609871535,
      "grad_norm": 8.941411972045898,
      "learning_rate": 1.9600000000000003e-06,
      "loss": 1.3679,
      "step": 100
    },
    {
      "epoch": 0.042258282623394185,
      "grad_norm": 6.101173400878906,
      "learning_rate": 2.46e-06,
      "loss": 0.9502,
      "step": 125
    },
    {
      "epoch": 0.05070993914807302,
      "grad_norm": 6.879887580871582,
      "learning_rate": 2.96e-06,
      "loss": 0.6954,
      "step": 150
    },
    {
      "epoch": 0.05916159567275186,
      "grad_norm": 5.129401206970215,
      "learning_rate": 3.46e-06,
      "loss": 0.5745,
      "step": 175
    },
    {
      "epoch": 0.0676132521974307,
      "grad_norm": 6.113542079925537,
      "learning_rate": 3.96e-06,
      "loss": 0.4604,
      "step": 200
    },
    {
      "epoch": 0.07606490872210954,
      "grad_norm": 5.392640590667725,
      "learning_rate": 4.4600000000000005e-06,
      "loss": 0.4114,
      "step": 225
    },
    {
      "epoch": 0.08451656524678837,
      "grad_norm": 5.14113712310791,
      "learning_rate": 4.960000000000001e-06,
      "loss": 0.3859,
      "step": 250
    },
    {
      "epoch": 0.09296822177146721,
      "grad_norm": 3.8609211444854736,
      "learning_rate": 5.460000000000001e-06,
      "loss": 0.3692,
      "step": 275
    },
    {
      "epoch": 0.10141987829614604,
      "grad_norm": 4.686000823974609,
      "learning_rate": 5.9600000000000005e-06,
      "loss": 0.3298,
      "step": 300
    },
    {
      "epoch": 0.10987153482082489,
      "grad_norm": 5.032454490661621,
      "learning_rate": 6.460000000000001e-06,
      "loss": 0.2976,
      "step": 325
    },
    {
      "epoch": 0.11832319134550372,
      "grad_norm": 3.724012851715088,
      "learning_rate": 6.96e-06,
      "loss": 0.309,
      "step": 350
    },
    {
      "epoch": 0.12677484787018256,
      "grad_norm": 3.2685935497283936,
      "learning_rate": 7.4600000000000006e-06,
      "loss": 0.2897,
      "step": 375
    },
    {
      "epoch": 0.1352265043948614,
      "grad_norm": 3.796637535095215,
      "learning_rate": 7.960000000000002e-06,
      "loss": 0.2763,
      "step": 400
    },
    {
      "epoch": 0.14367816091954022,
      "grad_norm": 6.331181049346924,
      "learning_rate": 8.46e-06,
      "loss": 0.2714,
      "step": 425
    },
    {
      "epoch": 0.15212981744421908,
      "grad_norm": 4.792943000793457,
      "learning_rate": 8.96e-06,
      "loss": 0.2718,
      "step": 450
    },
    {
      "epoch": 0.1605814739688979,
      "grad_norm": 3.872354507446289,
      "learning_rate": 9.460000000000001e-06,
      "loss": 0.2692,
      "step": 475
    },
    {
      "epoch": 0.16903313049357674,
      "grad_norm": 3.859339475631714,
      "learning_rate": 9.960000000000001e-06,
      "loss": 0.2612,
      "step": 500
    },
    {
      "epoch": 0.16903313049357674,
      "eval_loss": 0.26686015725135803,
      "eval_runtime": 8169.6122,
      "eval_samples_per_second": 1.79,
      "eval_steps_per_second": 0.224,
      "eval_wer": 61.611321105313856,
      "step": 500
    },
    {
      "epoch": 0.17748478701825557,
      "grad_norm": 3.6627910137176514,
      "learning_rate": 9.975789473684211e-06,
      "loss": 0.2347,
      "step": 525
    },
    {
      "epoch": 0.18593644354293443,
      "grad_norm": 3.168164014816284,
      "learning_rate": 9.949473684210526e-06,
      "loss": 0.2438,
      "step": 550
    },
    {
      "epoch": 0.19438810006761326,
      "grad_norm": 2.521146297454834,
      "learning_rate": 9.923157894736844e-06,
      "loss": 0.2238,
      "step": 575
    },
    {
      "epoch": 0.2028397565922921,
      "grad_norm": 4.583806991577148,
      "learning_rate": 9.89684210526316e-06,
      "loss": 0.2412,
      "step": 600
    },
    {
      "epoch": 0.21129141311697092,
      "grad_norm": 3.045239210128784,
      "learning_rate": 9.870526315789474e-06,
      "loss": 0.2155,
      "step": 625
    },
    {
      "epoch": 0.21974306964164977,
      "grad_norm": 3.3001856803894043,
      "learning_rate": 9.84421052631579e-06,
      "loss": 0.2382,
      "step": 650
    },
    {
      "epoch": 0.2281947261663286,
      "grad_norm": 3.613844871520996,
      "learning_rate": 9.817894736842106e-06,
      "loss": 0.2266,
      "step": 675
    },
    {
      "epoch": 0.23664638269100743,
      "grad_norm": 3.4819045066833496,
      "learning_rate": 9.791578947368422e-06,
      "loss": 0.199,
      "step": 700
    },
    {
      "epoch": 0.24509803921568626,
      "grad_norm": 3.3764617443084717,
      "learning_rate": 9.765263157894737e-06,
      "loss": 0.1981,
      "step": 725
    },
    {
      "epoch": 0.2535496957403651,
      "grad_norm": 2.9908406734466553,
      "learning_rate": 9.738947368421054e-06,
      "loss": 0.2129,
      "step": 750
    },
    {
      "epoch": 0.26200135226504395,
      "grad_norm": 3.408409833908081,
      "learning_rate": 9.712631578947369e-06,
      "loss": 0.2171,
      "step": 775
    },
    {
      "epoch": 0.2704530087897228,
      "grad_norm": 3.7549092769622803,
      "learning_rate": 9.686315789473684e-06,
      "loss": 0.2112,
      "step": 800
    },
    {
      "epoch": 0.2789046653144016,
      "grad_norm": 3.1782822608947754,
      "learning_rate": 9.66e-06,
      "loss": 0.2224,
      "step": 825
    },
    {
      "epoch": 0.28735632183908044,
      "grad_norm": 3.1413159370422363,
      "learning_rate": 9.633684210526316e-06,
      "loss": 0.1974,
      "step": 850
    },
    {
      "epoch": 0.29580797836375927,
      "grad_norm": 3.027810573577881,
      "learning_rate": 9.607368421052632e-06,
      "loss": 0.2078,
      "step": 875
    },
    {
      "epoch": 0.30425963488843816,
      "grad_norm": 2.7142438888549805,
      "learning_rate": 9.581052631578947e-06,
      "loss": 0.209,
      "step": 900
    },
    {
      "epoch": 0.312711291413117,
      "grad_norm": 2.405505895614624,
      "learning_rate": 9.554736842105264e-06,
      "loss": 0.1896,
      "step": 925
    },
    {
      "epoch": 0.3211629479377958,
      "grad_norm": 3.2011518478393555,
      "learning_rate": 9.52842105263158e-06,
      "loss": 0.1906,
      "step": 950
    },
    {
      "epoch": 0.32961460446247465,
      "grad_norm": 3.0706121921539307,
      "learning_rate": 9.502105263157896e-06,
      "loss": 0.1677,
      "step": 975
    },
    {
      "epoch": 0.3380662609871535,
      "grad_norm": 2.0819201469421387,
      "learning_rate": 9.475789473684212e-06,
      "loss": 0.1919,
      "step": 1000
    },
    {
      "epoch": 0.3380662609871535,
      "eval_loss": 0.2120433747768402,
      "eval_runtime": 8173.558,
      "eval_samples_per_second": 1.79,
      "eval_steps_per_second": 0.224,
      "eval_wer": 54.19627095935024,
      "step": 1000
    },
    {
      "epoch": 0.3465179175118323,
      "grad_norm": 4.2216315269470215,
      "learning_rate": 9.449473684210527e-06,
      "loss": 0.181,
      "step": 1025
    },
    {
      "epoch": 0.35496957403651114,
      "grad_norm": 3.4836745262145996,
      "learning_rate": 9.423157894736842e-06,
      "loss": 0.187,
      "step": 1050
    },
    {
      "epoch": 0.36342123056118997,
      "grad_norm": 2.6206018924713135,
      "learning_rate": 9.396842105263159e-06,
      "loss": 0.1634,
      "step": 1075
    },
    {
      "epoch": 0.37187288708586885,
      "grad_norm": 1.9824305772781372,
      "learning_rate": 9.370526315789474e-06,
      "loss": 0.1835,
      "step": 1100
    },
    {
      "epoch": 0.3803245436105477,
      "grad_norm": 3.2038474082946777,
      "learning_rate": 9.34421052631579e-06,
      "loss": 0.1849,
      "step": 1125
    },
    {
      "epoch": 0.3887762001352265,
      "grad_norm": 2.054469108581543,
      "learning_rate": 9.317894736842105e-06,
      "loss": 0.1739,
      "step": 1150
    },
    {
      "epoch": 0.39722785665990534,
      "grad_norm": 3.952493190765381,
      "learning_rate": 9.291578947368422e-06,
      "loss": 0.193,
      "step": 1175
    },
    {
      "epoch": 0.4056795131845842,
      "grad_norm": 2.7407076358795166,
      "learning_rate": 9.265263157894737e-06,
      "loss": 0.1709,
      "step": 1200
    },
    {
      "epoch": 0.414131169709263,
      "grad_norm": 2.922785758972168,
      "learning_rate": 9.238947368421052e-06,
      "loss": 0.1782,
      "step": 1225
    },
    {
      "epoch": 0.42258282623394183,
      "grad_norm": 2.69055438041687,
      "learning_rate": 9.21263157894737e-06,
      "loss": 0.1596,
      "step": 1250
    },
    {
      "epoch": 0.43103448275862066,
      "grad_norm": 2.2836110591888428,
      "learning_rate": 9.186315789473685e-06,
      "loss": 0.1747,
      "step": 1275
    },
    {
      "epoch": 0.43948613928329955,
      "grad_norm": 3.7077369689941406,
      "learning_rate": 9.16e-06,
      "loss": 0.1669,
      "step": 1300
    },
    {
      "epoch": 0.4479377958079784,
      "grad_norm": 2.6970176696777344,
      "learning_rate": 9.133684210526317e-06,
      "loss": 0.1501,
      "step": 1325
    },
    {
      "epoch": 0.4563894523326572,
      "grad_norm": 2.819770097732544,
      "learning_rate": 9.107368421052632e-06,
      "loss": 0.1559,
      "step": 1350
    },
    {
      "epoch": 0.46484110885733604,
      "grad_norm": 2.728736400604248,
      "learning_rate": 9.081052631578949e-06,
      "loss": 0.1865,
      "step": 1375
    },
    {
      "epoch": 0.47329276538201487,
      "grad_norm": 2.9660186767578125,
      "learning_rate": 9.054736842105264e-06,
      "loss": 0.1664,
      "step": 1400
    },
    {
      "epoch": 0.4817444219066937,
      "grad_norm": 2.8889880180358887,
      "learning_rate": 9.02842105263158e-06,
      "loss": 0.1533,
      "step": 1425
    },
    {
      "epoch": 0.49019607843137253,
      "grad_norm": 2.158355474472046,
      "learning_rate": 9.002105263157895e-06,
      "loss": 0.1494,
      "step": 1450
    },
    {
      "epoch": 0.49864773495605136,
      "grad_norm": 2.2264366149902344,
      "learning_rate": 8.97578947368421e-06,
      "loss": 0.1551,
      "step": 1475
    },
    {
      "epoch": 0.5070993914807302,
      "grad_norm": 2.8769357204437256,
      "learning_rate": 8.949473684210527e-06,
      "loss": 0.1706,
      "step": 1500
    },
    {
      "epoch": 0.5070993914807302,
      "eval_loss": 0.19238848984241486,
      "eval_runtime": 8175.5745,
      "eval_samples_per_second": 1.789,
      "eval_steps_per_second": 0.224,
      "eval_wer": 51.696552128819626,
      "step": 1500
    },
    {
      "epoch": 0.5155510480054091,
      "grad_norm": 2.3073925971984863,
      "learning_rate": 8.923157894736842e-06,
      "loss": 0.1645,
      "step": 1525
    },
    {
      "epoch": 0.5240027045300879,
      "grad_norm": 2.7228150367736816,
      "learning_rate": 8.896842105263159e-06,
      "loss": 0.1333,
      "step": 1550
    },
    {
      "epoch": 0.5324543610547667,
      "grad_norm": 1.97380530834198,
      "learning_rate": 8.870526315789474e-06,
      "loss": 0.1557,
      "step": 1575
    },
    {
      "epoch": 0.5409060175794456,
      "grad_norm": 2.8810389041900635,
      "learning_rate": 8.84421052631579e-06,
      "loss": 0.1578,
      "step": 1600
    },
    {
      "epoch": 0.5493576741041244,
      "grad_norm": 2.2714059352874756,
      "learning_rate": 8.817894736842107e-06,
      "loss": 0.138,
      "step": 1625
    },
    {
      "epoch": 0.5578093306288032,
      "grad_norm": 1.7381818294525146,
      "learning_rate": 8.791578947368422e-06,
      "loss": 0.1486,
      "step": 1650
    },
    {
      "epoch": 0.566260987153482,
      "grad_norm": 3.1913645267486572,
      "learning_rate": 8.765263157894739e-06,
      "loss": 0.1431,
      "step": 1675
    },
    {
      "epoch": 0.5747126436781609,
      "grad_norm": 2.8816912174224854,
      "learning_rate": 8.738947368421053e-06,
      "loss": 0.1592,
      "step": 1700
    },
    {
      "epoch": 0.5831643002028397,
      "grad_norm": 2.4529218673706055,
      "learning_rate": 8.712631578947368e-06,
      "loss": 0.1554,
      "step": 1725
    },
    {
      "epoch": 0.5916159567275185,
      "grad_norm": 1.9601649045944214,
      "learning_rate": 8.686315789473685e-06,
      "loss": 0.135,
      "step": 1750
    },
    {
      "epoch": 0.6000676132521975,
      "grad_norm": 2.27895188331604,
      "learning_rate": 8.66e-06,
      "loss": 0.1559,
      "step": 1775
    },
    {
      "epoch": 0.6085192697768763,
      "grad_norm": 2.6185851097106934,
      "learning_rate": 8.633684210526317e-06,
      "loss": 0.1491,
      "step": 1800
    },
    {
      "epoch": 0.6169709263015551,
      "grad_norm": 2.6344733238220215,
      "learning_rate": 8.607368421052632e-06,
      "loss": 0.1277,
      "step": 1825
    },
    {
      "epoch": 0.625422582826234,
      "grad_norm": 2.459585189819336,
      "learning_rate": 8.581052631578948e-06,
      "loss": 0.1367,
      "step": 1850
    },
    {
      "epoch": 0.6338742393509128,
      "grad_norm": 2.8337063789367676,
      "learning_rate": 8.554736842105263e-06,
      "loss": 0.1354,
      "step": 1875
    },
    {
      "epoch": 0.6423258958755916,
      "grad_norm": 1.8884564638137817,
      "learning_rate": 8.528421052631578e-06,
      "loss": 0.1335,
      "step": 1900
    },
    {
      "epoch": 0.6507775524002705,
      "grad_norm": 1.8263589143753052,
      "learning_rate": 8.502105263157897e-06,
      "loss": 0.1465,
      "step": 1925
    },
    {
      "epoch": 0.6592292089249493,
      "grad_norm": 1.995797038078308,
      "learning_rate": 8.475789473684212e-06,
      "loss": 0.141,
      "step": 1950
    },
    {
      "epoch": 0.6676808654496281,
      "grad_norm": 2.030308961868286,
      "learning_rate": 8.449473684210527e-06,
      "loss": 0.1368,
      "step": 1975
    },
    {
      "epoch": 0.676132521974307,
      "grad_norm": 1.9444704055786133,
      "learning_rate": 8.423157894736843e-06,
      "loss": 0.1398,
      "step": 2000
    },
    {
      "epoch": 0.676132521974307,
      "eval_loss": 0.18893150985240936,
      "eval_runtime": 9002.7886,
      "eval_samples_per_second": 1.625,
      "eval_steps_per_second": 0.203,
      "eval_wer": 48.848672146618874,
      "step": 2000
    },
    {
      "epoch": 0.6845841784989858,
      "grad_norm": 2.6425070762634277,
      "learning_rate": 8.396842105263158e-06,
      "loss": 0.1373,
      "step": 2025
    },
    {
      "epoch": 0.6930358350236646,
      "grad_norm": 2.495431900024414,
      "learning_rate": 8.370526315789475e-06,
      "loss": 0.143,
      "step": 2050
    },
    {
      "epoch": 0.7014874915483434,
      "grad_norm": 1.92854642868042,
      "learning_rate": 8.34421052631579e-06,
      "loss": 0.144,
      "step": 2075
    },
    {
      "epoch": 0.7099391480730223,
      "grad_norm": 2.4910595417022705,
      "learning_rate": 8.317894736842107e-06,
      "loss": 0.1222,
      "step": 2100
    },
    {
      "epoch": 0.7183908045977011,
      "grad_norm": 2.873384475708008,
      "learning_rate": 8.291578947368422e-06,
      "loss": 0.1341,
      "step": 2125
    },
    {
      "epoch": 0.7268424611223799,
      "grad_norm": 2.113551616668701,
      "learning_rate": 8.265263157894737e-06,
      "loss": 0.1372,
      "step": 2150
    },
    {
      "epoch": 0.7352941176470589,
      "grad_norm": 2.322167158126831,
      "learning_rate": 8.238947368421053e-06,
      "loss": 0.1451,
      "step": 2175
    },
    {
      "epoch": 0.7437457741717377,
      "grad_norm": 1.3647667169570923,
      "learning_rate": 8.212631578947368e-06,
      "loss": 0.1334,
      "step": 2200
    },
    {
      "epoch": 0.7521974306964165,
      "grad_norm": 1.4476187229156494,
      "learning_rate": 8.186315789473685e-06,
      "loss": 0.1293,
      "step": 2225
    },
    {
      "epoch": 0.7606490872210954,
      "grad_norm": 2.103031635284424,
      "learning_rate": 8.16e-06,
      "loss": 0.1414,
      "step": 2250
    },
    {
      "epoch": 0.7691007437457742,
      "grad_norm": 2.4936468601226807,
      "learning_rate": 8.133684210526316e-06,
      "loss": 0.1333,
      "step": 2275
    },
    {
      "epoch": 0.777552400270453,
      "grad_norm": 2.8524506092071533,
      "learning_rate": 8.107368421052633e-06,
      "loss": 0.1502,
      "step": 2300
    },
    {
      "epoch": 0.7860040567951319,
      "grad_norm": 1.78004789352417,
      "learning_rate": 8.081052631578948e-06,
      "loss": 0.1416,
      "step": 2325
    },
    {
      "epoch": 0.7944557133198107,
      "grad_norm": 2.4518020153045654,
      "learning_rate": 8.054736842105265e-06,
      "loss": 0.1305,
      "step": 2350
    },
    {
      "epoch": 0.8029073698444895,
      "grad_norm": 1.6946748495101929,
      "learning_rate": 8.02842105263158e-06,
      "loss": 0.1277,
      "step": 2375
    },
    {
      "epoch": 0.8113590263691683,
      "grad_norm": 2.044149160385132,
      "learning_rate": 8.002105263157895e-06,
      "loss": 0.1342,
      "step": 2400
    },
    {
      "epoch": 0.8198106828938472,
      "grad_norm": 2.5751612186431885,
      "learning_rate": 7.975789473684211e-06,
      "loss": 0.1382,
      "step": 2425
    },
    {
      "epoch": 0.828262339418526,
      "grad_norm": 2.957038164138794,
      "learning_rate": 7.949473684210526e-06,
      "loss": 0.1238,
      "step": 2450
    },
    {
      "epoch": 0.8367139959432048,
      "grad_norm": 1.6040945053100586,
      "learning_rate": 7.923157894736843e-06,
      "loss": 0.1382,
      "step": 2475
    },
    {
      "epoch": 0.8451656524678837,
      "grad_norm": 2.2693052291870117,
      "learning_rate": 7.896842105263158e-06,
      "loss": 0.1317,
      "step": 2500
    },
    {
      "epoch": 0.8451656524678837,
      "eval_loss": 0.1813763976097107,
      "eval_runtime": 8337.7114,
      "eval_samples_per_second": 1.754,
      "eval_steps_per_second": 0.219,
      "eval_wer": 47.81250152809494,
      "step": 2500
    },
    {
      "epoch": 0.8536173089925625,
      "grad_norm": 1.6725122928619385,
      "learning_rate": 7.870526315789475e-06,
      "loss": 0.123,
      "step": 2525
    },
    {
      "epoch": 0.8620689655172413,
      "grad_norm": 2.4779107570648193,
      "learning_rate": 7.84421052631579e-06,
      "loss": 0.1202,
      "step": 2550
    },
    {
      "epoch": 0.8705206220419203,
      "grad_norm": 2.3893909454345703,
      "learning_rate": 7.817894736842105e-06,
      "loss": 0.1197,
      "step": 2575
    },
    {
      "epoch": 0.8789722785665991,
      "grad_norm": 1.9995834827423096,
      "learning_rate": 7.791578947368423e-06,
      "loss": 0.1396,
      "step": 2600
    },
    {
      "epoch": 0.8874239350912779,
      "grad_norm": 2.6336281299591064,
      "learning_rate": 7.765263157894738e-06,
      "loss": 0.1295,
      "step": 2625
    },
    {
      "epoch": 0.8958755916159568,
      "grad_norm": 2.0509071350097656,
      "learning_rate": 7.738947368421053e-06,
      "loss": 0.1184,
      "step": 2650
    },
    {
      "epoch": 0.9043272481406356,
      "grad_norm": 2.0094475746154785,
      "learning_rate": 7.71263157894737e-06,
      "loss": 0.1275,
      "step": 2675
    },
    {
      "epoch": 0.9127789046653144,
      "grad_norm": 1.5212738513946533,
      "learning_rate": 7.686315789473685e-06,
      "loss": 0.1129,
      "step": 2700
    },
    {
      "epoch": 0.9212305611899932,
      "grad_norm": 1.1331150531768799,
      "learning_rate": 7.660000000000001e-06,
      "loss": 0.1195,
      "step": 2725
    },
    {
      "epoch": 0.9296822177146721,
      "grad_norm": 2.1462574005126953,
      "learning_rate": 7.633684210526316e-06,
      "loss": 0.1191,
      "step": 2750
    },
    {
      "epoch": 0.9381338742393509,
      "grad_norm": 2.8891022205352783,
      "learning_rate": 7.607368421052632e-06,
      "loss": 0.1369,
      "step": 2775
    },
    {
      "epoch": 0.9465855307640297,
      "grad_norm": 1.5433547496795654,
      "learning_rate": 7.581052631578948e-06,
      "loss": 0.1233,
      "step": 2800
    },
    {
      "epoch": 0.9550371872887086,
      "grad_norm": 1.6972781419754028,
      "learning_rate": 7.554736842105264e-06,
      "loss": 0.1358,
      "step": 2825
    },
    {
      "epoch": 0.9634888438133874,
      "grad_norm": 1.8598979711532593,
      "learning_rate": 7.5284210526315794e-06,
      "loss": 0.1203,
      "step": 2850
    },
    {
      "epoch": 0.9719405003380662,
      "grad_norm": 2.7386326789855957,
      "learning_rate": 7.502105263157895e-06,
      "loss": 0.1122,
      "step": 2875
    },
    {
      "epoch": 0.9803921568627451,
      "grad_norm": 1.9333444833755493,
      "learning_rate": 7.475789473684211e-06,
      "loss": 0.1278,
      "step": 2900
    },
    {
      "epoch": 0.9888438133874239,
      "grad_norm": 2.209989070892334,
      "learning_rate": 7.449473684210526e-06,
      "loss": 0.1176,
      "step": 2925
    },
    {
      "epoch": 0.9972954699121027,
      "grad_norm": 1.7954308986663818,
      "learning_rate": 7.4231578947368436e-06,
      "loss": 0.123,
      "step": 2950
    },
    {
      "epoch": 1.0057471264367817,
      "grad_norm": 2.3497698307037354,
      "learning_rate": 7.3968421052631585e-06,
      "loss": 0.1234,
      "step": 2975
    },
    {
      "epoch": 1.0141987829614605,
      "grad_norm": 2.3470804691314697,
      "learning_rate": 7.370526315789474e-06,
      "loss": 0.1015,
      "step": 3000
    },
    {
      "epoch": 1.0141987829614605,
      "eval_loss": 0.18137025833129883,
      "eval_runtime": 8405.7063,
      "eval_samples_per_second": 1.74,
      "eval_steps_per_second": 0.218,
      "eval_wer": 47.43940186696528,
      "step": 3000
    },
    {
      "epoch": 1.0226504394861393,
      "grad_norm": 1.895668864250183,
      "learning_rate": 7.34421052631579e-06,
      "loss": 0.1106,
      "step": 3025
    },
    {
      "epoch": 1.0311020960108181,
      "grad_norm": 1.3763364553451538,
      "learning_rate": 7.317894736842106e-06,
      "loss": 0.0896,
      "step": 3050
    },
    {
      "epoch": 1.039553752535497,
      "grad_norm": 1.778601884841919,
      "learning_rate": 7.291578947368422e-06,
      "loss": 0.1068,
      "step": 3075
    },
    {
      "epoch": 1.0480054090601758,
      "grad_norm": 1.7413523197174072,
      "learning_rate": 7.265263157894738e-06,
      "loss": 0.0941,
      "step": 3100
    },
    {
      "epoch": 1.0564570655848546,
      "grad_norm": 1.7844879627227783,
      "learning_rate": 7.2389473684210534e-06,
      "loss": 0.098,
      "step": 3125
    },
    {
      "epoch": 1.0649087221095335,
      "grad_norm": 1.9690433740615845,
      "learning_rate": 7.212631578947369e-06,
      "loss": 0.1019,
      "step": 3150
    },
    {
      "epoch": 1.0733603786342123,
      "grad_norm": 1.6368364095687866,
      "learning_rate": 7.186315789473684e-06,
      "loss": 0.0997,
      "step": 3175
    },
    {
      "epoch": 1.0818120351588911,
      "grad_norm": 1.9386001825332642,
      "learning_rate": 7.16e-06,
      "loss": 0.0877,
      "step": 3200
    },
    {
      "epoch": 1.09026369168357,
      "grad_norm": 1.983993411064148,
      "learning_rate": 7.133684210526316e-06,
      "loss": 0.107,
      "step": 3225
    },
    {
      "epoch": 1.0987153482082488,
      "grad_norm": 1.3441216945648193,
      "learning_rate": 7.107368421052632e-06,
      "loss": 0.1085,
      "step": 3250
    },
    {
      "epoch": 1.1071670047329276,
      "grad_norm": 2.490309238433838,
      "learning_rate": 7.0810526315789475e-06,
      "loss": 0.1067,
      "step": 3275
    },
    {
      "epoch": 1.1156186612576064,
      "grad_norm": 1.403867483139038,
      "learning_rate": 7.054736842105264e-06,
      "loss": 0.0971,
      "step": 3300
    },
    {
      "epoch": 1.1240703177822853,
      "grad_norm": 2.164853096008301,
      "learning_rate": 7.02842105263158e-06,
      "loss": 0.1001,
      "step": 3325
    },
    {
      "epoch": 1.132521974306964,
      "grad_norm": 1.6618404388427734,
      "learning_rate": 7.002105263157896e-06,
      "loss": 0.1215,
      "step": 3350
    },
    {
      "epoch": 1.140973630831643,
      "grad_norm": 0.8422718048095703,
      "learning_rate": 6.975789473684212e-06,
      "loss": 0.0979,
      "step": 3375
    },
    {
      "epoch": 1.1494252873563218,
      "grad_norm": 2.008251905441284,
      "learning_rate": 6.9494736842105275e-06,
      "loss": 0.0979,
      "step": 3400
    },
    {
      "epoch": 1.1578769438810006,
      "grad_norm": 1.493073582649231,
      "learning_rate": 6.9231578947368424e-06,
      "loss": 0.1067,
      "step": 3425
    },
    {
      "epoch": 1.1663286004056794,
      "grad_norm": 1.2973432540893555,
      "learning_rate": 6.896842105263158e-06,
      "loss": 0.0844,
      "step": 3450
    },
    {
      "epoch": 1.1747802569303583,
      "grad_norm": 1.9988205432891846,
      "learning_rate": 6.870526315789474e-06,
      "loss": 0.1018,
      "step": 3475
    },
    {
      "epoch": 1.183231913455037,
      "grad_norm": 2.0083346366882324,
      "learning_rate": 6.84421052631579e-06,
      "loss": 0.1093,
      "step": 3500
    },
    {
      "epoch": 1.183231913455037,
      "eval_loss": 0.1796853244304657,
      "eval_runtime": 8340.7798,
      "eval_samples_per_second": 1.754,
      "eval_steps_per_second": 0.219,
      "eval_wer": 46.34797533532515,
      "step": 3500
    },
    {
      "epoch": 1.1916835699797161,
      "grad_norm": 1.823431372642517,
      "learning_rate": 6.817894736842106e-06,
      "loss": 0.0996,
      "step": 3525
    },
    {
      "epoch": 1.2001352265043947,
      "grad_norm": 2.2880189418792725,
      "learning_rate": 6.7915789473684215e-06,
      "loss": 0.1032,
      "step": 3550
    },
    {
      "epoch": 1.2085868830290738,
      "grad_norm": 1.070098876953125,
      "learning_rate": 6.765263157894737e-06,
      "loss": 0.1062,
      "step": 3575
    },
    {
      "epoch": 1.2170385395537526,
      "grad_norm": 2.1815688610076904,
      "learning_rate": 6.738947368421052e-06,
      "loss": 0.1148,
      "step": 3600
    },
    {
      "epoch": 1.2254901960784315,
      "grad_norm": 2.817481756210327,
      "learning_rate": 6.71263157894737e-06,
      "loss": 0.0933,
      "step": 3625
    },
    {
      "epoch": 1.2339418526031103,
      "grad_norm": 1.8840892314910889,
      "learning_rate": 6.686315789473685e-06,
      "loss": 0.0965,
      "step": 3650
    },
    {
      "epoch": 1.2423935091277891,
      "grad_norm": 2.0833046436309814,
      "learning_rate": 6.660000000000001e-06,
      "loss": 0.1168,
      "step": 3675
    },
    {
      "epoch": 1.250845165652468,
      "grad_norm": 1.6042765378952026,
      "learning_rate": 6.6336842105263164e-06,
      "loss": 0.1033,
      "step": 3700
    },
    {
      "epoch": 1.2592968221771468,
      "grad_norm": 1.8703746795654297,
      "learning_rate": 6.607368421052632e-06,
      "loss": 0.0969,
      "step": 3725
    },
    {
      "epoch": 1.2677484787018256,
      "grad_norm": 1.789585828781128,
      "learning_rate": 6.581052631578948e-06,
      "loss": 0.1084,
      "step": 3750
    },
    {
      "epoch": 1.2762001352265044,
      "grad_norm": 1.974095106124878,
      "learning_rate": 6.554736842105264e-06,
      "loss": 0.1023,
      "step": 3775
    },
    {
      "epoch": 1.2846517917511833,
      "grad_norm": 2.729954481124878,
      "learning_rate": 6.52842105263158e-06,
      "loss": 0.1109,
      "step": 3800
    },
    {
      "epoch": 1.293103448275862,
      "grad_norm": 1.8322004079818726,
      "learning_rate": 6.5021052631578955e-06,
      "loss": 0.0994,
      "step": 3825
    },
    {
      "epoch": 1.301555104800541,
      "grad_norm": 2.05082368850708,
      "learning_rate": 6.4757894736842105e-06,
      "loss": 0.105,
      "step": 3850
    },
    {
      "epoch": 1.3100067613252198,
      "grad_norm": 1.8818949460983276,
      "learning_rate": 6.449473684210526e-06,
      "loss": 0.0872,
      "step": 3875
    },
    {
      "epoch": 1.3184584178498986,
      "grad_norm": 1.996830940246582,
      "learning_rate": 6.423157894736842e-06,
      "loss": 0.0928,
      "step": 3900
    },
    {
      "epoch": 1.3269100743745774,
      "grad_norm": 3.266448736190796,
      "learning_rate": 6.396842105263158e-06,
      "loss": 0.0972,
      "step": 3925
    },
    {
      "epoch": 1.3353617308992562,
      "grad_norm": 1.4689350128173828,
      "learning_rate": 6.370526315789474e-06,
      "loss": 0.0968,
      "step": 3950
    },
    {
      "epoch": 1.343813387423935,
      "grad_norm": 1.166890263557434,
      "learning_rate": 6.3442105263157904e-06,
      "loss": 0.1024,
      "step": 3975
    },
    {
      "epoch": 1.352265043948614,
      "grad_norm": 2.1290385723114014,
      "learning_rate": 6.317894736842106e-06,
      "loss": 0.1011,
      "step": 4000
    },
    {
      "epoch": 1.352265043948614,
      "eval_loss": 0.18109461665153503,
      "eval_runtime": 8386.0632,
      "eval_samples_per_second": 1.744,
      "eval_steps_per_second": 0.218,
      "eval_wer": 46.233062595658744,
      "step": 4000
    },
    {
      "epoch": 1.3607167004732927,
      "grad_norm": 1.8343077898025513,
      "learning_rate": 6.291578947368422e-06,
      "loss": 0.0864,
      "step": 4025
    },
    {
      "epoch": 1.3691683569979716,
      "grad_norm": 1.3331023454666138,
      "learning_rate": 6.265263157894738e-06,
      "loss": 0.0833,
      "step": 4050
    },
    {
      "epoch": 1.3776200135226504,
      "grad_norm": 2.407951831817627,
      "learning_rate": 6.238947368421054e-06,
      "loss": 0.1027,
      "step": 4075
    },
    {
      "epoch": 1.3860716700473292,
      "grad_norm": 1.5978944301605225,
      "learning_rate": 6.212631578947369e-06,
      "loss": 0.0994,
      "step": 4100
    },
    {
      "epoch": 1.394523326572008,
      "grad_norm": 1.399279236793518,
      "learning_rate": 6.1863157894736845e-06,
      "loss": 0.0916,
      "step": 4125
    },
    {
      "epoch": 1.402974983096687,
      "grad_norm": 1.4253430366516113,
      "learning_rate": 6.16e-06,
      "loss": 0.0999,
      "step": 4150
    },
    {
      "epoch": 1.4114266396213657,
      "grad_norm": 1.8193767070770264,
      "learning_rate": 6.133684210526316e-06,
      "loss": 0.099,
      "step": 4175
    },
    {
      "epoch": 1.4198782961460445,
      "grad_norm": 1.1313689947128296,
      "learning_rate": 6.107368421052632e-06,
      "loss": 0.1019,
      "step": 4200
    },
    {
      "epoch": 1.4283299526707234,
      "grad_norm": 2.423612594604492,
      "learning_rate": 6.081052631578948e-06,
      "loss": 0.0976,
      "step": 4225
    },
    {
      "epoch": 1.4367816091954024,
      "grad_norm": 1.1028584241867065,
      "learning_rate": 6.054736842105264e-06,
      "loss": 0.0904,
      "step": 4250
    },
    {
      "epoch": 1.445233265720081,
      "grad_norm": 1.811155915260315,
      "learning_rate": 6.0284210526315786e-06,
      "loss": 0.0951,
      "step": 4275
    },
    {
      "epoch": 1.45368492224476,
      "grad_norm": 1.2637566328048706,
      "learning_rate": 6.002105263157896e-06,
      "loss": 0.0963,
      "step": 4300
    },
    {
      "epoch": 1.4621365787694387,
      "grad_norm": 1.9464035034179688,
      "learning_rate": 5.975789473684212e-06,
      "loss": 0.089,
      "step": 4325
    },
    {
      "epoch": 1.4705882352941178,
      "grad_norm": 1.8550575971603394,
      "learning_rate": 5.949473684210527e-06,
      "loss": 0.0902,
      "step": 4350
    },
    {
      "epoch": 1.4790398918187964,
      "grad_norm": 2.030001640319824,
      "learning_rate": 5.923157894736843e-06,
      "loss": 0.0945,
      "step": 4375
    },
    {
      "epoch": 1.4874915483434754,
      "grad_norm": 1.992264986038208,
      "learning_rate": 5.8968421052631585e-06,
      "loss": 0.1017,
      "step": 4400
    },
    {
      "epoch": 1.495943204868154,
      "grad_norm": 1.6505546569824219,
      "learning_rate": 5.870526315789474e-06,
      "loss": 0.0944,
      "step": 4425
    },
    {
      "epoch": 1.504394861392833,
      "grad_norm": 1.857921838760376,
      "learning_rate": 5.84421052631579e-06,
      "loss": 0.1005,
      "step": 4450
    },
    {
      "epoch": 1.5128465179175117,
      "grad_norm": 1.2483346462249756,
      "learning_rate": 5.817894736842106e-06,
      "loss": 0.0851,
      "step": 4475
    },
    {
      "epoch": 1.5212981744421907,
      "grad_norm": 2.141049861907959,
      "learning_rate": 5.791578947368422e-06,
      "loss": 0.0952,
      "step": 4500
    },
    {
      "epoch": 1.5212981744421907,
      "eval_loss": 0.18032518029212952,
      "eval_runtime": 8277.5716,
      "eval_samples_per_second": 1.767,
      "eval_steps_per_second": 0.221,
      "eval_wer": 45.674635579918146,
      "step": 4500
    },
    {
      "epoch": 1.5297498309668696,
      "grad_norm": 2.129145622253418,
      "learning_rate": 5.765263157894737e-06,
      "loss": 0.0998,
      "step": 4525
    },
    {
      "epoch": 1.5382014874915484,
      "grad_norm": 1.1393293142318726,
      "learning_rate": 5.7389473684210526e-06,
      "loss": 0.1009,
      "step": 4550
    },
    {
      "epoch": 1.5466531440162272,
      "grad_norm": 2.5733561515808105,
      "learning_rate": 5.712631578947368e-06,
      "loss": 0.0986,
      "step": 4575
    },
    {
      "epoch": 1.555104800540906,
      "grad_norm": 1.9032436609268188,
      "learning_rate": 5.686315789473684e-06,
      "loss": 0.0871,
      "step": 4600
    },
    {
      "epoch": 1.5635564570655849,
      "grad_norm": 1.4245878458023071,
      "learning_rate": 5.66e-06,
      "loss": 0.1071,
      "step": 4625
    },
    {
      "epoch": 1.5720081135902637,
      "grad_norm": 1.322015404701233,
      "learning_rate": 5.633684210526317e-06,
      "loss": 0.0856,
      "step": 4650
    },
    {
      "epoch": 1.5804597701149425,
      "grad_norm": 2.069826126098633,
      "learning_rate": 5.6073684210526325e-06,
      "loss": 0.0802,
      "step": 4675
    },
    {
      "epoch": 1.5889114266396214,
      "grad_norm": 1.566107988357544,
      "learning_rate": 5.581052631578948e-06,
      "loss": 0.0896,
      "step": 4700
    },
    {
      "epoch": 1.5973630831643002,
      "grad_norm": 2.0934951305389404,
      "learning_rate": 5.554736842105264e-06,
      "loss": 0.0948,
      "step": 4725
    },
    {
      "epoch": 1.605814739688979,
      "grad_norm": 1.2778680324554443,
      "learning_rate": 5.52842105263158e-06,
      "loss": 0.0914,
      "step": 4750
    },
    {
      "epoch": 1.6142663962136579,
      "grad_norm": 2.1123738288879395,
      "learning_rate": 5.502105263157895e-06,
      "loss": 0.0981,
      "step": 4775
    },
    {
      "epoch": 1.6227180527383367,
      "grad_norm": 0.9122495651245117,
      "learning_rate": 5.475789473684211e-06,
      "loss": 0.0832,
      "step": 4800
    },
    {
      "epoch": 1.6311697092630155,
      "grad_norm": 1.6515990495681763,
      "learning_rate": 5.4494736842105266e-06,
      "loss": 0.0975,
      "step": 4825
    },
    {
      "epoch": 1.6396213657876944,
      "grad_norm": 1.507886528968811,
      "learning_rate": 5.423157894736842e-06,
      "loss": 0.0934,
      "step": 4850
    },
    {
      "epoch": 1.6480730223123732,
      "grad_norm": 1.307924747467041,
      "learning_rate": 5.396842105263158e-06,
      "loss": 0.0855,
      "step": 4875
    },
    {
      "epoch": 1.656524678837052,
      "grad_norm": 1.2642782926559448,
      "learning_rate": 5.370526315789474e-06,
      "loss": 0.1,
      "step": 4900
    },
    {
      "epoch": 1.664976335361731,
      "grad_norm": 1.7240660190582275,
      "learning_rate": 5.34421052631579e-06,
      "loss": 0.0925,
      "step": 4925
    },
    {
      "epoch": 1.6734279918864097,
      "grad_norm": 1.4809589385986328,
      "learning_rate": 5.317894736842105e-06,
      "loss": 0.1017,
      "step": 4950
    },
    {
      "epoch": 1.6818796484110887,
      "grad_norm": 2.2714500427246094,
      "learning_rate": 5.291578947368422e-06,
      "loss": 0.0908,
      "step": 4975
    },
    {
      "epoch": 1.6903313049357673,
      "grad_norm": 1.7016249895095825,
      "learning_rate": 5.265263157894738e-06,
      "loss": 0.0959,
      "step": 5000
    },
    {
      "epoch": 1.6903313049357673,
      "eval_loss": 0.18442150950431824,
      "eval_runtime": 8017.4704,
      "eval_samples_per_second": 1.824,
      "eval_steps_per_second": 0.228,
      "eval_wer": 45.91424086688215,
      "step": 5000
    },
    {
      "epoch": 1.6987829614604464,
      "grad_norm": 1.7705456018447876,
      "learning_rate": 5.238947368421053e-06,
      "loss": 0.1014,
      "step": 5025
    },
    {
      "epoch": 1.707234617985125,
      "grad_norm": 1.8674064874649048,
      "learning_rate": 5.212631578947369e-06,
      "loss": 0.1012,
      "step": 5050
    },
    {
      "epoch": 1.715686274509804,
      "grad_norm": 1.4433462619781494,
      "learning_rate": 5.186315789473685e-06,
      "loss": 0.0859,
      "step": 5075
    },
    {
      "epoch": 1.7241379310344827,
      "grad_norm": 1.2248002290725708,
      "learning_rate": 5.1600000000000006e-06,
      "loss": 0.0884,
      "step": 5100
    },
    {
      "epoch": 1.7325895875591617,
      "grad_norm": 1.763519525527954,
      "learning_rate": 5.133684210526316e-06,
      "loss": 0.1033,
      "step": 5125
    },
    {
      "epoch": 1.7410412440838403,
      "grad_norm": 1.8742742538452148,
      "learning_rate": 5.107368421052632e-06,
      "loss": 0.0994,
      "step": 5150
    },
    {
      "epoch": 1.7494929006085194,
      "grad_norm": 1.9813201427459717,
      "learning_rate": 5.081052631578948e-06,
      "loss": 0.0912,
      "step": 5175
    },
    {
      "epoch": 1.757944557133198,
      "grad_norm": 2.3702893257141113,
      "learning_rate": 5.054736842105263e-06,
      "loss": 0.0934,
      "step": 5200
    },
    {
      "epoch": 1.766396213657877,
      "grad_norm": 1.5483486652374268,
      "learning_rate": 5.028421052631579e-06,
      "loss": 0.1052,
      "step": 5225
    },
    {
      "epoch": 1.7748478701825556,
      "grad_norm": 1.4521353244781494,
      "learning_rate": 5.002105263157895e-06,
      "loss": 0.1046,
      "step": 5250
    },
    {
      "epoch": 1.7832995267072347,
      "grad_norm": 2.0621144771575928,
      "learning_rate": 4.975789473684211e-06,
      "loss": 0.0996,
      "step": 5275
    },
    {
      "epoch": 1.7917511832319133,
      "grad_norm": 2.86627197265625,
      "learning_rate": 4.949473684210527e-06,
      "loss": 0.0974,
      "step": 5300
    },
    {
      "epoch": 1.8002028397565923,
      "grad_norm": 1.1360923051834106,
      "learning_rate": 4.923157894736842e-06,
      "loss": 0.0874,
      "step": 5325
    },
    {
      "epoch": 1.8086544962812712,
      "grad_norm": 1.4977036714553833,
      "learning_rate": 4.896842105263158e-06,
      "loss": 0.0926,
      "step": 5350
    },
    {
      "epoch": 1.81710615280595,
      "grad_norm": 1.642228126525879,
      "learning_rate": 4.870526315789474e-06,
      "loss": 0.0878,
      "step": 5375
    },
    {
      "epoch": 1.8255578093306288,
      "grad_norm": 1.6537584066390991,
      "learning_rate": 4.84421052631579e-06,
      "loss": 0.088,
      "step": 5400
    },
    {
      "epoch": 1.8340094658553077,
      "grad_norm": 1.3952895402908325,
      "learning_rate": 4.817894736842106e-06,
      "loss": 0.0888,
      "step": 5425
    },
    {
      "epoch": 1.8424611223799865,
      "grad_norm": 1.2289971113204956,
      "learning_rate": 4.791578947368421e-06,
      "loss": 0.0917,
      "step": 5450
    },
    {
      "epoch": 1.8509127789046653,
      "grad_norm": 1.2816951274871826,
      "learning_rate": 4.765263157894737e-06,
      "loss": 0.0851,
      "step": 5475
    },
    {
      "epoch": 1.8593644354293442,
      "grad_norm": 1.385846495628357,
      "learning_rate": 4.738947368421053e-06,
      "loss": 0.0828,
      "step": 5500
    },
    {
      "epoch": 1.8593644354293442,
      "eval_loss": 0.18521299958229065,
      "eval_runtime": 8001.8096,
      "eval_samples_per_second": 1.828,
      "eval_steps_per_second": 0.229,
      "eval_wer": 45.38564226441666,
      "step": 5500
    },
    {
      "epoch": 1.867816091954023,
      "grad_norm": 1.7502315044403076,
      "learning_rate": 4.712631578947369e-06,
      "loss": 0.0931,
      "step": 5525
    },
    {
      "epoch": 1.8762677484787018,
      "grad_norm": 1.384947657585144,
      "learning_rate": 4.6863157894736845e-06,
      "loss": 0.081,
      "step": 5550
    },
    {
      "epoch": 1.8847194050033806,
      "grad_norm": 1.736101508140564,
      "learning_rate": 4.66e-06,
      "loss": 0.1042,
      "step": 5575
    },
    {
      "epoch": 1.8931710615280595,
      "grad_norm": 1.5998270511627197,
      "learning_rate": 4.633684210526316e-06,
      "loss": 0.0784,
      "step": 5600
    },
    {
      "epoch": 1.9016227180527383,
      "grad_norm": 1.7052019834518433,
      "learning_rate": 4.607368421052632e-06,
      "loss": 0.0867,
      "step": 5625
    },
    {
      "epoch": 1.9100743745774171,
      "grad_norm": 1.8424391746520996,
      "learning_rate": 4.581052631578948e-06,
      "loss": 0.1042,
      "step": 5650
    },
    {
      "epoch": 1.918526031102096,
      "grad_norm": 1.484885573387146,
      "learning_rate": 4.5547368421052636e-06,
      "loss": 0.089,
      "step": 5675
    },
    {
      "epoch": 1.9269776876267748,
      "grad_norm": 1.7894026041030884,
      "learning_rate": 4.528421052631579e-06,
      "loss": 0.0847,
      "step": 5700
    },
    {
      "epoch": 1.9354293441514536,
      "grad_norm": 1.5164501667022705,
      "learning_rate": 4.502105263157895e-06,
      "loss": 0.1001,
      "step": 5725
    },
    {
      "epoch": 1.9438810006761327,
      "grad_norm": 0.9590908885002136,
      "learning_rate": 4.475789473684211e-06,
      "loss": 0.0858,
      "step": 5750
    },
    {
      "epoch": 1.9523326572008113,
      "grad_norm": 1.887405514717102,
      "learning_rate": 4.449473684210527e-06,
      "loss": 0.079,
      "step": 5775
    },
    {
      "epoch": 1.9607843137254903,
      "grad_norm": 2.0071308612823486,
      "learning_rate": 4.423157894736843e-06,
      "loss": 0.094,
      "step": 5800
    },
    {
      "epoch": 1.969235970250169,
      "grad_norm": 2.262946605682373,
      "learning_rate": 4.3968421052631585e-06,
      "loss": 0.0992,
      "step": 5825
    },
    {
      "epoch": 1.977687626774848,
      "grad_norm": 1.1825026273727417,
      "learning_rate": 4.370526315789474e-06,
      "loss": 0.0831,
      "step": 5850
    },
    {
      "epoch": 1.9861392832995266,
      "grad_norm": 1.4014040231704712,
      "learning_rate": 4.344210526315789e-06,
      "loss": 0.0983,
      "step": 5875
    },
    {
      "epoch": 1.9945909398242057,
      "grad_norm": 1.7053906917572021,
      "learning_rate": 4.317894736842105e-06,
      "loss": 0.0937,
      "step": 5900
    },
    {
      "epoch": 2.0030425963488843,
      "grad_norm": 1.2354754209518433,
      "learning_rate": 4.291578947368422e-06,
      "loss": 0.0794,
      "step": 5925
    },
    {
      "epoch": 2.0114942528735633,
      "grad_norm": 1.5480238199234009,
      "learning_rate": 4.2652631578947376e-06,
      "loss": 0.0621,
      "step": 5950
    },
    {
      "epoch": 2.019945909398242,
      "grad_norm": 1.6204942464828491,
      "learning_rate": 4.238947368421053e-06,
      "loss": 0.0679,
      "step": 5975
    },
    {
      "epoch": 2.028397565922921,
      "grad_norm": 1.8844307661056519,
      "learning_rate": 4.212631578947368e-06,
      "loss": 0.0814,
      "step": 6000
    },
    {
      "epoch": 2.028397565922921,
      "eval_loss": 0.18354202806949615,
      "eval_runtime": 8220.3894,
      "eval_samples_per_second": 1.779,
      "eval_steps_per_second": 0.222,
      "eval_wer": 45.245302024909165,
      "step": 6000
    },
    {
      "epoch": 2.0368492224475996,
      "grad_norm": 1.34647798538208,
      "learning_rate": 4.186315789473684e-06,
      "loss": 0.0707,
      "step": 6025
    },
    {
      "epoch": 2.0453008789722786,
      "grad_norm": 2.5605897903442383,
      "learning_rate": 4.16e-06,
      "loss": 0.0658,
      "step": 6050
    },
    {
      "epoch": 2.0537525354969572,
      "grad_norm": 0.988107442855835,
      "learning_rate": 4.133684210526316e-06,
      "loss": 0.0787,
      "step": 6075
    },
    {
      "epoch": 2.0622041920216363,
      "grad_norm": 1.3938881158828735,
      "learning_rate": 4.1073684210526325e-06,
      "loss": 0.0772,
      "step": 6100
    },
    {
      "epoch": 2.070655848546315,
      "grad_norm": 0.3137703835964203,
      "learning_rate": 4.0810526315789474e-06,
      "loss": 0.0728,
      "step": 6125
    },
    {
      "epoch": 2.079107505070994,
      "grad_norm": 1.0805671215057373,
      "learning_rate": 4.054736842105263e-06,
      "loss": 0.0723,
      "step": 6150
    },
    {
      "epoch": 2.0875591615956726,
      "grad_norm": 1.8240491151809692,
      "learning_rate": 4.029473684210527e-06,
      "loss": 0.0824,
      "step": 6175
    },
    {
      "epoch": 2.0960108181203516,
      "grad_norm": 0.9932010173797607,
      "learning_rate": 4.0031578947368424e-06,
      "loss": 0.0808,
      "step": 6200
    },
    {
      "epoch": 2.1044624746450302,
      "grad_norm": 1.7233359813690186,
      "learning_rate": 3.976842105263158e-06,
      "loss": 0.0656,
      "step": 6225
    },
    {
      "epoch": 2.1129141311697093,
      "grad_norm": 1.3749310970306396,
      "learning_rate": 3.950526315789474e-06,
      "loss": 0.0782,
      "step": 6250
    },
    {
      "epoch": 2.121365787694388,
      "grad_norm": 1.1162022352218628,
      "learning_rate": 3.92421052631579e-06,
      "loss": 0.0764,
      "step": 6275
    },
    {
      "epoch": 2.129817444219067,
      "grad_norm": 2.195281505584717,
      "learning_rate": 3.897894736842106e-06,
      "loss": 0.0694,
      "step": 6300
    },
    {
      "epoch": 2.138269100743746,
      "grad_norm": 1.5847153663635254,
      "learning_rate": 3.8715789473684215e-06,
      "loss": 0.0778,
      "step": 6325
    },
    {
      "epoch": 2.1467207572684246,
      "grad_norm": 1.5322215557098389,
      "learning_rate": 3.845263157894737e-06,
      "loss": 0.068,
      "step": 6350
    },
    {
      "epoch": 2.1551724137931036,
      "grad_norm": 2.1042368412017822,
      "learning_rate": 3.818947368421053e-06,
      "loss": 0.063,
      "step": 6375
    },
    {
      "epoch": 2.1636240703177823,
      "grad_norm": 2.016451120376587,
      "learning_rate": 3.792631578947369e-06,
      "loss": 0.0757,
      "step": 6400
    },
    {
      "epoch": 2.1720757268424613,
      "grad_norm": 1.9875035285949707,
      "learning_rate": 3.766315789473685e-06,
      "loss": 0.0633,
      "step": 6425
    },
    {
      "epoch": 2.18052738336714,
      "grad_norm": 1.0881574153900146,
      "learning_rate": 3.74e-06,
      "loss": 0.0779,
      "step": 6450
    },
    {
      "epoch": 2.188979039891819,
      "grad_norm": 1.4989287853240967,
      "learning_rate": 3.713684210526316e-06,
      "loss": 0.0672,
      "step": 6475
    },
    {
      "epoch": 2.1974306964164976,
      "grad_norm": 1.430418848991394,
      "learning_rate": 3.687368421052632e-06,
      "loss": 0.0903,
      "step": 6500
    },
    {
      "epoch": 2.1974306964164976,
      "eval_loss": 0.18892446160316467,
      "eval_runtime": 8703.8448,
      "eval_samples_per_second": 1.681,
      "eval_steps_per_second": 0.21,
      "eval_wer": 45.42427250455984,
      "step": 6500
    },
    {
      "epoch": 2.2058823529411766,
      "grad_norm": 1.1100589036941528,
      "learning_rate": 3.6610526315789472e-06,
      "loss": 0.0682,
      "step": 6525
    },
    {
      "epoch": 2.2143340094658552,
      "grad_norm": 1.5231108665466309,
      "learning_rate": 3.6347368421052635e-06,
      "loss": 0.0772,
      "step": 6550
    },
    {
      "epoch": 2.2227856659905343,
      "grad_norm": 1.5531678199768066,
      "learning_rate": 3.6084210526315793e-06,
      "loss": 0.0769,
      "step": 6575
    },
    {
      "epoch": 2.231237322515213,
      "grad_norm": 2.173323154449463,
      "learning_rate": 3.582105263157895e-06,
      "loss": 0.0753,
      "step": 6600
    },
    {
      "epoch": 2.239688979039892,
      "grad_norm": 1.8664982318878174,
      "learning_rate": 3.555789473684211e-06,
      "loss": 0.0732,
      "step": 6625
    },
    {
      "epoch": 2.2481406355645706,
      "grad_norm": 1.264046549797058,
      "learning_rate": 3.5294736842105263e-06,
      "loss": 0.0727,
      "step": 6650
    },
    {
      "epoch": 2.2565922920892496,
      "grad_norm": 0.6508150100708008,
      "learning_rate": 3.503157894736842e-06,
      "loss": 0.0627,
      "step": 6675
    },
    {
      "epoch": 2.265043948613928,
      "grad_norm": 2.036755323410034,
      "learning_rate": 3.476842105263158e-06,
      "loss": 0.0703,
      "step": 6700
    },
    {
      "epoch": 2.2734956051386073,
      "grad_norm": 1.4108197689056396,
      "learning_rate": 3.450526315789474e-06,
      "loss": 0.0815,
      "step": 6725
    },
    {
      "epoch": 2.281947261663286,
      "grad_norm": 1.388468623161316,
      "learning_rate": 3.42421052631579e-06,
      "loss": 0.0678,
      "step": 6750
    },
    {
      "epoch": 2.290398918187965,
      "grad_norm": 1.7254157066345215,
      "learning_rate": 3.3978947368421054e-06,
      "loss": 0.0713,
      "step": 6775
    },
    {
      "epoch": 2.2988505747126435,
      "grad_norm": 1.8082466125488281,
      "learning_rate": 3.3715789473684212e-06,
      "loss": 0.0643,
      "step": 6800
    },
    {
      "epoch": 2.3073022312373226,
      "grad_norm": 1.5543231964111328,
      "learning_rate": 3.345263157894737e-06,
      "loss": 0.0693,
      "step": 6825
    },
    {
      "epoch": 2.315753887762001,
      "grad_norm": 2.165055513381958,
      "learning_rate": 3.318947368421053e-06,
      "loss": 0.0718,
      "step": 6850
    },
    {
      "epoch": 2.3242055442866802,
      "grad_norm": 1.7937675714492798,
      "learning_rate": 3.292631578947369e-06,
      "loss": 0.0694,
      "step": 6875
    },
    {
      "epoch": 2.332657200811359,
      "grad_norm": 1.602084994316101,
      "learning_rate": 3.2663157894736845e-06,
      "loss": 0.0575,
      "step": 6900
    },
    {
      "epoch": 2.341108857336038,
      "grad_norm": 0.9363685846328735,
      "learning_rate": 3.2400000000000003e-06,
      "loss": 0.0663,
      "step": 6925
    },
    {
      "epoch": 2.3495605138607165,
      "grad_norm": 1.2168769836425781,
      "learning_rate": 3.213684210526316e-06,
      "loss": 0.0755,
      "step": 6950
    },
    {
      "epoch": 2.3580121703853956,
      "grad_norm": 1.4504033327102661,
      "learning_rate": 3.187368421052632e-06,
      "loss": 0.079,
      "step": 6975
    },
    {
      "epoch": 2.366463826910074,
      "grad_norm": 1.5413113832473755,
      "learning_rate": 3.1610526315789474e-06,
      "loss": 0.0712,
      "step": 7000
    },
    {
      "epoch": 2.366463826910074,
      "eval_loss": 0.19058801233768463,
      "eval_runtime": 8456.0898,
      "eval_samples_per_second": 1.73,
      "eval_steps_per_second": 0.216,
      "eval_wer": 45.145058996689535,
      "step": 7000
    },
    {
      "epoch": 2.3749154834347532,
      "grad_norm": 1.0616259574890137,
      "learning_rate": 3.134736842105263e-06,
      "loss": 0.066,
      "step": 7025
    },
    {
      "epoch": 2.3833671399594323,
      "grad_norm": 1.821663737297058,
      "learning_rate": 3.1084210526315794e-06,
      "loss": 0.0692,
      "step": 7050
    },
    {
      "epoch": 2.391818796484111,
      "grad_norm": 1.2044411897659302,
      "learning_rate": 3.0821052631578952e-06,
      "loss": 0.0631,
      "step": 7075
    },
    {
      "epoch": 2.4002704530087895,
      "grad_norm": 1.1496453285217285,
      "learning_rate": 3.055789473684211e-06,
      "loss": 0.0598,
      "step": 7100
    },
    {
      "epoch": 2.4087221095334685,
      "grad_norm": 1.834873914718628,
      "learning_rate": 3.0294736842105264e-06,
      "loss": 0.0696,
      "step": 7125
    },
    {
      "epoch": 2.4171737660581476,
      "grad_norm": 1.9280352592468262,
      "learning_rate": 3.0031578947368423e-06,
      "loss": 0.0937,
      "step": 7150
    },
    {
      "epoch": 2.425625422582826,
      "grad_norm": 1.8316441774368286,
      "learning_rate": 2.976842105263158e-06,
      "loss": 0.0843,
      "step": 7175
    },
    {
      "epoch": 2.4340770791075053,
      "grad_norm": 1.6660900115966797,
      "learning_rate": 2.9505263157894735e-06,
      "loss": 0.0747,
      "step": 7200
    },
    {
      "epoch": 2.442528735632184,
      "grad_norm": 1.2995704412460327,
      "learning_rate": 2.92421052631579e-06,
      "loss": 0.0697,
      "step": 7225
    },
    {
      "epoch": 2.450980392156863,
      "grad_norm": 0.837418794631958,
      "learning_rate": 2.8978947368421055e-06,
      "loss": 0.0532,
      "step": 7250
    },
    {
      "epoch": 2.4594320486815415,
      "grad_norm": 1.582984447479248,
      "learning_rate": 2.8715789473684214e-06,
      "loss": 0.0785,
      "step": 7275
    },
    {
      "epoch": 2.4678837052062206,
      "grad_norm": 2.5198307037353516,
      "learning_rate": 2.845263157894737e-06,
      "loss": 0.0695,
      "step": 7300
    },
    {
      "epoch": 2.476335361730899,
      "grad_norm": 1.9426721334457397,
      "learning_rate": 2.8189473684210526e-06,
      "loss": 0.0683,
      "step": 7325
    },
    {
      "epoch": 2.4847870182555782,
      "grad_norm": 1.8814282417297363,
      "learning_rate": 2.7926315789473684e-06,
      "loss": 0.0635,
      "step": 7350
    },
    {
      "epoch": 2.493238674780257,
      "grad_norm": 1.7943637371063232,
      "learning_rate": 2.766315789473684e-06,
      "loss": 0.0703,
      "step": 7375
    },
    {
      "epoch": 2.501690331304936,
      "grad_norm": 1.7339930534362793,
      "learning_rate": 2.7400000000000004e-06,
      "loss": 0.0834,
      "step": 7400
    },
    {
      "epoch": 2.5101419878296145,
      "grad_norm": 0.6923316121101379,
      "learning_rate": 2.7136842105263163e-06,
      "loss": 0.0749,
      "step": 7425
    },
    {
      "epoch": 2.5185936443542936,
      "grad_norm": 1.8081867694854736,
      "learning_rate": 2.6873684210526317e-06,
      "loss": 0.0832,
      "step": 7450
    },
    {
      "epoch": 2.527045300878972,
      "grad_norm": 0.8502326607704163,
      "learning_rate": 2.6610526315789475e-06,
      "loss": 0.072,
      "step": 7475
    },
    {
      "epoch": 2.535496957403651,
      "grad_norm": 2.073284864425659,
      "learning_rate": 2.6347368421052633e-06,
      "loss": 0.0937,
      "step": 7500
    },
    {
      "epoch": 2.535496957403651,
      "eval_loss": 0.18810197710990906,
      "eval_runtime": 8587.5354,
      "eval_samples_per_second": 1.703,
      "eval_steps_per_second": 0.213,
      "eval_wer": 44.96070962284172,
      "step": 7500
    },
    {
      "epoch": 2.54394861392833,
      "grad_norm": 1.5389723777770996,
      "learning_rate": 2.608421052631579e-06,
      "loss": 0.0695,
      "step": 7525
    },
    {
      "epoch": 2.552400270453009,
      "grad_norm": 1.7400459051132202,
      "learning_rate": 2.5821052631578954e-06,
      "loss": 0.0752,
      "step": 7550
    },
    {
      "epoch": 2.5608519269776875,
      "grad_norm": 0.9405755400657654,
      "learning_rate": 2.5557894736842108e-06,
      "loss": 0.0728,
      "step": 7575
    },
    {
      "epoch": 2.5693035835023665,
      "grad_norm": 2.09281849861145,
      "learning_rate": 2.5294736842105266e-06,
      "loss": 0.0785,
      "step": 7600
    },
    {
      "epoch": 2.577755240027045,
      "grad_norm": 1.1472514867782593,
      "learning_rate": 2.5031578947368424e-06,
      "loss": 0.0763,
      "step": 7625
    },
    {
      "epoch": 2.586206896551724,
      "grad_norm": 1.5787365436553955,
      "learning_rate": 2.476842105263158e-06,
      "loss": 0.0782,
      "step": 7650
    },
    {
      "epoch": 2.5946585530764033,
      "grad_norm": 0.8288754820823669,
      "learning_rate": 2.4505263157894736e-06,
      "loss": 0.0617,
      "step": 7675
    },
    {
      "epoch": 2.603110209601082,
      "grad_norm": 1.3931238651275635,
      "learning_rate": 2.42421052631579e-06,
      "loss": 0.0652,
      "step": 7700
    },
    {
      "epoch": 2.6115618661257605,
      "grad_norm": 1.8449468612670898,
      "learning_rate": 2.3978947368421052e-06,
      "loss": 0.0708,
      "step": 7725
    },
    {
      "epoch": 2.6200135226504395,
      "grad_norm": 1.462990641593933,
      "learning_rate": 2.371578947368421e-06,
      "loss": 0.0799,
      "step": 7750
    },
    {
      "epoch": 2.6284651791751186,
      "grad_norm": 1.80518639087677,
      "learning_rate": 2.3452631578947373e-06,
      "loss": 0.0712,
      "step": 7775
    },
    {
      "epoch": 2.636916835699797,
      "grad_norm": 1.4477194547653198,
      "learning_rate": 2.3189473684210527e-06,
      "loss": 0.0748,
      "step": 7800
    },
    {
      "epoch": 2.645368492224476,
      "grad_norm": 1.8457978963851929,
      "learning_rate": 2.2926315789473685e-06,
      "loss": 0.0652,
      "step": 7825
    },
    {
      "epoch": 2.653820148749155,
      "grad_norm": 1.2285178899765015,
      "learning_rate": 2.2663157894736843e-06,
      "loss": 0.051,
      "step": 7850
    },
    {
      "epoch": 2.662271805273834,
      "grad_norm": 1.664722204208374,
      "learning_rate": 2.24e-06,
      "loss": 0.0696,
      "step": 7875
    },
    {
      "epoch": 2.6707234617985125,
      "grad_norm": 1.1485116481781006,
      "learning_rate": 2.213684210526316e-06,
      "loss": 0.078,
      "step": 7900
    },
    {
      "epoch": 2.679175118323191,
      "grad_norm": 1.6024932861328125,
      "learning_rate": 2.187368421052632e-06,
      "loss": 0.0811,
      "step": 7925
    },
    {
      "epoch": 2.68762677484787,
      "grad_norm": 1.3482303619384766,
      "learning_rate": 2.1610526315789476e-06,
      "loss": 0.0744,
      "step": 7950
    },
    {
      "epoch": 2.696078431372549,
      "grad_norm": 1.4488401412963867,
      "learning_rate": 2.1347368421052634e-06,
      "loss": 0.0717,
      "step": 7975
    },
    {
      "epoch": 2.704530087897228,
      "grad_norm": 2.0738377571105957,
      "learning_rate": 2.1084210526315792e-06,
      "loss": 0.0637,
      "step": 8000
    },
    {
      "epoch": 2.704530087897228,
      "eval_loss": 0.19148404896259308,
      "eval_runtime": 8546.7321,
      "eval_samples_per_second": 1.711,
      "eval_steps_per_second": 0.214,
      "eval_wer": 44.85362072928025,
      "step": 8000
    },
    {
      "epoch": 2.7129817444219064,
      "grad_norm": 1.608497142791748,
      "learning_rate": 2.082105263157895e-06,
      "loss": 0.0725,
      "step": 8025
    },
    {
      "epoch": 2.7214334009465855,
      "grad_norm": 2.1245532035827637,
      "learning_rate": 2.055789473684211e-06,
      "loss": 0.0726,
      "step": 8050
    },
    {
      "epoch": 2.7298850574712645,
      "grad_norm": 1.3856103420257568,
      "learning_rate": 2.0294736842105263e-06,
      "loss": 0.0731,
      "step": 8075
    },
    {
      "epoch": 2.738336713995943,
      "grad_norm": 1.2372052669525146,
      "learning_rate": 2.003157894736842e-06,
      "loss": 0.0756,
      "step": 8100
    },
    {
      "epoch": 2.746788370520622,
      "grad_norm": 1.6612149477005005,
      "learning_rate": 1.976842105263158e-06,
      "loss": 0.0636,
      "step": 8125
    },
    {
      "epoch": 2.755240027045301,
      "grad_norm": 1.719821572303772,
      "learning_rate": 1.9505263157894737e-06,
      "loss": 0.0804,
      "step": 8150
    },
    {
      "epoch": 2.76369168356998,
      "grad_norm": 1.575764536857605,
      "learning_rate": 1.9242105263157896e-06,
      "loss": 0.0778,
      "step": 8175
    },
    {
      "epoch": 2.7721433400946585,
      "grad_norm": 1.507033348083496,
      "learning_rate": 1.8978947368421056e-06,
      "loss": 0.0643,
      "step": 8200
    },
    {
      "epoch": 2.7805949966193375,
      "grad_norm": 1.1668611764907837,
      "learning_rate": 1.8715789473684212e-06,
      "loss": 0.0867,
      "step": 8225
    },
    {
      "epoch": 2.789046653144016,
      "grad_norm": 0.9894188046455383,
      "learning_rate": 1.845263157894737e-06,
      "loss": 0.0648,
      "step": 8250
    },
    {
      "epoch": 2.797498309668695,
      "grad_norm": 1.6405720710754395,
      "learning_rate": 1.8189473684210528e-06,
      "loss": 0.0708,
      "step": 8275
    },
    {
      "epoch": 2.805949966193374,
      "grad_norm": 1.034415364265442,
      "learning_rate": 1.7926315789473686e-06,
      "loss": 0.0546,
      "step": 8300
    },
    {
      "epoch": 2.814401622718053,
      "grad_norm": 0.9419425129890442,
      "learning_rate": 1.7663157894736843e-06,
      "loss": 0.07,
      "step": 8325
    },
    {
      "epoch": 2.8228532792427314,
      "grad_norm": 2.342827558517456,
      "learning_rate": 1.74e-06,
      "loss": 0.0697,
      "step": 8350
    },
    {
      "epoch": 2.8313049357674105,
      "grad_norm": 1.535322904586792,
      "learning_rate": 1.713684210526316e-06,
      "loss": 0.0696,
      "step": 8375
    },
    {
      "epoch": 2.839756592292089,
      "grad_norm": 1.6388691663742065,
      "learning_rate": 1.6873684210526317e-06,
      "loss": 0.0736,
      "step": 8400
    },
    {
      "epoch": 2.848208248816768,
      "grad_norm": 1.7451198101043701,
      "learning_rate": 1.6610526315789473e-06,
      "loss": 0.0751,
      "step": 8425
    },
    {
      "epoch": 2.8566599053414468,
      "grad_norm": 1.4245884418487549,
      "learning_rate": 1.6347368421052633e-06,
      "loss": 0.0797,
      "step": 8450
    },
    {
      "epoch": 2.865111561866126,
      "grad_norm": 0.799862265586853,
      "learning_rate": 1.6084210526315792e-06,
      "loss": 0.0664,
      "step": 8475
    },
    {
      "epoch": 2.873563218390805,
      "grad_norm": 1.09870183467865,
      "learning_rate": 1.5821052631578948e-06,
      "loss": 0.062,
      "step": 8500
    },
    {
      "epoch": 2.873563218390805,
      "eval_loss": 0.1915096938610077,
      "eval_runtime": 8477.0001,
      "eval_samples_per_second": 1.725,
      "eval_steps_per_second": 0.216,
      "eval_wer": 44.89322895018655,
      "step": 8500
    },
    {
      "epoch": 2.8820148749154835,
      "grad_norm": 1.4032130241394043,
      "learning_rate": 1.5557894736842106e-06,
      "loss": 0.0743,
      "step": 8525
    },
    {
      "epoch": 2.890466531440162,
      "grad_norm": 0.7570385932922363,
      "learning_rate": 1.5294736842105264e-06,
      "loss": 0.0687,
      "step": 8550
    },
    {
      "epoch": 2.898918187964841,
      "grad_norm": 1.0193418264389038,
      "learning_rate": 1.5031578947368422e-06,
      "loss": 0.0615,
      "step": 8575
    },
    {
      "epoch": 2.90736984448952,
      "grad_norm": 1.9574233293533325,
      "learning_rate": 1.4768421052631578e-06,
      "loss": 0.0835,
      "step": 8600
    },
    {
      "epoch": 2.915821501014199,
      "grad_norm": 1.0290601253509521,
      "learning_rate": 1.4505263157894739e-06,
|
"loss": 0.0678, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 2.9242731575388774, |
|
"grad_norm": 1.3625876903533936, |
|
"learning_rate": 1.4242105263157897e-06, |
|
"loss": 0.0621, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 2.9327248140635565, |
|
"grad_norm": 1.5794146060943604, |
|
"learning_rate": 1.3978947368421053e-06, |
|
"loss": 0.0657, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 2.9411764705882355, |
|
"grad_norm": 1.314706563949585, |
|
"learning_rate": 1.3715789473684213e-06, |
|
"loss": 0.069, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.949628127112914, |
|
"grad_norm": 1.9198169708251953, |
|
"learning_rate": 1.345263157894737e-06, |
|
"loss": 0.0701, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 2.9580797836375927, |
|
"grad_norm": 1.3500406742095947, |
|
"learning_rate": 1.3189473684210527e-06, |
|
"loss": 0.076, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 2.9665314401622718, |
|
"grad_norm": 1.7573151588439941, |
|
"learning_rate": 1.2926315789473683e-06, |
|
"loss": 0.0697, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 2.974983096686951, |
|
"grad_norm": 0.8199857473373413, |
|
"learning_rate": 1.2663157894736844e-06, |
|
"loss": 0.0744, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.9834347532116294, |
|
"grad_norm": 2.14540696144104, |
|
"learning_rate": 1.2400000000000002e-06, |
|
"loss": 0.0686, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 2.991886409736308, |
|
"grad_norm": 1.5464670658111572, |
|
"learning_rate": 1.213684210526316e-06, |
|
"loss": 0.07, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 3.000338066260987, |
|
"grad_norm": 1.2003583908081055, |
|
"learning_rate": 1.188421052631579e-06, |
|
"loss": 0.0781, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 3.008789722785666, |
|
"grad_norm": 0.630574107170105, |
|
"learning_rate": 1.1621052631578948e-06, |
|
"loss": 0.0565, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 3.0172413793103448, |
|
"grad_norm": 1.4540669918060303, |
|
"learning_rate": 1.1357894736842106e-06, |
|
"loss": 0.0547, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 3.025693035835024, |
|
"grad_norm": 1.408868670463562, |
|
"learning_rate": 1.1094736842105264e-06, |
|
"loss": 0.0604, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 3.0341446923597024, |
|
"grad_norm": 1.040751576423645, |
|
"learning_rate": 1.0831578947368422e-06, |
|
"loss": 0.063, |
|
"step": 8975 |
|
}, |
|
{ |
|
"epoch": 3.0425963488843815, |
|
"grad_norm": 1.449309229850769, |
|
"learning_rate": 1.0568421052631578e-06, |
|
"loss": 0.0593, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.0425963488843815, |
|
"eval_loss": 0.19757185876369476, |
|
"eval_runtime": 8436.2186, |
|
"eval_samples_per_second": 1.734, |
|
"eval_steps_per_second": 0.217, |
|
"eval_wer": 44.964132555512634, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.05104800540906, |
|
"grad_norm": 2.647589921951294, |
|
"learning_rate": 1.0305263157894739e-06, |
|
"loss": 0.0602, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 3.059499661933739, |
|
"grad_norm": 1.1657071113586426, |
|
"learning_rate": 1.0042105263157897e-06, |
|
"loss": 0.0637, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 3.0679513184584177, |
|
"grad_norm": 1.9827054738998413, |
|
"learning_rate": 9.778947368421053e-07, |
|
"loss": 0.0667, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 3.076402974983097, |
|
"grad_norm": 1.7273396253585815, |
|
"learning_rate": 9.515789473684212e-07, |
|
"loss": 0.0592, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 3.0848546315077754, |
|
"grad_norm": 1.3955703973770142, |
|
"learning_rate": 9.252631578947368e-07, |
|
"loss": 0.0522, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 3.0933062880324544, |
|
"grad_norm": 1.3141138553619385, |
|
"learning_rate": 8.989473684210527e-07, |
|
"loss": 0.0627, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 3.101757944557133, |
|
"grad_norm": 0.7096537351608276, |
|
"learning_rate": 8.726315789473686e-07, |
|
"loss": 0.0583, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 3.110209601081812, |
|
"grad_norm": 1.3626320362091064, |
|
"learning_rate": 8.463157894736843e-07, |
|
"loss": 0.0556, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 3.1186612576064907, |
|
"grad_norm": 1.7827398777008057, |
|
"learning_rate": 8.200000000000001e-07, |
|
"loss": 0.0549, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 3.1271129141311698, |
|
"grad_norm": 1.8511258363723755, |
|
"learning_rate": 7.936842105263158e-07, |
|
"loss": 0.052, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 3.1355645706558484, |
|
"grad_norm": 1.5785564184188843, |
|
"learning_rate": 7.673684210526316e-07, |
|
"loss": 0.0554, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 3.1440162271805274, |
|
"grad_norm": 1.3359767198562622, |
|
"learning_rate": 7.410526315789475e-07, |
|
"loss": 0.0599, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 3.152467883705206, |
|
"grad_norm": 1.2103520631790161, |
|
"learning_rate": 7.147368421052632e-07, |
|
"loss": 0.0564, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 3.160919540229885, |
|
"grad_norm": 1.3505812883377075, |
|
"learning_rate": 6.884210526315791e-07, |
|
"loss": 0.0642, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 3.1693711967545637, |
|
"grad_norm": 1.9448202848434448, |
|
"learning_rate": 6.621052631578948e-07, |
|
"loss": 0.0567, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 3.1778228532792427, |
|
"grad_norm": 1.4661892652511597, |
|
"learning_rate": 6.357894736842106e-07, |
|
"loss": 0.063, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 3.186274509803922, |
|
"grad_norm": 2.1202943325042725, |
|
"learning_rate": 6.094736842105263e-07, |
|
"loss": 0.0589, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 3.1947261663286004, |
|
"grad_norm": 1.8288671970367432, |
|
"learning_rate": 5.831578947368421e-07, |
|
"loss": 0.0601, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 3.203177822853279, |
|
"grad_norm": 1.0134177207946777, |
|
"learning_rate": 5.56842105263158e-07, |
|
"loss": 0.0546, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 3.211629479377958, |
|
"grad_norm": 2.195054292678833, |
|
"learning_rate": 5.305263157894737e-07, |
|
"loss": 0.0598, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.211629479377958, |
|
"eval_loss": 0.1982024759054184, |
|
"eval_runtime": 8467.5483, |
|
"eval_samples_per_second": 1.727, |
|
"eval_steps_per_second": 0.216, |
|
"eval_wer": 44.87513630606886, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.220081135902637, |
|
"grad_norm": 1.3598229885101318, |
|
"learning_rate": 5.042105263157895e-07, |
|
"loss": 0.0544, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 3.2285327924273157, |
|
"grad_norm": 1.9829254150390625, |
|
"learning_rate": 4.778947368421053e-07, |
|
"loss": 0.0583, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 3.2369844489519948, |
|
"grad_norm": 1.2495145797729492, |
|
"learning_rate": 4.5157894736842107e-07, |
|
"loss": 0.0659, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 3.2454361054766734, |
|
"grad_norm": 1.0571123361587524, |
|
"learning_rate": 4.2526315789473684e-07, |
|
"loss": 0.0548, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 3.2538877620013524, |
|
"grad_norm": 1.3266067504882812, |
|
"learning_rate": 3.9894736842105266e-07, |
|
"loss": 0.0606, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 3.262339418526031, |
|
"grad_norm": 2.1030797958374023, |
|
"learning_rate": 3.726315789473685e-07, |
|
"loss": 0.0651, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 3.27079107505071, |
|
"grad_norm": 0.6798356175422668, |
|
"learning_rate": 3.4631578947368424e-07, |
|
"loss": 0.0567, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 3.2792427315753887, |
|
"grad_norm": 1.9583333730697632, |
|
"learning_rate": 3.2e-07, |
|
"loss": 0.0581, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 3.2876943881000678, |
|
"grad_norm": 2.115421772003174, |
|
"learning_rate": 2.936842105263158e-07, |
|
"loss": 0.0502, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 3.2961460446247464, |
|
"grad_norm": 1.1875923871994019, |
|
"learning_rate": 2.6736842105263164e-07, |
|
"loss": 0.0621, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 3.3045977011494254, |
|
"grad_norm": 1.232844352722168, |
|
"learning_rate": 2.410526315789474e-07, |
|
"loss": 0.0591, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 3.313049357674104, |
|
"grad_norm": 1.3175997734069824, |
|
"learning_rate": 2.1473684210526317e-07, |
|
"loss": 0.0547, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 3.321501014198783, |
|
"grad_norm": 1.7271957397460938, |
|
"learning_rate": 1.8842105263157897e-07, |
|
"loss": 0.0611, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 3.3299526707234617, |
|
"grad_norm": 1.2155121564865112, |
|
"learning_rate": 1.6210526315789476e-07, |
|
"loss": 0.0645, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 3.3384043272481407, |
|
"grad_norm": 1.6896015405654907, |
|
"learning_rate": 1.3578947368421055e-07, |
|
"loss": 0.0526, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 3.3468559837728193, |
|
"grad_norm": 1.4547408819198608, |
|
"learning_rate": 1.0947368421052632e-07, |
|
"loss": 0.0512, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 3.3553076402974984, |
|
"grad_norm": 0.5560055375099182, |
|
"learning_rate": 8.315789473684211e-08, |
|
"loss": 0.0665, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 3.363759296822177, |
|
"grad_norm": 1.9773001670837402, |
|
"learning_rate": 5.68421052631579e-08, |
|
"loss": 0.0522, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 3.372210953346856, |
|
"grad_norm": 1.2007287740707397, |
|
"learning_rate": 3.0526315789473686e-08, |
|
"loss": 0.0623, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 3.3806626098715347, |
|
"grad_norm": 1.430873990058899, |
|
"learning_rate": 4.210526315789474e-09, |
|
"loss": 0.0553, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.3806626098715347, |
|
"eval_loss": 0.19793640077114105, |
|
"eval_runtime": 8419.2841, |
|
"eval_samples_per_second": 1.737, |
|
"eval_steps_per_second": 0.217, |
|
"eval_wer": 44.86535649843768, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.519477255591857, |
|
"grad_norm": 2.5541698932647705, |
|
"learning_rate": 3.4337931034482762e-06, |
|
"loss": 0.1093, |
|
"step": 10025 |
|
}, |
|
{ |
|
"epoch": 2.5257602412666498, |
|
"grad_norm": 2.943728446960449, |
|
"learning_rate": 3.4165517241379315e-06, |
|
"loss": 0.0942, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 2.5320432269414423, |
|
"grad_norm": 1.3236327171325684, |
|
"learning_rate": 3.3993103448275864e-06, |
|
"loss": 0.0831, |
|
"step": 10075 |
|
}, |
|
{ |
|
"epoch": 2.5383262126162354, |
|
"grad_norm": 1.9076201915740967, |
|
"learning_rate": 3.3820689655172417e-06, |
|
"loss": 0.0808, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.544609198291028, |
|
"grad_norm": 1.5796664953231812, |
|
"learning_rate": 3.364827586206897e-06, |
|
"loss": 0.0738, |
|
"step": 10125 |
|
}, |
|
{ |
|
"epoch": 2.5508921839658205, |
|
"grad_norm": 1.349244475364685, |
|
"learning_rate": 3.347586206896552e-06, |
|
"loss": 0.0854, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 2.557175169640613, |
|
"grad_norm": 1.5423616170883179, |
|
"learning_rate": 3.330344827586207e-06, |
|
"loss": 0.0694, |
|
"step": 10175 |
|
}, |
|
{ |
|
"epoch": 2.5634581553154057, |
|
"grad_norm": 1.882533073425293, |
|
"learning_rate": 3.3131034482758624e-06, |
|
"loss": 0.0804, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.5697411409901987, |
|
"grad_norm": 1.6644792556762695, |
|
"learning_rate": 3.2958620689655173e-06, |
|
"loss": 0.0818, |
|
"step": 10225 |
|
}, |
|
{ |
|
"epoch": 2.5760241266649913, |
|
"grad_norm": 1.648979663848877, |
|
"learning_rate": 3.2786206896551726e-06, |
|
"loss": 0.0806, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 2.582307112339784, |
|
"grad_norm": 1.8302336931228638, |
|
"learning_rate": 3.261379310344828e-06, |
|
"loss": 0.0685, |
|
"step": 10275 |
|
}, |
|
{ |
|
"epoch": 2.5885900980145764, |
|
"grad_norm": 1.5764237642288208, |
|
"learning_rate": 3.2441379310344828e-06, |
|
"loss": 0.0669, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.594873083689369, |
|
"grad_norm": 1.609424352645874, |
|
"learning_rate": 3.226896551724138e-06, |
|
"loss": 0.0703, |
|
"step": 10325 |
|
}, |
|
{ |
|
"epoch": 2.601156069364162, |
|
"grad_norm": 2.0463669300079346, |
|
"learning_rate": 3.209655172413793e-06, |
|
"loss": 0.0812, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 2.6074390550389546, |
|
"grad_norm": 1.0998157262802124, |
|
"learning_rate": 3.1924137931034486e-06, |
|
"loss": 0.0811, |
|
"step": 10375 |
|
}, |
|
{ |
|
"epoch": 2.613722040713747, |
|
"grad_norm": 2.0969502925872803, |
|
"learning_rate": 3.175172413793104e-06, |
|
"loss": 0.0831, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.6200050263885397, |
|
"grad_norm": 1.6050735712051392, |
|
"learning_rate": 3.1579310344827592e-06, |
|
"loss": 0.0856, |
|
"step": 10425 |
|
}, |
|
{ |
|
"epoch": 2.6262880120633323, |
|
"grad_norm": 1.6979131698608398, |
|
"learning_rate": 3.140689655172414e-06, |
|
"loss": 0.0774, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 2.6325709977381253, |
|
"grad_norm": 2.0969455242156982, |
|
"learning_rate": 3.1234482758620694e-06, |
|
"loss": 0.0871, |
|
"step": 10475 |
|
}, |
|
{ |
|
"epoch": 2.638853983412918, |
|
"grad_norm": 1.3703320026397705, |
|
"learning_rate": 3.1062068965517243e-06, |
|
"loss": 0.0677, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.638853983412918, |
|
"eval_loss": 0.169998437166214, |
|
"eval_runtime": 11305.71, |
|
"eval_samples_per_second": 1.728, |
|
"eval_steps_per_second": 0.216, |
|
"eval_wer": 44.5221605147797, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.6451369690877105, |
|
"grad_norm": 1.9054275751113892, |
|
"learning_rate": 3.0889655172413796e-06, |
|
"loss": 0.0703, |
|
"step": 10525 |
|
}, |
|
{ |
|
"epoch": 2.651419954762503, |
|
"grad_norm": 1.9211713075637817, |
|
"learning_rate": 3.071724137931035e-06, |
|
"loss": 0.078, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 2.6577029404372956, |
|
"grad_norm": 1.7208181619644165, |
|
"learning_rate": 3.0544827586206897e-06, |
|
"loss": 0.0732, |
|
"step": 10575 |
|
}, |
|
{ |
|
"epoch": 2.6639859261120886, |
|
"grad_norm": 1.2526366710662842, |
|
"learning_rate": 3.037241379310345e-06, |
|
"loss": 0.0827, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.670268911786881, |
|
"grad_norm": 1.8517524003982544, |
|
"learning_rate": 3.0200000000000003e-06, |
|
"loss": 0.0758, |
|
"step": 10625 |
|
}, |
|
{ |
|
"epoch": 2.6765518974616738, |
|
"grad_norm": 1.4826529026031494, |
|
"learning_rate": 3.002758620689655e-06, |
|
"loss": 0.0831, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 2.6828348831364663, |
|
"grad_norm": 2.6525490283966064, |
|
"learning_rate": 2.9855172413793105e-06, |
|
"loss": 0.0783, |
|
"step": 10675 |
|
}, |
|
{ |
|
"epoch": 2.689117868811259, |
|
"grad_norm": 1.8993303775787354, |
|
"learning_rate": 2.9682758620689658e-06, |
|
"loss": 0.0821, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.695400854486052, |
|
"grad_norm": 1.4494125843048096, |
|
"learning_rate": 2.9510344827586206e-06, |
|
"loss": 0.0771, |
|
"step": 10725 |
|
}, |
|
{ |
|
"epoch": 2.7016838401608445, |
|
"grad_norm": 1.8887574672698975, |
|
"learning_rate": 2.933793103448276e-06, |
|
"loss": 0.0705, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 2.707966825835637, |
|
"grad_norm": 2.0872342586517334, |
|
"learning_rate": 2.9165517241379316e-06, |
|
"loss": 0.0707, |
|
"step": 10775 |
|
}, |
|
{ |
|
"epoch": 2.7142498115104297, |
|
"grad_norm": 1.3491803407669067, |
|
"learning_rate": 2.8993103448275865e-06, |
|
"loss": 0.0733, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.7205327971852222, |
|
"grad_norm": 1.6230404376983643, |
|
"learning_rate": 2.882068965517242e-06, |
|
"loss": 0.0703, |
|
"step": 10825 |
|
}, |
|
{ |
|
"epoch": 2.7268157828600152, |
|
"grad_norm": 1.4654004573822021, |
|
"learning_rate": 2.864827586206897e-06, |
|
"loss": 0.0705, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 2.733098768534808, |
|
"grad_norm": 1.7205106019973755, |
|
"learning_rate": 2.847586206896552e-06, |
|
"loss": 0.0713, |
|
"step": 10875 |
|
}, |
|
{ |
|
"epoch": 2.7393817542096004, |
|
"grad_norm": 1.7660115957260132, |
|
"learning_rate": 2.8303448275862073e-06, |
|
"loss": 0.0759, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.745664739884393, |
|
"grad_norm": 1.3411388397216797, |
|
"learning_rate": 2.813103448275862e-06, |
|
"loss": 0.0819, |
|
"step": 10925 |
|
}, |
|
{ |
|
"epoch": 2.7519477255591855, |
|
"grad_norm": 1.66763436794281, |
|
"learning_rate": 2.7958620689655174e-06, |
|
"loss": 0.0714, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 2.7582307112339786, |
|
"grad_norm": 1.856402039527893, |
|
"learning_rate": 2.7786206896551727e-06, |
|
"loss": 0.0737, |
|
"step": 10975 |
|
}, |
|
{ |
|
"epoch": 2.764513696908771, |
|
"grad_norm": 1.4149645566940308, |
|
"learning_rate": 2.7613793103448276e-06, |
|
"loss": 0.0698, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.764513696908771, |
|
"eval_loss": 0.16873179376125336, |
|
"eval_runtime": 11438.4823, |
|
"eval_samples_per_second": 1.708, |
|
"eval_steps_per_second": 0.213, |
|
"eval_wer": 44.161424743801774, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.7707966825835637, |
|
"grad_norm": 1.936444878578186, |
|
"learning_rate": 2.744137931034483e-06, |
|
"loss": 0.0757, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 2.7770796682583563, |
|
"grad_norm": 1.3079452514648438, |
|
"learning_rate": 2.726896551724138e-06, |
|
"loss": 0.0773, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 2.783362653933149, |
|
"grad_norm": 1.8395476341247559, |
|
"learning_rate": 2.709655172413793e-06, |
|
"loss": 0.069, |
|
"step": 11075 |
|
}, |
|
{ |
|
"epoch": 2.789645639607942, |
|
"grad_norm": 1.5000851154327393, |
|
"learning_rate": 2.6924137931034483e-06, |
|
"loss": 0.0808, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.7959286252827344, |
|
"grad_norm": 1.7593291997909546, |
|
"learning_rate": 2.6751724137931036e-06, |
|
"loss": 0.0742, |
|
"step": 11125 |
|
}, |
|
{ |
|
"epoch": 2.802211610957527, |
|
"grad_norm": 1.671397089958191, |
|
"learning_rate": 2.6579310344827585e-06, |
|
"loss": 0.0742, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 2.8084945966323196, |
|
"grad_norm": 2.4030308723449707, |
|
"learning_rate": 2.6406896551724142e-06, |
|
"loss": 0.0715, |
|
"step": 11175 |
|
}, |
|
{ |
|
"epoch": 2.814777582307112, |
|
"grad_norm": 1.1083357334136963, |
|
"learning_rate": 2.6234482758620695e-06, |
|
"loss": 0.0685, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.821060567981905, |
|
"grad_norm": 2.314483404159546, |
|
"learning_rate": 2.6062068965517244e-06, |
|
"loss": 0.0709, |
|
"step": 11225 |
|
}, |
|
{ |
|
"epoch": 2.8273435536566978, |
|
"grad_norm": 2.6994457244873047, |
|
"learning_rate": 2.5889655172413797e-06, |
|
"loss": 0.0716, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 2.8336265393314903, |
|
"grad_norm": 1.5674411058425903, |
|
"learning_rate": 2.571724137931035e-06, |
|
"loss": 0.0751, |
|
"step": 11275 |
|
}, |
|
{ |
|
"epoch": 2.839909525006283, |
|
"grad_norm": 1.6868212223052979, |
|
"learning_rate": 2.55448275862069e-06, |
|
"loss": 0.0775, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.8461925106810755, |
|
"grad_norm": 1.8248207569122314, |
|
"learning_rate": 2.537241379310345e-06, |
|
"loss": 0.0682, |
|
"step": 11325 |
|
}, |
|
{ |
|
"epoch": 2.8524754963558685, |
|
"grad_norm": 1.6669248342514038, |
|
"learning_rate": 2.52e-06, |
|
"loss": 0.0706, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 2.858758482030661, |
|
"grad_norm": 2.579449415206909, |
|
"learning_rate": 2.5027586206896553e-06, |
|
"loss": 0.0711, |
|
"step": 11375 |
|
}, |
|
{ |
|
"epoch": 2.8650414677054536, |
|
"grad_norm": 2.2403104305267334, |
|
"learning_rate": 2.4855172413793106e-06, |
|
"loss": 0.0731, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.8713244533802462, |
|
"grad_norm": 1.7787814140319824, |
|
"learning_rate": 2.4682758620689655e-06, |
|
"loss": 0.0683, |
|
"step": 11425 |
|
}, |
|
{ |
|
"epoch": 2.877607439055039, |
|
"grad_norm": 1.8239678144454956, |
|
"learning_rate": 2.4510344827586208e-06, |
|
"loss": 0.0752, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 2.883890424729832, |
|
"grad_norm": 3.1103549003601074, |
|
"learning_rate": 2.433793103448276e-06, |
|
"loss": 0.074, |
|
"step": 11475 |
|
}, |
|
{ |
|
"epoch": 2.8901734104046244, |
|
"grad_norm": 1.3953099250793457, |
|
"learning_rate": 2.4165517241379314e-06, |
|
"loss": 0.069, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.8901734104046244, |
|
"eval_loss": 0.16406717896461487, |
|
"eval_runtime": 11006.7152, |
|
"eval_samples_per_second": 1.775, |
|
"eval_steps_per_second": 0.222, |
|
"eval_wer": 43.59414156441607, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.896456396079417, |
|
"grad_norm": 1.4673534631729126, |
|
"learning_rate": 2.3993103448275866e-06, |
|
"loss": 0.072, |
|
"step": 11525 |
|
}, |
|
{ |
|
"epoch": 2.9027393817542095, |
|
"grad_norm": 1.790159821510315, |
|
"learning_rate": 2.3820689655172415e-06, |
|
"loss": 0.0846, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 2.909022367429002, |
|
"grad_norm": 1.482283115386963, |
|
"learning_rate": 2.364827586206897e-06, |
|
"loss": 0.0718, |
|
"step": 11575 |
|
}, |
|
{ |
|
"epoch": 2.915305353103795, |
|
"grad_norm": 1.344299077987671, |
|
"learning_rate": 2.3475862068965517e-06, |
|
"loss": 0.0647, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.9215883387785877, |
|
"grad_norm": 2.296725034713745, |
|
"learning_rate": 2.330344827586207e-06, |
|
"loss": 0.0729, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 2.9278713244533803, |
|
"grad_norm": 1.4769775867462158, |
|
"learning_rate": 2.3131034482758623e-06, |
|
"loss": 0.0665, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 2.934154310128173, |
|
"grad_norm": 1.83705735206604, |
|
"learning_rate": 2.2958620689655176e-06, |
|
"loss": 0.0777, |
|
"step": 11675 |
|
}, |
|
{ |
|
"epoch": 2.9404372958029654, |
|
"grad_norm": 1.8100663423538208, |
|
"learning_rate": 2.278620689655173e-06, |
|
"loss": 0.0717, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.9467202814777584, |
|
"grad_norm": 1.144631028175354, |
|
"learning_rate": 2.2613793103448277e-06, |
|
"loss": 0.0622, |
|
"step": 11725 |
|
}, |
|
{ |
|
"epoch": 2.953003267152551, |
|
"grad_norm": 1.8608859777450562, |
|
"learning_rate": 2.244137931034483e-06, |
|
"loss": 0.0706, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 2.9592862528273436, |
|
"grad_norm": 2.04830002784729, |
|
"learning_rate": 2.2268965517241383e-06, |
|
"loss": 0.065, |
|
"step": 11775 |
|
}, |
|
{ |
|
"epoch": 2.965569238502136, |
|
"grad_norm": 1.9950125217437744, |
|
"learning_rate": 2.209655172413793e-06, |
|
"loss": 0.0734, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.9718522241769287, |
|
"grad_norm": 1.2800809144973755, |
|
"learning_rate": 2.1924137931034485e-06, |
|
"loss": 0.0626, |
|
"step": 11825 |
|
}, |
|
{ |
|
"epoch": 2.9781352098517218, |
|
"grad_norm": 2.180931568145752, |
|
"learning_rate": 2.1751724137931033e-06, |
|
"loss": 0.0694, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 2.9844181955265143, |
|
"grad_norm": 1.5621517896652222, |
|
"learning_rate": 2.157931034482759e-06, |
|
"loss": 0.0789, |
|
"step": 11875 |
|
}, |
|
{ |
|
"epoch": 2.990701181201307, |
|
"grad_norm": 1.7615336179733276, |
|
"learning_rate": 2.140689655172414e-06, |
|
"loss": 0.0714, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.9969841668760995, |
|
"grad_norm": 1.793643593788147, |
|
"learning_rate": 2.1234482758620692e-06, |
|
"loss": 0.0663, |
|
"step": 11925 |
|
}, |
|
{ |
|
"epoch": 3.003267152550892, |
|
"grad_norm": 1.6309877634048462, |
|
"learning_rate": 2.1062068965517245e-06, |
|
"loss": 0.0638, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 3.009550138225685, |
|
"grad_norm": 1.6418914794921875, |
|
"learning_rate": 2.0889655172413794e-06, |
|
"loss": 0.0601, |
|
"step": 11975 |
|
}, |
|
{ |
|
"epoch": 3.0158331239004776, |
|
"grad_norm": 1.7936900854110718, |
|
"learning_rate": 2.0717241379310347e-06, |
|
"loss": 0.0575, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.0158331239004776, |
|
"eval_loss": 0.1658269762992859, |
|
"eval_runtime": 10966.6675, |
|
"eval_samples_per_second": 1.781, |
|
"eval_steps_per_second": 0.223, |
|
"eval_wer": 43.487979085269416, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.02211610957527, |
|
"grad_norm": 1.462873935699463, |
|
"learning_rate": 2.0544827586206896e-06, |
|
"loss": 0.0606, |
|
"step": 12025 |
|
}, |
|
{ |
|
"epoch": 3.028399095250063, |
|
"grad_norm": 1.7744050025939941, |
|
"learning_rate": 2.037241379310345e-06, |
|
"loss": 0.07, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 3.0346820809248554, |
|
"grad_norm": 2.0455141067504883, |
|
"learning_rate": 2.02e-06, |
|
"loss": 0.063, |
|
"step": 12075 |
|
}, |
|
{ |
|
"epoch": 3.040965066599648, |
|
"grad_norm": 1.5644878149032593, |
|
"learning_rate": 2.0027586206896554e-06, |
|
"loss": 0.0688, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 3.047248052274441, |
|
"grad_norm": 1.5917677879333496, |
|
"learning_rate": 1.9855172413793107e-06, |
|
"loss": 0.0751, |
|
"step": 12125 |
|
}, |
|
{ |
|
"epoch": 3.0535310379492335, |
|
"grad_norm": 1.8695135116577148, |
|
"learning_rate": 1.9682758620689656e-06, |
|
"loss": 0.0683, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 3.059814023624026, |
|
"grad_norm": 1.6950143575668335, |
|
"learning_rate": 1.951034482758621e-06, |
|
"loss": 0.0707, |
|
"step": 12175 |
|
}, |
|
{ |
|
"epoch": 3.0660970092988187, |
|
"grad_norm": 1.7341082096099854, |
|
"learning_rate": 1.933793103448276e-06, |
|
"loss": 0.0705, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 3.0723799949736113, |
|
"grad_norm": 2.4413957595825195, |
|
"learning_rate": 1.916551724137931e-06, |
|
"loss": 0.0732, |
|
"step": 12225 |
|
}, |
|
{ |
|
"epoch": 3.0786629806484043, |
|
"grad_norm": 1.6173579692840576, |
|
"learning_rate": 1.8993103448275864e-06, |
|
"loss": 0.0693, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 3.084945966323197, |
|
"grad_norm": 1.606766939163208, |
|
"learning_rate": 1.8820689655172416e-06, |
|
"loss": 0.0648, |
|
"step": 12275 |
|
}, |
|
{ |
|
"epoch": 3.0912289519979894, |
|
"grad_norm": 1.7197469472885132, |
|
"learning_rate": 1.8648275862068967e-06, |
|
"loss": 0.0607, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 3.097511937672782, |
|
"grad_norm": 1.3897112607955933, |
|
"learning_rate": 1.847586206896552e-06, |
|
"loss": 0.0722, |
|
"step": 12325 |
|
}, |
|
{ |
|
"epoch": 3.1037949233475746, |
|
"grad_norm": 1.53862726688385, |
|
"learning_rate": 1.830344827586207e-06, |
|
"loss": 0.0635, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 3.1100779090223676, |
|
"grad_norm": 2.654273509979248, |
|
"learning_rate": 1.8131034482758622e-06, |
|
"loss": 0.0604, |
|
"step": 12375 |
|
}, |
|
{ |
|
"epoch": 3.11636089469716, |
|
"grad_norm": 1.34363853931427, |
|
"learning_rate": 1.7958620689655173e-06, |
|
"loss": 0.0563, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 3.1226438803719527, |
|
"grad_norm": 1.0993194580078125, |
|
"learning_rate": 1.7786206896551726e-06, |
|
"loss": 0.0689, |
|
"step": 12425 |
|
}, |
|
{ |
|
"epoch": 3.1289268660467453, |
|
"grad_norm": 1.6156988143920898, |
|
"learning_rate": 1.7613793103448276e-06, |
|
"loss": 0.0563, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 3.135209851721538, |
|
"grad_norm": 1.7135562896728516, |
|
"learning_rate": 1.744137931034483e-06, |
|
"loss": 0.0606, |
|
"step": 12475 |
|
}, |
|
{ |
|
"epoch": 3.141492837396331, |
|
"grad_norm": 1.968897819519043, |
|
"learning_rate": 1.7268965517241382e-06, |
|
"loss": 0.0653, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.141492837396331, |
|
"eval_loss": 0.1663118600845337, |
|
"eval_runtime": 11490.7091, |
|
"eval_samples_per_second": 1.7, |
|
"eval_steps_per_second": 0.213, |
|
"eval_wer": 43.23918334332368, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.1477758230711235, |
|
"grad_norm": 2.3499624729156494, |
|
"learning_rate": 1.7096551724137933e-06, |
|
"loss": 0.0694, |
|
"step": 12525 |
|
}, |
|
{ |
|
"epoch": 3.154058808745916, |
|
"grad_norm": 2.061582088470459, |
|
"learning_rate": 1.6924137931034484e-06, |
|
"loss": 0.0585, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 3.1603417944207086, |
|
"grad_norm": 1.3034265041351318, |
|
"learning_rate": 1.6751724137931037e-06, |
|
"loss": 0.0585, |
|
"step": 12575 |
|
}, |
|
{ |
|
"epoch": 3.166624780095501, |
|
"grad_norm": 1.5961774587631226, |
|
"learning_rate": 1.6579310344827588e-06, |
|
"loss": 0.0623, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 3.172907765770294, |
|
"grad_norm": 1.5717999935150146, |
|
"learning_rate": 1.6406896551724138e-06, |
|
"loss": 0.0801, |
|
"step": 12625 |
|
}, |
|
{ |
|
"epoch": 3.179190751445087, |
|
"grad_norm": 0.8206491470336914, |
|
"learning_rate": 1.623448275862069e-06, |
|
"loss": 0.0645, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 3.1854737371198794, |
|
"grad_norm": 1.6915777921676636, |
|
"learning_rate": 1.6062068965517244e-06, |
|
"loss": 0.0639, |
|
"step": 12675 |
|
}, |
|
{ |
|
"epoch": 3.191756722794672, |
|
"grad_norm": 1.749098300933838, |
|
"learning_rate": 1.5889655172413795e-06, |
|
"loss": 0.0613, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 3.1980397084694645, |
|
"grad_norm": 1.5118510723114014, |
|
"learning_rate": 1.5717241379310346e-06, |
|
"loss": 0.0654, |
|
"step": 12725 |
|
}, |
|
{ |
|
"epoch": 3.2043226941442575, |
|
"grad_norm": 1.413273811340332, |
|
"learning_rate": 1.55448275862069e-06, |
|
"loss": 0.071, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 3.21060567981905, |
|
"grad_norm": 2.4434258937835693, |
|
"learning_rate": 1.537241379310345e-06, |
|
"loss": 0.0611, |
|
"step": 12775 |
|
}, |
|
{ |
|
"epoch": 3.2168886654938427, |
|
"grad_norm": 1.5421768426895142, |
|
"learning_rate": 1.52e-06, |
|
"loss": 0.0626, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 3.2231716511686352, |
|
"grad_norm": 1.3737552165985107, |
|
"learning_rate": 1.5027586206896551e-06, |
|
"loss": 0.0644, |
|
"step": 12825 |
|
}, |
|
{ |
|
"epoch": 3.229454636843428, |
|
"grad_norm": 1.0774625539779663, |
|
"learning_rate": 1.4855172413793104e-06, |
|
"loss": 0.0628, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 3.235737622518221, |
|
"grad_norm": 1.323012113571167, |
|
"learning_rate": 1.4682758620689657e-06, |
|
"loss": 0.0681, |
|
"step": 12875 |
|
}, |
|
{ |
|
"epoch": 3.2420206081930134, |
|
"grad_norm": 1.7004013061523438, |
|
"learning_rate": 1.4510344827586208e-06, |
|
"loss": 0.0575, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 3.248303593867806, |
|
"grad_norm": 1.1748865842819214, |
|
"learning_rate": 1.433793103448276e-06, |
|
"loss": 0.0686, |
|
"step": 12925 |
|
}, |
|
{ |
|
"epoch": 3.2545865795425986, |
|
"grad_norm": 1.9509600400924683, |
|
"learning_rate": 1.4165517241379312e-06, |
|
"loss": 0.064, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 3.260869565217391, |
|
"grad_norm": 1.4009268283843994, |
|
"learning_rate": 1.3993103448275863e-06, |
|
"loss": 0.07, |
|
"step": 12975 |
|
}, |
|
{ |
|
"epoch": 3.267152550892184, |
|
"grad_norm": 2.449444532394409, |
|
"learning_rate": 1.3820689655172416e-06, |
|
"loss": 0.0759, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.267152550892184, |
|
"eval_loss": 0.16605377197265625, |
|
"eval_runtime": 11752.4146, |
|
"eval_samples_per_second": 1.662, |
|
"eval_steps_per_second": 0.208, |
|
"eval_wer": 43.109910664634896, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.2734355365669767, |
|
"grad_norm": 1.3904393911361694, |
|
"learning_rate": 1.3648275862068966e-06, |
|
"loss": 0.0607, |
|
"step": 13025 |
|
}, |
|
{ |
|
"epoch": 3.2797185222417693, |
|
"grad_norm": 1.4722603559494019, |
|
"learning_rate": 1.3475862068965517e-06, |
|
"loss": 0.0646, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 3.286001507916562, |
|
"grad_norm": 1.7087178230285645, |
|
"learning_rate": 1.3303448275862072e-06, |
|
"loss": 0.056, |
|
"step": 13075 |
|
}, |
|
{ |
|
"epoch": 3.2922844935913544, |
|
"grad_norm": 1.5727674961090088, |
|
"learning_rate": 1.3131034482758623e-06, |
|
"loss": 0.0635, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 3.2985674792661475, |
|
"grad_norm": 1.9577021598815918, |
|
"learning_rate": 1.2958620689655174e-06, |
|
"loss": 0.064, |
|
"step": 13125 |
|
}, |
|
{ |
|
"epoch": 3.30485046494094, |
|
"grad_norm": 1.86566960811615, |
|
"learning_rate": 1.2786206896551725e-06, |
|
"loss": 0.062, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 3.3111334506157326, |
|
"grad_norm": 1.787929892539978, |
|
"learning_rate": 1.2613793103448278e-06, |
|
"loss": 0.0587, |
|
"step": 13175 |
|
}, |
|
{ |
|
"epoch": 3.317416436290525, |
|
"grad_norm": 1.5625816583633423, |
|
"learning_rate": 1.2441379310344829e-06, |
|
"loss": 0.0638, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 3.3236994219653178, |
|
"grad_norm": 1.4375091791152954, |
|
"learning_rate": 1.226896551724138e-06, |
|
"loss": 0.066, |
|
"step": 13225 |
|
}, |
|
{ |
|
"epoch": 3.3299824076401103, |
|
"grad_norm": 1.6865044832229614, |
|
"learning_rate": 1.2096551724137932e-06, |
|
"loss": 0.0584, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 3.3362653933149033, |
|
"grad_norm": 2.0026865005493164, |
|
"learning_rate": 1.1924137931034483e-06, |
|
"loss": 0.0627, |
|
"step": 13275 |
|
}, |
|
{ |
|
"epoch": 3.342548378989696, |
|
"grad_norm": 0.9974460601806641, |
|
"learning_rate": 1.1751724137931036e-06, |
|
"loss": 0.0638, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 3.3488313646644885, |
|
"grad_norm": 1.6640186309814453, |
|
"learning_rate": 1.1579310344827587e-06, |
|
"loss": 0.0599, |
|
"step": 13325 |
|
}, |
|
{ |
|
"epoch": 3.355114350339281, |
|
"grad_norm": 1.8832957744598389, |
|
"learning_rate": 1.140689655172414e-06, |
|
"loss": 0.0609, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 3.3613973360140736, |
|
"grad_norm": 1.3539857864379883, |
|
"learning_rate": 1.123448275862069e-06, |
|
"loss": 0.0647, |
|
"step": 13375 |
|
}, |
|
{ |
|
"epoch": 3.3676803216888667, |
|
"grad_norm": 1.54263174533844, |
|
"learning_rate": 1.1062068965517241e-06, |
|
"loss": 0.0615, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 3.3739633073636592, |
|
"grad_norm": 1.5486729145050049, |
|
"learning_rate": 1.0889655172413794e-06, |
|
"loss": 0.0543, |
|
"step": 13425 |
|
}, |
|
{ |
|
"epoch": 3.380246293038452, |
|
"grad_norm": 1.3974565267562866, |
|
"learning_rate": 1.0717241379310345e-06, |
|
"loss": 0.0661, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 3.3865292787132444, |
|
"grad_norm": 1.3940776586532593, |
|
"learning_rate": 1.0544827586206898e-06, |
|
"loss": 0.062, |
|
"step": 13475 |
|
}, |
|
{ |
|
"epoch": 3.392812264388037, |
|
"grad_norm": 2.181025981903076, |
|
"learning_rate": 1.0372413793103449e-06, |
|
"loss": 0.0668, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.392812264388037, |
|
"eval_loss": 0.16507452726364136, |
|
"eval_runtime": 11681.06, |
|
"eval_samples_per_second": 1.672, |
|
"eval_steps_per_second": 0.209, |
|
"eval_wer": 43.06982891230401, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.39909525006283, |
|
"grad_norm": 1.663004755973816, |
|
"learning_rate": 1.02e-06, |
|
"loss": 0.0597, |
|
"step": 13525 |
|
}, |
|
{ |
|
"epoch": 3.4053782357376225, |
|
"grad_norm": 1.9507300853729248, |
|
"learning_rate": 1.0027586206896553e-06, |
|
"loss": 0.0623, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 3.411661221412415, |
|
"grad_norm": 2.0604774951934814, |
|
"learning_rate": 9.855172413793104e-07, |
|
"loss": 0.061, |
|
"step": 13575 |
|
}, |
|
{ |
|
"epoch": 3.4179442070872077, |
|
"grad_norm": 1.7244702577590942, |
|
"learning_rate": 9.682758620689656e-07, |
|
"loss": 0.0609, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 3.4242271927620003, |
|
"grad_norm": 1.3366154432296753, |
|
"learning_rate": 9.510344827586207e-07, |
|
"loss": 0.0623, |
|
"step": 13625 |
|
}, |
|
{ |
|
"epoch": 3.4305101784367933, |
|
"grad_norm": 1.6767691373825073, |
|
"learning_rate": 9.33793103448276e-07, |
|
"loss": 0.0597, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 3.436793164111586, |
|
"grad_norm": 1.580768346786499, |
|
"learning_rate": 9.165517241379311e-07, |
|
"loss": 0.073, |
|
"step": 13675 |
|
}, |
|
{ |
|
"epoch": 3.4430761497863784, |
|
"grad_norm": 1.6998015642166138, |
|
"learning_rate": 8.993103448275863e-07, |
|
"loss": 0.0591, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 3.449359135461171, |
|
"grad_norm": 1.5298068523406982, |
|
"learning_rate": 8.820689655172414e-07, |
|
"loss": 0.0598, |
|
"step": 13725 |
|
}, |
|
{ |
|
"epoch": 3.4556421211359636, |
|
"grad_norm": 2.080108165740967, |
|
"learning_rate": 8.648275862068967e-07, |
|
"loss": 0.0668, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 3.4619251068107566, |
|
"grad_norm": 1.8349488973617554, |
|
"learning_rate": 8.475862068965517e-07, |
|
"loss": 0.0671, |
|
"step": 13775 |
|
}, |
|
{ |
|
"epoch": 3.468208092485549, |
|
"grad_norm": 2.383136034011841, |
|
"learning_rate": 8.303448275862069e-07, |
|
"loss": 0.0647, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 3.4744910781603418, |
|
"grad_norm": 1.7982120513916016, |
|
"learning_rate": 8.131034482758621e-07, |
|
"loss": 0.0708, |
|
"step": 13825 |
|
}, |
|
{ |
|
"epoch": 3.4807740638351343, |
|
"grad_norm": 1.5413868427276611, |
|
"learning_rate": 7.958620689655173e-07, |
|
"loss": 0.0624, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 3.487057049509927, |
|
"grad_norm": 2.7131173610687256, |
|
"learning_rate": 7.786206896551725e-07, |
|
"loss": 0.0709, |
|
"step": 13875 |
|
}, |
|
{ |
|
"epoch": 3.49334003518472, |
|
"grad_norm": 1.5475999116897583, |
|
"learning_rate": 7.613793103448276e-07, |
|
"loss": 0.0588, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 3.4996230208595125, |
|
"grad_norm": 0.9188130497932434, |
|
"learning_rate": 7.441379310344828e-07, |
|
"loss": 0.0541, |
|
"step": 13925 |
|
}, |
|
{ |
|
"epoch": 3.505906006534305, |
|
"grad_norm": 1.8402552604675293, |
|
"learning_rate": 7.268965517241381e-07, |
|
"loss": 0.0611, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 3.5121889922090976, |
|
"grad_norm": 1.9850478172302246, |
|
"learning_rate": 7.096551724137931e-07, |
|
"loss": 0.0586, |
|
"step": 13975 |
|
}, |
|
{ |
|
"epoch": 3.51847197788389, |
|
"grad_norm": 1.791929006576538, |
|
"learning_rate": 6.924137931034483e-07, |
|
"loss": 0.0582, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.51847197788389, |
|
"eval_loss": 0.16593758761882782, |
|
"eval_runtime": 11717.6125, |
|
"eval_samples_per_second": 1.667, |
|
"eval_steps_per_second": 0.208, |
|
"eval_wer": 42.9048076436985, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.5247549635586832, |
|
"grad_norm": 1.5299566984176636, |
|
"learning_rate": 6.758620689655172e-07, |
|
"loss": 0.0644, |
|
"step": 14025 |
|
}, |
|
{ |
|
"epoch": 3.531037949233476, |
|
"grad_norm": 1.8366338014602661, |
|
"learning_rate": 6.586206896551725e-07, |
|
"loss": 0.0621, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 3.5373209349082684, |
|
"grad_norm": 1.5470293760299683, |
|
"learning_rate": 6.413793103448277e-07, |
|
"loss": 0.0624, |
|
"step": 14075 |
|
}, |
|
{ |
|
"epoch": 3.543603920583061, |
|
"grad_norm": 1.9549955129623413, |
|
"learning_rate": 6.241379310344828e-07, |
|
"loss": 0.064, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 3.5498869062578535, |
|
"grad_norm": 1.6928914785385132, |
|
"learning_rate": 6.068965517241379e-07, |
|
"loss": 0.0688, |
|
"step": 14125 |
|
}, |
|
{ |
|
"epoch": 3.5561698919326465, |
|
"grad_norm": 1.8158848285675049, |
|
"learning_rate": 5.896551724137931e-07, |
|
"loss": 0.0666, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 3.562452877607439, |
|
"grad_norm": 1.2600568532943726, |
|
"learning_rate": 5.724137931034483e-07, |
|
"loss": 0.057, |
|
"step": 14175 |
|
}, |
|
{ |
|
"epoch": 3.5687358632822317, |
|
"grad_norm": 1.5076507329940796, |
|
"learning_rate": 5.551724137931035e-07, |
|
"loss": 0.0643, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 3.5750188489570243, |
|
"grad_norm": 1.486598014831543, |
|
"learning_rate": 5.379310344827587e-07, |
|
"loss": 0.063, |
|
"step": 14225 |
|
}, |
|
{ |
|
"epoch": 3.581301834631817, |
|
"grad_norm": 1.381836175918579, |
|
"learning_rate": 5.206896551724138e-07, |
|
"loss": 0.06, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 3.58758482030661, |
|
"grad_norm": 1.3430190086364746, |
|
"learning_rate": 5.03448275862069e-07, |
|
"loss": 0.0551, |
|
"step": 14275 |
|
}, |
|
{ |
|
"epoch": 3.5938678059814024, |
|
"grad_norm": 1.7509955167770386, |
|
"learning_rate": 4.862068965517241e-07, |
|
"loss": 0.0555, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 3.600150791656195, |
|
"grad_norm": 1.3928794860839844, |
|
"learning_rate": 4.6896551724137934e-07, |
|
"loss": 0.0577, |
|
"step": 14325 |
|
}, |
|
{ |
|
"epoch": 3.6064337773309876, |
|
"grad_norm": 1.7978532314300537, |
|
"learning_rate": 4.5172413793103447e-07, |
|
"loss": 0.0668, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 3.61271676300578, |
|
"grad_norm": 1.4505749940872192, |
|
"learning_rate": 4.344827586206897e-07, |
|
"loss": 0.0608, |
|
"step": 14375 |
|
}, |
|
{ |
|
"epoch": 3.618999748680573, |
|
"grad_norm": 1.6973689794540405, |
|
"learning_rate": 4.1724137931034485e-07, |
|
"loss": 0.0501, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 3.6252827343553657, |
|
"grad_norm": 2.222531318664551, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 0.0645, |
|
"step": 14425 |
|
}, |
|
{ |
|
"epoch": 3.6315657200301583, |
|
"grad_norm": 2.4952709674835205, |
|
"learning_rate": 3.8275862068965517e-07, |
|
"loss": 0.0537, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 3.637848705704951, |
|
"grad_norm": 2.3859033584594727, |
|
"learning_rate": 3.6551724137931036e-07, |
|
"loss": 0.0652, |
|
"step": 14475 |
|
}, |
|
{ |
|
"epoch": 3.6441316913797435, |
|
"grad_norm": 1.629402756690979, |
|
"learning_rate": 3.4827586206896555e-07, |
|
"loss": 0.055, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.6441316913797435, |
|
"eval_loss": 0.16478079557418823, |
|
"eval_runtime": 11635.7899, |
|
"eval_samples_per_second": 1.679, |
|
"eval_steps_per_second": 0.21, |
|
"eval_wer": 42.71378740060809, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.6504146770545365, |
|
"grad_norm": 1.4962036609649658, |
|
"learning_rate": 3.3103448275862073e-07, |
|
"loss": 0.0589, |
|
"step": 14525 |
|
}, |
|
{ |
|
"epoch": 3.656697662729329, |
|
"grad_norm": 1.3538328409194946, |
|
"learning_rate": 3.1379310344827587e-07, |
|
"loss": 0.0614, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 3.6629806484041216, |
|
"grad_norm": 1.8565229177474976, |
|
"learning_rate": 2.9655172413793106e-07, |
|
"loss": 0.082, |
|
"step": 14575 |
|
}, |
|
{ |
|
"epoch": 3.669263634078914, |
|
"grad_norm": 1.5180363655090332, |
|
"learning_rate": 2.7931034482758624e-07, |
|
"loss": 0.064, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 3.675546619753707, |
|
"grad_norm": 2.005053997039795, |
|
"learning_rate": 2.6206896551724143e-07, |
|
"loss": 0.0734, |
|
"step": 14625 |
|
}, |
|
{ |
|
"epoch": 3.6818296054285, |
|
"grad_norm": 1.190696120262146, |
|
"learning_rate": 2.4482758620689657e-07, |
|
"loss": 0.0606, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 3.6881125911032924, |
|
"grad_norm": 1.364521861076355, |
|
"learning_rate": 2.2758620689655175e-07, |
|
"loss": 0.0652, |
|
"step": 14675 |
|
}, |
|
{ |
|
"epoch": 3.694395576778085, |
|
"grad_norm": 1.3325939178466797, |
|
"learning_rate": 2.1034482758620692e-07, |
|
"loss": 0.0635, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 3.7006785624528775, |
|
"grad_norm": 2.166093587875366, |
|
"learning_rate": 1.931034482758621e-07, |
|
"loss": 0.0638, |
|
"step": 14725 |
|
}, |
|
{ |
|
"epoch": 3.70696154812767, |
|
"grad_norm": 1.497362494468689, |
|
"learning_rate": 1.7586206896551726e-07, |
|
"loss": 0.0595, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 3.713244533802463, |
|
"grad_norm": 1.7004494667053223, |
|
"learning_rate": 1.5862068965517243e-07, |
|
"loss": 0.0591, |
|
"step": 14775 |
|
}, |
|
{ |
|
"epoch": 3.7195275194772557, |
|
"grad_norm": 1.4566409587860107, |
|
"learning_rate": 1.413793103448276e-07, |
|
"loss": 0.0621, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 3.7258105051520483, |
|
"grad_norm": 0.7791139483451843, |
|
"learning_rate": 1.2413793103448277e-07, |
|
"loss": 0.0569, |
|
"step": 14825 |
|
}, |
|
{ |
|
"epoch": 3.732093490826841, |
|
"grad_norm": 1.436057209968567, |
|
"learning_rate": 1.0689655172413794e-07, |
|
"loss": 0.0636, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 3.7383764765016334, |
|
"grad_norm": 1.0641875267028809, |
|
"learning_rate": 8.965517241379311e-08, |
|
"loss": 0.0595, |
|
"step": 14875 |
|
}, |
|
{ |
|
"epoch": 3.7446594621764264, |
|
"grad_norm": 1.1096875667572021, |
|
"learning_rate": 7.241379310344829e-08, |
|
"loss": 0.0588, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 3.750942447851219, |
|
"grad_norm": 1.2665530443191528, |
|
"learning_rate": 5.517241379310345e-08, |
|
"loss": 0.0627, |
|
"step": 14925 |
|
}, |
|
{ |
|
"epoch": 3.7572254335260116, |
|
"grad_norm": 1.4025217294692993, |
|
"learning_rate": 3.793103448275862e-08, |
|
"loss": 0.056, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 3.763508419200804, |
|
"grad_norm": 1.0198439359664917, |
|
"learning_rate": 2.0689655172413796e-08, |
|
"loss": 0.0584, |
|
"step": 14975 |
|
}, |
|
{ |
|
"epoch": 3.7697914048755967, |
|
"grad_norm": 1.531874656677246, |
|
"learning_rate": 3.4482758620689654e-09, |
|
"loss": 0.0577, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.7697914048755967, |
|
"eval_loss": 0.16528290510177612, |
|
"eval_runtime": 11264.7491, |
|
"eval_samples_per_second": 1.734, |
|
"eval_steps_per_second": 0.217, |
|
"eval_wer": 42.7856456773094, |
|
"step": 15000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 15000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.924722114838528e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |