|
{ |
|
"best_metric": 46.4318, |
|
"best_model_checkpoint": "outputs/modernisa-v2-byt5-base-lr0.0001/checkpoint-34000", |
|
"epoch": 5.0, |
|
"global_step": 57430, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.912937489117186e-05, |
|
"loss": 0.3717, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.825874978234372e-05, |
|
"loss": 0.2696, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_bleu": 27.8571, |
|
"eval_cer": 34.4149, |
|
"eval_gen_len": 18.5, |
|
"eval_loss": 0.3027326464653015, |
|
"eval_runtime": 51.4554, |
|
"eval_samples_per_second": 45.593, |
|
"eval_steps_per_second": 2.857, |
|
"eval_wer": 49.5134, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.73881246735156e-05, |
|
"loss": 0.2564, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.651749956468745e-05, |
|
"loss": 0.2518, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_bleu": 29.2213, |
|
"eval_cer": 34.6336, |
|
"eval_gen_len": 18.5371, |
|
"eval_loss": 0.28565752506256104, |
|
"eval_runtime": 53.6563, |
|
"eval_samples_per_second": 43.723, |
|
"eval_steps_per_second": 2.74, |
|
"eval_wer": 49.1981, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.564687445585931e-05, |
|
"loss": 0.2387, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.477624934703117e-05, |
|
"loss": 0.2343, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_bleu": 29.5067, |
|
"eval_cer": 34.9795, |
|
"eval_gen_len": 18.5537, |
|
"eval_loss": 0.2730022966861725, |
|
"eval_runtime": 51.0673, |
|
"eval_samples_per_second": 45.939, |
|
"eval_steps_per_second": 2.879, |
|
"eval_wer": 49.117, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.390562423820304e-05, |
|
"loss": 0.2331, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.30349991293749e-05, |
|
"loss": 0.2292, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_bleu": 29.884, |
|
"eval_cer": 34.8015, |
|
"eval_gen_len": 18.5516, |
|
"eval_loss": 0.26898515224456787, |
|
"eval_runtime": 52.4703, |
|
"eval_samples_per_second": 44.711, |
|
"eval_steps_per_second": 2.802, |
|
"eval_wer": 48.7025, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.216437402054676e-05, |
|
"loss": 0.2243, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.129374891171862e-05, |
|
"loss": 0.2243, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_bleu": 29.9577, |
|
"eval_cer": 34.7218, |
|
"eval_gen_len": 18.5477, |
|
"eval_loss": 0.26465946435928345, |
|
"eval_runtime": 51.6529, |
|
"eval_samples_per_second": 45.419, |
|
"eval_steps_per_second": 2.846, |
|
"eval_wer": 48.8466, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.042312380289048e-05, |
|
"loss": 0.2185, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.955249869406234e-05, |
|
"loss": 0.2112, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_bleu": 30.3115, |
|
"eval_cer": 34.4895, |
|
"eval_gen_len": 18.5477, |
|
"eval_loss": 0.2636098265647888, |
|
"eval_runtime": 53.1222, |
|
"eval_samples_per_second": 44.162, |
|
"eval_steps_per_second": 2.767, |
|
"eval_wer": 48.3871, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.86818735852342e-05, |
|
"loss": 0.2165, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 8.781124847640607e-05, |
|
"loss": 0.2118, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_bleu": 30.6364, |
|
"eval_cer": 34.7455, |
|
"eval_gen_len": 18.5413, |
|
"eval_loss": 0.25554388761520386, |
|
"eval_runtime": 50.9916, |
|
"eval_samples_per_second": 46.008, |
|
"eval_steps_per_second": 2.883, |
|
"eval_wer": 48.3961, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.694062336757793e-05, |
|
"loss": 0.2105, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.606999825874978e-05, |
|
"loss": 0.205, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_bleu": 31.0881, |
|
"eval_cer": 34.0759, |
|
"eval_gen_len": 18.5269, |
|
"eval_loss": 0.2507636547088623, |
|
"eval_runtime": 53.5946, |
|
"eval_samples_per_second": 43.773, |
|
"eval_steps_per_second": 2.743, |
|
"eval_wer": 47.468, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 8.519937314992164e-05, |
|
"loss": 0.2035, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 8.432874804109351e-05, |
|
"loss": 0.2049, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_bleu": 31.1481, |
|
"eval_cer": 34.4133, |
|
"eval_gen_len": 18.5503, |
|
"eval_loss": 0.24714592099189758, |
|
"eval_runtime": 60.0036, |
|
"eval_samples_per_second": 39.098, |
|
"eval_steps_per_second": 2.45, |
|
"eval_wer": 47.5942, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.345812293226537e-05, |
|
"loss": 0.2074, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.258749782343723e-05, |
|
"loss": 0.2005, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_bleu": 30.9375, |
|
"eval_cer": 34.281, |
|
"eval_gen_len": 18.5405, |
|
"eval_loss": 0.24682185053825378, |
|
"eval_runtime": 51.6353, |
|
"eval_samples_per_second": 45.434, |
|
"eval_steps_per_second": 2.847, |
|
"eval_wer": 47.6392, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 8.171687271460909e-05, |
|
"loss": 0.2049, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.084624760578095e-05, |
|
"loss": 0.1999, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_bleu": 30.9692, |
|
"eval_cer": 34.4183, |
|
"eval_gen_len": 18.5405, |
|
"eval_loss": 0.24305607378482819, |
|
"eval_runtime": 53.15, |
|
"eval_samples_per_second": 44.139, |
|
"eval_steps_per_second": 2.766, |
|
"eval_wer": 47.7023, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 7.997562249695282e-05, |
|
"loss": 0.2037, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 7.910499738812468e-05, |
|
"loss": 0.161, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_bleu": 31.2337, |
|
"eval_cer": 34.1878, |
|
"eval_gen_len": 18.5298, |
|
"eval_loss": 0.24913541972637177, |
|
"eval_runtime": 51.1684, |
|
"eval_samples_per_second": 45.849, |
|
"eval_steps_per_second": 2.873, |
|
"eval_wer": 47.3238, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 7.823437227929654e-05, |
|
"loss": 0.1568, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 7.73637471704684e-05, |
|
"loss": 0.1601, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_bleu": 31.4422, |
|
"eval_cer": 34.1657, |
|
"eval_gen_len": 18.5371, |
|
"eval_loss": 0.24957244098186493, |
|
"eval_runtime": 52.0238, |
|
"eval_samples_per_second": 45.095, |
|
"eval_steps_per_second": 2.826, |
|
"eval_wer": 47.3689, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 7.649312206164027e-05, |
|
"loss": 0.1523, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.562249695281213e-05, |
|
"loss": 0.1606, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_bleu": 31.4582, |
|
"eval_cer": 34.2386, |
|
"eval_gen_len": 18.5405, |
|
"eval_loss": 0.24588151276111603, |
|
"eval_runtime": 51.9336, |
|
"eval_samples_per_second": 45.173, |
|
"eval_steps_per_second": 2.831, |
|
"eval_wer": 47.3329, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.475187184398399e-05, |
|
"loss": 0.158, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 7.388124673515585e-05, |
|
"loss": 0.1594, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_bleu": 31.386, |
|
"eval_cer": 34.2912, |
|
"eval_gen_len": 18.5375, |
|
"eval_loss": 0.24664774537086487, |
|
"eval_runtime": 51.7169, |
|
"eval_samples_per_second": 45.362, |
|
"eval_steps_per_second": 2.842, |
|
"eval_wer": 47.1166, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 7.30106216263277e-05, |
|
"loss": 0.1609, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 7.213999651749956e-05, |
|
"loss": 0.1617, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_bleu": 31.6546, |
|
"eval_cer": 34.0149, |
|
"eval_gen_len": 18.5294, |
|
"eval_loss": 0.2411554753780365, |
|
"eval_runtime": 52.5275, |
|
"eval_samples_per_second": 44.662, |
|
"eval_steps_per_second": 2.799, |
|
"eval_wer": 46.8373, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 7.126937140867142e-05, |
|
"loss": 0.1594, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 7.03987462998433e-05, |
|
"loss": 0.1582, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_bleu": 31.2924, |
|
"eval_cer": 34.2573, |
|
"eval_gen_len": 18.5503, |
|
"eval_loss": 0.24606570601463318, |
|
"eval_runtime": 50.8697, |
|
"eval_samples_per_second": 46.118, |
|
"eval_steps_per_second": 2.89, |
|
"eval_wer": 47.4139, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 6.952812119101515e-05, |
|
"loss": 0.1599, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 6.865749608218701e-05, |
|
"loss": 0.1572, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_bleu": 31.1484, |
|
"eval_cer": 34.3675, |
|
"eval_gen_len": 18.5499, |
|
"eval_loss": 0.24250736832618713, |
|
"eval_runtime": 52.5057, |
|
"eval_samples_per_second": 44.681, |
|
"eval_steps_per_second": 2.8, |
|
"eval_wer": 47.45, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 6.778687097335887e-05, |
|
"loss": 0.1574, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 6.691624586453074e-05, |
|
"loss": 0.1565, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_bleu": 31.6967, |
|
"eval_cer": 34.1047, |
|
"eval_gen_len": 18.5388, |
|
"eval_loss": 0.2424342930316925, |
|
"eval_runtime": 51.0272, |
|
"eval_samples_per_second": 45.975, |
|
"eval_steps_per_second": 2.881, |
|
"eval_wer": 46.9724, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 6.60456207557026e-05, |
|
"loss": 0.1582, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 6.517499564687446e-05, |
|
"loss": 0.1585, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_bleu": 31.9026, |
|
"eval_cer": 34.281, |
|
"eval_gen_len": 18.558, |
|
"eval_loss": 0.2381763756275177, |
|
"eval_runtime": 52.4669, |
|
"eval_samples_per_second": 44.714, |
|
"eval_steps_per_second": 2.802, |
|
"eval_wer": 47.0175, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.430437053804632e-05, |
|
"loss": 0.1559, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 6.343374542921819e-05, |
|
"loss": 0.1522, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_bleu": 32.1619, |
|
"eval_cer": 33.9369, |
|
"eval_gen_len": 18.5311, |
|
"eval_loss": 0.23654896020889282, |
|
"eval_runtime": 52.4567, |
|
"eval_samples_per_second": 44.723, |
|
"eval_steps_per_second": 2.802, |
|
"eval_wer": 46.5219, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 6.256312032039005e-05, |
|
"loss": 0.1578, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 6.169249521156191e-05, |
|
"loss": 0.156, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_bleu": 31.7762, |
|
"eval_cer": 33.9572, |
|
"eval_gen_len": 18.5401, |
|
"eval_loss": 0.2381468415260315, |
|
"eval_runtime": 56.1509, |
|
"eval_samples_per_second": 41.78, |
|
"eval_steps_per_second": 2.618, |
|
"eval_wer": 46.7922, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 6.0821870102733766e-05, |
|
"loss": 0.1567, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.995124499390563e-05, |
|
"loss": 0.1538, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 31.8785, |
|
"eval_cer": 34.2319, |
|
"eval_gen_len": 18.5516, |
|
"eval_loss": 0.24016974866390228, |
|
"eval_runtime": 53.089, |
|
"eval_samples_per_second": 44.19, |
|
"eval_steps_per_second": 2.769, |
|
"eval_wer": 46.8012, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 5.908061988507749e-05, |
|
"loss": 0.1075, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 5.8209994776249355e-05, |
|
"loss": 0.1083, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_bleu": 31.9905, |
|
"eval_cer": 34.0098, |
|
"eval_gen_len": 18.5384, |
|
"eval_loss": 0.2653577923774719, |
|
"eval_runtime": 52.9572, |
|
"eval_samples_per_second": 44.3, |
|
"eval_steps_per_second": 2.776, |
|
"eval_wer": 46.603, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 5.7339369667421214e-05, |
|
"loss": 0.108, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.6468744558593066e-05, |
|
"loss": 0.1086, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_bleu": 31.6257, |
|
"eval_cer": 34.2607, |
|
"eval_gen_len": 18.5409, |
|
"eval_loss": 0.26182088255882263, |
|
"eval_runtime": 51.8801, |
|
"eval_samples_per_second": 45.22, |
|
"eval_steps_per_second": 2.833, |
|
"eval_wer": 46.9995, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 5.559811944976493e-05, |
|
"loss": 0.1085, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 5.472749434093679e-05, |
|
"loss": 0.1092, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_bleu": 31.4886, |
|
"eval_cer": 34.337, |
|
"eval_gen_len": 18.5422, |
|
"eval_loss": 0.2658332288265228, |
|
"eval_runtime": 51.9175, |
|
"eval_samples_per_second": 45.187, |
|
"eval_steps_per_second": 2.831, |
|
"eval_wer": 47.1436, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 5.3856869232108655e-05, |
|
"loss": 0.1106, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 5.2986244123280514e-05, |
|
"loss": 0.1086, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_bleu": 31.8448, |
|
"eval_cer": 34.1217, |
|
"eval_gen_len": 18.5375, |
|
"eval_loss": 0.26663142442703247, |
|
"eval_runtime": 51.6292, |
|
"eval_samples_per_second": 45.439, |
|
"eval_steps_per_second": 2.847, |
|
"eval_wer": 46.6751, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 5.211561901445238e-05, |
|
"loss": 0.1087, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.124499390562424e-05, |
|
"loss": 0.1098, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_bleu": 31.709, |
|
"eval_cer": 34.1946, |
|
"eval_gen_len": 18.5452, |
|
"eval_loss": 0.2659294009208679, |
|
"eval_runtime": 58.8373, |
|
"eval_samples_per_second": 39.873, |
|
"eval_steps_per_second": 2.498, |
|
"eval_wer": 46.8913, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.03743687967961e-05, |
|
"loss": 0.1106, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.950374368796796e-05, |
|
"loss": 0.1117, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_bleu": 31.8114, |
|
"eval_cer": 34.1708, |
|
"eval_gen_len": 18.5431, |
|
"eval_loss": 0.2648890018463135, |
|
"eval_runtime": 57.7914, |
|
"eval_samples_per_second": 40.594, |
|
"eval_steps_per_second": 2.544, |
|
"eval_wer": 46.8913, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.863311857913983e-05, |
|
"loss": 0.1087, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 4.7762493470311686e-05, |
|
"loss": 0.1094, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_bleu": 31.6955, |
|
"eval_cer": 34.1606, |
|
"eval_gen_len": 18.5375, |
|
"eval_loss": 0.26563677191734314, |
|
"eval_runtime": 53.0374, |
|
"eval_samples_per_second": 44.233, |
|
"eval_steps_per_second": 2.772, |
|
"eval_wer": 46.8643, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 4.689186836148355e-05, |
|
"loss": 0.1079, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 4.602124325265541e-05, |
|
"loss": 0.1077, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_bleu": 31.5495, |
|
"eval_cer": 34.0064, |
|
"eval_gen_len": 18.5448, |
|
"eval_loss": 0.2636907398700714, |
|
"eval_runtime": 53.9667, |
|
"eval_samples_per_second": 43.471, |
|
"eval_steps_per_second": 2.724, |
|
"eval_wer": 46.8823, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.5150618143827276e-05, |
|
"loss": 0.1103, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 4.427999303499913e-05, |
|
"loss": 0.1088, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_bleu": 32.0837, |
|
"eval_cer": 33.9504, |
|
"eval_gen_len": 18.5413, |
|
"eval_loss": 0.2668997645378113, |
|
"eval_runtime": 50.6471, |
|
"eval_samples_per_second": 46.321, |
|
"eval_steps_per_second": 2.902, |
|
"eval_wer": 46.612, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.340936792617099e-05, |
|
"loss": 0.1087, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 4.253874281734285e-05, |
|
"loss": 0.1087, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_bleu": 31.5549, |
|
"eval_cer": 34.2149, |
|
"eval_gen_len": 18.5286, |
|
"eval_loss": 0.264612078666687, |
|
"eval_runtime": 52.8215, |
|
"eval_samples_per_second": 44.414, |
|
"eval_steps_per_second": 2.783, |
|
"eval_wer": 47.0806, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 4.166811770851472e-05, |
|
"loss": 0.1091, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.0797492599686575e-05, |
|
"loss": 0.1077, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_bleu": 32.1129, |
|
"eval_cer": 33.9403, |
|
"eval_gen_len": 18.5452, |
|
"eval_loss": 0.26299673318862915, |
|
"eval_runtime": 53.065, |
|
"eval_samples_per_second": 44.21, |
|
"eval_steps_per_second": 2.77, |
|
"eval_wer": 46.4318, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.992686749085844e-05, |
|
"loss": 0.1037, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.90562423820303e-05, |
|
"loss": 0.0652, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_bleu": 31.3861, |
|
"eval_cer": 34.1149, |
|
"eval_gen_len": 18.5396, |
|
"eval_loss": 0.33602526783943176, |
|
"eval_runtime": 53.5525, |
|
"eval_samples_per_second": 43.807, |
|
"eval_steps_per_second": 2.745, |
|
"eval_wer": 47.1977, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.8185617273202165e-05, |
|
"loss": 0.0648, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 3.7314992164374024e-05, |
|
"loss": 0.0662, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_bleu": 31.2372, |
|
"eval_cer": 34.203, |
|
"eval_gen_len": 18.552, |
|
"eval_loss": 0.3401012718677521, |
|
"eval_runtime": 51.9918, |
|
"eval_samples_per_second": 45.122, |
|
"eval_steps_per_second": 2.827, |
|
"eval_wer": 47.3869, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 3.644436705554589e-05, |
|
"loss": 0.0635, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 3.557374194671774e-05, |
|
"loss": 0.0666, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"eval_bleu": 31.3462, |
|
"eval_cer": 34.1759, |
|
"eval_gen_len": 18.5469, |
|
"eval_loss": 0.33890092372894287, |
|
"eval_runtime": 51.8417, |
|
"eval_samples_per_second": 45.253, |
|
"eval_steps_per_second": 2.836, |
|
"eval_wer": 47.2968, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 3.4703116837889606e-05, |
|
"loss": 0.0646, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 3.3832491729061465e-05, |
|
"loss": 0.0648, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"eval_bleu": 30.835, |
|
"eval_cer": 34.381, |
|
"eval_gen_len": 18.552, |
|
"eval_loss": 0.3339092433452606, |
|
"eval_runtime": 51.3621, |
|
"eval_samples_per_second": 45.676, |
|
"eval_steps_per_second": 2.862, |
|
"eval_wer": 47.6753, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.296186662023333e-05, |
|
"loss": 0.0648, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3.209124151140519e-05, |
|
"loss": 0.0654, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_bleu": 31.0958, |
|
"eval_cer": 34.4692, |
|
"eval_gen_len": 18.5524, |
|
"eval_loss": 0.33954936265945435, |
|
"eval_runtime": 51.3892, |
|
"eval_samples_per_second": 45.652, |
|
"eval_steps_per_second": 2.861, |
|
"eval_wer": 47.7203, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 3.1220616402577054e-05, |
|
"loss": 0.0645, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 3.0349991293748913e-05, |
|
"loss": 0.0663, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"eval_bleu": 31.126, |
|
"eval_cer": 34.4539, |
|
"eval_gen_len": 18.5499, |
|
"eval_loss": 0.3317714333534241, |
|
"eval_runtime": 53.3556, |
|
"eval_samples_per_second": 43.969, |
|
"eval_steps_per_second": 2.755, |
|
"eval_wer": 47.5942, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 2.9479366184920775e-05, |
|
"loss": 0.0657, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.8608741076092637e-05, |
|
"loss": 0.0648, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_bleu": 31.0295, |
|
"eval_cer": 34.3539, |
|
"eval_gen_len": 18.5477, |
|
"eval_loss": 0.33970215916633606, |
|
"eval_runtime": 52.8717, |
|
"eval_samples_per_second": 44.372, |
|
"eval_steps_per_second": 2.78, |
|
"eval_wer": 47.5852, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 2.77381159672645e-05, |
|
"loss": 0.0647, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 2.6867490858436354e-05, |
|
"loss": 0.0635, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"eval_bleu": 31.1287, |
|
"eval_cer": 34.4285, |
|
"eval_gen_len": 18.5494, |
|
"eval_loss": 0.3414219617843628, |
|
"eval_runtime": 52.9358, |
|
"eval_samples_per_second": 44.318, |
|
"eval_steps_per_second": 2.777, |
|
"eval_wer": 47.5491, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 2.5996865749608216e-05, |
|
"loss": 0.0657, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 2.512624064078008e-05, |
|
"loss": 0.0656, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_bleu": 30.9225, |
|
"eval_cer": 34.4285, |
|
"eval_gen_len": 18.5563, |
|
"eval_loss": 0.3393559455871582, |
|
"eval_runtime": 51.5264, |
|
"eval_samples_per_second": 45.53, |
|
"eval_steps_per_second": 2.853, |
|
"eval_wer": 47.6392, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 2.425561553195194e-05, |
|
"loss": 0.0659, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 2.3384990423123803e-05, |
|
"loss": 0.0625, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"eval_bleu": 31.2435, |
|
"eval_cer": 34.1674, |
|
"eval_gen_len": 18.5439, |
|
"eval_loss": 0.341974139213562, |
|
"eval_runtime": 51.6192, |
|
"eval_samples_per_second": 45.448, |
|
"eval_steps_per_second": 2.848, |
|
"eval_wer": 47.2968, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.2514365314295665e-05, |
|
"loss": 0.0642, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 2.1643740205467527e-05, |
|
"loss": 0.0636, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"eval_bleu": 31.0688, |
|
"eval_cer": 34.3743, |
|
"eval_gen_len": 18.5439, |
|
"eval_loss": 0.3447582423686981, |
|
"eval_runtime": 51.0408, |
|
"eval_samples_per_second": 45.963, |
|
"eval_steps_per_second": 2.88, |
|
"eval_wer": 47.6843, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 2.0773115096639385e-05, |
|
"loss": 0.0644, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.9902489987811247e-05, |
|
"loss": 0.0586, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 31.2353, |
|
"eval_cer": 34.2963, |
|
"eval_gen_len": 18.549, |
|
"eval_loss": 0.36747270822525024, |
|
"eval_runtime": 54.2264, |
|
"eval_samples_per_second": 43.263, |
|
"eval_steps_per_second": 2.711, |
|
"eval_wer": 47.441, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 1.903186487898311e-05, |
|
"loss": 0.0326, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 1.816123977015497e-05, |
|
"loss": 0.0298, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"eval_bleu": 30.698, |
|
"eval_cer": 34.4319, |
|
"eval_gen_len": 18.5512, |
|
"eval_loss": 0.45656564831733704, |
|
"eval_runtime": 53.4442, |
|
"eval_samples_per_second": 43.896, |
|
"eval_steps_per_second": 2.751, |
|
"eval_wer": 47.8555, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 1.7290614661326833e-05, |
|
"loss": 0.0308, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 1.6419989552498692e-05, |
|
"loss": 0.0301, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_bleu": 30.7773, |
|
"eval_cer": 34.3861, |
|
"eval_gen_len": 18.5507, |
|
"eval_loss": 0.4724096655845642, |
|
"eval_runtime": 52.6433, |
|
"eval_samples_per_second": 44.564, |
|
"eval_steps_per_second": 2.792, |
|
"eval_wer": 47.8374, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 1.5549364443670554e-05, |
|
"loss": 0.0307, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 1.4678739334842418e-05, |
|
"loss": 0.0311, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"eval_bleu": 31.0878, |
|
"eval_cer": 34.3861, |
|
"eval_gen_len": 18.5503, |
|
"eval_loss": 0.4639967978000641, |
|
"eval_runtime": 51.8688, |
|
"eval_samples_per_second": 45.229, |
|
"eval_steps_per_second": 2.834, |
|
"eval_wer": 47.6212, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 1.380811422601428e-05, |
|
"loss": 0.0313, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 1.2937489117186142e-05, |
|
"loss": 0.03, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_bleu": 30.8319, |
|
"eval_cer": 34.459, |
|
"eval_gen_len": 18.5529, |
|
"eval_loss": 0.46544739603996277, |
|
"eval_runtime": 56.982, |
|
"eval_samples_per_second": 41.171, |
|
"eval_steps_per_second": 2.58, |
|
"eval_wer": 47.8915, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.2066864008358002e-05, |
|
"loss": 0.0306, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.1196238899529864e-05, |
|
"loss": 0.0302, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"eval_bleu": 30.9236, |
|
"eval_cer": 34.4997, |
|
"eval_gen_len": 18.552, |
|
"eval_loss": 0.46650850772857666, |
|
"eval_runtime": 50.604, |
|
"eval_samples_per_second": 46.36, |
|
"eval_steps_per_second": 2.905, |
|
"eval_wer": 47.9276, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.0325613790701725e-05, |
|
"loss": 0.0303, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 9.454988681873587e-06, |
|
"loss": 0.029, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"eval_bleu": 30.8307, |
|
"eval_cer": 34.4997, |
|
"eval_gen_len": 18.5482, |
|
"eval_loss": 0.4756769835948944, |
|
"eval_runtime": 53.9866, |
|
"eval_samples_per_second": 43.455, |
|
"eval_steps_per_second": 2.723, |
|
"eval_wer": 47.9456, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 8.584363573045447e-06, |
|
"loss": 0.031, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 7.713738464217309e-06, |
|
"loss": 0.0301, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"eval_bleu": 30.7983, |
|
"eval_cer": 34.5218, |
|
"eval_gen_len": 18.5473, |
|
"eval_loss": 0.4672394096851349, |
|
"eval_runtime": 52.9301, |
|
"eval_samples_per_second": 44.323, |
|
"eval_steps_per_second": 2.777, |
|
"eval_wer": 47.9456, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 6.843113355389169e-06, |
|
"loss": 0.0296, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 5.972488246561031e-06, |
|
"loss": 0.0294, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"eval_bleu": 30.8924, |
|
"eval_cer": 34.4353, |
|
"eval_gen_len": 18.5529, |
|
"eval_loss": 0.4714747667312622, |
|
"eval_runtime": 53.7737, |
|
"eval_samples_per_second": 43.627, |
|
"eval_steps_per_second": 2.734, |
|
"eval_wer": 47.7564, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 5.1018631377328925e-06, |
|
"loss": 0.0292, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 4.2312380289047546e-06, |
|
"loss": 0.0288, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"eval_bleu": 30.7372, |
|
"eval_cer": 34.4675, |
|
"eval_gen_len": 18.5524, |
|
"eval_loss": 0.4751755893230438, |
|
"eval_runtime": 51.756, |
|
"eval_samples_per_second": 45.328, |
|
"eval_steps_per_second": 2.84, |
|
"eval_wer": 47.7924, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 3.3606129200766153e-06, |
|
"loss": 0.0293, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 2.4899878112484765e-06, |
|
"loss": 0.0289, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_bleu": 30.8554, |
|
"eval_cer": 34.459, |
|
"eval_gen_len": 18.5516, |
|
"eval_loss": 0.4744004011154175, |
|
"eval_runtime": 51.9465, |
|
"eval_samples_per_second": 45.162, |
|
"eval_steps_per_second": 2.83, |
|
"eval_wer": 47.8555, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 1.619362702420338e-06, |
|
"loss": 0.0285, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 7.487375935921992e-07, |
|
"loss": 0.0288, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_bleu": 30.8745, |
|
"eval_cer": 34.4895, |
|
"eval_gen_len": 18.5499, |
|
"eval_loss": 0.4744308888912201, |
|
"eval_runtime": 51.4462, |
|
"eval_samples_per_second": 45.601, |
|
"eval_steps_per_second": 2.857, |
|
"eval_wer": 47.8194, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 57430, |
|
"total_flos": 1.2162866037733786e+17, |
|
"train_loss": 0.11780740710349318, |
|
"train_runtime": 33856.2876, |
|
"train_samples_per_second": 27.14, |
|
"train_steps_per_second": 1.696 |
|
} |
|
], |
|
"max_steps": 57430, |
|
"num_train_epochs": 5, |
|
"total_flos": 1.2162866037733786e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|