modernisa-v2-byt5-base-lr0.0001 / trainer_state.json
versae's picture
End of training
adaada8
raw
history blame
32.5 kB
{
"best_metric": 46.4318,
"best_model_checkpoint": "outputs/modernisa-v2-byt5-base-lr0.0001/checkpoint-34000",
"epoch": 5.0,
"global_step": 57430,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 9.912937489117186e-05,
"loss": 0.3717,
"step": 500
},
{
"epoch": 0.09,
"learning_rate": 9.825874978234372e-05,
"loss": 0.2696,
"step": 1000
},
{
"epoch": 0.09,
"eval_bleu": 27.8571,
"eval_cer": 34.4149,
"eval_gen_len": 18.5,
"eval_loss": 0.3027326464653015,
"eval_runtime": 51.4554,
"eval_samples_per_second": 45.593,
"eval_steps_per_second": 2.857,
"eval_wer": 49.5134,
"step": 1000
},
{
"epoch": 0.13,
"learning_rate": 9.73881246735156e-05,
"loss": 0.2564,
"step": 1500
},
{
"epoch": 0.17,
"learning_rate": 9.651749956468745e-05,
"loss": 0.2518,
"step": 2000
},
{
"epoch": 0.17,
"eval_bleu": 29.2213,
"eval_cer": 34.6336,
"eval_gen_len": 18.5371,
"eval_loss": 0.28565752506256104,
"eval_runtime": 53.6563,
"eval_samples_per_second": 43.723,
"eval_steps_per_second": 2.74,
"eval_wer": 49.1981,
"step": 2000
},
{
"epoch": 0.22,
"learning_rate": 9.564687445585931e-05,
"loss": 0.2387,
"step": 2500
},
{
"epoch": 0.26,
"learning_rate": 9.477624934703117e-05,
"loss": 0.2343,
"step": 3000
},
{
"epoch": 0.26,
"eval_bleu": 29.5067,
"eval_cer": 34.9795,
"eval_gen_len": 18.5537,
"eval_loss": 0.2730022966861725,
"eval_runtime": 51.0673,
"eval_samples_per_second": 45.939,
"eval_steps_per_second": 2.879,
"eval_wer": 49.117,
"step": 3000
},
{
"epoch": 0.3,
"learning_rate": 9.390562423820304e-05,
"loss": 0.2331,
"step": 3500
},
{
"epoch": 0.35,
"learning_rate": 9.30349991293749e-05,
"loss": 0.2292,
"step": 4000
},
{
"epoch": 0.35,
"eval_bleu": 29.884,
"eval_cer": 34.8015,
"eval_gen_len": 18.5516,
"eval_loss": 0.26898515224456787,
"eval_runtime": 52.4703,
"eval_samples_per_second": 44.711,
"eval_steps_per_second": 2.802,
"eval_wer": 48.7025,
"step": 4000
},
{
"epoch": 0.39,
"learning_rate": 9.216437402054676e-05,
"loss": 0.2243,
"step": 4500
},
{
"epoch": 0.44,
"learning_rate": 9.129374891171862e-05,
"loss": 0.2243,
"step": 5000
},
{
"epoch": 0.44,
"eval_bleu": 29.9577,
"eval_cer": 34.7218,
"eval_gen_len": 18.5477,
"eval_loss": 0.26465946435928345,
"eval_runtime": 51.6529,
"eval_samples_per_second": 45.419,
"eval_steps_per_second": 2.846,
"eval_wer": 48.8466,
"step": 5000
},
{
"epoch": 0.48,
"learning_rate": 9.042312380289048e-05,
"loss": 0.2185,
"step": 5500
},
{
"epoch": 0.52,
"learning_rate": 8.955249869406234e-05,
"loss": 0.2112,
"step": 6000
},
{
"epoch": 0.52,
"eval_bleu": 30.3115,
"eval_cer": 34.4895,
"eval_gen_len": 18.5477,
"eval_loss": 0.2636098265647888,
"eval_runtime": 53.1222,
"eval_samples_per_second": 44.162,
"eval_steps_per_second": 2.767,
"eval_wer": 48.3871,
"step": 6000
},
{
"epoch": 0.57,
"learning_rate": 8.86818735852342e-05,
"loss": 0.2165,
"step": 6500
},
{
"epoch": 0.61,
"learning_rate": 8.781124847640607e-05,
"loss": 0.2118,
"step": 7000
},
{
"epoch": 0.61,
"eval_bleu": 30.6364,
"eval_cer": 34.7455,
"eval_gen_len": 18.5413,
"eval_loss": 0.25554388761520386,
"eval_runtime": 50.9916,
"eval_samples_per_second": 46.008,
"eval_steps_per_second": 2.883,
"eval_wer": 48.3961,
"step": 7000
},
{
"epoch": 0.65,
"learning_rate": 8.694062336757793e-05,
"loss": 0.2105,
"step": 7500
},
{
"epoch": 0.7,
"learning_rate": 8.606999825874978e-05,
"loss": 0.205,
"step": 8000
},
{
"epoch": 0.7,
"eval_bleu": 31.0881,
"eval_cer": 34.0759,
"eval_gen_len": 18.5269,
"eval_loss": 0.2507636547088623,
"eval_runtime": 53.5946,
"eval_samples_per_second": 43.773,
"eval_steps_per_second": 2.743,
"eval_wer": 47.468,
"step": 8000
},
{
"epoch": 0.74,
"learning_rate": 8.519937314992164e-05,
"loss": 0.2035,
"step": 8500
},
{
"epoch": 0.78,
"learning_rate": 8.432874804109351e-05,
"loss": 0.2049,
"step": 9000
},
{
"epoch": 0.78,
"eval_bleu": 31.1481,
"eval_cer": 34.4133,
"eval_gen_len": 18.5503,
"eval_loss": 0.24714592099189758,
"eval_runtime": 60.0036,
"eval_samples_per_second": 39.098,
"eval_steps_per_second": 2.45,
"eval_wer": 47.5942,
"step": 9000
},
{
"epoch": 0.83,
"learning_rate": 8.345812293226537e-05,
"loss": 0.2074,
"step": 9500
},
{
"epoch": 0.87,
"learning_rate": 8.258749782343723e-05,
"loss": 0.2005,
"step": 10000
},
{
"epoch": 0.87,
"eval_bleu": 30.9375,
"eval_cer": 34.281,
"eval_gen_len": 18.5405,
"eval_loss": 0.24682185053825378,
"eval_runtime": 51.6353,
"eval_samples_per_second": 45.434,
"eval_steps_per_second": 2.847,
"eval_wer": 47.6392,
"step": 10000
},
{
"epoch": 0.91,
"learning_rate": 8.171687271460909e-05,
"loss": 0.2049,
"step": 10500
},
{
"epoch": 0.96,
"learning_rate": 8.084624760578095e-05,
"loss": 0.1999,
"step": 11000
},
{
"epoch": 0.96,
"eval_bleu": 30.9692,
"eval_cer": 34.4183,
"eval_gen_len": 18.5405,
"eval_loss": 0.24305607378482819,
"eval_runtime": 53.15,
"eval_samples_per_second": 44.139,
"eval_steps_per_second": 2.766,
"eval_wer": 47.7023,
"step": 11000
},
{
"epoch": 1.0,
"learning_rate": 7.997562249695282e-05,
"loss": 0.2037,
"step": 11500
},
{
"epoch": 1.04,
"learning_rate": 7.910499738812468e-05,
"loss": 0.161,
"step": 12000
},
{
"epoch": 1.04,
"eval_bleu": 31.2337,
"eval_cer": 34.1878,
"eval_gen_len": 18.5298,
"eval_loss": 0.24913541972637177,
"eval_runtime": 51.1684,
"eval_samples_per_second": 45.849,
"eval_steps_per_second": 2.873,
"eval_wer": 47.3238,
"step": 12000
},
{
"epoch": 1.09,
"learning_rate": 7.823437227929654e-05,
"loss": 0.1568,
"step": 12500
},
{
"epoch": 1.13,
"learning_rate": 7.73637471704684e-05,
"loss": 0.1601,
"step": 13000
},
{
"epoch": 1.13,
"eval_bleu": 31.4422,
"eval_cer": 34.1657,
"eval_gen_len": 18.5371,
"eval_loss": 0.24957244098186493,
"eval_runtime": 52.0238,
"eval_samples_per_second": 45.095,
"eval_steps_per_second": 2.826,
"eval_wer": 47.3689,
"step": 13000
},
{
"epoch": 1.18,
"learning_rate": 7.649312206164027e-05,
"loss": 0.1523,
"step": 13500
},
{
"epoch": 1.22,
"learning_rate": 7.562249695281213e-05,
"loss": 0.1606,
"step": 14000
},
{
"epoch": 1.22,
"eval_bleu": 31.4582,
"eval_cer": 34.2386,
"eval_gen_len": 18.5405,
"eval_loss": 0.24588151276111603,
"eval_runtime": 51.9336,
"eval_samples_per_second": 45.173,
"eval_steps_per_second": 2.831,
"eval_wer": 47.3329,
"step": 14000
},
{
"epoch": 1.26,
"learning_rate": 7.475187184398399e-05,
"loss": 0.158,
"step": 14500
},
{
"epoch": 1.31,
"learning_rate": 7.388124673515585e-05,
"loss": 0.1594,
"step": 15000
},
{
"epoch": 1.31,
"eval_bleu": 31.386,
"eval_cer": 34.2912,
"eval_gen_len": 18.5375,
"eval_loss": 0.24664774537086487,
"eval_runtime": 51.7169,
"eval_samples_per_second": 45.362,
"eval_steps_per_second": 2.842,
"eval_wer": 47.1166,
"step": 15000
},
{
"epoch": 1.35,
"learning_rate": 7.30106216263277e-05,
"loss": 0.1609,
"step": 15500
},
{
"epoch": 1.39,
"learning_rate": 7.213999651749956e-05,
"loss": 0.1617,
"step": 16000
},
{
"epoch": 1.39,
"eval_bleu": 31.6546,
"eval_cer": 34.0149,
"eval_gen_len": 18.5294,
"eval_loss": 0.2411554753780365,
"eval_runtime": 52.5275,
"eval_samples_per_second": 44.662,
"eval_steps_per_second": 2.799,
"eval_wer": 46.8373,
"step": 16000
},
{
"epoch": 1.44,
"learning_rate": 7.126937140867142e-05,
"loss": 0.1594,
"step": 16500
},
{
"epoch": 1.48,
"learning_rate": 7.03987462998433e-05,
"loss": 0.1582,
"step": 17000
},
{
"epoch": 1.48,
"eval_bleu": 31.2924,
"eval_cer": 34.2573,
"eval_gen_len": 18.5503,
"eval_loss": 0.24606570601463318,
"eval_runtime": 50.8697,
"eval_samples_per_second": 46.118,
"eval_steps_per_second": 2.89,
"eval_wer": 47.4139,
"step": 17000
},
{
"epoch": 1.52,
"learning_rate": 6.952812119101515e-05,
"loss": 0.1599,
"step": 17500
},
{
"epoch": 1.57,
"learning_rate": 6.865749608218701e-05,
"loss": 0.1572,
"step": 18000
},
{
"epoch": 1.57,
"eval_bleu": 31.1484,
"eval_cer": 34.3675,
"eval_gen_len": 18.5499,
"eval_loss": 0.24250736832618713,
"eval_runtime": 52.5057,
"eval_samples_per_second": 44.681,
"eval_steps_per_second": 2.8,
"eval_wer": 47.45,
"step": 18000
},
{
"epoch": 1.61,
"learning_rate": 6.778687097335887e-05,
"loss": 0.1574,
"step": 18500
},
{
"epoch": 1.65,
"learning_rate": 6.691624586453074e-05,
"loss": 0.1565,
"step": 19000
},
{
"epoch": 1.65,
"eval_bleu": 31.6967,
"eval_cer": 34.1047,
"eval_gen_len": 18.5388,
"eval_loss": 0.2424342930316925,
"eval_runtime": 51.0272,
"eval_samples_per_second": 45.975,
"eval_steps_per_second": 2.881,
"eval_wer": 46.9724,
"step": 19000
},
{
"epoch": 1.7,
"learning_rate": 6.60456207557026e-05,
"loss": 0.1582,
"step": 19500
},
{
"epoch": 1.74,
"learning_rate": 6.517499564687446e-05,
"loss": 0.1585,
"step": 20000
},
{
"epoch": 1.74,
"eval_bleu": 31.9026,
"eval_cer": 34.281,
"eval_gen_len": 18.558,
"eval_loss": 0.2381763756275177,
"eval_runtime": 52.4669,
"eval_samples_per_second": 44.714,
"eval_steps_per_second": 2.802,
"eval_wer": 47.0175,
"step": 20000
},
{
"epoch": 1.78,
"learning_rate": 6.430437053804632e-05,
"loss": 0.1559,
"step": 20500
},
{
"epoch": 1.83,
"learning_rate": 6.343374542921819e-05,
"loss": 0.1522,
"step": 21000
},
{
"epoch": 1.83,
"eval_bleu": 32.1619,
"eval_cer": 33.9369,
"eval_gen_len": 18.5311,
"eval_loss": 0.23654896020889282,
"eval_runtime": 52.4567,
"eval_samples_per_second": 44.723,
"eval_steps_per_second": 2.802,
"eval_wer": 46.5219,
"step": 21000
},
{
"epoch": 1.87,
"learning_rate": 6.256312032039005e-05,
"loss": 0.1578,
"step": 21500
},
{
"epoch": 1.92,
"learning_rate": 6.169249521156191e-05,
"loss": 0.156,
"step": 22000
},
{
"epoch": 1.92,
"eval_bleu": 31.7762,
"eval_cer": 33.9572,
"eval_gen_len": 18.5401,
"eval_loss": 0.2381468415260315,
"eval_runtime": 56.1509,
"eval_samples_per_second": 41.78,
"eval_steps_per_second": 2.618,
"eval_wer": 46.7922,
"step": 22000
},
{
"epoch": 1.96,
"learning_rate": 6.0821870102733766e-05,
"loss": 0.1567,
"step": 22500
},
{
"epoch": 2.0,
"learning_rate": 5.995124499390563e-05,
"loss": 0.1538,
"step": 23000
},
{
"epoch": 2.0,
"eval_bleu": 31.8785,
"eval_cer": 34.2319,
"eval_gen_len": 18.5516,
"eval_loss": 0.24016974866390228,
"eval_runtime": 53.089,
"eval_samples_per_second": 44.19,
"eval_steps_per_second": 2.769,
"eval_wer": 46.8012,
"step": 23000
},
{
"epoch": 2.05,
"learning_rate": 5.908061988507749e-05,
"loss": 0.1075,
"step": 23500
},
{
"epoch": 2.09,
"learning_rate": 5.8209994776249355e-05,
"loss": 0.1083,
"step": 24000
},
{
"epoch": 2.09,
"eval_bleu": 31.9905,
"eval_cer": 34.0098,
"eval_gen_len": 18.5384,
"eval_loss": 0.2653577923774719,
"eval_runtime": 52.9572,
"eval_samples_per_second": 44.3,
"eval_steps_per_second": 2.776,
"eval_wer": 46.603,
"step": 24000
},
{
"epoch": 2.13,
"learning_rate": 5.7339369667421214e-05,
"loss": 0.108,
"step": 24500
},
{
"epoch": 2.18,
"learning_rate": 5.6468744558593066e-05,
"loss": 0.1086,
"step": 25000
},
{
"epoch": 2.18,
"eval_bleu": 31.6257,
"eval_cer": 34.2607,
"eval_gen_len": 18.5409,
"eval_loss": 0.26182088255882263,
"eval_runtime": 51.8801,
"eval_samples_per_second": 45.22,
"eval_steps_per_second": 2.833,
"eval_wer": 46.9995,
"step": 25000
},
{
"epoch": 2.22,
"learning_rate": 5.559811944976493e-05,
"loss": 0.1085,
"step": 25500
},
{
"epoch": 2.26,
"learning_rate": 5.472749434093679e-05,
"loss": 0.1092,
"step": 26000
},
{
"epoch": 2.26,
"eval_bleu": 31.4886,
"eval_cer": 34.337,
"eval_gen_len": 18.5422,
"eval_loss": 0.2658332288265228,
"eval_runtime": 51.9175,
"eval_samples_per_second": 45.187,
"eval_steps_per_second": 2.831,
"eval_wer": 47.1436,
"step": 26000
},
{
"epoch": 2.31,
"learning_rate": 5.3856869232108655e-05,
"loss": 0.1106,
"step": 26500
},
{
"epoch": 2.35,
"learning_rate": 5.2986244123280514e-05,
"loss": 0.1086,
"step": 27000
},
{
"epoch": 2.35,
"eval_bleu": 31.8448,
"eval_cer": 34.1217,
"eval_gen_len": 18.5375,
"eval_loss": 0.26663142442703247,
"eval_runtime": 51.6292,
"eval_samples_per_second": 45.439,
"eval_steps_per_second": 2.847,
"eval_wer": 46.6751,
"step": 27000
},
{
"epoch": 2.39,
"learning_rate": 5.211561901445238e-05,
"loss": 0.1087,
"step": 27500
},
{
"epoch": 2.44,
"learning_rate": 5.124499390562424e-05,
"loss": 0.1098,
"step": 28000
},
{
"epoch": 2.44,
"eval_bleu": 31.709,
"eval_cer": 34.1946,
"eval_gen_len": 18.5452,
"eval_loss": 0.2659294009208679,
"eval_runtime": 58.8373,
"eval_samples_per_second": 39.873,
"eval_steps_per_second": 2.498,
"eval_wer": 46.8913,
"step": 28000
},
{
"epoch": 2.48,
"learning_rate": 5.03743687967961e-05,
"loss": 0.1106,
"step": 28500
},
{
"epoch": 2.52,
"learning_rate": 4.950374368796796e-05,
"loss": 0.1117,
"step": 29000
},
{
"epoch": 2.52,
"eval_bleu": 31.8114,
"eval_cer": 34.1708,
"eval_gen_len": 18.5431,
"eval_loss": 0.2648890018463135,
"eval_runtime": 57.7914,
"eval_samples_per_second": 40.594,
"eval_steps_per_second": 2.544,
"eval_wer": 46.8913,
"step": 29000
},
{
"epoch": 2.57,
"learning_rate": 4.863311857913983e-05,
"loss": 0.1087,
"step": 29500
},
{
"epoch": 2.61,
"learning_rate": 4.7762493470311686e-05,
"loss": 0.1094,
"step": 30000
},
{
"epoch": 2.61,
"eval_bleu": 31.6955,
"eval_cer": 34.1606,
"eval_gen_len": 18.5375,
"eval_loss": 0.26563677191734314,
"eval_runtime": 53.0374,
"eval_samples_per_second": 44.233,
"eval_steps_per_second": 2.772,
"eval_wer": 46.8643,
"step": 30000
},
{
"epoch": 2.66,
"learning_rate": 4.689186836148355e-05,
"loss": 0.1079,
"step": 30500
},
{
"epoch": 2.7,
"learning_rate": 4.602124325265541e-05,
"loss": 0.1077,
"step": 31000
},
{
"epoch": 2.7,
"eval_bleu": 31.5495,
"eval_cer": 34.0064,
"eval_gen_len": 18.5448,
"eval_loss": 0.2636907398700714,
"eval_runtime": 53.9667,
"eval_samples_per_second": 43.471,
"eval_steps_per_second": 2.724,
"eval_wer": 46.8823,
"step": 31000
},
{
"epoch": 2.74,
"learning_rate": 4.5150618143827276e-05,
"loss": 0.1103,
"step": 31500
},
{
"epoch": 2.79,
"learning_rate": 4.427999303499913e-05,
"loss": 0.1088,
"step": 32000
},
{
"epoch": 2.79,
"eval_bleu": 32.0837,
"eval_cer": 33.9504,
"eval_gen_len": 18.5413,
"eval_loss": 0.2668997645378113,
"eval_runtime": 50.6471,
"eval_samples_per_second": 46.321,
"eval_steps_per_second": 2.902,
"eval_wer": 46.612,
"step": 32000
},
{
"epoch": 2.83,
"learning_rate": 4.340936792617099e-05,
"loss": 0.1087,
"step": 32500
},
{
"epoch": 2.87,
"learning_rate": 4.253874281734285e-05,
"loss": 0.1087,
"step": 33000
},
{
"epoch": 2.87,
"eval_bleu": 31.5549,
"eval_cer": 34.2149,
"eval_gen_len": 18.5286,
"eval_loss": 0.264612078666687,
"eval_runtime": 52.8215,
"eval_samples_per_second": 44.414,
"eval_steps_per_second": 2.783,
"eval_wer": 47.0806,
"step": 33000
},
{
"epoch": 2.92,
"learning_rate": 4.166811770851472e-05,
"loss": 0.1091,
"step": 33500
},
{
"epoch": 2.96,
"learning_rate": 4.0797492599686575e-05,
"loss": 0.1077,
"step": 34000
},
{
"epoch": 2.96,
"eval_bleu": 32.1129,
"eval_cer": 33.9403,
"eval_gen_len": 18.5452,
"eval_loss": 0.26299673318862915,
"eval_runtime": 53.065,
"eval_samples_per_second": 44.21,
"eval_steps_per_second": 2.77,
"eval_wer": 46.4318,
"step": 34000
},
{
"epoch": 3.0,
"learning_rate": 3.992686749085844e-05,
"loss": 0.1037,
"step": 34500
},
{
"epoch": 3.05,
"learning_rate": 3.90562423820303e-05,
"loss": 0.0652,
"step": 35000
},
{
"epoch": 3.05,
"eval_bleu": 31.3861,
"eval_cer": 34.1149,
"eval_gen_len": 18.5396,
"eval_loss": 0.33602526783943176,
"eval_runtime": 53.5525,
"eval_samples_per_second": 43.807,
"eval_steps_per_second": 2.745,
"eval_wer": 47.1977,
"step": 35000
},
{
"epoch": 3.09,
"learning_rate": 3.8185617273202165e-05,
"loss": 0.0648,
"step": 35500
},
{
"epoch": 3.13,
"learning_rate": 3.7314992164374024e-05,
"loss": 0.0662,
"step": 36000
},
{
"epoch": 3.13,
"eval_bleu": 31.2372,
"eval_cer": 34.203,
"eval_gen_len": 18.552,
"eval_loss": 0.3401012718677521,
"eval_runtime": 51.9918,
"eval_samples_per_second": 45.122,
"eval_steps_per_second": 2.827,
"eval_wer": 47.3869,
"step": 36000
},
{
"epoch": 3.18,
"learning_rate": 3.644436705554589e-05,
"loss": 0.0635,
"step": 36500
},
{
"epoch": 3.22,
"learning_rate": 3.557374194671774e-05,
"loss": 0.0666,
"step": 37000
},
{
"epoch": 3.22,
"eval_bleu": 31.3462,
"eval_cer": 34.1759,
"eval_gen_len": 18.5469,
"eval_loss": 0.33890092372894287,
"eval_runtime": 51.8417,
"eval_samples_per_second": 45.253,
"eval_steps_per_second": 2.836,
"eval_wer": 47.2968,
"step": 37000
},
{
"epoch": 3.26,
"learning_rate": 3.4703116837889606e-05,
"loss": 0.0646,
"step": 37500
},
{
"epoch": 3.31,
"learning_rate": 3.3832491729061465e-05,
"loss": 0.0648,
"step": 38000
},
{
"epoch": 3.31,
"eval_bleu": 30.835,
"eval_cer": 34.381,
"eval_gen_len": 18.552,
"eval_loss": 0.3339092433452606,
"eval_runtime": 51.3621,
"eval_samples_per_second": 45.676,
"eval_steps_per_second": 2.862,
"eval_wer": 47.6753,
"step": 38000
},
{
"epoch": 3.35,
"learning_rate": 3.296186662023333e-05,
"loss": 0.0648,
"step": 38500
},
{
"epoch": 3.4,
"learning_rate": 3.209124151140519e-05,
"loss": 0.0654,
"step": 39000
},
{
"epoch": 3.4,
"eval_bleu": 31.0958,
"eval_cer": 34.4692,
"eval_gen_len": 18.5524,
"eval_loss": 0.33954936265945435,
"eval_runtime": 51.3892,
"eval_samples_per_second": 45.652,
"eval_steps_per_second": 2.861,
"eval_wer": 47.7203,
"step": 39000
},
{
"epoch": 3.44,
"learning_rate": 3.1220616402577054e-05,
"loss": 0.0645,
"step": 39500
},
{
"epoch": 3.48,
"learning_rate": 3.0349991293748913e-05,
"loss": 0.0663,
"step": 40000
},
{
"epoch": 3.48,
"eval_bleu": 31.126,
"eval_cer": 34.4539,
"eval_gen_len": 18.5499,
"eval_loss": 0.3317714333534241,
"eval_runtime": 53.3556,
"eval_samples_per_second": 43.969,
"eval_steps_per_second": 2.755,
"eval_wer": 47.5942,
"step": 40000
},
{
"epoch": 3.53,
"learning_rate": 2.9479366184920775e-05,
"loss": 0.0657,
"step": 40500
},
{
"epoch": 3.57,
"learning_rate": 2.8608741076092637e-05,
"loss": 0.0648,
"step": 41000
},
{
"epoch": 3.57,
"eval_bleu": 31.0295,
"eval_cer": 34.3539,
"eval_gen_len": 18.5477,
"eval_loss": 0.33970215916633606,
"eval_runtime": 52.8717,
"eval_samples_per_second": 44.372,
"eval_steps_per_second": 2.78,
"eval_wer": 47.5852,
"step": 41000
},
{
"epoch": 3.61,
"learning_rate": 2.77381159672645e-05,
"loss": 0.0647,
"step": 41500
},
{
"epoch": 3.66,
"learning_rate": 2.6867490858436354e-05,
"loss": 0.0635,
"step": 42000
},
{
"epoch": 3.66,
"eval_bleu": 31.1287,
"eval_cer": 34.4285,
"eval_gen_len": 18.5494,
"eval_loss": 0.3414219617843628,
"eval_runtime": 52.9358,
"eval_samples_per_second": 44.318,
"eval_steps_per_second": 2.777,
"eval_wer": 47.5491,
"step": 42000
},
{
"epoch": 3.7,
"learning_rate": 2.5996865749608216e-05,
"loss": 0.0657,
"step": 42500
},
{
"epoch": 3.74,
"learning_rate": 2.512624064078008e-05,
"loss": 0.0656,
"step": 43000
},
{
"epoch": 3.74,
"eval_bleu": 30.9225,
"eval_cer": 34.4285,
"eval_gen_len": 18.5563,
"eval_loss": 0.3393559455871582,
"eval_runtime": 51.5264,
"eval_samples_per_second": 45.53,
"eval_steps_per_second": 2.853,
"eval_wer": 47.6392,
"step": 43000
},
{
"epoch": 3.79,
"learning_rate": 2.425561553195194e-05,
"loss": 0.0659,
"step": 43500
},
{
"epoch": 3.83,
"learning_rate": 2.3384990423123803e-05,
"loss": 0.0625,
"step": 44000
},
{
"epoch": 3.83,
"eval_bleu": 31.2435,
"eval_cer": 34.1674,
"eval_gen_len": 18.5439,
"eval_loss": 0.341974139213562,
"eval_runtime": 51.6192,
"eval_samples_per_second": 45.448,
"eval_steps_per_second": 2.848,
"eval_wer": 47.2968,
"step": 44000
},
{
"epoch": 3.87,
"learning_rate": 2.2514365314295665e-05,
"loss": 0.0642,
"step": 44500
},
{
"epoch": 3.92,
"learning_rate": 2.1643740205467527e-05,
"loss": 0.0636,
"step": 45000
},
{
"epoch": 3.92,
"eval_bleu": 31.0688,
"eval_cer": 34.3743,
"eval_gen_len": 18.5439,
"eval_loss": 0.3447582423686981,
"eval_runtime": 51.0408,
"eval_samples_per_second": 45.963,
"eval_steps_per_second": 2.88,
"eval_wer": 47.6843,
"step": 45000
},
{
"epoch": 3.96,
"learning_rate": 2.0773115096639385e-05,
"loss": 0.0644,
"step": 45500
},
{
"epoch": 4.0,
"learning_rate": 1.9902489987811247e-05,
"loss": 0.0586,
"step": 46000
},
{
"epoch": 4.0,
"eval_bleu": 31.2353,
"eval_cer": 34.2963,
"eval_gen_len": 18.549,
"eval_loss": 0.36747270822525024,
"eval_runtime": 54.2264,
"eval_samples_per_second": 43.263,
"eval_steps_per_second": 2.711,
"eval_wer": 47.441,
"step": 46000
},
{
"epoch": 4.05,
"learning_rate": 1.903186487898311e-05,
"loss": 0.0326,
"step": 46500
},
{
"epoch": 4.09,
"learning_rate": 1.816123977015497e-05,
"loss": 0.0298,
"step": 47000
},
{
"epoch": 4.09,
"eval_bleu": 30.698,
"eval_cer": 34.4319,
"eval_gen_len": 18.5512,
"eval_loss": 0.45656564831733704,
"eval_runtime": 53.4442,
"eval_samples_per_second": 43.896,
"eval_steps_per_second": 2.751,
"eval_wer": 47.8555,
"step": 47000
},
{
"epoch": 4.14,
"learning_rate": 1.7290614661326833e-05,
"loss": 0.0308,
"step": 47500
},
{
"epoch": 4.18,
"learning_rate": 1.6419989552498692e-05,
"loss": 0.0301,
"step": 48000
},
{
"epoch": 4.18,
"eval_bleu": 30.7773,
"eval_cer": 34.3861,
"eval_gen_len": 18.5507,
"eval_loss": 0.4724096655845642,
"eval_runtime": 52.6433,
"eval_samples_per_second": 44.564,
"eval_steps_per_second": 2.792,
"eval_wer": 47.8374,
"step": 48000
},
{
"epoch": 4.22,
"learning_rate": 1.5549364443670554e-05,
"loss": 0.0307,
"step": 48500
},
{
"epoch": 4.27,
"learning_rate": 1.4678739334842418e-05,
"loss": 0.0311,
"step": 49000
},
{
"epoch": 4.27,
"eval_bleu": 31.0878,
"eval_cer": 34.3861,
"eval_gen_len": 18.5503,
"eval_loss": 0.4639967978000641,
"eval_runtime": 51.8688,
"eval_samples_per_second": 45.229,
"eval_steps_per_second": 2.834,
"eval_wer": 47.6212,
"step": 49000
},
{
"epoch": 4.31,
"learning_rate": 1.380811422601428e-05,
"loss": 0.0313,
"step": 49500
},
{
"epoch": 4.35,
"learning_rate": 1.2937489117186142e-05,
"loss": 0.03,
"step": 50000
},
{
"epoch": 4.35,
"eval_bleu": 30.8319,
"eval_cer": 34.459,
"eval_gen_len": 18.5529,
"eval_loss": 0.46544739603996277,
"eval_runtime": 56.982,
"eval_samples_per_second": 41.171,
"eval_steps_per_second": 2.58,
"eval_wer": 47.8915,
"step": 50000
},
{
"epoch": 4.4,
"learning_rate": 1.2066864008358002e-05,
"loss": 0.0306,
"step": 50500
},
{
"epoch": 4.44,
"learning_rate": 1.1196238899529864e-05,
"loss": 0.0302,
"step": 51000
},
{
"epoch": 4.44,
"eval_bleu": 30.9236,
"eval_cer": 34.4997,
"eval_gen_len": 18.552,
"eval_loss": 0.46650850772857666,
"eval_runtime": 50.604,
"eval_samples_per_second": 46.36,
"eval_steps_per_second": 2.905,
"eval_wer": 47.9276,
"step": 51000
},
{
"epoch": 4.48,
"learning_rate": 1.0325613790701725e-05,
"loss": 0.0303,
"step": 51500
},
{
"epoch": 4.53,
"learning_rate": 9.454988681873587e-06,
"loss": 0.029,
"step": 52000
},
{
"epoch": 4.53,
"eval_bleu": 30.8307,
"eval_cer": 34.4997,
"eval_gen_len": 18.5482,
"eval_loss": 0.4756769835948944,
"eval_runtime": 53.9866,
"eval_samples_per_second": 43.455,
"eval_steps_per_second": 2.723,
"eval_wer": 47.9456,
"step": 52000
},
{
"epoch": 4.57,
"learning_rate": 8.584363573045447e-06,
"loss": 0.031,
"step": 52500
},
{
"epoch": 4.61,
"learning_rate": 7.713738464217309e-06,
"loss": 0.0301,
"step": 53000
},
{
"epoch": 4.61,
"eval_bleu": 30.7983,
"eval_cer": 34.5218,
"eval_gen_len": 18.5473,
"eval_loss": 0.4672394096851349,
"eval_runtime": 52.9301,
"eval_samples_per_second": 44.323,
"eval_steps_per_second": 2.777,
"eval_wer": 47.9456,
"step": 53000
},
{
"epoch": 4.66,
"learning_rate": 6.843113355389169e-06,
"loss": 0.0296,
"step": 53500
},
{
"epoch": 4.7,
"learning_rate": 5.972488246561031e-06,
"loss": 0.0294,
"step": 54000
},
{
"epoch": 4.7,
"eval_bleu": 30.8924,
"eval_cer": 34.4353,
"eval_gen_len": 18.5529,
"eval_loss": 0.4714747667312622,
"eval_runtime": 53.7737,
"eval_samples_per_second": 43.627,
"eval_steps_per_second": 2.734,
"eval_wer": 47.7564,
"step": 54000
},
{
"epoch": 4.74,
"learning_rate": 5.1018631377328925e-06,
"loss": 0.0292,
"step": 54500
},
{
"epoch": 4.79,
"learning_rate": 4.2312380289047546e-06,
"loss": 0.0288,
"step": 55000
},
{
"epoch": 4.79,
"eval_bleu": 30.7372,
"eval_cer": 34.4675,
"eval_gen_len": 18.5524,
"eval_loss": 0.4751755893230438,
"eval_runtime": 51.756,
"eval_samples_per_second": 45.328,
"eval_steps_per_second": 2.84,
"eval_wer": 47.7924,
"step": 55000
},
{
"epoch": 4.83,
"learning_rate": 3.3606129200766153e-06,
"loss": 0.0293,
"step": 55500
},
{
"epoch": 4.88,
"learning_rate": 2.4899878112484765e-06,
"loss": 0.0289,
"step": 56000
},
{
"epoch": 4.88,
"eval_bleu": 30.8554,
"eval_cer": 34.459,
"eval_gen_len": 18.5516,
"eval_loss": 0.4744004011154175,
"eval_runtime": 51.9465,
"eval_samples_per_second": 45.162,
"eval_steps_per_second": 2.83,
"eval_wer": 47.8555,
"step": 56000
},
{
"epoch": 4.92,
"learning_rate": 1.619362702420338e-06,
"loss": 0.0285,
"step": 56500
},
{
"epoch": 4.96,
"learning_rate": 7.487375935921992e-07,
"loss": 0.0288,
"step": 57000
},
{
"epoch": 4.96,
"eval_bleu": 30.8745,
"eval_cer": 34.4895,
"eval_gen_len": 18.5499,
"eval_loss": 0.4744308888912201,
"eval_runtime": 51.4462,
"eval_samples_per_second": 45.601,
"eval_steps_per_second": 2.857,
"eval_wer": 47.8194,
"step": 57000
},
{
"epoch": 5.0,
"step": 57430,
"total_flos": 1.2162866037733786e+17,
"train_loss": 0.11780740710349318,
"train_runtime": 33856.2876,
"train_samples_per_second": 27.14,
"train_steps_per_second": 1.696
}
],
"max_steps": 57430,
"num_train_epochs": 5,
"total_flos": 1.2162866037733786e+17,
"trial_name": null,
"trial_params": null
}