{ "best_metric": 10.058774675781, "best_model_checkpoint": "./model_out_trpro_more_more/checkpoint-13000", "epoch": 3.6020224407812718, "global_step": 13000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.5e-06, "loss": 0.487, "step": 25 }, { "epoch": 0.01, "learning_rate": 5e-06, "loss": 0.4472, "step": 50 }, { "epoch": 0.02, "learning_rate": 7.5e-06, "loss": 0.4173, "step": 75 }, { "epoch": 0.03, "learning_rate": 1e-05, "loss": 0.4623, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.25e-05, "loss": 0.457, "step": 125 }, { "epoch": 0.04, "learning_rate": 1.5e-05, "loss": 0.4775, "step": 150 }, { "epoch": 0.05, "learning_rate": 1.75e-05, "loss": 0.4445, "step": 175 }, { "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.4751, "step": 200 }, { "epoch": 0.06, "learning_rate": 2.25e-05, "loss": 0.4633, "step": 225 }, { "epoch": 0.07, "learning_rate": 2.5e-05, "loss": 0.4682, "step": 250 }, { "epoch": 0.08, "learning_rate": 2.7500000000000004e-05, "loss": 0.4807, "step": 275 }, { "epoch": 0.08, "learning_rate": 3e-05, "loss": 0.4882, "step": 300 }, { "epoch": 0.09, "learning_rate": 3.2500000000000004e-05, "loss": 0.4681, "step": 325 }, { "epoch": 0.1, "learning_rate": 3.5e-05, "loss": 0.4918, "step": 350 }, { "epoch": 0.1, "learning_rate": 3.7500000000000003e-05, "loss": 0.4871, "step": 375 }, { "epoch": 0.11, "learning_rate": 4e-05, "loss": 0.4986, "step": 400 }, { "epoch": 0.12, "learning_rate": 4.25e-05, "loss": 0.4859, "step": 425 }, { "epoch": 0.12, "learning_rate": 4.5e-05, "loss": 0.4761, "step": 450 }, { "epoch": 0.13, "learning_rate": 4.75e-05, "loss": 0.4884, "step": 475 }, { "epoch": 0.14, "learning_rate": 5e-05, "loss": 0.5453, "step": 500 }, { "epoch": 0.14, "eval_cer": 14.794288634766497, "eval_loss": 0.3284708261489868, "eval_runtime": 1225.6472, "eval_samples_per_second": 4.562, "eval_steps_per_second": 0.286, "step": 500 }, { "epoch": 0.15, "learning_rate": 4.991379310344828e-05, "loss": 0.5147, "step": 525 }, { "epoch": 0.15, "learning_rate": 4.982758620689655e-05, "loss": 0.514, "step": 550 }, { "epoch": 0.16, "learning_rate": 4.9741379310344836e-05, "loss": 0.5397, "step": 575 }, { "epoch": 0.17, "learning_rate": 4.9655172413793107e-05, "loss": 0.5269, "step": 600 }, { "epoch": 0.17, "learning_rate": 4.9568965517241384e-05, "loss": 0.4992, "step": 625 }, { "epoch": 0.18, "learning_rate": 4.9482758620689655e-05, "loss": 0.538, "step": 650 }, { "epoch": 0.19, "learning_rate": 4.939655172413793e-05, "loss": 0.5123, "step": 675 }, { "epoch": 0.19, "learning_rate": 4.931034482758621e-05, "loss": 0.5051, "step": 700 }, { "epoch": 0.2, "learning_rate": 4.922413793103449e-05, "loss": 0.5342, "step": 725 }, { "epoch": 0.21, "learning_rate": 4.913793103448276e-05, "loss": 0.5037, "step": 750 }, { "epoch": 0.21, "learning_rate": 4.905172413793104e-05, "loss": 0.5492, "step": 775 }, { "epoch": 0.22, "learning_rate": 4.896551724137931e-05, "loss": 0.5256, "step": 800 }, { "epoch": 0.23, "learning_rate": 4.8879310344827586e-05, "loss": 0.531, "step": 825 }, { "epoch": 0.24, "learning_rate": 4.8793103448275864e-05, "loss": 0.5245, "step": 850 }, { "epoch": 0.24, "learning_rate": 4.870689655172414e-05, "loss": 0.4702, "step": 875 }, { "epoch": 0.25, "learning_rate": 4.862068965517241e-05, "loss": 0.5404, "step": 900 }, { "epoch": 0.26, "learning_rate": 4.853793103448276e-05, "loss": 0.4968, "step": 925 }, { "epoch": 0.26, "learning_rate": 4.8451724137931036e-05, "loss": 0.4905, "step": 950 }, { "epoch": 0.27, "learning_rate": 4.836551724137931e-05, "loss": 0.5281, "step": 975 }, { "epoch": 0.28, "learning_rate": 4.827931034482759e-05, "loss": 0.5144, "step": 1000 }, { "epoch": 0.28, "eval_cer": 14.356672842266658, "eval_loss": 0.3405072093009949, "eval_runtime": 1158.9803, "eval_samples_per_second": 4.824, "eval_steps_per_second": 0.302, "step": 1000 }, { "epoch": 0.28, "learning_rate": 4.819310344827587e-05, "loss": 0.5357, "step": 1025 }, { "epoch": 0.29, "learning_rate": 4.810689655172414e-05, "loss": 0.5521, "step": 1050 }, { "epoch": 0.3, "learning_rate": 4.802068965517242e-05, "loss": 0.5413, "step": 1075 }, { "epoch": 0.3, "learning_rate": 4.793448275862069e-05, "loss": 0.4707, "step": 1100 }, { "epoch": 0.31, "learning_rate": 4.784827586206897e-05, "loss": 0.4771, "step": 1125 }, { "epoch": 0.32, "learning_rate": 4.7762068965517245e-05, "loss": 0.5055, "step": 1150 }, { "epoch": 0.33, "learning_rate": 4.767586206896552e-05, "loss": 0.5464, "step": 1175 }, { "epoch": 0.33, "learning_rate": 4.758965517241379e-05, "loss": 0.4839, "step": 1200 }, { "epoch": 0.34, "learning_rate": 4.750344827586207e-05, "loss": 0.5087, "step": 1225 }, { "epoch": 0.35, "learning_rate": 4.741724137931035e-05, "loss": 0.4728, "step": 1250 }, { "epoch": 0.35, "learning_rate": 4.7331034482758626e-05, "loss": 0.4457, "step": 1275 }, { "epoch": 0.36, "learning_rate": 4.72448275862069e-05, "loss": 0.5246, "step": 1300 }, { "epoch": 0.37, "learning_rate": 4.7158620689655175e-05, "loss": 0.4953, "step": 1325 }, { "epoch": 0.37, "learning_rate": 4.7072413793103446e-05, "loss": 0.5318, "step": 1350 }, { "epoch": 0.38, "learning_rate": 4.6986206896551724e-05, "loss": 0.5062, "step": 1375 }, { "epoch": 0.39, "learning_rate": 4.69e-05, "loss": 0.4979, "step": 1400 }, { "epoch": 0.39, "learning_rate": 4.681379310344828e-05, "loss": 0.5202, "step": 1425 }, { "epoch": 0.4, "learning_rate": 4.672758620689656e-05, "loss": 0.5267, "step": 1450 }, { "epoch": 0.41, "learning_rate": 4.664137931034483e-05, "loss": 0.4971, "step": 1475 }, { "epoch": 0.42, "learning_rate": 4.6555172413793106e-05, "loss": 0.49, "step": 1500 }, { "epoch": 0.42, "eval_cer": 14.058008049575161, "eval_loss": 0.3323359787464142, "eval_runtime": 1178.6215, "eval_samples_per_second": 4.744, "eval_steps_per_second": 0.297, "step": 1500 }, { "epoch": 0.42, "learning_rate": 4.6468965517241384e-05, "loss": 0.4821, "step": 1525 }, { "epoch": 0.43, "learning_rate": 4.638275862068966e-05, "loss": 0.4603, "step": 1550 }, { "epoch": 0.44, "learning_rate": 4.629655172413793e-05, "loss": 0.5213, "step": 1575 }, { "epoch": 0.44, "learning_rate": 4.621034482758621e-05, "loss": 0.4922, "step": 1600 }, { "epoch": 0.45, "learning_rate": 4.612413793103448e-05, "loss": 0.5202, "step": 1625 }, { "epoch": 0.46, "learning_rate": 4.603793103448276e-05, "loss": 0.5046, "step": 1650 }, { "epoch": 0.46, "learning_rate": 4.5951724137931036e-05, "loss": 0.4795, "step": 1675 }, { "epoch": 0.47, "learning_rate": 4.5865517241379314e-05, "loss": 0.447, "step": 1700 }, { "epoch": 0.48, "learning_rate": 4.5779310344827585e-05, "loss": 0.4752, "step": 1725 }, { "epoch": 0.48, "learning_rate": 4.569310344827586e-05, "loss": 0.515, "step": 1750 }, { "epoch": 0.49, "learning_rate": 4.560689655172414e-05, "loss": 0.4882, "step": 1775 }, { "epoch": 0.5, "learning_rate": 4.552068965517242e-05, "loss": 0.4721, "step": 1800 }, { "epoch": 0.51, "learning_rate": 4.5434482758620696e-05, "loss": 0.4542, "step": 1825 }, { "epoch": 0.51, "learning_rate": 4.534827586206897e-05, "loss": 0.5197, "step": 1850 }, { "epoch": 0.52, "learning_rate": 4.5262068965517245e-05, "loss": 0.4589, "step": 1875 }, { "epoch": 0.53, "learning_rate": 4.5175862068965516e-05, "loss": 0.4668, "step": 1900 }, { "epoch": 0.53, "learning_rate": 4.50896551724138e-05, "loss": 0.4824, "step": 1925 }, { "epoch": 0.54, "learning_rate": 4.500344827586207e-05, "loss": 0.4967, "step": 1950 }, { "epoch": 0.55, "learning_rate": 4.491724137931035e-05, "loss": 0.4913, "step": 1975 }, { "epoch": 0.55, "learning_rate": 4.483103448275862e-05, "loss": 0.461, "step": 2000 }, { "epoch": 0.55, "eval_cer": 13.056602568197789, "eval_loss": 0.31062883138656616, "eval_runtime": 1163.7705, "eval_samples_per_second": 4.804, "eval_steps_per_second": 0.301, "step": 2000 }, { "epoch": 0.56, "learning_rate": 4.47448275862069e-05, "loss": 0.4987, "step": 2025 }, { "epoch": 0.57, "learning_rate": 4.4658620689655175e-05, "loss": 0.5091, "step": 2050 }, { "epoch": 0.57, "learning_rate": 4.457241379310345e-05, "loss": 0.5096, "step": 2075 }, { "epoch": 0.58, "learning_rate": 4.4486206896551724e-05, "loss": 0.4716, "step": 2100 }, { "epoch": 0.59, "learning_rate": 4.44e-05, "loss": 0.4618, "step": 2125 }, { "epoch": 0.6, "learning_rate": 4.431379310344827e-05, "loss": 0.4588, "step": 2150 }, { "epoch": 0.6, "learning_rate": 4.422758620689656e-05, "loss": 0.5037, "step": 2175 }, { "epoch": 0.61, "learning_rate": 4.414137931034483e-05, "loss": 0.4818, "step": 2200 }, { "epoch": 0.62, "learning_rate": 4.4055172413793106e-05, "loss": 0.4802, "step": 2225 }, { "epoch": 0.62, "learning_rate": 4.3968965517241384e-05, "loss": 0.4889, "step": 2250 }, { "epoch": 0.63, "learning_rate": 4.3882758620689655e-05, "loss": 0.4914, "step": 2275 }, { "epoch": 0.64, "learning_rate": 4.379655172413793e-05, "loss": 0.4867, "step": 2300 }, { "epoch": 0.64, "learning_rate": 4.371034482758621e-05, "loss": 0.5006, "step": 2325 }, { "epoch": 0.65, "learning_rate": 4.362413793103449e-05, "loss": 0.4705, "step": 2350 }, { "epoch": 0.66, "learning_rate": 4.353793103448276e-05, "loss": 0.482, "step": 2375 }, { "epoch": 0.66, "learning_rate": 4.3451724137931037e-05, "loss": 0.4818, "step": 2400 }, { "epoch": 0.67, "learning_rate": 4.336551724137931e-05, "loss": 0.4478, "step": 2425 }, { "epoch": 0.68, "learning_rate": 4.327931034482759e-05, "loss": 0.4759, "step": 2450 }, { "epoch": 0.69, "learning_rate": 4.319310344827586e-05, "loss": 0.4518, "step": 2475 }, { "epoch": 0.69, "learning_rate": 4.310689655172414e-05, "loss": 0.4935, "step": 2500 }, { "epoch": 0.69, "eval_cer": 13.829617325752253, "eval_loss": 0.3001407980918884, "eval_runtime": 1213.7136, "eval_samples_per_second": 4.607, "eval_steps_per_second": 0.288, "step": 2500 }, { "epoch": 0.7, "learning_rate": 4.302068965517241e-05, "loss": 0.4943, "step": 2525 }, { "epoch": 0.71, "learning_rate": 4.293448275862069e-05, "loss": 0.4451, "step": 2550 }, { "epoch": 0.71, "learning_rate": 4.284827586206897e-05, "loss": 0.456, "step": 2575 }, { "epoch": 0.72, "learning_rate": 4.2762068965517245e-05, "loss": 0.4944, "step": 2600 }, { "epoch": 0.73, "learning_rate": 4.267586206896552e-05, "loss": 0.4523, "step": 2625 }, { "epoch": 0.73, "learning_rate": 4.2589655172413794e-05, "loss": 0.4865, "step": 2650 }, { "epoch": 0.74, "learning_rate": 4.250344827586207e-05, "loss": 0.4429, "step": 2675 }, { "epoch": 0.75, "learning_rate": 4.241724137931035e-05, "loss": 0.4705, "step": 2700 }, { "epoch": 0.75, "learning_rate": 4.233103448275863e-05, "loss": 0.437, "step": 2725 }, { "epoch": 0.76, "learning_rate": 4.22448275862069e-05, "loss": 0.4218, "step": 2750 }, { "epoch": 0.77, "learning_rate": 4.2158620689655176e-05, "loss": 0.5038, "step": 2775 }, { "epoch": 0.78, "learning_rate": 4.2072413793103447e-05, "loss": 0.4579, "step": 2800 }, { "epoch": 0.78, "learning_rate": 4.1986206896551724e-05, "loss": 0.4738, "step": 2825 }, { "epoch": 0.79, "learning_rate": 4.19e-05, "loss": 0.4642, "step": 2850 }, { "epoch": 0.8, "learning_rate": 4.181379310344828e-05, "loss": 0.4422, "step": 2875 }, { "epoch": 0.8, "learning_rate": 4.172758620689655e-05, "loss": 0.4212, "step": 2900 }, { "epoch": 0.81, "learning_rate": 4.164137931034483e-05, "loss": 0.4803, "step": 2925 }, { "epoch": 0.82, "learning_rate": 4.1555172413793106e-05, "loss": 0.4333, "step": 2950 }, { "epoch": 0.82, "learning_rate": 4.1468965517241384e-05, "loss": 0.4663, "step": 2975 }, { "epoch": 0.83, "learning_rate": 4.1382758620689655e-05, "loss": 0.4836, "step": 3000 }, { "epoch": 0.83, "eval_cer": 13.110905257778061, "eval_loss": 0.29782944917678833, "eval_runtime": 1227.0769, "eval_samples_per_second": 4.556, "eval_steps_per_second": 0.285, "step": 3000 }, { "epoch": 0.84, "learning_rate": 4.129655172413793e-05, "loss": 0.482, "step": 3025 }, { "epoch": 0.84, "learning_rate": 4.121034482758621e-05, "loss": 0.4706, "step": 3050 }, { "epoch": 0.85, "learning_rate": 4.112413793103448e-05, "loss": 0.4658, "step": 3075 }, { "epoch": 0.86, "learning_rate": 4.1037931034482766e-05, "loss": 0.4624, "step": 3100 }, { "epoch": 0.87, "learning_rate": 4.095172413793104e-05, "loss": 0.4754, "step": 3125 }, { "epoch": 0.87, "learning_rate": 4.0865517241379315e-05, "loss": 0.4866, "step": 3150 }, { "epoch": 0.88, "learning_rate": 4.0779310344827586e-05, "loss": 0.4723, "step": 3175 }, { "epoch": 0.89, "learning_rate": 4.069310344827586e-05, "loss": 0.4707, "step": 3200 }, { "epoch": 0.89, "learning_rate": 4.060689655172414e-05, "loss": 0.3989, "step": 3225 }, { "epoch": 0.9, "learning_rate": 4.052068965517242e-05, "loss": 0.4714, "step": 3250 }, { "epoch": 0.91, "learning_rate": 4.043448275862069e-05, "loss": 0.4451, "step": 3275 }, { "epoch": 0.91, "learning_rate": 4.034827586206897e-05, "loss": 0.4785, "step": 3300 }, { "epoch": 0.92, "learning_rate": 4.026206896551724e-05, "loss": 0.4509, "step": 3325 }, { "epoch": 0.93, "learning_rate": 4.017586206896552e-05, "loss": 0.4523, "step": 3350 }, { "epoch": 0.94, "learning_rate": 4.0089655172413794e-05, "loss": 0.4264, "step": 3375 }, { "epoch": 0.94, "learning_rate": 4.000344827586207e-05, "loss": 0.4685, "step": 3400 }, { "epoch": 0.95, "learning_rate": 3.991724137931035e-05, "loss": 0.4632, "step": 3425 }, { "epoch": 0.96, "learning_rate": 3.983103448275862e-05, "loss": 0.4548, "step": 3450 }, { "epoch": 0.96, "learning_rate": 3.97448275862069e-05, "loss": 0.4306, "step": 3475 }, { "epoch": 0.97, "learning_rate": 3.9658620689655176e-05, "loss": 0.4782, "step": 3500 }, { "epoch": 0.97, "eval_cer": 12.711620775570179, "eval_loss": 0.2923573851585388, "eval_runtime": 1186.1965, "eval_samples_per_second": 4.713, "eval_steps_per_second": 0.295, "step": 3500 }, { "epoch": 0.98, "learning_rate": 3.9572413793103454e-05, "loss": 0.4219, "step": 3525 }, { "epoch": 0.98, "learning_rate": 3.9486206896551725e-05, "loss": 0.471, "step": 3550 }, { "epoch": 0.99, "learning_rate": 3.94e-05, "loss": 0.4687, "step": 3575 }, { "epoch": 1.0, "learning_rate": 3.931379310344827e-05, "loss": 0.4356, "step": 3600 }, { "epoch": 1.0, "learning_rate": 3.922758620689656e-05, "loss": 0.4144, "step": 3625 }, { "epoch": 1.01, "learning_rate": 3.914137931034483e-05, "loss": 0.3605, "step": 3650 }, { "epoch": 1.02, "learning_rate": 3.9055172413793106e-05, "loss": 0.3652, "step": 3675 }, { "epoch": 1.03, "learning_rate": 3.896896551724138e-05, "loss": 0.3725, "step": 3700 }, { "epoch": 1.03, "learning_rate": 3.8882758620689655e-05, "loss": 0.3481, "step": 3725 }, { "epoch": 1.04, "learning_rate": 3.879655172413793e-05, "loss": 0.3662, "step": 3750 }, { "epoch": 1.05, "learning_rate": 3.871034482758621e-05, "loss": 0.3835, "step": 3775 }, { "epoch": 1.05, "learning_rate": 3.862413793103448e-05, "loss": 0.3638, "step": 3800 }, { "epoch": 1.06, "learning_rate": 3.853793103448276e-05, "loss": 0.3615, "step": 3825 }, { "epoch": 1.07, "learning_rate": 3.845172413793104e-05, "loss": 0.3327, "step": 3850 }, { "epoch": 1.07, "learning_rate": 3.8365517241379315e-05, "loss": 0.3679, "step": 3875 }, { "epoch": 1.08, "learning_rate": 3.827931034482759e-05, "loss": 0.3549, "step": 3900 }, { "epoch": 1.09, "learning_rate": 3.8193103448275863e-05, "loss": 0.3728, "step": 3925 }, { "epoch": 1.09, "learning_rate": 3.810689655172414e-05, "loss": 0.3493, "step": 3950 }, { "epoch": 1.1, "learning_rate": 3.802068965517241e-05, "loss": 0.3509, "step": 3975 }, { "epoch": 1.11, "learning_rate": 3.793448275862069e-05, "loss": 0.3441, "step": 4000 }, { "epoch": 1.11, "eval_cer": 11.794863604420877, "eval_loss": 0.2874628007411957, "eval_runtime": 1164.5329, "eval_samples_per_second": 4.801, "eval_steps_per_second": 0.301, "step": 4000 }, { "epoch": 1.12, "learning_rate": 3.784827586206897e-05, "loss": 0.3513, "step": 4025 }, { "epoch": 1.12, "learning_rate": 3.7762068965517245e-05, "loss": 0.3442, "step": 4050 }, { "epoch": 1.13, "learning_rate": 3.7675862068965516e-05, "loss": 0.3183, "step": 4075 }, { "epoch": 1.14, "learning_rate": 3.7589655172413794e-05, "loss": 0.3674, "step": 4100 }, { "epoch": 1.14, "learning_rate": 3.750344827586207e-05, "loss": 0.3491, "step": 4125 }, { "epoch": 1.15, "learning_rate": 3.741724137931035e-05, "loss": 0.337, "step": 4150 }, { "epoch": 1.16, "learning_rate": 3.733103448275862e-05, "loss": 0.3711, "step": 4175 }, { "epoch": 1.16, "learning_rate": 3.72448275862069e-05, "loss": 0.3839, "step": 4200 }, { "epoch": 1.17, "learning_rate": 3.7158620689655176e-05, "loss": 0.3724, "step": 4225 }, { "epoch": 1.18, "learning_rate": 3.707241379310345e-05, "loss": 0.3444, "step": 4250 }, { "epoch": 1.18, "learning_rate": 3.698620689655173e-05, "loss": 0.3997, "step": 4275 }, { "epoch": 1.19, "learning_rate": 3.69e-05, "loss": 0.3742, "step": 4300 }, { "epoch": 1.2, "learning_rate": 3.681379310344828e-05, "loss": 0.3657, "step": 4325 }, { "epoch": 1.21, "learning_rate": 3.672758620689655e-05, "loss": 0.3762, "step": 4350 }, { "epoch": 1.21, "learning_rate": 3.664137931034483e-05, "loss": 0.3815, "step": 4375 }, { "epoch": 1.22, "learning_rate": 3.655517241379311e-05, "loss": 0.3628, "step": 4400 }, { "epoch": 1.23, "learning_rate": 3.6468965517241384e-05, "loss": 0.3533, "step": 4425 }, { "epoch": 1.23, "learning_rate": 3.6382758620689655e-05, "loss": 0.3857, "step": 4450 }, { "epoch": 1.24, "learning_rate": 3.629655172413793e-05, "loss": 0.3381, "step": 4475 }, { "epoch": 1.25, "learning_rate": 3.6210344827586204e-05, "loss": 0.3647, "step": 4500 }, { "epoch": 1.25, "eval_cer": 11.617581294320578, "eval_loss": 0.28388434648513794, "eval_runtime": 1169.2298, "eval_samples_per_second": 4.782, "eval_steps_per_second": 0.299, "step": 4500 }, { "epoch": 1.25, "learning_rate": 3.612413793103449e-05, "loss": 0.3935, "step": 4525 }, { "epoch": 1.26, "learning_rate": 3.603793103448276e-05, "loss": 0.3514, "step": 4550 }, { "epoch": 1.27, "learning_rate": 3.595172413793104e-05, "loss": 0.3925, "step": 4575 }, { "epoch": 1.27, "learning_rate": 3.586551724137931e-05, "loss": 0.355, "step": 4600 }, { "epoch": 1.28, "learning_rate": 3.5779310344827586e-05, "loss": 0.3539, "step": 4625 }, { "epoch": 1.29, "learning_rate": 3.5693103448275864e-05, "loss": 0.3583, "step": 4650 }, { "epoch": 1.3, "learning_rate": 3.560689655172414e-05, "loss": 0.3312, "step": 4675 }, { "epoch": 1.3, "learning_rate": 3.552068965517242e-05, "loss": 0.331, "step": 4700 }, { "epoch": 1.31, "learning_rate": 3.543448275862069e-05, "loss": 0.3703, "step": 4725 }, { "epoch": 1.32, "learning_rate": 3.534827586206897e-05, "loss": 0.3599, "step": 4750 }, { "epoch": 1.32, "learning_rate": 3.526206896551724e-05, "loss": 0.3779, "step": 4775 }, { "epoch": 1.33, "learning_rate": 3.517586206896552e-05, "loss": 0.3338, "step": 4800 }, { "epoch": 1.34, "learning_rate": 3.5089655172413794e-05, "loss": 0.3249, "step": 4825 }, { "epoch": 1.34, "learning_rate": 3.500344827586207e-05, "loss": 0.364, "step": 4850 }, { "epoch": 1.35, "learning_rate": 3.491724137931034e-05, "loss": 0.3755, "step": 4875 }, { "epoch": 1.36, "learning_rate": 3.483103448275862e-05, "loss": 0.353, "step": 4900 }, { "epoch": 1.36, "learning_rate": 3.47448275862069e-05, "loss": 0.3483, "step": 4925 }, { "epoch": 1.37, "learning_rate": 3.4658620689655176e-05, "loss": 0.3675, "step": 4950 }, { "epoch": 1.38, "learning_rate": 3.457241379310345e-05, "loss": 0.3779, "step": 4975 }, { "epoch": 1.39, "learning_rate": 3.4486206896551725e-05, "loss": 0.3642, "step": 5000 }, { "epoch": 1.39, "eval_cer": 11.572861432313294, "eval_loss": 0.28437113761901855, "eval_runtime": 1173.2699, "eval_samples_per_second": 4.765, "eval_steps_per_second": 0.298, "step": 5000 }, { "epoch": 1.39, "learning_rate": 3.4399999999999996e-05, "loss": 0.3872, "step": 5025 }, { "epoch": 1.4, "learning_rate": 3.431379310344828e-05, "loss": 0.392, "step": 5050 }, { "epoch": 1.41, "learning_rate": 3.422758620689656e-05, "loss": 0.3545, "step": 5075 }, { "epoch": 1.41, "learning_rate": 3.41448275862069e-05, "loss": 0.3627, "step": 5100 }, { "epoch": 1.42, "learning_rate": 3.4058620689655175e-05, "loss": 0.3362, "step": 5125 }, { "epoch": 1.43, "learning_rate": 3.397241379310345e-05, "loss": 0.3628, "step": 5150 }, { "epoch": 1.43, "learning_rate": 3.3886206896551724e-05, "loss": 0.3157, "step": 5175 }, { "epoch": 1.44, "learning_rate": 3.38e-05, "loss": 0.3649, "step": 5200 }, { "epoch": 1.45, "learning_rate": 3.371379310344828e-05, "loss": 0.3462, "step": 5225 }, { "epoch": 1.45, "learning_rate": 3.362758620689656e-05, "loss": 0.3833, "step": 5250 }, { "epoch": 1.46, "learning_rate": 3.354137931034483e-05, "loss": 0.3643, "step": 5275 }, { "epoch": 1.47, "learning_rate": 3.3455172413793106e-05, "loss": 0.3788, "step": 5300 }, { "epoch": 1.48, "learning_rate": 3.336896551724138e-05, "loss": 0.3625, "step": 5325 }, { "epoch": 1.48, "learning_rate": 3.328275862068966e-05, "loss": 0.363, "step": 5350 }, { "epoch": 1.49, "learning_rate": 3.319655172413793e-05, "loss": 0.3714, "step": 5375 }, { "epoch": 1.5, "learning_rate": 3.311034482758621e-05, "loss": 0.3636, "step": 5400 }, { "epoch": 1.5, "learning_rate": 3.302413793103448e-05, "loss": 0.3349, "step": 5425 }, { "epoch": 1.51, "learning_rate": 3.293793103448276e-05, "loss": 0.3516, "step": 5450 }, { "epoch": 1.52, "learning_rate": 3.2851724137931036e-05, "loss": 0.3327, "step": 5475 }, { "epoch": 1.52, "learning_rate": 3.2765517241379314e-05, "loss": 0.3493, "step": 5500 }, { "epoch": 1.52, "eval_cer": 11.528141570306012, "eval_loss": 0.2879265248775482, "eval_runtime": 1178.4372, "eval_samples_per_second": 4.744, "eval_steps_per_second": 0.297, "step": 5500 }, { "epoch": 1.53, "learning_rate": 3.2679310344827585e-05, "loss": 0.3419, "step": 5525 }, { "epoch": 1.54, "learning_rate": 3.259310344827586e-05, "loss": 0.3876, "step": 5550 }, { "epoch": 1.54, "learning_rate": 3.250689655172414e-05, "loss": 0.3392, "step": 5575 }, { "epoch": 1.55, "learning_rate": 3.242068965517241e-05, "loss": 0.3781, "step": 5600 }, { "epoch": 1.56, "learning_rate": 3.2334482758620696e-05, "loss": 0.3616, "step": 5625 }, { "epoch": 1.57, "learning_rate": 3.224827586206897e-05, "loss": 0.3596, "step": 5650 }, { "epoch": 1.57, "learning_rate": 3.2162068965517245e-05, "loss": 0.3681, "step": 5675 }, { "epoch": 1.58, "learning_rate": 3.2075862068965516e-05, "loss": 0.3373, "step": 5700 }, { "epoch": 1.59, "learning_rate": 3.1989655172413794e-05, "loss": 0.3311, "step": 5725 }, { "epoch": 1.59, "learning_rate": 3.190344827586207e-05, "loss": 0.4004, "step": 5750 }, { "epoch": 1.6, "learning_rate": 3.181724137931035e-05, "loss": 0.3731, "step": 5775 }, { "epoch": 1.61, "learning_rate": 3.173103448275862e-05, "loss": 0.3443, "step": 5800 }, { "epoch": 1.61, "learning_rate": 3.16448275862069e-05, "loss": 0.345, "step": 5825 }, { "epoch": 1.62, "learning_rate": 3.155862068965517e-05, "loss": 0.312, "step": 5850 }, { "epoch": 1.63, "learning_rate": 3.147241379310345e-05, "loss": 0.3474, "step": 5875 }, { "epoch": 1.63, "learning_rate": 3.1386206896551724e-05, "loss": 0.3162, "step": 5900 }, { "epoch": 1.64, "learning_rate": 3.13e-05, "loss": 0.3227, "step": 5925 }, { "epoch": 1.65, "learning_rate": 3.121379310344828e-05, "loss": 0.3482, "step": 5950 }, { "epoch": 1.66, "learning_rate": 3.112758620689655e-05, "loss": 0.3398, "step": 5975 }, { "epoch": 1.66, "learning_rate": 3.104137931034483e-05, "loss": 0.3466, "step": 6000 }, { "epoch": 1.66, "eval_cer": 11.44349326007794, "eval_loss": 0.28201985359191895, "eval_runtime": 1169.509, "eval_samples_per_second": 4.781, "eval_steps_per_second": 0.299, "step": 6000 }, { "epoch": 1.67, "learning_rate": 3.0955172413793106e-05, "loss": 0.3256, "step": 6025 }, { "epoch": 1.68, "learning_rate": 3.0868965517241384e-05, "loss": 0.3208, "step": 6050 }, { "epoch": 1.68, "learning_rate": 3.0782758620689655e-05, "loss": 0.3267, "step": 6075 }, { "epoch": 1.69, "learning_rate": 3.069655172413793e-05, "loss": 0.3675, "step": 6100 }, { "epoch": 1.7, "learning_rate": 3.061034482758621e-05, "loss": 0.3502, "step": 6125 }, { "epoch": 1.7, "learning_rate": 3.052413793103449e-05, "loss": 0.3586, "step": 6150 }, { "epoch": 1.71, "learning_rate": 3.043793103448276e-05, "loss": 0.3211, "step": 6175 }, { "epoch": 1.72, "learning_rate": 3.0351724137931037e-05, "loss": 0.3631, "step": 6200 }, { "epoch": 1.72, "learning_rate": 3.026551724137931e-05, "loss": 0.3764, "step": 6225 }, { "epoch": 1.73, "learning_rate": 3.017931034482759e-05, "loss": 0.3636, "step": 6250 }, { "epoch": 1.74, "learning_rate": 3.009310344827586e-05, "loss": 0.3682, "step": 6275 }, { "epoch": 1.75, "learning_rate": 3.000689655172414e-05, "loss": 0.3484, "step": 6300 }, { "epoch": 1.75, "learning_rate": 2.9920689655172412e-05, "loss": 0.336, "step": 6325 }, { "epoch": 1.76, "learning_rate": 2.983448275862069e-05, "loss": 0.3457, "step": 6350 }, { "epoch": 1.77, "learning_rate": 2.974827586206897e-05, "loss": 0.3548, "step": 6375 }, { "epoch": 1.77, "learning_rate": 2.966206896551724e-05, "loss": 0.3455, "step": 6400 }, { "epoch": 1.78, "learning_rate": 2.957586206896552e-05, "loss": 0.3303, "step": 6425 }, { "epoch": 1.79, "learning_rate": 2.9489655172413794e-05, "loss": 0.3344, "step": 6450 }, { "epoch": 1.79, "learning_rate": 2.940344827586207e-05, "loss": 0.3517, "step": 6475 }, { "epoch": 1.8, "learning_rate": 2.9317241379310346e-05, "loss": 0.3507, "step": 6500 }, { "epoch": 1.8, "eval_cer": 11.012266019293426, "eval_loss": 0.27717769145965576, "eval_runtime": 1163.8916, "eval_samples_per_second": 4.804, "eval_steps_per_second": 0.301, "step": 6500 }, { "epoch": 1.81, "learning_rate": 2.9231034482758624e-05, "loss": 0.3283, "step": 6525 }, { "epoch": 1.81, "learning_rate": 2.9144827586206898e-05, "loss": 0.3808, "step": 6550 }, { "epoch": 1.82, "learning_rate": 2.9058620689655176e-05, "loss": 0.3473, "step": 6575 }, { "epoch": 1.83, "learning_rate": 2.8972413793103447e-05, "loss": 0.3502, "step": 6600 }, { "epoch": 1.84, "learning_rate": 2.8886206896551728e-05, "loss": 0.3507, "step": 6625 }, { "epoch": 1.84, "learning_rate": 2.88e-05, "loss": 0.3086, "step": 6650 }, { "epoch": 1.85, "learning_rate": 2.8713793103448276e-05, "loss": 0.3266, "step": 6675 }, { "epoch": 1.86, "learning_rate": 2.862758620689655e-05, "loss": 0.3236, "step": 6700 }, { "epoch": 1.86, "learning_rate": 2.854137931034483e-05, "loss": 0.3063, "step": 6725 }, { "epoch": 1.87, "learning_rate": 2.8455172413793106e-05, "loss": 0.3357, "step": 6750 }, { "epoch": 1.88, "learning_rate": 2.836896551724138e-05, "loss": 0.3366, "step": 6775 }, { "epoch": 1.88, "learning_rate": 2.828275862068966e-05, "loss": 0.3459, "step": 6800 }, { "epoch": 1.89, "learning_rate": 2.8196551724137933e-05, "loss": 0.3396, "step": 6825 }, { "epoch": 1.9, "learning_rate": 2.811034482758621e-05, "loss": 0.3388, "step": 6850 }, { "epoch": 1.9, "learning_rate": 2.8024137931034485e-05, "loss": 0.3657, "step": 6875 }, { "epoch": 1.91, "learning_rate": 2.7937931034482763e-05, "loss": 0.3604, "step": 6900 }, { "epoch": 1.92, "learning_rate": 2.7851724137931033e-05, "loss": 0.3467, "step": 6925 }, { "epoch": 1.93, "learning_rate": 2.7765517241379315e-05, "loss": 0.3434, "step": 6950 }, { "epoch": 1.93, "learning_rate": 2.7679310344827586e-05, "loss": 0.3416, "step": 6975 }, { "epoch": 1.94, "learning_rate": 2.7593103448275863e-05, "loss": 0.3482, "step": 7000 }, { "epoch": 1.94, "eval_cer": 11.178368363891906, "eval_loss": 0.2766138017177582, "eval_runtime": 1182.8471, "eval_samples_per_second": 4.727, "eval_steps_per_second": 0.296, "step": 7000 }, { "epoch": 1.95, "learning_rate": 2.7506896551724138e-05, "loss": 0.3213, "step": 7025 }, { "epoch": 1.95, "learning_rate": 2.7420689655172415e-05, "loss": 0.359, "step": 7050 }, { "epoch": 1.96, "learning_rate": 2.733448275862069e-05, "loss": 0.3506, "step": 7075 }, { "epoch": 1.97, "learning_rate": 2.7248275862068968e-05, "loss": 0.3178, "step": 7100 }, { "epoch": 1.97, "learning_rate": 2.716206896551724e-05, "loss": 0.3371, "step": 7125 }, { "epoch": 1.98, "learning_rate": 2.707586206896552e-05, "loss": 0.3236, "step": 7150 }, { "epoch": 1.99, "learning_rate": 2.6989655172413797e-05, "loss": 0.3231, "step": 7175 }, { "epoch": 1.99, "learning_rate": 2.6903448275862068e-05, "loss": 0.3007, "step": 7200 }, { "epoch": 2.0, "learning_rate": 2.6820689655172414e-05, "loss": 0.324, "step": 7225 }, { "epoch": 2.01, "learning_rate": 2.6734482758620692e-05, "loss": 0.2574, "step": 7250 }, { "epoch": 2.02, "learning_rate": 2.6648275862068966e-05, "loss": 0.2346, "step": 7275 }, { "epoch": 2.02, "learning_rate": 2.6562068965517244e-05, "loss": 0.2627, "step": 7300 }, { "epoch": 2.03, "learning_rate": 2.647586206896552e-05, "loss": 0.2614, "step": 7325 }, { "epoch": 2.04, "learning_rate": 2.6389655172413796e-05, "loss": 0.2279, "step": 7350 }, { "epoch": 2.04, "learning_rate": 2.630344827586207e-05, "loss": 0.271, "step": 7375 }, { "epoch": 2.05, "learning_rate": 2.621724137931035e-05, "loss": 0.2566, "step": 7400 }, { "epoch": 2.06, "learning_rate": 2.613103448275862e-05, "loss": 0.2471, "step": 7425 }, { "epoch": 2.06, "learning_rate": 2.60448275862069e-05, "loss": 0.2438, "step": 7450 }, { "epoch": 2.07, "learning_rate": 2.595862068965517e-05, "loss": 0.2457, "step": 7475 }, { "epoch": 2.08, "learning_rate": 2.587241379310345e-05, "loss": 0.232, "step": 7500 }, { "epoch": 2.08, "eval_cer": 10.926020571136522, "eval_loss": 0.2784821093082428, "eval_runtime": 1168.6896, "eval_samples_per_second": 4.784, "eval_steps_per_second": 0.299, "step": 7500 }, { "epoch": 2.09, "learning_rate": 2.5786206896551724e-05, "loss": 0.256, "step": 7525 }, { "epoch": 2.09, "learning_rate": 2.57e-05, "loss": 0.2383, "step": 7550 }, { "epoch": 2.1, "learning_rate": 2.5613793103448276e-05, "loss": 0.2516, "step": 7575 }, { "epoch": 2.11, "learning_rate": 2.5527586206896553e-05, "loss": 0.229, "step": 7600 }, { "epoch": 2.11, "learning_rate": 2.5441379310344828e-05, "loss": 0.242, "step": 7625 }, { "epoch": 2.12, "learning_rate": 2.5355172413793105e-05, "loss": 0.243, "step": 7650 }, { "epoch": 2.13, "learning_rate": 2.5268965517241383e-05, "loss": 0.2476, "step": 7675 }, { "epoch": 2.13, "learning_rate": 2.5182758620689658e-05, "loss": 0.2351, "step": 7700 }, { "epoch": 2.14, "learning_rate": 2.5096551724137935e-05, "loss": 0.2526, "step": 7725 }, { "epoch": 2.15, "learning_rate": 2.5010344827586206e-05, "loss": 0.2424, "step": 7750 }, { "epoch": 2.15, "learning_rate": 2.4924137931034484e-05, "loss": 0.264, "step": 7775 }, { "epoch": 2.16, "learning_rate": 2.483793103448276e-05, "loss": 0.244, "step": 7800 }, { "epoch": 2.17, "learning_rate": 2.4751724137931036e-05, "loss": 0.2588, "step": 7825 }, { "epoch": 2.18, "learning_rate": 2.4665517241379314e-05, "loss": 0.2418, "step": 7850 }, { "epoch": 2.18, "learning_rate": 2.4579310344827588e-05, "loss": 0.2556, "step": 7875 }, { "epoch": 2.19, "learning_rate": 2.4493103448275866e-05, "loss": 0.2253, "step": 7900 }, { "epoch": 2.2, "learning_rate": 2.440689655172414e-05, "loss": 0.2629, "step": 7925 }, { "epoch": 2.2, "learning_rate": 2.4320689655172415e-05, "loss": 0.2512, "step": 7950 }, { "epoch": 2.21, "learning_rate": 2.4234482758620692e-05, "loss": 0.2186, "step": 7975 }, { "epoch": 2.22, "learning_rate": 2.4148275862068967e-05, "loss": 0.2539, "step": 8000 }, { "epoch": 2.22, "eval_cer": 10.593815881939564, "eval_loss": 0.2776859700679779, "eval_runtime": 1165.9559, "eval_samples_per_second": 4.795, "eval_steps_per_second": 0.3, "step": 8000 }, { "epoch": 2.22, "learning_rate": 2.406206896551724e-05, "loss": 0.2433, "step": 8025 }, { "epoch": 2.23, "learning_rate": 2.397586206896552e-05, "loss": 0.2383, "step": 8050 }, { "epoch": 2.24, "learning_rate": 2.3889655172413793e-05, "loss": 0.2454, "step": 8075 }, { "epoch": 2.24, "learning_rate": 2.380344827586207e-05, "loss": 0.2596, "step": 8100 }, { "epoch": 2.25, "learning_rate": 2.3717241379310345e-05, "loss": 0.2479, "step": 8125 }, { "epoch": 2.26, "learning_rate": 2.363103448275862e-05, "loss": 0.2519, "step": 8150 }, { "epoch": 2.27, "learning_rate": 2.3544827586206897e-05, "loss": 0.2394, "step": 8175 }, { "epoch": 2.27, "learning_rate": 2.345862068965517e-05, "loss": 0.2336, "step": 8200 }, { "epoch": 2.28, "learning_rate": 2.337241379310345e-05, "loss": 0.2549, "step": 8225 }, { "epoch": 2.29, "learning_rate": 2.3286206896551727e-05, "loss": 0.2784, "step": 8250 }, { "epoch": 2.29, "learning_rate": 2.32e-05, "loss": 0.2365, "step": 8275 }, { "epoch": 2.3, "learning_rate": 2.311379310344828e-05, "loss": 0.2445, "step": 8300 }, { "epoch": 2.31, "learning_rate": 2.3027586206896554e-05, "loss": 0.2411, "step": 8325 }, { "epoch": 2.31, "learning_rate": 2.2941379310344828e-05, "loss": 0.257, "step": 8350 }, { "epoch": 2.32, "learning_rate": 2.2855172413793106e-05, "loss": 0.2196, "step": 8375 }, { "epoch": 2.33, "learning_rate": 2.276896551724138e-05, "loss": 0.2583, "step": 8400 }, { "epoch": 2.33, "learning_rate": 2.2682758620689658e-05, "loss": 0.2522, "step": 8425 }, { "epoch": 2.34, "learning_rate": 2.2596551724137932e-05, "loss": 0.2712, "step": 8450 }, { "epoch": 2.35, "learning_rate": 2.2510344827586206e-05, "loss": 0.2624, "step": 8475 }, { "epoch": 2.36, "learning_rate": 2.2424137931034484e-05, "loss": 0.2377, "step": 8500 }, { "epoch": 2.36, "eval_cer": 10.659298537021657, "eval_loss": 0.2783927917480469, "eval_runtime": 1180.6736, "eval_samples_per_second": 4.735, "eval_steps_per_second": 0.296, "step": 8500 }, { "epoch": 2.36, "learning_rate": 2.233793103448276e-05, "loss": 0.2526, "step": 8525 }, { "epoch": 2.37, "learning_rate": 2.2251724137931036e-05, "loss": 0.2408, "step": 8550 }, { "epoch": 2.38, "learning_rate": 2.216551724137931e-05, "loss": 0.2778, "step": 8575 }, { "epoch": 2.38, "learning_rate": 2.2079310344827585e-05, "loss": 0.253, "step": 8600 }, { "epoch": 2.39, "learning_rate": 2.1993103448275863e-05, "loss": 0.2416, "step": 8625 }, { "epoch": 2.4, "learning_rate": 2.190689655172414e-05, "loss": 0.2477, "step": 8650 }, { "epoch": 2.4, "learning_rate": 2.1820689655172415e-05, "loss": 0.2453, "step": 8675 }, { "epoch": 2.41, "learning_rate": 2.1734482758620693e-05, "loss": 0.2463, "step": 8700 }, { "epoch": 2.42, "learning_rate": 2.1648275862068967e-05, "loss": 0.2409, "step": 8725 }, { "epoch": 2.42, "learning_rate": 2.1562068965517245e-05, "loss": 0.2305, "step": 8750 }, { "epoch": 2.43, "learning_rate": 2.147586206896552e-05, "loss": 0.2342, "step": 8775 }, { "epoch": 2.44, "learning_rate": 2.1389655172413793e-05, "loss": 0.2551, "step": 8800 }, { "epoch": 2.45, "learning_rate": 2.130344827586207e-05, "loss": 0.2396, "step": 8825 }, { "epoch": 2.45, "learning_rate": 2.1217241379310345e-05, "loss": 0.268, "step": 8850 }, { "epoch": 2.46, "learning_rate": 2.1131034482758623e-05, "loss": 0.2384, "step": 8875 }, { "epoch": 2.47, "learning_rate": 2.1044827586206898e-05, "loss": 0.2418, "step": 8900 }, { "epoch": 2.47, "learning_rate": 2.0958620689655172e-05, "loss": 0.2414, "step": 8925 }, { "epoch": 2.48, "learning_rate": 2.087241379310345e-05, "loss": 0.2232, "step": 8950 }, { "epoch": 2.49, "learning_rate": 2.0786206896551724e-05, "loss": 0.248, "step": 8975 }, { "epoch": 2.49, "learning_rate": 2.07e-05, "loss": 0.2384, "step": 9000 }, { "epoch": 2.49, "eval_cer": 10.659298537021657, "eval_loss": 0.2808603048324585, "eval_runtime": 1165.5072, "eval_samples_per_second": 4.797, "eval_steps_per_second": 0.3, "step": 9000 }, { "epoch": 2.5, "learning_rate": 2.0613793103448276e-05, "loss": 0.2448, "step": 9025 }, { "epoch": 2.51, "learning_rate": 2.0527586206896554e-05, "loss": 0.2268, "step": 9050 }, { "epoch": 2.51, "learning_rate": 2.0441379310344828e-05, "loss": 0.247, "step": 9075 }, { "epoch": 2.52, "learning_rate": 2.0355172413793106e-05, "loss": 0.2345, "step": 9100 }, { "epoch": 2.53, "learning_rate": 2.026896551724138e-05, "loss": 0.2117, "step": 9125 }, { "epoch": 2.54, "learning_rate": 2.0182758620689658e-05, "loss": 0.2265, "step": 9150 }, { "epoch": 2.54, "learning_rate": 2.0096551724137932e-05, "loss": 0.2464, "step": 9175 }, { "epoch": 2.55, "learning_rate": 2.0010344827586207e-05, "loss": 0.2741, "step": 9200 }, { "epoch": 2.56, "learning_rate": 1.9924137931034484e-05, "loss": 0.2397, "step": 9225 }, { "epoch": 2.56, "learning_rate": 1.983793103448276e-05, "loss": 0.2357, "step": 9250 }, { "epoch": 2.57, "learning_rate": 1.9751724137931037e-05, "loss": 0.2435, "step": 9275 }, { "epoch": 2.58, "learning_rate": 1.966551724137931e-05, "loss": 0.2571, "step": 9300 }, { "epoch": 2.58, "learning_rate": 1.9579310344827585e-05, "loss": 0.2675, "step": 9325 }, { "epoch": 2.59, "learning_rate": 1.9493103448275863e-05, "loss": 0.2476, "step": 9350 }, { "epoch": 2.6, "learning_rate": 1.9406896551724137e-05, "loss": 0.2412, "step": 9375 }, { "epoch": 2.6, "learning_rate": 1.9320689655172415e-05, "loss": 0.2502, "step": 9400 }, { "epoch": 2.61, "learning_rate": 1.923448275862069e-05, "loss": 0.2327, "step": 9425 }, { "epoch": 2.62, "learning_rate": 1.9148275862068964e-05, "loss": 0.2198, "step": 9450 }, { "epoch": 2.63, "learning_rate": 1.9062068965517245e-05, "loss": 0.2614, "step": 9475 }, { "epoch": 2.63, "learning_rate": 1.897586206896552e-05, "loss": 0.2495, "step": 9500 }, { "epoch": 2.63, "eval_cer": 10.632147192231521, "eval_loss": 0.281376451253891, "eval_runtime": 1164.8408, "eval_samples_per_second": 4.8, "eval_steps_per_second": 0.3, "step": 9500 }, { "epoch": 2.64, "learning_rate": 1.8889655172413794e-05, "loss": 0.2442, "step": 9525 }, { "epoch": 2.65, "learning_rate": 1.880344827586207e-05, "loss": 0.2369, "step": 9550 }, { "epoch": 2.65, "learning_rate": 1.8717241379310346e-05, "loss": 0.2186, "step": 9575 }, { "epoch": 2.66, "learning_rate": 1.8631034482758623e-05, "loss": 0.234, "step": 9600 }, { "epoch": 2.67, "learning_rate": 1.8544827586206898e-05, "loss": 0.2448, "step": 9625 }, { "epoch": 2.67, "learning_rate": 1.8458620689655172e-05, "loss": 0.2449, "step": 9650 }, { "epoch": 2.68, "learning_rate": 1.837241379310345e-05, "loss": 0.2322, "step": 9675 }, { "epoch": 2.69, "learning_rate": 1.8286206896551724e-05, "loss": 0.2489, "step": 9700 }, { "epoch": 2.69, "learning_rate": 1.8200000000000002e-05, "loss": 0.2569, "step": 9725 }, { "epoch": 2.7, "learning_rate": 1.8113793103448276e-05, "loss": 0.2331, "step": 9750 }, { "epoch": 2.71, "learning_rate": 1.802758620689655e-05, "loss": 0.2647, "step": 9775 }, { "epoch": 2.72, "learning_rate": 1.794137931034483e-05, "loss": 0.2426, "step": 9800 }, { "epoch": 2.72, "learning_rate": 1.7855172413793103e-05, "loss": 0.2341, "step": 9825 }, { "epoch": 2.73, "learning_rate": 1.776896551724138e-05, "loss": 0.24, "step": 9850 }, { "epoch": 2.74, "learning_rate": 1.7682758620689658e-05, "loss": 0.249, "step": 9875 }, { "epoch": 2.74, "learning_rate": 1.7596551724137933e-05, "loss": 0.2217, "step": 9900 }, { "epoch": 2.75, "learning_rate": 1.751034482758621e-05, "loss": 0.2467, "step": 9925 }, { "epoch": 2.76, "learning_rate": 1.7424137931034485e-05, "loss": 0.2529, "step": 9950 }, { "epoch": 2.76, "learning_rate": 1.733793103448276e-05, "loss": 0.2578, "step": 9975 }, { "epoch": 2.77, "learning_rate": 1.7251724137931037e-05, "loss": 0.2312, "step": 10000 }, { "epoch": 2.77, "eval_cer": 10.60020443365489, "eval_loss": 0.28221753239631653, "eval_runtime": 1168.5035, "eval_samples_per_second": 4.785, "eval_steps_per_second": 0.3, "step": 10000 }, { "epoch": 2.78, "learning_rate": 1.716551724137931e-05, "loss": 0.2284, "step": 10025 }, { "epoch": 2.78, "learning_rate": 1.7079310344827585e-05, "loss": 0.2348, "step": 10050 }, { "epoch": 2.79, "learning_rate": 1.6993103448275863e-05, "loss": 0.2244, "step": 10075 }, { "epoch": 2.8, "learning_rate": 1.6906896551724138e-05, "loss": 0.2522, "step": 10100 }, { "epoch": 2.81, "learning_rate": 1.6820689655172415e-05, "loss": 0.2476, "step": 10125 }, { "epoch": 2.81, "learning_rate": 1.673448275862069e-05, "loss": 0.2279, "step": 10150 }, { "epoch": 2.82, "learning_rate": 1.6648275862068964e-05, "loss": 0.2317, "step": 10175 }, { "epoch": 2.83, "learning_rate": 1.6562068965517242e-05, "loss": 0.2305, "step": 10200 }, { "epoch": 2.83, "learning_rate": 1.6475862068965516e-05, "loss": 0.2364, "step": 10225 }, { "epoch": 2.84, "learning_rate": 1.6389655172413794e-05, "loss": 0.2572, "step": 10250 }, { "epoch": 2.85, "learning_rate": 1.630344827586207e-05, "loss": 0.2065, "step": 10275 }, { "epoch": 2.85, "learning_rate": 1.6217241379310346e-05, "loss": 0.2398, "step": 10300 }, { "epoch": 2.86, "learning_rate": 1.6131034482758624e-05, "loss": 0.2526, "step": 10325 }, { "epoch": 2.87, "learning_rate": 1.6044827586206898e-05, "loss": 0.2263, "step": 10350 }, { "epoch": 2.87, "learning_rate": 1.5958620689655172e-05, "loss": 0.2527, "step": 10375 }, { "epoch": 2.88, "learning_rate": 1.587241379310345e-05, "loss": 0.2401, "step": 10400 }, { "epoch": 2.89, "learning_rate": 1.5786206896551724e-05, "loss": 0.2221, "step": 10425 }, { "epoch": 2.9, "learning_rate": 1.5700000000000002e-05, "loss": 0.2251, "step": 10450 }, { "epoch": 2.9, "learning_rate": 1.5613793103448276e-05, "loss": 0.247, "step": 10475 }, { "epoch": 2.91, "learning_rate": 1.552758620689655e-05, "loss": 0.2264, "step": 10500 }, { "epoch": 2.91, "eval_cer": 10.73436401967674, "eval_loss": 0.2812344431877136, "eval_runtime": 1186.9835, "eval_samples_per_second": 4.71, "eval_steps_per_second": 0.295, "step": 10500 }, { "epoch": 2.92, "learning_rate": 1.544137931034483e-05, "loss": 0.2459, "step": 10525 }, { "epoch": 2.92, "learning_rate": 1.5355172413793103e-05, "loss": 0.2541, "step": 10550 }, { "epoch": 2.93, "learning_rate": 1.526896551724138e-05, "loss": 0.2265, "step": 10575 }, { "epoch": 2.94, "learning_rate": 1.5182758620689655e-05, "loss": 0.2329, "step": 10600 }, { "epoch": 2.94, "learning_rate": 1.5096551724137931e-05, "loss": 0.2245, "step": 10625 }, { "epoch": 2.95, "learning_rate": 1.5010344827586207e-05, "loss": 0.265, "step": 10650 }, { "epoch": 2.96, "learning_rate": 1.4924137931034485e-05, "loss": 0.2423, "step": 10675 }, { "epoch": 2.96, "learning_rate": 1.4837931034482761e-05, "loss": 0.2319, "step": 10700 }, { "epoch": 2.97, "learning_rate": 1.4751724137931037e-05, "loss": 0.2286, "step": 10725 }, { "epoch": 2.98, "learning_rate": 1.4665517241379311e-05, "loss": 0.2352, "step": 10750 }, { "epoch": 2.99, "learning_rate": 1.4579310344827587e-05, "loss": 0.2635, "step": 10775 }, { "epoch": 2.99, "learning_rate": 1.4493103448275863e-05, "loss": 0.2269, "step": 10800 }, { "epoch": 3.0, "learning_rate": 1.440689655172414e-05, "loss": 0.2296, "step": 10825 }, { "epoch": 3.01, "learning_rate": 1.4324137931034484e-05, "loss": 0.1896, "step": 10850 }, { "epoch": 3.01, "learning_rate": 1.423793103448276e-05, "loss": 0.1824, "step": 10875 }, { "epoch": 3.02, "learning_rate": 1.4151724137931036e-05, "loss": 0.1621, "step": 10900 }, { "epoch": 3.03, "learning_rate": 1.4065517241379312e-05, "loss": 0.1497, "step": 10925 }, { "epoch": 3.03, "learning_rate": 1.3979310344827586e-05, "loss": 0.1597, "step": 10950 }, { "epoch": 3.04, "learning_rate": 1.3893103448275862e-05, "loss": 0.1642, "step": 10975 }, { "epoch": 3.05, "learning_rate": 1.3806896551724138e-05, "loss": 0.1468, "step": 11000 }, { "epoch": 3.05, "eval_cer": 10.796652398901168, "eval_loss": 0.2832615077495575, "eval_runtime": 1183.0471, "eval_samples_per_second": 4.726, "eval_steps_per_second": 0.296, "step": 11000 }, { "epoch": 3.05, "learning_rate": 1.3720689655172414e-05, "loss": 0.1427, "step": 11025 }, { "epoch": 3.06, "learning_rate": 1.363448275862069e-05, "loss": 0.17, "step": 11050 }, { "epoch": 3.07, "learning_rate": 1.3548275862068965e-05, "loss": 0.1585, "step": 11075 }, { "epoch": 3.08, "learning_rate": 1.3462068965517241e-05, "loss": 0.1727, "step": 11100 }, { "epoch": 3.08, "learning_rate": 1.3375862068965517e-05, "loss": 0.1654, "step": 11125 }, { "epoch": 3.09, "learning_rate": 1.3289655172413793e-05, "loss": 0.1517, "step": 11150 }, { "epoch": 3.1, "learning_rate": 1.320344827586207e-05, "loss": 0.1536, "step": 11175 }, { "epoch": 3.1, "learning_rate": 1.3117241379310347e-05, "loss": 0.1692, "step": 11200 }, { "epoch": 3.11, "learning_rate": 1.3031034482758623e-05, "loss": 0.1673, "step": 11225 }, { "epoch": 3.12, "learning_rate": 1.2944827586206897e-05, "loss": 0.1554, "step": 11250 }, { "epoch": 3.12, "learning_rate": 1.2858620689655173e-05, "loss": 0.1616, "step": 11275 }, { "epoch": 3.13, "learning_rate": 1.277241379310345e-05, "loss": 0.1553, "step": 11300 }, { "epoch": 3.14, "learning_rate": 1.2686206896551725e-05, "loss": 0.1692, "step": 11325 }, { "epoch": 3.14, "learning_rate": 1.2600000000000001e-05, "loss": 0.1582, "step": 11350 }, { "epoch": 3.15, "learning_rate": 1.2513793103448276e-05, "loss": 0.1634, "step": 11375 }, { "epoch": 3.16, "learning_rate": 1.2427586206896552e-05, "loss": 0.1584, "step": 11400 }, { "epoch": 3.17, "learning_rate": 1.2341379310344828e-05, "loss": 0.1678, "step": 11425 }, { "epoch": 3.17, "learning_rate": 1.2255172413793104e-05, "loss": 0.1767, "step": 11450 }, { "epoch": 3.18, "learning_rate": 1.216896551724138e-05, "loss": 0.1561, "step": 11475 }, { "epoch": 3.19, "learning_rate": 1.2082758620689656e-05, "loss": 0.1557, "step": 11500 }, { "epoch": 3.19, "eval_cer": 10.533124640643967, "eval_loss": 0.28545621037483215, "eval_runtime": 1176.7108, "eval_samples_per_second": 4.751, "eval_steps_per_second": 0.297, "step": 11500 }, { "epoch": 3.19, "learning_rate": 1.1996551724137932e-05, "loss": 0.172, "step": 11525 }, { "epoch": 3.2, "learning_rate": 1.1910344827586208e-05, "loss": 0.1571, "step": 11550 }, { "epoch": 3.21, "learning_rate": 1.1824137931034484e-05, "loss": 0.1605, "step": 11575 }, { "epoch": 3.21, "learning_rate": 1.1737931034482758e-05, "loss": 0.1612, "step": 11600 }, { "epoch": 3.22, "learning_rate": 1.1651724137931034e-05, "loss": 0.1615, "step": 11625 }, { "epoch": 3.23, "learning_rate": 1.156551724137931e-05, "loss": 0.1627, "step": 11650 }, { "epoch": 3.23, "learning_rate": 1.1479310344827588e-05, "loss": 0.1481, "step": 11675 }, { "epoch": 3.24, "learning_rate": 1.1393103448275863e-05, "loss": 0.1532, "step": 11700 }, { "epoch": 3.25, "learning_rate": 1.1306896551724139e-05, "loss": 0.1358, "step": 11725 }, { "epoch": 3.26, "learning_rate": 1.1220689655172415e-05, "loss": 0.1619, "step": 11750 }, { "epoch": 3.26, "learning_rate": 1.113448275862069e-05, "loss": 0.1721, "step": 11775 }, { "epoch": 3.27, "learning_rate": 1.1048275862068965e-05, "loss": 0.1662, "step": 11800 }, { "epoch": 3.28, "learning_rate": 1.0962068965517241e-05, "loss": 0.1511, "step": 11825 }, { "epoch": 3.28, "learning_rate": 1.0875862068965517e-05, "loss": 0.1554, "step": 11850 }, { "epoch": 3.29, "learning_rate": 1.0789655172413795e-05, "loss": 0.1767, "step": 11875 }, { "epoch": 3.3, "learning_rate": 1.070344827586207e-05, "loss": 0.1613, "step": 11900 }, { "epoch": 3.3, "learning_rate": 1.0617241379310345e-05, "loss": 0.1752, "step": 11925 }, { "epoch": 3.31, "learning_rate": 1.0531034482758621e-05, "loss": 0.1493, "step": 11950 }, { "epoch": 3.32, "learning_rate": 1.0444827586206897e-05, "loss": 0.1927, "step": 11975 }, { "epoch": 3.32, "learning_rate": 1.0358620689655173e-05, "loss": 0.1639, "step": 12000 }, { "epoch": 3.32, "eval_cer": 10.167380054941546, "eval_loss": 0.28585749864578247, "eval_runtime": 1168.3755, "eval_samples_per_second": 4.785, "eval_steps_per_second": 0.3, "step": 12000 }, { "epoch": 3.33, "learning_rate": 1.0272413793103448e-05, "loss": 0.1708, "step": 12025 }, { "epoch": 3.34, "learning_rate": 1.0186206896551724e-05, "loss": 0.1692, "step": 12050 }, { "epoch": 3.35, "learning_rate": 1.0100000000000002e-05, "loss": 0.1754, "step": 12075 }, { "epoch": 3.35, "learning_rate": 1.0013793103448278e-05, "loss": 0.1695, "step": 12100 }, { "epoch": 3.36, "learning_rate": 9.927586206896552e-06, "loss": 0.1695, "step": 12125 }, { "epoch": 3.37, "learning_rate": 9.841379310344828e-06, "loss": 0.1718, "step": 12150 }, { "epoch": 3.37, "learning_rate": 9.755172413793104e-06, "loss": 0.151, "step": 12175 }, { "epoch": 3.38, "learning_rate": 9.66896551724138e-06, "loss": 0.1672, "step": 12200 }, { "epoch": 3.39, "learning_rate": 9.582758620689654e-06, "loss": 0.1665, "step": 12225 }, { "epoch": 3.39, "learning_rate": 9.49655172413793e-06, "loss": 0.1567, "step": 12250 }, { "epoch": 3.4, "learning_rate": 9.410344827586208e-06, "loss": 0.1691, "step": 12275 }, { "epoch": 3.41, "learning_rate": 9.324137931034484e-06, "loss": 0.1648, "step": 12300 }, { "epoch": 3.42, "learning_rate": 9.237931034482759e-06, "loss": 0.1576, "step": 12325 }, { "epoch": 3.42, "learning_rate": 9.151724137931035e-06, "loss": 0.1689, "step": 12350 }, { "epoch": 3.43, "learning_rate": 9.06551724137931e-06, "loss": 0.159, "step": 12375 }, { "epoch": 3.44, "learning_rate": 8.979310344827587e-06, "loss": 0.1682, "step": 12400 }, { "epoch": 3.44, "learning_rate": 8.893103448275863e-06, "loss": 0.1729, "step": 12425 }, { "epoch": 3.45, "learning_rate": 8.806896551724137e-06, "loss": 0.168, "step": 12450 }, { "epoch": 3.46, "learning_rate": 8.720689655172415e-06, "loss": 0.1704, "step": 12475 }, { "epoch": 3.46, "learning_rate": 8.634482758620691e-06, "loss": 0.1603, "step": 12500 }, { "epoch": 3.46, "eval_cer": 10.212099916948828, "eval_loss": 0.2885717749595642, "eval_runtime": 1168.0562, "eval_samples_per_second": 4.787, "eval_steps_per_second": 0.3, "step": 12500 }, { "epoch": 3.47, "learning_rate": 8.548275862068967e-06, "loss": 0.1564, "step": 12525 }, { "epoch": 3.48, "learning_rate": 8.462068965517241e-06, "loss": 0.1511, "step": 12550 }, { "epoch": 3.48, "learning_rate": 8.375862068965517e-06, "loss": 0.1575, "step": 12575 }, { "epoch": 3.49, "learning_rate": 8.289655172413793e-06, "loss": 0.1574, "step": 12600 }, { "epoch": 3.5, "learning_rate": 8.20344827586207e-06, "loss": 0.1673, "step": 12625 }, { "epoch": 3.51, "learning_rate": 8.117241379310346e-06, "loss": 0.1605, "step": 12650 }, { "epoch": 3.51, "learning_rate": 8.031034482758622e-06, "loss": 0.1562, "step": 12675 }, { "epoch": 3.52, "learning_rate": 7.944827586206898e-06, "loss": 0.1629, "step": 12700 }, { "epoch": 3.53, "learning_rate": 7.858620689655174e-06, "loss": 0.1629, "step": 12725 }, { "epoch": 3.53, "learning_rate": 7.772413793103448e-06, "loss": 0.1849, "step": 12750 }, { "epoch": 3.54, "learning_rate": 7.686206896551724e-06, "loss": 0.1501, "step": 12775 }, { "epoch": 3.55, "learning_rate": 7.6e-06, "loss": 0.1563, "step": 12800 }, { "epoch": 3.55, "learning_rate": 7.513793103448276e-06, "loss": 0.1596, "step": 12825 }, { "epoch": 3.56, "learning_rate": 7.427586206896551e-06, "loss": 0.156, "step": 12850 }, { "epoch": 3.57, "learning_rate": 7.341379310344828e-06, "loss": 0.1655, "step": 12875 }, { "epoch": 3.57, "learning_rate": 7.255172413793104e-06, "loss": 0.1642, "step": 12900 }, { "epoch": 3.58, "learning_rate": 7.16896551724138e-06, "loss": 0.1676, "step": 12925 }, { "epoch": 3.59, "learning_rate": 7.0827586206896555e-06, "loss": 0.149, "step": 12950 }, { "epoch": 3.6, "learning_rate": 6.9965517241379315e-06, "loss": 0.1734, "step": 12975 }, { "epoch": 3.6, "learning_rate": 6.910344827586207e-06, "loss": 0.1599, "step": 13000 }, { "epoch": 3.6, "eval_cer": 10.058774675781, "eval_loss": 0.2872118651866913, "eval_runtime": 1162.7195, "eval_samples_per_second": 4.809, "eval_steps_per_second": 0.301, "step": 13000 } ], "max_steps": 15000, "num_train_epochs": 5, "total_flos": 3.7516448344080384e+20, "trial_name": null, "trial_params": null }