{ "best_metric": 0.7119987566055331, "best_model_checkpoint": "xls-r-greek-aivaliot/checkpoint-14196", "epoch": 35.0, "eval_steps": 500, "global_step": 19110, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.37, "learning_rate": 0.00011999999999999999, "loss": 6.1718, "step": 200 }, { "epoch": 0.73, "learning_rate": 0.00023999999999999998, "loss": 3.1837, "step": 400 }, { "epoch": 1.0, "eval_cer": 0.6650782444993528, "eval_loss": 2.3546931743621826, "eval_runtime": 42.745, "eval_samples_per_second": 25.547, "eval_steps_per_second": 3.205, "eval_wer": 0.972023624494871, "step": 546 }, { "epoch": 1.1, "learning_rate": 0.00029838796346050506, "loss": 2.2974, "step": 600 }, { "epoch": 1.47, "learning_rate": 0.0002951638903815153, "loss": 2.0321, "step": 800 }, { "epoch": 1.83, "learning_rate": 0.00029193981730252553, "loss": 1.9583, "step": 1000 }, { "epoch": 2.0, "eval_cer": 0.45720084715848924, "eval_loss": 1.9236657619476318, "eval_runtime": 44.6726, "eval_samples_per_second": 24.444, "eval_steps_per_second": 3.067, "eval_wer": 0.8734846129934721, "step": 1092 }, { "epoch": 2.2, "learning_rate": 0.0002887157442235357, "loss": 1.7559, "step": 1200 }, { "epoch": 2.56, "learning_rate": 0.00028549167114454594, "loss": 1.6492, "step": 1400 }, { "epoch": 2.93, "learning_rate": 0.0002822675980655561, "loss": 1.6148, "step": 1600 }, { "epoch": 3.0, "eval_cer": 0.450847158489234, "eval_loss": 1.817557692527771, "eval_runtime": 41.1726, "eval_samples_per_second": 26.522, "eval_steps_per_second": 3.327, "eval_wer": 0.8234379857009636, "step": 1638 }, { "epoch": 3.3, "learning_rate": 0.00027904352498656635, "loss": 1.421, "step": 1800 }, { "epoch": 3.66, "learning_rate": 0.0002758194519075766, "loss": 1.423, "step": 2000 }, { "epoch": 4.0, "eval_cer": 0.4232556771384869, "eval_loss": 1.8342238664627075, "eval_runtime": 39.2094, "eval_samples_per_second": 27.85, "eval_steps_per_second": 3.494, "eval_wer": 0.820329499533727, "step": 2184 }, { "epoch": 4.03, "learning_rate": 0.00027259537882858676, "loss": 1.4186, "step": 2200 }, { "epoch": 4.4, "learning_rate": 0.000269371305749597, "loss": 1.2185, "step": 2400 }, { "epoch": 4.76, "learning_rate": 0.00026614723267060717, "loss": 1.2324, "step": 2600 }, { "epoch": 5.0, "eval_cer": 0.4242852100247088, "eval_loss": 1.7897675037384033, "eval_runtime": 45.221, "eval_samples_per_second": 24.148, "eval_steps_per_second": 3.03, "eval_wer": 0.8047870686975443, "step": 2730 }, { "epoch": 5.13, "learning_rate": 0.00026292315959161735, "loss": 1.135, "step": 2800 }, { "epoch": 5.49, "learning_rate": 0.0002596990865126276, "loss": 1.0774, "step": 3000 }, { "epoch": 5.86, "learning_rate": 0.0002564750134336378, "loss": 1.0871, "step": 3200 }, { "epoch": 6.0, "eval_cer": 0.40566537239675254, "eval_loss": 1.783974051475525, "eval_runtime": 39.7961, "eval_samples_per_second": 27.44, "eval_steps_per_second": 3.443, "eval_wer": 0.7671743860739819, "step": 3276 }, { "epoch": 6.23, "learning_rate": 0.000253250940354648, "loss": 1.0111, "step": 3400 }, { "epoch": 6.59, "learning_rate": 0.0002500268672756582, "loss": 0.9552, "step": 3600 }, { "epoch": 6.96, "learning_rate": 0.00024680279419666846, "loss": 0.9449, "step": 3800 }, { "epoch": 7.0, "eval_cer": 0.3969584657018473, "eval_loss": 1.9680403470993042, "eval_runtime": 41.0562, "eval_samples_per_second": 26.598, "eval_steps_per_second": 3.337, "eval_wer": 0.7777432390425862, "step": 3822 }, { "epoch": 7.33, "learning_rate": 0.00024357872111767866, "loss": 0.8435, "step": 4000 }, { "epoch": 7.69, "learning_rate": 0.00024035464803868887, "loss": 0.8168, "step": 4200 }, { "epoch": 8.0, "eval_cer": 0.4033415695964231, "eval_loss": 1.956693172454834, "eval_runtime": 38.8124, "eval_samples_per_second": 28.135, "eval_steps_per_second": 3.53, "eval_wer": 0.7690394777743239, "step": 4368 }, { "epoch": 8.06, "learning_rate": 0.00023713057495969905, "loss": 0.8366, "step": 4400 }, { "epoch": 8.42, "learning_rate": 0.00023390650188070925, "loss": 0.728, "step": 4600 }, { "epoch": 8.79, "learning_rate": 0.00023068242880171949, "loss": 0.7254, "step": 4800 }, { "epoch": 9.0, "eval_cer": 0.408224496999647, "eval_loss": 2.184379816055298, "eval_runtime": 39.1148, "eval_samples_per_second": 27.918, "eval_steps_per_second": 3.503, "eval_wer": 0.7878458190861051, "step": 4914 }, { "epoch": 9.16, "learning_rate": 0.0002274583557227297, "loss": 0.7016, "step": 5000 }, { "epoch": 9.52, "learning_rate": 0.0002242342826437399, "loss": 0.6563, "step": 5200 }, { "epoch": 9.89, "learning_rate": 0.0002210102095647501, "loss": 0.6584, "step": 5400 }, { "epoch": 10.0, "eval_cer": 0.3924579362277915, "eval_loss": 2.313096046447754, "eval_runtime": 38.9682, "eval_samples_per_second": 28.023, "eval_steps_per_second": 3.516, "eval_wer": 0.7681069319241529, "step": 5460 }, { "epoch": 10.26, "learning_rate": 0.0002177861364857603, "loss": 0.6053, "step": 5600 }, { "epoch": 10.62, "learning_rate": 0.00021456206340677054, "loss": 0.5844, "step": 5800 }, { "epoch": 10.99, "learning_rate": 0.00021133799032778075, "loss": 0.5825, "step": 6000 }, { "epoch": 11.0, "eval_cer": 0.39419343452170846, "eval_loss": 2.4024157524108887, "eval_runtime": 38.7244, "eval_samples_per_second": 28.199, "eval_steps_per_second": 3.538, "eval_wer": 0.7614236866645944, "step": 6006 }, { "epoch": 11.36, "learning_rate": 0.00020811391724879095, "loss": 0.4944, "step": 6200 }, { "epoch": 11.72, "learning_rate": 0.00020488984416980116, "loss": 0.5214, "step": 6400 }, { "epoch": 12.0, "eval_cer": 0.37542652076714905, "eval_loss": 2.2336461544036865, "eval_runtime": 39.0775, "eval_samples_per_second": 27.944, "eval_steps_per_second": 3.506, "eval_wer": 0.7415293751942804, "step": 6552 }, { "epoch": 12.09, "learning_rate": 0.0002016657710908114, "loss": 0.4977, "step": 6600 }, { "epoch": 12.45, "learning_rate": 0.0001984416980118216, "loss": 0.4326, "step": 6800 }, { "epoch": 12.82, "learning_rate": 0.0001952176249328318, "loss": 0.4618, "step": 7000 }, { "epoch": 13.0, "eval_cer": 0.38816331333098014, "eval_loss": 2.4235401153564453, "eval_runtime": 38.6611, "eval_samples_per_second": 28.245, "eval_steps_per_second": 3.544, "eval_wer": 0.7475909232203917, "step": 7098 }, { "epoch": 13.19, "learning_rate": 0.000191993551853842, "loss": 0.4269, "step": 7200 }, { "epoch": 13.55, "learning_rate": 0.0001887694787748522, "loss": 0.3915, "step": 7400 }, { "epoch": 13.92, "learning_rate": 0.00018554540569586244, "loss": 0.4034, "step": 7600 }, { "epoch": 14.0, "eval_cer": 0.38966348982233207, "eval_loss": 2.432621479034424, "eval_runtime": 39.0269, "eval_samples_per_second": 27.981, "eval_steps_per_second": 3.51, "eval_wer": 0.7384208890270438, "step": 7644 }, { "epoch": 14.29, "learning_rate": 0.00018232133261687265, "loss": 0.3616, "step": 7800 }, { "epoch": 14.65, "learning_rate": 0.00017909725953788285, "loss": 0.3638, "step": 8000 }, { "epoch": 15.0, "eval_cer": 0.38351570773032123, "eval_loss": 2.5917038917541504, "eval_runtime": 38.9489, "eval_samples_per_second": 28.037, "eval_steps_per_second": 3.517, "eval_wer": 0.7409076779608331, "step": 8190 }, { "epoch": 15.02, "learning_rate": 0.00017587318645889303, "loss": 0.3469, "step": 8200 }, { "epoch": 15.38, "learning_rate": 0.00017264911337990324, "loss": 0.2989, "step": 8400 }, { "epoch": 15.75, "learning_rate": 0.0001694250403009135, "loss": 0.307, "step": 8600 }, { "epoch": 16.0, "eval_cer": 0.38233909871749616, "eval_loss": 2.653606414794922, "eval_runtime": 38.6877, "eval_samples_per_second": 28.226, "eval_steps_per_second": 3.541, "eval_wer": 0.7483680447622008, "step": 8736 }, { "epoch": 16.12, "learning_rate": 0.00016620096722192368, "loss": 0.3028, "step": 8800 }, { "epoch": 16.48, "learning_rate": 0.00016297689414293388, "loss": 0.2857, "step": 9000 }, { "epoch": 16.85, "learning_rate": 0.00015975282106394409, "loss": 0.2794, "step": 9200 }, { "epoch": 17.0, "eval_cer": 0.3855159430521238, "eval_loss": 2.7668089866638184, "eval_runtime": 38.9844, "eval_samples_per_second": 28.011, "eval_steps_per_second": 3.514, "eval_wer": 0.7413739508859185, "step": 9282 }, { "epoch": 17.22, "learning_rate": 0.00015652874798495432, "loss": 0.2484, "step": 9400 }, { "epoch": 17.58, "learning_rate": 0.00015330467490596452, "loss": 0.245, "step": 9600 }, { "epoch": 17.95, "learning_rate": 0.00015008060182697473, "loss": 0.2445, "step": 9800 }, { "epoch": 18.0, "eval_cer": 0.3990469466996117, "eval_loss": 2.955599069595337, "eval_runtime": 38.9844, "eval_samples_per_second": 28.011, "eval_steps_per_second": 3.514, "eval_wer": 0.7597140192726143, "step": 9828 }, { "epoch": 18.32, "learning_rate": 0.00014685652874798494, "loss": 0.208, "step": 10000 }, { "epoch": 18.68, "learning_rate": 0.00014363245566899517, "loss": 0.2209, "step": 10200 }, { "epoch": 19.0, "eval_cer": 0.38439816448994, "eval_loss": 2.9723663330078125, "eval_runtime": 38.8984, "eval_samples_per_second": 28.073, "eval_steps_per_second": 3.522, "eval_wer": 0.7396642834939384, "step": 10374 }, { "epoch": 19.05, "learning_rate": 0.00014040838259000535, "loss": 0.2107, "step": 10400 }, { "epoch": 19.41, "learning_rate": 0.00013718430951101558, "loss": 0.1935, "step": 10600 }, { "epoch": 19.78, "learning_rate": 0.00013396023643202578, "loss": 0.193, "step": 10800 }, { "epoch": 20.0, "eval_cer": 0.3829568184492293, "eval_loss": 3.105555534362793, "eval_runtime": 38.7297, "eval_samples_per_second": 28.195, "eval_steps_per_second": 3.537, "eval_wer": 0.7412185265775567, "step": 10920 }, { "epoch": 20.15, "learning_rate": 0.000130736163353036, "loss": 0.1764, "step": 11000 }, { "epoch": 20.51, "learning_rate": 0.0001275120902740462, "loss": 0.1683, "step": 11200 }, { "epoch": 20.88, "learning_rate": 0.0001242880171950564, "loss": 0.1608, "step": 11400 }, { "epoch": 21.0, "eval_cer": 0.3850452994469938, "eval_loss": 3.2178432941436768, "eval_runtime": 38.643, "eval_samples_per_second": 28.259, "eval_steps_per_second": 3.545, "eval_wer": 0.7332918868511035, "step": 11466 }, { "epoch": 21.25, "learning_rate": 0.00012106394411606662, "loss": 0.1514, "step": 11600 }, { "epoch": 21.61, "learning_rate": 0.00011783987103707682, "loss": 0.1421, "step": 11800 }, { "epoch": 21.98, "learning_rate": 0.00011461579795808704, "loss": 0.1454, "step": 12000 }, { "epoch": 22.0, "eval_cer": 0.3789563478056242, "eval_loss": 3.3512697219848633, "eval_runtime": 38.841, "eval_samples_per_second": 28.115, "eval_steps_per_second": 3.527, "eval_wer": 0.7322039166925707, "step": 12012 }, { "epoch": 22.34, "learning_rate": 0.00011139172487909725, "loss": 0.1378, "step": 12200 }, { "epoch": 22.71, "learning_rate": 0.00010816765180010745, "loss": 0.1269, "step": 12400 }, { "epoch": 23.0, "eval_cer": 0.38342746205435935, "eval_loss": 3.4092280864715576, "eval_runtime": 39.0183, "eval_samples_per_second": 27.987, "eval_steps_per_second": 3.511, "eval_wer": 0.7311159465340379, "step": 12558 }, { "epoch": 23.08, "learning_rate": 0.00010494357872111767, "loss": 0.1334, "step": 12600 }, { "epoch": 23.44, "learning_rate": 0.00010171950564212788, "loss": 0.1069, "step": 12800 }, { "epoch": 23.81, "learning_rate": 9.84954325631381e-05, "loss": 0.114, "step": 13000 }, { "epoch": 24.0, "eval_cer": 0.3863101541357807, "eval_loss": 3.3670547008514404, "eval_runtime": 38.7589, "eval_samples_per_second": 28.174, "eval_steps_per_second": 3.535, "eval_wer": 0.7300279763755051, "step": 13104 }, { "epoch": 24.18, "learning_rate": 9.527135948414829e-05, "loss": 0.1064, "step": 13200 }, { "epoch": 24.54, "learning_rate": 9.204728640515852e-05, "loss": 0.1056, "step": 13400 }, { "epoch": 24.91, "learning_rate": 8.882321332616871e-05, "loss": 0.1086, "step": 13600 }, { "epoch": 25.0, "eval_cer": 0.38175079421108365, "eval_loss": 3.6167728900909424, "eval_runtime": 38.7877, "eval_samples_per_second": 28.153, "eval_steps_per_second": 3.532, "eval_wer": 0.7179048803232826, "step": 13650 }, { "epoch": 25.27, "learning_rate": 8.559914024717892e-05, "loss": 0.0853, "step": 13800 }, { "epoch": 25.64, "learning_rate": 8.237506716818914e-05, "loss": 0.0877, "step": 14000 }, { "epoch": 26.0, "eval_cer": 0.3746617249088128, "eval_loss": 3.5180928707122803, "eval_runtime": 38.7044, "eval_samples_per_second": 28.214, "eval_steps_per_second": 3.54, "eval_wer": 0.7119987566055331, "step": 14196 }, { "epoch": 26.01, "learning_rate": 7.915099408919934e-05, "loss": 0.0966, "step": 14200 }, { "epoch": 26.37, "learning_rate": 7.592692101020956e-05, "loss": 0.0855, "step": 14400 }, { "epoch": 26.74, "learning_rate": 7.270284793121977e-05, "loss": 0.0856, "step": 14600 }, { "epoch": 27.0, "eval_cer": 0.39136957289092833, "eval_loss": 3.875309467315674, "eval_runtime": 38.7918, "eval_samples_per_second": 28.15, "eval_steps_per_second": 3.532, "eval_wer": 0.7429281939695368, "step": 14742 }, { "epoch": 27.11, "learning_rate": 6.947877485222997e-05, "loss": 0.081, "step": 14800 }, { "epoch": 27.47, "learning_rate": 6.625470177324019e-05, "loss": 0.0708, "step": 15000 }, { "epoch": 27.84, "learning_rate": 6.30306286942504e-05, "loss": 0.0734, "step": 15200 }, { "epoch": 28.0, "eval_cer": 0.37516178373926345, "eval_loss": 3.8759613037109375, "eval_runtime": 38.881, "eval_samples_per_second": 28.086, "eval_steps_per_second": 3.524, "eval_wer": 0.7227230338824993, "step": 15288 }, { "epoch": 28.21, "learning_rate": 5.980655561526061e-05, "loss": 0.0734, "step": 15400 }, { "epoch": 28.57, "learning_rate": 5.658248253627082e-05, "loss": 0.0642, "step": 15600 }, { "epoch": 28.94, "learning_rate": 5.335840945728102e-05, "loss": 0.0702, "step": 15800 }, { "epoch": 29.0, "eval_cer": 0.37551476644311094, "eval_loss": 3.9142181873321533, "eval_runtime": 38.8332, "eval_samples_per_second": 28.12, "eval_steps_per_second": 3.528, "eval_wer": 0.723811004041032, "step": 15834 }, { "epoch": 29.3, "learning_rate": 5.0134336378291234e-05, "loss": 0.0617, "step": 16000 }, { "epoch": 29.67, "learning_rate": 4.6910263299301446e-05, "loss": 0.0661, "step": 16200 }, { "epoch": 30.0, "eval_cer": 0.3737498529238734, "eval_loss": 4.010254859924316, "eval_runtime": 38.9811, "eval_samples_per_second": 28.014, "eval_steps_per_second": 3.515, "eval_wer": 0.7196145477152627, "step": 16380 }, { "epoch": 30.04, "learning_rate": 4.368619022031166e-05, "loss": 0.0559, "step": 16400 }, { "epoch": 30.4, "learning_rate": 4.046211714132187e-05, "loss": 0.0575, "step": 16600 }, { "epoch": 30.77, "learning_rate": 3.7238044062332076e-05, "loss": 0.0569, "step": 16800 }, { "epoch": 31.0, "eval_cer": 0.3768090363572185, "eval_loss": 4.117391109466553, "eval_runtime": 39.1729, "eval_samples_per_second": 27.876, "eval_steps_per_second": 3.497, "eval_wer": 0.729250854833696, "step": 16926 }, { "epoch": 31.14, "learning_rate": 3.401397098334229e-05, "loss": 0.0559, "step": 17000 }, { "epoch": 31.5, "learning_rate": 3.078989790435249e-05, "loss": 0.0501, "step": 17200 }, { "epoch": 31.87, "learning_rate": 2.756582482536271e-05, "loss": 0.0494, "step": 17400 }, { "epoch": 32.0, "eval_cer": 0.37404400517707964, "eval_loss": 4.161037921905518, "eval_runtime": 38.9916, "eval_samples_per_second": 28.006, "eval_steps_per_second": 3.514, "eval_wer": 0.7207025178737955, "step": 17472 }, { "epoch": 32.23, "learning_rate": 2.4341751746372914e-05, "loss": 0.048, "step": 17600 }, { "epoch": 32.6, "learning_rate": 2.1117678667383127e-05, "loss": 0.0509, "step": 17800 }, { "epoch": 32.97, "learning_rate": 1.7893605588393335e-05, "loss": 0.0493, "step": 18000 }, { "epoch": 33.0, "eval_cer": 0.37219084598188024, "eval_loss": 4.1340861320495605, "eval_runtime": 38.9947, "eval_samples_per_second": 28.004, "eval_steps_per_second": 3.513, "eval_wer": 0.7165060615480261, "step": 18018 }, { "epoch": 33.33, "learning_rate": 1.4669532509403546e-05, "loss": 0.0415, "step": 18200 }, { "epoch": 33.7, "learning_rate": 1.1445459430413755e-05, "loss": 0.0395, "step": 18400 }, { "epoch": 34.0, "eval_cer": 0.372132015531239, "eval_loss": 4.2251057624816895, "eval_runtime": 38.8607, "eval_samples_per_second": 28.1, "eval_steps_per_second": 3.525, "eval_wer": 0.7135529996891514, "step": 18564 }, { "epoch": 34.07, "learning_rate": 8.221386351423965e-06, "loss": 0.0472, "step": 18600 }, { "epoch": 34.43, "learning_rate": 4.997313272434174e-06, "loss": 0.037, "step": 18800 }, { "epoch": 34.8, "learning_rate": 1.7732401934443846e-06, "loss": 0.0377, "step": 19000 }, { "epoch": 35.0, "eval_cer": 0.3725438286857277, "eval_loss": 4.262682914733887, "eval_runtime": 38.6349, "eval_samples_per_second": 28.265, "eval_steps_per_second": 3.546, "eval_wer": 0.715728940006217, "step": 19110 } ], "logging_steps": 200, "max_steps": 19110, "num_input_tokens_seen": 0, "num_train_epochs": 35, "save_steps": 500, "total_flos": 2.6904981744760697e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }