{ "best_metric": 0.9149749926448956, "best_model_checkpoint": "trillsson3-ft-keyword-spotting-12/checkpoint-23955", "epoch": 20.0, "global_step": 31940, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 8.73512836568566e-06, "loss": 8.0793, "step": 100 }, { "epoch": 0.13, "learning_rate": 1.812773951158422e-05, "loss": 7.1754, "step": 200 }, { "epoch": 0.19, "learning_rate": 2.7520350657482777e-05, "loss": 5.6723, "step": 300 }, { "epoch": 0.25, "learning_rate": 3.691296180338134e-05, "loss": 4.1468, "step": 400 }, { "epoch": 0.31, "learning_rate": 4.6305572949279896e-05, "loss": 3.3576, "step": 500 }, { "epoch": 0.38, "learning_rate": 5.5698184095178454e-05, "loss": 2.955, "step": 600 }, { "epoch": 0.44, "learning_rate": 6.509079524107701e-05, "loss": 2.7689, "step": 700 }, { "epoch": 0.5, "learning_rate": 7.448340638697557e-05, "loss": 2.5858, "step": 800 }, { "epoch": 0.56, "learning_rate": 8.387601753287413e-05, "loss": 2.3275, "step": 900 }, { "epoch": 0.63, "learning_rate": 9.326862867877268e-05, "loss": 2.0771, "step": 1000 }, { "epoch": 0.69, "learning_rate": 0.00010266123982467126, "loss": 1.8958, "step": 1100 }, { "epoch": 0.75, "learning_rate": 0.0001120538509705698, "loss": 1.6778, "step": 1200 }, { "epoch": 0.81, "learning_rate": 0.00012144646211646837, "loss": 1.5346, "step": 1300 }, { "epoch": 0.88, "learning_rate": 0.00013083907326236693, "loss": 1.4238, "step": 1400 }, { "epoch": 0.94, "learning_rate": 0.0001402316844082655, "loss": 1.2824, "step": 1500 }, { "epoch": 1.0, "eval_accuracy": 0.6891732862606649, "eval_loss": 0.7817752957344055, "eval_runtime": 166.2325, "eval_samples_per_second": 40.895, "eval_steps_per_second": 0.644, "step": 1597 }, { "epoch": 1.0, "learning_rate": 0.00014962429555416404, "loss": 1.2121, "step": 1600 }, { "epoch": 1.06, "learning_rate": 0.0001590169067000626, "loss": 1.1391, "step": 1700 }, { "epoch": 1.13, "learning_rate": 0.00016840951784596116, "loss": 1.12, "step": 1800 }, { "epoch": 1.19, "learning_rate": 0.00017780212899185972, "loss": 1.053, "step": 1900 }, { "epoch": 1.25, "learning_rate": 0.0001871947401377583, "loss": 1.0245, "step": 2000 }, { "epoch": 1.31, "learning_rate": 0.00019658735128365683, "loss": 0.9887, "step": 2100 }, { "epoch": 1.38, "learning_rate": 0.0002059799624295554, "loss": 0.9671, "step": 2200 }, { "epoch": 1.44, "learning_rate": 0.00021537257357545395, "loss": 0.8912, "step": 2300 }, { "epoch": 1.5, "learning_rate": 0.00022476518472135253, "loss": 0.9015, "step": 2400 }, { "epoch": 1.57, "learning_rate": 0.0002341577958672511, "loss": 0.89, "step": 2500 }, { "epoch": 1.63, "learning_rate": 0.00024355040701314962, "loss": 0.8173, "step": 2600 }, { "epoch": 1.69, "learning_rate": 0.0002529430181590482, "loss": 0.8338, "step": 2700 }, { "epoch": 1.75, "learning_rate": 0.00026233562930494674, "loss": 0.8313, "step": 2800 }, { "epoch": 1.82, "learning_rate": 0.0002717282404508453, "loss": 0.8395, "step": 2900 }, { "epoch": 1.88, "learning_rate": 0.00028112085159674385, "loss": 0.8036, "step": 3000 }, { "epoch": 1.94, "learning_rate": 0.00029051346274264244, "loss": 0.8003, "step": 3100 }, { "epoch": 2.0, "eval_accuracy": 0.8734922035892909, "eval_loss": 0.44425612688064575, "eval_runtime": 158.3963, "eval_samples_per_second": 42.918, "eval_steps_per_second": 0.676, "step": 3194 }, { "epoch": 2.0, "learning_rate": 0.00029990607388854097, "loss": 0.7951, "step": 3200 }, { "epoch": 2.07, "learning_rate": 0.0002989668127739511, "loss": 0.8114, "step": 3300 }, { "epoch": 2.13, "learning_rate": 0.00029792318931329574, "loss": 0.8343, "step": 3400 }, { "epoch": 2.19, "learning_rate": 0.00029687956585264035, "loss": 0.7617, "step": 3500 }, { "epoch": 2.25, "learning_rate": 0.00029583594239198497, "loss": 0.7779, "step": 3600 }, { "epoch": 2.32, "learning_rate": 0.0002947923189313295, "loss": 0.7512, "step": 3700 }, { "epoch": 2.38, "learning_rate": 0.00029374869547067414, "loss": 0.7962, "step": 3800 }, { "epoch": 2.44, "learning_rate": 0.00029270507201001875, "loss": 0.7134, "step": 3900 }, { "epoch": 2.5, "learning_rate": 0.00029166144854936337, "loss": 0.786, "step": 4000 }, { "epoch": 2.57, "learning_rate": 0.000290617825088708, "loss": 0.7808, "step": 4100 }, { "epoch": 2.63, "learning_rate": 0.0002895742016280526, "loss": 0.7379, "step": 4200 }, { "epoch": 2.69, "learning_rate": 0.00028853057816739715, "loss": 0.7545, "step": 4300 }, { "epoch": 2.76, "learning_rate": 0.00028748695470674177, "loss": 0.7557, "step": 4400 }, { "epoch": 2.82, "learning_rate": 0.0002864433312460864, "loss": 0.7633, "step": 4500 }, { "epoch": 2.88, "learning_rate": 0.000285399707785431, "loss": 0.7414, "step": 4600 }, { "epoch": 2.94, "learning_rate": 0.0002843560843247756, "loss": 0.7232, "step": 4700 }, { "epoch": 3.0, "eval_accuracy": 0.8833480435422183, "eval_loss": 0.372787207365036, "eval_runtime": 149.856, "eval_samples_per_second": 45.364, "eval_steps_per_second": 0.714, "step": 4791 }, { "epoch": 3.01, "learning_rate": 0.00028331246086412017, "loss": 0.7661, "step": 4800 }, { "epoch": 3.07, "learning_rate": 0.00028226883740346484, "loss": 0.7224, "step": 4900 }, { "epoch": 3.13, "learning_rate": 0.0002812252139428094, "loss": 0.7186, "step": 5000 }, { "epoch": 3.19, "learning_rate": 0.0002801920267167606, "loss": 0.7151, "step": 5100 }, { "epoch": 3.26, "learning_rate": 0.00027914840325610516, "loss": 0.7175, "step": 5200 }, { "epoch": 3.32, "learning_rate": 0.0002781047797954498, "loss": 0.6871, "step": 5300 }, { "epoch": 3.38, "learning_rate": 0.0002770611563347944, "loss": 0.7446, "step": 5400 }, { "epoch": 3.44, "learning_rate": 0.000276017532874139, "loss": 0.7128, "step": 5500 }, { "epoch": 3.51, "learning_rate": 0.0002749739094134836, "loss": 0.6896, "step": 5600 }, { "epoch": 3.57, "learning_rate": 0.0002739302859528282, "loss": 0.714, "step": 5700 }, { "epoch": 3.63, "learning_rate": 0.0002728866624921728, "loss": 0.7478, "step": 5800 }, { "epoch": 3.69, "learning_rate": 0.0002718430390315174, "loss": 0.6853, "step": 5900 }, { "epoch": 3.76, "learning_rate": 0.000270799415570862, "loss": 0.7165, "step": 6000 }, { "epoch": 3.82, "learning_rate": 0.0002697557921102066, "loss": 0.704, "step": 6100 }, { "epoch": 3.88, "learning_rate": 0.00026871216864955125, "loss": 0.6954, "step": 6200 }, { "epoch": 3.94, "learning_rate": 0.0002676685451888958, "loss": 0.73, "step": 6300 }, { "epoch": 4.0, "eval_accuracy": 0.8973227419829362, "eval_loss": 0.346542090177536, "eval_runtime": 149.5904, "eval_samples_per_second": 45.444, "eval_steps_per_second": 0.715, "step": 6388 }, { "epoch": 4.01, "learning_rate": 0.0002666249217282404, "loss": 0.7149, "step": 6400 }, { "epoch": 4.07, "learning_rate": 0.00026558129826758503, "loss": 0.7107, "step": 6500 }, { "epoch": 4.13, "learning_rate": 0.00026453767480692965, "loss": 0.6744, "step": 6600 }, { "epoch": 4.2, "learning_rate": 0.0002634940513462742, "loss": 0.727, "step": 6700 }, { "epoch": 4.26, "learning_rate": 0.0002624504278856189, "loss": 0.7, "step": 6800 }, { "epoch": 4.32, "learning_rate": 0.00026140680442496343, "loss": 0.7239, "step": 6900 }, { "epoch": 4.38, "learning_rate": 0.00026036318096430805, "loss": 0.6847, "step": 7000 }, { "epoch": 4.45, "learning_rate": 0.00025931955750365266, "loss": 0.7151, "step": 7100 }, { "epoch": 4.51, "learning_rate": 0.0002582759340429973, "loss": 0.7228, "step": 7200 }, { "epoch": 4.57, "learning_rate": 0.0002572427468169484, "loss": 0.6925, "step": 7300 }, { "epoch": 4.63, "learning_rate": 0.000256199123356293, "loss": 0.7064, "step": 7400 }, { "epoch": 4.7, "learning_rate": 0.00025515549989563765, "loss": 0.6861, "step": 7500 }, { "epoch": 4.76, "learning_rate": 0.0002541118764349822, "loss": 0.7057, "step": 7600 }, { "epoch": 4.82, "learning_rate": 0.0002530682529743269, "loss": 0.6811, "step": 7700 }, { "epoch": 4.88, "learning_rate": 0.00025202462951367144, "loss": 0.6676, "step": 7800 }, { "epoch": 4.95, "learning_rate": 0.00025098100605301605, "loss": 0.7015, "step": 7900 }, { "epoch": 5.0, "eval_accuracy": 0.910856134157105, "eval_loss": 0.3211327791213989, "eval_runtime": 150.7528, "eval_samples_per_second": 45.094, "eval_steps_per_second": 0.71, "step": 7985 }, { "epoch": 5.01, "learning_rate": 0.00024993738259236067, "loss": 0.708, "step": 8000 }, { "epoch": 5.07, "learning_rate": 0.0002488937591317053, "loss": 0.7372, "step": 8100 }, { "epoch": 5.13, "learning_rate": 0.00024785013567104984, "loss": 0.7089, "step": 8200 }, { "epoch": 5.2, "learning_rate": 0.00024680651221039445, "loss": 0.6962, "step": 8300 }, { "epoch": 5.26, "learning_rate": 0.00024576288874973907, "loss": 0.7209, "step": 8400 }, { "epoch": 5.32, "learning_rate": 0.0002447192652890837, "loss": 0.7091, "step": 8500 }, { "epoch": 5.39, "learning_rate": 0.0002436756418284283, "loss": 0.6898, "step": 8600 }, { "epoch": 5.45, "learning_rate": 0.00024263201836777288, "loss": 0.726, "step": 8700 }, { "epoch": 5.51, "learning_rate": 0.00024158839490711747, "loss": 0.682, "step": 8800 }, { "epoch": 5.57, "learning_rate": 0.0002405447714464621, "loss": 0.6855, "step": 8900 }, { "epoch": 5.64, "learning_rate": 0.0002395011479858067, "loss": 0.7108, "step": 9000 }, { "epoch": 5.7, "learning_rate": 0.00023845752452515128, "loss": 0.6945, "step": 9100 }, { "epoch": 5.76, "learning_rate": 0.00023741390106449592, "loss": 0.6621, "step": 9200 }, { "epoch": 5.82, "learning_rate": 0.0002363702776038405, "loss": 0.7176, "step": 9300 }, { "epoch": 5.89, "learning_rate": 0.00023532665414318512, "loss": 0.6824, "step": 9400 }, { "epoch": 5.95, "learning_rate": 0.00023428303068252974, "loss": 0.6981, "step": 9500 }, { "epoch": 6.0, "eval_accuracy": 0.9080611944689615, "eval_loss": 0.3200249671936035, "eval_runtime": 151.3502, "eval_samples_per_second": 44.916, "eval_steps_per_second": 0.707, "step": 9582 }, { "epoch": 6.01, "learning_rate": 0.00023323940722187433, "loss": 0.6934, "step": 9600 }, { "epoch": 6.07, "learning_rate": 0.00023219578376121894, "loss": 0.7101, "step": 9700 }, { "epoch": 6.14, "learning_rate": 0.00023115216030056353, "loss": 0.6922, "step": 9800 }, { "epoch": 6.2, "learning_rate": 0.00023010853683990814, "loss": 0.7027, "step": 9900 }, { "epoch": 6.26, "learning_rate": 0.00022906491337925275, "loss": 0.6931, "step": 10000 }, { "epoch": 6.32, "learning_rate": 0.00022802128991859734, "loss": 0.6782, "step": 10100 }, { "epoch": 6.39, "learning_rate": 0.00022697766645794195, "loss": 0.6842, "step": 10200 }, { "epoch": 6.45, "learning_rate": 0.00022593404299728657, "loss": 0.7295, "step": 10300 }, { "epoch": 6.51, "learning_rate": 0.00022489041953663115, "loss": 0.7145, "step": 10400 }, { "epoch": 6.57, "learning_rate": 0.00022384679607597577, "loss": 0.6637, "step": 10500 }, { "epoch": 6.64, "learning_rate": 0.00022280317261532038, "loss": 0.6666, "step": 10600 }, { "epoch": 6.7, "learning_rate": 0.00022175954915466497, "loss": 0.7068, "step": 10700 }, { "epoch": 6.76, "learning_rate": 0.00022071592569400955, "loss": 0.6759, "step": 10800 }, { "epoch": 6.83, "learning_rate": 0.0002196723022333542, "loss": 0.6926, "step": 10900 }, { "epoch": 6.89, "learning_rate": 0.00021862867877269878, "loss": 0.6825, "step": 11000 }, { "epoch": 6.95, "learning_rate": 0.00021758505531204342, "loss": 0.6807, "step": 11100 }, { "epoch": 7.0, "eval_accuracy": 0.9058546631362165, "eval_loss": 0.3208906650543213, "eval_runtime": 150.3488, "eval_samples_per_second": 45.215, "eval_steps_per_second": 0.712, "step": 11179 }, { "epoch": 7.01, "learning_rate": 0.000216541431851388, "loss": 0.677, "step": 11200 }, { "epoch": 7.08, "learning_rate": 0.00021550824462533916, "loss": 0.7379, "step": 11300 }, { "epoch": 7.14, "learning_rate": 0.00021447505739929034, "loss": 0.7156, "step": 11400 }, { "epoch": 7.2, "learning_rate": 0.00021343143393863493, "loss": 0.6406, "step": 11500 }, { "epoch": 7.26, "learning_rate": 0.0002123878104779795, "loss": 0.6888, "step": 11600 }, { "epoch": 7.33, "learning_rate": 0.00021134418701732413, "loss": 0.6968, "step": 11700 }, { "epoch": 7.39, "learning_rate": 0.00021030056355666874, "loss": 0.6989, "step": 11800 }, { "epoch": 7.45, "learning_rate": 0.00020925694009601333, "loss": 0.6751, "step": 11900 }, { "epoch": 7.51, "learning_rate": 0.00020821331663535794, "loss": 0.6879, "step": 12000 }, { "epoch": 7.58, "learning_rate": 0.00020716969317470255, "loss": 0.678, "step": 12100 }, { "epoch": 7.64, "learning_rate": 0.00020612606971404714, "loss": 0.6501, "step": 12200 }, { "epoch": 7.7, "learning_rate": 0.00020508244625339176, "loss": 0.6679, "step": 12300 }, { "epoch": 7.76, "learning_rate": 0.00020403882279273637, "loss": 0.7116, "step": 12400 }, { "epoch": 7.83, "learning_rate": 0.00020299519933208098, "loss": 0.6899, "step": 12500 }, { "epoch": 7.89, "learning_rate": 0.00020195157587142557, "loss": 0.6892, "step": 12600 }, { "epoch": 7.95, "learning_rate": 0.00020090795241077016, "loss": 0.6873, "step": 12700 }, { "epoch": 8.0, "eval_accuracy": 0.902177110914975, "eval_loss": 0.3205910921096802, "eval_runtime": 152.1731, "eval_samples_per_second": 44.673, "eval_steps_per_second": 0.703, "step": 12776 }, { "epoch": 8.02, "learning_rate": 0.0001998643289501148, "loss": 0.6847, "step": 12800 }, { "epoch": 8.08, "learning_rate": 0.00019882070548945938, "loss": 0.6833, "step": 12900 }, { "epoch": 8.14, "learning_rate": 0.00019777708202880397, "loss": 0.6737, "step": 13000 }, { "epoch": 8.2, "learning_rate": 0.0001967334585681486, "loss": 0.6568, "step": 13100 }, { "epoch": 8.27, "learning_rate": 0.0001956898351074932, "loss": 0.7059, "step": 13200 }, { "epoch": 8.33, "learning_rate": 0.00019464621164683778, "loss": 0.6831, "step": 13300 }, { "epoch": 8.39, "learning_rate": 0.00019360258818618243, "loss": 0.7174, "step": 13400 }, { "epoch": 8.45, "learning_rate": 0.00019256940096013358, "loss": 0.6966, "step": 13500 }, { "epoch": 8.52, "learning_rate": 0.00019152577749947816, "loss": 0.699, "step": 13600 }, { "epoch": 8.58, "learning_rate": 0.00019048215403882278, "loss": 0.6774, "step": 13700 }, { "epoch": 8.64, "learning_rate": 0.0001894385305781674, "loss": 0.6848, "step": 13800 }, { "epoch": 8.7, "learning_rate": 0.00018839490711751198, "loss": 0.6901, "step": 13900 }, { "epoch": 8.77, "learning_rate": 0.00018735128365685656, "loss": 0.6994, "step": 14000 }, { "epoch": 8.83, "learning_rate": 0.0001863076601962012, "loss": 0.6739, "step": 14100 }, { "epoch": 8.89, "learning_rate": 0.0001852640367355458, "loss": 0.6854, "step": 14200 }, { "epoch": 8.95, "learning_rate": 0.00018422041327489038, "loss": 0.6416, "step": 14300 }, { "epoch": 9.0, "eval_accuracy": 0.9057075610473668, "eval_loss": 0.31237688660621643, "eval_runtime": 151.5107, "eval_samples_per_second": 44.868, "eval_steps_per_second": 0.706, "step": 14373 }, { "epoch": 9.02, "learning_rate": 0.00018317678981423502, "loss": 0.6711, "step": 14400 }, { "epoch": 9.08, "learning_rate": 0.0001821331663535796, "loss": 0.6965, "step": 14500 }, { "epoch": 9.14, "learning_rate": 0.00018108954289292422, "loss": 0.6896, "step": 14600 }, { "epoch": 9.2, "learning_rate": 0.00018004591943226883, "loss": 0.6944, "step": 14700 }, { "epoch": 9.27, "learning_rate": 0.00017900229597161342, "loss": 0.6981, "step": 14800 }, { "epoch": 9.33, "learning_rate": 0.00017795867251095803, "loss": 0.6819, "step": 14900 }, { "epoch": 9.39, "learning_rate": 0.00017691504905030265, "loss": 0.6869, "step": 15000 }, { "epoch": 9.46, "learning_rate": 0.00017587142558964723, "loss": 0.6974, "step": 15100 }, { "epoch": 9.52, "learning_rate": 0.00017482780212899185, "loss": 0.6621, "step": 15200 }, { "epoch": 9.58, "learning_rate": 0.00017378417866833646, "loss": 0.6732, "step": 15300 }, { "epoch": 9.64, "learning_rate": 0.00017274055520768105, "loss": 0.678, "step": 15400 }, { "epoch": 9.71, "learning_rate": 0.00017169693174702566, "loss": 0.6494, "step": 15500 }, { "epoch": 9.77, "learning_rate": 0.00017065330828637025, "loss": 0.6909, "step": 15600 }, { "epoch": 9.83, "learning_rate": 0.00016960968482571486, "loss": 0.687, "step": 15700 }, { "epoch": 9.89, "learning_rate": 0.00016856606136505948, "loss": 0.6705, "step": 15800 }, { "epoch": 9.96, "learning_rate": 0.00016752243790440406, "loss": 0.6698, "step": 15900 }, { "epoch": 10.0, "eval_accuracy": 0.8949691085613416, "eval_loss": 0.3288457989692688, "eval_runtime": 151.6827, "eval_samples_per_second": 44.817, "eval_steps_per_second": 0.705, "step": 15970 }, { "epoch": 10.02, "learning_rate": 0.00016648925067835524, "loss": 0.6701, "step": 16000 }, { "epoch": 10.08, "learning_rate": 0.00016544562721769983, "loss": 0.6771, "step": 16100 }, { "epoch": 10.14, "learning_rate": 0.00016440200375704444, "loss": 0.6877, "step": 16200 }, { "epoch": 10.21, "learning_rate": 0.00016335838029638906, "loss": 0.6495, "step": 16300 }, { "epoch": 10.27, "learning_rate": 0.00016231475683573364, "loss": 0.6925, "step": 16400 }, { "epoch": 10.33, "learning_rate": 0.00016127113337507826, "loss": 0.647, "step": 16500 }, { "epoch": 10.39, "learning_rate": 0.00016022750991442287, "loss": 0.658, "step": 16600 }, { "epoch": 10.46, "learning_rate": 0.00015918388645376746, "loss": 0.7033, "step": 16700 }, { "epoch": 10.52, "learning_rate": 0.00015814026299311207, "loss": 0.6675, "step": 16800 }, { "epoch": 10.58, "learning_rate": 0.00015709663953245666, "loss": 0.6905, "step": 16900 }, { "epoch": 10.64, "learning_rate": 0.0001560530160718013, "loss": 0.6766, "step": 17000 }, { "epoch": 10.71, "learning_rate": 0.00015500939261114588, "loss": 0.684, "step": 17100 }, { "epoch": 10.77, "learning_rate": 0.00015396576915049047, "loss": 0.6382, "step": 17200 }, { "epoch": 10.83, "learning_rate": 0.0001529221456898351, "loss": 0.6737, "step": 17300 }, { "epoch": 10.9, "learning_rate": 0.0001518785222291797, "loss": 0.691, "step": 17400 }, { "epoch": 10.96, "learning_rate": 0.00015083489876852429, "loss": 0.716, "step": 17500 }, { "epoch": 11.0, "eval_accuracy": 0.8998234774933804, "eval_loss": 0.31469690799713135, "eval_runtime": 150.1862, "eval_samples_per_second": 45.264, "eval_steps_per_second": 0.712, "step": 17567 }, { "epoch": 11.02, "learning_rate": 0.0001497912753078689, "loss": 0.7326, "step": 17600 }, { "epoch": 11.08, "learning_rate": 0.0001487476518472135, "loss": 0.6747, "step": 17700 }, { "epoch": 11.15, "learning_rate": 0.00014770402838655813, "loss": 0.7075, "step": 17800 }, { "epoch": 11.21, "learning_rate": 0.0001466604049259027, "loss": 0.69, "step": 17900 }, { "epoch": 11.27, "learning_rate": 0.00014561678146524733, "loss": 0.6793, "step": 18000 }, { "epoch": 11.33, "learning_rate": 0.00014457315800459194, "loss": 0.6782, "step": 18100 }, { "epoch": 11.4, "learning_rate": 0.00014352953454393655, "loss": 0.6532, "step": 18200 }, { "epoch": 11.46, "learning_rate": 0.00014249634731788768, "loss": 0.7053, "step": 18300 }, { "epoch": 11.52, "learning_rate": 0.0001414527238572323, "loss": 0.6476, "step": 18400 }, { "epoch": 11.58, "learning_rate": 0.0001404091003965769, "loss": 0.6308, "step": 18500 }, { "epoch": 11.65, "learning_rate": 0.0001393654769359215, "loss": 0.6886, "step": 18600 }, { "epoch": 11.71, "learning_rate": 0.0001383218534752661, "loss": 0.6631, "step": 18700 }, { "epoch": 11.77, "learning_rate": 0.00013727823001461072, "loss": 0.7056, "step": 18800 }, { "epoch": 11.83, "learning_rate": 0.00013623460655395533, "loss": 0.6602, "step": 18900 }, { "epoch": 11.9, "learning_rate": 0.00013519098309329992, "loss": 0.6728, "step": 19000 }, { "epoch": 11.96, "learning_rate": 0.00013414735963264453, "loss": 0.6514, "step": 19100 }, { "epoch": 12.0, "eval_accuracy": 0.9111503383348043, "eval_loss": 0.3034283220767975, "eval_runtime": 150.4249, "eval_samples_per_second": 45.192, "eval_steps_per_second": 0.711, "step": 19164 }, { "epoch": 12.02, "learning_rate": 0.00013310373617198915, "loss": 0.6567, "step": 19200 }, { "epoch": 12.09, "learning_rate": 0.00013206011271133373, "loss": 0.6882, "step": 19300 }, { "epoch": 12.15, "learning_rate": 0.00013101648925067835, "loss": 0.6511, "step": 19400 }, { "epoch": 12.21, "learning_rate": 0.00012997286579002296, "loss": 0.6705, "step": 19500 }, { "epoch": 12.27, "learning_rate": 0.00012892924232936755, "loss": 0.6693, "step": 19600 }, { "epoch": 12.34, "learning_rate": 0.00012788561886871216, "loss": 0.68, "step": 19700 }, { "epoch": 12.4, "learning_rate": 0.00012684199540805675, "loss": 0.6767, "step": 19800 }, { "epoch": 12.46, "learning_rate": 0.00012579837194740136, "loss": 0.6768, "step": 19900 }, { "epoch": 12.52, "learning_rate": 0.00012475474848674598, "loss": 0.6662, "step": 20000 }, { "epoch": 12.59, "learning_rate": 0.00012371112502609056, "loss": 0.6511, "step": 20100 }, { "epoch": 12.65, "learning_rate": 0.00012266750156543518, "loss": 0.7057, "step": 20200 }, { "epoch": 12.71, "learning_rate": 0.00012163431433938634, "loss": 0.6699, "step": 20300 }, { "epoch": 12.77, "learning_rate": 0.00012059069087873094, "loss": 0.6541, "step": 20400 }, { "epoch": 12.84, "learning_rate": 0.00011954706741807554, "loss": 0.6741, "step": 20500 }, { "epoch": 12.9, "learning_rate": 0.00011850344395742016, "loss": 0.658, "step": 20600 }, { "epoch": 12.96, "learning_rate": 0.00011745982049676476, "loss": 0.6513, "step": 20700 }, { "epoch": 13.0, "eval_accuracy": 0.9092380111797588, "eval_loss": 0.30905914306640625, "eval_runtime": 152.1119, "eval_samples_per_second": 44.691, "eval_steps_per_second": 0.703, "step": 20761 }, { "epoch": 13.02, "learning_rate": 0.00011641619703610936, "loss": 0.6568, "step": 20800 }, { "epoch": 13.09, "learning_rate": 0.00011537257357545397, "loss": 0.6853, "step": 20900 }, { "epoch": 13.15, "learning_rate": 0.00011432895011479858, "loss": 0.6699, "step": 21000 }, { "epoch": 13.21, "learning_rate": 0.00011328532665414317, "loss": 0.6494, "step": 21100 }, { "epoch": 13.27, "learning_rate": 0.00011224170319348778, "loss": 0.7118, "step": 21200 }, { "epoch": 13.34, "learning_rate": 0.00011119807973283239, "loss": 0.6649, "step": 21300 }, { "epoch": 13.4, "learning_rate": 0.00011015445627217699, "loss": 0.6646, "step": 21400 }, { "epoch": 13.46, "learning_rate": 0.0001091108328115216, "loss": 0.6436, "step": 21500 }, { "epoch": 13.53, "learning_rate": 0.0001080672093508662, "loss": 0.6258, "step": 21600 }, { "epoch": 13.59, "learning_rate": 0.0001070235858902108, "loss": 0.6754, "step": 21700 }, { "epoch": 13.65, "learning_rate": 0.0001059799624295554, "loss": 0.6737, "step": 21800 }, { "epoch": 13.71, "learning_rate": 0.00010493633896890001, "loss": 0.6511, "step": 21900 }, { "epoch": 13.78, "learning_rate": 0.00010389271550824463, "loss": 0.6472, "step": 22000 }, { "epoch": 13.84, "learning_rate": 0.00010284909204758921, "loss": 0.6571, "step": 22100 }, { "epoch": 13.9, "learning_rate": 0.00010180546858693383, "loss": 0.693, "step": 22200 }, { "epoch": 13.96, "learning_rate": 0.00010076184512627843, "loss": 0.652, "step": 22300 }, { "epoch": 14.0, "eval_accuracy": 0.909973521624007, "eval_loss": 0.30560359358787537, "eval_runtime": 151.7724, "eval_samples_per_second": 44.791, "eval_steps_per_second": 0.705, "step": 22358 }, { "epoch": 14.03, "learning_rate": 9.971822166562303e-05, "loss": 0.6286, "step": 22400 }, { "epoch": 14.09, "learning_rate": 9.867459820496764e-05, "loss": 0.6503, "step": 22500 }, { "epoch": 14.15, "learning_rate": 9.763097474431224e-05, "loss": 0.6514, "step": 22600 }, { "epoch": 14.21, "learning_rate": 9.659778751826341e-05, "loss": 0.6728, "step": 22700 }, { "epoch": 14.28, "learning_rate": 9.555416405760801e-05, "loss": 0.6621, "step": 22800 }, { "epoch": 14.34, "learning_rate": 9.451054059695261e-05, "loss": 0.6771, "step": 22900 }, { "epoch": 14.4, "learning_rate": 9.346691713629722e-05, "loss": 0.6689, "step": 23000 }, { "epoch": 14.46, "learning_rate": 9.242329367564181e-05, "loss": 0.6712, "step": 23100 }, { "epoch": 14.53, "learning_rate": 9.137967021498642e-05, "loss": 0.6761, "step": 23200 }, { "epoch": 14.59, "learning_rate": 9.033604675433104e-05, "loss": 0.6327, "step": 23300 }, { "epoch": 14.65, "learning_rate": 8.929242329367564e-05, "loss": 0.6671, "step": 23400 }, { "epoch": 14.72, "learning_rate": 8.824879983302024e-05, "loss": 0.6598, "step": 23500 }, { "epoch": 14.78, "learning_rate": 8.720517637236485e-05, "loss": 0.6317, "step": 23600 }, { "epoch": 14.84, "learning_rate": 8.616155291170945e-05, "loss": 0.6615, "step": 23700 }, { "epoch": 14.9, "learning_rate": 8.511792945105405e-05, "loss": 0.6087, "step": 23800 }, { "epoch": 14.97, "learning_rate": 8.407430599039865e-05, "loss": 0.7105, "step": 23900 }, { "epoch": 15.0, "eval_accuracy": 0.9149749926448956, "eval_loss": 0.30149412155151367, "eval_runtime": 151.4148, "eval_samples_per_second": 44.897, "eval_steps_per_second": 0.707, "step": 23955 }, { "epoch": 15.03, "learning_rate": 8.303068252974326e-05, "loss": 0.6911, "step": 24000 }, { "epoch": 15.09, "learning_rate": 8.198705906908788e-05, "loss": 0.6717, "step": 24100 }, { "epoch": 15.15, "learning_rate": 8.094343560843246e-05, "loss": 0.6564, "step": 24200 }, { "epoch": 15.22, "learning_rate": 7.989981214777708e-05, "loss": 0.6446, "step": 24300 }, { "epoch": 15.28, "learning_rate": 7.885618868712169e-05, "loss": 0.6431, "step": 24400 }, { "epoch": 15.34, "learning_rate": 7.781256522646628e-05, "loss": 0.6762, "step": 24500 }, { "epoch": 15.4, "learning_rate": 7.676894176581089e-05, "loss": 0.6656, "step": 24600 }, { "epoch": 15.47, "learning_rate": 7.572531830515549e-05, "loss": 0.6337, "step": 24700 }, { "epoch": 15.53, "learning_rate": 7.468169484450009e-05, "loss": 0.6541, "step": 24800 }, { "epoch": 15.59, "learning_rate": 7.363807138384469e-05, "loss": 0.6772, "step": 24900 }, { "epoch": 15.65, "learning_rate": 7.260488415779586e-05, "loss": 0.629, "step": 25000 }, { "epoch": 15.72, "learning_rate": 7.156126069714046e-05, "loss": 0.6998, "step": 25100 }, { "epoch": 15.78, "learning_rate": 7.051763723648507e-05, "loss": 0.6686, "step": 25200 }, { "epoch": 15.84, "learning_rate": 6.947401377582967e-05, "loss": 0.6822, "step": 25300 }, { "epoch": 15.9, "learning_rate": 6.843039031517429e-05, "loss": 0.6143, "step": 25400 }, { "epoch": 15.97, "learning_rate": 6.738676685451889e-05, "loss": 0.6337, "step": 25500 }, { "epoch": 16.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.30700910091400146, "eval_runtime": 152.1641, "eval_samples_per_second": 44.675, "eval_steps_per_second": 0.703, "step": 25552 }, { "epoch": 16.03, "learning_rate": 6.634314339386349e-05, "loss": 0.6502, "step": 25600 }, { "epoch": 16.09, "learning_rate": 6.52995199332081e-05, "loss": 0.6448, "step": 25700 }, { "epoch": 16.16, "learning_rate": 6.42558964725527e-05, "loss": 0.6272, "step": 25800 }, { "epoch": 16.22, "learning_rate": 6.32122730118973e-05, "loss": 0.6316, "step": 25900 }, { "epoch": 16.28, "learning_rate": 6.21686495512419e-05, "loss": 0.6442, "step": 26000 }, { "epoch": 16.34, "learning_rate": 6.11250260905865e-05, "loss": 0.6489, "step": 26100 }, { "epoch": 16.41, "learning_rate": 6.0081402629931114e-05, "loss": 0.6603, "step": 26200 }, { "epoch": 16.47, "learning_rate": 5.9037779169275714e-05, "loss": 0.6449, "step": 26300 }, { "epoch": 16.53, "learning_rate": 5.799415570862033e-05, "loss": 0.661, "step": 26400 }, { "epoch": 16.59, "learning_rate": 5.695053224796493e-05, "loss": 0.642, "step": 26500 }, { "epoch": 16.66, "learning_rate": 5.5906908787309535e-05, "loss": 0.6498, "step": 26600 }, { "epoch": 16.72, "learning_rate": 5.4863285326654135e-05, "loss": 0.6902, "step": 26700 }, { "epoch": 16.78, "learning_rate": 5.381966186599874e-05, "loss": 0.6336, "step": 26800 }, { "epoch": 16.84, "learning_rate": 5.277603840534335e-05, "loss": 0.6393, "step": 26900 }, { "epoch": 16.91, "learning_rate": 5.173241494468795e-05, "loss": 0.6496, "step": 27000 }, { "epoch": 16.97, "learning_rate": 5.0688791484032557e-05, "loss": 0.63, "step": 27100 }, { "epoch": 17.0, "eval_accuracy": 0.913503971756399, "eval_loss": 0.30175167322158813, "eval_runtime": 146.2393, "eval_samples_per_second": 46.485, "eval_steps_per_second": 0.732, "step": 27149 }, { "epoch": 17.03, "learning_rate": 4.9645168023377163e-05, "loss": 0.6195, "step": 27200 }, { "epoch": 17.09, "learning_rate": 4.861198079732832e-05, "loss": 0.6299, "step": 27300 }, { "epoch": 17.16, "learning_rate": 4.756835733667292e-05, "loss": 0.6939, "step": 27400 }, { "epoch": 17.22, "learning_rate": 4.652473387601752e-05, "loss": 0.6416, "step": 27500 }, { "epoch": 17.28, "learning_rate": 4.5481110415362136e-05, "loss": 0.6626, "step": 27600 }, { "epoch": 17.35, "learning_rate": 4.4437486954706736e-05, "loss": 0.6444, "step": 27700 }, { "epoch": 17.41, "learning_rate": 4.339386349405134e-05, "loss": 0.6238, "step": 27800 }, { "epoch": 17.47, "learning_rate": 4.2350240033395944e-05, "loss": 0.6505, "step": 27900 }, { "epoch": 17.53, "learning_rate": 4.130661657274056e-05, "loss": 0.6346, "step": 28000 }, { "epoch": 17.6, "learning_rate": 4.026299311208516e-05, "loss": 0.6319, "step": 28100 }, { "epoch": 17.66, "learning_rate": 3.921936965142976e-05, "loss": 0.6811, "step": 28200 }, { "epoch": 17.72, "learning_rate": 3.8175746190774365e-05, "loss": 0.6645, "step": 28300 }, { "epoch": 17.78, "learning_rate": 3.713212273011897e-05, "loss": 0.6512, "step": 28400 }, { "epoch": 17.85, "learning_rate": 3.608849926946357e-05, "loss": 0.6578, "step": 28500 }, { "epoch": 17.91, "learning_rate": 3.504487580880818e-05, "loss": 0.6358, "step": 28600 }, { "epoch": 17.97, "learning_rate": 3.4001252348152786e-05, "loss": 0.6672, "step": 28700 }, { "epoch": 18.0, "eval_accuracy": 0.9087967049132097, "eval_loss": 0.30836355686187744, "eval_runtime": 148.5759, "eval_samples_per_second": 45.754, "eval_steps_per_second": 0.72, "step": 28746 }, { "epoch": 18.03, "learning_rate": 3.295762888749739e-05, "loss": 0.65, "step": 28800 }, { "epoch": 18.1, "learning_rate": 3.191400542684199e-05, "loss": 0.6293, "step": 28900 }, { "epoch": 18.16, "learning_rate": 3.087038196618659e-05, "loss": 0.6059, "step": 29000 }, { "epoch": 18.22, "learning_rate": 2.98267585055312e-05, "loss": 0.5975, "step": 29100 }, { "epoch": 18.28, "learning_rate": 2.8783135044875807e-05, "loss": 0.6759, "step": 29200 }, { "epoch": 18.35, "learning_rate": 2.773951158422041e-05, "loss": 0.6457, "step": 29300 }, { "epoch": 18.41, "learning_rate": 2.6695888123565017e-05, "loss": 0.6715, "step": 29400 }, { "epoch": 18.47, "learning_rate": 2.565226466290962e-05, "loss": 0.6642, "step": 29500 }, { "epoch": 18.53, "learning_rate": 2.460864120225422e-05, "loss": 0.6432, "step": 29600 }, { "epoch": 18.6, "learning_rate": 2.3565017741598828e-05, "loss": 0.6441, "step": 29700 }, { "epoch": 18.66, "learning_rate": 2.2521394280943432e-05, "loss": 0.6843, "step": 29800 }, { "epoch": 18.72, "learning_rate": 2.147777082028804e-05, "loss": 0.6459, "step": 29900 }, { "epoch": 18.79, "learning_rate": 2.0434147359632642e-05, "loss": 0.6233, "step": 30000 }, { "epoch": 18.85, "learning_rate": 1.939052389897725e-05, "loss": 0.6634, "step": 30100 }, { "epoch": 18.91, "learning_rate": 1.8346900438321853e-05, "loss": 0.6701, "step": 30200 }, { "epoch": 18.97, "learning_rate": 1.7303276977666456e-05, "loss": 0.6479, "step": 30300 }, { "epoch": 19.0, "eval_accuracy": 0.9101206237128567, "eval_loss": 0.3060016632080078, "eval_runtime": 145.9275, "eval_samples_per_second": 46.585, "eval_steps_per_second": 0.733, "step": 30343 }, { "epoch": 19.04, "learning_rate": 1.625965351701106e-05, "loss": 0.6155, "step": 30400 }, { "epoch": 19.1, "learning_rate": 1.5216030056355665e-05, "loss": 0.6079, "step": 30500 }, { "epoch": 19.16, "learning_rate": 1.417240659570027e-05, "loss": 0.6709, "step": 30600 }, { "epoch": 19.22, "learning_rate": 1.3128783135044874e-05, "loss": 0.6604, "step": 30700 }, { "epoch": 19.29, "learning_rate": 1.208515967438948e-05, "loss": 0.6222, "step": 30800 }, { "epoch": 19.35, "learning_rate": 1.1041536213734085e-05, "loss": 0.6428, "step": 30900 }, { "epoch": 19.41, "learning_rate": 9.997912753078688e-06, "loss": 0.6664, "step": 31000 }, { "epoch": 19.47, "learning_rate": 8.954289292423293e-06, "loss": 0.6489, "step": 31100 }, { "epoch": 19.54, "learning_rate": 7.910665831767897e-06, "loss": 0.6414, "step": 31200 }, { "epoch": 19.6, "learning_rate": 6.877478605719056e-06, "loss": 0.6599, "step": 31300 }, { "epoch": 19.66, "learning_rate": 5.83385514506366e-06, "loss": 0.6433, "step": 31400 }, { "epoch": 19.72, "learning_rate": 4.790231684408265e-06, "loss": 0.6325, "step": 31500 }, { "epoch": 19.79, "learning_rate": 3.7466082237528697e-06, "loss": 0.6363, "step": 31600 }, { "epoch": 19.85, "learning_rate": 2.7029847630974745e-06, "loss": 0.6408, "step": 31700 }, { "epoch": 19.91, "learning_rate": 1.6593613024420787e-06, "loss": 0.645, "step": 31800 }, { "epoch": 19.97, "learning_rate": 6.157378417866834e-07, "loss": 0.6658, "step": 31900 }, { "epoch": 20.0, "eval_accuracy": 0.9089438070020595, "eval_loss": 0.3071773946285248, "eval_runtime": 145.97, "eval_samples_per_second": 46.571, "eval_steps_per_second": 0.733, "step": 31940 }, { "epoch": 20.0, "step": 31940, "total_flos": 0.0, "train_loss": 0.8136612295581911, "train_runtime": 28286.4741, "train_samples_per_second": 36.126, "train_steps_per_second": 1.129 } ], "max_steps": 31940, "num_train_epochs": 20, "total_flos": 0.0, "trial_name": null, "trial_params": null }