{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.61392489388835, "global_step": 80000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 7.109546016941087e-06, "loss": 49.9771, "step": 500 }, { "epoch": 0.1, "learning_rate": 1.4247644427524506e-05, "loss": 43.393, "step": 1000 }, { "epoch": 0.14, "learning_rate": 2.1385742838107926e-05, "loss": 39.2346, "step": 1500 }, { "epoch": 0.19, "learning_rate": 2.852384124869135e-05, "loss": 35.6806, "step": 2000 }, { "epoch": 0.24, "learning_rate": 3.566193965927477e-05, "loss": 33.5102, "step": 2500 }, { "epoch": 0.29, "learning_rate": 4.280003806985819e-05, "loss": 32.3278, "step": 3000 }, { "epoch": 0.33, "learning_rate": 4.993813648044161e-05, "loss": 31.4953, "step": 3500 }, { "epoch": 0.38, "learning_rate": 5.707623489102503e-05, "loss": 30.816, "step": 4000 }, { "epoch": 0.43, "learning_rate": 6.421433330160845e-05, "loss": 30.252, "step": 4500 }, { "epoch": 0.48, "learning_rate": 7.135243171219186e-05, "loss": 29.7712, "step": 5000 }, { "epoch": 0.52, "learning_rate": 7.849053012277528e-05, "loss": 29.3302, "step": 5500 }, { "epoch": 0.57, "learning_rate": 8.56286285333587e-05, "loss": 28.9378, "step": 6000 }, { "epoch": 0.62, "learning_rate": 9.276672694394213e-05, "loss": 28.5842, "step": 6500 }, { "epoch": 0.67, "learning_rate": 9.990482535452555e-05, "loss": 28.2426, "step": 7000 }, { "epoch": 0.71, "learning_rate": 0.00010704292376510896, "loss": 27.9328, "step": 7500 }, { "epoch": 0.76, "learning_rate": 0.00011416674597887122, "loss": 27.6412, "step": 8000 }, { "epoch": 0.81, "learning_rate": 0.00012129056819263346, "loss": 27.364, "step": 8500 }, { "epoch": 0.86, "learning_rate": 0.0001284286666032169, "loss": 27.1143, "step": 9000 }, { "epoch": 0.9, "learning_rate": 0.00013556676501380032, "loss": 26.8555, "step": 9500 }, { "epoch": 0.95, "learning_rate": 0.00014270486342438373, "loss": 26.6034, "step": 10000 }, { "epoch": 1.0, "learning_rate": 0.00014984296183496716, "loss": 26.3698, "step": 10500 }, { "epoch": 1.05, "learning_rate": 0.00015698106024555057, "loss": 26.1622, "step": 11000 }, { "epoch": 1.09, "learning_rate": 0.000164119158656134, "loss": 25.9071, "step": 11500 }, { "epoch": 1.14, "learning_rate": 0.0001712572570667174, "loss": 25.7091, "step": 12000 }, { "epoch": 1.19, "learning_rate": 0.0001783953554773008, "loss": 25.5191, "step": 12500 }, { "epoch": 1.24, "learning_rate": 0.00018553345388788427, "loss": 25.3345, "step": 13000 }, { "epoch": 1.28, "learning_rate": 0.0001926572761016465, "loss": 25.1661, "step": 13500 }, { "epoch": 1.33, "learning_rate": 0.0001997953745122299, "loss": 25.0167, "step": 14000 }, { "epoch": 1.38, "learning_rate": 0.00020693347292281334, "loss": 24.8676, "step": 14500 }, { "epoch": 1.43, "learning_rate": 0.00021407157133339675, "loss": 24.7221, "step": 15000 }, { "epoch": 1.48, "learning_rate": 0.00022119539354715904, "loss": 24.5844, "step": 15500 }, { "epoch": 1.52, "learning_rate": 0.00022831921576092128, "loss": 24.4609, "step": 16000 }, { "epoch": 1.57, "learning_rate": 0.00023545731417150468, "loss": 24.3188, "step": 16500 }, { "epoch": 1.62, "learning_rate": 0.0002425954125820881, "loss": 24.1855, "step": 17000 }, { "epoch": 1.67, "learning_rate": 0.0002497049585990292, "loss": 24.0573, "step": 17500 }, { "epoch": 1.71, "learning_rate": 0.0002568430570096126, "loss": 23.9433, "step": 18000 }, { "epoch": 1.76, "learning_rate": 0.000263981155420196, "loss": 23.8441, "step": 18500 }, { "epoch": 1.81, "learning_rate": 0.0002711192538307794, "loss": 23.7344, "step": 19000 }, { "epoch": 1.86, "learning_rate": 0.0002782573522413629, "loss": 23.6054, "step": 19500 }, { "epoch": 1.9, "learning_rate": 0.0002853811744551251, "loss": 23.5031, "step": 20000 }, { "epoch": 1.95, "learning_rate": 0.0002925192728657086, "loss": 23.409, "step": 20500 }, { "epoch": 2.0, "learning_rate": 0.000299657371276292, "loss": 23.2973, "step": 21000 }, { "epoch": 2.05, "learning_rate": 0.00029924653405666063, "loss": 23.1983, "step": 21500 }, { "epoch": 2.09, "learning_rate": 0.00029845341201104026, "loss": 23.0667, "step": 22000 }, { "epoch": 2.14, "learning_rate": 0.00029766028996541983, "loss": 22.9586, "step": 22500 }, { "epoch": 2.19, "learning_rate": 0.00029686716791979946, "loss": 22.8611, "step": 23000 }, { "epoch": 2.24, "learning_rate": 0.0002960740458741791, "loss": 22.7729, "step": 23500 }, { "epoch": 2.28, "learning_rate": 0.0002952809238285587, "loss": 22.6924, "step": 24000 }, { "epoch": 2.33, "learning_rate": 0.00029448780178293834, "loss": 22.6076, "step": 24500 }, { "epoch": 2.38, "learning_rate": 0.00029369467973731797, "loss": 22.536, "step": 25000 }, { "epoch": 2.43, "learning_rate": 0.0002929015576916976, "loss": 22.4573, "step": 25500 }, { "epoch": 2.47, "learning_rate": 0.00029210843564607717, "loss": 22.3945, "step": 26000 }, { "epoch": 2.52, "learning_rate": 0.0002913153136004568, "loss": 22.3224, "step": 26500 }, { "epoch": 2.57, "learning_rate": 0.0002905221915548364, "loss": 22.2743, "step": 27000 }, { "epoch": 2.62, "learning_rate": 0.00028972906950921605, "loss": 22.2017, "step": 27500 }, { "epoch": 2.66, "learning_rate": 0.0002889359474635957, "loss": 22.1405, "step": 28000 }, { "epoch": 2.71, "learning_rate": 0.0002881428254179753, "loss": 22.0828, "step": 28500 }, { "epoch": 2.76, "learning_rate": 0.00028734970337235494, "loss": 22.0215, "step": 29000 }, { "epoch": 2.81, "learning_rate": 0.00028655658132673457, "loss": 21.9786, "step": 29500 }, { "epoch": 2.86, "learning_rate": 0.0002857650455252054, "loss": 21.9217, "step": 30000 }, { "epoch": 2.9, "learning_rate": 0.0002849750959677675, "loss": 21.8631, "step": 30500 }, { "epoch": 2.95, "learning_rate": 0.00028418197392214713, "loss": 21.8205, "step": 31000 }, { "epoch": 3.0, "learning_rate": 0.00028338885187652676, "loss": 21.7683, "step": 31500 }, { "epoch": 3.05, "learning_rate": 0.0002825957298309064, "loss": 21.7244, "step": 32000 }, { "epoch": 3.09, "learning_rate": 0.00028180260778528596, "loss": 21.6501, "step": 32500 }, { "epoch": 3.14, "learning_rate": 0.0002810094857396656, "loss": 21.6091, "step": 33000 }, { "epoch": 3.19, "learning_rate": 0.00028021794993813643, "loss": 21.5633, "step": 33500 }, { "epoch": 3.24, "learning_rate": 0.0002794280003806986, "loss": 21.521, "step": 34000 }, { "epoch": 3.28, "learning_rate": 0.0002786348783350782, "loss": 21.485, "step": 34500 }, { "epoch": 3.33, "learning_rate": 0.0002778417562894578, "loss": 21.4439, "step": 35000 }, { "epoch": 3.38, "learning_rate": 0.0002770486342438374, "loss": 21.3951, "step": 35500 }, { "epoch": 3.43, "learning_rate": 0.00027625551219821704, "loss": 21.3558, "step": 36000 }, { "epoch": 3.47, "learning_rate": 0.00027546239015259667, "loss": 21.3135, "step": 36500 }, { "epoch": 3.52, "learning_rate": 0.0002746692681069763, "loss": 21.2646, "step": 37000 }, { "epoch": 3.57, "learning_rate": 0.0002738761460613559, "loss": 21.2365, "step": 37500 }, { "epoch": 3.62, "learning_rate": 0.00027308302401573555, "loss": 21.2025, "step": 38000 }, { "epoch": 3.66, "learning_rate": 0.00027228990197011513, "loss": 21.156, "step": 38500 }, { "epoch": 3.71, "learning_rate": 0.00027149677992449476, "loss": 21.1298, "step": 39000 }, { "epoch": 3.76, "learning_rate": 0.0002707036578788744, "loss": 21.1058, "step": 39500 }, { "epoch": 3.81, "learning_rate": 0.000269910535833254, "loss": 21.0673, "step": 40000 }, { "epoch": 3.85, "learning_rate": 0.00026911741378763364, "loss": 21.0313, "step": 40500 }, { "epoch": 3.9, "learning_rate": 0.00026832429174201327, "loss": 21.003, "step": 41000 }, { "epoch": 3.95, "learning_rate": 0.00026753116969639284, "loss": 20.9636, "step": 41500 }, { "epoch": 4.0, "learning_rate": 0.00026673804765077247, "loss": 20.9453, "step": 42000 }, { "epoch": 4.04, "learning_rate": 0.0002659449256051521, "loss": 20.9264, "step": 42500 }, { "epoch": 4.09, "learning_rate": 0.0002651518035595317, "loss": 20.8611, "step": 43000 }, { "epoch": 4.14, "learning_rate": 0.00026436026775800256, "loss": 20.8391, "step": 43500 }, { "epoch": 4.19, "learning_rate": 0.0002635671457123822, "loss": 20.8082, "step": 44000 }, { "epoch": 4.24, "learning_rate": 0.0002627740236667618, "loss": 20.7882, "step": 44500 }, { "epoch": 4.28, "learning_rate": 0.00026198090162114144, "loss": 20.7546, "step": 45000 }, { "epoch": 4.33, "learning_rate": 0.00026118777957552107, "loss": 20.7187, "step": 45500 }, { "epoch": 4.38, "learning_rate": 0.00026039465752990065, "loss": 20.7079, "step": 46000 }, { "epoch": 4.43, "learning_rate": 0.0002596047079724628, "loss": 20.6731, "step": 46500 }, { "epoch": 4.47, "learning_rate": 0.0002588115859268424, "loss": 20.656, "step": 47000 }, { "epoch": 4.52, "learning_rate": 0.000258018463881222, "loss": 20.6342, "step": 47500 }, { "epoch": 4.57, "learning_rate": 0.00025722534183560163, "loss": 20.6155, "step": 48000 }, { "epoch": 4.62, "learning_rate": 0.00025643380603407247, "loss": 20.587, "step": 48500 }, { "epoch": 4.66, "learning_rate": 0.0002556406839884521, "loss": 20.5652, "step": 49000 }, { "epoch": 4.71, "learning_rate": 0.0002548475619428317, "loss": 20.5426, "step": 49500 }, { "epoch": 4.76, "learning_rate": 0.00025405443989721135, "loss": 20.5183, "step": 50000 }, { "epoch": 4.81, "learning_rate": 0.000253261317851591, "loss": 20.4988, "step": 50500 }, { "epoch": 4.85, "learning_rate": 0.0002524681958059706, "loss": 20.4794, "step": 51000 }, { "epoch": 4.9, "learning_rate": 0.00025167507376035024, "loss": 20.4558, "step": 51500 }, { "epoch": 4.95, "learning_rate": 0.00025088195171472986, "loss": 20.4292, "step": 52000 }, { "epoch": 5.0, "learning_rate": 0.00025008882966910944, "loss": 20.4047, "step": 52500 }, { "epoch": 5.04, "learning_rate": 0.00024929888011167154, "loss": 20.404, "step": 53000 }, { "epoch": 5.09, "learning_rate": 0.00024850575806605117, "loss": 20.3589, "step": 53500 }, { "epoch": 5.14, "learning_rate": 0.00024771422226452206, "loss": 20.3373, "step": 54000 }, { "epoch": 5.19, "learning_rate": 0.0002469211002189017, "loss": 20.3208, "step": 54500 }, { "epoch": 5.23, "learning_rate": 0.00024612797817328126, "loss": 20.2981, "step": 55000 }, { "epoch": 5.28, "learning_rate": 0.0002453348561276609, "loss": 20.2951, "step": 55500 }, { "epoch": 5.33, "learning_rate": 0.0002445417340820405, "loss": 20.2748, "step": 56000 }, { "epoch": 5.38, "learning_rate": 0.00024374861203642015, "loss": 20.2582, "step": 56500 }, { "epoch": 5.42, "learning_rate": 0.00024295548999079977, "loss": 20.2498, "step": 57000 }, { "epoch": 5.47, "learning_rate": 0.0002421623679451794, "loss": 20.2228, "step": 57500 }, { "epoch": 5.52, "learning_rate": 0.00024136924589955903, "loss": 20.2119, "step": 58000 }, { "epoch": 5.57, "learning_rate": 0.0002405761238539386, "loss": 20.1897, "step": 58500 }, { "epoch": 5.62, "learning_rate": 0.00023978300180831823, "loss": 20.1715, "step": 59000 }, { "epoch": 5.66, "learning_rate": 0.00023898987976269786, "loss": 20.1637, "step": 59500 }, { "epoch": 5.71, "learning_rate": 0.00023819675771707748, "loss": 20.1498, "step": 60000 }, { "epoch": 5.76, "learning_rate": 0.0002374036356714571, "loss": 20.1365, "step": 60500 }, { "epoch": 5.81, "learning_rate": 0.00023661209986992798, "loss": 20.119, "step": 61000 }, { "epoch": 5.85, "learning_rate": 0.0002358189778243076, "loss": 20.1109, "step": 61500 }, { "epoch": 5.9, "learning_rate": 0.00023502585577868718, "loss": 20.1085, "step": 62000 }, { "epoch": 5.95, "learning_rate": 0.0002342327337330668, "loss": 20.0777, "step": 62500 }, { "epoch": 6.0, "learning_rate": 0.00023344278417562894, "loss": 20.0649, "step": 63000 }, { "epoch": 6.04, "learning_rate": 0.00023264966213000857, "loss": 20.0717, "step": 63500 }, { "epoch": 6.09, "learning_rate": 0.00023185654008438817, "loss": 20.0345, "step": 64000 }, { "epoch": 6.14, "learning_rate": 0.0002310634180387678, "loss": 20.0282, "step": 64500 }, { "epoch": 6.19, "learning_rate": 0.0002302702959931474, "loss": 20.012, "step": 65000 }, { "epoch": 6.23, "learning_rate": 0.00022947717394752702, "loss": 20.0094, "step": 65500 }, { "epoch": 6.28, "learning_rate": 0.00022868405190190665, "loss": 19.9956, "step": 66000 }, { "epoch": 6.33, "learning_rate": 0.00022789092985628628, "loss": 19.9879, "step": 66500 }, { "epoch": 6.38, "learning_rate": 0.00022710098029884838, "loss": 19.9747, "step": 67000 }, { "epoch": 6.42, "learning_rate": 0.000226307858253228, "loss": 19.9557, "step": 67500 }, { "epoch": 6.47, "learning_rate": 0.00022551473620760758, "loss": 19.9602, "step": 68000 }, { "epoch": 6.52, "learning_rate": 0.0002247216141619872, "loss": 19.9466, "step": 68500 }, { "epoch": 6.57, "learning_rate": 0.00022392849211636684, "loss": 19.9366, "step": 69000 }, { "epoch": 6.61, "learning_rate": 0.00022314171504711142, "loss": 19.9531, "step": 69500 }, { "epoch": 6.66, "learning_rate": 0.00022234859300149104, "loss": 19.929, "step": 70000 }, { "epoch": 6.71, "learning_rate": 0.00022155547095587067, "loss": 19.9242, "step": 70500 }, { "epoch": 6.76, "learning_rate": 0.0002207623489102503, "loss": 19.9227, "step": 71000 }, { "epoch": 6.8, "learning_rate": 0.0002199692268646299, "loss": 19.9067, "step": 71500 }, { "epoch": 6.85, "learning_rate": 0.00021917610481900953, "loss": 19.8972, "step": 72000 }, { "epoch": 6.9, "learning_rate": 0.00021838298277338916, "loss": 19.8938, "step": 72500 }, { "epoch": 6.95, "learning_rate": 0.00021758986072776878, "loss": 19.8794, "step": 73000 }, { "epoch": 7.0, "learning_rate": 0.00021679673868214838, "loss": 19.8704, "step": 73500 }, { "epoch": 7.04, "learning_rate": 0.000216003616636528, "loss": 19.8757, "step": 74000 }, { "epoch": 7.09, "learning_rate": 0.0002152104945909076, "loss": 19.8342, "step": 74500 }, { "epoch": 7.14, "learning_rate": 0.00021441737254528724, "loss": 19.8257, "step": 75000 }, { "epoch": 7.19, "learning_rate": 0.00021362425049966687, "loss": 19.8177, "step": 75500 }, { "epoch": 7.23, "learning_rate": 0.0002128311284540465, "loss": 19.8072, "step": 76000 }, { "epoch": 7.28, "learning_rate": 0.00021203800640842612, "loss": 19.7952, "step": 76500 }, { "epoch": 7.33, "learning_rate": 0.00021124488436280575, "loss": 19.7894, "step": 77000 }, { "epoch": 7.38, "learning_rate": 0.00021045652104945906, "loss": 19.8001, "step": 77500 }, { "epoch": 7.42, "learning_rate": 0.0002096633990038387, "loss": 19.7767, "step": 78000 }, { "epoch": 7.47, "learning_rate": 0.00020887027695821832, "loss": 19.7795, "step": 78500 }, { "epoch": 7.52, "learning_rate": 0.00020807715491259795, "loss": 19.7661, "step": 79000 }, { "epoch": 7.57, "learning_rate": 0.00020728561911106879, "loss": 19.7486, "step": 79500 }, { "epoch": 7.61, "learning_rate": 0.0002064924970654484, "loss": 19.7425, "step": 80000 } ], "max_steps": 210140, "num_train_epochs": 20, "total_flos": 0.0, "trial_name": null, "trial_params": null }