{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.999927855133107, "global_step": 55440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0004954906204906205, "loss": 0.5606, "step": 500 }, { "epoch": 0.14, "learning_rate": 0.000490981240981241, "loss": 0.4359, "step": 1000 }, { "epoch": 0.22, "learning_rate": 0.00048647186147186144, "loss": 0.3852, "step": 1500 }, { "epoch": 0.29, "learning_rate": 0.000481962481962482, "loss": 0.3778, "step": 2000 }, { "epoch": 0.36, "learning_rate": 0.00047745310245310245, "loss": 0.3523, "step": 2500 }, { "epoch": 0.43, "learning_rate": 0.0004729437229437229, "loss": 0.3319, "step": 3000 }, { "epoch": 0.51, "learning_rate": 0.00046843434343434346, "loss": 0.3153, "step": 3500 }, { "epoch": 0.58, "learning_rate": 0.00046392496392496394, "loss": 0.3027, "step": 4000 }, { "epoch": 0.65, "learning_rate": 0.0004594155844155844, "loss": 0.3063, "step": 4500 }, { "epoch": 0.72, "learning_rate": 0.00045490620490620494, "loss": 0.292, "step": 5000 }, { "epoch": 0.79, "learning_rate": 0.0004503968253968254, "loss": 0.2802, "step": 5500 }, { "epoch": 0.87, "learning_rate": 0.0004458874458874459, "loss": 0.2705, "step": 6000 }, { "epoch": 0.94, "learning_rate": 0.0004413780663780664, "loss": 0.2669, "step": 6500 }, { "epoch": 1.01, "learning_rate": 0.00043686868686868685, "loss": 0.2616, "step": 7000 }, { "epoch": 1.08, "learning_rate": 0.00043235930735930733, "loss": 0.2486, "step": 7500 }, { "epoch": 1.15, "learning_rate": 0.00042784992784992786, "loss": 0.2432, "step": 8000 }, { "epoch": 1.23, "learning_rate": 0.00042334054834054834, "loss": 0.2413, "step": 8500 }, { "epoch": 1.3, "learning_rate": 0.0004188311688311688, "loss": 0.2479, "step": 9000 }, { "epoch": 1.37, "learning_rate": 0.00041432178932178935, "loss": 0.2344, "step": 9500 }, { "epoch": 1.44, "learning_rate": 0.0004098124098124098, "loss": 0.2363, "step": 10000 }, { "epoch": 1.52, "learning_rate": 0.0004053030303030303, "loss": 0.2328, "step": 10500 }, { "epoch": 1.59, "learning_rate": 0.00040079365079365083, "loss": 0.237, "step": 11000 }, { "epoch": 1.66, "learning_rate": 0.0003962842712842713, "loss": 0.2284, "step": 11500 }, { "epoch": 1.73, "learning_rate": 0.0003917748917748918, "loss": 0.2286, "step": 12000 }, { "epoch": 1.8, "learning_rate": 0.00038726551226551226, "loss": 0.222, "step": 12500 }, { "epoch": 1.88, "learning_rate": 0.00038275613275613274, "loss": 0.2144, "step": 13000 }, { "epoch": 1.95, "learning_rate": 0.0003782467532467532, "loss": 0.2272, "step": 13500 }, { "epoch": 2.02, "learning_rate": 0.00037373737373737375, "loss": 0.2107, "step": 14000 }, { "epoch": 2.09, "learning_rate": 0.0003692279942279942, "loss": 0.2054, "step": 14500 }, { "epoch": 2.16, "learning_rate": 0.0003647186147186147, "loss": 0.1953, "step": 15000 }, { "epoch": 2.24, "learning_rate": 0.00036020923520923523, "loss": 0.1955, "step": 15500 }, { "epoch": 2.31, "learning_rate": 0.0003556998556998557, "loss": 0.2012, "step": 16000 }, { "epoch": 2.38, "learning_rate": 0.0003511904761904762, "loss": 0.1938, "step": 16500 }, { "epoch": 2.45, "learning_rate": 0.0003466810966810967, "loss": 0.1932, "step": 17000 }, { "epoch": 2.53, "learning_rate": 0.0003421717171717172, "loss": 0.1944, "step": 17500 }, { "epoch": 2.6, "learning_rate": 0.00033766233766233767, "loss": 0.1967, "step": 18000 }, { "epoch": 2.67, "learning_rate": 0.00033315295815295815, "loss": 0.1955, "step": 18500 }, { "epoch": 2.74, "learning_rate": 0.0003286435786435786, "loss": 0.1906, "step": 19000 }, { "epoch": 2.81, "learning_rate": 0.0003241341991341991, "loss": 0.1894, "step": 19500 }, { "epoch": 2.89, "learning_rate": 0.00031962481962481964, "loss": 0.1908, "step": 20000 }, { "epoch": 2.96, "learning_rate": 0.0003151154401154401, "loss": 0.1835, "step": 20500 }, { "epoch": 3.03, "learning_rate": 0.0003106060606060606, "loss": 0.1755, "step": 21000 }, { "epoch": 3.1, "learning_rate": 0.0003060966810966811, "loss": 0.1725, "step": 21500 }, { "epoch": 3.17, "learning_rate": 0.0003015873015873016, "loss": 0.172, "step": 22000 }, { "epoch": 3.25, "learning_rate": 0.0002970779220779221, "loss": 0.172, "step": 22500 }, { "epoch": 3.32, "learning_rate": 0.0002925685425685426, "loss": 0.1649, "step": 23000 }, { "epoch": 3.39, "learning_rate": 0.0002880591630591631, "loss": 0.1683, "step": 23500 }, { "epoch": 3.46, "learning_rate": 0.00028354978354978356, "loss": 0.1696, "step": 24000 }, { "epoch": 3.54, "learning_rate": 0.00027904040404040404, "loss": 0.1695, "step": 24500 }, { "epoch": 3.61, "learning_rate": 0.0002745310245310245, "loss": 0.1705, "step": 25000 }, { "epoch": 3.68, "learning_rate": 0.000270021645021645, "loss": 0.1659, "step": 25500 }, { "epoch": 3.75, "learning_rate": 0.0002655122655122655, "loss": 0.1694, "step": 26000 }, { "epoch": 3.82, "learning_rate": 0.000261002886002886, "loss": 0.1599, "step": 26500 }, { "epoch": 3.9, "learning_rate": 0.0002564935064935065, "loss": 0.1638, "step": 27000 }, { "epoch": 3.97, "learning_rate": 0.000251984126984127, "loss": 0.1662, "step": 27500 }, { "epoch": 4.04, "learning_rate": 0.0002474747474747475, "loss": 0.1587, "step": 28000 }, { "epoch": 4.11, "learning_rate": 0.00024296536796536796, "loss": 0.1453, "step": 28500 }, { "epoch": 4.18, "learning_rate": 0.00023845598845598847, "loss": 0.1505, "step": 29000 }, { "epoch": 4.26, "learning_rate": 0.00023394660894660897, "loss": 0.1498, "step": 29500 }, { "epoch": 4.33, "learning_rate": 0.00022943722943722945, "loss": 0.145, "step": 30000 }, { "epoch": 4.4, "learning_rate": 0.00022492784992784992, "loss": 0.1446, "step": 30500 }, { "epoch": 4.47, "learning_rate": 0.00022041847041847043, "loss": 0.1425, "step": 31000 }, { "epoch": 4.55, "learning_rate": 0.0002159090909090909, "loss": 0.1465, "step": 31500 }, { "epoch": 4.62, "learning_rate": 0.0002113997113997114, "loss": 0.1501, "step": 32000 }, { "epoch": 4.69, "learning_rate": 0.00020689033189033191, "loss": 0.1488, "step": 32500 }, { "epoch": 4.76, "learning_rate": 0.0002023809523809524, "loss": 0.1506, "step": 33000 }, { "epoch": 4.83, "learning_rate": 0.00019787157287157287, "loss": 0.1457, "step": 33500 }, { "epoch": 4.91, "learning_rate": 0.00019336219336219337, "loss": 0.1436, "step": 34000 }, { "epoch": 4.98, "learning_rate": 0.00018885281385281385, "loss": 0.1447, "step": 34500 }, { "epoch": 5.05, "learning_rate": 0.00018434343434343435, "loss": 0.1381, "step": 35000 }, { "epoch": 5.12, "learning_rate": 0.00017983405483405486, "loss": 0.1297, "step": 35500 }, { "epoch": 5.19, "learning_rate": 0.00017532467532467534, "loss": 0.1284, "step": 36000 }, { "epoch": 5.27, "learning_rate": 0.0001708152958152958, "loss": 0.1375, "step": 36500 }, { "epoch": 5.34, "learning_rate": 0.00016630591630591632, "loss": 0.1326, "step": 37000 }, { "epoch": 5.41, "learning_rate": 0.0001617965367965368, "loss": 0.1351, "step": 37500 }, { "epoch": 5.48, "learning_rate": 0.00015728715728715727, "loss": 0.1338, "step": 38000 }, { "epoch": 5.56, "learning_rate": 0.0001527777777777778, "loss": 0.1303, "step": 38500 }, { "epoch": 5.63, "learning_rate": 0.00014826839826839828, "loss": 0.1311, "step": 39000 }, { "epoch": 5.7, "learning_rate": 0.00014375901875901876, "loss": 0.1332, "step": 39500 }, { "epoch": 5.77, "learning_rate": 0.00013924963924963926, "loss": 0.132, "step": 40000 }, { "epoch": 5.84, "learning_rate": 0.00013474025974025974, "loss": 0.1299, "step": 40500 }, { "epoch": 5.92, "learning_rate": 0.00013023088023088021, "loss": 0.128, "step": 41000 }, { "epoch": 5.99, "learning_rate": 0.00012572150072150075, "loss": 0.131, "step": 41500 }, { "epoch": 6.06, "learning_rate": 0.00012121212121212122, "loss": 0.1173, "step": 42000 }, { "epoch": 6.13, "learning_rate": 0.0001167027417027417, "loss": 0.1195, "step": 42500 }, { "epoch": 6.2, "learning_rate": 0.00011219336219336219, "loss": 0.1184, "step": 43000 }, { "epoch": 6.28, "learning_rate": 0.0001076839826839827, "loss": 0.1226, "step": 43500 }, { "epoch": 6.35, "learning_rate": 0.00010317460317460317, "loss": 0.1182, "step": 44000 }, { "epoch": 6.42, "learning_rate": 9.866522366522366e-05, "loss": 0.1249, "step": 44500 }, { "epoch": 6.49, "learning_rate": 9.415584415584417e-05, "loss": 0.1187, "step": 45000 }, { "epoch": 6.57, "learning_rate": 8.964646464646464e-05, "loss": 0.1214, "step": 45500 }, { "epoch": 6.64, "learning_rate": 8.513708513708513e-05, "loss": 0.1194, "step": 46000 }, { "epoch": 6.71, "learning_rate": 8.062770562770564e-05, "loss": 0.1187, "step": 46500 }, { "epoch": 6.78, "learning_rate": 7.611832611832612e-05, "loss": 0.1168, "step": 47000 }, { "epoch": 6.85, "learning_rate": 7.16089466089466e-05, "loss": 0.12, "step": 47500 }, { "epoch": 6.93, "learning_rate": 6.709956709956711e-05, "loss": 0.1198, "step": 48000 }, { "epoch": 7.0, "learning_rate": 6.259018759018759e-05, "loss": 0.1189, "step": 48500 }, { "epoch": 7.07, "learning_rate": 5.808080808080808e-05, "loss": 0.1116, "step": 49000 }, { "epoch": 7.14, "learning_rate": 5.357142857142857e-05, "loss": 0.1114, "step": 49500 }, { "epoch": 7.21, "learning_rate": 4.9062049062049066e-05, "loss": 0.1108, "step": 50000 }, { "epoch": 7.29, "learning_rate": 4.455266955266955e-05, "loss": 0.1101, "step": 50500 }, { "epoch": 7.36, "learning_rate": 4.004329004329004e-05, "loss": 0.1062, "step": 51000 }, { "epoch": 7.43, "learning_rate": 3.553391053391054e-05, "loss": 0.1141, "step": 51500 }, { "epoch": 7.5, "learning_rate": 3.102453102453102e-05, "loss": 0.1106, "step": 52000 }, { "epoch": 7.58, "learning_rate": 2.6515151515151516e-05, "loss": 0.1105, "step": 52500 }, { "epoch": 7.65, "learning_rate": 2.2005772005772003e-05, "loss": 0.11, "step": 53000 }, { "epoch": 7.72, "learning_rate": 1.7496392496392497e-05, "loss": 0.1117, "step": 53500 }, { "epoch": 7.79, "learning_rate": 1.2987012987012988e-05, "loss": 0.111, "step": 54000 }, { "epoch": 7.86, "learning_rate": 8.477633477633478e-06, "loss": 0.1144, "step": 54500 }, { "epoch": 7.94, "learning_rate": 3.968253968253968e-06, "loss": 0.1126, "step": 55000 }, { "epoch": 8.0, "step": 55440, "total_flos": 9.377895890180506e+17, "train_loss": 0.18044453823205198, "train_runtime": 140707.6037, "train_samples_per_second": 12.609, "train_steps_per_second": 0.394 } ], "max_steps": 55440, "num_train_epochs": 8, "total_flos": 9.377895890180506e+17, "trial_name": null, "trial_params": null }