{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 390, "global_step": 390, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0128, "grad_norm": 161.61465524426922, "learning_rate": 7.500000000000001e-08, "loss": 2.4411, "step": 5 }, { "epoch": 0.0256, "grad_norm": 154.06818191195163, "learning_rate": 1.5000000000000002e-07, "loss": 2.414, "step": 10 }, { "epoch": 0.0384, "grad_norm": 117.19018117409365, "learning_rate": 2.25e-07, "loss": 2.2681, "step": 15 }, { "epoch": 0.0512, "grad_norm": 75.19062918097424, "learning_rate": 3.0000000000000004e-07, "loss": 1.7582, "step": 20 }, { "epoch": 0.064, "grad_norm": 58.25969834757995, "learning_rate": 3.75e-07, "loss": 1.1475, "step": 25 }, { "epoch": 0.0768, "grad_norm": 17.7044048384125, "learning_rate": 4.5e-07, "loss": 0.606, "step": 30 }, { "epoch": 0.0896, "grad_norm": 16.186509390955344, "learning_rate": 5.25e-07, "loss": 0.4462, "step": 35 }, { "epoch": 0.1024, "grad_norm": 28.948881179049923, "learning_rate": 6.000000000000001e-07, "loss": 0.3074, "step": 40 }, { "epoch": 0.1152, "grad_norm": 9.08435498039366, "learning_rate": 6.75e-07, "loss": 0.2532, "step": 45 }, { "epoch": 0.128, "grad_norm": 6.5840402479180575, "learning_rate": 7.5e-07, "loss": 0.2201, "step": 50 }, { "epoch": 0.1408, "grad_norm": 7.327177616981191, "learning_rate": 8.25e-07, "loss": 0.1959, "step": 55 }, { "epoch": 0.1536, "grad_norm": 6.172579048424751, "learning_rate": 9e-07, "loss": 0.1806, "step": 60 }, { "epoch": 0.1664, "grad_norm": 6.085525518741895, "learning_rate": 9.75e-07, "loss": 0.169, "step": 65 }, { "epoch": 0.1792, "grad_norm": 16.06939591089291, "learning_rate": 1.05e-06, "loss": 0.1498, "step": 70 }, { "epoch": 0.192, "grad_norm": 8.11523279664268, "learning_rate": 1.125e-06, "loss": 0.1537, "step": 75 }, { "epoch": 0.2048, "grad_norm": 7.906944055310749, "learning_rate": 1.2000000000000002e-06, "loss": 0.1439, "step": 80 }, { "epoch": 0.2176, "grad_norm": 6.43033129109838, "learning_rate": 1.275e-06, "loss": 0.1326, "step": 85 }, { "epoch": 0.2304, "grad_norm": 8.02640200042406, "learning_rate": 1.35e-06, "loss": 0.1227, "step": 90 }, { "epoch": 0.2432, "grad_norm": 6.69740377832319, "learning_rate": 1.425e-06, "loss": 0.1158, "step": 95 }, { "epoch": 0.256, "grad_norm": 5.197915018046733, "learning_rate": 1.5e-06, "loss": 0.1094, "step": 100 }, { "epoch": 0.2688, "grad_norm": 5.060029864642565, "learning_rate": 1.5750000000000002e-06, "loss": 0.1003, "step": 105 }, { "epoch": 0.2816, "grad_norm": 7.497811681915685, "learning_rate": 1.65e-06, "loss": 0.091, "step": 110 }, { "epoch": 0.2944, "grad_norm": 7.034577528708689, "learning_rate": 1.725e-06, "loss": 0.0861, "step": 115 }, { "epoch": 0.3072, "grad_norm": 11.080391678804938, "learning_rate": 1.8e-06, "loss": 0.0893, "step": 120 }, { "epoch": 0.32, "grad_norm": 19.426631066876524, "learning_rate": 1.875e-06, "loss": 0.0827, "step": 125 }, { "epoch": 0.3328, "grad_norm": 4.952420510103284, "learning_rate": 1.95e-06, "loss": 0.0831, "step": 130 }, { "epoch": 0.3456, "grad_norm": 10.227216835328893, "learning_rate": 2.025e-06, "loss": 0.0773, "step": 135 }, { "epoch": 0.3584, "grad_norm": 7.807787341242278, "learning_rate": 2.1e-06, "loss": 0.0761, "step": 140 }, { "epoch": 0.3712, "grad_norm": 5.774196535784976, "learning_rate": 2.175e-06, "loss": 0.0752, "step": 145 }, { "epoch": 0.384, "grad_norm": 3.9786936164168414, "learning_rate": 2.25e-06, "loss": 0.0722, "step": 150 }, { "epoch": 0.3968, "grad_norm": 4.6447288095056996, "learning_rate": 2.325e-06, "loss": 0.0606, "step": 155 }, { "epoch": 0.4096, "grad_norm": 5.149169735072777, "learning_rate": 2.4000000000000003e-06, "loss": 0.0713, "step": 160 }, { "epoch": 0.4224, "grad_norm": 11.516436146930202, "learning_rate": 2.475e-06, "loss": 0.064, "step": 165 }, { "epoch": 0.4352, "grad_norm": 12.631792384481054, "learning_rate": 2.55e-06, "loss": 0.061, "step": 170 }, { "epoch": 0.448, "grad_norm": 12.412923039035704, "learning_rate": 2.6250000000000003e-06, "loss": 0.0621, "step": 175 }, { "epoch": 0.4608, "grad_norm": 6.029127426855161, "learning_rate": 2.7e-06, "loss": 0.0674, "step": 180 }, { "epoch": 0.4736, "grad_norm": 4.2613952463850415, "learning_rate": 2.775e-06, "loss": 0.068, "step": 185 }, { "epoch": 0.4864, "grad_norm": 6.372179389423735, "learning_rate": 2.85e-06, "loss": 0.0628, "step": 190 }, { "epoch": 0.4992, "grad_norm": 6.41276579707489, "learning_rate": 2.925e-06, "loss": 0.0612, "step": 195 }, { "epoch": 0.512, "grad_norm": 5.9771618570220655, "learning_rate": 3e-06, "loss": 0.0496, "step": 200 }, { "epoch": 0.5248, "grad_norm": 3.7882498321692775, "learning_rate": 2.9210526315789475e-06, "loss": 0.0579, "step": 205 }, { "epoch": 0.5376, "grad_norm": 3.2901697453585323, "learning_rate": 2.8421052631578946e-06, "loss": 0.0603, "step": 210 }, { "epoch": 0.5504, "grad_norm": 4.418781826576313, "learning_rate": 2.763157894736842e-06, "loss": 0.0596, "step": 215 }, { "epoch": 0.5632, "grad_norm": 6.086889232894656, "learning_rate": 2.6842105263157895e-06, "loss": 0.0585, "step": 220 }, { "epoch": 0.576, "grad_norm": 4.6861307205960925, "learning_rate": 2.605263157894737e-06, "loss": 0.0557, "step": 225 }, { "epoch": 0.5888, "grad_norm": 4.745714589263688, "learning_rate": 2.526315789473684e-06, "loss": 0.0562, "step": 230 }, { "epoch": 0.6016, "grad_norm": 4.697181466590033, "learning_rate": 2.4473684210526314e-06, "loss": 0.052, "step": 235 }, { "epoch": 0.6144, "grad_norm": 5.676309664606183, "learning_rate": 2.368421052631579e-06, "loss": 0.0581, "step": 240 }, { "epoch": 0.6272, "grad_norm": 6.187909188215041, "learning_rate": 2.2894736842105263e-06, "loss": 0.0596, "step": 245 }, { "epoch": 0.64, "grad_norm": 4.216720589170728, "learning_rate": 2.2105263157894738e-06, "loss": 0.0496, "step": 250 }, { "epoch": 0.6528, "grad_norm": 5.196715826539319, "learning_rate": 2.1315789473684212e-06, "loss": 0.0515, "step": 255 }, { "epoch": 0.6656, "grad_norm": 11.353541751433445, "learning_rate": 2.0526315789473687e-06, "loss": 0.0407, "step": 260 }, { "epoch": 0.6784, "grad_norm": 5.177415075589649, "learning_rate": 1.973684210526316e-06, "loss": 0.0417, "step": 265 }, { "epoch": 0.6912, "grad_norm": 8.946108487101375, "learning_rate": 1.8947368421052632e-06, "loss": 0.0488, "step": 270 }, { "epoch": 0.704, "grad_norm": 4.475569997515576, "learning_rate": 1.8157894736842106e-06, "loss": 0.0521, "step": 275 }, { "epoch": 0.7168, "grad_norm": 6.784366889150163, "learning_rate": 1.736842105263158e-06, "loss": 0.0473, "step": 280 }, { "epoch": 0.7296, "grad_norm": 12.414020629521627, "learning_rate": 1.6578947368421056e-06, "loss": 0.046, "step": 285 }, { "epoch": 0.7424, "grad_norm": 9.884779353467142, "learning_rate": 1.5789473684210526e-06, "loss": 0.0502, "step": 290 }, { "epoch": 0.7552, "grad_norm": 6.678072565724174, "learning_rate": 1.5e-06, "loss": 0.0473, "step": 295 }, { "epoch": 0.768, "grad_norm": 5.712356237287952, "learning_rate": 1.4210526315789473e-06, "loss": 0.0431, "step": 300 }, { "epoch": 0.7808, "grad_norm": 4.9395960174524145, "learning_rate": 1.3421052631578947e-06, "loss": 0.0401, "step": 305 }, { "epoch": 0.7936, "grad_norm": 6.834755086014392, "learning_rate": 1.263157894736842e-06, "loss": 0.0397, "step": 310 }, { "epoch": 0.8064, "grad_norm": 2.775196760101048, "learning_rate": 1.1842105263157894e-06, "loss": 0.0377, "step": 315 }, { "epoch": 0.8192, "grad_norm": 4.786995996855356, "learning_rate": 1.1052631578947369e-06, "loss": 0.0398, "step": 320 }, { "epoch": 0.832, "grad_norm": 14.331978713606828, "learning_rate": 1.0263157894736843e-06, "loss": 0.0387, "step": 325 }, { "epoch": 0.8448, "grad_norm": 5.110049394601092, "learning_rate": 9.473684210526316e-07, "loss": 0.0412, "step": 330 }, { "epoch": 0.8576, "grad_norm": 2.3797196581438, "learning_rate": 8.68421052631579e-07, "loss": 0.0407, "step": 335 }, { "epoch": 0.8704, "grad_norm": 3.1978316895686003, "learning_rate": 7.894736842105263e-07, "loss": 0.0397, "step": 340 }, { "epoch": 0.8832, "grad_norm": 4.6674024529684885, "learning_rate": 7.105263157894736e-07, "loss": 0.0399, "step": 345 }, { "epoch": 0.896, "grad_norm": 2.5803866776162767, "learning_rate": 6.31578947368421e-07, "loss": 0.0336, "step": 350 }, { "epoch": 0.9088, "grad_norm": 4.604776602886812, "learning_rate": 5.526315789473684e-07, "loss": 0.0383, "step": 355 }, { "epoch": 0.9216, "grad_norm": 11.254341619231537, "learning_rate": 4.736842105263158e-07, "loss": 0.0437, "step": 360 }, { "epoch": 0.9344, "grad_norm": 4.658978706509525, "learning_rate": 3.9473684210526315e-07, "loss": 0.0402, "step": 365 }, { "epoch": 0.9472, "grad_norm": 5.509226127844381, "learning_rate": 3.157894736842105e-07, "loss": 0.0422, "step": 370 }, { "epoch": 0.96, "grad_norm": 2.556812465734739, "learning_rate": 2.368421052631579e-07, "loss": 0.0354, "step": 375 }, { "epoch": 0.9728, "grad_norm": 1.847642831707115, "learning_rate": 1.5789473684210525e-07, "loss": 0.0392, "step": 380 }, { "epoch": 0.9856, "grad_norm": 3.1108025008097293, "learning_rate": 7.894736842105262e-08, "loss": 0.0367, "step": 385 }, { "epoch": 0.9984, "grad_norm": 4.407140308538649, "learning_rate": 0.0, "loss": 0.0361, "step": 390 }, { "epoch": 0.9984, "eval_accuracy": 0.007941866145578897, "eval_loss": 0.031122559681534767, "eval_runtime": 44.2812, "eval_samples_per_second": 22.583, "eval_steps_per_second": 0.723, "step": 390 }, { "epoch": 0.9984, "logic_eval_extr_stps.D-0.answer_accuracy": 0.3333333333333333, "logic_eval_extr_stps.D-0.proof_accuracy.zero_one": 0.3333333333333333, "logic_eval_extr_stps.D-0.rouge1": 35.245819999999995, "logic_eval_extr_stps.D-0.rouge2": 0.3415933333333333, "logic_eval_extr_stps.D-0.rougeL": 35.245819999999995, "logic_eval_extr_stps.D-0.rougeLsum": 35.245819999999995, "logic_eval_extr_stps.D-1.answer_accuracy": 0.391304347826087, "logic_eval_extr_stps.D-1.proof_accuracy.zero_one": 0.30434782608695654, "logic_eval_extr_stps.D-1.rouge1": 30.34304782608695, "logic_eval_extr_stps.D-1.rouge2": 4.933721739130434, "logic_eval_extr_stps.D-1.rougeL": 30.34304782608695, "logic_eval_extr_stps.D-1.rougeLsum": 30.34304782608695, "logic_eval_extr_stps.D-2.answer_accuracy": 0.3333333333333333, "logic_eval_extr_stps.D-2.proof_accuracy.zero_one": 0.2222222222222222, "logic_eval_extr_stps.D-2.rouge1": 33.186255555555555, "logic_eval_extr_stps.D-2.rouge2": 6.600677777777777, "logic_eval_extr_stps.D-2.rougeL": 32.34552777777778, "logic_eval_extr_stps.D-2.rougeLsum": 32.828688888888884, "logic_eval_extr_stps.D-3.answer_accuracy": 0.21428571428571427, "logic_eval_extr_stps.D-3.proof_accuracy.zero_one": 0.07142857142857142, "logic_eval_extr_stps.D-3.rouge1": 11.588842857142854, "logic_eval_extr_stps.D-3.rouge2": 7.087435714285713, "logic_eval_extr_stps.D-3.rougeL": 10.851507142857143, "logic_eval_extr_stps.D-3.rougeLsum": 11.588842857142854, "logic_eval_extr_stps.D-None.answer_accuracy": 0.12903225806451613, "logic_eval_extr_stps.D-None.proof_accuracy.zero_one": 0.12903225806451613, "logic_eval_extr_stps.D-None.rouge1": 14.077829032258066, "logic_eval_extr_stps.D-None.rouge2": 0.0, "logic_eval_extr_stps.D-None.rougeL": 14.077829032258066, "logic_eval_extr_stps.D-None.rougeLsum": 14.077829032258066, "logic_eval_extr_stps.D-all.answer_accuracy": 0.26732673267326734, "logic_eval_extr_stps.D-all.proof_accuracy.zero_one": 0.2079207920792079, "logic_eval_extr_stps.D-all.rouge1": 23.986004950495047, "logic_eval_extr_stps.D-all.rouge2": 3.3330277227722767, "logic_eval_extr_stps.D-all.rougeL": 23.73396732673267, "logic_eval_extr_stps.D-all.rougeLsum": 23.922280198019806, "logic_eval_gen_len": 331.7734375, "logic_eval_runtime": 658.0701, "logic_eval_samples_per_second": 0.153, "logic_eval_steps_per_second": 0.006, "logic_eval_strct.D-0.answer_accuracy": 0.3333333333333333, "logic_eval_strct.D-0.proof_accuracy.zero_one": 0.3333333333333333, "logic_eval_strct.D-0.rouge1": 35.245819999999995, "logic_eval_strct.D-0.rouge2": 0.3415933333333333, "logic_eval_strct.D-0.rougeL": 35.245819999999995, "logic_eval_strct.D-0.rougeLsum": 35.245819999999995, "logic_eval_strct.D-1.answer_accuracy": 0.391304347826087, "logic_eval_strct.D-1.proof_accuracy.zero_one": 0.21739130434782608, "logic_eval_strct.D-1.rouge1": 30.34304782608695, "logic_eval_strct.D-1.rouge2": 4.933721739130434, "logic_eval_strct.D-1.rougeL": 30.34304782608695, "logic_eval_strct.D-1.rougeLsum": 30.34304782608695, "logic_eval_strct.D-2.answer_accuracy": 0.3333333333333333, "logic_eval_strct.D-2.proof_accuracy.zero_one": 0.2222222222222222, "logic_eval_strct.D-2.rouge1": 33.186255555555555, "logic_eval_strct.D-2.rouge2": 6.600677777777777, "logic_eval_strct.D-2.rougeL": 32.34552777777778, "logic_eval_strct.D-2.rougeLsum": 32.828688888888884, "logic_eval_strct.D-3.answer_accuracy": 0.21428571428571427, "logic_eval_strct.D-3.proof_accuracy.zero_one": 0.0, "logic_eval_strct.D-3.rouge1": 11.588842857142854, "logic_eval_strct.D-3.rouge2": 7.087435714285713, "logic_eval_strct.D-3.rougeL": 10.851507142857143, "logic_eval_strct.D-3.rougeLsum": 11.588842857142854, "logic_eval_strct.D-None.answer_accuracy": 0.12903225806451613, "logic_eval_strct.D-None.proof_accuracy.zero_one": 0.12903225806451613, "logic_eval_strct.D-None.rouge1": 14.077829032258066, "logic_eval_strct.D-None.rouge2": 0.0, "logic_eval_strct.D-None.rougeL": 14.077829032258066, "logic_eval_strct.D-None.rougeLsum": 14.077829032258066, "logic_eval_strct.D-all.answer_accuracy": 0.26732673267326734, "logic_eval_strct.D-all.proof_accuracy.zero_one": 0.1782178217821782, "logic_eval_strct.D-all.rouge1": 23.986004950495047, "logic_eval_strct.D-all.rouge2": 3.3330277227722767, "logic_eval_strct.D-all.rougeL": 23.73396732673267, "logic_eval_strct.D-all.rougeLsum": 23.922280198019806, "step": 390 } ], "logging_steps": 5, "max_steps": 390, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 390, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1617567361794048.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }