{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984,
  "eval_steps": 390,
  "global_step": 390,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0128,
      "grad_norm": 161.61465524426922,
      "learning_rate": 7.500000000000001e-08,
      "loss": 2.4411,
      "step": 5
    },
    {
      "epoch": 0.0256,
      "grad_norm": 154.06818191195163,
      "learning_rate": 1.5000000000000002e-07,
      "loss": 2.414,
      "step": 10
    },
    {
      "epoch": 0.0384,
      "grad_norm": 117.19018117409365,
      "learning_rate": 2.25e-07,
      "loss": 2.2681,
      "step": 15
    },
    {
      "epoch": 0.0512,
      "grad_norm": 75.19062918097424,
      "learning_rate": 3.0000000000000004e-07,
      "loss": 1.7582,
      "step": 20
    },
    {
      "epoch": 0.064,
      "grad_norm": 58.25969834757995,
      "learning_rate": 3.75e-07,
      "loss": 1.1475,
      "step": 25
    },
    {
      "epoch": 0.0768,
      "grad_norm": 17.7044048384125,
      "learning_rate": 4.5e-07,
      "loss": 0.606,
      "step": 30
    },
    {
      "epoch": 0.0896,
      "grad_norm": 16.186509390955344,
      "learning_rate": 5.25e-07,
      "loss": 0.4462,
      "step": 35
    },
    {
      "epoch": 0.1024,
      "grad_norm": 28.948881179049923,
      "learning_rate": 6.000000000000001e-07,
      "loss": 0.3074,
      "step": 40
    },
    {
      "epoch": 0.1152,
      "grad_norm": 9.08435498039366,
      "learning_rate": 6.75e-07,
      "loss": 0.2532,
      "step": 45
    },
    {
      "epoch": 0.128,
      "grad_norm": 6.5840402479180575,
      "learning_rate": 7.5e-07,
      "loss": 0.2201,
      "step": 50
    },
    {
      "epoch": 0.1408,
      "grad_norm": 7.327177616981191,
      "learning_rate": 8.25e-07,
      "loss": 0.1959,
      "step": 55
    },
    {
      "epoch": 0.1536,
      "grad_norm": 6.172579048424751,
      "learning_rate": 9e-07,
      "loss": 0.1806,
      "step": 60
    },
    {
      "epoch": 0.1664,
      "grad_norm": 6.085525518741895,
      "learning_rate": 9.75e-07,
      "loss": 0.169,
      "step": 65
    },
    {
      "epoch": 0.1792,
      "grad_norm": 16.06939591089291,
      "learning_rate": 1.05e-06,
      "loss": 0.1498,
      "step": 70
    },
    {
      "epoch": 0.192,
      "grad_norm": 8.11523279664268,
      "learning_rate": 1.125e-06,
      "loss": 0.1537,
      "step": 75
    },
    {
      "epoch": 0.2048,
      "grad_norm": 7.906944055310749,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 0.1439,
      "step": 80
    },
    {
      "epoch": 0.2176,
      "grad_norm": 6.43033129109838,
      "learning_rate": 1.275e-06,
      "loss": 0.1326,
      "step": 85
    },
    {
      "epoch": 0.2304,
      "grad_norm": 8.02640200042406,
      "learning_rate": 1.35e-06,
      "loss": 0.1227,
      "step": 90
    },
    {
      "epoch": 0.2432,
      "grad_norm": 6.69740377832319,
      "learning_rate": 1.425e-06,
      "loss": 0.1158,
      "step": 95
    },
    {
      "epoch": 0.256,
      "grad_norm": 5.197915018046733,
      "learning_rate": 1.5e-06,
      "loss": 0.1094,
      "step": 100
    },
    {
      "epoch": 0.2688,
      "grad_norm": 5.060029864642565,
      "learning_rate": 1.5750000000000002e-06,
      "loss": 0.1003,
      "step": 105
    },
    {
      "epoch": 0.2816,
      "grad_norm": 7.497811681915685,
      "learning_rate": 1.65e-06,
      "loss": 0.091,
      "step": 110
    },
    {
      "epoch": 0.2944,
      "grad_norm": 7.034577528708689,
      "learning_rate": 1.725e-06,
      "loss": 0.0861,
      "step": 115
    },
    {
      "epoch": 0.3072,
      "grad_norm": 11.080391678804938,
      "learning_rate": 1.8e-06,
      "loss": 0.0893,
      "step": 120
    },
    {
      "epoch": 0.32,
      "grad_norm": 19.426631066876524,
      "learning_rate": 1.875e-06,
      "loss": 0.0827,
      "step": 125
    },
    {
      "epoch": 0.3328,
      "grad_norm": 4.952420510103284,
      "learning_rate": 1.95e-06,
      "loss": 0.0831,
      "step": 130
    },
    {
      "epoch": 0.3456,
      "grad_norm": 10.227216835328893,
      "learning_rate": 2.025e-06,
      "loss": 0.0773,
      "step": 135
    },
    {
      "epoch": 0.3584,
      "grad_norm": 7.807787341242278,
      "learning_rate": 2.1e-06,
      "loss": 0.0761,
      "step": 140
    },
    {
      "epoch": 0.3712,
      "grad_norm": 5.774196535784976,
      "learning_rate": 2.175e-06,
      "loss": 0.0752,
      "step": 145
    },
    {
      "epoch": 0.384,
      "grad_norm": 3.9786936164168414,
      "learning_rate": 2.25e-06,
      "loss": 0.0722,
      "step": 150
    },
    {
      "epoch": 0.3968,
      "grad_norm": 4.6447288095056996,
      "learning_rate": 2.325e-06,
      "loss": 0.0606,
      "step": 155
    },
    {
      "epoch": 0.4096,
      "grad_norm": 5.149169735072777,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 0.0713,
      "step": 160
    },
    {
      "epoch": 0.4224,
      "grad_norm": 11.516436146930202,
      "learning_rate": 2.475e-06,
      "loss": 0.064,
      "step": 165
    },
    {
      "epoch": 0.4352,
      "grad_norm": 12.631792384481054,
      "learning_rate": 2.55e-06,
      "loss": 0.061,
      "step": 170
    },
    {
      "epoch": 0.448,
      "grad_norm": 12.412923039035704,
      "learning_rate": 2.6250000000000003e-06,
      "loss": 0.0621,
      "step": 175
    },
    {
      "epoch": 0.4608,
      "grad_norm": 6.029127426855161,
      "learning_rate": 2.7e-06,
      "loss": 0.0674,
      "step": 180
    },
    {
      "epoch": 0.4736,
      "grad_norm": 4.2613952463850415,
      "learning_rate": 2.775e-06,
      "loss": 0.068,
      "step": 185
    },
    {
      "epoch": 0.4864,
      "grad_norm": 6.372179389423735,
      "learning_rate": 2.85e-06,
      "loss": 0.0628,
      "step": 190
    },
    {
      "epoch": 0.4992,
      "grad_norm": 6.41276579707489,
      "learning_rate": 2.925e-06,
      "loss": 0.0612,
      "step": 195
    },
    {
      "epoch": 0.512,
      "grad_norm": 5.9771618570220655,
      "learning_rate": 3e-06,
      "loss": 0.0496,
      "step": 200
    },
    {
      "epoch": 0.5248,
      "grad_norm": 3.7882498321692775,
      "learning_rate": 2.9210526315789475e-06,
      "loss": 0.0579,
      "step": 205
    },
    {
      "epoch": 0.5376,
      "grad_norm": 3.2901697453585323,
      "learning_rate": 2.8421052631578946e-06,
      "loss": 0.0603,
      "step": 210
    },
    {
      "epoch": 0.5504,
      "grad_norm": 4.418781826576313,
      "learning_rate": 2.763157894736842e-06,
      "loss": 0.0596,
      "step": 215
    },
    {
      "epoch": 0.5632,
      "grad_norm": 6.086889232894656,
      "learning_rate": 2.6842105263157895e-06,
      "loss": 0.0585,
      "step": 220
    },
    {
      "epoch": 0.576,
      "grad_norm": 4.6861307205960925,
      "learning_rate": 2.605263157894737e-06,
      "loss": 0.0557,
      "step": 225
    },
    {
      "epoch": 0.5888,
      "grad_norm": 4.745714589263688,
      "learning_rate": 2.526315789473684e-06,
      "loss": 0.0562,
      "step": 230
    },
    {
      "epoch": 0.6016,
      "grad_norm": 4.697181466590033,
      "learning_rate": 2.4473684210526314e-06,
      "loss": 0.052,
      "step": 235
    },
    {
      "epoch": 0.6144,
      "grad_norm": 5.676309664606183,
      "learning_rate": 2.368421052631579e-06,
      "loss": 0.0581,
      "step": 240
    },
    {
      "epoch": 0.6272,
      "grad_norm": 6.187909188215041,
      "learning_rate": 2.2894736842105263e-06,
      "loss": 0.0596,
      "step": 245
    },
    {
      "epoch": 0.64,
      "grad_norm": 4.216720589170728,
      "learning_rate": 2.2105263157894738e-06,
      "loss": 0.0496,
      "step": 250
    },
    {
      "epoch": 0.6528,
      "grad_norm": 5.196715826539319,
      "learning_rate": 2.1315789473684212e-06,
      "loss": 0.0515,
      "step": 255
    },
    {
      "epoch": 0.6656,
      "grad_norm": 11.353541751433445,
      "learning_rate": 2.0526315789473687e-06,
      "loss": 0.0407,
      "step": 260
    },
    {
      "epoch": 0.6784,
      "grad_norm": 5.177415075589649,
      "learning_rate": 1.973684210526316e-06,
      "loss": 0.0417,
      "step": 265
    },
    {
      "epoch": 0.6912,
      "grad_norm": 8.946108487101375,
      "learning_rate": 1.8947368421052632e-06,
      "loss": 0.0488,
      "step": 270
    },
    {
      "epoch": 0.704,
      "grad_norm": 4.475569997515576,
      "learning_rate": 1.8157894736842106e-06,
      "loss": 0.0521,
      "step": 275
    },
    {
      "epoch": 0.7168,
      "grad_norm": 6.784366889150163,
      "learning_rate": 1.736842105263158e-06,
      "loss": 0.0473,
      "step": 280
    },
    {
      "epoch": 0.7296,
      "grad_norm": 12.414020629521627,
      "learning_rate": 1.6578947368421056e-06,
      "loss": 0.046,
      "step": 285
    },
    {
      "epoch": 0.7424,
      "grad_norm": 9.884779353467142,
      "learning_rate": 1.5789473684210526e-06,
      "loss": 0.0502,
      "step": 290
    },
    {
      "epoch": 0.7552,
      "grad_norm": 6.678072565724174,
      "learning_rate": 1.5e-06,
      "loss": 0.0473,
      "step": 295
    },
    {
      "epoch": 0.768,
      "grad_norm": 5.712356237287952,
      "learning_rate": 1.4210526315789473e-06,
      "loss": 0.0431,
      "step": 300
    },
    {
      "epoch": 0.7808,
      "grad_norm": 4.9395960174524145,
      "learning_rate": 1.3421052631578947e-06,
      "loss": 0.0401,
      "step": 305
    },
    {
      "epoch": 0.7936,
      "grad_norm": 6.834755086014392,
      "learning_rate": 1.263157894736842e-06,
      "loss": 0.0397,
      "step": 310
    },
    {
      "epoch": 0.8064,
      "grad_norm": 2.775196760101048,
      "learning_rate": 1.1842105263157894e-06,
      "loss": 0.0377,
      "step": 315
    },
    {
      "epoch": 0.8192,
      "grad_norm": 4.786995996855356,
      "learning_rate": 1.1052631578947369e-06,
      "loss": 0.0398,
      "step": 320
    },
    {
      "epoch": 0.832,
      "grad_norm": 14.331978713606828,
      "learning_rate": 1.0263157894736843e-06,
      "loss": 0.0387,
      "step": 325
    },
    {
      "epoch": 0.8448,
      "grad_norm": 5.110049394601092,
      "learning_rate": 9.473684210526316e-07,
      "loss": 0.0412,
      "step": 330
    },
    {
      "epoch": 0.8576,
      "grad_norm": 2.3797196581438,
      "learning_rate": 8.68421052631579e-07,
      "loss": 0.0407,
      "step": 335
    },
    {
      "epoch": 0.8704,
      "grad_norm": 3.1978316895686003,
      "learning_rate": 7.894736842105263e-07,
      "loss": 0.0397,
      "step": 340
    },
    {
      "epoch": 0.8832,
      "grad_norm": 4.6674024529684885,
      "learning_rate": 7.105263157894736e-07,
      "loss": 0.0399,
      "step": 345
    },
    {
      "epoch": 0.896,
      "grad_norm": 2.5803866776162767,
      "learning_rate": 6.31578947368421e-07,
      "loss": 0.0336,
      "step": 350
    },
    {
      "epoch": 0.9088,
      "grad_norm": 4.604776602886812,
      "learning_rate": 5.526315789473684e-07,
      "loss": 0.0383,
      "step": 355
    },
    {
      "epoch": 0.9216,
      "grad_norm": 11.254341619231537,
      "learning_rate": 4.736842105263158e-07,
      "loss": 0.0437,
      "step": 360
    },
    {
      "epoch": 0.9344,
      "grad_norm": 4.658978706509525,
      "learning_rate": 3.9473684210526315e-07,
      "loss": 0.0402,
      "step": 365
    },
    {
      "epoch": 0.9472,
      "grad_norm": 5.509226127844381,
      "learning_rate": 3.157894736842105e-07,
      "loss": 0.0422,
      "step": 370
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.556812465734739,
      "learning_rate": 2.368421052631579e-07,
      "loss": 0.0354,
      "step": 375
    },
    {
      "epoch": 0.9728,
      "grad_norm": 1.847642831707115,
      "learning_rate": 1.5789473684210525e-07,
      "loss": 0.0392,
      "step": 380
    },
    {
      "epoch": 0.9856,
      "grad_norm": 3.1108025008097293,
      "learning_rate": 7.894736842105262e-08,
      "loss": 0.0367,
      "step": 385
    },
    {
      "epoch": 0.9984,
      "grad_norm": 4.407140308538649,
      "learning_rate": 0.0,
      "loss": 0.0361,
      "step": 390
    },
    {
      "epoch": 0.9984,
      "eval_accuracy": 0.007941866145578897,
      "eval_loss": 0.031122559681534767,
      "eval_runtime": 44.2812,
      "eval_samples_per_second": 22.583,
      "eval_steps_per_second": 0.723,
      "step": 390
    },
    {
      "epoch": 0.9984,
      "logic_eval_extr_stps.D-0.answer_accuracy": 0.3333333333333333,
      "logic_eval_extr_stps.D-0.proof_accuracy.zero_one": 0.3333333333333333,
      "logic_eval_extr_stps.D-0.rouge1": 35.245819999999995,
      "logic_eval_extr_stps.D-0.rouge2": 0.3415933333333333,
      "logic_eval_extr_stps.D-0.rougeL": 35.245819999999995,
      "logic_eval_extr_stps.D-0.rougeLsum": 35.245819999999995,
      "logic_eval_extr_stps.D-1.answer_accuracy": 0.391304347826087,
      "logic_eval_extr_stps.D-1.proof_accuracy.zero_one": 0.30434782608695654,
      "logic_eval_extr_stps.D-1.rouge1": 30.34304782608695,
      "logic_eval_extr_stps.D-1.rouge2": 4.933721739130434,
      "logic_eval_extr_stps.D-1.rougeL": 30.34304782608695,
      "logic_eval_extr_stps.D-1.rougeLsum": 30.34304782608695,
      "logic_eval_extr_stps.D-2.answer_accuracy": 0.3333333333333333,
      "logic_eval_extr_stps.D-2.proof_accuracy.zero_one": 0.2222222222222222,
      "logic_eval_extr_stps.D-2.rouge1": 33.186255555555555,
      "logic_eval_extr_stps.D-2.rouge2": 6.600677777777777,
      "logic_eval_extr_stps.D-2.rougeL": 32.34552777777778,
      "logic_eval_extr_stps.D-2.rougeLsum": 32.828688888888884,
      "logic_eval_extr_stps.D-3.answer_accuracy": 0.21428571428571427,
      "logic_eval_extr_stps.D-3.proof_accuracy.zero_one": 0.07142857142857142,
      "logic_eval_extr_stps.D-3.rouge1": 11.588842857142854,
      "logic_eval_extr_stps.D-3.rouge2": 7.087435714285713,
      "logic_eval_extr_stps.D-3.rougeL": 10.851507142857143,
      "logic_eval_extr_stps.D-3.rougeLsum": 11.588842857142854,
      "logic_eval_extr_stps.D-None.answer_accuracy": 0.12903225806451613,
      "logic_eval_extr_stps.D-None.proof_accuracy.zero_one": 0.12903225806451613,
      "logic_eval_extr_stps.D-None.rouge1": 14.077829032258066,
      "logic_eval_extr_stps.D-None.rouge2": 0.0,
      "logic_eval_extr_stps.D-None.rougeL": 14.077829032258066,
      "logic_eval_extr_stps.D-None.rougeLsum": 14.077829032258066,
      "logic_eval_extr_stps.D-all.answer_accuracy": 0.26732673267326734,
      "logic_eval_extr_stps.D-all.proof_accuracy.zero_one": 0.2079207920792079,
      "logic_eval_extr_stps.D-all.rouge1": 23.986004950495047,
      "logic_eval_extr_stps.D-all.rouge2": 3.3330277227722767,
      "logic_eval_extr_stps.D-all.rougeL": 23.73396732673267,
      "logic_eval_extr_stps.D-all.rougeLsum": 23.922280198019806,
      "logic_eval_gen_len": 331.7734375,
      "logic_eval_runtime": 658.0701,
      "logic_eval_samples_per_second": 0.153,
      "logic_eval_steps_per_second": 0.006,
      "logic_eval_strct.D-0.answer_accuracy": 0.3333333333333333,
      "logic_eval_strct.D-0.proof_accuracy.zero_one": 0.3333333333333333,
      "logic_eval_strct.D-0.rouge1": 35.245819999999995,
      "logic_eval_strct.D-0.rouge2": 0.3415933333333333,
      "logic_eval_strct.D-0.rougeL": 35.245819999999995,
      "logic_eval_strct.D-0.rougeLsum": 35.245819999999995,
      "logic_eval_strct.D-1.answer_accuracy": 0.391304347826087,
      "logic_eval_strct.D-1.proof_accuracy.zero_one": 0.21739130434782608,
      "logic_eval_strct.D-1.rouge1": 30.34304782608695,
      "logic_eval_strct.D-1.rouge2": 4.933721739130434,
      "logic_eval_strct.D-1.rougeL": 30.34304782608695,
      "logic_eval_strct.D-1.rougeLsum": 30.34304782608695,
      "logic_eval_strct.D-2.answer_accuracy": 0.3333333333333333,
      "logic_eval_strct.D-2.proof_accuracy.zero_one": 0.2222222222222222,
      "logic_eval_strct.D-2.rouge1": 33.186255555555555,
      "logic_eval_strct.D-2.rouge2": 6.600677777777777,
      "logic_eval_strct.D-2.rougeL": 32.34552777777778,
      "logic_eval_strct.D-2.rougeLsum": 32.828688888888884,
      "logic_eval_strct.D-3.answer_accuracy": 0.21428571428571427,
      "logic_eval_strct.D-3.proof_accuracy.zero_one": 0.0,
      "logic_eval_strct.D-3.rouge1": 11.588842857142854,
      "logic_eval_strct.D-3.rouge2": 7.087435714285713,
      "logic_eval_strct.D-3.rougeL": 10.851507142857143,
      "logic_eval_strct.D-3.rougeLsum": 11.588842857142854,
      "logic_eval_strct.D-None.answer_accuracy": 0.12903225806451613,
      "logic_eval_strct.D-None.proof_accuracy.zero_one": 0.12903225806451613,
      "logic_eval_strct.D-None.rouge1": 14.077829032258066,
      "logic_eval_strct.D-None.rouge2": 0.0,
      "logic_eval_strct.D-None.rougeL": 14.077829032258066,
      "logic_eval_strct.D-None.rougeLsum": 14.077829032258066,
      "logic_eval_strct.D-all.answer_accuracy": 0.26732673267326734,
      "logic_eval_strct.D-all.proof_accuracy.zero_one": 0.1782178217821782,
      "logic_eval_strct.D-all.rouge1": 23.986004950495047,
      "logic_eval_strct.D-all.rouge2": 3.3330277227722767,
      "logic_eval_strct.D-all.rougeL": 23.73396732673267,
      "logic_eval_strct.D-all.rougeLsum": 23.922280198019806,
      "step": 390
    }
  ],
  "logging_steps": 5,
  "max_steps": 390,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 390,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1617567361794048.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}