{ "best_metric": 0.8432615399360657, "best_model_checkpoint": "Llama-3.2-1B-sportsqa-V1/checkpoint-150", "epoch": 6.511041009463723, "eval_steps": 10, "global_step": 170, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3785488958990536, "grad_norm": 0.9724205136299133, "learning_rate": 0.0002, "loss": 1.4368, "step": 10 }, { "epoch": 0.3785488958990536, "eval_loss": 1.348753809928894, "eval_runtime": 6.5589, "eval_samples_per_second": 85.838, "eval_steps_per_second": 10.825, "step": 10 }, { "epoch": 0.7570977917981072, "grad_norm": 0.5118973255157471, "learning_rate": 0.0002, "loss": 1.2118, "step": 20 }, { "epoch": 0.7570977917981072, "eval_loss": 1.1572917699813843, "eval_runtime": 6.43, "eval_samples_per_second": 87.558, "eval_steps_per_second": 11.042, "step": 20 }, { "epoch": 1.1482649842271293, "grad_norm": 0.41862204670906067, "learning_rate": 0.0002, "loss": 1.1684, "step": 30 }, { "epoch": 1.1482649842271293, "eval_loss": 1.0873514413833618, "eval_runtime": 6.4834, "eval_samples_per_second": 86.838, "eval_steps_per_second": 10.951, "step": 30 }, { "epoch": 1.526813880126183, "grad_norm": 0.42144763469696045, "learning_rate": 0.0002, "loss": 0.9907, "step": 40 }, { "epoch": 1.526813880126183, "eval_loss": 1.0390795469284058, "eval_runtime": 6.4784, "eval_samples_per_second": 86.904, "eval_steps_per_second": 10.959, "step": 40 }, { "epoch": 1.9053627760252367, "grad_norm": 0.6625965237617493, "learning_rate": 0.0002, "loss": 0.8936, "step": 50 }, { "epoch": 1.9053627760252367, "eval_loss": 0.9832194447517395, "eval_runtime": 6.4575, "eval_samples_per_second": 87.185, "eval_steps_per_second": 10.995, "step": 50 }, { "epoch": 2.2965299684542586, "grad_norm": 0.5491335391998291, "learning_rate": 0.0002, "loss": 0.9537, "step": 60 }, { "epoch": 2.2965299684542586, "eval_loss": 0.9610708355903625, "eval_runtime": 6.4935, "eval_samples_per_second": 86.702, "eval_steps_per_second": 10.934, "step": 60 }, { "epoch": 2.6750788643533125, "grad_norm": 0.6222676038742065, "learning_rate": 0.0002, "loss": 0.7753, "step": 70 }, { "epoch": 2.6750788643533125, "eval_loss": 0.9286745190620422, "eval_runtime": 6.5271, "eval_samples_per_second": 86.256, "eval_steps_per_second": 10.878, "step": 70 }, { "epoch": 3.0662460567823344, "grad_norm": 0.47406917810440063, "learning_rate": 0.0002, "loss": 0.8796, "step": 80 }, { "epoch": 3.0662460567823344, "eval_loss": 0.9002482891082764, "eval_runtime": 6.5139, "eval_samples_per_second": 86.43, "eval_steps_per_second": 10.9, "step": 80 }, { "epoch": 3.444794952681388, "grad_norm": 0.54665207862854, "learning_rate": 0.0002, "loss": 0.6697, "step": 90 }, { "epoch": 3.444794952681388, "eval_loss": 0.8929345607757568, "eval_runtime": 6.349, "eval_samples_per_second": 88.676, "eval_steps_per_second": 11.183, "step": 90 }, { "epoch": 3.823343848580442, "grad_norm": 0.47858959436416626, "learning_rate": 0.0002, "loss": 0.7382, "step": 100 }, { "epoch": 3.823343848580442, "eval_loss": 0.8687711358070374, "eval_runtime": 6.4617, "eval_samples_per_second": 87.129, "eval_steps_per_second": 10.988, "step": 100 }, { "epoch": 4.214511041009464, "grad_norm": 0.6165306568145752, "learning_rate": 0.0002, "loss": 0.6795, "step": 110 }, { "epoch": 4.214511041009464, "eval_loss": 0.8820136189460754, "eval_runtime": 6.4456, "eval_samples_per_second": 87.346, "eval_steps_per_second": 11.015, "step": 110 }, { "epoch": 4.593059936908517, "grad_norm": 0.5470077395439148, "learning_rate": 0.0002, "loss": 0.646, "step": 120 }, { "epoch": 4.593059936908517, "eval_loss": 0.8543145060539246, "eval_runtime": 6.4186, "eval_samples_per_second": 87.713, "eval_steps_per_second": 11.062, "step": 120 }, { "epoch": 4.971608832807571, "grad_norm": 0.5192540287971497, "learning_rate": 0.0002, "loss": 0.5972, "step": 130 }, { "epoch": 4.971608832807571, "eval_loss": 0.8454616069793701, "eval_runtime": 6.3674, "eval_samples_per_second": 88.419, "eval_steps_per_second": 11.151, "step": 130 }, { "epoch": 5.3627760252365935, "grad_norm": 0.5664217472076416, "learning_rate": 0.0002, "loss": 0.6179, "step": 140 }, { "epoch": 5.3627760252365935, "eval_loss": 0.845314085483551, "eval_runtime": 6.4608, "eval_samples_per_second": 87.141, "eval_steps_per_second": 10.989, "step": 140 }, { "epoch": 5.7413249211356465, "grad_norm": 0.5141603350639343, "learning_rate": 0.0002, "loss": 0.5488, "step": 150 }, { "epoch": 5.7413249211356465, "eval_loss": 0.8432615399360657, "eval_runtime": 6.4351, "eval_samples_per_second": 87.489, "eval_steps_per_second": 11.033, "step": 150 }, { "epoch": 6.132492113564669, "grad_norm": 0.4900747835636139, "learning_rate": 0.0002, "loss": 0.5574, "step": 160 }, { "epoch": 6.132492113564669, "eval_loss": 0.8658251166343689, "eval_runtime": 6.4211, "eval_samples_per_second": 87.679, "eval_steps_per_second": 11.057, "step": 160 }, { "epoch": 6.511041009463723, "grad_norm": 0.5367661118507385, "learning_rate": 0.0002, "loss": 0.4778, "step": 170 }, { "epoch": 6.511041009463723, "eval_loss": 0.8633659482002258, "eval_runtime": 6.4456, "eval_samples_per_second": 87.346, "eval_steps_per_second": 11.015, "step": 170 }, { "epoch": 6.511041009463723, "step": 170, "total_flos": 1.5618225878728704e+16, "train_loss": 0.8142486740561092, "train_runtime": 890.4577, "train_samples_per_second": 39.817, "train_steps_per_second": 0.204 } ], "logging_steps": 10, "max_steps": 182, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5618225878728704e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }