{ "best_metric": 0.8520275354385376, "best_model_checkpoint": "Llama-3.2-1B-sportsqa-V1/checkpoint-130", "epoch": 4.971608832807571, "eval_steps": 10, "global_step": 130, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3785488958990536, "grad_norm": 0.5394421219825745, "learning_rate": 0.0002, "loss": 1.8051, "step": 10 }, { "epoch": 0.3785488958990536, "eval_loss": 1.5353755950927734, "eval_runtime": 6.6028, "eval_samples_per_second": 85.267, "eval_steps_per_second": 10.753, "step": 10 }, { "epoch": 0.7570977917981072, "grad_norm": 0.6756414771080017, "learning_rate": 0.0002, "loss": 1.4193, "step": 20 }, { "epoch": 0.7570977917981072, "eval_loss": 1.3329508304595947, "eval_runtime": 6.4761, "eval_samples_per_second": 86.935, "eval_steps_per_second": 10.963, "step": 20 }, { "epoch": 1.1482649842271293, "grad_norm": 0.4857617914676666, "learning_rate": 0.0002, "loss": 1.2895, "step": 30 }, { "epoch": 1.1482649842271293, "eval_loss": 1.1635785102844238, "eval_runtime": 6.4995, "eval_samples_per_second": 86.622, "eval_steps_per_second": 10.924, "step": 30 }, { "epoch": 1.526813880126183, "grad_norm": 0.43100810050964355, "learning_rate": 0.0002, "loss": 1.0738, "step": 40 }, { "epoch": 1.526813880126183, "eval_loss": 1.0900534391403198, "eval_runtime": 6.4609, "eval_samples_per_second": 87.14, "eval_steps_per_second": 10.989, "step": 40 }, { "epoch": 1.9053627760252367, "grad_norm": 0.661459743976593, "learning_rate": 0.0002, "loss": 0.9551, "step": 50 }, { "epoch": 1.9053627760252367, "eval_loss": 1.032881498336792, "eval_runtime": 6.3928, "eval_samples_per_second": 88.067, "eval_steps_per_second": 11.106, "step": 50 }, { "epoch": 2.2965299684542586, "grad_norm": 0.41446322202682495, "learning_rate": 0.0002, "loss": 1.0296, "step": 60 }, { "epoch": 2.2965299684542586, "eval_loss": 0.9979809522628784, "eval_runtime": 6.4989, "eval_samples_per_second": 86.63, "eval_steps_per_second": 10.925, "step": 60 }, { "epoch": 2.6750788643533125, "grad_norm": 0.6213859915733337, "learning_rate": 0.0002, "loss": 0.8314, "step": 70 }, { "epoch": 2.6750788643533125, "eval_loss": 0.9495619535446167, "eval_runtime": 6.4778, "eval_samples_per_second": 86.912, "eval_steps_per_second": 10.961, "step": 70 }, { "epoch": 3.0662460567823344, "grad_norm": 0.5072933435440063, "learning_rate": 0.0002, "loss": 0.9114, "step": 80 }, { "epoch": 3.0662460567823344, "eval_loss": 0.9263410568237305, "eval_runtime": 6.4355, "eval_samples_per_second": 87.484, "eval_steps_per_second": 11.033, "step": 80 }, { "epoch": 3.444794952681388, "grad_norm": 0.5831320881843567, "learning_rate": 0.0002, "loss": 0.7374, "step": 90 }, { "epoch": 3.444794952681388, "eval_loss": 0.9075612425804138, "eval_runtime": 6.4648, "eval_samples_per_second": 87.087, "eval_steps_per_second": 10.983, "step": 90 }, { "epoch": 3.823343848580442, "grad_norm": 0.5210297107696533, "learning_rate": 0.0002, "loss": 0.7606, "step": 100 }, { "epoch": 3.823343848580442, "eval_loss": 0.8877434730529785, "eval_runtime": 6.4394, "eval_samples_per_second": 87.43, "eval_steps_per_second": 11.026, "step": 100 }, { "epoch": 4.214511041009464, "grad_norm": 0.7101346254348755, "learning_rate": 0.0002, "loss": 0.7158, "step": 110 }, { "epoch": 4.214511041009464, "eval_loss": 0.8949338793754578, "eval_runtime": 6.4802, "eval_samples_per_second": 86.88, "eval_steps_per_second": 10.956, "step": 110 }, { "epoch": 4.593059936908517, "grad_norm": 0.4959416091442108, "learning_rate": 0.0002, "loss": 0.6792, "step": 120 }, { "epoch": 4.593059936908517, "eval_loss": 0.8711220622062683, "eval_runtime": 6.5288, "eval_samples_per_second": 86.233, "eval_steps_per_second": 10.875, "step": 120 }, { "epoch": 4.971608832807571, "grad_norm": 0.4901735484600067, "learning_rate": 0.0002, "loss": 0.6251, "step": 130 }, { "epoch": 4.971608832807571, "eval_loss": 0.8520275354385376, "eval_runtime": 6.5294, "eval_samples_per_second": 86.226, "eval_steps_per_second": 10.874, "step": 130 }, { "epoch": 4.971608832807571, "step": 130, "total_flos": 1.1913588129595392e+16, "train_loss": 0.9871828225942758, "train_runtime": 692.3499, "train_samples_per_second": 36.578, "train_steps_per_second": 0.188 } ], "logging_steps": 10, "max_steps": 130, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1913588129595392e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }