{
  "best_metric": 0.8520275354385376,
  "best_model_checkpoint": "Llama-3.2-1B-sportsqa-V1/checkpoint-130",
  "epoch": 4.971608832807571,
  "eval_steps": 10,
  "global_step": 130,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.3785488958990536,
      "grad_norm": 0.5394421219825745,
      "learning_rate": 0.0002,
      "loss": 1.8051,
      "step": 10
    },
    {
      "epoch": 0.3785488958990536,
      "eval_loss": 1.5353755950927734,
      "eval_runtime": 6.6028,
      "eval_samples_per_second": 85.267,
      "eval_steps_per_second": 10.753,
      "step": 10
    },
    {
      "epoch": 0.7570977917981072,
      "grad_norm": 0.6756414771080017,
      "learning_rate": 0.0002,
      "loss": 1.4193,
      "step": 20
    },
    {
      "epoch": 0.7570977917981072,
      "eval_loss": 1.3329508304595947,
      "eval_runtime": 6.4761,
      "eval_samples_per_second": 86.935,
      "eval_steps_per_second": 10.963,
      "step": 20
    },
    {
      "epoch": 1.1482649842271293,
      "grad_norm": 0.4857617914676666,
      "learning_rate": 0.0002,
      "loss": 1.2895,
      "step": 30
    },
    {
      "epoch": 1.1482649842271293,
      "eval_loss": 1.1635785102844238,
      "eval_runtime": 6.4995,
      "eval_samples_per_second": 86.622,
      "eval_steps_per_second": 10.924,
      "step": 30
    },
    {
      "epoch": 1.526813880126183,
      "grad_norm": 0.43100810050964355,
      "learning_rate": 0.0002,
      "loss": 1.0738,
      "step": 40
    },
    {
      "epoch": 1.526813880126183,
      "eval_loss": 1.0900534391403198,
      "eval_runtime": 6.4609,
      "eval_samples_per_second": 87.14,
      "eval_steps_per_second": 10.989,
      "step": 40
    },
    {
      "epoch": 1.9053627760252367,
      "grad_norm": 0.661459743976593,
      "learning_rate": 0.0002,
      "loss": 0.9551,
      "step": 50
    },
    {
      "epoch": 1.9053627760252367,
      "eval_loss": 1.032881498336792,
      "eval_runtime": 6.3928,
      "eval_samples_per_second": 88.067,
      "eval_steps_per_second": 11.106,
      "step": 50
    },
    {
      "epoch": 2.2965299684542586,
      "grad_norm": 0.41446322202682495,
      "learning_rate": 0.0002,
      "loss": 1.0296,
      "step": 60
    },
    {
      "epoch": 2.2965299684542586,
      "eval_loss": 0.9979809522628784,
      "eval_runtime": 6.4989,
      "eval_samples_per_second": 86.63,
      "eval_steps_per_second": 10.925,
      "step": 60
    },
    {
      "epoch": 2.6750788643533125,
      "grad_norm": 0.6213859915733337,
      "learning_rate": 0.0002,
      "loss": 0.8314,
      "step": 70
    },
    {
      "epoch": 2.6750788643533125,
      "eval_loss": 0.9495619535446167,
      "eval_runtime": 6.4778,
      "eval_samples_per_second": 86.912,
      "eval_steps_per_second": 10.961,
      "step": 70
    },
    {
      "epoch": 3.0662460567823344,
      "grad_norm": 0.5072933435440063,
      "learning_rate": 0.0002,
      "loss": 0.9114,
      "step": 80
    },
    {
      "epoch": 3.0662460567823344,
      "eval_loss": 0.9263410568237305,
      "eval_runtime": 6.4355,
      "eval_samples_per_second": 87.484,
      "eval_steps_per_second": 11.033,
      "step": 80
    },
    {
      "epoch": 3.444794952681388,
      "grad_norm": 0.5831320881843567,
      "learning_rate": 0.0002,
      "loss": 0.7374,
      "step": 90
    },
    {
      "epoch": 3.444794952681388,
      "eval_loss": 0.9075612425804138,
      "eval_runtime": 6.4648,
      "eval_samples_per_second": 87.087,
      "eval_steps_per_second": 10.983,
      "step": 90
    },
    {
      "epoch": 3.823343848580442,
      "grad_norm": 0.5210297107696533,
      "learning_rate": 0.0002,
      "loss": 0.7606,
      "step": 100
    },
    {
      "epoch": 3.823343848580442,
      "eval_loss": 0.8877434730529785,
      "eval_runtime": 6.4394,
      "eval_samples_per_second": 87.43,
      "eval_steps_per_second": 11.026,
      "step": 100
    },
    {
      "epoch": 4.214511041009464,
      "grad_norm": 0.7101346254348755,
      "learning_rate": 0.0002,
      "loss": 0.7158,
      "step": 110
    },
    {
      "epoch": 4.214511041009464,
      "eval_loss": 0.8949338793754578,
      "eval_runtime": 6.4802,
      "eval_samples_per_second": 86.88,
      "eval_steps_per_second": 10.956,
      "step": 110
    },
    {
      "epoch": 4.593059936908517,
      "grad_norm": 0.4959416091442108,
      "learning_rate": 0.0002,
      "loss": 0.6792,
      "step": 120
    },
    {
      "epoch": 4.593059936908517,
      "eval_loss": 0.8711220622062683,
      "eval_runtime": 6.5288,
      "eval_samples_per_second": 86.233,
      "eval_steps_per_second": 10.875,
      "step": 120
    },
    {
      "epoch": 4.971608832807571,
      "grad_norm": 0.4901735484600067,
      "learning_rate": 0.0002,
      "loss": 0.6251,
      "step": 130
    },
    {
      "epoch": 4.971608832807571,
      "eval_loss": 0.8520275354385376,
      "eval_runtime": 6.5294,
      "eval_samples_per_second": 86.226,
      "eval_steps_per_second": 10.874,
      "step": 130
    },
    {
      "epoch": 4.971608832807571,
      "step": 130,
      "total_flos": 1.1913588129595392e+16,
      "train_loss": 0.9871828225942758,
      "train_runtime": 692.3499,
      "train_samples_per_second": 36.578,
      "train_steps_per_second": 0.188
    }
  ],
  "logging_steps": 10,
  "max_steps": 130,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 10,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1913588129595392e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}