Llama-3.2-1B-sportsqa-V1 / trainer_state.json
Kwhale's picture
End of training
5dc3b53 verified
{
"best_metric": 0.8520275354385376,
"best_model_checkpoint": "Llama-3.2-1B-sportsqa-V1/checkpoint-130",
"epoch": 4.971608832807571,
"eval_steps": 10,
"global_step": 130,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.3785488958990536,
"grad_norm": 0.5394421219825745,
"learning_rate": 0.0002,
"loss": 1.8051,
"step": 10
},
{
"epoch": 0.3785488958990536,
"eval_loss": 1.5353755950927734,
"eval_runtime": 6.6028,
"eval_samples_per_second": 85.267,
"eval_steps_per_second": 10.753,
"step": 10
},
{
"epoch": 0.7570977917981072,
"grad_norm": 0.6756414771080017,
"learning_rate": 0.0002,
"loss": 1.4193,
"step": 20
},
{
"epoch": 0.7570977917981072,
"eval_loss": 1.3329508304595947,
"eval_runtime": 6.4761,
"eval_samples_per_second": 86.935,
"eval_steps_per_second": 10.963,
"step": 20
},
{
"epoch": 1.1482649842271293,
"grad_norm": 0.4857617914676666,
"learning_rate": 0.0002,
"loss": 1.2895,
"step": 30
},
{
"epoch": 1.1482649842271293,
"eval_loss": 1.1635785102844238,
"eval_runtime": 6.4995,
"eval_samples_per_second": 86.622,
"eval_steps_per_second": 10.924,
"step": 30
},
{
"epoch": 1.526813880126183,
"grad_norm": 0.43100810050964355,
"learning_rate": 0.0002,
"loss": 1.0738,
"step": 40
},
{
"epoch": 1.526813880126183,
"eval_loss": 1.0900534391403198,
"eval_runtime": 6.4609,
"eval_samples_per_second": 87.14,
"eval_steps_per_second": 10.989,
"step": 40
},
{
"epoch": 1.9053627760252367,
"grad_norm": 0.661459743976593,
"learning_rate": 0.0002,
"loss": 0.9551,
"step": 50
},
{
"epoch": 1.9053627760252367,
"eval_loss": 1.032881498336792,
"eval_runtime": 6.3928,
"eval_samples_per_second": 88.067,
"eval_steps_per_second": 11.106,
"step": 50
},
{
"epoch": 2.2965299684542586,
"grad_norm": 0.41446322202682495,
"learning_rate": 0.0002,
"loss": 1.0296,
"step": 60
},
{
"epoch": 2.2965299684542586,
"eval_loss": 0.9979809522628784,
"eval_runtime": 6.4989,
"eval_samples_per_second": 86.63,
"eval_steps_per_second": 10.925,
"step": 60
},
{
"epoch": 2.6750788643533125,
"grad_norm": 0.6213859915733337,
"learning_rate": 0.0002,
"loss": 0.8314,
"step": 70
},
{
"epoch": 2.6750788643533125,
"eval_loss": 0.9495619535446167,
"eval_runtime": 6.4778,
"eval_samples_per_second": 86.912,
"eval_steps_per_second": 10.961,
"step": 70
},
{
"epoch": 3.0662460567823344,
"grad_norm": 0.5072933435440063,
"learning_rate": 0.0002,
"loss": 0.9114,
"step": 80
},
{
"epoch": 3.0662460567823344,
"eval_loss": 0.9263410568237305,
"eval_runtime": 6.4355,
"eval_samples_per_second": 87.484,
"eval_steps_per_second": 11.033,
"step": 80
},
{
"epoch": 3.444794952681388,
"grad_norm": 0.5831320881843567,
"learning_rate": 0.0002,
"loss": 0.7374,
"step": 90
},
{
"epoch": 3.444794952681388,
"eval_loss": 0.9075612425804138,
"eval_runtime": 6.4648,
"eval_samples_per_second": 87.087,
"eval_steps_per_second": 10.983,
"step": 90
},
{
"epoch": 3.823343848580442,
"grad_norm": 0.5210297107696533,
"learning_rate": 0.0002,
"loss": 0.7606,
"step": 100
},
{
"epoch": 3.823343848580442,
"eval_loss": 0.8877434730529785,
"eval_runtime": 6.4394,
"eval_samples_per_second": 87.43,
"eval_steps_per_second": 11.026,
"step": 100
},
{
"epoch": 4.214511041009464,
"grad_norm": 0.7101346254348755,
"learning_rate": 0.0002,
"loss": 0.7158,
"step": 110
},
{
"epoch": 4.214511041009464,
"eval_loss": 0.8949338793754578,
"eval_runtime": 6.4802,
"eval_samples_per_second": 86.88,
"eval_steps_per_second": 10.956,
"step": 110
},
{
"epoch": 4.593059936908517,
"grad_norm": 0.4959416091442108,
"learning_rate": 0.0002,
"loss": 0.6792,
"step": 120
},
{
"epoch": 4.593059936908517,
"eval_loss": 0.8711220622062683,
"eval_runtime": 6.5288,
"eval_samples_per_second": 86.233,
"eval_steps_per_second": 10.875,
"step": 120
},
{
"epoch": 4.971608832807571,
"grad_norm": 0.4901735484600067,
"learning_rate": 0.0002,
"loss": 0.6251,
"step": 130
},
{
"epoch": 4.971608832807571,
"eval_loss": 0.8520275354385376,
"eval_runtime": 6.5294,
"eval_samples_per_second": 86.226,
"eval_steps_per_second": 10.874,
"step": 130
},
{
"epoch": 4.971608832807571,
"step": 130,
"total_flos": 1.1913588129595392e+16,
"train_loss": 0.9871828225942758,
"train_runtime": 692.3499,
"train_samples_per_second": 36.578,
"train_steps_per_second": 0.188
}
],
"logging_steps": 10,
"max_steps": 130,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 10,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1913588129595392e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}