|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 3540, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.211864406779661, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 0.00016949152542372882, |
|
"loss": 1.4882, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.423728813559322, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 0.00019934060086830688, |
|
"loss": 1.1689, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.635593220338983, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 0.00019676565602164643, |
|
"loss": 1.1984, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.847457627118644, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 0.00019229383471353536, |
|
"loss": 1.1429, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0593220338983051, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 0.00018601279710076812, |
|
"loss": 1.1166, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.271186440677966, |
|
"grad_norm": 1.0, |
|
"learning_rate": 0.00017804566902045448, |
|
"loss": 1.0057, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4830508474576272, |
|
"grad_norm": 0.796875, |
|
"learning_rate": 0.00016854862838056546, |
|
"loss": 1.003, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.694915254237288, |
|
"grad_norm": 0.68359375, |
|
"learning_rate": 0.0001577078436378025, |
|
"loss": 0.9752, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.9067796610169492, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 0.00014573582437715308, |
|
"loss": 1.0262, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.1186440677966103, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 0.00013286725553194177, |
|
"loss": 0.9129, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.330508474576271, |
|
"grad_norm": 1.453125, |
|
"learning_rate": 0.00011935439690527049, |
|
"loss": 0.8845, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.542372881355932, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 0.0001054621381750442, |
|
"loss": 0.883, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.7542372881355934, |
|
"grad_norm": 0.9296875, |
|
"learning_rate": 9.146280631826239e-05, |
|
"loss": 0.8786, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.9661016949152543, |
|
"grad_norm": 1.125, |
|
"learning_rate": 7.763082724352662e-05, |
|
"loss": 0.9204, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.1779661016949152, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 6.423734627864077e-05, |
|
"loss": 0.8706, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 3.389830508474576, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 5.1544912966734994e-05, |
|
"loss": 0.8667, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.601694915254237, |
|
"grad_norm": 0.92578125, |
|
"learning_rate": 3.980233436371869e-05, |
|
"loss": 0.8416, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 3.8135593220338984, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 2.9239797726771324e-05, |
|
"loss": 0.8213, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.02542372881356, |
|
"grad_norm": 0.734375, |
|
"learning_rate": 2.006435820276602e-05, |
|
"loss": 0.8559, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 4.237288135593221, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 1.2455879970504569e-05, |
|
"loss": 0.8274, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.4491525423728815, |
|
"grad_norm": 0.84375, |
|
"learning_rate": 6.563510401684669e-06, |
|
"loss": 0.831, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 4.661016949152542, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 2.50275635686813e-06, |
|
"loss": 0.831, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.872881355932203, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 3.5321992919458146e-07, |
|
"loss": 0.8406, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 3540, |
|
"total_flos": 8.16978613968e+16, |
|
"train_loss": 0.9628061628611074, |
|
"train_runtime": 7816.2612, |
|
"train_samples_per_second": 3.621, |
|
"train_steps_per_second": 0.453 |
|
} |
|
], |
|
"logging_steps": 150, |
|
"max_steps": 3540, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.16978613968e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|