|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.3526750367605476, |
|
"eval_steps": 100, |
|
"global_step": 2600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09048750141386722, |
|
"eval_accuracy": 0.16714730898859703, |
|
"eval_loss": 4.686699867248535, |
|
"eval_runtime": 125.2427, |
|
"eval_samples_per_second": 7.218, |
|
"eval_steps_per_second": 7.218, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18097500282773443, |
|
"eval_accuracy": 0.25826880194607743, |
|
"eval_loss": 3.9017727375030518, |
|
"eval_runtime": 125.5606, |
|
"eval_samples_per_second": 7.2, |
|
"eval_steps_per_second": 7.2, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2714625042416016, |
|
"eval_accuracy": 0.29766981254599767, |
|
"eval_loss": 3.5929646492004395, |
|
"eval_runtime": 125.5573, |
|
"eval_samples_per_second": 7.2, |
|
"eval_steps_per_second": 7.2, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.36195000565546886, |
|
"eval_accuracy": 0.3236774609630093, |
|
"eval_loss": 3.4225211143493652, |
|
"eval_runtime": 125.7379, |
|
"eval_samples_per_second": 7.19, |
|
"eval_steps_per_second": 7.19, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.45243750706933605, |
|
"grad_norm": 4.40625, |
|
"learning_rate": 4.7737556561085976e-05, |
|
"loss": 4.0632, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.45243750706933605, |
|
"eval_accuracy": 0.3405748527595257, |
|
"eval_loss": 3.307744026184082, |
|
"eval_runtime": 125.5713, |
|
"eval_samples_per_second": 7.199, |
|
"eval_steps_per_second": 7.199, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5429250084832032, |
|
"eval_accuracy": 0.354706238577747, |
|
"eval_loss": 3.21340012550354, |
|
"eval_runtime": 125.6117, |
|
"eval_samples_per_second": 7.197, |
|
"eval_steps_per_second": 7.197, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6334125098970704, |
|
"eval_accuracy": 0.3676221206998826, |
|
"eval_loss": 3.127941608428955, |
|
"eval_runtime": 125.646, |
|
"eval_samples_per_second": 7.195, |
|
"eval_steps_per_second": 7.195, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7239000113109377, |
|
"eval_accuracy": 0.377770033996102, |
|
"eval_loss": 3.0699830055236816, |
|
"eval_runtime": 125.238, |
|
"eval_samples_per_second": 7.218, |
|
"eval_steps_per_second": 7.218, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8143875127248049, |
|
"eval_accuracy": 0.3878323087639568, |
|
"eval_loss": 2.992367744445801, |
|
"eval_runtime": 126.0865, |
|
"eval_samples_per_second": 7.17, |
|
"eval_steps_per_second": 7.17, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9048750141386721, |
|
"grad_norm": 5.15625, |
|
"learning_rate": 4.547511312217195e-05, |
|
"loss": 3.0582, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9048750141386721, |
|
"eval_accuracy": 0.3950470582191688, |
|
"eval_loss": 2.9669389724731445, |
|
"eval_runtime": 125.9784, |
|
"eval_samples_per_second": 7.176, |
|
"eval_steps_per_second": 7.176, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9953625155525393, |
|
"eval_accuracy": 0.4000391717207191, |
|
"eval_loss": 2.936887264251709, |
|
"eval_runtime": 125.9788, |
|
"eval_samples_per_second": 7.176, |
|
"eval_steps_per_second": 7.176, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.0858500169664065, |
|
"eval_accuracy": 0.40566369384035433, |
|
"eval_loss": 2.889920234680176, |
|
"eval_runtime": 126.1226, |
|
"eval_samples_per_second": 7.168, |
|
"eval_steps_per_second": 7.168, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.1763375183802738, |
|
"eval_accuracy": 0.41152411235050507, |
|
"eval_loss": 2.855320930480957, |
|
"eval_runtime": 125.9479, |
|
"eval_samples_per_second": 7.178, |
|
"eval_steps_per_second": 7.178, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.2668250197941409, |
|
"eval_accuracy": 0.4195339922877197, |
|
"eval_loss": 2.8255977630615234, |
|
"eval_runtime": 125.4794, |
|
"eval_samples_per_second": 7.204, |
|
"eval_steps_per_second": 7.204, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.3573125212080082, |
|
"grad_norm": 4.5625, |
|
"learning_rate": 4.321266968325792e-05, |
|
"loss": 2.7942, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.3573125212080082, |
|
"eval_accuracy": 0.4255361411350747, |
|
"eval_loss": 2.780564308166504, |
|
"eval_runtime": 125.3637, |
|
"eval_samples_per_second": 7.211, |
|
"eval_steps_per_second": 7.211, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.4478000226218755, |
|
"eval_accuracy": 0.4327748821383073, |
|
"eval_loss": 2.7515323162078857, |
|
"eval_runtime": 125.451, |
|
"eval_samples_per_second": 7.206, |
|
"eval_steps_per_second": 7.206, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.5382875240357425, |
|
"eval_accuracy": 0.4364924338825894, |
|
"eval_loss": 2.731539726257324, |
|
"eval_runtime": 125.5016, |
|
"eval_samples_per_second": 7.203, |
|
"eval_steps_per_second": 7.203, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.6287750254496096, |
|
"eval_accuracy": 0.44307129828565556, |
|
"eval_loss": 2.693981170654297, |
|
"eval_runtime": 125.3005, |
|
"eval_samples_per_second": 7.215, |
|
"eval_steps_per_second": 7.215, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.719262526863477, |
|
"eval_accuracy": 0.4504961080016023, |
|
"eval_loss": 2.660767078399658, |
|
"eval_runtime": 125.3064, |
|
"eval_samples_per_second": 7.214, |
|
"eval_steps_per_second": 7.214, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.8097500282773442, |
|
"grad_norm": 4.375, |
|
"learning_rate": 4.095022624434389e-05, |
|
"loss": 2.6245, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.8097500282773442, |
|
"eval_accuracy": 0.4550176169447281, |
|
"eval_loss": 2.637939929962158, |
|
"eval_runtime": 125.4251, |
|
"eval_samples_per_second": 7.207, |
|
"eval_steps_per_second": 7.207, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.9002375296912115, |
|
"eval_accuracy": 0.4604336970231151, |
|
"eval_loss": 2.624295949935913, |
|
"eval_runtime": 125.5212, |
|
"eval_samples_per_second": 7.202, |
|
"eval_steps_per_second": 7.202, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.9907250311050786, |
|
"eval_accuracy": 0.4636457964607748, |
|
"eval_loss": 2.602674722671509, |
|
"eval_runtime": 125.5023, |
|
"eval_samples_per_second": 7.203, |
|
"eval_steps_per_second": 7.203, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.0812125325189457, |
|
"eval_accuracy": 0.4712341921958477, |
|
"eval_loss": 2.5688230991363525, |
|
"eval_runtime": 125.5191, |
|
"eval_samples_per_second": 7.202, |
|
"eval_steps_per_second": 7.202, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.171700033932813, |
|
"eval_accuracy": 0.47027530058798783, |
|
"eval_loss": 2.567828416824341, |
|
"eval_runtime": 125.4716, |
|
"eval_samples_per_second": 7.205, |
|
"eval_steps_per_second": 7.205, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.2621875353466803, |
|
"grad_norm": 4.75, |
|
"learning_rate": 3.868778280542987e-05, |
|
"loss": 2.4882, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.2621875353466803, |
|
"eval_accuracy": 0.47281695755880343, |
|
"eval_loss": 2.5568008422851562, |
|
"eval_runtime": 125.4324, |
|
"eval_samples_per_second": 7.207, |
|
"eval_steps_per_second": 7.207, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.3526750367605476, |
|
"eval_accuracy": 0.47575571302156866, |
|
"eval_loss": 2.536648750305176, |
|
"eval_runtime": 125.6154, |
|
"eval_samples_per_second": 7.197, |
|
"eval_steps_per_second": 7.197, |
|
"step": 2600 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 11050, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 200, |
|
"total_flos": 1.9289090310537216e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|