|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.1027095148078134, |
|
"global_step": 140000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9921235034656587e-06, |
|
"loss": 0.2621, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.984247006931317e-06, |
|
"loss": 0.2548, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9763705103969753e-06, |
|
"loss": 0.2451, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9684940138626337e-06, |
|
"loss": 0.2479, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9606175173282924e-06, |
|
"loss": 0.2477, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.9227449297904968, |
|
"eval_loss": 0.42459121346473694, |
|
"eval_runtime": 532.8547, |
|
"eval_samples_per_second": 53.054, |
|
"eval_steps_per_second": 13.264, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9527410207939508e-06, |
|
"loss": 0.253, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.9448645242596095e-06, |
|
"loss": 0.2466, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.936988027725268e-06, |
|
"loss": 0.25, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.929111531190926e-06, |
|
"loss": 0.2402, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.9212350346565845e-06, |
|
"loss": 0.2515, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.926034688949585, |
|
"eval_loss": 0.3926495909690857, |
|
"eval_runtime": 506.3742, |
|
"eval_samples_per_second": 55.828, |
|
"eval_steps_per_second": 13.958, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.9133585381222433e-06, |
|
"loss": 0.2383, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.9054820415879016e-06, |
|
"loss": 0.2523, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.8976055450535602e-06, |
|
"loss": 0.2372, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.8897290485192185e-06, |
|
"loss": 0.2395, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.881852551984877e-06, |
|
"loss": 0.2376, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.9263883829116821, |
|
"eval_loss": 0.3989144265651703, |
|
"eval_runtime": 505.8843, |
|
"eval_samples_per_second": 55.882, |
|
"eval_steps_per_second": 13.972, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8739760554505356e-06, |
|
"loss": 0.2331, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8660995589161941e-06, |
|
"loss": 0.2426, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.8582230623818525e-06, |
|
"loss": 0.2493, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.850346565847511e-06, |
|
"loss": 0.2379, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.8424700693131694e-06, |
|
"loss": 0.2428, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.9267421364784241, |
|
"eval_loss": 0.3985295295715332, |
|
"eval_runtime": 549.265, |
|
"eval_samples_per_second": 51.469, |
|
"eval_steps_per_second": 12.868, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.834593572778828e-06, |
|
"loss": 0.2429, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.8267170762444864e-06, |
|
"loss": 0.238, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.818840579710145e-06, |
|
"loss": 0.2322, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.8109640831758033e-06, |
|
"loss": 0.2371, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.8030875866414619e-06, |
|
"loss": 0.2303, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.9282631874084473, |
|
"eval_loss": 0.40024659037590027, |
|
"eval_runtime": 548.3799, |
|
"eval_samples_per_second": 51.552, |
|
"eval_steps_per_second": 12.889, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.7952110901071202e-06, |
|
"loss": 0.221, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.7873345935727788e-06, |
|
"loss": 0.2199, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.779458097038437e-06, |
|
"loss": 0.2097, |
|
"step": 140000 |
|
} |
|
], |
|
"max_steps": 1269600, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.473443106221998e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|