{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.6386263390044107,
  "global_step": 335000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 1.921235034656585e-05,
      "loss": 0.3334,
      "step": 5000
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.8424700693131696e-05,
      "loss": 0.3387,
      "step": 10000
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.7637051039697544e-05,
      "loss": 0.3327,
      "step": 15000
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.684940138626339e-05,
      "loss": 0.3492,
      "step": 20000
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.606175173282924e-05,
      "loss": 0.3349,
      "step": 25000
    },
    {
      "epoch": 0.2,
      "eval_accuracy": 0.9155995845794678,
      "eval_loss": 0.389266699552536,
      "eval_runtime": 551.1932,
      "eval_samples_per_second": 51.289,
      "eval_steps_per_second": 12.823,
      "step": 25000
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.5274102079395087e-05,
      "loss": 0.3279,
      "step": 30000
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.4486452425960932e-05,
      "loss": 0.3301,
      "step": 35000
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.369880277252678e-05,
      "loss": 0.3243,
      "step": 40000
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.2911153119092628e-05,
      "loss": 0.293,
      "step": 45000
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.2123503465658477e-05,
      "loss": 0.3053,
      "step": 50000
    },
    {
      "epoch": 0.39,
      "eval_accuracy": 0.9235231876373291,
      "eval_loss": 0.3810465931892395,
      "eval_runtime": 542.4272,
      "eval_samples_per_second": 52.118,
      "eval_steps_per_second": 13.03,
      "step": 50000
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.1335853812224324e-05,
      "loss": 0.3126,
      "step": 55000
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.0548204158790173e-05,
      "loss": 0.3072,
      "step": 60000
    },
    {
      "epoch": 0.51,
      "learning_rate": 9.760554505356018e-06,
      "loss": 0.2957,
      "step": 65000
    },
    {
      "epoch": 0.55,
      "learning_rate": 8.972904851921865e-06,
      "loss": 0.2968,
      "step": 70000
    },
    {
      "epoch": 0.59,
      "learning_rate": 8.185255198487714e-06,
      "loss": 0.2882,
      "step": 75000
    },
    {
      "epoch": 0.59,
      "eval_accuracy": 0.9224973320960999,
      "eval_loss": 0.37537074089050293,
      "eval_runtime": 521.9317,
      "eval_samples_per_second": 54.164,
      "eval_steps_per_second": 13.542,
      "step": 75000
    },
    {
      "epoch": 0.63,
      "learning_rate": 7.3976055450535615e-06,
      "loss": 0.2754,
      "step": 80000
    },
    {
      "epoch": 0.67,
      "learning_rate": 6.6099558916194085e-06,
      "loss": 0.2607,
      "step": 85000
    },
    {
      "epoch": 0.71,
      "learning_rate": 5.8223062381852555e-06,
      "loss": 0.2818,
      "step": 90000
    },
    {
      "epoch": 0.75,
      "learning_rate": 5.034656584751103e-06,
      "loss": 0.2736,
      "step": 95000
    },
    {
      "epoch": 0.79,
      "learning_rate": 4.24700693131695e-06,
      "loss": 0.2644,
      "step": 100000
    },
    {
      "epoch": 0.79,
      "eval_accuracy": 0.9297842383384705,
      "eval_loss": 0.3645715117454529,
      "eval_runtime": 521.9055,
      "eval_samples_per_second": 54.167,
      "eval_steps_per_second": 13.543,
      "step": 100000
    },
    {
      "epoch": 0.83,
      "learning_rate": 3.459357277882798e-06,
      "loss": 0.2552,
      "step": 105000
    },
    {
      "epoch": 0.87,
      "learning_rate": 2.6717076244486457e-06,
      "loss": 0.266,
      "step": 110000
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.884057971014493e-06,
      "loss": 0.2684,
      "step": 115000
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.0964083175803404e-06,
      "loss": 0.2501,
      "step": 120000
    },
    {
      "epoch": 0.98,
      "learning_rate": 3.087586641461878e-07,
      "loss": 0.273,
      "step": 125000
    },
    {
      "epoch": 0.98,
      "eval_accuracy": 0.9299964904785156,
      "eval_loss": 0.3369257152080536,
      "eval_runtime": 522.7551,
      "eval_samples_per_second": 54.079,
      "eval_steps_per_second": 13.521,
      "step": 125000
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7952110901071204e-05,
      "loss": 0.2834,
      "step": 130000
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.787334593572779e-05,
      "loss": 0.3047,
      "step": 135000
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.7794580970384373e-05,
      "loss": 0.2963,
      "step": 140000
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.771581600504096e-05,
      "loss": 0.3031,
      "step": 145000
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.7637051039697544e-05,
      "loss": 0.3033,
      "step": 150000
    },
    {
      "epoch": 1.18,
      "eval_accuracy": 0.9257162809371948,
      "eval_loss": 0.4006378650665283,
      "eval_runtime": 519.4649,
      "eval_samples_per_second": 54.421,
      "eval_steps_per_second": 13.606,
      "step": 150000
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.755828607435413e-05,
      "loss": 0.3024,
      "step": 155000
    },
    {
      "epoch": 1.26,
      "learning_rate": 1.7479521109010713e-05,
      "loss": 0.3135,
      "step": 160000
    },
    {
      "epoch": 1.3,
      "learning_rate": 1.74007561436673e-05,
      "loss": 0.3137,
      "step": 165000
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.732199117832388e-05,
      "loss": 0.3227,
      "step": 170000
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.7243226212980467e-05,
      "loss": 0.3246,
      "step": 175000
    },
    {
      "epoch": 1.38,
      "eval_accuracy": 0.924018383026123,
      "eval_loss": 0.3924681842327118,
      "eval_runtime": 518.8244,
      "eval_samples_per_second": 54.489,
      "eval_steps_per_second": 13.623,
      "step": 175000
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.7164461247637053e-05,
      "loss": 0.3281,
      "step": 180000
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.708569628229364e-05,
      "loss": 0.3256,
      "step": 185000
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.700693131695022e-05,
      "loss": 0.313,
      "step": 190000
    },
    {
      "epoch": 1.54,
      "learning_rate": 1.6928166351606807e-05,
      "loss": 0.3313,
      "step": 195000
    },
    {
      "epoch": 1.58,
      "learning_rate": 1.684940138626339e-05,
      "loss": 0.2953,
      "step": 200000
    },
    {
      "epoch": 1.58,
      "eval_accuracy": 0.9212592840194702,
      "eval_loss": 0.3895967900753021,
      "eval_runtime": 526.2623,
      "eval_samples_per_second": 53.718,
      "eval_steps_per_second": 13.431,
      "step": 200000
    },
    {
      "epoch": 1.61,
      "learning_rate": 1.6770636420919976e-05,
      "loss": 0.3103,
      "step": 205000
    },
    {
      "epoch": 1.65,
      "learning_rate": 1.669187145557656e-05,
      "loss": 0.3089,
      "step": 210000
    },
    {
      "epoch": 1.69,
      "learning_rate": 1.6613106490233147e-05,
      "loss": 0.3095,
      "step": 215000
    },
    {
      "epoch": 1.73,
      "learning_rate": 1.653434152488973e-05,
      "loss": 0.3288,
      "step": 220000
    },
    {
      "epoch": 1.77,
      "learning_rate": 1.6455576559546316e-05,
      "loss": 0.3199,
      "step": 225000
    },
    {
      "epoch": 1.77,
      "eval_accuracy": 0.9203749299049377,
      "eval_loss": 0.3942428529262543,
      "eval_runtime": 520.6801,
      "eval_samples_per_second": 54.294,
      "eval_steps_per_second": 13.575,
      "step": 225000
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.6376811594202898e-05,
      "loss": 0.306,
      "step": 230000
    },
    {
      "epoch": 1.85,
      "learning_rate": 1.6298046628859484e-05,
      "loss": 0.3104,
      "step": 235000
    },
    {
      "epoch": 1.89,
      "learning_rate": 1.621928166351607e-05,
      "loss": 0.3139,
      "step": 240000
    },
    {
      "epoch": 1.93,
      "learning_rate": 1.6140516698172656e-05,
      "loss": 0.3179,
      "step": 245000
    },
    {
      "epoch": 1.97,
      "learning_rate": 1.606175173282924e-05,
      "loss": 0.3226,
      "step": 250000
    },
    {
      "epoch": 1.97,
      "eval_accuracy": 0.9243367314338684,
      "eval_loss": 0.4058537185192108,
      "eval_runtime": 552.8946,
      "eval_samples_per_second": 51.131,
      "eval_steps_per_second": 12.784,
      "step": 250000
    },
    {
      "epoch": 2.01,
      "learning_rate": 1.5982986767485824e-05,
      "loss": 0.3167,
      "step": 255000
    },
    {
      "epoch": 2.05,
      "learning_rate": 1.5904221802142407e-05,
      "loss": 0.3034,
      "step": 260000
    },
    {
      "epoch": 2.09,
      "learning_rate": 1.5825456836798993e-05,
      "loss": 0.2976,
      "step": 265000
    },
    {
      "epoch": 2.13,
      "learning_rate": 1.574669187145558e-05,
      "loss": 0.3039,
      "step": 270000
    },
    {
      "epoch": 2.17,
      "learning_rate": 1.5667926906112164e-05,
      "loss": 0.2889,
      "step": 275000
    },
    {
      "epoch": 2.17,
      "eval_accuracy": 0.9221436381340027,
      "eval_loss": 0.39818692207336426,
      "eval_runtime": 508.9512,
      "eval_samples_per_second": 55.546,
      "eval_steps_per_second": 13.887,
      "step": 275000
    },
    {
      "epoch": 2.21,
      "learning_rate": 1.5589161940768747e-05,
      "loss": 0.3079,
      "step": 280000
    },
    {
      "epoch": 2.24,
      "learning_rate": 1.5510396975425333e-05,
      "loss": 0.3148,
      "step": 285000
    },
    {
      "epoch": 2.28,
      "learning_rate": 1.5431632010081915e-05,
      "loss": 0.2829,
      "step": 290000
    },
    {
      "epoch": 2.32,
      "learning_rate": 1.53528670447385e-05,
      "loss": 0.2978,
      "step": 295000
    },
    {
      "epoch": 2.36,
      "learning_rate": 1.5274102079395087e-05,
      "loss": 0.2963,
      "step": 300000
    },
    {
      "epoch": 2.36,
      "eval_accuracy": 0.922957181930542,
      "eval_loss": 0.43214836716651917,
      "eval_runtime": 528.9348,
      "eval_samples_per_second": 53.447,
      "eval_steps_per_second": 13.363,
      "step": 300000
    },
    {
      "epoch": 2.4,
      "learning_rate": 1.5195337114051671e-05,
      "loss": 0.3007,
      "step": 305000
    },
    {
      "epoch": 2.44,
      "learning_rate": 1.5116572148708256e-05,
      "loss": 0.2901,
      "step": 310000
    },
    {
      "epoch": 2.48,
      "learning_rate": 1.5037807183364841e-05,
      "loss": 0.2905,
      "step": 315000
    },
    {
      "epoch": 2.52,
      "learning_rate": 1.4959042218021424e-05,
      "loss": 0.3082,
      "step": 320000
    },
    {
      "epoch": 2.56,
      "learning_rate": 1.488027725267801e-05,
      "loss": 0.2899,
      "step": 325000
    },
    {
      "epoch": 2.56,
      "eval_accuracy": 0.924124538898468,
      "eval_loss": 0.40452826023101807,
      "eval_runtime": 520.2943,
      "eval_samples_per_second": 54.335,
      "eval_steps_per_second": 13.585,
      "step": 325000
    },
    {
      "epoch": 2.6,
      "learning_rate": 1.4801512287334594e-05,
      "loss": 0.301,
      "step": 330000
    },
    {
      "epoch": 2.64,
      "learning_rate": 1.472274732199118e-05,
      "loss": 0.3014,
      "step": 335000
    }
  ],
  "max_steps": 1269600,
  "num_train_epochs": 10,
  "total_flos": 3.525740928655196e+17,
  "trial_name": null,
  "trial_params": null
}