|
{ |
|
"best_metric": 2.190300464630127, |
|
"best_model_checkpoint": "./output_c/checkpoint-615", |
|
"epoch": 59.0, |
|
"global_step": 885, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 3.077622652053833, |
|
"eval_runtime": 0.2951, |
|
"eval_samples_per_second": 494.804, |
|
"eval_steps_per_second": 16.945, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.9188127517700195, |
|
"eval_runtime": 0.3714, |
|
"eval_samples_per_second": 393.09, |
|
"eval_steps_per_second": 13.462, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.8426482677459717, |
|
"eval_runtime": 0.2609, |
|
"eval_samples_per_second": 559.557, |
|
"eval_steps_per_second": 19.163, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.6221985816955566, |
|
"eval_runtime": 0.2726, |
|
"eval_samples_per_second": 535.638, |
|
"eval_steps_per_second": 18.344, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.6243655681610107, |
|
"eval_runtime": 0.2669, |
|
"eval_samples_per_second": 546.955, |
|
"eval_steps_per_second": 18.731, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.6885011196136475, |
|
"eval_runtime": 0.2608, |
|
"eval_samples_per_second": 559.912, |
|
"eval_steps_per_second": 19.175, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 2.4477977752685547, |
|
"eval_runtime": 0.266, |
|
"eval_samples_per_second": 548.901, |
|
"eval_steps_per_second": 18.798, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 2.588456153869629, |
|
"eval_runtime": 0.3103, |
|
"eval_samples_per_second": 470.587, |
|
"eval_steps_per_second": 16.116, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 2.4026825428009033, |
|
"eval_runtime": 0.2649, |
|
"eval_samples_per_second": 551.053, |
|
"eval_steps_per_second": 18.872, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 2.3897533416748047, |
|
"eval_runtime": 0.3189, |
|
"eval_samples_per_second": 457.83, |
|
"eval_steps_per_second": 15.679, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 2.473085641860962, |
|
"eval_runtime": 0.3415, |
|
"eval_samples_per_second": 427.488, |
|
"eval_steps_per_second": 14.64, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 2.448983907699585, |
|
"eval_runtime": 0.2684, |
|
"eval_samples_per_second": 543.977, |
|
"eval_steps_per_second": 18.629, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 2.3829126358032227, |
|
"eval_runtime": 0.2785, |
|
"eval_samples_per_second": 524.252, |
|
"eval_steps_per_second": 17.954, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 2.501011610031128, |
|
"eval_runtime": 0.2671, |
|
"eval_samples_per_second": 546.582, |
|
"eval_steps_per_second": 18.719, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 2.427177906036377, |
|
"eval_runtime": 0.2718, |
|
"eval_samples_per_second": 537.134, |
|
"eval_steps_per_second": 18.395, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 2.421048879623413, |
|
"eval_runtime": 0.2742, |
|
"eval_samples_per_second": 532.533, |
|
"eval_steps_per_second": 18.237, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 2.342533826828003, |
|
"eval_runtime": 0.2891, |
|
"eval_samples_per_second": 505.042, |
|
"eval_steps_per_second": 17.296, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 2.398080348968506, |
|
"eval_runtime": 0.2794, |
|
"eval_samples_per_second": 522.517, |
|
"eval_steps_per_second": 17.894, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 2.3011465072631836, |
|
"eval_runtime": 0.2774, |
|
"eval_samples_per_second": 526.407, |
|
"eval_steps_per_second": 18.028, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 2.5109691619873047, |
|
"eval_runtime": 0.2617, |
|
"eval_samples_per_second": 557.964, |
|
"eval_steps_per_second": 19.108, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 2.381415843963623, |
|
"eval_runtime": 0.2801, |
|
"eval_samples_per_second": 521.25, |
|
"eval_steps_per_second": 17.851, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 2.3828046321868896, |
|
"eval_runtime": 0.2743, |
|
"eval_samples_per_second": 532.299, |
|
"eval_steps_per_second": 18.229, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 2.353680372238159, |
|
"eval_runtime": 0.2779, |
|
"eval_samples_per_second": 525.432, |
|
"eval_steps_per_second": 17.994, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 2.29482364654541, |
|
"eval_runtime": 0.2832, |
|
"eval_samples_per_second": 515.574, |
|
"eval_steps_per_second": 17.657, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 2.3079590797424316, |
|
"eval_runtime": 0.3243, |
|
"eval_samples_per_second": 450.187, |
|
"eval_steps_per_second": 15.417, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 2.379464864730835, |
|
"eval_runtime": 0.2934, |
|
"eval_samples_per_second": 497.692, |
|
"eval_steps_per_second": 17.044, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 2.37467885017395, |
|
"eval_runtime": 0.2869, |
|
"eval_samples_per_second": 508.855, |
|
"eval_steps_per_second": 17.427, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 2.3926637172698975, |
|
"eval_runtime": 0.2792, |
|
"eval_samples_per_second": 523.011, |
|
"eval_steps_per_second": 17.911, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 2.2542331218719482, |
|
"eval_runtime": 0.2705, |
|
"eval_samples_per_second": 539.84, |
|
"eval_steps_per_second": 18.488, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 2.312037944793701, |
|
"eval_runtime": 0.2823, |
|
"eval_samples_per_second": 517.14, |
|
"eval_steps_per_second": 17.71, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 2.2595930099487305, |
|
"eval_runtime": 0.2709, |
|
"eval_samples_per_second": 538.869, |
|
"eval_steps_per_second": 18.454, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 2.3319013118743896, |
|
"eval_runtime": 0.2718, |
|
"eval_samples_per_second": 537.135, |
|
"eval_steps_per_second": 18.395, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 2.321133852005005, |
|
"eval_runtime": 0.3056, |
|
"eval_samples_per_second": 477.789, |
|
"eval_steps_per_second": 16.363, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 2.3662209510803223, |
|
"eval_runtime": 0.2727, |
|
"eval_samples_per_second": 535.325, |
|
"eval_steps_per_second": 18.333, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 2.3607561588287354, |
|
"eval_runtime": 0.2769, |
|
"eval_samples_per_second": 527.269, |
|
"eval_steps_per_second": 18.057, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 2.2733652591705322, |
|
"eval_runtime": 0.3144, |
|
"eval_samples_per_second": 464.449, |
|
"eval_steps_per_second": 15.906, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 2.332275390625, |
|
"eval_runtime": 0.2823, |
|
"eval_samples_per_second": 517.096, |
|
"eval_steps_per_second": 17.709, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 2.3226001262664795, |
|
"eval_runtime": 0.2722, |
|
"eval_samples_per_second": 536.297, |
|
"eval_steps_per_second": 18.366, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 2.2499899864196777, |
|
"eval_runtime": 0.2823, |
|
"eval_samples_per_second": 517.269, |
|
"eval_steps_per_second": 17.715, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 2.3148353099823, |
|
"eval_runtime": 0.2735, |
|
"eval_samples_per_second": 533.755, |
|
"eval_steps_per_second": 18.279, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 40.73, |
|
"learning_rate": 6.444444444444445e-06, |
|
"loss": 2.4323, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_loss": 2.190300464630127, |
|
"eval_runtime": 0.279, |
|
"eval_samples_per_second": 523.306, |
|
"eval_steps_per_second": 17.921, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 2.2688183784484863, |
|
"eval_runtime": 0.2786, |
|
"eval_samples_per_second": 524.054, |
|
"eval_steps_per_second": 17.947, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_loss": 2.3206570148468018, |
|
"eval_runtime": 0.3166, |
|
"eval_samples_per_second": 461.08, |
|
"eval_steps_per_second": 15.79, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 2.398860454559326, |
|
"eval_runtime": 0.2802, |
|
"eval_samples_per_second": 521.028, |
|
"eval_steps_per_second": 17.843, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_loss": 2.329181432723999, |
|
"eval_runtime": 0.2845, |
|
"eval_samples_per_second": 513.222, |
|
"eval_steps_per_second": 17.576, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 2.301910877227783, |
|
"eval_runtime": 0.2825, |
|
"eval_samples_per_second": 516.788, |
|
"eval_steps_per_second": 17.698, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_loss": 2.286062002182007, |
|
"eval_runtime": 0.2774, |
|
"eval_samples_per_second": 526.332, |
|
"eval_steps_per_second": 18.025, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 2.2627930641174316, |
|
"eval_runtime": 0.3491, |
|
"eval_samples_per_second": 418.174, |
|
"eval_steps_per_second": 14.321, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_loss": 2.3683576583862305, |
|
"eval_runtime": 0.2828, |
|
"eval_samples_per_second": 516.226, |
|
"eval_steps_per_second": 17.679, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 2.3841121196746826, |
|
"eval_runtime": 0.2738, |
|
"eval_samples_per_second": 533.204, |
|
"eval_steps_per_second": 18.26, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_loss": 2.3427212238311768, |
|
"eval_runtime": 0.2841, |
|
"eval_samples_per_second": 513.874, |
|
"eval_steps_per_second": 17.598, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_loss": 2.3786392211914062, |
|
"eval_runtime": 0.2882, |
|
"eval_samples_per_second": 506.555, |
|
"eval_steps_per_second": 17.348, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_loss": 2.3314857482910156, |
|
"eval_runtime": 0.3054, |
|
"eval_samples_per_second": 478.014, |
|
"eval_steps_per_second": 16.37, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_loss": 2.4228127002716064, |
|
"eval_runtime": 0.2752, |
|
"eval_samples_per_second": 530.586, |
|
"eval_steps_per_second": 18.171, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_loss": 2.2979846000671387, |
|
"eval_runtime": 0.2804, |
|
"eval_samples_per_second": 520.739, |
|
"eval_steps_per_second": 17.834, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_loss": 2.288037061691284, |
|
"eval_runtime": 0.2706, |
|
"eval_samples_per_second": 539.536, |
|
"eval_steps_per_second": 18.477, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_loss": 2.375304698944092, |
|
"eval_runtime": 0.2754, |
|
"eval_samples_per_second": 530.218, |
|
"eval_steps_per_second": 18.158, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_loss": 2.302351474761963, |
|
"eval_runtime": 0.2976, |
|
"eval_samples_per_second": 490.644, |
|
"eval_steps_per_second": 16.803, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_loss": 2.2706165313720703, |
|
"eval_runtime": 0.3674, |
|
"eval_samples_per_second": 397.413, |
|
"eval_steps_per_second": 13.61, |
|
"step": 885 |
|
} |
|
], |
|
"max_steps": 900, |
|
"num_train_epochs": 60, |
|
"total_flos": 196520659812864.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|