electra-fr-explorer-mlm / trainer_state.json
edanigoben's picture
Classification tuned over mask lm 50 epochs 32 batch size
c048cb0
raw
history blame
12.4 kB
{
"best_metric": 2.190300464630127,
"best_model_checkpoint": "./output_c/checkpoint-615",
"epoch": 59.0,
"global_step": 885,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_loss": 3.077622652053833,
"eval_runtime": 0.2951,
"eval_samples_per_second": 494.804,
"eval_steps_per_second": 16.945,
"step": 15
},
{
"epoch": 2.0,
"eval_loss": 2.9188127517700195,
"eval_runtime": 0.3714,
"eval_samples_per_second": 393.09,
"eval_steps_per_second": 13.462,
"step": 30
},
{
"epoch": 3.0,
"eval_loss": 2.8426482677459717,
"eval_runtime": 0.2609,
"eval_samples_per_second": 559.557,
"eval_steps_per_second": 19.163,
"step": 45
},
{
"epoch": 4.0,
"eval_loss": 2.6221985816955566,
"eval_runtime": 0.2726,
"eval_samples_per_second": 535.638,
"eval_steps_per_second": 18.344,
"step": 60
},
{
"epoch": 5.0,
"eval_loss": 2.6243655681610107,
"eval_runtime": 0.2669,
"eval_samples_per_second": 546.955,
"eval_steps_per_second": 18.731,
"step": 75
},
{
"epoch": 6.0,
"eval_loss": 2.6885011196136475,
"eval_runtime": 0.2608,
"eval_samples_per_second": 559.912,
"eval_steps_per_second": 19.175,
"step": 90
},
{
"epoch": 7.0,
"eval_loss": 2.4477977752685547,
"eval_runtime": 0.266,
"eval_samples_per_second": 548.901,
"eval_steps_per_second": 18.798,
"step": 105
},
{
"epoch": 8.0,
"eval_loss": 2.588456153869629,
"eval_runtime": 0.3103,
"eval_samples_per_second": 470.587,
"eval_steps_per_second": 16.116,
"step": 120
},
{
"epoch": 9.0,
"eval_loss": 2.4026825428009033,
"eval_runtime": 0.2649,
"eval_samples_per_second": 551.053,
"eval_steps_per_second": 18.872,
"step": 135
},
{
"epoch": 10.0,
"eval_loss": 2.3897533416748047,
"eval_runtime": 0.3189,
"eval_samples_per_second": 457.83,
"eval_steps_per_second": 15.679,
"step": 150
},
{
"epoch": 11.0,
"eval_loss": 2.473085641860962,
"eval_runtime": 0.3415,
"eval_samples_per_second": 427.488,
"eval_steps_per_second": 14.64,
"step": 165
},
{
"epoch": 12.0,
"eval_loss": 2.448983907699585,
"eval_runtime": 0.2684,
"eval_samples_per_second": 543.977,
"eval_steps_per_second": 18.629,
"step": 180
},
{
"epoch": 13.0,
"eval_loss": 2.3829126358032227,
"eval_runtime": 0.2785,
"eval_samples_per_second": 524.252,
"eval_steps_per_second": 17.954,
"step": 195
},
{
"epoch": 14.0,
"eval_loss": 2.501011610031128,
"eval_runtime": 0.2671,
"eval_samples_per_second": 546.582,
"eval_steps_per_second": 18.719,
"step": 210
},
{
"epoch": 15.0,
"eval_loss": 2.427177906036377,
"eval_runtime": 0.2718,
"eval_samples_per_second": 537.134,
"eval_steps_per_second": 18.395,
"step": 225
},
{
"epoch": 16.0,
"eval_loss": 2.421048879623413,
"eval_runtime": 0.2742,
"eval_samples_per_second": 532.533,
"eval_steps_per_second": 18.237,
"step": 240
},
{
"epoch": 17.0,
"eval_loss": 2.342533826828003,
"eval_runtime": 0.2891,
"eval_samples_per_second": 505.042,
"eval_steps_per_second": 17.296,
"step": 255
},
{
"epoch": 18.0,
"eval_loss": 2.398080348968506,
"eval_runtime": 0.2794,
"eval_samples_per_second": 522.517,
"eval_steps_per_second": 17.894,
"step": 270
},
{
"epoch": 19.0,
"eval_loss": 2.3011465072631836,
"eval_runtime": 0.2774,
"eval_samples_per_second": 526.407,
"eval_steps_per_second": 18.028,
"step": 285
},
{
"epoch": 20.0,
"eval_loss": 2.5109691619873047,
"eval_runtime": 0.2617,
"eval_samples_per_second": 557.964,
"eval_steps_per_second": 19.108,
"step": 300
},
{
"epoch": 21.0,
"eval_loss": 2.381415843963623,
"eval_runtime": 0.2801,
"eval_samples_per_second": 521.25,
"eval_steps_per_second": 17.851,
"step": 315
},
{
"epoch": 22.0,
"eval_loss": 2.3828046321868896,
"eval_runtime": 0.2743,
"eval_samples_per_second": 532.299,
"eval_steps_per_second": 18.229,
"step": 330
},
{
"epoch": 23.0,
"eval_loss": 2.353680372238159,
"eval_runtime": 0.2779,
"eval_samples_per_second": 525.432,
"eval_steps_per_second": 17.994,
"step": 345
},
{
"epoch": 24.0,
"eval_loss": 2.29482364654541,
"eval_runtime": 0.2832,
"eval_samples_per_second": 515.574,
"eval_steps_per_second": 17.657,
"step": 360
},
{
"epoch": 25.0,
"eval_loss": 2.3079590797424316,
"eval_runtime": 0.3243,
"eval_samples_per_second": 450.187,
"eval_steps_per_second": 15.417,
"step": 375
},
{
"epoch": 26.0,
"eval_loss": 2.379464864730835,
"eval_runtime": 0.2934,
"eval_samples_per_second": 497.692,
"eval_steps_per_second": 17.044,
"step": 390
},
{
"epoch": 27.0,
"eval_loss": 2.37467885017395,
"eval_runtime": 0.2869,
"eval_samples_per_second": 508.855,
"eval_steps_per_second": 17.427,
"step": 405
},
{
"epoch": 28.0,
"eval_loss": 2.3926637172698975,
"eval_runtime": 0.2792,
"eval_samples_per_second": 523.011,
"eval_steps_per_second": 17.911,
"step": 420
},
{
"epoch": 29.0,
"eval_loss": 2.2542331218719482,
"eval_runtime": 0.2705,
"eval_samples_per_second": 539.84,
"eval_steps_per_second": 18.488,
"step": 435
},
{
"epoch": 30.0,
"eval_loss": 2.312037944793701,
"eval_runtime": 0.2823,
"eval_samples_per_second": 517.14,
"eval_steps_per_second": 17.71,
"step": 450
},
{
"epoch": 31.0,
"eval_loss": 2.2595930099487305,
"eval_runtime": 0.2709,
"eval_samples_per_second": 538.869,
"eval_steps_per_second": 18.454,
"step": 465
},
{
"epoch": 32.0,
"eval_loss": 2.3319013118743896,
"eval_runtime": 0.2718,
"eval_samples_per_second": 537.135,
"eval_steps_per_second": 18.395,
"step": 480
},
{
"epoch": 33.0,
"eval_loss": 2.321133852005005,
"eval_runtime": 0.3056,
"eval_samples_per_second": 477.789,
"eval_steps_per_second": 16.363,
"step": 495
},
{
"epoch": 34.0,
"eval_loss": 2.3662209510803223,
"eval_runtime": 0.2727,
"eval_samples_per_second": 535.325,
"eval_steps_per_second": 18.333,
"step": 510
},
{
"epoch": 35.0,
"eval_loss": 2.3607561588287354,
"eval_runtime": 0.2769,
"eval_samples_per_second": 527.269,
"eval_steps_per_second": 18.057,
"step": 525
},
{
"epoch": 36.0,
"eval_loss": 2.2733652591705322,
"eval_runtime": 0.3144,
"eval_samples_per_second": 464.449,
"eval_steps_per_second": 15.906,
"step": 540
},
{
"epoch": 37.0,
"eval_loss": 2.332275390625,
"eval_runtime": 0.2823,
"eval_samples_per_second": 517.096,
"eval_steps_per_second": 17.709,
"step": 555
},
{
"epoch": 38.0,
"eval_loss": 2.3226001262664795,
"eval_runtime": 0.2722,
"eval_samples_per_second": 536.297,
"eval_steps_per_second": 18.366,
"step": 570
},
{
"epoch": 39.0,
"eval_loss": 2.2499899864196777,
"eval_runtime": 0.2823,
"eval_samples_per_second": 517.269,
"eval_steps_per_second": 17.715,
"step": 585
},
{
"epoch": 40.0,
"eval_loss": 2.3148353099823,
"eval_runtime": 0.2735,
"eval_samples_per_second": 533.755,
"eval_steps_per_second": 18.279,
"step": 600
},
{
"epoch": 40.73,
"learning_rate": 6.444444444444445e-06,
"loss": 2.4323,
"step": 611
},
{
"epoch": 41.0,
"eval_loss": 2.190300464630127,
"eval_runtime": 0.279,
"eval_samples_per_second": 523.306,
"eval_steps_per_second": 17.921,
"step": 615
},
{
"epoch": 42.0,
"eval_loss": 2.2688183784484863,
"eval_runtime": 0.2786,
"eval_samples_per_second": 524.054,
"eval_steps_per_second": 17.947,
"step": 630
},
{
"epoch": 43.0,
"eval_loss": 2.3206570148468018,
"eval_runtime": 0.3166,
"eval_samples_per_second": 461.08,
"eval_steps_per_second": 15.79,
"step": 645
},
{
"epoch": 44.0,
"eval_loss": 2.398860454559326,
"eval_runtime": 0.2802,
"eval_samples_per_second": 521.028,
"eval_steps_per_second": 17.843,
"step": 660
},
{
"epoch": 45.0,
"eval_loss": 2.329181432723999,
"eval_runtime": 0.2845,
"eval_samples_per_second": 513.222,
"eval_steps_per_second": 17.576,
"step": 675
},
{
"epoch": 46.0,
"eval_loss": 2.301910877227783,
"eval_runtime": 0.2825,
"eval_samples_per_second": 516.788,
"eval_steps_per_second": 17.698,
"step": 690
},
{
"epoch": 47.0,
"eval_loss": 2.286062002182007,
"eval_runtime": 0.2774,
"eval_samples_per_second": 526.332,
"eval_steps_per_second": 18.025,
"step": 705
},
{
"epoch": 48.0,
"eval_loss": 2.2627930641174316,
"eval_runtime": 0.3491,
"eval_samples_per_second": 418.174,
"eval_steps_per_second": 14.321,
"step": 720
},
{
"epoch": 49.0,
"eval_loss": 2.3683576583862305,
"eval_runtime": 0.2828,
"eval_samples_per_second": 516.226,
"eval_steps_per_second": 17.679,
"step": 735
},
{
"epoch": 50.0,
"eval_loss": 2.3841121196746826,
"eval_runtime": 0.2738,
"eval_samples_per_second": 533.204,
"eval_steps_per_second": 18.26,
"step": 750
},
{
"epoch": 51.0,
"eval_loss": 2.3427212238311768,
"eval_runtime": 0.2841,
"eval_samples_per_second": 513.874,
"eval_steps_per_second": 17.598,
"step": 765
},
{
"epoch": 52.0,
"eval_loss": 2.3786392211914062,
"eval_runtime": 0.2882,
"eval_samples_per_second": 506.555,
"eval_steps_per_second": 17.348,
"step": 780
},
{
"epoch": 53.0,
"eval_loss": 2.3314857482910156,
"eval_runtime": 0.3054,
"eval_samples_per_second": 478.014,
"eval_steps_per_second": 16.37,
"step": 795
},
{
"epoch": 54.0,
"eval_loss": 2.4228127002716064,
"eval_runtime": 0.2752,
"eval_samples_per_second": 530.586,
"eval_steps_per_second": 18.171,
"step": 810
},
{
"epoch": 55.0,
"eval_loss": 2.2979846000671387,
"eval_runtime": 0.2804,
"eval_samples_per_second": 520.739,
"eval_steps_per_second": 17.834,
"step": 825
},
{
"epoch": 56.0,
"eval_loss": 2.288037061691284,
"eval_runtime": 0.2706,
"eval_samples_per_second": 539.536,
"eval_steps_per_second": 18.477,
"step": 840
},
{
"epoch": 57.0,
"eval_loss": 2.375304698944092,
"eval_runtime": 0.2754,
"eval_samples_per_second": 530.218,
"eval_steps_per_second": 18.158,
"step": 855
},
{
"epoch": 58.0,
"eval_loss": 2.302351474761963,
"eval_runtime": 0.2976,
"eval_samples_per_second": 490.644,
"eval_steps_per_second": 16.803,
"step": 870
},
{
"epoch": 59.0,
"eval_loss": 2.2706165313720703,
"eval_runtime": 0.3674,
"eval_samples_per_second": 397.413,
"eval_steps_per_second": 13.61,
"step": 885
}
],
"max_steps": 900,
"num_train_epochs": 60,
"total_flos": 196520659812864.0,
"trial_name": null,
"trial_params": null
}