viT5-large / trainer_state.json
duyvu8373's picture
Upload 12 files
503cde4 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 48.63813229571984,
"eval_steps": 500,
"global_step": 12500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_bp": 0.4559528592458481,
"eval_counts": [
3210,
1930,
1488,
1066
],
"eval_loss": 2.412109375,
"eval_precisions": [
71.17516629711751,
48.44377510040161,
43.03065355696935,
36.24617477048623
],
"eval_ref_len": 8052,
"eval_runtime": 118.4312,
"eval_samples_per_second": 4.441,
"eval_score": 21.9569286964753,
"eval_steps_per_second": 0.279,
"eval_sys_len": 4510,
"eval_totals": [
4510,
3984,
3458,
2941
],
"step": 257
},
{
"epoch": 1.95,
"learning_rate": 1.9221789883268484e-05,
"loss": 2.8948,
"step": 500
},
{
"epoch": 2.0,
"eval_bp": 0.43532380297987505,
"eval_counts": [
1708,
541,
376,
228
],
"eval_loss": 1.5029296875,
"eval_precisions": [
38.853503184713375,
13.979328165374676,
11.24401913875598,
7.497533706017757
],
"eval_ref_len": 8052,
"eval_runtime": 115.6212,
"eval_samples_per_second": 4.549,
"eval_score": 6.3679777301051494,
"eval_steps_per_second": 0.285,
"eval_sys_len": 4396,
"eval_totals": [
4396,
3870,
3344,
3041
],
"step": 514
},
{
"epoch": 3.0,
"eval_bp": 0.5596896112039585,
"eval_counts": [
2795,
1858,
1416,
991
],
"eval_loss": 0.65576171875,
"eval_precisions": [
54.85770363101079,
40.66535346903042,
35.02349740291862,
28.177423940858688
],
"eval_ref_len": 8052,
"eval_runtime": 115.5065,
"eval_samples_per_second": 4.554,
"eval_score": 21.558969055160425,
"eval_steps_per_second": 0.286,
"eval_sys_len": 5095,
"eval_totals": [
5095,
4569,
4043,
3517
],
"step": 771
},
{
"epoch": 3.89,
"learning_rate": 1.8443579766536967e-05,
"loss": 0.8924,
"step": 1000
},
{
"epoch": 4.0,
"eval_bp": 0.6650198090145658,
"eval_counts": [
3257,
2161,
1704,
1256
],
"eval_loss": 0.485107421875,
"eval_precisions": [
56.950515824444835,
41.61371076449066,
36.51167773730448,
30.330837961844964
],
"eval_ref_len": 8052,
"eval_runtime": 115.5544,
"eval_samples_per_second": 4.552,
"eval_score": 26.766843398496384,
"eval_steps_per_second": 0.286,
"eval_sys_len": 5719,
"eval_totals": [
5719,
5193,
4667,
4141
],
"step": 1028
},
{
"epoch": 5.0,
"eval_bp": 0.41788118238391686,
"eval_counts": [
3699,
2841,
2368,
1900
],
"eval_loss": 0.293701171875,
"eval_precisions": [
86.02325581395348,
75.27821939586646,
72.9064039408867,
69.80161645848641
],
"eval_ref_len": 8052,
"eval_runtime": 115.1699,
"eval_samples_per_second": 4.567,
"eval_score": 31.661530724736487,
"eval_steps_per_second": 0.287,
"eval_sys_len": 4300,
"eval_totals": [
4300,
3774,
3248,
2722
],
"step": 1285
},
{
"epoch": 5.84,
"learning_rate": 1.766536964980545e-05,
"loss": 0.4295,
"step": 1500
},
{
"epoch": 6.0,
"eval_bp": 0.4089581075583404,
"eval_counts": [
3783,
2928,
2446,
1971
],
"eval_loss": 0.2445068359375,
"eval_precisions": [
88.9908256880734,
78.60402684563758,
76.46139418568302,
73.73737373737374
],
"eval_ref_len": 8052,
"eval_runtime": 115.449,
"eval_samples_per_second": 4.556,
"eval_score": 32.408490251125635,
"eval_steps_per_second": 0.286,
"eval_sys_len": 4251,
"eval_totals": [
4251,
3725,
3199,
2673
],
"step": 1542
},
{
"epoch": 7.0,
"eval_bp": 0.417153226107242,
"eval_counts": [
3818,
2965,
2480,
2002
],
"eval_loss": 0.22021484375,
"eval_precisions": [
88.8733705772812,
78.64721485411141,
76.44882860665845,
73.6571008094187
],
"eval_ref_len": 8052,
"eval_runtime": 115.4073,
"eval_samples_per_second": 4.558,
"eval_score": 33.04119304311829,
"eval_steps_per_second": 0.286,
"eval_sys_len": 4296,
"eval_totals": [
4296,
3770,
3244,
2718
],
"step": 1799
},
{
"epoch": 7.78,
"learning_rate": 1.6887159533073932e-05,
"loss": 0.2991,
"step": 2000
},
{
"epoch": 8.0,
"eval_bp": 0.42679131632296613,
"eval_counts": [
3874,
3019,
2524,
2038
],
"eval_loss": 0.2076416015625,
"eval_precisions": [
89.07794895378248,
78.9693957624902,
76.55444343342432,
73.54745579213281
],
"eval_ref_len": 8052,
"eval_runtime": 114.0107,
"eval_samples_per_second": 4.614,
"eval_score": 33.85769159645968,
"eval_steps_per_second": 0.289,
"eval_sys_len": 4349,
"eval_totals": [
4349,
3823,
3297,
2771
],
"step": 2056
},
{
"epoch": 9.0,
"eval_bp": 0.46028228872696303,
"eval_counts": [
4065,
3225,
2700,
2186
],
"eval_loss": 0.1663818359375,
"eval_precisions": [
89.65593295103662,
80.46407185628742,
77.54164273406089,
73.9512855209743
],
"eval_ref_len": 8052,
"eval_runtime": 114.8195,
"eval_samples_per_second": 4.581,
"eval_score": 36.91388078489759,
"eval_steps_per_second": 0.287,
"eval_sys_len": 4534,
"eval_totals": [
4534,
4008,
3482,
2956
],
"step": 2313
},
{
"epoch": 9.73,
"learning_rate": 1.6108949416342414e-05,
"loss": 0.2277,
"step": 2500
},
{
"epoch": 10.0,
"eval_bp": 0.5426087135017283,
"eval_counts": [
4419,
3611,
3062,
2525
],
"eval_loss": 0.1044921875,
"eval_precisions": [
88.43305983590155,
80.76492954596287,
77.617237008872,
73.85200350979818
],
"eval_ref_len": 8052,
"eval_runtime": 114.7152,
"eval_samples_per_second": 4.585,
"eval_score": 43.40364936643555,
"eval_steps_per_second": 0.288,
"eval_sys_len": 4997,
"eval_totals": [
4997,
4471,
3945,
3419
],
"step": 2570
},
{
"epoch": 11.0,
"eval_bp": 0.5841943959505824,
"eval_counts": [
4717,
3950,
3372,
2808
],
"eval_loss": 0.08892822265625,
"eval_precisions": [
90.07065113614665,
83.846317130121,
80.57347670250896,
76.74227931128723
],
"eval_ref_len": 8052,
"eval_runtime": 113.7863,
"eval_samples_per_second": 4.623,
"eval_score": 48.29263576279789,
"eval_steps_per_second": 0.29,
"eval_sys_len": 5237,
"eval_totals": [
5237,
4711,
4185,
3659
],
"step": 2827
},
{
"epoch": 11.67,
"learning_rate": 1.5330739299610897e-05,
"loss": 0.1405,
"step": 3000
},
{
"epoch": 12.0,
"eval_bp": 0.5586477230994942,
"eval_counts": [
4630,
3875,
3303,
2749
],
"eval_loss": 0.08489990234375,
"eval_precisions": [
90.98054627628218,
84.9222003068157,
81.81818181818181,
78.29678154371973
],
"eval_ref_len": 8052,
"eval_runtime": 107.8868,
"eval_samples_per_second": 4.875,
"eval_score": 46.85747814062412,
"eval_steps_per_second": 0.306,
"eval_sys_len": 5089,
"eval_totals": [
5089,
4563,
4037,
3511
],
"step": 3084
},
{
"epoch": 13.0,
"eval_bp": 0.5695616786732568,
"eval_counts": [
4747,
4034,
3464,
2904
],
"eval_loss": 0.08123779296875,
"eval_precisions": [
92.1389751552795,
87.20276696930394,
84.48780487804878,
81.2534974818131
],
"eval_ref_len": 8052,
"eval_runtime": 108.2759,
"eval_samples_per_second": 4.858,
"eval_score": 49.08436700451685,
"eval_steps_per_second": 0.305,
"eval_sys_len": 5152,
"eval_totals": [
5152,
4626,
4100,
3574
],
"step": 3341
},
{
"epoch": 13.62,
"learning_rate": 1.4552529182879378e-05,
"loss": 0.1241,
"step": 3500
},
{
"epoch": 14.0,
"eval_bp": 0.5695616786732568,
"eval_counts": [
4738,
4024,
3452,
2894
],
"eval_loss": 0.07525634765625,
"eval_precisions": [
91.96428571428571,
86.98659749243407,
84.1951219512195,
80.97369893676553
],
"eval_ref_len": 8052,
"eval_runtime": 107.2784,
"eval_samples_per_second": 4.903,
"eval_score": 48.94590635934059,
"eval_steps_per_second": 0.308,
"eval_sys_len": 5152,
"eval_totals": [
5152,
4626,
4100,
3574
],
"step": 3598
},
{
"epoch": 15.0,
"eval_bp": 0.5961628688829712,
"eval_counts": [
4741,
4006,
3444,
2891
],
"eval_loss": 0.07562255859375,
"eval_precisions": [
89.33484077633314,
83.79000209161264,
80.94007050528789,
77.52748726200053
],
"eval_ref_len": 8052,
"eval_runtime": 106.2867,
"eval_samples_per_second": 4.949,
"eval_score": 49.354074470195435,
"eval_steps_per_second": 0.31,
"eval_sys_len": 5307,
"eval_totals": [
5307,
4781,
4255,
3729
],
"step": 3855
},
{
"epoch": 15.56,
"learning_rate": 1.377431906614786e-05,
"loss": 0.1147,
"step": 4000
},
{
"epoch": 16.0,
"eval_bp": 0.5655838797151567,
"eval_counts": [
4743,
4039,
3477,
2925
],
"eval_loss": 0.06915283203125,
"eval_precisions": [
92.47416650419186,
87.74712144253748,
85.28329654157469,
82.37116305266122
],
"eval_ref_len": 8052,
"eval_runtime": 107.836,
"eval_samples_per_second": 4.878,
"eval_score": 49.143959340541095,
"eval_steps_per_second": 0.306,
"eval_sys_len": 5129,
"eval_totals": [
5129,
4603,
4077,
3551
],
"step": 4112
},
{
"epoch": 17.0,
"eval_bp": 0.5932631592602093,
"eval_counts": [
4727,
3996,
3439,
2892
],
"eval_loss": 0.070068359375,
"eval_precisions": [
89.35727788279773,
83.87909319899245,
81.14676734308637,
77.90948275862068
],
"eval_ref_len": 8052,
"eval_runtime": 106.6266,
"eval_samples_per_second": 4.933,
"eval_score": 49.221934768405774,
"eval_steps_per_second": 0.309,
"eval_sys_len": 5290,
"eval_totals": [
5290,
4764,
4238,
3712
],
"step": 4369
},
{
"epoch": 17.51,
"learning_rate": 1.2996108949416343e-05,
"loss": 0.1065,
"step": 4500
},
{
"epoch": 18.0,
"eval_bp": 0.5610779943992972,
"eval_counts": [
4753,
4064,
3505,
2956
],
"eval_loss": 0.0623779296875,
"eval_precisions": [
93.14128943758574,
88.7917850120166,
86.52184645766478,
83.8581560283688
],
"eval_ref_len": 8052,
"eval_runtime": 106.949,
"eval_samples_per_second": 4.918,
"eval_score": 49.382124037917905,
"eval_steps_per_second": 0.309,
"eval_sys_len": 5103,
"eval_totals": [
5103,
4577,
4051,
3525
],
"step": 4626
},
{
"epoch": 19.0,
"eval_bp": 0.5788702549376445,
"eval_counts": [
4784,
4087,
3529,
2977
],
"eval_loss": 0.060699462890625,
"eval_precisions": [
91.89396849788706,
87.32905982905983,
84.95426095329803,
82.05622932745314
],
"eval_ref_len": 8052,
"eval_runtime": 106.4771,
"eval_samples_per_second": 4.94,
"eval_score": 50.0629990425284,
"eval_steps_per_second": 0.31,
"eval_sys_len": 5206,
"eval_totals": [
5206,
4680,
4154,
3628
],
"step": 4883
},
{
"epoch": 19.46,
"learning_rate": 1.2217898832684827e-05,
"loss": 0.0964,
"step": 5000
},
{
"epoch": 20.0,
"eval_bp": 0.5826501698750266,
"eval_counts": [
4773,
4068,
3509,
2957
],
"eval_loss": 0.0595703125,
"eval_precisions": [
91.29686304514155,
86.51637601020842,
84.02777777777777,
81.01369863013699
],
"eval_ref_len": 8052,
"eval_runtime": 106.5688,
"eval_samples_per_second": 4.936,
"eval_score": 49.89324555557292,
"eval_steps_per_second": 0.31,
"eval_sys_len": 5228,
"eval_totals": [
5228,
4702,
4176,
3650
],
"step": 5140
},
{
"epoch": 21.0,
"eval_bp": 0.5824785136401668,
"eval_counts": [
4780,
4078,
3521,
2972
],
"eval_loss": 0.057952880859375,
"eval_precisions": [
91.4482494738856,
86.74750053180175,
84.33532934131736,
81.44697177308852
],
"eval_ref_len": 8052,
"eval_runtime": 105.7144,
"eval_samples_per_second": 4.976,
"eval_score": 50.04481904653482,
"eval_steps_per_second": 0.312,
"eval_sys_len": 5227,
"eval_totals": [
5227,
4701,
4175,
3649
],
"step": 5397
},
{
"epoch": 21.4,
"learning_rate": 1.1439688715953308e-05,
"loss": 0.0925,
"step": 5500
},
{
"epoch": 22.0,
"eval_bp": 0.6060221334079605,
"eval_counts": [
4800,
4076,
3514,
2962
],
"eval_loss": 0.060546875,
"eval_precisions": [
89.46877912395153,
84.23227939656954,
81.47461163923023,
78.21494586744124
],
"eval_ref_len": 8052,
"eval_runtime": 105.9417,
"eval_samples_per_second": 4.965,
"eval_score": 50.4491686657978,
"eval_steps_per_second": 0.311,
"eval_sys_len": 5365,
"eval_totals": [
5365,
4839,
4313,
3787
],
"step": 5654
},
{
"epoch": 23.0,
"eval_bp": 0.5761166700049626,
"eval_counts": [
4832,
4155,
3593,
3036
],
"eval_loss": 0.053558349609375,
"eval_precisions": [
93.10211946050096,
89.08662092624357,
86.82938617689705,
84.0531561461794
],
"eval_ref_len": 8052,
"eval_runtime": 106.564,
"eval_samples_per_second": 4.936,
"eval_score": 50.81695573260325,
"eval_steps_per_second": 0.31,
"eval_sys_len": 5190,
"eval_totals": [
5190,
4664,
4138,
3612
],
"step": 5911
},
{
"epoch": 23.35,
"learning_rate": 1.066147859922179e-05,
"loss": 0.0871,
"step": 6000
},
{
"epoch": 24.0,
"eval_bp": 0.5719791556804446,
"eval_counts": [
4807,
4125,
3565,
3012
],
"eval_loss": 0.052276611328125,
"eval_precisions": [
93.05071622144793,
88.90086206896552,
86.65532328633932,
83.94648829431438
],
"eval_ref_len": 8052,
"eval_runtime": 107.1172,
"eval_samples_per_second": 4.911,
"eval_score": 50.37743722047891,
"eval_steps_per_second": 0.308,
"eval_sys_len": 5166,
"eval_totals": [
5166,
4640,
4114,
3588
],
"step": 6168
},
{
"epoch": 25.0,
"eval_bp": 0.579214183971878,
"eval_counts": [
4838,
4161,
3602,
3050
],
"eval_loss": 0.050567626953125,
"eval_precisions": [
92.89554531490015,
88.87227680478428,
86.66987487969202,
84.02203856749311
],
"eval_ref_len": 8052,
"eval_runtime": 106.5446,
"eval_samples_per_second": 4.937,
"eval_score": 51.0028956058344,
"eval_steps_per_second": 0.31,
"eval_sys_len": 5208,
"eval_totals": [
5208,
4682,
4156,
3630
],
"step": 6425
},
{
"epoch": 25.29,
"learning_rate": 9.883268482490273e-06,
"loss": 0.0843,
"step": 6500
},
{
"epoch": 26.0,
"eval_bp": 0.5607309845734951,
"eval_counts": [
4817,
4157,
3596,
3042
],
"eval_loss": 0.051177978515625,
"eval_precisions": [
94.43246422270143,
90.86338797814207,
88.81205235860706,
86.34686346863468
],
"eval_ref_len": 8052,
"eval_runtime": 107.5728,
"eval_samples_per_second": 4.89,
"eval_score": 50.50236718840154,
"eval_steps_per_second": 0.307,
"eval_sys_len": 5101,
"eval_totals": [
5101,
4575,
4049,
3523
],
"step": 6682
},
{
"epoch": 27.0,
"eval_bp": 0.5869366550146455,
"eval_counts": [
4855,
4170,
3608,
3055
],
"eval_loss": 0.0489501953125,
"eval_precisions": [
92.42337711783743,
88.21662788237784,
85.88431325874792,
83.12925170068027
],
"eval_ref_len": 8052,
"eval_runtime": 109.9833,
"eval_samples_per_second": 4.783,
"eval_score": 51.26739301541927,
"eval_steps_per_second": 0.3,
"eval_sys_len": 5253,
"eval_totals": [
5253,
4727,
4201,
3675
],
"step": 6939
},
{
"epoch": 27.24,
"learning_rate": 9.105058365758756e-06,
"loss": 0.0813,
"step": 7000
},
{
"epoch": 28.0,
"eval_bp": 0.5641984935077309,
"eval_counts": [
4838,
4184,
3624,
3070
],
"eval_loss": 0.047760009765625,
"eval_precisions": [
94.47373559851592,
91.05549510337323,
89.06365200294913,
86.64973186565058
],
"eval_ref_len": 8052,
"eval_runtime": 109.0318,
"eval_samples_per_second": 4.824,
"eval_score": 50.9275946797506,
"eval_steps_per_second": 0.303,
"eval_sys_len": 5121,
"eval_totals": [
5121,
4595,
4069,
3543
],
"step": 7196
},
{
"epoch": 29.0,
"eval_bp": 0.5711161019095474,
"eval_counts": [
4838,
4179,
3625,
3079
],
"eval_loss": 0.0462646484375,
"eval_precisions": [
93.74152296066654,
90.16181229773463,
88.22097834022877,
85.93357521629919
],
"eval_ref_len": 8052,
"eval_runtime": 107.2897,
"eval_samples_per_second": 4.903,
"eval_score": 51.09715872720289,
"eval_steps_per_second": 0.308,
"eval_sys_len": 5161,
"eval_totals": [
5161,
4635,
4109,
3583
],
"step": 7453
},
{
"epoch": 29.18,
"learning_rate": 8.326848249027239e-06,
"loss": 0.0778,
"step": 7500
},
{
"epoch": 30.0,
"eval_bp": 0.587279163676868,
"eval_counts": [
4863,
4185,
3626,
3075
],
"eval_loss": 0.04534912109375,
"eval_precisions": [
92.54043767840152,
88.49651089025164,
86.27171068284558,
83.62795757410933
],
"eval_ref_len": 8052,
"eval_runtime": 108.186,
"eval_samples_per_second": 4.862,
"eval_score": 51.488944843891275,
"eval_steps_per_second": 0.305,
"eval_sys_len": 5255,
"eval_totals": [
5255,
4729,
4203,
3677
],
"step": 7710
},
{
"epoch": 31.0,
"eval_bp": 0.587279163676868,
"eval_counts": [
4847,
4168,
3612,
3064
],
"eval_loss": 0.044677734375,
"eval_precisions": [
92.23596574690771,
88.137026855572,
85.93861527480371,
83.32880065270601
],
"eval_ref_len": 8052,
"eval_runtime": 106.6208,
"eval_samples_per_second": 4.933,
"eval_score": 51.298555626377826,
"eval_steps_per_second": 0.31,
"eval_sys_len": 5255,
"eval_totals": [
5255,
4729,
4203,
3677
],
"step": 7967
},
{
"epoch": 31.13,
"learning_rate": 7.54863813229572e-06,
"loss": 0.0753,
"step": 8000
},
{
"epoch": 32.0,
"eval_bp": 0.5690432735111319,
"eval_counts": [
4866,
4219,
3661,
3111
],
"eval_loss": 0.0438232421875,
"eval_precisions": [
94.50378714313459,
91.26108587497296,
89.35806687820356,
87.11845421450575
],
"eval_ref_len": 8052,
"eval_runtime": 107.5192,
"eval_samples_per_second": 4.892,
"eval_score": 51.50981459551784,
"eval_steps_per_second": 0.307,
"eval_sys_len": 5149,
"eval_totals": [
5149,
4623,
4097,
3571
],
"step": 8224
},
{
"epoch": 33.0,
"eval_bp": 0.5881351685074624,
"eval_counts": [
4869,
4201,
3645,
3097
],
"eval_loss": 0.04400634765625,
"eval_precisions": [
92.56653992395437,
88.74102239121251,
86.62072243346007,
84.11189570885388
],
"eval_ref_len": 8052,
"eval_runtime": 106.8688,
"eval_samples_per_second": 4.922,
"eval_score": 51.729891771805434,
"eval_steps_per_second": 0.309,
"eval_sys_len": 5260,
"eval_totals": [
5260,
4734,
4208,
3682
],
"step": 8481
},
{
"epoch": 33.07,
"learning_rate": 6.770428015564204e-06,
"loss": 0.0714,
"step": 8500
},
{
"epoch": 34.0,
"eval_bp": 0.5823068423133116,
"eval_counts": [
4881,
4226,
3674,
3130
],
"eval_loss": 0.041656494140625,
"eval_precisions": [
93.398392652124,
89.91489361702128,
88.02108289410637,
85.80043859649123
],
"eval_ref_len": 8052,
"eval_runtime": 107.0835,
"eval_samples_per_second": 4.912,
"eval_score": 51.96533200156475,
"eval_steps_per_second": 0.308,
"eval_sys_len": 5226,
"eval_totals": [
5226,
4700,
4174,
3648
],
"step": 8738
},
{
"epoch": 35.0,
"eval_bp": 0.5862514549555176,
"eval_counts": [
4902,
4242,
3685,
3133
],
"eval_loss": 0.042633056640625,
"eval_precisions": [
93.38921699371309,
89.81579504552191,
87.80081010245414,
85.34459275401798
],
"eval_ref_len": 8052,
"eval_runtime": 106.8404,
"eval_samples_per_second": 4.923,
"eval_score": 52.19933016750815,
"eval_steps_per_second": 0.309,
"eval_sys_len": 5249,
"eval_totals": [
5249,
4723,
4197,
3671
],
"step": 8995
},
{
"epoch": 35.02,
"learning_rate": 5.992217898832685e-06,
"loss": 0.0697,
"step": 9000
},
{
"epoch": 36.0,
"eval_bp": 0.5807611221368078,
"eval_counts": [
4907,
4257,
3699,
3149
],
"eval_loss": 0.04095458984375,
"eval_precisions": [
94.05788767490895,
90.74824131315285,
88.81152460984394,
86.53476229733444
],
"eval_ref_len": 8052,
"eval_runtime": 106.3976,
"eval_samples_per_second": 4.944,
"eval_score": 52.266194224133834,
"eval_steps_per_second": 0.31,
"eval_sys_len": 5217,
"eval_totals": [
5217,
4691,
4165,
3639
],
"step": 9252
},
{
"epoch": 36.96,
"learning_rate": 5.214007782101168e-06,
"loss": 0.0686,
"step": 9500
},
{
"epoch": 37.0,
"eval_bp": 0.5983772718445015,
"eval_counts": [
4899,
4227,
3672,
3123
],
"eval_loss": 0.042388916015625,
"eval_precisions": [
92.08646616541354,
88.17271589486859,
86.03561387066541,
83.45804382683058
],
"eval_ref_len": 8052,
"eval_runtime": 106.906,
"eval_samples_per_second": 4.92,
"eval_score": 52.28705860616529,
"eval_steps_per_second": 0.309,
"eval_sys_len": 5320,
"eval_totals": [
5320,
4794,
4268,
3742
],
"step": 9509
},
{
"epoch": 38.0,
"eval_bp": 0.5780101703802235,
"eval_counts": [
4913,
4273,
3718,
3172
],
"eval_loss": 0.0394287109375,
"eval_precisions": [
94.46260334551047,
91.40106951871658,
89.61195468787659,
87.55175269113994
],
"eval_ref_len": 8052,
"eval_runtime": 106.7621,
"eval_samples_per_second": 4.927,
"eval_score": 52.43798269679689,
"eval_steps_per_second": 0.309,
"eval_sys_len": 5201,
"eval_totals": [
5201,
4675,
4149,
3623
],
"step": 9766
},
{
"epoch": 38.91,
"learning_rate": 4.43579766536965e-06,
"loss": 0.0664,
"step": 10000
},
{
"epoch": 39.0,
"eval_bp": 0.5975258891581067,
"eval_counts": [
4912,
4243,
3689,
3141
],
"eval_loss": 0.040374755859375,
"eval_precisions": [
92.41768579492003,
88.59887241595322,
86.5353037766831,
84.05137811078406
],
"eval_ref_len": 8052,
"eval_runtime": 106.9692,
"eval_samples_per_second": 4.917,
"eval_score": 52.491270926490635,
"eval_steps_per_second": 0.309,
"eval_sys_len": 5315,
"eval_totals": [
5315,
4789,
4263,
3737
],
"step": 10023
},
{
"epoch": 40.0,
"eval_bp": 0.587279163676868,
"eval_counts": [
4913,
4259,
3711,
3170
],
"eval_loss": 0.0382080078125,
"eval_precisions": [
93.4919124643197,
90.0613237470924,
88.29407566024268,
86.21158553168344
],
"eval_ref_len": 8052,
"eval_runtime": 107.0316,
"eval_samples_per_second": 4.914,
"eval_score": 52.5468859983503,
"eval_steps_per_second": 0.308,
"eval_sys_len": 5255,
"eval_totals": [
5255,
4729,
4203,
3677
],
"step": 10280
},
{
"epoch": 40.86,
"learning_rate": 3.6575875486381323e-06,
"loss": 0.0658,
"step": 10500
},
{
"epoch": 41.0,
"eval_bp": 0.5811047209098391,
"eval_counts": [
4921,
4278,
3725,
3179
],
"eval_loss": 0.0377197265625,
"eval_precisions": [
94.29009388771796,
91.1570424035798,
89.39284857211423,
87.31117824773413
],
"eval_ref_len": 8052,
"eval_runtime": 106.7065,
"eval_samples_per_second": 4.929,
"eval_score": 52.59102479681527,
"eval_steps_per_second": 0.309,
"eval_sys_len": 5219,
"eval_totals": [
5219,
4693,
4167,
3641
],
"step": 10537
},
{
"epoch": 42.0,
"eval_bp": 0.5817917378355022,
"eval_counts": [
4908,
4261,
3712,
3169
],
"eval_loss": 0.037109375,
"eval_precisions": [
93.96898334290637,
90.7174792420694,
88.99544473747302,
86.94101508916324
],
"eval_ref_len": 8052,
"eval_runtime": 107.4197,
"eval_samples_per_second": 4.897,
"eval_score": 52.43056600057888,
"eval_steps_per_second": 0.307,
"eval_sys_len": 5223,
"eval_totals": [
5223,
4697,
4171,
3645
],
"step": 10794
},
{
"epoch": 42.8,
"learning_rate": 2.879377431906615e-06,
"loss": 0.0643,
"step": 11000
},
{
"epoch": 43.0,
"eval_bp": 0.5804174632159932,
"eval_counts": [
4905,
4264,
3714,
3172
],
"eval_loss": 0.037017822265625,
"eval_precisions": [
94.0556088207095,
90.936233738537,
89.21450876771559,
87.21473742095134
],
"eval_ref_len": 8052,
"eval_runtime": 106.7676,
"eval_samples_per_second": 4.927,
"eval_score": 52.42364666449266,
"eval_steps_per_second": 0.309,
"eval_sys_len": 5215,
"eval_totals": [
5215,
4689,
4163,
3637
],
"step": 11051
},
{
"epoch": 44.0,
"eval_bp": 0.5961628688829712,
"eval_counts": [
4930,
4270,
3718,
3173
],
"eval_loss": 0.0380859375,
"eval_precisions": [
92.89617486338798,
89.31185944363104,
87.37955346650999,
85.08983641727005
],
"eval_ref_len": 8052,
"eval_runtime": 107.4748,
"eval_samples_per_second": 4.894,
"eval_score": 52.834006019511406,
"eval_steps_per_second": 0.307,
"eval_sys_len": 5307,
"eval_totals": [
5307,
4781,
4255,
3729
],
"step": 11308
},
{
"epoch": 44.75,
"learning_rate": 2.1011673151750974e-06,
"loss": 0.0608,
"step": 11500
},
{
"epoch": 45.0,
"eval_bp": 0.5757722034899391,
"eval_counts": [
4915,
4280,
3729,
3186
],
"eval_loss": 0.036224365234375,
"eval_precisions": [
94.7378565921357,
91.8060918060918,
90.15957446808511,
88.25484764542936
],
"eval_ref_len": 8052,
"eval_runtime": 107.743,
"eval_samples_per_second": 4.882,
"eval_score": 52.515446703245765,
"eval_steps_per_second": 0.306,
"eval_sys_len": 5188,
"eval_totals": [
5188,
4662,
4136,
3610
],
"step": 11565
},
{
"epoch": 46.0,
"eval_bp": 0.5843659009664612,
"eval_counts": [
4924,
4278,
3730,
3188
],
"eval_loss": 0.036651611328125,
"eval_precisions": [
94.0053455517373,
90.78947368421052,
89.10654562828476,
87.10382513661202
],
"eval_ref_len": 8052,
"eval_runtime": 107.889,
"eval_samples_per_second": 4.875,
"eval_score": 52.71917275684773,
"eval_steps_per_second": 0.306,
"eval_sys_len": 5238,
"eval_totals": [
5238,
4712,
4186,
3660
],
"step": 11822
},
{
"epoch": 46.69,
"learning_rate": 1.32295719844358e-06,
"loss": 0.0622,
"step": 12000
},
{
"epoch": 47.0,
"eval_bp": 0.586080116901772,
"eval_counts": [
4938,
4295,
3745,
3201
],
"eval_loss": 0.036529541015625,
"eval_precisions": [
94.09298780487805,
90.95722151630665,
89.2516682554814,
87.22070844686648
],
"eval_ref_len": 8052,
"eval_runtime": 109.2986,
"eval_samples_per_second": 4.813,
"eval_score": 52.949832085516945,
"eval_steps_per_second": 0.302,
"eval_sys_len": 5248,
"eval_totals": [
5248,
4722,
4196,
3670
],
"step": 12079
},
{
"epoch": 48.0,
"eval_bp": 0.5817917378355022,
"eval_counts": [
4925,
4285,
3733,
3189
],
"eval_loss": 0.036285400390625,
"eval_precisions": [
94.29446678154318,
91.22844368746009,
89.49892112203308,
87.48971193415638
],
"eval_ref_len": 8052,
"eval_runtime": 108.6659,
"eval_samples_per_second": 4.841,
"eval_score": 52.70664408353883,
"eval_steps_per_second": 0.304,
"eval_sys_len": 5223,
"eval_totals": [
5223,
4697,
4171,
3645
],
"step": 12336
},
{
"epoch": 48.64,
"learning_rate": 5.447470817120623e-07,
"loss": 0.0625,
"step": 12500
}
],
"logging_steps": 500,
"max_steps": 12850,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 8.680648839008256e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}