|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 48.63813229571984, |
|
"eval_steps": 500, |
|
"global_step": 12500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_bp": 0.4559528592458481, |
|
"eval_counts": [ |
|
3210, |
|
1930, |
|
1488, |
|
1066 |
|
], |
|
"eval_loss": 2.412109375, |
|
"eval_precisions": [ |
|
71.17516629711751, |
|
48.44377510040161, |
|
43.03065355696935, |
|
36.24617477048623 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 118.4312, |
|
"eval_samples_per_second": 4.441, |
|
"eval_score": 21.9569286964753, |
|
"eval_steps_per_second": 0.279, |
|
"eval_sys_len": 4510, |
|
"eval_totals": [ |
|
4510, |
|
3984, |
|
3458, |
|
2941 |
|
], |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9221789883268484e-05, |
|
"loss": 2.8948, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bp": 0.43532380297987505, |
|
"eval_counts": [ |
|
1708, |
|
541, |
|
376, |
|
228 |
|
], |
|
"eval_loss": 1.5029296875, |
|
"eval_precisions": [ |
|
38.853503184713375, |
|
13.979328165374676, |
|
11.24401913875598, |
|
7.497533706017757 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 115.6212, |
|
"eval_samples_per_second": 4.549, |
|
"eval_score": 6.3679777301051494, |
|
"eval_steps_per_second": 0.285, |
|
"eval_sys_len": 4396, |
|
"eval_totals": [ |
|
4396, |
|
3870, |
|
3344, |
|
3041 |
|
], |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bp": 0.5596896112039585, |
|
"eval_counts": [ |
|
2795, |
|
1858, |
|
1416, |
|
991 |
|
], |
|
"eval_loss": 0.65576171875, |
|
"eval_precisions": [ |
|
54.85770363101079, |
|
40.66535346903042, |
|
35.02349740291862, |
|
28.177423940858688 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 115.5065, |
|
"eval_samples_per_second": 4.554, |
|
"eval_score": 21.558969055160425, |
|
"eval_steps_per_second": 0.286, |
|
"eval_sys_len": 5095, |
|
"eval_totals": [ |
|
5095, |
|
4569, |
|
4043, |
|
3517 |
|
], |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 1.8443579766536967e-05, |
|
"loss": 0.8924, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bp": 0.6650198090145658, |
|
"eval_counts": [ |
|
3257, |
|
2161, |
|
1704, |
|
1256 |
|
], |
|
"eval_loss": 0.485107421875, |
|
"eval_precisions": [ |
|
56.950515824444835, |
|
41.61371076449066, |
|
36.51167773730448, |
|
30.330837961844964 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 115.5544, |
|
"eval_samples_per_second": 4.552, |
|
"eval_score": 26.766843398496384, |
|
"eval_steps_per_second": 0.286, |
|
"eval_sys_len": 5719, |
|
"eval_totals": [ |
|
5719, |
|
5193, |
|
4667, |
|
4141 |
|
], |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bp": 0.41788118238391686, |
|
"eval_counts": [ |
|
3699, |
|
2841, |
|
2368, |
|
1900 |
|
], |
|
"eval_loss": 0.293701171875, |
|
"eval_precisions": [ |
|
86.02325581395348, |
|
75.27821939586646, |
|
72.9064039408867, |
|
69.80161645848641 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 115.1699, |
|
"eval_samples_per_second": 4.567, |
|
"eval_score": 31.661530724736487, |
|
"eval_steps_per_second": 0.287, |
|
"eval_sys_len": 4300, |
|
"eval_totals": [ |
|
4300, |
|
3774, |
|
3248, |
|
2722 |
|
], |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 1.766536964980545e-05, |
|
"loss": 0.4295, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bp": 0.4089581075583404, |
|
"eval_counts": [ |
|
3783, |
|
2928, |
|
2446, |
|
1971 |
|
], |
|
"eval_loss": 0.2445068359375, |
|
"eval_precisions": [ |
|
88.9908256880734, |
|
78.60402684563758, |
|
76.46139418568302, |
|
73.73737373737374 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 115.449, |
|
"eval_samples_per_second": 4.556, |
|
"eval_score": 32.408490251125635, |
|
"eval_steps_per_second": 0.286, |
|
"eval_sys_len": 4251, |
|
"eval_totals": [ |
|
4251, |
|
3725, |
|
3199, |
|
2673 |
|
], |
|
"step": 1542 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bp": 0.417153226107242, |
|
"eval_counts": [ |
|
3818, |
|
2965, |
|
2480, |
|
2002 |
|
], |
|
"eval_loss": 0.22021484375, |
|
"eval_precisions": [ |
|
88.8733705772812, |
|
78.64721485411141, |
|
76.44882860665845, |
|
73.6571008094187 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 115.4073, |
|
"eval_samples_per_second": 4.558, |
|
"eval_score": 33.04119304311829, |
|
"eval_steps_per_second": 0.286, |
|
"eval_sys_len": 4296, |
|
"eval_totals": [ |
|
4296, |
|
3770, |
|
3244, |
|
2718 |
|
], |
|
"step": 1799 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 1.6887159533073932e-05, |
|
"loss": 0.2991, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bp": 0.42679131632296613, |
|
"eval_counts": [ |
|
3874, |
|
3019, |
|
2524, |
|
2038 |
|
], |
|
"eval_loss": 0.2076416015625, |
|
"eval_precisions": [ |
|
89.07794895378248, |
|
78.9693957624902, |
|
76.55444343342432, |
|
73.54745579213281 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 114.0107, |
|
"eval_samples_per_second": 4.614, |
|
"eval_score": 33.85769159645968, |
|
"eval_steps_per_second": 0.289, |
|
"eval_sys_len": 4349, |
|
"eval_totals": [ |
|
4349, |
|
3823, |
|
3297, |
|
2771 |
|
], |
|
"step": 2056 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bp": 0.46028228872696303, |
|
"eval_counts": [ |
|
4065, |
|
3225, |
|
2700, |
|
2186 |
|
], |
|
"eval_loss": 0.1663818359375, |
|
"eval_precisions": [ |
|
89.65593295103662, |
|
80.46407185628742, |
|
77.54164273406089, |
|
73.9512855209743 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 114.8195, |
|
"eval_samples_per_second": 4.581, |
|
"eval_score": 36.91388078489759, |
|
"eval_steps_per_second": 0.287, |
|
"eval_sys_len": 4534, |
|
"eval_totals": [ |
|
4534, |
|
4008, |
|
3482, |
|
2956 |
|
], |
|
"step": 2313 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 1.6108949416342414e-05, |
|
"loss": 0.2277, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bp": 0.5426087135017283, |
|
"eval_counts": [ |
|
4419, |
|
3611, |
|
3062, |
|
2525 |
|
], |
|
"eval_loss": 0.1044921875, |
|
"eval_precisions": [ |
|
88.43305983590155, |
|
80.76492954596287, |
|
77.617237008872, |
|
73.85200350979818 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 114.7152, |
|
"eval_samples_per_second": 4.585, |
|
"eval_score": 43.40364936643555, |
|
"eval_steps_per_second": 0.288, |
|
"eval_sys_len": 4997, |
|
"eval_totals": [ |
|
4997, |
|
4471, |
|
3945, |
|
3419 |
|
], |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bp": 0.5841943959505824, |
|
"eval_counts": [ |
|
4717, |
|
3950, |
|
3372, |
|
2808 |
|
], |
|
"eval_loss": 0.08892822265625, |
|
"eval_precisions": [ |
|
90.07065113614665, |
|
83.846317130121, |
|
80.57347670250896, |
|
76.74227931128723 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 113.7863, |
|
"eval_samples_per_second": 4.623, |
|
"eval_score": 48.29263576279789, |
|
"eval_steps_per_second": 0.29, |
|
"eval_sys_len": 5237, |
|
"eval_totals": [ |
|
5237, |
|
4711, |
|
4185, |
|
3659 |
|
], |
|
"step": 2827 |
|
}, |
|
{ |
|
"epoch": 11.67, |
|
"learning_rate": 1.5330739299610897e-05, |
|
"loss": 0.1405, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bp": 0.5586477230994942, |
|
"eval_counts": [ |
|
4630, |
|
3875, |
|
3303, |
|
2749 |
|
], |
|
"eval_loss": 0.08489990234375, |
|
"eval_precisions": [ |
|
90.98054627628218, |
|
84.9222003068157, |
|
81.81818181818181, |
|
78.29678154371973 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 107.8868, |
|
"eval_samples_per_second": 4.875, |
|
"eval_score": 46.85747814062412, |
|
"eval_steps_per_second": 0.306, |
|
"eval_sys_len": 5089, |
|
"eval_totals": [ |
|
5089, |
|
4563, |
|
4037, |
|
3511 |
|
], |
|
"step": 3084 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bp": 0.5695616786732568, |
|
"eval_counts": [ |
|
4747, |
|
4034, |
|
3464, |
|
2904 |
|
], |
|
"eval_loss": 0.08123779296875, |
|
"eval_precisions": [ |
|
92.1389751552795, |
|
87.20276696930394, |
|
84.48780487804878, |
|
81.2534974818131 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 108.2759, |
|
"eval_samples_per_second": 4.858, |
|
"eval_score": 49.08436700451685, |
|
"eval_steps_per_second": 0.305, |
|
"eval_sys_len": 5152, |
|
"eval_totals": [ |
|
5152, |
|
4626, |
|
4100, |
|
3574 |
|
], |
|
"step": 3341 |
|
}, |
|
{ |
|
"epoch": 13.62, |
|
"learning_rate": 1.4552529182879378e-05, |
|
"loss": 0.1241, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bp": 0.5695616786732568, |
|
"eval_counts": [ |
|
4738, |
|
4024, |
|
3452, |
|
2894 |
|
], |
|
"eval_loss": 0.07525634765625, |
|
"eval_precisions": [ |
|
91.96428571428571, |
|
86.98659749243407, |
|
84.1951219512195, |
|
80.97369893676553 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 107.2784, |
|
"eval_samples_per_second": 4.903, |
|
"eval_score": 48.94590635934059, |
|
"eval_steps_per_second": 0.308, |
|
"eval_sys_len": 5152, |
|
"eval_totals": [ |
|
5152, |
|
4626, |
|
4100, |
|
3574 |
|
], |
|
"step": 3598 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bp": 0.5961628688829712, |
|
"eval_counts": [ |
|
4741, |
|
4006, |
|
3444, |
|
2891 |
|
], |
|
"eval_loss": 0.07562255859375, |
|
"eval_precisions": [ |
|
89.33484077633314, |
|
83.79000209161264, |
|
80.94007050528789, |
|
77.52748726200053 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 106.2867, |
|
"eval_samples_per_second": 4.949, |
|
"eval_score": 49.354074470195435, |
|
"eval_steps_per_second": 0.31, |
|
"eval_sys_len": 5307, |
|
"eval_totals": [ |
|
5307, |
|
4781, |
|
4255, |
|
3729 |
|
], |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"learning_rate": 1.377431906614786e-05, |
|
"loss": 0.1147, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bp": 0.5655838797151567, |
|
"eval_counts": [ |
|
4743, |
|
4039, |
|
3477, |
|
2925 |
|
], |
|
"eval_loss": 0.06915283203125, |
|
"eval_precisions": [ |
|
92.47416650419186, |
|
87.74712144253748, |
|
85.28329654157469, |
|
82.37116305266122 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 107.836, |
|
"eval_samples_per_second": 4.878, |
|
"eval_score": 49.143959340541095, |
|
"eval_steps_per_second": 0.306, |
|
"eval_sys_len": 5129, |
|
"eval_totals": [ |
|
5129, |
|
4603, |
|
4077, |
|
3551 |
|
], |
|
"step": 4112 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bp": 0.5932631592602093, |
|
"eval_counts": [ |
|
4727, |
|
3996, |
|
3439, |
|
2892 |
|
], |
|
"eval_loss": 0.070068359375, |
|
"eval_precisions": [ |
|
89.35727788279773, |
|
83.87909319899245, |
|
81.14676734308637, |
|
77.90948275862068 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 106.6266, |
|
"eval_samples_per_second": 4.933, |
|
"eval_score": 49.221934768405774, |
|
"eval_steps_per_second": 0.309, |
|
"eval_sys_len": 5290, |
|
"eval_totals": [ |
|
5290, |
|
4764, |
|
4238, |
|
3712 |
|
], |
|
"step": 4369 |
|
}, |
|
{ |
|
"epoch": 17.51, |
|
"learning_rate": 1.2996108949416343e-05, |
|
"loss": 0.1065, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bp": 0.5610779943992972, |
|
"eval_counts": [ |
|
4753, |
|
4064, |
|
3505, |
|
2956 |
|
], |
|
"eval_loss": 0.0623779296875, |
|
"eval_precisions": [ |
|
93.14128943758574, |
|
88.7917850120166, |
|
86.52184645766478, |
|
83.8581560283688 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 106.949, |
|
"eval_samples_per_second": 4.918, |
|
"eval_score": 49.382124037917905, |
|
"eval_steps_per_second": 0.309, |
|
"eval_sys_len": 5103, |
|
"eval_totals": [ |
|
5103, |
|
4577, |
|
4051, |
|
3525 |
|
], |
|
"step": 4626 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bp": 0.5788702549376445, |
|
"eval_counts": [ |
|
4784, |
|
4087, |
|
3529, |
|
2977 |
|
], |
|
"eval_loss": 0.060699462890625, |
|
"eval_precisions": [ |
|
91.89396849788706, |
|
87.32905982905983, |
|
84.95426095329803, |
|
82.05622932745314 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 106.4771, |
|
"eval_samples_per_second": 4.94, |
|
"eval_score": 50.0629990425284, |
|
"eval_steps_per_second": 0.31, |
|
"eval_sys_len": 5206, |
|
"eval_totals": [ |
|
5206, |
|
4680, |
|
4154, |
|
3628 |
|
], |
|
"step": 4883 |
|
}, |
|
{ |
|
"epoch": 19.46, |
|
"learning_rate": 1.2217898832684827e-05, |
|
"loss": 0.0964, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_bp": 0.5826501698750266, |
|
"eval_counts": [ |
|
4773, |
|
4068, |
|
3509, |
|
2957 |
|
], |
|
"eval_loss": 0.0595703125, |
|
"eval_precisions": [ |
|
91.29686304514155, |
|
86.51637601020842, |
|
84.02777777777777, |
|
81.01369863013699 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 106.5688, |
|
"eval_samples_per_second": 4.936, |
|
"eval_score": 49.89324555557292, |
|
"eval_steps_per_second": 0.31, |
|
"eval_sys_len": 5228, |
|
"eval_totals": [ |
|
5228, |
|
4702, |
|
4176, |
|
3650 |
|
], |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_bp": 0.5824785136401668, |
|
"eval_counts": [ |
|
4780, |
|
4078, |
|
3521, |
|
2972 |
|
], |
|
"eval_loss": 0.057952880859375, |
|
"eval_precisions": [ |
|
91.4482494738856, |
|
86.74750053180175, |
|
84.33532934131736, |
|
81.44697177308852 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 105.7144, |
|
"eval_samples_per_second": 4.976, |
|
"eval_score": 50.04481904653482, |
|
"eval_steps_per_second": 0.312, |
|
"eval_sys_len": 5227, |
|
"eval_totals": [ |
|
5227, |
|
4701, |
|
4175, |
|
3649 |
|
], |
|
"step": 5397 |
|
}, |
|
{ |
|
"epoch": 21.4, |
|
"learning_rate": 1.1439688715953308e-05, |
|
"loss": 0.0925, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_bp": 0.6060221334079605, |
|
"eval_counts": [ |
|
4800, |
|
4076, |
|
3514, |
|
2962 |
|
], |
|
"eval_loss": 0.060546875, |
|
"eval_precisions": [ |
|
89.46877912395153, |
|
84.23227939656954, |
|
81.47461163923023, |
|
78.21494586744124 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 105.9417, |
|
"eval_samples_per_second": 4.965, |
|
"eval_score": 50.4491686657978, |
|
"eval_steps_per_second": 0.311, |
|
"eval_sys_len": 5365, |
|
"eval_totals": [ |
|
5365, |
|
4839, |
|
4313, |
|
3787 |
|
], |
|
"step": 5654 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_bp": 0.5761166700049626, |
|
"eval_counts": [ |
|
4832, |
|
4155, |
|
3593, |
|
3036 |
|
], |
|
"eval_loss": 0.053558349609375, |
|
"eval_precisions": [ |
|
93.10211946050096, |
|
89.08662092624357, |
|
86.82938617689705, |
|
84.0531561461794 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 106.564, |
|
"eval_samples_per_second": 4.936, |
|
"eval_score": 50.81695573260325, |
|
"eval_steps_per_second": 0.31, |
|
"eval_sys_len": 5190, |
|
"eval_totals": [ |
|
5190, |
|
4664, |
|
4138, |
|
3612 |
|
], |
|
"step": 5911 |
|
}, |
|
{ |
|
"epoch": 23.35, |
|
"learning_rate": 1.066147859922179e-05, |
|
"loss": 0.0871, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_bp": 0.5719791556804446, |
|
"eval_counts": [ |
|
4807, |
|
4125, |
|
3565, |
|
3012 |
|
], |
|
"eval_loss": 0.052276611328125, |
|
"eval_precisions": [ |
|
93.05071622144793, |
|
88.90086206896552, |
|
86.65532328633932, |
|
83.94648829431438 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 107.1172, |
|
"eval_samples_per_second": 4.911, |
|
"eval_score": 50.37743722047891, |
|
"eval_steps_per_second": 0.308, |
|
"eval_sys_len": 5166, |
|
"eval_totals": [ |
|
5166, |
|
4640, |
|
4114, |
|
3588 |
|
], |
|
"step": 6168 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_bp": 0.579214183971878, |
|
"eval_counts": [ |
|
4838, |
|
4161, |
|
3602, |
|
3050 |
|
], |
|
"eval_loss": 0.050567626953125, |
|
"eval_precisions": [ |
|
92.89554531490015, |
|
88.87227680478428, |
|
86.66987487969202, |
|
84.02203856749311 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 106.5446, |
|
"eval_samples_per_second": 4.937, |
|
"eval_score": 51.0028956058344, |
|
"eval_steps_per_second": 0.31, |
|
"eval_sys_len": 5208, |
|
"eval_totals": [ |
|
5208, |
|
4682, |
|
4156, |
|
3630 |
|
], |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 25.29, |
|
"learning_rate": 9.883268482490273e-06, |
|
"loss": 0.0843, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_bp": 0.5607309845734951, |
|
"eval_counts": [ |
|
4817, |
|
4157, |
|
3596, |
|
3042 |
|
], |
|
"eval_loss": 0.051177978515625, |
|
"eval_precisions": [ |
|
94.43246422270143, |
|
90.86338797814207, |
|
88.81205235860706, |
|
86.34686346863468 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 107.5728, |
|
"eval_samples_per_second": 4.89, |
|
"eval_score": 50.50236718840154, |
|
"eval_steps_per_second": 0.307, |
|
"eval_sys_len": 5101, |
|
"eval_totals": [ |
|
5101, |
|
4575, |
|
4049, |
|
3523 |
|
], |
|
"step": 6682 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_bp": 0.5869366550146455, |
|
"eval_counts": [ |
|
4855, |
|
4170, |
|
3608, |
|
3055 |
|
], |
|
"eval_loss": 0.0489501953125, |
|
"eval_precisions": [ |
|
92.42337711783743, |
|
88.21662788237784, |
|
85.88431325874792, |
|
83.12925170068027 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 109.9833, |
|
"eval_samples_per_second": 4.783, |
|
"eval_score": 51.26739301541927, |
|
"eval_steps_per_second": 0.3, |
|
"eval_sys_len": 5253, |
|
"eval_totals": [ |
|
5253, |
|
4727, |
|
4201, |
|
3675 |
|
], |
|
"step": 6939 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 9.105058365758756e-06, |
|
"loss": 0.0813, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_bp": 0.5641984935077309, |
|
"eval_counts": [ |
|
4838, |
|
4184, |
|
3624, |
|
3070 |
|
], |
|
"eval_loss": 0.047760009765625, |
|
"eval_precisions": [ |
|
94.47373559851592, |
|
91.05549510337323, |
|
89.06365200294913, |
|
86.64973186565058 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 109.0318, |
|
"eval_samples_per_second": 4.824, |
|
"eval_score": 50.9275946797506, |
|
"eval_steps_per_second": 0.303, |
|
"eval_sys_len": 5121, |
|
"eval_totals": [ |
|
5121, |
|
4595, |
|
4069, |
|
3543 |
|
], |
|
"step": 7196 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_bp": 0.5711161019095474, |
|
"eval_counts": [ |
|
4838, |
|
4179, |
|
3625, |
|
3079 |
|
], |
|
"eval_loss": 0.0462646484375, |
|
"eval_precisions": [ |
|
93.74152296066654, |
|
90.16181229773463, |
|
88.22097834022877, |
|
85.93357521629919 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 107.2897, |
|
"eval_samples_per_second": 4.903, |
|
"eval_score": 51.09715872720289, |
|
"eval_steps_per_second": 0.308, |
|
"eval_sys_len": 5161, |
|
"eval_totals": [ |
|
5161, |
|
4635, |
|
4109, |
|
3583 |
|
], |
|
"step": 7453 |
|
}, |
|
{ |
|
"epoch": 29.18, |
|
"learning_rate": 8.326848249027239e-06, |
|
"loss": 0.0778, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_bp": 0.587279163676868, |
|
"eval_counts": [ |
|
4863, |
|
4185, |
|
3626, |
|
3075 |
|
], |
|
"eval_loss": 0.04534912109375, |
|
"eval_precisions": [ |
|
92.54043767840152, |
|
88.49651089025164, |
|
86.27171068284558, |
|
83.62795757410933 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 108.186, |
|
"eval_samples_per_second": 4.862, |
|
"eval_score": 51.488944843891275, |
|
"eval_steps_per_second": 0.305, |
|
"eval_sys_len": 5255, |
|
"eval_totals": [ |
|
5255, |
|
4729, |
|
4203, |
|
3677 |
|
], |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_bp": 0.587279163676868, |
|
"eval_counts": [ |
|
4847, |
|
4168, |
|
3612, |
|
3064 |
|
], |
|
"eval_loss": 0.044677734375, |
|
"eval_precisions": [ |
|
92.23596574690771, |
|
88.137026855572, |
|
85.93861527480371, |
|
83.32880065270601 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 106.6208, |
|
"eval_samples_per_second": 4.933, |
|
"eval_score": 51.298555626377826, |
|
"eval_steps_per_second": 0.31, |
|
"eval_sys_len": 5255, |
|
"eval_totals": [ |
|
5255, |
|
4729, |
|
4203, |
|
3677 |
|
], |
|
"step": 7967 |
|
}, |
|
{ |
|
"epoch": 31.13, |
|
"learning_rate": 7.54863813229572e-06, |
|
"loss": 0.0753, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_bp": 0.5690432735111319, |
|
"eval_counts": [ |
|
4866, |
|
4219, |
|
3661, |
|
3111 |
|
], |
|
"eval_loss": 0.0438232421875, |
|
"eval_precisions": [ |
|
94.50378714313459, |
|
91.26108587497296, |
|
89.35806687820356, |
|
87.11845421450575 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 107.5192, |
|
"eval_samples_per_second": 4.892, |
|
"eval_score": 51.50981459551784, |
|
"eval_steps_per_second": 0.307, |
|
"eval_sys_len": 5149, |
|
"eval_totals": [ |
|
5149, |
|
4623, |
|
4097, |
|
3571 |
|
], |
|
"step": 8224 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_bp": 0.5881351685074624, |
|
"eval_counts": [ |
|
4869, |
|
4201, |
|
3645, |
|
3097 |
|
], |
|
"eval_loss": 0.04400634765625, |
|
"eval_precisions": [ |
|
92.56653992395437, |
|
88.74102239121251, |
|
86.62072243346007, |
|
84.11189570885388 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 106.8688, |
|
"eval_samples_per_second": 4.922, |
|
"eval_score": 51.729891771805434, |
|
"eval_steps_per_second": 0.309, |
|
"eval_sys_len": 5260, |
|
"eval_totals": [ |
|
5260, |
|
4734, |
|
4208, |
|
3682 |
|
], |
|
"step": 8481 |
|
}, |
|
{ |
|
"epoch": 33.07, |
|
"learning_rate": 6.770428015564204e-06, |
|
"loss": 0.0714, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_bp": 0.5823068423133116, |
|
"eval_counts": [ |
|
4881, |
|
4226, |
|
3674, |
|
3130 |
|
], |
|
"eval_loss": 0.041656494140625, |
|
"eval_precisions": [ |
|
93.398392652124, |
|
89.91489361702128, |
|
88.02108289410637, |
|
85.80043859649123 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 107.0835, |
|
"eval_samples_per_second": 4.912, |
|
"eval_score": 51.96533200156475, |
|
"eval_steps_per_second": 0.308, |
|
"eval_sys_len": 5226, |
|
"eval_totals": [ |
|
5226, |
|
4700, |
|
4174, |
|
3648 |
|
], |
|
"step": 8738 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_bp": 0.5862514549555176, |
|
"eval_counts": [ |
|
4902, |
|
4242, |
|
3685, |
|
3133 |
|
], |
|
"eval_loss": 0.042633056640625, |
|
"eval_precisions": [ |
|
93.38921699371309, |
|
89.81579504552191, |
|
87.80081010245414, |
|
85.34459275401798 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 106.8404, |
|
"eval_samples_per_second": 4.923, |
|
"eval_score": 52.19933016750815, |
|
"eval_steps_per_second": 0.309, |
|
"eval_sys_len": 5249, |
|
"eval_totals": [ |
|
5249, |
|
4723, |
|
4197, |
|
3671 |
|
], |
|
"step": 8995 |
|
}, |
|
{ |
|
"epoch": 35.02, |
|
"learning_rate": 5.992217898832685e-06, |
|
"loss": 0.0697, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_bp": 0.5807611221368078, |
|
"eval_counts": [ |
|
4907, |
|
4257, |
|
3699, |
|
3149 |
|
], |
|
"eval_loss": 0.04095458984375, |
|
"eval_precisions": [ |
|
94.05788767490895, |
|
90.74824131315285, |
|
88.81152460984394, |
|
86.53476229733444 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 106.3976, |
|
"eval_samples_per_second": 4.944, |
|
"eval_score": 52.266194224133834, |
|
"eval_steps_per_second": 0.31, |
|
"eval_sys_len": 5217, |
|
"eval_totals": [ |
|
5217, |
|
4691, |
|
4165, |
|
3639 |
|
], |
|
"step": 9252 |
|
}, |
|
{ |
|
"epoch": 36.96, |
|
"learning_rate": 5.214007782101168e-06, |
|
"loss": 0.0686, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_bp": 0.5983772718445015, |
|
"eval_counts": [ |
|
4899, |
|
4227, |
|
3672, |
|
3123 |
|
], |
|
"eval_loss": 0.042388916015625, |
|
"eval_precisions": [ |
|
92.08646616541354, |
|
88.17271589486859, |
|
86.03561387066541, |
|
83.45804382683058 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 106.906, |
|
"eval_samples_per_second": 4.92, |
|
"eval_score": 52.28705860616529, |
|
"eval_steps_per_second": 0.309, |
|
"eval_sys_len": 5320, |
|
"eval_totals": [ |
|
5320, |
|
4794, |
|
4268, |
|
3742 |
|
], |
|
"step": 9509 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_bp": 0.5780101703802235, |
|
"eval_counts": [ |
|
4913, |
|
4273, |
|
3718, |
|
3172 |
|
], |
|
"eval_loss": 0.0394287109375, |
|
"eval_precisions": [ |
|
94.46260334551047, |
|
91.40106951871658, |
|
89.61195468787659, |
|
87.55175269113994 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 106.7621, |
|
"eval_samples_per_second": 4.927, |
|
"eval_score": 52.43798269679689, |
|
"eval_steps_per_second": 0.309, |
|
"eval_sys_len": 5201, |
|
"eval_totals": [ |
|
5201, |
|
4675, |
|
4149, |
|
3623 |
|
], |
|
"step": 9766 |
|
}, |
|
{ |
|
"epoch": 38.91, |
|
"learning_rate": 4.43579766536965e-06, |
|
"loss": 0.0664, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_bp": 0.5975258891581067, |
|
"eval_counts": [ |
|
4912, |
|
4243, |
|
3689, |
|
3141 |
|
], |
|
"eval_loss": 0.040374755859375, |
|
"eval_precisions": [ |
|
92.41768579492003, |
|
88.59887241595322, |
|
86.5353037766831, |
|
84.05137811078406 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 106.9692, |
|
"eval_samples_per_second": 4.917, |
|
"eval_score": 52.491270926490635, |
|
"eval_steps_per_second": 0.309, |
|
"eval_sys_len": 5315, |
|
"eval_totals": [ |
|
5315, |
|
4789, |
|
4263, |
|
3737 |
|
], |
|
"step": 10023 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_bp": 0.587279163676868, |
|
"eval_counts": [ |
|
4913, |
|
4259, |
|
3711, |
|
3170 |
|
], |
|
"eval_loss": 0.0382080078125, |
|
"eval_precisions": [ |
|
93.4919124643197, |
|
90.0613237470924, |
|
88.29407566024268, |
|
86.21158553168344 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 107.0316, |
|
"eval_samples_per_second": 4.914, |
|
"eval_score": 52.5468859983503, |
|
"eval_steps_per_second": 0.308, |
|
"eval_sys_len": 5255, |
|
"eval_totals": [ |
|
5255, |
|
4729, |
|
4203, |
|
3677 |
|
], |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 40.86, |
|
"learning_rate": 3.6575875486381323e-06, |
|
"loss": 0.0658, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_bp": 0.5811047209098391, |
|
"eval_counts": [ |
|
4921, |
|
4278, |
|
3725, |
|
3179 |
|
], |
|
"eval_loss": 0.0377197265625, |
|
"eval_precisions": [ |
|
94.29009388771796, |
|
91.1570424035798, |
|
89.39284857211423, |
|
87.31117824773413 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 106.7065, |
|
"eval_samples_per_second": 4.929, |
|
"eval_score": 52.59102479681527, |
|
"eval_steps_per_second": 0.309, |
|
"eval_sys_len": 5219, |
|
"eval_totals": [ |
|
5219, |
|
4693, |
|
4167, |
|
3641 |
|
], |
|
"step": 10537 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_bp": 0.5817917378355022, |
|
"eval_counts": [ |
|
4908, |
|
4261, |
|
3712, |
|
3169 |
|
], |
|
"eval_loss": 0.037109375, |
|
"eval_precisions": [ |
|
93.96898334290637, |
|
90.7174792420694, |
|
88.99544473747302, |
|
86.94101508916324 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 107.4197, |
|
"eval_samples_per_second": 4.897, |
|
"eval_score": 52.43056600057888, |
|
"eval_steps_per_second": 0.307, |
|
"eval_sys_len": 5223, |
|
"eval_totals": [ |
|
5223, |
|
4697, |
|
4171, |
|
3645 |
|
], |
|
"step": 10794 |
|
}, |
|
{ |
|
"epoch": 42.8, |
|
"learning_rate": 2.879377431906615e-06, |
|
"loss": 0.0643, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_bp": 0.5804174632159932, |
|
"eval_counts": [ |
|
4905, |
|
4264, |
|
3714, |
|
3172 |
|
], |
|
"eval_loss": 0.037017822265625, |
|
"eval_precisions": [ |
|
94.0556088207095, |
|
90.936233738537, |
|
89.21450876771559, |
|
87.21473742095134 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 106.7676, |
|
"eval_samples_per_second": 4.927, |
|
"eval_score": 52.42364666449266, |
|
"eval_steps_per_second": 0.309, |
|
"eval_sys_len": 5215, |
|
"eval_totals": [ |
|
5215, |
|
4689, |
|
4163, |
|
3637 |
|
], |
|
"step": 11051 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_bp": 0.5961628688829712, |
|
"eval_counts": [ |
|
4930, |
|
4270, |
|
3718, |
|
3173 |
|
], |
|
"eval_loss": 0.0380859375, |
|
"eval_precisions": [ |
|
92.89617486338798, |
|
89.31185944363104, |
|
87.37955346650999, |
|
85.08983641727005 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 107.4748, |
|
"eval_samples_per_second": 4.894, |
|
"eval_score": 52.834006019511406, |
|
"eval_steps_per_second": 0.307, |
|
"eval_sys_len": 5307, |
|
"eval_totals": [ |
|
5307, |
|
4781, |
|
4255, |
|
3729 |
|
], |
|
"step": 11308 |
|
}, |
|
{ |
|
"epoch": 44.75, |
|
"learning_rate": 2.1011673151750974e-06, |
|
"loss": 0.0608, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_bp": 0.5757722034899391, |
|
"eval_counts": [ |
|
4915, |
|
4280, |
|
3729, |
|
3186 |
|
], |
|
"eval_loss": 0.036224365234375, |
|
"eval_precisions": [ |
|
94.7378565921357, |
|
91.8060918060918, |
|
90.15957446808511, |
|
88.25484764542936 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 107.743, |
|
"eval_samples_per_second": 4.882, |
|
"eval_score": 52.515446703245765, |
|
"eval_steps_per_second": 0.306, |
|
"eval_sys_len": 5188, |
|
"eval_totals": [ |
|
5188, |
|
4662, |
|
4136, |
|
3610 |
|
], |
|
"step": 11565 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_bp": 0.5843659009664612, |
|
"eval_counts": [ |
|
4924, |
|
4278, |
|
3730, |
|
3188 |
|
], |
|
"eval_loss": 0.036651611328125, |
|
"eval_precisions": [ |
|
94.0053455517373, |
|
90.78947368421052, |
|
89.10654562828476, |
|
87.10382513661202 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 107.889, |
|
"eval_samples_per_second": 4.875, |
|
"eval_score": 52.71917275684773, |
|
"eval_steps_per_second": 0.306, |
|
"eval_sys_len": 5238, |
|
"eval_totals": [ |
|
5238, |
|
4712, |
|
4186, |
|
3660 |
|
], |
|
"step": 11822 |
|
}, |
|
{ |
|
"epoch": 46.69, |
|
"learning_rate": 1.32295719844358e-06, |
|
"loss": 0.0622, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_bp": 0.586080116901772, |
|
"eval_counts": [ |
|
4938, |
|
4295, |
|
3745, |
|
3201 |
|
], |
|
"eval_loss": 0.036529541015625, |
|
"eval_precisions": [ |
|
94.09298780487805, |
|
90.95722151630665, |
|
89.2516682554814, |
|
87.22070844686648 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 109.2986, |
|
"eval_samples_per_second": 4.813, |
|
"eval_score": 52.949832085516945, |
|
"eval_steps_per_second": 0.302, |
|
"eval_sys_len": 5248, |
|
"eval_totals": [ |
|
5248, |
|
4722, |
|
4196, |
|
3670 |
|
], |
|
"step": 12079 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_bp": 0.5817917378355022, |
|
"eval_counts": [ |
|
4925, |
|
4285, |
|
3733, |
|
3189 |
|
], |
|
"eval_loss": 0.036285400390625, |
|
"eval_precisions": [ |
|
94.29446678154318, |
|
91.22844368746009, |
|
89.49892112203308, |
|
87.48971193415638 |
|
], |
|
"eval_ref_len": 8052, |
|
"eval_runtime": 108.6659, |
|
"eval_samples_per_second": 4.841, |
|
"eval_score": 52.70664408353883, |
|
"eval_steps_per_second": 0.304, |
|
"eval_sys_len": 5223, |
|
"eval_totals": [ |
|
5223, |
|
4697, |
|
4171, |
|
3645 |
|
], |
|
"step": 12336 |
|
}, |
|
{ |
|
"epoch": 48.64, |
|
"learning_rate": 5.447470817120623e-07, |
|
"loss": 0.0625, |
|
"step": 12500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 12850, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 8.680648839008256e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|