|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 16.99889502762431, |
|
"global_step": 5763, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.38, |
|
"gpu_memory": 2987030016, |
|
"learning_rate": 8.32e-06, |
|
"loss": 4.0407, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 1.664e-05, |
|
"loss": 2.405, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bp": 0.035349686560536234, |
|
"eval_counts": [ |
|
505, |
|
125, |
|
50, |
|
11 |
|
], |
|
"eval_loss": 1.9292821884155273, |
|
"eval_precisions": [ |
|
46.118721461187214, |
|
15.723270440251572, |
|
9.861932938856016, |
|
4.471544715447155 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 44.1807, |
|
"eval_samples_per_second": 6.79, |
|
"eval_score": 0.47271078280719403, |
|
"eval_steps_per_second": 6.79, |
|
"eval_sys_len": 1095, |
|
"eval_totals": [ |
|
1095, |
|
795, |
|
507, |
|
246 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 2.4959999999999998e-05, |
|
"loss": 2.0089, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 3.2437898089171974e-05, |
|
"loss": 1.8155, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 3.1775477707006364e-05, |
|
"loss": 1.7234, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bp": 0.0840891954437523, |
|
"eval_counts": [ |
|
492, |
|
189, |
|
85, |
|
29 |
|
], |
|
"eval_loss": 1.6681220531463623, |
|
"eval_precisions": [ |
|
35.96491228070175, |
|
17.696629213483146, |
|
10.303030303030303, |
|
4.833333333333333 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 53.1682, |
|
"eval_samples_per_second": 5.642, |
|
"eval_score": 1.1219810390322362, |
|
"eval_steps_per_second": 5.642, |
|
"eval_sys_len": 1368, |
|
"eval_totals": [ |
|
1368, |
|
1068, |
|
825, |
|
600 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 3.111305732484076e-05, |
|
"loss": 1.6058, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 3.0450636942675155e-05, |
|
"loss": 1.5189, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bp": 0.09192776836698148, |
|
"eval_counts": [ |
|
571, |
|
192, |
|
93, |
|
40 |
|
], |
|
"eval_loss": 1.5985139608383179, |
|
"eval_precisions": [ |
|
40.66951566951567, |
|
17.391304347826086, |
|
11.03202846975089, |
|
6.734006734006734 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 44.5545, |
|
"eval_samples_per_second": 6.733, |
|
"eval_score": 1.391807704814939, |
|
"eval_steps_per_second": 6.733, |
|
"eval_sys_len": 1404, |
|
"eval_totals": [ |
|
1404, |
|
1104, |
|
843, |
|
594 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 2.9788216560509553e-05, |
|
"loss": 1.4885, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 2.9125796178343946e-05, |
|
"loss": 1.334, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 2.8463375796178344e-05, |
|
"loss": 1.3861, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bp": 0.034513967404432855, |
|
"eval_counts": [ |
|
432, |
|
173, |
|
84, |
|
35 |
|
], |
|
"eval_loss": 1.6043497323989868, |
|
"eval_precisions": [ |
|
39.66942148760331, |
|
21.926489226869457, |
|
16.184971098265898, |
|
9.48509485094851 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 42.8527, |
|
"eval_samples_per_second": 7.001, |
|
"eval_score": 0.6597653875525311, |
|
"eval_steps_per_second": 7.001, |
|
"eval_sys_len": 1089, |
|
"eval_totals": [ |
|
1089, |
|
789, |
|
519, |
|
369 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 2.7800955414012737e-05, |
|
"loss": 1.3367, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 2.713853503184713e-05, |
|
"loss": 1.2828, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 2.647611464968153e-05, |
|
"loss": 1.2571, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bp": 0.17929973112718744, |
|
"eval_counts": [ |
|
671, |
|
230, |
|
102, |
|
43 |
|
], |
|
"eval_loss": 1.5908681154251099, |
|
"eval_precisions": [ |
|
38.36477987421384, |
|
15.873015873015873, |
|
8.695652173913043, |
|
4.699453551912568 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 55.6404, |
|
"eval_samples_per_second": 5.392, |
|
"eval_score": 2.2519827467510987, |
|
"eval_steps_per_second": 5.392, |
|
"eval_sys_len": 1749, |
|
"eval_totals": [ |
|
1749, |
|
1449, |
|
1173, |
|
915 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 2.5813694267515922e-05, |
|
"loss": 1.2035, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 2.515127388535032e-05, |
|
"loss": 1.183, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bp": 0.07050485313640832, |
|
"eval_counts": [ |
|
615, |
|
257, |
|
141, |
|
80 |
|
], |
|
"eval_loss": 1.5943706035614014, |
|
"eval_precisions": [ |
|
47.235023041474655, |
|
25.64870259481038, |
|
19.502074688796682, |
|
14.109347442680775 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 47.1825, |
|
"eval_samples_per_second": 6.358, |
|
"eval_score": 1.6941362350992444, |
|
"eval_steps_per_second": 6.358, |
|
"eval_sys_len": 1302, |
|
"eval_totals": [ |
|
1302, |
|
1002, |
|
723, |
|
567 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 2034 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 2.4488853503184713e-05, |
|
"loss": 1.1964, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 2.3826433121019104e-05, |
|
"loss": 1.1073, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 2.31640127388535e-05, |
|
"loss": 1.1316, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bp": 0.10421315891869368, |
|
"eval_counts": [ |
|
649, |
|
197, |
|
79, |
|
22 |
|
], |
|
"eval_loss": 1.6070951223373413, |
|
"eval_precisions": [ |
|
44.51303155006859, |
|
17.012089810017272, |
|
9.111880046136102, |
|
3.559870550161812 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 47.6479, |
|
"eval_samples_per_second": 6.296, |
|
"eval_score": 1.3046509061748794, |
|
"eval_steps_per_second": 6.296, |
|
"eval_sys_len": 1458, |
|
"eval_totals": [ |
|
1458, |
|
1158, |
|
867, |
|
618 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 2373 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 2.2501592356687895e-05, |
|
"loss": 1.0398, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 2.183917197452229e-05, |
|
"loss": 1.0349, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 2.1176751592356686e-05, |
|
"loss": 1.0816, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bp": 0.21001389512353258, |
|
"eval_counts": [ |
|
846, |
|
344, |
|
187, |
|
105 |
|
], |
|
"eval_loss": 1.6298103332519531, |
|
"eval_precisions": [ |
|
45.55735056542811, |
|
22.093770070648684, |
|
14.597970335675253, |
|
10.294117647058824 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 54.6716, |
|
"eval_samples_per_second": 5.487, |
|
"eval_score": 4.141670104799348, |
|
"eval_steps_per_second": 5.487, |
|
"eval_sys_len": 1857, |
|
"eval_totals": [ |
|
1857, |
|
1557, |
|
1281, |
|
1020 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 2712 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 2.051433121019108e-05, |
|
"loss": 0.987, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 1.9851910828025477e-05, |
|
"loss": 0.9829, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bp": 0.06525766524199453, |
|
"eval_counts": [ |
|
577, |
|
216, |
|
100, |
|
37 |
|
], |
|
"eval_loss": 1.6366333961486816, |
|
"eval_precisions": [ |
|
45.254901960784316, |
|
22.153846153846153, |
|
14.367816091954023, |
|
7.297830374753452 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 49.3567, |
|
"eval_samples_per_second": 6.078, |
|
"eval_score": 1.1750500193614282, |
|
"eval_steps_per_second": 6.078, |
|
"eval_sys_len": 1275, |
|
"eval_totals": [ |
|
1275, |
|
975, |
|
696, |
|
507 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 3051 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 1.918949044585987e-05, |
|
"loss": 1.003, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 1.8527070063694264e-05, |
|
"loss": 0.9337, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 1.786464968152866e-05, |
|
"loss": 0.9325, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bp": 0.16851984622310243, |
|
"eval_counts": [ |
|
667, |
|
248, |
|
121, |
|
62 |
|
], |
|
"eval_loss": 1.67235267162323, |
|
"eval_precisions": [ |
|
39.005847953216374, |
|
17.588652482269502, |
|
10.503472222222221, |
|
6.68824163969795 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 50.0923, |
|
"eval_samples_per_second": 5.989, |
|
"eval_score": 2.4969097127652855, |
|
"eval_steps_per_second": 5.989, |
|
"eval_sys_len": 1710, |
|
"eval_totals": [ |
|
1710, |
|
1410, |
|
1152, |
|
927 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 1.7202229299363055e-05, |
|
"loss": 0.9075, |
|
"step": 3456 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 1.6539808917197452e-05, |
|
"loss": 0.8753, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 1.5877388535031846e-05, |
|
"loss": 0.9098, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bp": 0.1483387334695538, |
|
"eval_counts": [ |
|
735, |
|
268, |
|
134, |
|
67 |
|
], |
|
"eval_loss": 1.6972090005874634, |
|
"eval_precisions": [ |
|
44.95412844036697, |
|
20.074906367041198, |
|
12.725546058879392, |
|
8.18070818070818 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 46.9235, |
|
"eval_samples_per_second": 6.393, |
|
"eval_score": 2.5970312545681904, |
|
"eval_steps_per_second": 6.393, |
|
"eval_sys_len": 1635, |
|
"eval_totals": [ |
|
1635, |
|
1335, |
|
1053, |
|
819 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 3729 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 1.5214968152866242e-05, |
|
"loss": 0.839, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 1.4552547770700635e-05, |
|
"loss": 0.8643, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bp": 0.1320190352563076, |
|
"eval_counts": [ |
|
715, |
|
285, |
|
143, |
|
70 |
|
], |
|
"eval_loss": 1.713928461074829, |
|
"eval_precisions": [ |
|
45.48346055979644, |
|
22.40566037735849, |
|
14.357429718875501, |
|
9.25925925925926 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 46.2792, |
|
"eval_samples_per_second": 6.482, |
|
"eval_score": 2.532809945547002, |
|
"eval_steps_per_second": 6.482, |
|
"eval_sys_len": 1572, |
|
"eval_totals": [ |
|
1572, |
|
1272, |
|
996, |
|
756 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 4068 |
|
}, |
|
{ |
|
"epoch": 12.08, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 1.3890127388535031e-05, |
|
"loss": 0.8264, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 1.3227707006369426e-05, |
|
"loss": 0.8008, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 12.84, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 1.2565286624203822e-05, |
|
"loss": 0.7963, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bp": 0.18517745860640325, |
|
"eval_counts": [ |
|
782, |
|
310, |
|
160, |
|
79 |
|
], |
|
"eval_loss": 1.7276182174682617, |
|
"eval_precisions": [ |
|
44.18079096045198, |
|
21.08843537414966, |
|
13.43408900083963, |
|
8.44017094017094 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 49.531, |
|
"eval_samples_per_second": 6.057, |
|
"eval_score": 3.3384697611529055, |
|
"eval_steps_per_second": 6.057, |
|
"eval_sys_len": 1770, |
|
"eval_totals": [ |
|
1770, |
|
1470, |
|
1191, |
|
936 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 4407 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 1.1902866242038214e-05, |
|
"loss": 0.791, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 13.59, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 1.124044585987261e-05, |
|
"loss": 0.7591, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 1.0578025477707005e-05, |
|
"loss": 0.7651, |
|
"step": 4736 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bp": 0.17762954994257873, |
|
"eval_counts": [ |
|
784, |
|
310, |
|
160, |
|
81 |
|
], |
|
"eval_loss": 1.788110375404358, |
|
"eval_precisions": [ |
|
44.97991967871486, |
|
21.48302148302148, |
|
13.605442176870747, |
|
8.653846153846153 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 48.2995, |
|
"eval_samples_per_second": 6.211, |
|
"eval_score": 3.262302153360586, |
|
"eval_steps_per_second": 6.211, |
|
"eval_sys_len": 1743, |
|
"eval_totals": [ |
|
1743, |
|
1443, |
|
1176, |
|
936 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 4746 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 9.9156050955414e-06, |
|
"loss": 0.7389, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 9.253184713375794e-06, |
|
"loss": 0.7292, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bp": 0.19451009506119815, |
|
"eval_counts": [ |
|
756, |
|
286, |
|
139, |
|
66 |
|
], |
|
"eval_loss": 1.8334678411483765, |
|
"eval_precisions": [ |
|
41.930116472545755, |
|
19.028609447771125, |
|
11.356209150326798, |
|
6.790123456790123 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 50.9389, |
|
"eval_samples_per_second": 5.889, |
|
"eval_score": 3.063396343878355, |
|
"eval_steps_per_second": 5.889, |
|
"eval_sys_len": 1803, |
|
"eval_totals": [ |
|
1803, |
|
1503, |
|
1224, |
|
972 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 5085 |
|
}, |
|
{ |
|
"epoch": 15.1, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 8.59076433121019e-06, |
|
"loss": 0.7051, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 7.928343949044585e-06, |
|
"loss": 0.6872, |
|
"step": 5248 |
|
}, |
|
{ |
|
"epoch": 15.86, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 7.265923566878981e-06, |
|
"loss": 0.6935, |
|
"step": 5376 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bp": 0.2204937574447589, |
|
"eval_counts": [ |
|
792, |
|
311, |
|
160, |
|
80 |
|
], |
|
"eval_loss": 1.8358988761901855, |
|
"eval_precisions": [ |
|
41.83835182250396, |
|
19.522912743251727, |
|
12.121212121212121, |
|
7.469654528478058 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 51.7295, |
|
"eval_samples_per_second": 5.799, |
|
"eval_score": 3.6361160482722528, |
|
"eval_steps_per_second": 5.799, |
|
"eval_sys_len": 1893, |
|
"eval_totals": [ |
|
1893, |
|
1593, |
|
1320, |
|
1071 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 5424 |
|
}, |
|
{ |
|
"epoch": 16.24, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 6.6035031847133755e-06, |
|
"loss": 0.6808, |
|
"step": 5504 |
|
}, |
|
{ |
|
"epoch": 16.61, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 5.94108280254777e-06, |
|
"loss": 0.6649, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"gpu_memory": 3076460544, |
|
"learning_rate": 5.278662420382165e-06, |
|
"loss": 0.6902, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bp": 0.27059488659440983, |
|
"eval_counts": [ |
|
875, |
|
346, |
|
196, |
|
113 |
|
], |
|
"eval_loss": 1.8474984169006348, |
|
"eval_precisions": [ |
|
42.45511887433285, |
|
19.64792731402612, |
|
13.198653198653199, |
|
9.254709254709255 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 53.1106, |
|
"eval_samples_per_second": 5.649, |
|
"eval_score": 4.834531406134382, |
|
"eval_steps_per_second": 5.649, |
|
"eval_sys_len": 2061, |
|
"eval_totals": [ |
|
2061, |
|
1761, |
|
1485, |
|
1221 |
|
], |
|
"gpu_memory": 3076460544, |
|
"step": 5763 |
|
} |
|
], |
|
"max_steps": 6780, |
|
"num_train_epochs": 20, |
|
"total_flos": 5005888091043840.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|