|
{ |
|
"best_metric": 2.2068374156951904, |
|
"best_model_checkpoint": "card_type_image_detection/checkpoint-12573", |
|
"epoch": 99.0, |
|
"eval_steps": 500, |
|
"global_step": 12573, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.023209876543209877, |
|
"eval_loss": 3.959347724914551, |
|
"eval_runtime": 16.3056, |
|
"eval_samples_per_second": 124.191, |
|
"eval_steps_per_second": 3.925, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.047407407407407405, |
|
"eval_loss": 3.927157163619995, |
|
"eval_runtime": 16.3398, |
|
"eval_samples_per_second": 123.93, |
|
"eval_steps_per_second": 3.917, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.0819753086419753, |
|
"eval_loss": 3.8877668380737305, |
|
"eval_runtime": 16.2853, |
|
"eval_samples_per_second": 124.345, |
|
"eval_steps_per_second": 3.93, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 1.9288537549407114e-06, |
|
"loss": 3.9022, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.09876543209876543, |
|
"eval_loss": 3.8418056964874268, |
|
"eval_runtime": 16.2864, |
|
"eval_samples_per_second": 124.337, |
|
"eval_steps_per_second": 3.93, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.12, |
|
"eval_loss": 3.7911477088928223, |
|
"eval_runtime": 16.2971, |
|
"eval_samples_per_second": 124.255, |
|
"eval_steps_per_second": 3.927, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.1382716049382716, |
|
"eval_loss": 3.7379021644592285, |
|
"eval_runtime": 16.4902, |
|
"eval_samples_per_second": 122.8, |
|
"eval_steps_per_second": 3.881, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.1580246913580247, |
|
"eval_loss": 3.6840596199035645, |
|
"eval_runtime": 16.2622, |
|
"eval_samples_per_second": 124.522, |
|
"eval_steps_per_second": 3.936, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 1.849802371541502e-06, |
|
"loss": 3.6821, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.17777777777777778, |
|
"eval_loss": 3.6320221424102783, |
|
"eval_runtime": 16.3988, |
|
"eval_samples_per_second": 123.485, |
|
"eval_steps_per_second": 3.903, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.19654320987654322, |
|
"eval_loss": 3.580777883529663, |
|
"eval_runtime": 16.325, |
|
"eval_samples_per_second": 124.043, |
|
"eval_steps_per_second": 3.92, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.21580246913580248, |
|
"eval_loss": 3.5295674800872803, |
|
"eval_runtime": 16.3625, |
|
"eval_samples_per_second": 123.758, |
|
"eval_steps_per_second": 3.911, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.23703703703703705, |
|
"eval_loss": 3.479665994644165, |
|
"eval_runtime": 16.5252, |
|
"eval_samples_per_second": 122.54, |
|
"eval_steps_per_second": 3.873, |
|
"step": 1397 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"learning_rate": 1.7707509881422924e-06, |
|
"loss": 3.4599, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.24592592592592594, |
|
"eval_loss": 3.431598663330078, |
|
"eval_runtime": 16.3048, |
|
"eval_samples_per_second": 124.197, |
|
"eval_steps_per_second": 3.925, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.25728395061728393, |
|
"eval_loss": 3.38520884513855, |
|
"eval_runtime": 16.3716, |
|
"eval_samples_per_second": 123.69, |
|
"eval_steps_per_second": 3.909, |
|
"step": 1651 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.2730864197530864, |
|
"eval_loss": 3.341092348098755, |
|
"eval_runtime": 16.4208, |
|
"eval_samples_per_second": 123.319, |
|
"eval_steps_per_second": 3.897, |
|
"step": 1778 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.291358024691358, |
|
"eval_loss": 3.299830198287964, |
|
"eval_runtime": 16.349, |
|
"eval_samples_per_second": 123.861, |
|
"eval_steps_per_second": 3.915, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 15.75, |
|
"learning_rate": 1.691699604743083e-06, |
|
"loss": 3.2748, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.305679012345679, |
|
"eval_loss": 3.2592811584472656, |
|
"eval_runtime": 16.2664, |
|
"eval_samples_per_second": 124.49, |
|
"eval_steps_per_second": 3.934, |
|
"step": 2032 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.32641975308641974, |
|
"eval_loss": 3.2209341526031494, |
|
"eval_runtime": 16.4355, |
|
"eval_samples_per_second": 123.209, |
|
"eval_steps_per_second": 3.894, |
|
"step": 2159 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.3437037037037037, |
|
"eval_loss": 3.184448003768921, |
|
"eval_runtime": 16.3387, |
|
"eval_samples_per_second": 123.939, |
|
"eval_steps_per_second": 3.917, |
|
"step": 2286 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.345679012345679, |
|
"eval_loss": 3.1492130756378174, |
|
"eval_runtime": 16.2457, |
|
"eval_samples_per_second": 124.648, |
|
"eval_steps_per_second": 3.939, |
|
"step": 2413 |
|
}, |
|
{ |
|
"epoch": 19.69, |
|
"learning_rate": 1.6126482213438735e-06, |
|
"loss": 3.1183, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.3634567901234568, |
|
"eval_loss": 3.117238998413086, |
|
"eval_runtime": 16.3142, |
|
"eval_samples_per_second": 124.125, |
|
"eval_steps_per_second": 3.923, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.37382716049382714, |
|
"eval_loss": 3.0856010913848877, |
|
"eval_runtime": 16.4136, |
|
"eval_samples_per_second": 123.374, |
|
"eval_steps_per_second": 3.899, |
|
"step": 2667 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.38469135802469134, |
|
"eval_loss": 3.0540201663970947, |
|
"eval_runtime": 16.404, |
|
"eval_samples_per_second": 123.446, |
|
"eval_steps_per_second": 3.901, |
|
"step": 2794 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.3916049382716049, |
|
"eval_loss": 3.025864362716675, |
|
"eval_runtime": 16.3387, |
|
"eval_samples_per_second": 123.939, |
|
"eval_steps_per_second": 3.917, |
|
"step": 2921 |
|
}, |
|
{ |
|
"epoch": 23.62, |
|
"learning_rate": 1.533596837944664e-06, |
|
"loss": 2.9883, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.4014814814814815, |
|
"eval_loss": 2.9977123737335205, |
|
"eval_runtime": 16.5183, |
|
"eval_samples_per_second": 122.592, |
|
"eval_steps_per_second": 3.874, |
|
"step": 3048 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.4103703703703704, |
|
"eval_loss": 2.970736503601074, |
|
"eval_runtime": 16.4478, |
|
"eval_samples_per_second": 123.117, |
|
"eval_steps_per_second": 3.891, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.42962962962962964, |
|
"eval_loss": 2.9437484741210938, |
|
"eval_runtime": 16.3805, |
|
"eval_samples_per_second": 123.623, |
|
"eval_steps_per_second": 3.907, |
|
"step": 3302 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.4365432098765432, |
|
"eval_loss": 2.918938636779785, |
|
"eval_runtime": 16.4584, |
|
"eval_samples_per_second": 123.038, |
|
"eval_steps_per_second": 3.889, |
|
"step": 3429 |
|
}, |
|
{ |
|
"epoch": 27.56, |
|
"learning_rate": 1.4545454545454544e-06, |
|
"loss": 2.8743, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.44592592592592595, |
|
"eval_loss": 2.8959240913391113, |
|
"eval_runtime": 16.8148, |
|
"eval_samples_per_second": 120.43, |
|
"eval_steps_per_second": 3.806, |
|
"step": 3556 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.4597530864197531, |
|
"eval_loss": 2.8701860904693604, |
|
"eval_runtime": 16.5161, |
|
"eval_samples_per_second": 122.607, |
|
"eval_steps_per_second": 3.875, |
|
"step": 3683 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.46469135802469136, |
|
"eval_loss": 2.848033905029297, |
|
"eval_runtime": 16.6005, |
|
"eval_samples_per_second": 121.984, |
|
"eval_steps_per_second": 3.855, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.4730864197530864, |
|
"eval_loss": 2.825526714324951, |
|
"eval_runtime": 16.5596, |
|
"eval_samples_per_second": 122.286, |
|
"eval_steps_per_second": 3.865, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 31.5, |
|
"learning_rate": 1.375494071146245e-06, |
|
"loss": 2.773, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.47802469135802467, |
|
"eval_loss": 2.804414749145508, |
|
"eval_runtime": 16.6816, |
|
"eval_samples_per_second": 121.391, |
|
"eval_steps_per_second": 3.837, |
|
"step": 4064 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.47555555555555556, |
|
"eval_loss": 2.7853612899780273, |
|
"eval_runtime": 16.7524, |
|
"eval_samples_per_second": 120.878, |
|
"eval_steps_per_second": 3.82, |
|
"step": 4191 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.4918518518518519, |
|
"eval_loss": 2.7612335681915283, |
|
"eval_runtime": 16.5289, |
|
"eval_samples_per_second": 122.513, |
|
"eval_steps_per_second": 3.872, |
|
"step": 4318 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.497283950617284, |
|
"eval_loss": 2.741335391998291, |
|
"eval_runtime": 16.6912, |
|
"eval_samples_per_second": 121.321, |
|
"eval_steps_per_second": 3.834, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 35.43, |
|
"learning_rate": 1.2964426877470356e-06, |
|
"loss": 2.6822, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.49876543209876545, |
|
"eval_loss": 2.724756956100464, |
|
"eval_runtime": 16.517, |
|
"eval_samples_per_second": 122.601, |
|
"eval_steps_per_second": 3.875, |
|
"step": 4572 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.49777777777777776, |
|
"eval_loss": 2.7084524631500244, |
|
"eval_runtime": 16.6709, |
|
"eval_samples_per_second": 121.469, |
|
"eval_steps_per_second": 3.839, |
|
"step": 4699 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.5150617283950617, |
|
"eval_loss": 2.6876704692840576, |
|
"eval_runtime": 16.6152, |
|
"eval_samples_per_second": 121.876, |
|
"eval_steps_per_second": 3.852, |
|
"step": 4826 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.5106172839506172, |
|
"eval_loss": 2.6705734729766846, |
|
"eval_runtime": 16.5075, |
|
"eval_samples_per_second": 122.671, |
|
"eval_steps_per_second": 3.877, |
|
"step": 4953 |
|
}, |
|
{ |
|
"epoch": 39.37, |
|
"learning_rate": 1.217391304347826e-06, |
|
"loss": 2.5988, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.5116049382716049, |
|
"eval_loss": 2.655836343765259, |
|
"eval_runtime": 16.7707, |
|
"eval_samples_per_second": 120.746, |
|
"eval_steps_per_second": 3.816, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.5125925925925926, |
|
"eval_loss": 2.6394340991973877, |
|
"eval_runtime": 16.7747, |
|
"eval_samples_per_second": 120.718, |
|
"eval_steps_per_second": 3.815, |
|
"step": 5207 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.5209876543209877, |
|
"eval_loss": 2.623532295227051, |
|
"eval_runtime": 16.7009, |
|
"eval_samples_per_second": 121.251, |
|
"eval_steps_per_second": 3.832, |
|
"step": 5334 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.5249382716049382, |
|
"eval_loss": 2.6078622341156006, |
|
"eval_runtime": 16.3629, |
|
"eval_samples_per_second": 123.756, |
|
"eval_steps_per_second": 3.911, |
|
"step": 5461 |
|
}, |
|
{ |
|
"epoch": 43.31, |
|
"learning_rate": 1.1383399209486167e-06, |
|
"loss": 2.5306, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.5293827160493827, |
|
"eval_loss": 2.592928647994995, |
|
"eval_runtime": 16.7766, |
|
"eval_samples_per_second": 120.704, |
|
"eval_steps_per_second": 3.815, |
|
"step": 5588 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.5338271604938272, |
|
"eval_loss": 2.575329542160034, |
|
"eval_runtime": 17.0334, |
|
"eval_samples_per_second": 118.884, |
|
"eval_steps_per_second": 3.757, |
|
"step": 5715 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.542716049382716, |
|
"eval_loss": 2.5605058670043945, |
|
"eval_runtime": 16.7153, |
|
"eval_samples_per_second": 121.146, |
|
"eval_steps_per_second": 3.829, |
|
"step": 5842 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.548641975308642, |
|
"eval_loss": 2.548151969909668, |
|
"eval_runtime": 16.3632, |
|
"eval_samples_per_second": 123.754, |
|
"eval_steps_per_second": 3.911, |
|
"step": 5969 |
|
}, |
|
{ |
|
"epoch": 47.24, |
|
"learning_rate": 1.0592885375494072e-06, |
|
"loss": 2.4638, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.5530864197530864, |
|
"eval_loss": 2.5332412719726562, |
|
"eval_runtime": 16.2257, |
|
"eval_samples_per_second": 124.802, |
|
"eval_steps_per_second": 3.944, |
|
"step": 6096 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.5535802469135802, |
|
"eval_loss": 2.521514654159546, |
|
"eval_runtime": 16.2855, |
|
"eval_samples_per_second": 124.344, |
|
"eval_steps_per_second": 3.93, |
|
"step": 6223 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.5619753086419753, |
|
"eval_loss": 2.507723569869995, |
|
"eval_runtime": 16.2945, |
|
"eval_samples_per_second": 124.275, |
|
"eval_steps_per_second": 3.928, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.5595061728395062, |
|
"eval_loss": 2.4946696758270264, |
|
"eval_runtime": 16.2486, |
|
"eval_samples_per_second": 124.626, |
|
"eval_steps_per_second": 3.939, |
|
"step": 6477 |
|
}, |
|
{ |
|
"epoch": 51.18, |
|
"learning_rate": 9.802371541501976e-07, |
|
"loss": 2.405, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.5580246913580247, |
|
"eval_loss": 2.4835174083709717, |
|
"eval_runtime": 16.5375, |
|
"eval_samples_per_second": 122.449, |
|
"eval_steps_per_second": 3.87, |
|
"step": 6604 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.5585185185185185, |
|
"eval_loss": 2.473555088043213, |
|
"eval_runtime": 17.2437, |
|
"eval_samples_per_second": 117.434, |
|
"eval_steps_per_second": 3.711, |
|
"step": 6731 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.5644444444444444, |
|
"eval_loss": 2.4599006175994873, |
|
"eval_runtime": 17.2017, |
|
"eval_samples_per_second": 117.721, |
|
"eval_steps_per_second": 3.721, |
|
"step": 6858 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.5708641975308641, |
|
"eval_loss": 2.444490432739258, |
|
"eval_runtime": 16.4767, |
|
"eval_samples_per_second": 122.901, |
|
"eval_steps_per_second": 3.884, |
|
"step": 6985 |
|
}, |
|
{ |
|
"epoch": 55.12, |
|
"learning_rate": 9.011857707509881e-07, |
|
"loss": 2.3499, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.5718518518518518, |
|
"eval_loss": 2.43546199798584, |
|
"eval_runtime": 17.1098, |
|
"eval_samples_per_second": 118.353, |
|
"eval_steps_per_second": 3.741, |
|
"step": 7112 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.5738271604938272, |
|
"eval_loss": 2.424128532409668, |
|
"eval_runtime": 16.3356, |
|
"eval_samples_per_second": 123.962, |
|
"eval_steps_per_second": 3.918, |
|
"step": 7239 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.5679012345679012, |
|
"eval_loss": 2.4162774085998535, |
|
"eval_runtime": 16.3093, |
|
"eval_samples_per_second": 124.162, |
|
"eval_steps_per_second": 3.924, |
|
"step": 7366 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.5733333333333334, |
|
"eval_loss": 2.404547929763794, |
|
"eval_runtime": 16.3838, |
|
"eval_samples_per_second": 123.598, |
|
"eval_steps_per_second": 3.906, |
|
"step": 7493 |
|
}, |
|
{ |
|
"epoch": 59.06, |
|
"learning_rate": 8.221343873517787e-07, |
|
"loss": 2.3047, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.5767901234567901, |
|
"eval_loss": 2.3955578804016113, |
|
"eval_runtime": 16.4961, |
|
"eval_samples_per_second": 122.756, |
|
"eval_steps_per_second": 3.88, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.5733333333333334, |
|
"eval_loss": 2.3882274627685547, |
|
"eval_runtime": 16.5941, |
|
"eval_samples_per_second": 122.031, |
|
"eval_steps_per_second": 3.857, |
|
"step": 7747 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.5758024691358025, |
|
"eval_loss": 2.3768720626831055, |
|
"eval_runtime": 16.3063, |
|
"eval_samples_per_second": 124.185, |
|
"eval_steps_per_second": 3.925, |
|
"step": 7874 |
|
}, |
|
{ |
|
"epoch": 62.99, |
|
"learning_rate": 7.430830039525692e-07, |
|
"loss": 2.2616, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.5802469135802469, |
|
"eval_loss": 2.3649182319641113, |
|
"eval_runtime": 16.4383, |
|
"eval_samples_per_second": 123.188, |
|
"eval_steps_per_second": 3.893, |
|
"step": 8001 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.5782716049382716, |
|
"eval_loss": 2.3605239391326904, |
|
"eval_runtime": 16.5048, |
|
"eval_samples_per_second": 122.691, |
|
"eval_steps_per_second": 3.878, |
|
"step": 8128 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.5881481481481482, |
|
"eval_loss": 2.348599433898926, |
|
"eval_runtime": 16.6986, |
|
"eval_samples_per_second": 121.268, |
|
"eval_steps_per_second": 3.833, |
|
"step": 8255 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.5891358024691358, |
|
"eval_loss": 2.3403825759887695, |
|
"eval_runtime": 16.6528, |
|
"eval_samples_per_second": 121.601, |
|
"eval_steps_per_second": 3.843, |
|
"step": 8382 |
|
}, |
|
{ |
|
"epoch": 66.93, |
|
"learning_rate": 6.640316205533597e-07, |
|
"loss": 2.224, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.5876543209876544, |
|
"eval_loss": 2.333630323410034, |
|
"eval_runtime": 16.323, |
|
"eval_samples_per_second": 124.058, |
|
"eval_steps_per_second": 3.921, |
|
"step": 8509 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.5881481481481482, |
|
"eval_loss": 2.3267500400543213, |
|
"eval_runtime": 16.3769, |
|
"eval_samples_per_second": 123.649, |
|
"eval_steps_per_second": 3.908, |
|
"step": 8636 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.5871604938271605, |
|
"eval_loss": 2.3214409351348877, |
|
"eval_runtime": 16.4152, |
|
"eval_samples_per_second": 123.361, |
|
"eval_steps_per_second": 3.899, |
|
"step": 8763 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.5920987654320987, |
|
"eval_loss": 2.3111300468444824, |
|
"eval_runtime": 16.7406, |
|
"eval_samples_per_second": 120.963, |
|
"eval_steps_per_second": 3.823, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 70.87, |
|
"learning_rate": 5.849802371541502e-07, |
|
"loss": 2.1885, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.5906172839506173, |
|
"eval_loss": 2.304325580596924, |
|
"eval_runtime": 16.5534, |
|
"eval_samples_per_second": 122.331, |
|
"eval_steps_per_second": 3.866, |
|
"step": 9017 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.5960493827160493, |
|
"eval_loss": 2.297461986541748, |
|
"eval_runtime": 16.4435, |
|
"eval_samples_per_second": 123.149, |
|
"eval_steps_per_second": 3.892, |
|
"step": 9144 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.5871604938271605, |
|
"eval_loss": 2.296058177947998, |
|
"eval_runtime": 16.3403, |
|
"eval_samples_per_second": 123.927, |
|
"eval_steps_per_second": 3.917, |
|
"step": 9271 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.5960493827160493, |
|
"eval_loss": 2.2856781482696533, |
|
"eval_runtime": 16.6488, |
|
"eval_samples_per_second": 121.63, |
|
"eval_steps_per_second": 3.844, |
|
"step": 9398 |
|
}, |
|
{ |
|
"epoch": 74.8, |
|
"learning_rate": 5.059288537549406e-07, |
|
"loss": 2.1603, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.5965432098765432, |
|
"eval_loss": 2.2791759967803955, |
|
"eval_runtime": 17.0015, |
|
"eval_samples_per_second": 119.107, |
|
"eval_steps_per_second": 3.764, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.5955555555555555, |
|
"eval_loss": 2.2750935554504395, |
|
"eval_runtime": 16.6586, |
|
"eval_samples_per_second": 121.559, |
|
"eval_steps_per_second": 3.842, |
|
"step": 9652 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.5965432098765432, |
|
"eval_loss": 2.268305540084839, |
|
"eval_runtime": 16.6007, |
|
"eval_samples_per_second": 121.983, |
|
"eval_steps_per_second": 3.855, |
|
"step": 9779 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.6049382716049383, |
|
"eval_loss": 2.2604947090148926, |
|
"eval_runtime": 16.3684, |
|
"eval_samples_per_second": 123.714, |
|
"eval_steps_per_second": 3.91, |
|
"step": 9906 |
|
}, |
|
{ |
|
"epoch": 78.74, |
|
"learning_rate": 4.268774703557312e-07, |
|
"loss": 2.1357, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.6064197530864197, |
|
"eval_loss": 2.254981756210327, |
|
"eval_runtime": 16.4537, |
|
"eval_samples_per_second": 123.072, |
|
"eval_steps_per_second": 3.89, |
|
"step": 10033 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.6034567901234568, |
|
"eval_loss": 2.2524936199188232, |
|
"eval_runtime": 16.3798, |
|
"eval_samples_per_second": 123.628, |
|
"eval_steps_per_second": 3.907, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.6054320987654321, |
|
"eval_loss": 2.2475554943084717, |
|
"eval_runtime": 16.2339, |
|
"eval_samples_per_second": 124.739, |
|
"eval_steps_per_second": 3.942, |
|
"step": 10287 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.6004938271604938, |
|
"eval_loss": 2.244779586791992, |
|
"eval_runtime": 16.19, |
|
"eval_samples_per_second": 125.077, |
|
"eval_steps_per_second": 3.953, |
|
"step": 10414 |
|
}, |
|
{ |
|
"epoch": 82.68, |
|
"learning_rate": 3.478260869565217e-07, |
|
"loss": 2.1153, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.6009876543209877, |
|
"eval_loss": 2.242079973220825, |
|
"eval_runtime": 16.2447, |
|
"eval_samples_per_second": 124.656, |
|
"eval_steps_per_second": 3.94, |
|
"step": 10541 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.6009876543209877, |
|
"eval_loss": 2.237128973007202, |
|
"eval_runtime": 16.3552, |
|
"eval_samples_per_second": 123.814, |
|
"eval_steps_per_second": 3.913, |
|
"step": 10668 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.6034567901234568, |
|
"eval_loss": 2.2331736087799072, |
|
"eval_runtime": 16.4131, |
|
"eval_samples_per_second": 123.377, |
|
"eval_steps_per_second": 3.899, |
|
"step": 10795 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.6064197530864197, |
|
"eval_loss": 2.230192184448242, |
|
"eval_runtime": 16.263, |
|
"eval_samples_per_second": 124.516, |
|
"eval_steps_per_second": 3.935, |
|
"step": 10922 |
|
}, |
|
{ |
|
"epoch": 86.61, |
|
"learning_rate": 2.6877470355731227e-07, |
|
"loss": 2.0969, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.6083950617283951, |
|
"eval_loss": 2.2269179821014404, |
|
"eval_runtime": 16.4471, |
|
"eval_samples_per_second": 123.122, |
|
"eval_steps_per_second": 3.891, |
|
"step": 11049 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.6054320987654321, |
|
"eval_loss": 2.224729537963867, |
|
"eval_runtime": 16.5547, |
|
"eval_samples_per_second": 122.322, |
|
"eval_steps_per_second": 3.866, |
|
"step": 11176 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.6098765432098765, |
|
"eval_loss": 2.2210581302642822, |
|
"eval_runtime": 16.3636, |
|
"eval_samples_per_second": 123.75, |
|
"eval_steps_per_second": 3.911, |
|
"step": 11303 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.6049382716049383, |
|
"eval_loss": 2.2199292182922363, |
|
"eval_runtime": 16.5723, |
|
"eval_samples_per_second": 122.192, |
|
"eval_steps_per_second": 3.862, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 90.55, |
|
"learning_rate": 1.8972332015810276e-07, |
|
"loss": 2.0851, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.6103703703703703, |
|
"eval_loss": 2.2158267498016357, |
|
"eval_runtime": 16.2141, |
|
"eval_samples_per_second": 124.892, |
|
"eval_steps_per_second": 3.947, |
|
"step": 11557 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.6079012345679012, |
|
"eval_loss": 2.214700937271118, |
|
"eval_runtime": 16.2859, |
|
"eval_samples_per_second": 124.341, |
|
"eval_steps_per_second": 3.93, |
|
"step": 11684 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.6064197530864197, |
|
"eval_loss": 2.2131123542785645, |
|
"eval_runtime": 16.337, |
|
"eval_samples_per_second": 123.952, |
|
"eval_steps_per_second": 3.917, |
|
"step": 11811 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.6098765432098765, |
|
"eval_loss": 2.2114579677581787, |
|
"eval_runtime": 16.393, |
|
"eval_samples_per_second": 123.528, |
|
"eval_steps_per_second": 3.904, |
|
"step": 11938 |
|
}, |
|
{ |
|
"epoch": 94.49, |
|
"learning_rate": 1.1067193675889327e-07, |
|
"loss": 2.0754, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.6138271604938271, |
|
"eval_loss": 2.209113597869873, |
|
"eval_runtime": 16.4652, |
|
"eval_samples_per_second": 122.987, |
|
"eval_steps_per_second": 3.887, |
|
"step": 12065 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.6128395061728396, |
|
"eval_loss": 2.2085654735565186, |
|
"eval_runtime": 16.3862, |
|
"eval_samples_per_second": 123.579, |
|
"eval_steps_per_second": 3.906, |
|
"step": 12192 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.6123456790123457, |
|
"eval_loss": 2.207934617996216, |
|
"eval_runtime": 16.3381, |
|
"eval_samples_per_second": 123.943, |
|
"eval_steps_per_second": 3.917, |
|
"step": 12319 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.6128395061728396, |
|
"eval_loss": 2.206979513168335, |
|
"eval_runtime": 16.3485, |
|
"eval_samples_per_second": 123.865, |
|
"eval_steps_per_second": 3.915, |
|
"step": 12446 |
|
}, |
|
{ |
|
"epoch": 98.43, |
|
"learning_rate": 3.162055335968379e-08, |
|
"loss": 2.0667, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.6133333333333333, |
|
"eval_loss": 2.2068374156951904, |
|
"eval_runtime": 16.2194, |
|
"eval_samples_per_second": 124.851, |
|
"eval_steps_per_second": 3.946, |
|
"step": 12573 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 12700, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 6.215388042176161e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|