{ "best_metric": 2.2068374156951904, "best_model_checkpoint": "card_type_image_detection/checkpoint-12573", "epoch": 99.0, "eval_steps": 500, "global_step": 12573, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.023209876543209877, "eval_loss": 3.959347724914551, "eval_runtime": 16.3056, "eval_samples_per_second": 124.191, "eval_steps_per_second": 3.925, "step": 127 }, { "epoch": 2.0, "eval_accuracy": 0.047407407407407405, "eval_loss": 3.927157163619995, "eval_runtime": 16.3398, "eval_samples_per_second": 123.93, "eval_steps_per_second": 3.917, "step": 254 }, { "epoch": 3.0, "eval_accuracy": 0.0819753086419753, "eval_loss": 3.8877668380737305, "eval_runtime": 16.2853, "eval_samples_per_second": 124.345, "eval_steps_per_second": 3.93, "step": 381 }, { "epoch": 3.94, "learning_rate": 1.9288537549407114e-06, "loss": 3.9022, "step": 500 }, { "epoch": 4.0, "eval_accuracy": 0.09876543209876543, "eval_loss": 3.8418056964874268, "eval_runtime": 16.2864, "eval_samples_per_second": 124.337, "eval_steps_per_second": 3.93, "step": 508 }, { "epoch": 5.0, "eval_accuracy": 0.12, "eval_loss": 3.7911477088928223, "eval_runtime": 16.2971, "eval_samples_per_second": 124.255, "eval_steps_per_second": 3.927, "step": 635 }, { "epoch": 6.0, "eval_accuracy": 0.1382716049382716, "eval_loss": 3.7379021644592285, "eval_runtime": 16.4902, "eval_samples_per_second": 122.8, "eval_steps_per_second": 3.881, "step": 762 }, { "epoch": 7.0, "eval_accuracy": 0.1580246913580247, "eval_loss": 3.6840596199035645, "eval_runtime": 16.2622, "eval_samples_per_second": 124.522, "eval_steps_per_second": 3.936, "step": 889 }, { "epoch": 7.87, "learning_rate": 1.849802371541502e-06, "loss": 3.6821, "step": 1000 }, { "epoch": 8.0, "eval_accuracy": 0.17777777777777778, "eval_loss": 3.6320221424102783, "eval_runtime": 16.3988, "eval_samples_per_second": 123.485, "eval_steps_per_second": 3.903, "step": 1016 }, { "epoch": 9.0, "eval_accuracy": 0.19654320987654322, "eval_loss": 3.580777883529663, "eval_runtime": 16.325, "eval_samples_per_second": 124.043, "eval_steps_per_second": 3.92, "step": 1143 }, { "epoch": 10.0, "eval_accuracy": 0.21580246913580248, "eval_loss": 3.5295674800872803, "eval_runtime": 16.3625, "eval_samples_per_second": 123.758, "eval_steps_per_second": 3.911, "step": 1270 }, { "epoch": 11.0, "eval_accuracy": 0.23703703703703705, "eval_loss": 3.479665994644165, "eval_runtime": 16.5252, "eval_samples_per_second": 122.54, "eval_steps_per_second": 3.873, "step": 1397 }, { "epoch": 11.81, "learning_rate": 1.7707509881422924e-06, "loss": 3.4599, "step": 1500 }, { "epoch": 12.0, "eval_accuracy": 0.24592592592592594, "eval_loss": 3.431598663330078, "eval_runtime": 16.3048, "eval_samples_per_second": 124.197, "eval_steps_per_second": 3.925, "step": 1524 }, { "epoch": 13.0, "eval_accuracy": 0.25728395061728393, "eval_loss": 3.38520884513855, "eval_runtime": 16.3716, "eval_samples_per_second": 123.69, "eval_steps_per_second": 3.909, "step": 1651 }, { "epoch": 14.0, "eval_accuracy": 0.2730864197530864, "eval_loss": 3.341092348098755, "eval_runtime": 16.4208, "eval_samples_per_second": 123.319, "eval_steps_per_second": 3.897, "step": 1778 }, { "epoch": 15.0, "eval_accuracy": 0.291358024691358, "eval_loss": 3.299830198287964, "eval_runtime": 16.349, "eval_samples_per_second": 123.861, "eval_steps_per_second": 3.915, "step": 1905 }, { "epoch": 15.75, "learning_rate": 1.691699604743083e-06, "loss": 3.2748, "step": 2000 }, { "epoch": 16.0, "eval_accuracy": 0.305679012345679, "eval_loss": 3.2592811584472656, "eval_runtime": 16.2664, "eval_samples_per_second": 124.49, "eval_steps_per_second": 3.934, "step": 2032 }, { "epoch": 17.0, "eval_accuracy": 0.32641975308641974, "eval_loss": 3.2209341526031494, "eval_runtime": 16.4355, "eval_samples_per_second": 123.209, "eval_steps_per_second": 3.894, "step": 2159 }, { "epoch": 18.0, "eval_accuracy": 0.3437037037037037, "eval_loss": 3.184448003768921, "eval_runtime": 16.3387, "eval_samples_per_second": 123.939, "eval_steps_per_second": 3.917, "step": 2286 }, { "epoch": 19.0, "eval_accuracy": 0.345679012345679, "eval_loss": 3.1492130756378174, "eval_runtime": 16.2457, "eval_samples_per_second": 124.648, "eval_steps_per_second": 3.939, "step": 2413 }, { "epoch": 19.69, "learning_rate": 1.6126482213438735e-06, "loss": 3.1183, "step": 2500 }, { "epoch": 20.0, "eval_accuracy": 0.3634567901234568, "eval_loss": 3.117238998413086, "eval_runtime": 16.3142, "eval_samples_per_second": 124.125, "eval_steps_per_second": 3.923, "step": 2540 }, { "epoch": 21.0, "eval_accuracy": 0.37382716049382714, "eval_loss": 3.0856010913848877, "eval_runtime": 16.4136, "eval_samples_per_second": 123.374, "eval_steps_per_second": 3.899, "step": 2667 }, { "epoch": 22.0, "eval_accuracy": 0.38469135802469134, "eval_loss": 3.0540201663970947, "eval_runtime": 16.404, "eval_samples_per_second": 123.446, "eval_steps_per_second": 3.901, "step": 2794 }, { "epoch": 23.0, "eval_accuracy": 0.3916049382716049, "eval_loss": 3.025864362716675, "eval_runtime": 16.3387, "eval_samples_per_second": 123.939, "eval_steps_per_second": 3.917, "step": 2921 }, { "epoch": 23.62, "learning_rate": 1.533596837944664e-06, "loss": 2.9883, "step": 3000 }, { "epoch": 24.0, "eval_accuracy": 0.4014814814814815, "eval_loss": 2.9977123737335205, "eval_runtime": 16.5183, "eval_samples_per_second": 122.592, "eval_steps_per_second": 3.874, "step": 3048 }, { "epoch": 25.0, "eval_accuracy": 0.4103703703703704, "eval_loss": 2.970736503601074, "eval_runtime": 16.4478, "eval_samples_per_second": 123.117, "eval_steps_per_second": 3.891, "step": 3175 }, { "epoch": 26.0, "eval_accuracy": 0.42962962962962964, "eval_loss": 2.9437484741210938, "eval_runtime": 16.3805, "eval_samples_per_second": 123.623, "eval_steps_per_second": 3.907, "step": 3302 }, { "epoch": 27.0, "eval_accuracy": 0.4365432098765432, "eval_loss": 2.918938636779785, "eval_runtime": 16.4584, "eval_samples_per_second": 123.038, "eval_steps_per_second": 3.889, "step": 3429 }, { "epoch": 27.56, "learning_rate": 1.4545454545454544e-06, "loss": 2.8743, "step": 3500 }, { "epoch": 28.0, "eval_accuracy": 0.44592592592592595, "eval_loss": 2.8959240913391113, "eval_runtime": 16.8148, "eval_samples_per_second": 120.43, "eval_steps_per_second": 3.806, "step": 3556 }, { "epoch": 29.0, "eval_accuracy": 0.4597530864197531, "eval_loss": 2.8701860904693604, "eval_runtime": 16.5161, "eval_samples_per_second": 122.607, "eval_steps_per_second": 3.875, "step": 3683 }, { "epoch": 30.0, "eval_accuracy": 0.46469135802469136, "eval_loss": 2.848033905029297, "eval_runtime": 16.6005, "eval_samples_per_second": 121.984, "eval_steps_per_second": 3.855, "step": 3810 }, { "epoch": 31.0, "eval_accuracy": 0.4730864197530864, "eval_loss": 2.825526714324951, "eval_runtime": 16.5596, "eval_samples_per_second": 122.286, "eval_steps_per_second": 3.865, "step": 3937 }, { "epoch": 31.5, "learning_rate": 1.375494071146245e-06, "loss": 2.773, "step": 4000 }, { "epoch": 32.0, "eval_accuracy": 0.47802469135802467, "eval_loss": 2.804414749145508, "eval_runtime": 16.6816, "eval_samples_per_second": 121.391, "eval_steps_per_second": 3.837, "step": 4064 }, { "epoch": 33.0, "eval_accuracy": 0.47555555555555556, "eval_loss": 2.7853612899780273, "eval_runtime": 16.7524, "eval_samples_per_second": 120.878, "eval_steps_per_second": 3.82, "step": 4191 }, { "epoch": 34.0, "eval_accuracy": 0.4918518518518519, "eval_loss": 2.7612335681915283, "eval_runtime": 16.5289, "eval_samples_per_second": 122.513, "eval_steps_per_second": 3.872, "step": 4318 }, { "epoch": 35.0, "eval_accuracy": 0.497283950617284, "eval_loss": 2.741335391998291, "eval_runtime": 16.6912, "eval_samples_per_second": 121.321, "eval_steps_per_second": 3.834, "step": 4445 }, { "epoch": 35.43, "learning_rate": 1.2964426877470356e-06, "loss": 2.6822, "step": 4500 }, { "epoch": 36.0, "eval_accuracy": 0.49876543209876545, "eval_loss": 2.724756956100464, "eval_runtime": 16.517, "eval_samples_per_second": 122.601, "eval_steps_per_second": 3.875, "step": 4572 }, { "epoch": 37.0, "eval_accuracy": 0.49777777777777776, "eval_loss": 2.7084524631500244, "eval_runtime": 16.6709, "eval_samples_per_second": 121.469, "eval_steps_per_second": 3.839, "step": 4699 }, { "epoch": 38.0, "eval_accuracy": 0.5150617283950617, "eval_loss": 2.6876704692840576, "eval_runtime": 16.6152, "eval_samples_per_second": 121.876, "eval_steps_per_second": 3.852, "step": 4826 }, { "epoch": 39.0, "eval_accuracy": 0.5106172839506172, "eval_loss": 2.6705734729766846, "eval_runtime": 16.5075, "eval_samples_per_second": 122.671, "eval_steps_per_second": 3.877, "step": 4953 }, { "epoch": 39.37, "learning_rate": 1.217391304347826e-06, "loss": 2.5988, "step": 5000 }, { "epoch": 40.0, "eval_accuracy": 0.5116049382716049, "eval_loss": 2.655836343765259, "eval_runtime": 16.7707, "eval_samples_per_second": 120.746, "eval_steps_per_second": 3.816, "step": 5080 }, { "epoch": 41.0, "eval_accuracy": 0.5125925925925926, "eval_loss": 2.6394340991973877, "eval_runtime": 16.7747, "eval_samples_per_second": 120.718, "eval_steps_per_second": 3.815, "step": 5207 }, { "epoch": 42.0, "eval_accuracy": 0.5209876543209877, "eval_loss": 2.623532295227051, "eval_runtime": 16.7009, "eval_samples_per_second": 121.251, "eval_steps_per_second": 3.832, "step": 5334 }, { "epoch": 43.0, "eval_accuracy": 0.5249382716049382, "eval_loss": 2.6078622341156006, "eval_runtime": 16.3629, "eval_samples_per_second": 123.756, "eval_steps_per_second": 3.911, "step": 5461 }, { "epoch": 43.31, "learning_rate": 1.1383399209486167e-06, "loss": 2.5306, "step": 5500 }, { "epoch": 44.0, "eval_accuracy": 0.5293827160493827, "eval_loss": 2.592928647994995, "eval_runtime": 16.7766, "eval_samples_per_second": 120.704, "eval_steps_per_second": 3.815, "step": 5588 }, { "epoch": 45.0, "eval_accuracy": 0.5338271604938272, "eval_loss": 2.575329542160034, "eval_runtime": 17.0334, "eval_samples_per_second": 118.884, "eval_steps_per_second": 3.757, "step": 5715 }, { "epoch": 46.0, "eval_accuracy": 0.542716049382716, "eval_loss": 2.5605058670043945, "eval_runtime": 16.7153, "eval_samples_per_second": 121.146, "eval_steps_per_second": 3.829, "step": 5842 }, { "epoch": 47.0, "eval_accuracy": 0.548641975308642, "eval_loss": 2.548151969909668, "eval_runtime": 16.3632, "eval_samples_per_second": 123.754, "eval_steps_per_second": 3.911, "step": 5969 }, { "epoch": 47.24, "learning_rate": 1.0592885375494072e-06, "loss": 2.4638, "step": 6000 }, { "epoch": 48.0, "eval_accuracy": 0.5530864197530864, "eval_loss": 2.5332412719726562, "eval_runtime": 16.2257, "eval_samples_per_second": 124.802, "eval_steps_per_second": 3.944, "step": 6096 }, { "epoch": 49.0, "eval_accuracy": 0.5535802469135802, "eval_loss": 2.521514654159546, "eval_runtime": 16.2855, "eval_samples_per_second": 124.344, "eval_steps_per_second": 3.93, "step": 6223 }, { "epoch": 50.0, "eval_accuracy": 0.5619753086419753, "eval_loss": 2.507723569869995, "eval_runtime": 16.2945, "eval_samples_per_second": 124.275, "eval_steps_per_second": 3.928, "step": 6350 }, { "epoch": 51.0, "eval_accuracy": 0.5595061728395062, "eval_loss": 2.4946696758270264, "eval_runtime": 16.2486, "eval_samples_per_second": 124.626, "eval_steps_per_second": 3.939, "step": 6477 }, { "epoch": 51.18, "learning_rate": 9.802371541501976e-07, "loss": 2.405, "step": 6500 }, { "epoch": 52.0, "eval_accuracy": 0.5580246913580247, "eval_loss": 2.4835174083709717, "eval_runtime": 16.5375, "eval_samples_per_second": 122.449, "eval_steps_per_second": 3.87, "step": 6604 }, { "epoch": 53.0, "eval_accuracy": 0.5585185185185185, "eval_loss": 2.473555088043213, "eval_runtime": 17.2437, "eval_samples_per_second": 117.434, "eval_steps_per_second": 3.711, "step": 6731 }, { "epoch": 54.0, "eval_accuracy": 0.5644444444444444, "eval_loss": 2.4599006175994873, "eval_runtime": 17.2017, "eval_samples_per_second": 117.721, "eval_steps_per_second": 3.721, "step": 6858 }, { "epoch": 55.0, "eval_accuracy": 0.5708641975308641, "eval_loss": 2.444490432739258, "eval_runtime": 16.4767, "eval_samples_per_second": 122.901, "eval_steps_per_second": 3.884, "step": 6985 }, { "epoch": 55.12, "learning_rate": 9.011857707509881e-07, "loss": 2.3499, "step": 7000 }, { "epoch": 56.0, "eval_accuracy": 0.5718518518518518, "eval_loss": 2.43546199798584, "eval_runtime": 17.1098, "eval_samples_per_second": 118.353, "eval_steps_per_second": 3.741, "step": 7112 }, { "epoch": 57.0, "eval_accuracy": 0.5738271604938272, "eval_loss": 2.424128532409668, "eval_runtime": 16.3356, "eval_samples_per_second": 123.962, "eval_steps_per_second": 3.918, "step": 7239 }, { "epoch": 58.0, "eval_accuracy": 0.5679012345679012, "eval_loss": 2.4162774085998535, "eval_runtime": 16.3093, "eval_samples_per_second": 124.162, "eval_steps_per_second": 3.924, "step": 7366 }, { "epoch": 59.0, "eval_accuracy": 0.5733333333333334, "eval_loss": 2.404547929763794, "eval_runtime": 16.3838, "eval_samples_per_second": 123.598, "eval_steps_per_second": 3.906, "step": 7493 }, { "epoch": 59.06, "learning_rate": 8.221343873517787e-07, "loss": 2.3047, "step": 7500 }, { "epoch": 60.0, "eval_accuracy": 0.5767901234567901, "eval_loss": 2.3955578804016113, "eval_runtime": 16.4961, "eval_samples_per_second": 122.756, "eval_steps_per_second": 3.88, "step": 7620 }, { "epoch": 61.0, "eval_accuracy": 0.5733333333333334, "eval_loss": 2.3882274627685547, "eval_runtime": 16.5941, "eval_samples_per_second": 122.031, "eval_steps_per_second": 3.857, "step": 7747 }, { "epoch": 62.0, "eval_accuracy": 0.5758024691358025, "eval_loss": 2.3768720626831055, "eval_runtime": 16.3063, "eval_samples_per_second": 124.185, "eval_steps_per_second": 3.925, "step": 7874 }, { "epoch": 62.99, "learning_rate": 7.430830039525692e-07, "loss": 2.2616, "step": 8000 }, { "epoch": 63.0, "eval_accuracy": 0.5802469135802469, "eval_loss": 2.3649182319641113, "eval_runtime": 16.4383, "eval_samples_per_second": 123.188, "eval_steps_per_second": 3.893, "step": 8001 }, { "epoch": 64.0, "eval_accuracy": 0.5782716049382716, "eval_loss": 2.3605239391326904, "eval_runtime": 16.5048, "eval_samples_per_second": 122.691, "eval_steps_per_second": 3.878, "step": 8128 }, { "epoch": 65.0, "eval_accuracy": 0.5881481481481482, "eval_loss": 2.348599433898926, "eval_runtime": 16.6986, "eval_samples_per_second": 121.268, "eval_steps_per_second": 3.833, "step": 8255 }, { "epoch": 66.0, "eval_accuracy": 0.5891358024691358, "eval_loss": 2.3403825759887695, "eval_runtime": 16.6528, "eval_samples_per_second": 121.601, "eval_steps_per_second": 3.843, "step": 8382 }, { "epoch": 66.93, "learning_rate": 6.640316205533597e-07, "loss": 2.224, "step": 8500 }, { "epoch": 67.0, "eval_accuracy": 0.5876543209876544, "eval_loss": 2.333630323410034, "eval_runtime": 16.323, "eval_samples_per_second": 124.058, "eval_steps_per_second": 3.921, "step": 8509 }, { "epoch": 68.0, "eval_accuracy": 0.5881481481481482, "eval_loss": 2.3267500400543213, "eval_runtime": 16.3769, "eval_samples_per_second": 123.649, "eval_steps_per_second": 3.908, "step": 8636 }, { "epoch": 69.0, "eval_accuracy": 0.5871604938271605, "eval_loss": 2.3214409351348877, "eval_runtime": 16.4152, "eval_samples_per_second": 123.361, "eval_steps_per_second": 3.899, "step": 8763 }, { "epoch": 70.0, "eval_accuracy": 0.5920987654320987, "eval_loss": 2.3111300468444824, "eval_runtime": 16.7406, "eval_samples_per_second": 120.963, "eval_steps_per_second": 3.823, "step": 8890 }, { "epoch": 70.87, "learning_rate": 5.849802371541502e-07, "loss": 2.1885, "step": 9000 }, { "epoch": 71.0, "eval_accuracy": 0.5906172839506173, "eval_loss": 2.304325580596924, "eval_runtime": 16.5534, "eval_samples_per_second": 122.331, "eval_steps_per_second": 3.866, "step": 9017 }, { "epoch": 72.0, "eval_accuracy": 0.5960493827160493, "eval_loss": 2.297461986541748, "eval_runtime": 16.4435, "eval_samples_per_second": 123.149, "eval_steps_per_second": 3.892, "step": 9144 }, { "epoch": 73.0, "eval_accuracy": 0.5871604938271605, "eval_loss": 2.296058177947998, "eval_runtime": 16.3403, "eval_samples_per_second": 123.927, "eval_steps_per_second": 3.917, "step": 9271 }, { "epoch": 74.0, "eval_accuracy": 0.5960493827160493, "eval_loss": 2.2856781482696533, "eval_runtime": 16.6488, "eval_samples_per_second": 121.63, "eval_steps_per_second": 3.844, "step": 9398 }, { "epoch": 74.8, "learning_rate": 5.059288537549406e-07, "loss": 2.1603, "step": 9500 }, { "epoch": 75.0, "eval_accuracy": 0.5965432098765432, "eval_loss": 2.2791759967803955, "eval_runtime": 17.0015, "eval_samples_per_second": 119.107, "eval_steps_per_second": 3.764, "step": 9525 }, { "epoch": 76.0, "eval_accuracy": 0.5955555555555555, "eval_loss": 2.2750935554504395, "eval_runtime": 16.6586, "eval_samples_per_second": 121.559, "eval_steps_per_second": 3.842, "step": 9652 }, { "epoch": 77.0, "eval_accuracy": 0.5965432098765432, "eval_loss": 2.268305540084839, "eval_runtime": 16.6007, "eval_samples_per_second": 121.983, "eval_steps_per_second": 3.855, "step": 9779 }, { "epoch": 78.0, "eval_accuracy": 0.6049382716049383, "eval_loss": 2.2604947090148926, "eval_runtime": 16.3684, "eval_samples_per_second": 123.714, "eval_steps_per_second": 3.91, "step": 9906 }, { "epoch": 78.74, "learning_rate": 4.268774703557312e-07, "loss": 2.1357, "step": 10000 }, { "epoch": 79.0, "eval_accuracy": 0.6064197530864197, "eval_loss": 2.254981756210327, "eval_runtime": 16.4537, "eval_samples_per_second": 123.072, "eval_steps_per_second": 3.89, "step": 10033 }, { "epoch": 80.0, "eval_accuracy": 0.6034567901234568, "eval_loss": 2.2524936199188232, "eval_runtime": 16.3798, "eval_samples_per_second": 123.628, "eval_steps_per_second": 3.907, "step": 10160 }, { "epoch": 81.0, "eval_accuracy": 0.6054320987654321, "eval_loss": 2.2475554943084717, "eval_runtime": 16.2339, "eval_samples_per_second": 124.739, "eval_steps_per_second": 3.942, "step": 10287 }, { "epoch": 82.0, "eval_accuracy": 0.6004938271604938, "eval_loss": 2.244779586791992, "eval_runtime": 16.19, "eval_samples_per_second": 125.077, "eval_steps_per_second": 3.953, "step": 10414 }, { "epoch": 82.68, "learning_rate": 3.478260869565217e-07, "loss": 2.1153, "step": 10500 }, { "epoch": 83.0, "eval_accuracy": 0.6009876543209877, "eval_loss": 2.242079973220825, "eval_runtime": 16.2447, "eval_samples_per_second": 124.656, "eval_steps_per_second": 3.94, "step": 10541 }, { "epoch": 84.0, "eval_accuracy": 0.6009876543209877, "eval_loss": 2.237128973007202, "eval_runtime": 16.3552, "eval_samples_per_second": 123.814, "eval_steps_per_second": 3.913, "step": 10668 }, { "epoch": 85.0, "eval_accuracy": 0.6034567901234568, "eval_loss": 2.2331736087799072, "eval_runtime": 16.4131, "eval_samples_per_second": 123.377, "eval_steps_per_second": 3.899, "step": 10795 }, { "epoch": 86.0, "eval_accuracy": 0.6064197530864197, "eval_loss": 2.230192184448242, "eval_runtime": 16.263, "eval_samples_per_second": 124.516, "eval_steps_per_second": 3.935, "step": 10922 }, { "epoch": 86.61, "learning_rate": 2.6877470355731227e-07, "loss": 2.0969, "step": 11000 }, { "epoch": 87.0, "eval_accuracy": 0.6083950617283951, "eval_loss": 2.2269179821014404, "eval_runtime": 16.4471, "eval_samples_per_second": 123.122, "eval_steps_per_second": 3.891, "step": 11049 }, { "epoch": 88.0, "eval_accuracy": 0.6054320987654321, "eval_loss": 2.224729537963867, "eval_runtime": 16.5547, "eval_samples_per_second": 122.322, "eval_steps_per_second": 3.866, "step": 11176 }, { "epoch": 89.0, "eval_accuracy": 0.6098765432098765, "eval_loss": 2.2210581302642822, "eval_runtime": 16.3636, "eval_samples_per_second": 123.75, "eval_steps_per_second": 3.911, "step": 11303 }, { "epoch": 90.0, "eval_accuracy": 0.6049382716049383, "eval_loss": 2.2199292182922363, "eval_runtime": 16.5723, "eval_samples_per_second": 122.192, "eval_steps_per_second": 3.862, "step": 11430 }, { "epoch": 90.55, "learning_rate": 1.8972332015810276e-07, "loss": 2.0851, "step": 11500 }, { "epoch": 91.0, "eval_accuracy": 0.6103703703703703, "eval_loss": 2.2158267498016357, "eval_runtime": 16.2141, "eval_samples_per_second": 124.892, "eval_steps_per_second": 3.947, "step": 11557 }, { "epoch": 92.0, "eval_accuracy": 0.6079012345679012, "eval_loss": 2.214700937271118, "eval_runtime": 16.2859, "eval_samples_per_second": 124.341, "eval_steps_per_second": 3.93, "step": 11684 }, { "epoch": 93.0, "eval_accuracy": 0.6064197530864197, "eval_loss": 2.2131123542785645, "eval_runtime": 16.337, "eval_samples_per_second": 123.952, "eval_steps_per_second": 3.917, "step": 11811 }, { "epoch": 94.0, "eval_accuracy": 0.6098765432098765, "eval_loss": 2.2114579677581787, "eval_runtime": 16.393, "eval_samples_per_second": 123.528, "eval_steps_per_second": 3.904, "step": 11938 }, { "epoch": 94.49, "learning_rate": 1.1067193675889327e-07, "loss": 2.0754, "step": 12000 }, { "epoch": 95.0, "eval_accuracy": 0.6138271604938271, "eval_loss": 2.209113597869873, "eval_runtime": 16.4652, "eval_samples_per_second": 122.987, "eval_steps_per_second": 3.887, "step": 12065 }, { "epoch": 96.0, "eval_accuracy": 0.6128395061728396, "eval_loss": 2.2085654735565186, "eval_runtime": 16.3862, "eval_samples_per_second": 123.579, "eval_steps_per_second": 3.906, "step": 12192 }, { "epoch": 97.0, "eval_accuracy": 0.6123456790123457, "eval_loss": 2.207934617996216, "eval_runtime": 16.3381, "eval_samples_per_second": 123.943, "eval_steps_per_second": 3.917, "step": 12319 }, { "epoch": 98.0, "eval_accuracy": 0.6128395061728396, "eval_loss": 2.206979513168335, "eval_runtime": 16.3485, "eval_samples_per_second": 123.865, "eval_steps_per_second": 3.915, "step": 12446 }, { "epoch": 98.43, "learning_rate": 3.162055335968379e-08, "loss": 2.0667, "step": 12500 }, { "epoch": 99.0, "eval_accuracy": 0.6133333333333333, "eval_loss": 2.2068374156951904, "eval_runtime": 16.2194, "eval_samples_per_second": 124.851, "eval_steps_per_second": 3.946, "step": 12573 } ], "logging_steps": 500, "max_steps": 12700, "num_train_epochs": 100, "save_steps": 500, "total_flos": 6.215388042176161e+19, "trial_name": null, "trial_params": null }