dima806's picture
Upload folder using huggingface_hub
501530f
raw
history blame contribute delete
No virus
27.8 kB
{
"best_metric": 2.2068374156951904,
"best_model_checkpoint": "card_type_image_detection/checkpoint-12573",
"epoch": 99.0,
"eval_steps": 500,
"global_step": 12573,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.023209876543209877,
"eval_loss": 3.959347724914551,
"eval_runtime": 16.3056,
"eval_samples_per_second": 124.191,
"eval_steps_per_second": 3.925,
"step": 127
},
{
"epoch": 2.0,
"eval_accuracy": 0.047407407407407405,
"eval_loss": 3.927157163619995,
"eval_runtime": 16.3398,
"eval_samples_per_second": 123.93,
"eval_steps_per_second": 3.917,
"step": 254
},
{
"epoch": 3.0,
"eval_accuracy": 0.0819753086419753,
"eval_loss": 3.8877668380737305,
"eval_runtime": 16.2853,
"eval_samples_per_second": 124.345,
"eval_steps_per_second": 3.93,
"step": 381
},
{
"epoch": 3.94,
"learning_rate": 1.9288537549407114e-06,
"loss": 3.9022,
"step": 500
},
{
"epoch": 4.0,
"eval_accuracy": 0.09876543209876543,
"eval_loss": 3.8418056964874268,
"eval_runtime": 16.2864,
"eval_samples_per_second": 124.337,
"eval_steps_per_second": 3.93,
"step": 508
},
{
"epoch": 5.0,
"eval_accuracy": 0.12,
"eval_loss": 3.7911477088928223,
"eval_runtime": 16.2971,
"eval_samples_per_second": 124.255,
"eval_steps_per_second": 3.927,
"step": 635
},
{
"epoch": 6.0,
"eval_accuracy": 0.1382716049382716,
"eval_loss": 3.7379021644592285,
"eval_runtime": 16.4902,
"eval_samples_per_second": 122.8,
"eval_steps_per_second": 3.881,
"step": 762
},
{
"epoch": 7.0,
"eval_accuracy": 0.1580246913580247,
"eval_loss": 3.6840596199035645,
"eval_runtime": 16.2622,
"eval_samples_per_second": 124.522,
"eval_steps_per_second": 3.936,
"step": 889
},
{
"epoch": 7.87,
"learning_rate": 1.849802371541502e-06,
"loss": 3.6821,
"step": 1000
},
{
"epoch": 8.0,
"eval_accuracy": 0.17777777777777778,
"eval_loss": 3.6320221424102783,
"eval_runtime": 16.3988,
"eval_samples_per_second": 123.485,
"eval_steps_per_second": 3.903,
"step": 1016
},
{
"epoch": 9.0,
"eval_accuracy": 0.19654320987654322,
"eval_loss": 3.580777883529663,
"eval_runtime": 16.325,
"eval_samples_per_second": 124.043,
"eval_steps_per_second": 3.92,
"step": 1143
},
{
"epoch": 10.0,
"eval_accuracy": 0.21580246913580248,
"eval_loss": 3.5295674800872803,
"eval_runtime": 16.3625,
"eval_samples_per_second": 123.758,
"eval_steps_per_second": 3.911,
"step": 1270
},
{
"epoch": 11.0,
"eval_accuracy": 0.23703703703703705,
"eval_loss": 3.479665994644165,
"eval_runtime": 16.5252,
"eval_samples_per_second": 122.54,
"eval_steps_per_second": 3.873,
"step": 1397
},
{
"epoch": 11.81,
"learning_rate": 1.7707509881422924e-06,
"loss": 3.4599,
"step": 1500
},
{
"epoch": 12.0,
"eval_accuracy": 0.24592592592592594,
"eval_loss": 3.431598663330078,
"eval_runtime": 16.3048,
"eval_samples_per_second": 124.197,
"eval_steps_per_second": 3.925,
"step": 1524
},
{
"epoch": 13.0,
"eval_accuracy": 0.25728395061728393,
"eval_loss": 3.38520884513855,
"eval_runtime": 16.3716,
"eval_samples_per_second": 123.69,
"eval_steps_per_second": 3.909,
"step": 1651
},
{
"epoch": 14.0,
"eval_accuracy": 0.2730864197530864,
"eval_loss": 3.341092348098755,
"eval_runtime": 16.4208,
"eval_samples_per_second": 123.319,
"eval_steps_per_second": 3.897,
"step": 1778
},
{
"epoch": 15.0,
"eval_accuracy": 0.291358024691358,
"eval_loss": 3.299830198287964,
"eval_runtime": 16.349,
"eval_samples_per_second": 123.861,
"eval_steps_per_second": 3.915,
"step": 1905
},
{
"epoch": 15.75,
"learning_rate": 1.691699604743083e-06,
"loss": 3.2748,
"step": 2000
},
{
"epoch": 16.0,
"eval_accuracy": 0.305679012345679,
"eval_loss": 3.2592811584472656,
"eval_runtime": 16.2664,
"eval_samples_per_second": 124.49,
"eval_steps_per_second": 3.934,
"step": 2032
},
{
"epoch": 17.0,
"eval_accuracy": 0.32641975308641974,
"eval_loss": 3.2209341526031494,
"eval_runtime": 16.4355,
"eval_samples_per_second": 123.209,
"eval_steps_per_second": 3.894,
"step": 2159
},
{
"epoch": 18.0,
"eval_accuracy": 0.3437037037037037,
"eval_loss": 3.184448003768921,
"eval_runtime": 16.3387,
"eval_samples_per_second": 123.939,
"eval_steps_per_second": 3.917,
"step": 2286
},
{
"epoch": 19.0,
"eval_accuracy": 0.345679012345679,
"eval_loss": 3.1492130756378174,
"eval_runtime": 16.2457,
"eval_samples_per_second": 124.648,
"eval_steps_per_second": 3.939,
"step": 2413
},
{
"epoch": 19.69,
"learning_rate": 1.6126482213438735e-06,
"loss": 3.1183,
"step": 2500
},
{
"epoch": 20.0,
"eval_accuracy": 0.3634567901234568,
"eval_loss": 3.117238998413086,
"eval_runtime": 16.3142,
"eval_samples_per_second": 124.125,
"eval_steps_per_second": 3.923,
"step": 2540
},
{
"epoch": 21.0,
"eval_accuracy": 0.37382716049382714,
"eval_loss": 3.0856010913848877,
"eval_runtime": 16.4136,
"eval_samples_per_second": 123.374,
"eval_steps_per_second": 3.899,
"step": 2667
},
{
"epoch": 22.0,
"eval_accuracy": 0.38469135802469134,
"eval_loss": 3.0540201663970947,
"eval_runtime": 16.404,
"eval_samples_per_second": 123.446,
"eval_steps_per_second": 3.901,
"step": 2794
},
{
"epoch": 23.0,
"eval_accuracy": 0.3916049382716049,
"eval_loss": 3.025864362716675,
"eval_runtime": 16.3387,
"eval_samples_per_second": 123.939,
"eval_steps_per_second": 3.917,
"step": 2921
},
{
"epoch": 23.62,
"learning_rate": 1.533596837944664e-06,
"loss": 2.9883,
"step": 3000
},
{
"epoch": 24.0,
"eval_accuracy": 0.4014814814814815,
"eval_loss": 2.9977123737335205,
"eval_runtime": 16.5183,
"eval_samples_per_second": 122.592,
"eval_steps_per_second": 3.874,
"step": 3048
},
{
"epoch": 25.0,
"eval_accuracy": 0.4103703703703704,
"eval_loss": 2.970736503601074,
"eval_runtime": 16.4478,
"eval_samples_per_second": 123.117,
"eval_steps_per_second": 3.891,
"step": 3175
},
{
"epoch": 26.0,
"eval_accuracy": 0.42962962962962964,
"eval_loss": 2.9437484741210938,
"eval_runtime": 16.3805,
"eval_samples_per_second": 123.623,
"eval_steps_per_second": 3.907,
"step": 3302
},
{
"epoch": 27.0,
"eval_accuracy": 0.4365432098765432,
"eval_loss": 2.918938636779785,
"eval_runtime": 16.4584,
"eval_samples_per_second": 123.038,
"eval_steps_per_second": 3.889,
"step": 3429
},
{
"epoch": 27.56,
"learning_rate": 1.4545454545454544e-06,
"loss": 2.8743,
"step": 3500
},
{
"epoch": 28.0,
"eval_accuracy": 0.44592592592592595,
"eval_loss": 2.8959240913391113,
"eval_runtime": 16.8148,
"eval_samples_per_second": 120.43,
"eval_steps_per_second": 3.806,
"step": 3556
},
{
"epoch": 29.0,
"eval_accuracy": 0.4597530864197531,
"eval_loss": 2.8701860904693604,
"eval_runtime": 16.5161,
"eval_samples_per_second": 122.607,
"eval_steps_per_second": 3.875,
"step": 3683
},
{
"epoch": 30.0,
"eval_accuracy": 0.46469135802469136,
"eval_loss": 2.848033905029297,
"eval_runtime": 16.6005,
"eval_samples_per_second": 121.984,
"eval_steps_per_second": 3.855,
"step": 3810
},
{
"epoch": 31.0,
"eval_accuracy": 0.4730864197530864,
"eval_loss": 2.825526714324951,
"eval_runtime": 16.5596,
"eval_samples_per_second": 122.286,
"eval_steps_per_second": 3.865,
"step": 3937
},
{
"epoch": 31.5,
"learning_rate": 1.375494071146245e-06,
"loss": 2.773,
"step": 4000
},
{
"epoch": 32.0,
"eval_accuracy": 0.47802469135802467,
"eval_loss": 2.804414749145508,
"eval_runtime": 16.6816,
"eval_samples_per_second": 121.391,
"eval_steps_per_second": 3.837,
"step": 4064
},
{
"epoch": 33.0,
"eval_accuracy": 0.47555555555555556,
"eval_loss": 2.7853612899780273,
"eval_runtime": 16.7524,
"eval_samples_per_second": 120.878,
"eval_steps_per_second": 3.82,
"step": 4191
},
{
"epoch": 34.0,
"eval_accuracy": 0.4918518518518519,
"eval_loss": 2.7612335681915283,
"eval_runtime": 16.5289,
"eval_samples_per_second": 122.513,
"eval_steps_per_second": 3.872,
"step": 4318
},
{
"epoch": 35.0,
"eval_accuracy": 0.497283950617284,
"eval_loss": 2.741335391998291,
"eval_runtime": 16.6912,
"eval_samples_per_second": 121.321,
"eval_steps_per_second": 3.834,
"step": 4445
},
{
"epoch": 35.43,
"learning_rate": 1.2964426877470356e-06,
"loss": 2.6822,
"step": 4500
},
{
"epoch": 36.0,
"eval_accuracy": 0.49876543209876545,
"eval_loss": 2.724756956100464,
"eval_runtime": 16.517,
"eval_samples_per_second": 122.601,
"eval_steps_per_second": 3.875,
"step": 4572
},
{
"epoch": 37.0,
"eval_accuracy": 0.49777777777777776,
"eval_loss": 2.7084524631500244,
"eval_runtime": 16.6709,
"eval_samples_per_second": 121.469,
"eval_steps_per_second": 3.839,
"step": 4699
},
{
"epoch": 38.0,
"eval_accuracy": 0.5150617283950617,
"eval_loss": 2.6876704692840576,
"eval_runtime": 16.6152,
"eval_samples_per_second": 121.876,
"eval_steps_per_second": 3.852,
"step": 4826
},
{
"epoch": 39.0,
"eval_accuracy": 0.5106172839506172,
"eval_loss": 2.6705734729766846,
"eval_runtime": 16.5075,
"eval_samples_per_second": 122.671,
"eval_steps_per_second": 3.877,
"step": 4953
},
{
"epoch": 39.37,
"learning_rate": 1.217391304347826e-06,
"loss": 2.5988,
"step": 5000
},
{
"epoch": 40.0,
"eval_accuracy": 0.5116049382716049,
"eval_loss": 2.655836343765259,
"eval_runtime": 16.7707,
"eval_samples_per_second": 120.746,
"eval_steps_per_second": 3.816,
"step": 5080
},
{
"epoch": 41.0,
"eval_accuracy": 0.5125925925925926,
"eval_loss": 2.6394340991973877,
"eval_runtime": 16.7747,
"eval_samples_per_second": 120.718,
"eval_steps_per_second": 3.815,
"step": 5207
},
{
"epoch": 42.0,
"eval_accuracy": 0.5209876543209877,
"eval_loss": 2.623532295227051,
"eval_runtime": 16.7009,
"eval_samples_per_second": 121.251,
"eval_steps_per_second": 3.832,
"step": 5334
},
{
"epoch": 43.0,
"eval_accuracy": 0.5249382716049382,
"eval_loss": 2.6078622341156006,
"eval_runtime": 16.3629,
"eval_samples_per_second": 123.756,
"eval_steps_per_second": 3.911,
"step": 5461
},
{
"epoch": 43.31,
"learning_rate": 1.1383399209486167e-06,
"loss": 2.5306,
"step": 5500
},
{
"epoch": 44.0,
"eval_accuracy": 0.5293827160493827,
"eval_loss": 2.592928647994995,
"eval_runtime": 16.7766,
"eval_samples_per_second": 120.704,
"eval_steps_per_second": 3.815,
"step": 5588
},
{
"epoch": 45.0,
"eval_accuracy": 0.5338271604938272,
"eval_loss": 2.575329542160034,
"eval_runtime": 17.0334,
"eval_samples_per_second": 118.884,
"eval_steps_per_second": 3.757,
"step": 5715
},
{
"epoch": 46.0,
"eval_accuracy": 0.542716049382716,
"eval_loss": 2.5605058670043945,
"eval_runtime": 16.7153,
"eval_samples_per_second": 121.146,
"eval_steps_per_second": 3.829,
"step": 5842
},
{
"epoch": 47.0,
"eval_accuracy": 0.548641975308642,
"eval_loss": 2.548151969909668,
"eval_runtime": 16.3632,
"eval_samples_per_second": 123.754,
"eval_steps_per_second": 3.911,
"step": 5969
},
{
"epoch": 47.24,
"learning_rate": 1.0592885375494072e-06,
"loss": 2.4638,
"step": 6000
},
{
"epoch": 48.0,
"eval_accuracy": 0.5530864197530864,
"eval_loss": 2.5332412719726562,
"eval_runtime": 16.2257,
"eval_samples_per_second": 124.802,
"eval_steps_per_second": 3.944,
"step": 6096
},
{
"epoch": 49.0,
"eval_accuracy": 0.5535802469135802,
"eval_loss": 2.521514654159546,
"eval_runtime": 16.2855,
"eval_samples_per_second": 124.344,
"eval_steps_per_second": 3.93,
"step": 6223
},
{
"epoch": 50.0,
"eval_accuracy": 0.5619753086419753,
"eval_loss": 2.507723569869995,
"eval_runtime": 16.2945,
"eval_samples_per_second": 124.275,
"eval_steps_per_second": 3.928,
"step": 6350
},
{
"epoch": 51.0,
"eval_accuracy": 0.5595061728395062,
"eval_loss": 2.4946696758270264,
"eval_runtime": 16.2486,
"eval_samples_per_second": 124.626,
"eval_steps_per_second": 3.939,
"step": 6477
},
{
"epoch": 51.18,
"learning_rate": 9.802371541501976e-07,
"loss": 2.405,
"step": 6500
},
{
"epoch": 52.0,
"eval_accuracy": 0.5580246913580247,
"eval_loss": 2.4835174083709717,
"eval_runtime": 16.5375,
"eval_samples_per_second": 122.449,
"eval_steps_per_second": 3.87,
"step": 6604
},
{
"epoch": 53.0,
"eval_accuracy": 0.5585185185185185,
"eval_loss": 2.473555088043213,
"eval_runtime": 17.2437,
"eval_samples_per_second": 117.434,
"eval_steps_per_second": 3.711,
"step": 6731
},
{
"epoch": 54.0,
"eval_accuracy": 0.5644444444444444,
"eval_loss": 2.4599006175994873,
"eval_runtime": 17.2017,
"eval_samples_per_second": 117.721,
"eval_steps_per_second": 3.721,
"step": 6858
},
{
"epoch": 55.0,
"eval_accuracy": 0.5708641975308641,
"eval_loss": 2.444490432739258,
"eval_runtime": 16.4767,
"eval_samples_per_second": 122.901,
"eval_steps_per_second": 3.884,
"step": 6985
},
{
"epoch": 55.12,
"learning_rate": 9.011857707509881e-07,
"loss": 2.3499,
"step": 7000
},
{
"epoch": 56.0,
"eval_accuracy": 0.5718518518518518,
"eval_loss": 2.43546199798584,
"eval_runtime": 17.1098,
"eval_samples_per_second": 118.353,
"eval_steps_per_second": 3.741,
"step": 7112
},
{
"epoch": 57.0,
"eval_accuracy": 0.5738271604938272,
"eval_loss": 2.424128532409668,
"eval_runtime": 16.3356,
"eval_samples_per_second": 123.962,
"eval_steps_per_second": 3.918,
"step": 7239
},
{
"epoch": 58.0,
"eval_accuracy": 0.5679012345679012,
"eval_loss": 2.4162774085998535,
"eval_runtime": 16.3093,
"eval_samples_per_second": 124.162,
"eval_steps_per_second": 3.924,
"step": 7366
},
{
"epoch": 59.0,
"eval_accuracy": 0.5733333333333334,
"eval_loss": 2.404547929763794,
"eval_runtime": 16.3838,
"eval_samples_per_second": 123.598,
"eval_steps_per_second": 3.906,
"step": 7493
},
{
"epoch": 59.06,
"learning_rate": 8.221343873517787e-07,
"loss": 2.3047,
"step": 7500
},
{
"epoch": 60.0,
"eval_accuracy": 0.5767901234567901,
"eval_loss": 2.3955578804016113,
"eval_runtime": 16.4961,
"eval_samples_per_second": 122.756,
"eval_steps_per_second": 3.88,
"step": 7620
},
{
"epoch": 61.0,
"eval_accuracy": 0.5733333333333334,
"eval_loss": 2.3882274627685547,
"eval_runtime": 16.5941,
"eval_samples_per_second": 122.031,
"eval_steps_per_second": 3.857,
"step": 7747
},
{
"epoch": 62.0,
"eval_accuracy": 0.5758024691358025,
"eval_loss": 2.3768720626831055,
"eval_runtime": 16.3063,
"eval_samples_per_second": 124.185,
"eval_steps_per_second": 3.925,
"step": 7874
},
{
"epoch": 62.99,
"learning_rate": 7.430830039525692e-07,
"loss": 2.2616,
"step": 8000
},
{
"epoch": 63.0,
"eval_accuracy": 0.5802469135802469,
"eval_loss": 2.3649182319641113,
"eval_runtime": 16.4383,
"eval_samples_per_second": 123.188,
"eval_steps_per_second": 3.893,
"step": 8001
},
{
"epoch": 64.0,
"eval_accuracy": 0.5782716049382716,
"eval_loss": 2.3605239391326904,
"eval_runtime": 16.5048,
"eval_samples_per_second": 122.691,
"eval_steps_per_second": 3.878,
"step": 8128
},
{
"epoch": 65.0,
"eval_accuracy": 0.5881481481481482,
"eval_loss": 2.348599433898926,
"eval_runtime": 16.6986,
"eval_samples_per_second": 121.268,
"eval_steps_per_second": 3.833,
"step": 8255
},
{
"epoch": 66.0,
"eval_accuracy": 0.5891358024691358,
"eval_loss": 2.3403825759887695,
"eval_runtime": 16.6528,
"eval_samples_per_second": 121.601,
"eval_steps_per_second": 3.843,
"step": 8382
},
{
"epoch": 66.93,
"learning_rate": 6.640316205533597e-07,
"loss": 2.224,
"step": 8500
},
{
"epoch": 67.0,
"eval_accuracy": 0.5876543209876544,
"eval_loss": 2.333630323410034,
"eval_runtime": 16.323,
"eval_samples_per_second": 124.058,
"eval_steps_per_second": 3.921,
"step": 8509
},
{
"epoch": 68.0,
"eval_accuracy": 0.5881481481481482,
"eval_loss": 2.3267500400543213,
"eval_runtime": 16.3769,
"eval_samples_per_second": 123.649,
"eval_steps_per_second": 3.908,
"step": 8636
},
{
"epoch": 69.0,
"eval_accuracy": 0.5871604938271605,
"eval_loss": 2.3214409351348877,
"eval_runtime": 16.4152,
"eval_samples_per_second": 123.361,
"eval_steps_per_second": 3.899,
"step": 8763
},
{
"epoch": 70.0,
"eval_accuracy": 0.5920987654320987,
"eval_loss": 2.3111300468444824,
"eval_runtime": 16.7406,
"eval_samples_per_second": 120.963,
"eval_steps_per_second": 3.823,
"step": 8890
},
{
"epoch": 70.87,
"learning_rate": 5.849802371541502e-07,
"loss": 2.1885,
"step": 9000
},
{
"epoch": 71.0,
"eval_accuracy": 0.5906172839506173,
"eval_loss": 2.304325580596924,
"eval_runtime": 16.5534,
"eval_samples_per_second": 122.331,
"eval_steps_per_second": 3.866,
"step": 9017
},
{
"epoch": 72.0,
"eval_accuracy": 0.5960493827160493,
"eval_loss": 2.297461986541748,
"eval_runtime": 16.4435,
"eval_samples_per_second": 123.149,
"eval_steps_per_second": 3.892,
"step": 9144
},
{
"epoch": 73.0,
"eval_accuracy": 0.5871604938271605,
"eval_loss": 2.296058177947998,
"eval_runtime": 16.3403,
"eval_samples_per_second": 123.927,
"eval_steps_per_second": 3.917,
"step": 9271
},
{
"epoch": 74.0,
"eval_accuracy": 0.5960493827160493,
"eval_loss": 2.2856781482696533,
"eval_runtime": 16.6488,
"eval_samples_per_second": 121.63,
"eval_steps_per_second": 3.844,
"step": 9398
},
{
"epoch": 74.8,
"learning_rate": 5.059288537549406e-07,
"loss": 2.1603,
"step": 9500
},
{
"epoch": 75.0,
"eval_accuracy": 0.5965432098765432,
"eval_loss": 2.2791759967803955,
"eval_runtime": 17.0015,
"eval_samples_per_second": 119.107,
"eval_steps_per_second": 3.764,
"step": 9525
},
{
"epoch": 76.0,
"eval_accuracy": 0.5955555555555555,
"eval_loss": 2.2750935554504395,
"eval_runtime": 16.6586,
"eval_samples_per_second": 121.559,
"eval_steps_per_second": 3.842,
"step": 9652
},
{
"epoch": 77.0,
"eval_accuracy": 0.5965432098765432,
"eval_loss": 2.268305540084839,
"eval_runtime": 16.6007,
"eval_samples_per_second": 121.983,
"eval_steps_per_second": 3.855,
"step": 9779
},
{
"epoch": 78.0,
"eval_accuracy": 0.6049382716049383,
"eval_loss": 2.2604947090148926,
"eval_runtime": 16.3684,
"eval_samples_per_second": 123.714,
"eval_steps_per_second": 3.91,
"step": 9906
},
{
"epoch": 78.74,
"learning_rate": 4.268774703557312e-07,
"loss": 2.1357,
"step": 10000
},
{
"epoch": 79.0,
"eval_accuracy": 0.6064197530864197,
"eval_loss": 2.254981756210327,
"eval_runtime": 16.4537,
"eval_samples_per_second": 123.072,
"eval_steps_per_second": 3.89,
"step": 10033
},
{
"epoch": 80.0,
"eval_accuracy": 0.6034567901234568,
"eval_loss": 2.2524936199188232,
"eval_runtime": 16.3798,
"eval_samples_per_second": 123.628,
"eval_steps_per_second": 3.907,
"step": 10160
},
{
"epoch": 81.0,
"eval_accuracy": 0.6054320987654321,
"eval_loss": 2.2475554943084717,
"eval_runtime": 16.2339,
"eval_samples_per_second": 124.739,
"eval_steps_per_second": 3.942,
"step": 10287
},
{
"epoch": 82.0,
"eval_accuracy": 0.6004938271604938,
"eval_loss": 2.244779586791992,
"eval_runtime": 16.19,
"eval_samples_per_second": 125.077,
"eval_steps_per_second": 3.953,
"step": 10414
},
{
"epoch": 82.68,
"learning_rate": 3.478260869565217e-07,
"loss": 2.1153,
"step": 10500
},
{
"epoch": 83.0,
"eval_accuracy": 0.6009876543209877,
"eval_loss": 2.242079973220825,
"eval_runtime": 16.2447,
"eval_samples_per_second": 124.656,
"eval_steps_per_second": 3.94,
"step": 10541
},
{
"epoch": 84.0,
"eval_accuracy": 0.6009876543209877,
"eval_loss": 2.237128973007202,
"eval_runtime": 16.3552,
"eval_samples_per_second": 123.814,
"eval_steps_per_second": 3.913,
"step": 10668
},
{
"epoch": 85.0,
"eval_accuracy": 0.6034567901234568,
"eval_loss": 2.2331736087799072,
"eval_runtime": 16.4131,
"eval_samples_per_second": 123.377,
"eval_steps_per_second": 3.899,
"step": 10795
},
{
"epoch": 86.0,
"eval_accuracy": 0.6064197530864197,
"eval_loss": 2.230192184448242,
"eval_runtime": 16.263,
"eval_samples_per_second": 124.516,
"eval_steps_per_second": 3.935,
"step": 10922
},
{
"epoch": 86.61,
"learning_rate": 2.6877470355731227e-07,
"loss": 2.0969,
"step": 11000
},
{
"epoch": 87.0,
"eval_accuracy": 0.6083950617283951,
"eval_loss": 2.2269179821014404,
"eval_runtime": 16.4471,
"eval_samples_per_second": 123.122,
"eval_steps_per_second": 3.891,
"step": 11049
},
{
"epoch": 88.0,
"eval_accuracy": 0.6054320987654321,
"eval_loss": 2.224729537963867,
"eval_runtime": 16.5547,
"eval_samples_per_second": 122.322,
"eval_steps_per_second": 3.866,
"step": 11176
},
{
"epoch": 89.0,
"eval_accuracy": 0.6098765432098765,
"eval_loss": 2.2210581302642822,
"eval_runtime": 16.3636,
"eval_samples_per_second": 123.75,
"eval_steps_per_second": 3.911,
"step": 11303
},
{
"epoch": 90.0,
"eval_accuracy": 0.6049382716049383,
"eval_loss": 2.2199292182922363,
"eval_runtime": 16.5723,
"eval_samples_per_second": 122.192,
"eval_steps_per_second": 3.862,
"step": 11430
},
{
"epoch": 90.55,
"learning_rate": 1.8972332015810276e-07,
"loss": 2.0851,
"step": 11500
},
{
"epoch": 91.0,
"eval_accuracy": 0.6103703703703703,
"eval_loss": 2.2158267498016357,
"eval_runtime": 16.2141,
"eval_samples_per_second": 124.892,
"eval_steps_per_second": 3.947,
"step": 11557
},
{
"epoch": 92.0,
"eval_accuracy": 0.6079012345679012,
"eval_loss": 2.214700937271118,
"eval_runtime": 16.2859,
"eval_samples_per_second": 124.341,
"eval_steps_per_second": 3.93,
"step": 11684
},
{
"epoch": 93.0,
"eval_accuracy": 0.6064197530864197,
"eval_loss": 2.2131123542785645,
"eval_runtime": 16.337,
"eval_samples_per_second": 123.952,
"eval_steps_per_second": 3.917,
"step": 11811
},
{
"epoch": 94.0,
"eval_accuracy": 0.6098765432098765,
"eval_loss": 2.2114579677581787,
"eval_runtime": 16.393,
"eval_samples_per_second": 123.528,
"eval_steps_per_second": 3.904,
"step": 11938
},
{
"epoch": 94.49,
"learning_rate": 1.1067193675889327e-07,
"loss": 2.0754,
"step": 12000
},
{
"epoch": 95.0,
"eval_accuracy": 0.6138271604938271,
"eval_loss": 2.209113597869873,
"eval_runtime": 16.4652,
"eval_samples_per_second": 122.987,
"eval_steps_per_second": 3.887,
"step": 12065
},
{
"epoch": 96.0,
"eval_accuracy": 0.6128395061728396,
"eval_loss": 2.2085654735565186,
"eval_runtime": 16.3862,
"eval_samples_per_second": 123.579,
"eval_steps_per_second": 3.906,
"step": 12192
},
{
"epoch": 97.0,
"eval_accuracy": 0.6123456790123457,
"eval_loss": 2.207934617996216,
"eval_runtime": 16.3381,
"eval_samples_per_second": 123.943,
"eval_steps_per_second": 3.917,
"step": 12319
},
{
"epoch": 98.0,
"eval_accuracy": 0.6128395061728396,
"eval_loss": 2.206979513168335,
"eval_runtime": 16.3485,
"eval_samples_per_second": 123.865,
"eval_steps_per_second": 3.915,
"step": 12446
},
{
"epoch": 98.43,
"learning_rate": 3.162055335968379e-08,
"loss": 2.0667,
"step": 12500
},
{
"epoch": 99.0,
"eval_accuracy": 0.6133333333333333,
"eval_loss": 2.2068374156951904,
"eval_runtime": 16.2194,
"eval_samples_per_second": 124.851,
"eval_steps_per_second": 3.946,
"step": 12573
}
],
"logging_steps": 500,
"max_steps": 12700,
"num_train_epochs": 100,
"save_steps": 500,
"total_flos": 6.215388042176161e+19,
"trial_name": null,
"trial_params": null
}