{ "best_metric": 0.8978675645342312, "best_model_checkpoint": "videomae-base-finetuned-dd\\checkpoint-1344", "epoch": 4.005917159763314, "eval_steps": 500, "global_step": 1352, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0073964497041420114, "grad_norm": 22.897619247436523, "learning_rate": 3.6764705882352942e-06, "loss": 0.7648, "step": 10 }, { "epoch": 0.014792899408284023, "grad_norm": 6.239708423614502, "learning_rate": 7.3529411764705884e-06, "loss": 0.6619, "step": 20 }, { "epoch": 0.022189349112426034, "grad_norm": 11.775946617126465, "learning_rate": 1.1029411764705883e-05, "loss": 0.6844, "step": 30 }, { "epoch": 0.029585798816568046, "grad_norm": 12.948445320129395, "learning_rate": 1.4705882352941177e-05, "loss": 0.6158, "step": 40 }, { "epoch": 0.03698224852071006, "grad_norm": 18.087614059448242, "learning_rate": 1.8382352941176472e-05, "loss": 0.6569, "step": 50 }, { "epoch": 0.04437869822485207, "grad_norm": 10.075726509094238, "learning_rate": 2.2058823529411766e-05, "loss": 0.6572, "step": 60 }, { "epoch": 0.051775147928994084, "grad_norm": 4.17144250869751, "learning_rate": 2.5735294117647057e-05, "loss": 0.3788, "step": 70 }, { "epoch": 0.05917159763313609, "grad_norm": 24.00593376159668, "learning_rate": 2.9411764705882354e-05, "loss": 0.5752, "step": 80 }, { "epoch": 0.06656804733727811, "grad_norm": 0.1779569834470749, "learning_rate": 3.308823529411765e-05, "loss": 0.488, "step": 90 }, { "epoch": 0.07396449704142012, "grad_norm": 2.1989564895629883, "learning_rate": 3.6764705882352945e-05, "loss": 1.0225, "step": 100 }, { "epoch": 0.08136094674556213, "grad_norm": 16.50670623779297, "learning_rate": 4.044117647058824e-05, "loss": 1.0776, "step": 110 }, { "epoch": 0.08875739644970414, "grad_norm": 36.278076171875, "learning_rate": 4.411764705882353e-05, "loss": 1.1623, "step": 120 }, { "epoch": 0.09615384615384616, "grad_norm": 8.829483032226562, "learning_rate": 4.7794117647058826e-05, "loss": 0.9484, "step": 130 }, { "epoch": 0.10355029585798817, "grad_norm": 0.47137251496315, "learning_rate": 4.983552631578948e-05, "loss": 0.2775, "step": 140 }, { "epoch": 0.11094674556213018, "grad_norm": 1.3937591314315796, "learning_rate": 4.942434210526316e-05, "loss": 0.959, "step": 150 }, { "epoch": 0.11834319526627218, "grad_norm": 14.168859481811523, "learning_rate": 4.901315789473684e-05, "loss": 1.2776, "step": 160 }, { "epoch": 0.1257396449704142, "grad_norm": 209.46127319335938, "learning_rate": 4.860197368421053e-05, "loss": 1.1028, "step": 170 }, { "epoch": 0.13313609467455623, "grad_norm": 0.22284063696861267, "learning_rate": 4.819078947368421e-05, "loss": 0.2779, "step": 180 }, { "epoch": 0.14053254437869822, "grad_norm": 0.16761070489883423, "learning_rate": 4.7779605263157896e-05, "loss": 1.05, "step": 190 }, { "epoch": 0.14792899408284024, "grad_norm": 33.303768157958984, "learning_rate": 4.736842105263158e-05, "loss": 0.5783, "step": 200 }, { "epoch": 0.15532544378698224, "grad_norm": 1.759582281112671, "learning_rate": 4.6957236842105265e-05, "loss": 0.5052, "step": 210 }, { "epoch": 0.16272189349112426, "grad_norm": 0.16995219886302948, "learning_rate": 4.654605263157895e-05, "loss": 0.8945, "step": 220 }, { "epoch": 0.17011834319526628, "grad_norm": 5.4278740882873535, "learning_rate": 4.6134868421052635e-05, "loss": 1.1972, "step": 230 }, { "epoch": 0.17751479289940827, "grad_norm": 5.3494038581848145, "learning_rate": 4.572368421052632e-05, "loss": 0.5657, "step": 240 }, { "epoch": 0.1849112426035503, "grad_norm": 38.29714584350586, "learning_rate": 4.5312500000000004e-05, "loss": 0.6919, "step": 250 }, { "epoch": 0.19230769230769232, "grad_norm": 31.136438369750977, "learning_rate": 4.490131578947369e-05, "loss": 0.4502, "step": 260 }, { "epoch": 0.1997041420118343, "grad_norm": 16.622379302978516, "learning_rate": 4.449013157894737e-05, "loss": 0.4002, "step": 270 }, { "epoch": 0.20710059171597633, "grad_norm": 0.7369871735572815, "learning_rate": 4.407894736842105e-05, "loss": 0.7867, "step": 280 }, { "epoch": 0.21449704142011836, "grad_norm": 1.566986083984375, "learning_rate": 4.3667763157894735e-05, "loss": 0.9427, "step": 290 }, { "epoch": 0.22189349112426035, "grad_norm": 20.51339340209961, "learning_rate": 4.3256578947368426e-05, "loss": 1.3318, "step": 300 }, { "epoch": 0.22928994082840237, "grad_norm": 16.819499969482422, "learning_rate": 4.284539473684211e-05, "loss": 0.7456, "step": 310 }, { "epoch": 0.23668639053254437, "grad_norm": 0.06786404550075531, "learning_rate": 4.2434210526315796e-05, "loss": 0.0428, "step": 320 }, { "epoch": 0.2440828402366864, "grad_norm": 18.684722900390625, "learning_rate": 4.202302631578947e-05, "loss": 0.5777, "step": 330 }, { "epoch": 0.2485207100591716, "eval_accuracy": 0.8372615039281706, "eval_loss": 0.8213610649108887, "eval_runtime": 1015.4502, "eval_samples_per_second": 0.877, "eval_steps_per_second": 0.439, "step": 336 }, { "epoch": 1.0029585798816567, "grad_norm": 0.05207992345094681, "learning_rate": 4.161184210526316e-05, "loss": 0.3122, "step": 340 }, { "epoch": 1.0103550295857988, "grad_norm": 0.08761809021234512, "learning_rate": 4.120065789473684e-05, "loss": 0.8974, "step": 350 }, { "epoch": 1.017751479289941, "grad_norm": 2.162889003753662, "learning_rate": 4.078947368421053e-05, "loss": 0.5468, "step": 360 }, { "epoch": 1.0251479289940828, "grad_norm": 0.3578147888183594, "learning_rate": 4.037828947368421e-05, "loss": 0.4065, "step": 370 }, { "epoch": 1.032544378698225, "grad_norm": 0.1941654235124588, "learning_rate": 3.9967105263157896e-05, "loss": 0.5691, "step": 380 }, { "epoch": 1.0399408284023668, "grad_norm": 0.4009058475494385, "learning_rate": 3.955592105263158e-05, "loss": 1.0974, "step": 390 }, { "epoch": 1.047337278106509, "grad_norm": 1.8374804258346558, "learning_rate": 3.9144736842105265e-05, "loss": 0.3989, "step": 400 }, { "epoch": 1.054733727810651, "grad_norm": 0.1476195603609085, "learning_rate": 3.873355263157895e-05, "loss": 0.9985, "step": 410 }, { "epoch": 1.0621301775147929, "grad_norm": 0.23249651491641998, "learning_rate": 3.8322368421052634e-05, "loss": 0.0116, "step": 420 }, { "epoch": 1.069526627218935, "grad_norm": 120.95174407958984, "learning_rate": 3.791118421052632e-05, "loss": 0.2631, "step": 430 }, { "epoch": 1.0769230769230769, "grad_norm": 0.18491333723068237, "learning_rate": 3.7500000000000003e-05, "loss": 0.6313, "step": 440 }, { "epoch": 1.084319526627219, "grad_norm": 139.33804321289062, "learning_rate": 3.708881578947369e-05, "loss": 0.8515, "step": 450 }, { "epoch": 1.0917159763313609, "grad_norm": 0.22990567982196808, "learning_rate": 3.6677631578947366e-05, "loss": 0.7119, "step": 460 }, { "epoch": 1.099112426035503, "grad_norm": 1.84480881690979, "learning_rate": 3.626644736842105e-05, "loss": 0.2762, "step": 470 }, { "epoch": 1.106508875739645, "grad_norm": 12.12067699432373, "learning_rate": 3.5855263157894735e-05, "loss": 0.3234, "step": 480 }, { "epoch": 1.113905325443787, "grad_norm": 25.058439254760742, "learning_rate": 3.5444078947368426e-05, "loss": 0.869, "step": 490 }, { "epoch": 1.121301775147929, "grad_norm": 20.12900161743164, "learning_rate": 3.503289473684211e-05, "loss": 1.085, "step": 500 }, { "epoch": 1.128698224852071, "grad_norm": 12.086252212524414, "learning_rate": 3.4621710526315795e-05, "loss": 0.7208, "step": 510 }, { "epoch": 1.136094674556213, "grad_norm": 25.66025161743164, "learning_rate": 3.421052631578947e-05, "loss": 0.6842, "step": 520 }, { "epoch": 1.143491124260355, "grad_norm": 15.72556209564209, "learning_rate": 3.379934210526316e-05, "loss": 0.7922, "step": 530 }, { "epoch": 1.150887573964497, "grad_norm": 0.22508259117603302, "learning_rate": 3.338815789473684e-05, "loss": 0.4978, "step": 540 }, { "epoch": 1.1582840236686391, "grad_norm": 0.3129032254219055, "learning_rate": 3.297697368421053e-05, "loss": 0.9733, "step": 550 }, { "epoch": 1.165680473372781, "grad_norm": 2.5810675621032715, "learning_rate": 3.256578947368421e-05, "loss": 1.0189, "step": 560 }, { "epoch": 1.1730769230769231, "grad_norm": 0.23227569460868835, "learning_rate": 3.2154605263157896e-05, "loss": 0.1902, "step": 570 }, { "epoch": 1.180473372781065, "grad_norm": 41.80963134765625, "learning_rate": 3.174342105263158e-05, "loss": 0.676, "step": 580 }, { "epoch": 1.1878698224852071, "grad_norm": 0.23230111598968506, "learning_rate": 3.1332236842105265e-05, "loss": 0.7534, "step": 590 }, { "epoch": 1.195266272189349, "grad_norm": 0.22635290026664734, "learning_rate": 3.092105263157895e-05, "loss": 0.4239, "step": 600 }, { "epoch": 1.202662721893491, "grad_norm": 1.2736923694610596, "learning_rate": 3.0509868421052634e-05, "loss": 0.1946, "step": 610 }, { "epoch": 1.2100591715976332, "grad_norm": 138.90115356445312, "learning_rate": 3.009868421052632e-05, "loss": 0.3972, "step": 620 }, { "epoch": 1.217455621301775, "grad_norm": 0.8171183466911316, "learning_rate": 2.96875e-05, "loss": 0.3475, "step": 630 }, { "epoch": 1.2248520710059172, "grad_norm": 0.14913396537303925, "learning_rate": 2.9276315789473684e-05, "loss": 0.0097, "step": 640 }, { "epoch": 1.232248520710059, "grad_norm": 0.16601596772670746, "learning_rate": 2.886513157894737e-05, "loss": 0.7906, "step": 650 }, { "epoch": 1.2396449704142012, "grad_norm": 0.0358225516974926, "learning_rate": 2.8453947368421054e-05, "loss": 1.0754, "step": 660 }, { "epoch": 1.2470414201183433, "grad_norm": 18.65558433532715, "learning_rate": 2.8042763157894735e-05, "loss": 1.144, "step": 670 }, { "epoch": 1.2492603550295858, "eval_accuracy": 0.8754208754208754, "eval_loss": 0.4453337490558624, "eval_runtime": 1018.2198, "eval_samples_per_second": 0.875, "eval_steps_per_second": 0.438, "step": 673 }, { "epoch": 2.0051775147928996, "grad_norm": 0.26922106742858887, "learning_rate": 2.7631578947368426e-05, "loss": 0.0545, "step": 680 }, { "epoch": 2.0125739644970415, "grad_norm": 0.08718305081129074, "learning_rate": 2.7220394736842107e-05, "loss": 0.2609, "step": 690 }, { "epoch": 2.0199704142011834, "grad_norm": 0.062128640711307526, "learning_rate": 2.6809210526315792e-05, "loss": 0.205, "step": 700 }, { "epoch": 2.0273668639053253, "grad_norm": 0.03102479875087738, "learning_rate": 2.6398026315789476e-05, "loss": 0.2414, "step": 710 }, { "epoch": 2.0347633136094676, "grad_norm": 0.1195179671049118, "learning_rate": 2.598684210526316e-05, "loss": 0.8337, "step": 720 }, { "epoch": 2.0421597633136095, "grad_norm": 0.18848393857479095, "learning_rate": 2.5575657894736842e-05, "loss": 0.938, "step": 730 }, { "epoch": 2.0495562130177514, "grad_norm": 0.30771324038505554, "learning_rate": 2.5164473684210527e-05, "loss": 0.5181, "step": 740 }, { "epoch": 2.0569526627218937, "grad_norm": 45.85493469238281, "learning_rate": 2.4753289473684215e-05, "loss": 0.9593, "step": 750 }, { "epoch": 2.0643491124260356, "grad_norm": 0.8830978870391846, "learning_rate": 2.4342105263157896e-05, "loss": 0.0696, "step": 760 }, { "epoch": 2.0717455621301775, "grad_norm": 0.23656447231769562, "learning_rate": 2.393092105263158e-05, "loss": 0.3063, "step": 770 }, { "epoch": 2.0791420118343193, "grad_norm": 14.187068939208984, "learning_rate": 2.3519736842105265e-05, "loss": 1.1477, "step": 780 }, { "epoch": 2.0865384615384617, "grad_norm": 20.795085906982422, "learning_rate": 2.3108552631578946e-05, "loss": 0.6215, "step": 790 }, { "epoch": 2.0939349112426036, "grad_norm": 0.27157312631607056, "learning_rate": 2.2697368421052634e-05, "loss": 0.271, "step": 800 }, { "epoch": 2.1013313609467454, "grad_norm": 26.25171661376953, "learning_rate": 2.228618421052632e-05, "loss": 0.9012, "step": 810 }, { "epoch": 2.1087278106508878, "grad_norm": 0.18786455690860748, "learning_rate": 2.1875e-05, "loss": 0.1285, "step": 820 }, { "epoch": 2.1161242603550297, "grad_norm": 0.09338750690221786, "learning_rate": 2.1463815789473684e-05, "loss": 0.6236, "step": 830 }, { "epoch": 2.1235207100591715, "grad_norm": 0.15658584237098694, "learning_rate": 2.105263157894737e-05, "loss": 0.2534, "step": 840 }, { "epoch": 2.1309171597633134, "grad_norm": 0.03897108510136604, "learning_rate": 2.0641447368421053e-05, "loss": 0.0317, "step": 850 }, { "epoch": 2.1383136094674557, "grad_norm": 0.17228557169437408, "learning_rate": 2.0230263157894738e-05, "loss": 0.5894, "step": 860 }, { "epoch": 2.1457100591715976, "grad_norm": 37.38359069824219, "learning_rate": 1.9819078947368423e-05, "loss": 0.3589, "step": 870 }, { "epoch": 2.1531065088757395, "grad_norm": 14.766074180603027, "learning_rate": 1.9407894736842107e-05, "loss": 0.2452, "step": 880 }, { "epoch": 2.160502958579882, "grad_norm": 0.09724285453557968, "learning_rate": 1.8996710526315788e-05, "loss": 0.4429, "step": 890 }, { "epoch": 2.1678994082840237, "grad_norm": 0.20985926687717438, "learning_rate": 1.8585526315789476e-05, "loss": 0.3037, "step": 900 }, { "epoch": 2.1752958579881656, "grad_norm": 17.582971572875977, "learning_rate": 1.8174342105263157e-05, "loss": 0.8678, "step": 910 }, { "epoch": 2.1826923076923075, "grad_norm": 0.10595466196537018, "learning_rate": 1.7763157894736842e-05, "loss": 0.5378, "step": 920 }, { "epoch": 2.19008875739645, "grad_norm": 0.08267045021057129, "learning_rate": 1.7351973684210527e-05, "loss": 0.3613, "step": 930 }, { "epoch": 2.1974852071005917, "grad_norm": 0.33162182569503784, "learning_rate": 1.694078947368421e-05, "loss": 0.4509, "step": 940 }, { "epoch": 2.2048816568047336, "grad_norm": 0.070041224360466, "learning_rate": 1.6529605263157896e-05, "loss": 0.2242, "step": 950 }, { "epoch": 2.212278106508876, "grad_norm": 0.07664915174245834, "learning_rate": 1.611842105263158e-05, "loss": 0.0046, "step": 960 }, { "epoch": 2.219674556213018, "grad_norm": 84.51457977294922, "learning_rate": 1.5707236842105265e-05, "loss": 0.4331, "step": 970 }, { "epoch": 2.2270710059171597, "grad_norm": 0.13595078885555267, "learning_rate": 1.5296052631578946e-05, "loss": 0.2439, "step": 980 }, { "epoch": 2.234467455621302, "grad_norm": 107.51094818115234, "learning_rate": 1.4884868421052634e-05, "loss": 0.1195, "step": 990 }, { "epoch": 2.241863905325444, "grad_norm": 6.981244087219238, "learning_rate": 1.4473684210526317e-05, "loss": 0.2644, "step": 1000 }, { "epoch": 2.2492603550295858, "grad_norm": 0.01494936365634203, "learning_rate": 1.4062500000000001e-05, "loss": 0.2783, "step": 1010 }, { "epoch": 2.2492603550295858, "eval_accuracy": 0.8170594837261503, "eval_loss": 0.8930483460426331, "eval_runtime": 977.9049, "eval_samples_per_second": 0.911, "eval_steps_per_second": 0.456, "step": 1010 }, { "epoch": 3.007396449704142, "grad_norm": 2.669276714324951, "learning_rate": 1.3651315789473684e-05, "loss": 0.6067, "step": 1020 }, { "epoch": 3.014792899408284, "grad_norm": 0.0429365374147892, "learning_rate": 1.3240131578947369e-05, "loss": 0.2312, "step": 1030 }, { "epoch": 3.022189349112426, "grad_norm": 0.0598057359457016, "learning_rate": 1.2828947368421055e-05, "loss": 0.0344, "step": 1040 }, { "epoch": 3.029585798816568, "grad_norm": 0.0926346629858017, "learning_rate": 1.2417763157894738e-05, "loss": 0.9614, "step": 1050 }, { "epoch": 3.03698224852071, "grad_norm": 0.01585511490702629, "learning_rate": 1.200657894736842e-05, "loss": 0.7316, "step": 1060 }, { "epoch": 3.044378698224852, "grad_norm": 0.10066540539264679, "learning_rate": 1.1595394736842107e-05, "loss": 0.0041, "step": 1070 }, { "epoch": 3.051775147928994, "grad_norm": 0.06809567660093307, "learning_rate": 1.118421052631579e-05, "loss": 0.0092, "step": 1080 }, { "epoch": 3.059171597633136, "grad_norm": 0.14008688926696777, "learning_rate": 1.0773026315789474e-05, "loss": 1.0007, "step": 1090 }, { "epoch": 3.0665680473372783, "grad_norm": 0.030784226953983307, "learning_rate": 1.0361842105263159e-05, "loss": 0.4198, "step": 1100 }, { "epoch": 3.07396449704142, "grad_norm": 0.07336018979549408, "learning_rate": 9.950657894736842e-06, "loss": 0.2221, "step": 1110 }, { "epoch": 3.081360946745562, "grad_norm": 0.06217151880264282, "learning_rate": 9.539473684210528e-06, "loss": 0.2991, "step": 1120 }, { "epoch": 3.088757396449704, "grad_norm": 0.07419371604919434, "learning_rate": 9.128289473684211e-06, "loss": 0.6161, "step": 1130 }, { "epoch": 3.0961538461538463, "grad_norm": 0.09845346957445145, "learning_rate": 8.717105263157894e-06, "loss": 0.204, "step": 1140 }, { "epoch": 3.103550295857988, "grad_norm": 0.020927123725414276, "learning_rate": 8.30592105263158e-06, "loss": 0.5403, "step": 1150 }, { "epoch": 3.11094674556213, "grad_norm": 0.07396041601896286, "learning_rate": 7.894736842105263e-06, "loss": 0.4637, "step": 1160 }, { "epoch": 3.1183431952662723, "grad_norm": 0.11606968194246292, "learning_rate": 7.483552631578948e-06, "loss": 1.3375, "step": 1170 }, { "epoch": 3.1257396449704142, "grad_norm": 0.08246757835149765, "learning_rate": 7.072368421052632e-06, "loss": 0.4418, "step": 1180 }, { "epoch": 3.133136094674556, "grad_norm": 0.17207257449626923, "learning_rate": 6.661184210526317e-06, "loss": 0.2426, "step": 1190 }, { "epoch": 3.140532544378698, "grad_norm": 0.49036145210266113, "learning_rate": 6.25e-06, "loss": 0.2545, "step": 1200 }, { "epoch": 3.1479289940828403, "grad_norm": 68.61939239501953, "learning_rate": 5.838815789473685e-06, "loss": 0.5084, "step": 1210 }, { "epoch": 3.155325443786982, "grad_norm": 0.08831863105297089, "learning_rate": 5.4276315789473686e-06, "loss": 0.0049, "step": 1220 }, { "epoch": 3.162721893491124, "grad_norm": 0.10537251830101013, "learning_rate": 5.016447368421053e-06, "loss": 0.1406, "step": 1230 }, { "epoch": 3.1701183431952664, "grad_norm": 17.6816349029541, "learning_rate": 4.605263157894737e-06, "loss": 0.486, "step": 1240 }, { "epoch": 3.1775147928994083, "grad_norm": 0.461302250623703, "learning_rate": 4.194078947368421e-06, "loss": 0.0042, "step": 1250 }, { "epoch": 3.18491124260355, "grad_norm": 0.04792853444814682, "learning_rate": 3.7828947368421055e-06, "loss": 0.719, "step": 1260 }, { "epoch": 3.1923076923076925, "grad_norm": 0.058049630373716354, "learning_rate": 3.3717105263157897e-06, "loss": 0.2919, "step": 1270 }, { "epoch": 3.1997041420118344, "grad_norm": 13.063404083251953, "learning_rate": 2.960526315789474e-06, "loss": 0.2856, "step": 1280 }, { "epoch": 3.2071005917159763, "grad_norm": 0.4628206193447113, "learning_rate": 2.549342105263158e-06, "loss": 0.0601, "step": 1290 }, { "epoch": 3.214497041420118, "grad_norm": 0.045592982321977615, "learning_rate": 2.138157894736842e-06, "loss": 0.53, "step": 1300 }, { "epoch": 3.2218934911242605, "grad_norm": 0.04378387704491615, "learning_rate": 1.7269736842105266e-06, "loss": 0.0033, "step": 1310 }, { "epoch": 3.2292899408284024, "grad_norm": 0.052677396684885025, "learning_rate": 1.3157894736842106e-06, "loss": 0.003, "step": 1320 }, { "epoch": 3.2366863905325443, "grad_norm": 0.07824493199586868, "learning_rate": 9.046052631578948e-07, "loss": 0.3084, "step": 1330 }, { "epoch": 3.2440828402366866, "grad_norm": 0.05082060024142265, "learning_rate": 4.934210526315789e-07, "loss": 0.0037, "step": 1340 }, { "epoch": 3.247041420118343, "eval_accuracy": 0.8978675645342312, "eval_loss": 0.40774551033973694, "eval_runtime": 1030.7199, "eval_samples_per_second": 0.864, "eval_steps_per_second": 0.433, "step": 1344 }, { "epoch": 4.004437869822485, "grad_norm": 0.045703090727329254, "learning_rate": 8.223684210526316e-08, "loss": 0.2113, "step": 1350 }, { "epoch": 4.005917159763314, "eval_accuracy": 0.8978675645342312, "eval_loss": 0.40793702006340027, "eval_runtime": 1018.6267, "eval_samples_per_second": 0.875, "eval_steps_per_second": 0.438, "step": 1352 }, { "epoch": 4.005917159763314, "step": 1352, "total_flos": 3.3668665207526523e+18, "train_loss": 0.5293050300165978, "train_runtime": 11070.5286, "train_samples_per_second": 0.244, "train_steps_per_second": 0.122 }, { "epoch": 4.005917159763314, "eval_accuracy": 0.890927624872579, "eval_loss": 0.4576520323753357, "eval_runtime": 1130.3017, "eval_samples_per_second": 0.868, "eval_steps_per_second": 0.434, "step": 1352 }, { "epoch": 4.005917159763314, "eval_accuracy": 0.890927624872579, "eval_loss": 0.4576520621776581, "eval_runtime": 1124.6542, "eval_samples_per_second": 0.872, "eval_steps_per_second": 0.437, "step": 1352 } ], "logging_steps": 10, "max_steps": 1352, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 3.3668665207526523e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }