{ "best_metric": 1.972500205039978, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 2.771667146559171, "eval_steps": 25, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.018427872156636913, "grad_norm": 15.992127418518066, "learning_rate": 1e-05, "loss": 10.9897, "step": 1 }, { "epoch": 0.018427872156636913, "eval_loss": 10.919320106506348, "eval_runtime": 0.1933, "eval_samples_per_second": 258.702, "eval_steps_per_second": 67.263, "step": 1 }, { "epoch": 0.036855744313273826, "grad_norm": 18.243144989013672, "learning_rate": 2e-05, "loss": 11.0612, "step": 2 }, { "epoch": 0.05528361646991074, "grad_norm": 22.34180450439453, "learning_rate": 3e-05, "loss": 10.92, "step": 3 }, { "epoch": 0.07371148862654765, "grad_norm": 26.782257080078125, "learning_rate": 4e-05, "loss": 10.9022, "step": 4 }, { "epoch": 0.09213936078318456, "grad_norm": 27.877561569213867, "learning_rate": 5e-05, "loss": 10.9281, "step": 5 }, { "epoch": 0.11056723293982149, "grad_norm": 26.429214477539062, "learning_rate": 6e-05, "loss": 10.7337, "step": 6 }, { "epoch": 0.1289951050964584, "grad_norm": 32.90009689331055, "learning_rate": 7e-05, "loss": 10.3728, "step": 7 }, { "epoch": 0.1474229772530953, "grad_norm": 47.46554183959961, "learning_rate": 8e-05, "loss": 9.8818, "step": 8 }, { "epoch": 0.16585084940973222, "grad_norm": 53.67299270629883, "learning_rate": 9e-05, "loss": 9.2977, "step": 9 }, { "epoch": 0.18427872156636912, "grad_norm": 41.09412384033203, "learning_rate": 0.0001, "loss": 8.5946, "step": 10 }, { "epoch": 0.20270659372300603, "grad_norm": 29.474477767944336, "learning_rate": 9.999486728278741e-05, "loss": 7.969, "step": 11 }, { "epoch": 0.22113446587964297, "grad_norm": 23.831918716430664, "learning_rate": 9.997947030202901e-05, "loss": 7.6579, "step": 12 }, { "epoch": 0.23956233803627988, "grad_norm": 17.510284423828125, "learning_rate": 9.995381257009583e-05, "loss": 8.435, "step": 13 }, { "epoch": 0.2579902101929168, "grad_norm": 18.202348709106445, "learning_rate": 9.991789994004929e-05, "loss": 9.1568, "step": 14 }, { "epoch": 0.2764180823495537, "grad_norm": 26.493127822875977, "learning_rate": 9.987174060430607e-05, "loss": 8.2426, "step": 15 }, { "epoch": 0.2948459545061906, "grad_norm": 26.108442306518555, "learning_rate": 9.981534509276909e-05, "loss": 6.9778, "step": 16 }, { "epoch": 0.31327382666282755, "grad_norm": 32.03369140625, "learning_rate": 9.974872627042566e-05, "loss": 6.4199, "step": 17 }, { "epoch": 0.33170169881946443, "grad_norm": 25.186433792114258, "learning_rate": 9.967189933441243e-05, "loss": 6.0492, "step": 18 }, { "epoch": 0.35012957097610137, "grad_norm": 17.842653274536133, "learning_rate": 9.958488181054886e-05, "loss": 5.655, "step": 19 }, { "epoch": 0.36855744313273825, "grad_norm": 15.799666404724121, "learning_rate": 9.948769354933904e-05, "loss": 5.3229, "step": 20 }, { "epoch": 0.3869853152893752, "grad_norm": 14.595541954040527, "learning_rate": 9.938035672144348e-05, "loss": 5.0638, "step": 21 }, { "epoch": 0.40541318744601207, "grad_norm": 14.175960540771484, "learning_rate": 9.926289581262147e-05, "loss": 4.794, "step": 22 }, { "epoch": 0.423841059602649, "grad_norm": 14.219879150390625, "learning_rate": 9.913533761814537e-05, "loss": 4.5132, "step": 23 }, { "epoch": 0.44226893175928594, "grad_norm": 13.641704559326172, "learning_rate": 9.899771123668812e-05, "loss": 4.2567, "step": 24 }, { "epoch": 0.4606968039159228, "grad_norm": 13.702838897705078, "learning_rate": 9.885004806368512e-05, "loss": 3.9403, "step": 25 }, { "epoch": 0.4606968039159228, "eval_loss": 4.007586479187012, "eval_runtime": 0.1925, "eval_samples_per_second": 259.786, "eval_steps_per_second": 67.544, "step": 25 }, { "epoch": 0.47912467607255976, "grad_norm": 26.746917724609375, "learning_rate": 9.869238178417235e-05, "loss": 5.5681, "step": 26 }, { "epoch": 0.49755254822919665, "grad_norm": 20.92180061340332, "learning_rate": 9.852474836510213e-05, "loss": 5.1229, "step": 27 }, { "epoch": 0.5159804203858336, "grad_norm": 15.183903694152832, "learning_rate": 9.834718604713824e-05, "loss": 3.877, "step": 28 }, { "epoch": 0.5344082925424705, "grad_norm": 11.438804626464844, "learning_rate": 9.81597353359325e-05, "loss": 3.1596, "step": 29 }, { "epoch": 0.5528361646991073, "grad_norm": 10.248758316040039, "learning_rate": 9.796243899288456e-05, "loss": 3.1036, "step": 30 }, { "epoch": 0.5712640368557443, "grad_norm": 9.631898880004883, "learning_rate": 9.775534202538713e-05, "loss": 2.9947, "step": 31 }, { "epoch": 0.5896919090123812, "grad_norm": 7.5189056396484375, "learning_rate": 9.75384916765588e-05, "loss": 2.7947, "step": 32 }, { "epoch": 0.6081197811690181, "grad_norm": 7.427252292633057, "learning_rate": 9.731193741446715e-05, "loss": 2.6937, "step": 33 }, { "epoch": 0.6265476533256551, "grad_norm": 6.640748977661133, "learning_rate": 9.707573092084368e-05, "loss": 2.5984, "step": 34 }, { "epoch": 0.644975525482292, "grad_norm": 5.397184371948242, "learning_rate": 9.682992607929442e-05, "loss": 2.4487, "step": 35 }, { "epoch": 0.6634033976389289, "grad_norm": 5.710893630981445, "learning_rate": 9.657457896300791e-05, "loss": 2.4468, "step": 36 }, { "epoch": 0.6818312697955657, "grad_norm": 6.4503068923950195, "learning_rate": 9.630974782196362e-05, "loss": 2.2927, "step": 37 }, { "epoch": 0.7002591419522027, "grad_norm": 7.673217296600342, "learning_rate": 9.603549306964407e-05, "loss": 2.6988, "step": 38 }, { "epoch": 0.7186870141088396, "grad_norm": 14.466754913330078, "learning_rate": 9.575187726925313e-05, "loss": 2.976, "step": 39 }, { "epoch": 0.7371148862654765, "grad_norm": 8.982857704162598, "learning_rate": 9.545896511944417e-05, "loss": 2.6344, "step": 40 }, { "epoch": 0.7555427584221135, "grad_norm": 9.449691772460938, "learning_rate": 9.51568234395609e-05, "loss": 2.3176, "step": 41 }, { "epoch": 0.7739706305787504, "grad_norm": 6.093954563140869, "learning_rate": 9.484552115439445e-05, "loss": 2.2206, "step": 42 }, { "epoch": 0.7923985027353873, "grad_norm": 4.112945079803467, "learning_rate": 9.452512927846027e-05, "loss": 2.1997, "step": 43 }, { "epoch": 0.8108263748920241, "grad_norm": 2.3723182678222656, "learning_rate": 9.41957208997982e-05, "loss": 2.193, "step": 44 }, { "epoch": 0.8292542470486611, "grad_norm": 3.2265055179595947, "learning_rate": 9.385737116329958e-05, "loss": 2.1391, "step": 45 }, { "epoch": 0.847682119205298, "grad_norm": 2.676258087158203, "learning_rate": 9.351015725356514e-05, "loss": 2.1623, "step": 46 }, { "epoch": 0.8661099913619349, "grad_norm": 2.43552303314209, "learning_rate": 9.315415837729759e-05, "loss": 2.1083, "step": 47 }, { "epoch": 0.8845378635185719, "grad_norm": 2.213883399963379, "learning_rate": 9.278945574523292e-05, "loss": 2.1477, "step": 48 }, { "epoch": 0.9029657356752088, "grad_norm": 1.841592788696289, "learning_rate": 9.241613255361455e-05, "loss": 2.0765, "step": 49 }, { "epoch": 0.9213936078318457, "grad_norm": 1.8612364530563354, "learning_rate": 9.203427396521454e-05, "loss": 2.0881, "step": 50 }, { "epoch": 0.9213936078318457, "eval_loss": 2.107802391052246, "eval_runtime": 0.1929, "eval_samples_per_second": 259.264, "eval_steps_per_second": 67.409, "step": 50 }, { "epoch": 0.9398214799884825, "grad_norm": 3.5257246494293213, "learning_rate": 9.164396708990622e-05, "loss": 2.2586, "step": 51 }, { "epoch": 0.9582493521451195, "grad_norm": 2.693159580230713, "learning_rate": 9.124530096479257e-05, "loss": 2.1224, "step": 52 }, { "epoch": 0.9766772243017564, "grad_norm": 1.2561959028244019, "learning_rate": 9.083836653389502e-05, "loss": 2.083, "step": 53 }, { "epoch": 0.9951050964583933, "grad_norm": 1.5297772884368896, "learning_rate": 9.042325662740726e-05, "loss": 2.0534, "step": 54 }, { "epoch": 1.0172761301468471, "grad_norm": 5.560340404510498, "learning_rate": 9.000006594051873e-05, "loss": 4.0714, "step": 55 }, { "epoch": 1.035704002303484, "grad_norm": 3.4698352813720703, "learning_rate": 8.956889101181262e-05, "loss": 2.2475, "step": 56 }, { "epoch": 1.054131874460121, "grad_norm": 3.710216522216797, "learning_rate": 8.912983020124359e-05, "loss": 2.1335, "step": 57 }, { "epoch": 1.0725597466167578, "grad_norm": 2.7942962646484375, "learning_rate": 8.868298366769954e-05, "loss": 2.0634, "step": 58 }, { "epoch": 1.0909876187733947, "grad_norm": 1.4236139059066772, "learning_rate": 8.822845334615364e-05, "loss": 2.059, "step": 59 }, { "epoch": 1.1094154909300316, "grad_norm": 1.0815931558609009, "learning_rate": 8.776634292441048e-05, "loss": 2.0866, "step": 60 }, { "epoch": 1.1278433630866687, "grad_norm": 1.249880313873291, "learning_rate": 8.729675781945304e-05, "loss": 2.0624, "step": 61 }, { "epoch": 1.1462712352433055, "grad_norm": 1.8093760013580322, "learning_rate": 8.681980515339464e-05, "loss": 1.9774, "step": 62 }, { "epoch": 1.1646991073999424, "grad_norm": 1.277862310409546, "learning_rate": 8.633559372904239e-05, "loss": 2.0796, "step": 63 }, { "epoch": 1.1831269795565793, "grad_norm": 1.3057935237884521, "learning_rate": 8.584423400507679e-05, "loss": 2.0784, "step": 64 }, { "epoch": 1.2015548517132162, "grad_norm": 0.9269258975982666, "learning_rate": 8.534583807085398e-05, "loss": 1.9983, "step": 65 }, { "epoch": 1.219982723869853, "grad_norm": 1.6757804155349731, "learning_rate": 8.484051962083579e-05, "loss": 1.9814, "step": 66 }, { "epoch": 1.23841059602649, "grad_norm": 1.0170243978500366, "learning_rate": 8.432839392865356e-05, "loss": 2.0235, "step": 67 }, { "epoch": 1.256838468183127, "grad_norm": 2.4539759159088135, "learning_rate": 8.380957782081197e-05, "loss": 2.0746, "step": 68 }, { "epoch": 1.275266340339764, "grad_norm": 1.9356954097747803, "learning_rate": 8.328418965003844e-05, "loss": 2.075, "step": 69 }, { "epoch": 1.2936942124964008, "grad_norm": 1.5050348043441772, "learning_rate": 8.275234926828446e-05, "loss": 2.0301, "step": 70 }, { "epoch": 1.3121220846530377, "grad_norm": 1.3827838897705078, "learning_rate": 8.221417799938469e-05, "loss": 1.9986, "step": 71 }, { "epoch": 1.3305499568096746, "grad_norm": 0.7443787455558777, "learning_rate": 8.166979861138077e-05, "loss": 2.016, "step": 72 }, { "epoch": 1.3489778289663115, "grad_norm": 0.9054709672927856, "learning_rate": 8.111933528851511e-05, "loss": 1.989, "step": 73 }, { "epoch": 1.3674057011229483, "grad_norm": 1.1927533149719238, "learning_rate": 8.056291360290201e-05, "loss": 1.9775, "step": 74 }, { "epoch": 1.3858335732795855, "grad_norm": 0.4375220239162445, "learning_rate": 8.000066048588211e-05, "loss": 2.0113, "step": 75 }, { "epoch": 1.3858335732795855, "eval_loss": 1.9943785667419434, "eval_runtime": 0.1925, "eval_samples_per_second": 259.712, "eval_steps_per_second": 67.525, "step": 75 }, { "epoch": 1.4042614454362223, "grad_norm": 0.5842914581298828, "learning_rate": 7.943270419906655e-05, "loss": 2.0109, "step": 76 }, { "epoch": 1.4226893175928592, "grad_norm": 0.7177793383598328, "learning_rate": 7.885917430507797e-05, "loss": 2.0048, "step": 77 }, { "epoch": 1.441117189749496, "grad_norm": 1.3073872327804565, "learning_rate": 7.828020163799455e-05, "loss": 1.9713, "step": 78 }, { "epoch": 1.459545061906133, "grad_norm": 1.0323505401611328, "learning_rate": 7.769591827350404e-05, "loss": 1.9981, "step": 79 }, { "epoch": 1.4779729340627699, "grad_norm": 1.5544973611831665, "learning_rate": 7.710645749877449e-05, "loss": 2.045, "step": 80 }, { "epoch": 1.4964008062194067, "grad_norm": 1.5951658487319946, "learning_rate": 7.651195378204878e-05, "loss": 2.0321, "step": 81 }, { "epoch": 1.5148286783760438, "grad_norm": 0.7805724740028381, "learning_rate": 7.591254274196959e-05, "loss": 2.0335, "step": 82 }, { "epoch": 1.5332565505326807, "grad_norm": 1.8287301063537598, "learning_rate": 7.530836111664192e-05, "loss": 2.0383, "step": 83 }, { "epoch": 1.5516844226893176, "grad_norm": 0.8662983179092407, "learning_rate": 7.469954673244032e-05, "loss": 1.963, "step": 84 }, { "epoch": 1.5701122948459545, "grad_norm": 0.9922555685043335, "learning_rate": 7.40862384725679e-05, "loss": 1.9948, "step": 85 }, { "epoch": 1.5885401670025914, "grad_norm": 1.3063266277313232, "learning_rate": 7.346857624537407e-05, "loss": 2.0508, "step": 86 }, { "epoch": 1.6069680391592285, "grad_norm": 1.0063977241516113, "learning_rate": 7.284670095243859e-05, "loss": 1.97, "step": 87 }, { "epoch": 1.6253959113158651, "grad_norm": 0.5883684754371643, "learning_rate": 7.222075445642904e-05, "loss": 2.0262, "step": 88 }, { "epoch": 1.6438237834725022, "grad_norm": 1.833903431892395, "learning_rate": 7.159087954873896e-05, "loss": 1.9493, "step": 89 }, { "epoch": 1.6622516556291391, "grad_norm": 1.1694128513336182, "learning_rate": 7.095721991691411e-05, "loss": 1.9939, "step": 90 }, { "epoch": 1.680679527785776, "grad_norm": 0.9060803651809692, "learning_rate": 7.031992011187447e-05, "loss": 1.9637, "step": 91 }, { "epoch": 1.6991073999424129, "grad_norm": 0.7915279865264893, "learning_rate": 6.967912551493912e-05, "loss": 1.9875, "step": 92 }, { "epoch": 1.7175352720990498, "grad_norm": 1.037650227546692, "learning_rate": 6.903498230466171e-05, "loss": 2.0221, "step": 93 }, { "epoch": 1.7359631442556869, "grad_norm": 0.8483555316925049, "learning_rate": 6.838763742348415e-05, "loss": 2.0028, "step": 94 }, { "epoch": 1.7543910164123235, "grad_norm": 0.7025566101074219, "learning_rate": 6.773723854421592e-05, "loss": 1.9476, "step": 95 }, { "epoch": 1.7728188885689606, "grad_norm": 1.4136466979980469, "learning_rate": 6.708393403634696e-05, "loss": 2.0033, "step": 96 }, { "epoch": 1.7912467607255975, "grad_norm": 1.7042797803878784, "learning_rate": 6.642787293220136e-05, "loss": 2.0179, "step": 97 }, { "epoch": 1.8096746328822344, "grad_norm": 1.571615219116211, "learning_rate": 6.576920489294011e-05, "loss": 1.9983, "step": 98 }, { "epoch": 1.8281025050388713, "grad_norm": 0.9024497270584106, "learning_rate": 6.510808017442018e-05, "loss": 1.9455, "step": 99 }, { "epoch": 1.8465303771955082, "grad_norm": 1.3372397422790527, "learning_rate": 6.444464959291813e-05, "loss": 2.0109, "step": 100 }, { "epoch": 1.8465303771955082, "eval_loss": 1.987208366394043, "eval_runtime": 0.1932, "eval_samples_per_second": 258.808, "eval_steps_per_second": 67.29, "step": 100 }, { "epoch": 1.8649582493521453, "grad_norm": 0.6175363659858704, "learning_rate": 6.377906449072578e-05, "loss": 1.937, "step": 101 }, { "epoch": 1.883386121508782, "grad_norm": 0.5189071297645569, "learning_rate": 6.311147670162576e-05, "loss": 2.0043, "step": 102 }, { "epoch": 1.901813993665419, "grad_norm": 0.7783696055412292, "learning_rate": 6.244203851625526e-05, "loss": 1.9778, "step": 103 }, { "epoch": 1.9202418658220557, "grad_norm": 1.224826693534851, "learning_rate": 6.177090264736525e-05, "loss": 2.0057, "step": 104 }, { "epoch": 1.9386697379786928, "grad_norm": 2.2593610286712646, "learning_rate": 6.109822219498354e-05, "loss": 1.9667, "step": 105 }, { "epoch": 1.9570976101353297, "grad_norm": 1.2801239490509033, "learning_rate": 6.042415061148954e-05, "loss": 1.9922, "step": 106 }, { "epoch": 1.9755254822919666, "grad_norm": 0.8543593883514404, "learning_rate": 5.9748841666608565e-05, "loss": 1.9925, "step": 107 }, { "epoch": 1.9939533544486037, "grad_norm": 0.5854929685592651, "learning_rate": 5.907244941233371e-05, "loss": 1.9671, "step": 108 }, { "epoch": 2.016124388137057, "grad_norm": 2.585685968399048, "learning_rate": 5.8395128147783474e-05, "loss": 4.0407, "step": 109 }, { "epoch": 2.0345522602936943, "grad_norm": 1.6653348207473755, "learning_rate": 5.771703238400288e-05, "loss": 2.0671, "step": 110 }, { "epoch": 2.052980132450331, "grad_norm": 0.6874579191207886, "learning_rate": 5.703831680871631e-05, "loss": 1.9964, "step": 111 }, { "epoch": 2.071408004606968, "grad_norm": 0.906933605670929, "learning_rate": 5.635913625104e-05, "loss": 1.9681, "step": 112 }, { "epoch": 2.089835876763605, "grad_norm": 0.7688360214233398, "learning_rate": 5.567964564616237e-05, "loss": 2.0265, "step": 113 }, { "epoch": 2.108263748920242, "grad_norm": 1.0667365789413452, "learning_rate": 5.500000000000001e-05, "loss": 1.9802, "step": 114 }, { "epoch": 2.126691621076879, "grad_norm": 1.1549663543701172, "learning_rate": 5.432035435383764e-05, "loss": 2.0294, "step": 115 }, { "epoch": 2.1451194932335156, "grad_norm": 0.9386123418807983, "learning_rate": 5.364086374896001e-05, "loss": 1.9267, "step": 116 }, { "epoch": 2.1635473653901527, "grad_norm": 0.7642337679862976, "learning_rate": 5.296168319128372e-05, "loss": 1.9907, "step": 117 }, { "epoch": 2.1819752375467893, "grad_norm": 0.5543763041496277, "learning_rate": 5.2282967615997125e-05, "loss": 1.9707, "step": 118 }, { "epoch": 2.2004031097034265, "grad_norm": 0.7983617186546326, "learning_rate": 5.160487185221653e-05, "loss": 1.9696, "step": 119 }, { "epoch": 2.218830981860063, "grad_norm": 0.3363061845302582, "learning_rate": 5.092755058766631e-05, "loss": 1.9536, "step": 120 }, { "epoch": 2.2372588540167, "grad_norm": 0.9921276569366455, "learning_rate": 5.025115833339146e-05, "loss": 2.0054, "step": 121 }, { "epoch": 2.2556867261733373, "grad_norm": 1.2545318603515625, "learning_rate": 4.9575849388510473e-05, "loss": 1.9755, "step": 122 }, { "epoch": 2.274114598329974, "grad_norm": 1.0431549549102783, "learning_rate": 4.890177780501648e-05, "loss": 2.047, "step": 123 }, { "epoch": 2.292542470486611, "grad_norm": 0.9374226331710815, "learning_rate": 4.8229097352634765e-05, "loss": 1.9859, "step": 124 }, { "epoch": 2.3109703426432477, "grad_norm": 1.3169200420379639, "learning_rate": 4.755796148374475e-05, "loss": 1.957, "step": 125 }, { "epoch": 2.3109703426432477, "eval_loss": 1.9774408340454102, "eval_runtime": 0.1937, "eval_samples_per_second": 258.081, "eval_steps_per_second": 67.101, "step": 125 }, { "epoch": 2.329398214799885, "grad_norm": 0.6126213669776917, "learning_rate": 4.688852329837424e-05, "loss": 1.9839, "step": 126 }, { "epoch": 2.3478260869565215, "grad_norm": 1.152384638786316, "learning_rate": 4.6220935509274235e-05, "loss": 1.9543, "step": 127 }, { "epoch": 2.3662539591131586, "grad_norm": 0.6893303394317627, "learning_rate": 4.5555350407081863e-05, "loss": 1.9423, "step": 128 }, { "epoch": 2.3846818312697957, "grad_norm": 0.8432124853134155, "learning_rate": 4.489191982557984e-05, "loss": 2.0398, "step": 129 }, { "epoch": 2.4031097034264324, "grad_norm": 0.5620063543319702, "learning_rate": 4.423079510705992e-05, "loss": 1.9957, "step": 130 }, { "epoch": 2.4215375755830695, "grad_norm": 1.0304889678955078, "learning_rate": 4.357212706779864e-05, "loss": 2.0187, "step": 131 }, { "epoch": 2.439965447739706, "grad_norm": 0.32691672444343567, "learning_rate": 4.291606596365304e-05, "loss": 1.952, "step": 132 }, { "epoch": 2.4583933198963432, "grad_norm": 0.4635773301124573, "learning_rate": 4.226276145578408e-05, "loss": 1.9837, "step": 133 }, { "epoch": 2.47682119205298, "grad_norm": 1.219428300857544, "learning_rate": 4.161236257651587e-05, "loss": 1.9887, "step": 134 }, { "epoch": 2.495249064209617, "grad_norm": 0.8382700681686401, "learning_rate": 4.09650176953383e-05, "loss": 1.9683, "step": 135 }, { "epoch": 2.513676936366254, "grad_norm": 0.7893387675285339, "learning_rate": 4.032087448506089e-05, "loss": 1.9834, "step": 136 }, { "epoch": 2.5321048085228908, "grad_norm": 0.38780632615089417, "learning_rate": 3.968007988812552e-05, "loss": 1.9808, "step": 137 }, { "epoch": 2.550532680679528, "grad_norm": 0.579411506652832, "learning_rate": 3.904278008308589e-05, "loss": 1.9948, "step": 138 }, { "epoch": 2.5689605528361645, "grad_norm": 0.5201661586761475, "learning_rate": 3.840912045126106e-05, "loss": 2.0085, "step": 139 }, { "epoch": 2.5873884249928016, "grad_norm": 0.2307479977607727, "learning_rate": 3.777924554357096e-05, "loss": 1.9852, "step": 140 }, { "epoch": 2.6058162971494383, "grad_norm": 0.4744541645050049, "learning_rate": 3.715329904756143e-05, "loss": 1.9508, "step": 141 }, { "epoch": 2.6242441693060754, "grad_norm": 0.33469077944755554, "learning_rate": 3.653142375462596e-05, "loss": 1.9608, "step": 142 }, { "epoch": 2.6426720414627125, "grad_norm": 0.2714967429637909, "learning_rate": 3.591376152743211e-05, "loss": 1.9771, "step": 143 }, { "epoch": 2.661099913619349, "grad_norm": 0.6618818044662476, "learning_rate": 3.530045326755967e-05, "loss": 1.9385, "step": 144 }, { "epoch": 2.6795277857759863, "grad_norm": 0.7056910991668701, "learning_rate": 3.46916388833581e-05, "loss": 1.9505, "step": 145 }, { "epoch": 2.697955657932623, "grad_norm": 0.5343828201293945, "learning_rate": 3.408745725803042e-05, "loss": 2.0025, "step": 146 }, { "epoch": 2.71638353008926, "grad_norm": 1.5520598888397217, "learning_rate": 3.348804621795122e-05, "loss": 1.9265, "step": 147 }, { "epoch": 2.7348114022458967, "grad_norm": 0.4644526243209839, "learning_rate": 3.2893542501225534e-05, "loss": 2.0005, "step": 148 }, { "epoch": 2.753239274402534, "grad_norm": 0.3085757791996002, "learning_rate": 3.2304081726495974e-05, "loss": 1.9679, "step": 149 }, { "epoch": 2.771667146559171, "grad_norm": 1.0300484895706177, "learning_rate": 3.1719798362005444e-05, "loss": 1.9888, "step": 150 }, { "epoch": 2.771667146559171, "eval_loss": 1.972500205039978, "eval_runtime": 0.1941, "eval_samples_per_second": 257.57, "eval_steps_per_second": 66.968, "step": 150 } ], "logging_steps": 1, "max_steps": 218, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1392626572984320.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }