{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.17380271653645946,
  "eval_steps": 500,
  "global_step": 20000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0017380271653645947,
      "grad_norm": 1.1632381677627563,
      "learning_rate": 4.9978491913828615e-05,
      "loss": 3.6439,
      "step": 200
    },
    {
      "epoch": 0.0034760543307291894,
      "grad_norm": 0.6136592626571655,
      "learning_rate": 4.995676657426156e-05,
      "loss": 2.3619,
      "step": 400
    },
    {
      "epoch": 0.005214081496093784,
      "grad_norm": 0.6270021796226501,
      "learning_rate": 4.99350412346945e-05,
      "loss": 2.0395,
      "step": 600
    },
    {
      "epoch": 0.006952108661458379,
      "grad_norm": 0.9146378636360168,
      "learning_rate": 4.991331589512744e-05,
      "loss": 1.8894,
      "step": 800
    },
    {
      "epoch": 0.008690135826822973,
      "grad_norm": 0.7162560224533081,
      "learning_rate": 4.989159055556039e-05,
      "loss": 1.8242,
      "step": 1000
    },
    {
      "epoch": 0.010428162992187568,
      "grad_norm": 0.31322506070137024,
      "learning_rate": 4.9869865215993326e-05,
      "loss": 1.8681,
      "step": 1200
    },
    {
      "epoch": 0.012166190157552163,
      "grad_norm": 0.5570130348205566,
      "learning_rate": 4.984813987642627e-05,
      "loss": 1.8099,
      "step": 1400
    },
    {
      "epoch": 0.013904217322916758,
      "grad_norm": 0.6080171465873718,
      "learning_rate": 4.982641453685921e-05,
      "loss": 1.7641,
      "step": 1600
    },
    {
      "epoch": 0.015642244488281352,
      "grad_norm": 0.553460955619812,
      "learning_rate": 4.980468919729215e-05,
      "loss": 1.7712,
      "step": 1800
    },
    {
      "epoch": 0.017380271653645946,
      "grad_norm": 0.625199019908905,
      "learning_rate": 4.97829638577251e-05,
      "loss": 1.7565,
      "step": 2000
    },
    {
      "epoch": 0.019118298819010542,
      "grad_norm": 0.579010546207428,
      "learning_rate": 4.9761238518158044e-05,
      "loss": 1.7322,
      "step": 2200
    },
    {
      "epoch": 0.020856325984375135,
      "grad_norm": 0.7429983615875244,
      "learning_rate": 4.9739513178590984e-05,
      "loss": 1.7407,
      "step": 2400
    },
    {
      "epoch": 0.022594353149739732,
      "grad_norm": 0.5801926255226135,
      "learning_rate": 4.971778783902393e-05,
      "loss": 1.6487,
      "step": 2600
    },
    {
      "epoch": 0.024332380315104325,
      "grad_norm": 0.7074835300445557,
      "learning_rate": 4.969606249945687e-05,
      "loss": 1.6959,
      "step": 2800
    },
    {
      "epoch": 0.02607040748046892,
      "grad_norm": 0.6824275255203247,
      "learning_rate": 4.967433715988981e-05,
      "loss": 1.6958,
      "step": 3000
    },
    {
      "epoch": 0.027808434645833515,
      "grad_norm": 0.43216443061828613,
      "learning_rate": 4.9652611820322756e-05,
      "loss": 1.6824,
      "step": 3200
    },
    {
      "epoch": 0.029546461811198108,
      "grad_norm": 0.7867545485496521,
      "learning_rate": 4.9630886480755695e-05,
      "loss": 1.6671,
      "step": 3400
    },
    {
      "epoch": 0.031284488976562705,
      "grad_norm": 0.77516108751297,
      "learning_rate": 4.9609161141188635e-05,
      "loss": 1.6389,
      "step": 3600
    },
    {
      "epoch": 0.0330225161419273,
      "grad_norm": 0.5014050602912903,
      "learning_rate": 4.958743580162158e-05,
      "loss": 1.629,
      "step": 3800
    },
    {
      "epoch": 0.03476054330729189,
      "grad_norm": 0.6006432771682739,
      "learning_rate": 4.956571046205453e-05,
      "loss": 1.5977,
      "step": 4000
    },
    {
      "epoch": 0.036498570472656484,
      "grad_norm": 0.6153438091278076,
      "learning_rate": 4.954398512248747e-05,
      "loss": 1.5879,
      "step": 4200
    },
    {
      "epoch": 0.038236597638021085,
      "grad_norm": 0.8877372145652771,
      "learning_rate": 4.952225978292041e-05,
      "loss": 1.599,
      "step": 4400
    },
    {
      "epoch": 0.03997462480338568,
      "grad_norm": 0.7173994183540344,
      "learning_rate": 4.950053444335335e-05,
      "loss": 1.6205,
      "step": 4600
    },
    {
      "epoch": 0.04171265196875027,
      "grad_norm": 0.8379663228988647,
      "learning_rate": 4.947880910378629e-05,
      "loss": 1.5794,
      "step": 4800
    },
    {
      "epoch": 0.043450679134114864,
      "grad_norm": 0.6160171031951904,
      "learning_rate": 4.945708376421924e-05,
      "loss": 1.5656,
      "step": 5000
    },
    {
      "epoch": 0.045188706299479464,
      "grad_norm": 0.8642494082450867,
      "learning_rate": 4.943535842465218e-05,
      "loss": 1.5665,
      "step": 5200
    },
    {
      "epoch": 0.04692673346484406,
      "grad_norm": 0.6872414350509644,
      "learning_rate": 4.941363308508512e-05,
      "loss": 1.5552,
      "step": 5400
    },
    {
      "epoch": 0.04866476063020865,
      "grad_norm": 0.9998211860656738,
      "learning_rate": 4.9391907745518064e-05,
      "loss": 1.5458,
      "step": 5600
    },
    {
      "epoch": 0.050402787795573244,
      "grad_norm": 1.2175588607788086,
      "learning_rate": 4.937018240595101e-05,
      "loss": 1.5295,
      "step": 5800
    },
    {
      "epoch": 0.05214081496093784,
      "grad_norm": 1.0134257078170776,
      "learning_rate": 4.934845706638395e-05,
      "loss": 1.516,
      "step": 6000
    },
    {
      "epoch": 0.05387884212630244,
      "grad_norm": 0.8104642033576965,
      "learning_rate": 4.9326731726816896e-05,
      "loss": 1.5285,
      "step": 6200
    },
    {
      "epoch": 0.05561686929166703,
      "grad_norm": 0.9005429148674011,
      "learning_rate": 4.9305006387249836e-05,
      "loss": 1.5069,
      "step": 6400
    },
    {
      "epoch": 0.05735489645703162,
      "grad_norm": 0.8855582475662231,
      "learning_rate": 4.9283281047682775e-05,
      "loss": 1.5046,
      "step": 6600
    },
    {
      "epoch": 0.059092923622396216,
      "grad_norm": 0.7807704210281372,
      "learning_rate": 4.926155570811572e-05,
      "loss": 1.4663,
      "step": 6800
    },
    {
      "epoch": 0.06083095078776081,
      "grad_norm": 1.2552438974380493,
      "learning_rate": 4.923983036854866e-05,
      "loss": 1.486,
      "step": 7000
    },
    {
      "epoch": 0.06256897795312541,
      "grad_norm": 1.0079654455184937,
      "learning_rate": 4.92181050289816e-05,
      "loss": 1.4569,
      "step": 7200
    },
    {
      "epoch": 0.06430700511849,
      "grad_norm": 1.0267302989959717,
      "learning_rate": 4.919637968941455e-05,
      "loss": 1.4746,
      "step": 7400
    },
    {
      "epoch": 0.0660450322838546,
      "grad_norm": 1.1427829265594482,
      "learning_rate": 4.9174654349847494e-05,
      "loss": 1.4867,
      "step": 7600
    },
    {
      "epoch": 0.0677830594492192,
      "grad_norm": 0.9080005884170532,
      "learning_rate": 4.915292901028043e-05,
      "loss": 1.4789,
      "step": 7800
    },
    {
      "epoch": 0.06952108661458378,
      "grad_norm": 0.78159499168396,
      "learning_rate": 4.913120367071338e-05,
      "loss": 1.4435,
      "step": 8000
    },
    {
      "epoch": 0.07125911377994838,
      "grad_norm": 0.9199485778808594,
      "learning_rate": 4.910947833114632e-05,
      "loss": 1.4698,
      "step": 8200
    },
    {
      "epoch": 0.07299714094531297,
      "grad_norm": 1.1556053161621094,
      "learning_rate": 4.908775299157926e-05,
      "loss": 1.4233,
      "step": 8400
    },
    {
      "epoch": 0.07473516811067757,
      "grad_norm": 0.6093395948410034,
      "learning_rate": 4.9066027652012205e-05,
      "loss": 1.4607,
      "step": 8600
    },
    {
      "epoch": 0.07647319527604217,
      "grad_norm": 0.7765551209449768,
      "learning_rate": 4.9044302312445144e-05,
      "loss": 1.4067,
      "step": 8800
    },
    {
      "epoch": 0.07821122244140676,
      "grad_norm": 0.9261316061019897,
      "learning_rate": 4.9022576972878084e-05,
      "loss": 1.4437,
      "step": 9000
    },
    {
      "epoch": 0.07994924960677136,
      "grad_norm": 0.737016499042511,
      "learning_rate": 4.900085163331103e-05,
      "loss": 1.4394,
      "step": 9200
    },
    {
      "epoch": 0.08168727677213594,
      "grad_norm": 1.0518062114715576,
      "learning_rate": 4.897912629374397e-05,
      "loss": 1.442,
      "step": 9400
    },
    {
      "epoch": 0.08342530393750054,
      "grad_norm": 0.9163209795951843,
      "learning_rate": 4.8957400954176916e-05,
      "loss": 1.4126,
      "step": 9600
    },
    {
      "epoch": 0.08516333110286514,
      "grad_norm": 1.1651362180709839,
      "learning_rate": 4.893567561460986e-05,
      "loss": 1.4397,
      "step": 9800
    },
    {
      "epoch": 0.08690135826822973,
      "grad_norm": 1.2389508485794067,
      "learning_rate": 4.89139502750428e-05,
      "loss": 1.4226,
      "step": 10000
    },
    {
      "epoch": 0.08863938543359433,
      "grad_norm": 1.009730339050293,
      "learning_rate": 4.889222493547574e-05,
      "loss": 1.4643,
      "step": 10200
    },
    {
      "epoch": 0.09037741259895893,
      "grad_norm": 1.3371009826660156,
      "learning_rate": 4.887049959590869e-05,
      "loss": 1.4221,
      "step": 10400
    },
    {
      "epoch": 0.09211543976432351,
      "grad_norm": 1.0338963270187378,
      "learning_rate": 4.884877425634163e-05,
      "loss": 1.4122,
      "step": 10600
    },
    {
      "epoch": 0.09385346692968811,
      "grad_norm": 1.0023767948150635,
      "learning_rate": 4.8827048916774574e-05,
      "loss": 1.4034,
      "step": 10800
    },
    {
      "epoch": 0.0955914940950527,
      "grad_norm": 1.4514521360397339,
      "learning_rate": 4.880532357720751e-05,
      "loss": 1.4356,
      "step": 11000
    },
    {
      "epoch": 0.0973295212604173,
      "grad_norm": 1.0462247133255005,
      "learning_rate": 4.878359823764045e-05,
      "loss": 1.4038,
      "step": 11200
    },
    {
      "epoch": 0.0990675484257819,
      "grad_norm": 1.0881024599075317,
      "learning_rate": 4.87618728980734e-05,
      "loss": 1.3521,
      "step": 11400
    },
    {
      "epoch": 0.10080557559114649,
      "grad_norm": 1.1503826379776,
      "learning_rate": 4.8740147558506345e-05,
      "loss": 1.3455,
      "step": 11600
    },
    {
      "epoch": 0.10254360275651109,
      "grad_norm": 1.1788356304168701,
      "learning_rate": 4.8718422218939285e-05,
      "loss": 1.4246,
      "step": 11800
    },
    {
      "epoch": 0.10428162992187567,
      "grad_norm": 0.9009695649147034,
      "learning_rate": 4.8696696879372225e-05,
      "loss": 1.3701,
      "step": 12000
    },
    {
      "epoch": 0.10601965708724027,
      "grad_norm": 0.7886667251586914,
      "learning_rate": 4.867497153980517e-05,
      "loss": 1.3843,
      "step": 12200
    },
    {
      "epoch": 0.10775768425260487,
      "grad_norm": 1.0017770528793335,
      "learning_rate": 4.865335482693595e-05,
      "loss": 1.3785,
      "step": 12400
    },
    {
      "epoch": 0.10949571141796946,
      "grad_norm": 0.901871383190155,
      "learning_rate": 4.863162948736889e-05,
      "loss": 1.3627,
      "step": 12600
    },
    {
      "epoch": 0.11123373858333406,
      "grad_norm": 0.9240642189979553,
      "learning_rate": 4.860990414780183e-05,
      "loss": 1.3397,
      "step": 12800
    },
    {
      "epoch": 0.11297176574869865,
      "grad_norm": 1.2550582885742188,
      "learning_rate": 4.8588178808234776e-05,
      "loss": 1.368,
      "step": 13000
    },
    {
      "epoch": 0.11470979291406325,
      "grad_norm": 0.9313985705375671,
      "learning_rate": 4.8566453468667715e-05,
      "loss": 1.344,
      "step": 13200
    },
    {
      "epoch": 0.11644782007942785,
      "grad_norm": 0.8634843826293945,
      "learning_rate": 4.854472812910066e-05,
      "loss": 1.3308,
      "step": 13400
    },
    {
      "epoch": 0.11818584724479243,
      "grad_norm": 1.2060052156448364,
      "learning_rate": 4.85230027895336e-05,
      "loss": 1.355,
      "step": 13600
    },
    {
      "epoch": 0.11992387441015703,
      "grad_norm": 1.0419443845748901,
      "learning_rate": 4.850127744996655e-05,
      "loss": 1.3469,
      "step": 13800
    },
    {
      "epoch": 0.12166190157552162,
      "grad_norm": 1.2425956726074219,
      "learning_rate": 4.847955211039949e-05,
      "loss": 1.3368,
      "step": 14000
    },
    {
      "epoch": 0.12339992874088622,
      "grad_norm": 1.0397825241088867,
      "learning_rate": 4.8457826770832433e-05,
      "loss": 1.3211,
      "step": 14200
    },
    {
      "epoch": 0.12513795590625082,
      "grad_norm": 0.8406294584274292,
      "learning_rate": 4.843621005796321e-05,
      "loss": 1.3375,
      "step": 14400
    },
    {
      "epoch": 0.1268759830716154,
      "grad_norm": 0.816184401512146,
      "learning_rate": 4.841448471839615e-05,
      "loss": 1.3351,
      "step": 14600
    },
    {
      "epoch": 0.12861401023698,
      "grad_norm": 1.1904360055923462,
      "learning_rate": 4.839275937882909e-05,
      "loss": 1.3174,
      "step": 14800
    },
    {
      "epoch": 0.1303520374023446,
      "grad_norm": 1.2890825271606445,
      "learning_rate": 4.837103403926204e-05,
      "loss": 1.3294,
      "step": 15000
    },
    {
      "epoch": 0.1320900645677092,
      "grad_norm": 0.9586935639381409,
      "learning_rate": 4.834930869969498e-05,
      "loss": 1.2934,
      "step": 15200
    },
    {
      "epoch": 0.13382809173307378,
      "grad_norm": 0.9654845595359802,
      "learning_rate": 4.832758336012792e-05,
      "loss": 1.3386,
      "step": 15400
    },
    {
      "epoch": 0.1355661188984384,
      "grad_norm": 1.1789395809173584,
      "learning_rate": 4.8305858020560864e-05,
      "loss": 1.3499,
      "step": 15600
    },
    {
      "epoch": 0.13730414606380298,
      "grad_norm": 1.2728456258773804,
      "learning_rate": 4.82841326809938e-05,
      "loss": 1.3396,
      "step": 15800
    },
    {
      "epoch": 0.13904217322916756,
      "grad_norm": 1.0807838439941406,
      "learning_rate": 4.826240734142675e-05,
      "loss": 1.3369,
      "step": 16000
    },
    {
      "epoch": 0.14078020039453218,
      "grad_norm": 1.11849045753479,
      "learning_rate": 4.8240682001859696e-05,
      "loss": 1.3664,
      "step": 16200
    },
    {
      "epoch": 0.14251822755989677,
      "grad_norm": 1.5169202089309692,
      "learning_rate": 4.821906528899047e-05,
      "loss": 1.3352,
      "step": 16400
    },
    {
      "epoch": 0.14425625472526135,
      "grad_norm": 0.8817140460014343,
      "learning_rate": 4.819733994942341e-05,
      "loss": 1.2924,
      "step": 16600
    },
    {
      "epoch": 0.14599428189062594,
      "grad_norm": 1.1285990476608276,
      "learning_rate": 4.8175614609856355e-05,
      "loss": 1.3497,
      "step": 16800
    },
    {
      "epoch": 0.14773230905599055,
      "grad_norm": 1.1072745323181152,
      "learning_rate": 4.81538892702893e-05,
      "loss": 1.3129,
      "step": 17000
    },
    {
      "epoch": 0.14947033622135514,
      "grad_norm": 1.1911921501159668,
      "learning_rate": 4.813216393072224e-05,
      "loss": 1.312,
      "step": 17200
    },
    {
      "epoch": 0.15120836338671972,
      "grad_norm": 0.7891075611114502,
      "learning_rate": 4.811043859115518e-05,
      "loss": 1.281,
      "step": 17400
    },
    {
      "epoch": 0.15294639055208434,
      "grad_norm": 0.9016463756561279,
      "learning_rate": 4.8088713251588126e-05,
      "loss": 1.3118,
      "step": 17600
    },
    {
      "epoch": 0.15468441771744892,
      "grad_norm": 1.1260063648223877,
      "learning_rate": 4.8066987912021066e-05,
      "loss": 1.2743,
      "step": 17800
    },
    {
      "epoch": 0.1564224448828135,
      "grad_norm": 1.0370497703552246,
      "learning_rate": 4.8045262572454005e-05,
      "loss": 1.3013,
      "step": 18000
    },
    {
      "epoch": 0.15816047204817812,
      "grad_norm": 1.4182652235031128,
      "learning_rate": 4.802353723288695e-05,
      "loss": 1.2994,
      "step": 18200
    },
    {
      "epoch": 0.1598984992135427,
      "grad_norm": 1.1322426795959473,
      "learning_rate": 4.800192052001773e-05,
      "loss": 1.3339,
      "step": 18400
    },
    {
      "epoch": 0.1616365263789073,
      "grad_norm": 1.4774497747421265,
      "learning_rate": 4.798019518045067e-05,
      "loss": 1.3381,
      "step": 18600
    },
    {
      "epoch": 0.16337455354427188,
      "grad_norm": 1.3371450901031494,
      "learning_rate": 4.795846984088361e-05,
      "loss": 1.304,
      "step": 18800
    },
    {
      "epoch": 0.1651125807096365,
      "grad_norm": 0.8607128858566284,
      "learning_rate": 4.793674450131656e-05,
      "loss": 1.2686,
      "step": 19000
    },
    {
      "epoch": 0.16685060787500108,
      "grad_norm": 1.1792031526565552,
      "learning_rate": 4.79150191617495e-05,
      "loss": 1.3099,
      "step": 19200
    },
    {
      "epoch": 0.16858863504036567,
      "grad_norm": 1.274556040763855,
      "learning_rate": 4.789329382218244e-05,
      "loss": 1.2745,
      "step": 19400
    },
    {
      "epoch": 0.17032666220573028,
      "grad_norm": 0.7774292230606079,
      "learning_rate": 4.787156848261539e-05,
      "loss": 1.2905,
      "step": 19600
    },
    {
      "epoch": 0.17206468937109487,
      "grad_norm": 1.204541802406311,
      "learning_rate": 4.784984314304833e-05,
      "loss": 1.3014,
      "step": 19800
    },
    {
      "epoch": 0.17380271653645946,
      "grad_norm": 0.9959656000137329,
      "learning_rate": 4.782811780348127e-05,
      "loss": 1.2798,
      "step": 20000
    }
  ],
  "logging_steps": 200,
  "max_steps": 460292,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 10000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.178779779072e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}