{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.29985443959243085,
  "eval_steps": 103,
  "global_step": 309,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0009704027171276079,
      "grad_norm": 0.1630859375,
      "learning_rate": 2e-07,
      "loss": 2.2674,
      "step": 1
    },
    {
      "epoch": 0.0009704027171276079,
      "eval_loss": 2.3277485370635986,
      "eval_runtime": 707.6734,
      "eval_samples_per_second": 0.916,
      "eval_steps_per_second": 0.229,
      "step": 1
    },
    {
      "epoch": 0.0019408054342552159,
      "grad_norm": 0.1728515625,
      "learning_rate": 4e-07,
      "loss": 2.2913,
      "step": 2
    },
    {
      "epoch": 0.002911208151382824,
      "grad_norm": 0.169921875,
      "learning_rate": 6e-07,
      "loss": 2.2295,
      "step": 3
    },
    {
      "epoch": 0.0038816108685104317,
      "grad_norm": 0.1611328125,
      "learning_rate": 8e-07,
      "loss": 2.3423,
      "step": 4
    },
    {
      "epoch": 0.0048520135856380394,
      "grad_norm": 0.17578125,
      "learning_rate": 1e-06,
      "loss": 2.4168,
      "step": 5
    },
    {
      "epoch": 0.005822416302765648,
      "grad_norm": 0.17578125,
      "learning_rate": 1.2e-06,
      "loss": 2.1116,
      "step": 6
    },
    {
      "epoch": 0.006792819019893256,
      "grad_norm": 0.171875,
      "learning_rate": 1.4e-06,
      "loss": 2.3571,
      "step": 7
    },
    {
      "epoch": 0.0077632217370208634,
      "grad_norm": 0.1708984375,
      "learning_rate": 1.6e-06,
      "loss": 2.336,
      "step": 8
    },
    {
      "epoch": 0.008733624454148471,
      "grad_norm": 0.1474609375,
      "learning_rate": 1.8e-06,
      "loss": 2.2734,
      "step": 9
    },
    {
      "epoch": 0.009704027171276079,
      "grad_norm": 0.158203125,
      "learning_rate": 2e-06,
      "loss": 2.3191,
      "step": 10
    },
    {
      "epoch": 0.010674429888403688,
      "grad_norm": 0.140625,
      "learning_rate": 1.9999957311433394e-06,
      "loss": 2.1462,
      "step": 11
    },
    {
      "epoch": 0.011644832605531296,
      "grad_norm": 0.1455078125,
      "learning_rate": 1.999982924613854e-06,
      "loss": 2.3692,
      "step": 12
    },
    {
      "epoch": 0.012615235322658904,
      "grad_norm": 0.134765625,
      "learning_rate": 1.999961580533031e-06,
      "loss": 2.1327,
      "step": 13
    },
    {
      "epoch": 0.013585638039786511,
      "grad_norm": 0.130859375,
      "learning_rate": 1.9999316991033473e-06,
      "loss": 2.33,
      "step": 14
    },
    {
      "epoch": 0.01455604075691412,
      "grad_norm": 0.1357421875,
      "learning_rate": 1.999893280608269e-06,
      "loss": 2.1897,
      "step": 15
    },
    {
      "epoch": 0.015526443474041727,
      "grad_norm": 0.125,
      "learning_rate": 1.9998463254122472e-06,
      "loss": 2.3865,
      "step": 16
    },
    {
      "epoch": 0.016496846191169336,
      "grad_norm": 0.1240234375,
      "learning_rate": 1.9997908339607153e-06,
      "loss": 2.2324,
      "step": 17
    },
    {
      "epoch": 0.017467248908296942,
      "grad_norm": 0.12353515625,
      "learning_rate": 1.9997268067800845e-06,
      "loss": 2.3878,
      "step": 18
    },
    {
      "epoch": 0.018437651625424552,
      "grad_norm": 0.1396484375,
      "learning_rate": 1.9996542444777386e-06,
      "loss": 2.2695,
      "step": 19
    },
    {
      "epoch": 0.019408054342552158,
      "grad_norm": 0.1318359375,
      "learning_rate": 1.9995731477420292e-06,
      "loss": 2.2875,
      "step": 20
    },
    {
      "epoch": 0.020378457059679767,
      "grad_norm": 0.1357421875,
      "learning_rate": 1.999483517342268e-06,
      "loss": 2.4014,
      "step": 21
    },
    {
      "epoch": 0.021348859776807377,
      "grad_norm": 0.1376953125,
      "learning_rate": 1.9993853541287205e-06,
      "loss": 2.3639,
      "step": 22
    },
    {
      "epoch": 0.022319262493934983,
      "grad_norm": 0.1376953125,
      "learning_rate": 1.999278659032597e-06,
      "loss": 2.2434,
      "step": 23
    },
    {
      "epoch": 0.023289665211062592,
      "grad_norm": 0.1328125,
      "learning_rate": 1.9991634330660437e-06,
      "loss": 2.3897,
      "step": 24
    },
    {
      "epoch": 0.024260067928190198,
      "grad_norm": 0.126953125,
      "learning_rate": 1.999039677322135e-06,
      "loss": 2.2402,
      "step": 25
    },
    {
      "epoch": 0.025230470645317808,
      "grad_norm": 0.1279296875,
      "learning_rate": 1.998907392974861e-06,
      "loss": 2.401,
      "step": 26
    },
    {
      "epoch": 0.026200873362445413,
      "grad_norm": 0.12890625,
      "learning_rate": 1.9987665812791164e-06,
      "loss": 2.2646,
      "step": 27
    },
    {
      "epoch": 0.027171276079573023,
      "grad_norm": 0.12890625,
      "learning_rate": 1.9986172435706903e-06,
      "loss": 2.3168,
      "step": 28
    },
    {
      "epoch": 0.028141678796700632,
      "grad_norm": 0.1279296875,
      "learning_rate": 1.9984593812662525e-06,
      "loss": 2.3874,
      "step": 29
    },
    {
      "epoch": 0.02911208151382824,
      "grad_norm": 0.134765625,
      "learning_rate": 1.9982929958633397e-06,
      "loss": 2.3126,
      "step": 30
    },
    {
      "epoch": 0.030082484230955848,
      "grad_norm": 0.11376953125,
      "learning_rate": 1.998118088940341e-06,
      "loss": 2.233,
      "step": 31
    },
    {
      "epoch": 0.031052886948083454,
      "grad_norm": 0.125,
      "learning_rate": 1.9979346621564857e-06,
      "loss": 2.1122,
      "step": 32
    },
    {
      "epoch": 0.03202328966521106,
      "grad_norm": 0.1279296875,
      "learning_rate": 1.9977427172518227e-06,
      "loss": 2.2075,
      "step": 33
    },
    {
      "epoch": 0.03299369238233867,
      "grad_norm": 0.1259765625,
      "learning_rate": 1.9975422560472093e-06,
      "loss": 2.3974,
      "step": 34
    },
    {
      "epoch": 0.033964095099466275,
      "grad_norm": 0.1181640625,
      "learning_rate": 1.9973332804442895e-06,
      "loss": 2.344,
      "step": 35
    },
    {
      "epoch": 0.034934497816593885,
      "grad_norm": 0.11962890625,
      "learning_rate": 1.997115792425479e-06,
      "loss": 2.2733,
      "step": 36
    },
    {
      "epoch": 0.035904900533721494,
      "grad_norm": 0.1318359375,
      "learning_rate": 1.996889794053945e-06,
      "loss": 2.2443,
      "step": 37
    },
    {
      "epoch": 0.036875303250849104,
      "grad_norm": 0.1220703125,
      "learning_rate": 1.9966552874735863e-06,
      "loss": 2.3253,
      "step": 38
    },
    {
      "epoch": 0.03784570596797671,
      "grad_norm": 0.1201171875,
      "learning_rate": 1.9964122749090145e-06,
      "loss": 2.393,
      "step": 39
    },
    {
      "epoch": 0.038816108685104316,
      "grad_norm": 0.1142578125,
      "learning_rate": 1.996160758665531e-06,
      "loss": 2.234,
      "step": 40
    },
    {
      "epoch": 0.039786511402231925,
      "grad_norm": 0.11181640625,
      "learning_rate": 1.9959007411291063e-06,
      "loss": 2.1832,
      "step": 41
    },
    {
      "epoch": 0.040756914119359534,
      "grad_norm": 0.11962890625,
      "learning_rate": 1.995632224766358e-06,
      "loss": 2.2993,
      "step": 42
    },
    {
      "epoch": 0.041727316836487144,
      "grad_norm": 0.12060546875,
      "learning_rate": 1.995355212124525e-06,
      "loss": 2.4293,
      "step": 43
    },
    {
      "epoch": 0.04269771955361475,
      "grad_norm": 0.115234375,
      "learning_rate": 1.9950697058314457e-06,
      "loss": 2.1791,
      "step": 44
    },
    {
      "epoch": 0.043668122270742356,
      "grad_norm": 0.11328125,
      "learning_rate": 1.994775708595533e-06,
      "loss": 2.402,
      "step": 45
    },
    {
      "epoch": 0.044638524987869965,
      "grad_norm": 0.11181640625,
      "learning_rate": 1.9944732232057465e-06,
      "loss": 2.271,
      "step": 46
    },
    {
      "epoch": 0.045608927704997575,
      "grad_norm": 0.10888671875,
      "learning_rate": 1.994162252531567e-06,
      "loss": 2.1897,
      "step": 47
    },
    {
      "epoch": 0.046579330422125184,
      "grad_norm": 0.11279296875,
      "learning_rate": 1.9938427995229723e-06,
      "loss": 2.268,
      "step": 48
    },
    {
      "epoch": 0.04754973313925279,
      "grad_norm": 0.125,
      "learning_rate": 1.993514867210404e-06,
      "loss": 2.2919,
      "step": 49
    },
    {
      "epoch": 0.048520135856380396,
      "grad_norm": 0.126953125,
      "learning_rate": 1.9931784587047422e-06,
      "loss": 2.4426,
      "step": 50
    },
    {
      "epoch": 0.049490538573508006,
      "grad_norm": 0.11767578125,
      "learning_rate": 1.9928335771972748e-06,
      "loss": 2.3823,
      "step": 51
    },
    {
      "epoch": 0.050460941290635615,
      "grad_norm": 0.11572265625,
      "learning_rate": 1.9924802259596686e-06,
      "loss": 2.2299,
      "step": 52
    },
    {
      "epoch": 0.051431344007763224,
      "grad_norm": 0.11767578125,
      "learning_rate": 1.9921184083439354e-06,
      "loss": 2.4699,
      "step": 53
    },
    {
      "epoch": 0.05240174672489083,
      "grad_norm": 0.11083984375,
      "learning_rate": 1.991748127782404e-06,
      "loss": 2.2251,
      "step": 54
    },
    {
      "epoch": 0.053372149442018436,
      "grad_norm": 0.1142578125,
      "learning_rate": 1.9913693877876844e-06,
      "loss": 2.204,
      "step": 55
    },
    {
      "epoch": 0.054342552159146046,
      "grad_norm": 0.111328125,
      "learning_rate": 1.9909821919526363e-06,
      "loss": 2.228,
      "step": 56
    },
    {
      "epoch": 0.055312954876273655,
      "grad_norm": 0.109375,
      "learning_rate": 1.9905865439503337e-06,
      "loss": 2.2021,
      "step": 57
    },
    {
      "epoch": 0.056283357593401265,
      "grad_norm": 0.10888671875,
      "learning_rate": 1.9901824475340314e-06,
      "loss": 2.2048,
      "step": 58
    },
    {
      "epoch": 0.05725376031052887,
      "grad_norm": 0.11279296875,
      "learning_rate": 1.9897699065371285e-06,
      "loss": 2.2993,
      "step": 59
    },
    {
      "epoch": 0.05822416302765648,
      "grad_norm": 0.111328125,
      "learning_rate": 1.9893489248731336e-06,
      "loss": 2.2354,
      "step": 60
    },
    {
      "epoch": 0.059194565744784086,
      "grad_norm": 0.123046875,
      "learning_rate": 1.9889195065356238e-06,
      "loss": 2.3262,
      "step": 61
    },
    {
      "epoch": 0.060164968461911696,
      "grad_norm": 0.11767578125,
      "learning_rate": 1.988481655598212e-06,
      "loss": 2.4042,
      "step": 62
    },
    {
      "epoch": 0.0611353711790393,
      "grad_norm": 0.1171875,
      "learning_rate": 1.988035376214504e-06,
      "loss": 2.2258,
      "step": 63
    },
    {
      "epoch": 0.06210577389616691,
      "grad_norm": 0.11669921875,
      "learning_rate": 1.987580672618062e-06,
      "loss": 2.2404,
      "step": 64
    },
    {
      "epoch": 0.06307617661329452,
      "grad_norm": 0.1142578125,
      "learning_rate": 1.987117549122363e-06,
      "loss": 2.398,
      "step": 65
    },
    {
      "epoch": 0.06404657933042213,
      "grad_norm": 0.1181640625,
      "learning_rate": 1.986646010120756e-06,
      "loss": 2.264,
      "step": 66
    },
    {
      "epoch": 0.06501698204754973,
      "grad_norm": 0.1162109375,
      "learning_rate": 1.986166060086425e-06,
      "loss": 2.3085,
      "step": 67
    },
    {
      "epoch": 0.06598738476467735,
      "grad_norm": 0.11669921875,
      "learning_rate": 1.985677703572344e-06,
      "loss": 2.219,
      "step": 68
    },
    {
      "epoch": 0.06695778748180495,
      "grad_norm": 0.1181640625,
      "learning_rate": 1.9851809452112317e-06,
      "loss": 2.2302,
      "step": 69
    },
    {
      "epoch": 0.06792819019893255,
      "grad_norm": 0.1142578125,
      "learning_rate": 1.9846757897155116e-06,
      "loss": 2.3431,
      "step": 70
    },
    {
      "epoch": 0.06889859291606017,
      "grad_norm": 0.11669921875,
      "learning_rate": 1.984162241877264e-06,
      "loss": 2.2894,
      "step": 71
    },
    {
      "epoch": 0.06986899563318777,
      "grad_norm": 0.11083984375,
      "learning_rate": 1.983640306568183e-06,
      "loss": 2.3387,
      "step": 72
    },
    {
      "epoch": 0.07083939835031539,
      "grad_norm": 0.11669921875,
      "learning_rate": 1.9831099887395287e-06,
      "loss": 2.092,
      "step": 73
    },
    {
      "epoch": 0.07180980106744299,
      "grad_norm": 0.123046875,
      "learning_rate": 1.98257129342208e-06,
      "loss": 2.3139,
      "step": 74
    },
    {
      "epoch": 0.07278020378457059,
      "grad_norm": 0.12158203125,
      "learning_rate": 1.9820242257260884e-06,
      "loss": 2.3842,
      "step": 75
    },
    {
      "epoch": 0.07375060650169821,
      "grad_norm": 0.111328125,
      "learning_rate": 1.981468790841229e-06,
      "loss": 2.2733,
      "step": 76
    },
    {
      "epoch": 0.07472100921882581,
      "grad_norm": 0.1044921875,
      "learning_rate": 1.9809049940365504e-06,
      "loss": 2.1439,
      "step": 77
    },
    {
      "epoch": 0.07569141193595343,
      "grad_norm": 0.10888671875,
      "learning_rate": 1.980332840660425e-06,
      "loss": 2.316,
      "step": 78
    },
    {
      "epoch": 0.07666181465308103,
      "grad_norm": 0.12255859375,
      "learning_rate": 1.979752336140499e-06,
      "loss": 2.4238,
      "step": 79
    },
    {
      "epoch": 0.07763221737020863,
      "grad_norm": 0.10986328125,
      "learning_rate": 1.9791634859836408e-06,
      "loss": 2.2081,
      "step": 80
    },
    {
      "epoch": 0.07860262008733625,
      "grad_norm": 0.11572265625,
      "learning_rate": 1.978566295775887e-06,
      "loss": 2.3303,
      "step": 81
    },
    {
      "epoch": 0.07957302280446385,
      "grad_norm": 0.111328125,
      "learning_rate": 1.977960771182393e-06,
      "loss": 2.3657,
      "step": 82
    },
    {
      "epoch": 0.08054342552159147,
      "grad_norm": 0.1103515625,
      "learning_rate": 1.9773469179473754e-06,
      "loss": 2.3921,
      "step": 83
    },
    {
      "epoch": 0.08151382823871907,
      "grad_norm": 0.1123046875,
      "learning_rate": 1.9767247418940593e-06,
      "loss": 2.3947,
      "step": 84
    },
    {
      "epoch": 0.08248423095584667,
      "grad_norm": 0.12109375,
      "learning_rate": 1.9760942489246236e-06,
      "loss": 2.2361,
      "step": 85
    },
    {
      "epoch": 0.08345463367297429,
      "grad_norm": 0.1083984375,
      "learning_rate": 1.975455445020144e-06,
      "loss": 2.1025,
      "step": 86
    },
    {
      "epoch": 0.08442503639010189,
      "grad_norm": 0.115234375,
      "learning_rate": 1.9748083362405373e-06,
      "loss": 2.4577,
      "step": 87
    },
    {
      "epoch": 0.0853954391072295,
      "grad_norm": 0.1171875,
      "learning_rate": 1.974152928724502e-06,
      "loss": 2.3706,
      "step": 88
    },
    {
      "epoch": 0.08636584182435711,
      "grad_norm": 0.11181640625,
      "learning_rate": 1.973489228689463e-06,
      "loss": 2.4223,
      "step": 89
    },
    {
      "epoch": 0.08733624454148471,
      "grad_norm": 0.12158203125,
      "learning_rate": 1.9728172424315087e-06,
      "loss": 2.1975,
      "step": 90
    },
    {
      "epoch": 0.08830664725861233,
      "grad_norm": 0.10986328125,
      "learning_rate": 1.9721369763253348e-06,
      "loss": 2.3638,
      "step": 91
    },
    {
      "epoch": 0.08927704997573993,
      "grad_norm": 0.10986328125,
      "learning_rate": 1.9714484368241828e-06,
      "loss": 2.278,
      "step": 92
    },
    {
      "epoch": 0.09024745269286755,
      "grad_norm": 0.1123046875,
      "learning_rate": 1.9707516304597783e-06,
      "loss": 2.3421,
      "step": 93
    },
    {
      "epoch": 0.09121785540999515,
      "grad_norm": 0.1171875,
      "learning_rate": 1.9700465638422686e-06,
      "loss": 2.3418,
      "step": 94
    },
    {
      "epoch": 0.09218825812712275,
      "grad_norm": 0.11181640625,
      "learning_rate": 1.9693332436601613e-06,
      "loss": 2.1814,
      "step": 95
    },
    {
      "epoch": 0.09315866084425037,
      "grad_norm": 0.1259765625,
      "learning_rate": 1.96861167668026e-06,
      "loss": 2.299,
      "step": 96
    },
    {
      "epoch": 0.09412906356137797,
      "grad_norm": 0.10791015625,
      "learning_rate": 1.9678818697476e-06,
      "loss": 2.1708,
      "step": 97
    },
    {
      "epoch": 0.09509946627850557,
      "grad_norm": 0.1171875,
      "learning_rate": 1.9671438297853845e-06,
      "loss": 2.3062,
      "step": 98
    },
    {
      "epoch": 0.09606986899563319,
      "grad_norm": 0.11669921875,
      "learning_rate": 1.9663975637949172e-06,
      "loss": 2.2044,
      "step": 99
    },
    {
      "epoch": 0.09704027171276079,
      "grad_norm": 0.11181640625,
      "learning_rate": 1.9656430788555372e-06,
      "loss": 2.1654,
      "step": 100
    },
    {
      "epoch": 0.09801067442988841,
      "grad_norm": 0.1279296875,
      "learning_rate": 1.964880382124551e-06,
      "loss": 2.3448,
      "step": 101
    },
    {
      "epoch": 0.09898107714701601,
      "grad_norm": 0.1103515625,
      "learning_rate": 1.964109480837165e-06,
      "loss": 2.2951,
      "step": 102
    },
    {
      "epoch": 0.09995147986414361,
      "grad_norm": 0.10986328125,
      "learning_rate": 1.9633303823064186e-06,
      "loss": 2.2252,
      "step": 103
    },
    {
      "epoch": 0.09995147986414361,
      "eval_loss": 2.3053860664367676,
      "eval_runtime": 714.5363,
      "eval_samples_per_second": 0.907,
      "eval_steps_per_second": 0.227,
      "step": 103
    },
    {
      "epoch": 0.10092188258127123,
      "grad_norm": 0.11474609375,
      "learning_rate": 1.96254309392311e-06,
      "loss": 2.2504,
      "step": 104
    },
    {
      "epoch": 0.10189228529839883,
      "grad_norm": 0.10302734375,
      "learning_rate": 1.9617476231557315e-06,
      "loss": 2.3294,
      "step": 105
    },
    {
      "epoch": 0.10286268801552645,
      "grad_norm": 0.107421875,
      "learning_rate": 1.960943977550397e-06,
      "loss": 2.2063,
      "step": 106
    },
    {
      "epoch": 0.10383309073265405,
      "grad_norm": 0.10888671875,
      "learning_rate": 1.960132164730766e-06,
      "loss": 2.2253,
      "step": 107
    },
    {
      "epoch": 0.10480349344978165,
      "grad_norm": 0.1162109375,
      "learning_rate": 1.95931219239798e-06,
      "loss": 2.3711,
      "step": 108
    },
    {
      "epoch": 0.10577389616690927,
      "grad_norm": 0.1064453125,
      "learning_rate": 1.9584840683305802e-06,
      "loss": 2.2904,
      "step": 109
    },
    {
      "epoch": 0.10674429888403687,
      "grad_norm": 0.11669921875,
      "learning_rate": 1.957647800384441e-06,
      "loss": 2.3879,
      "step": 110
    },
    {
      "epoch": 0.10771470160116449,
      "grad_norm": 0.10498046875,
      "learning_rate": 1.9568033964926904e-06,
      "loss": 2.3677,
      "step": 111
    },
    {
      "epoch": 0.10868510431829209,
      "grad_norm": 0.119140625,
      "learning_rate": 1.9559508646656384e-06,
      "loss": 2.4216,
      "step": 112
    },
    {
      "epoch": 0.1096555070354197,
      "grad_norm": 0.10791015625,
      "learning_rate": 1.9550902129906976e-06,
      "loss": 2.197,
      "step": 113
    },
    {
      "epoch": 0.11062590975254731,
      "grad_norm": 0.126953125,
      "learning_rate": 1.954221449632311e-06,
      "loss": 2.2467,
      "step": 114
    },
    {
      "epoch": 0.11159631246967491,
      "grad_norm": 0.11083984375,
      "learning_rate": 1.953344582831869e-06,
      "loss": 2.1098,
      "step": 115
    },
    {
      "epoch": 0.11256671518680253,
      "grad_norm": 0.1123046875,
      "learning_rate": 1.952459620907636e-06,
      "loss": 2.2719,
      "step": 116
    },
    {
      "epoch": 0.11353711790393013,
      "grad_norm": 0.11572265625,
      "learning_rate": 1.951566572254669e-06,
      "loss": 2.3066,
      "step": 117
    },
    {
      "epoch": 0.11450752062105773,
      "grad_norm": 0.11376953125,
      "learning_rate": 1.9506654453447375e-06,
      "loss": 2.3814,
      "step": 118
    },
    {
      "epoch": 0.11547792333818535,
      "grad_norm": 0.109375,
      "learning_rate": 1.9497562487262457e-06,
      "loss": 2.233,
      "step": 119
    },
    {
      "epoch": 0.11644832605531295,
      "grad_norm": 0.115234375,
      "learning_rate": 1.9488389910241497e-06,
      "loss": 2.3341,
      "step": 120
    },
    {
      "epoch": 0.11741872877244056,
      "grad_norm": 0.11376953125,
      "learning_rate": 1.947913680939874e-06,
      "loss": 2.1674,
      "step": 121
    },
    {
      "epoch": 0.11838913148956817,
      "grad_norm": 0.12255859375,
      "learning_rate": 1.9469803272512343e-06,
      "loss": 2.3772,
      "step": 122
    },
    {
      "epoch": 0.11935953420669577,
      "grad_norm": 0.1142578125,
      "learning_rate": 1.946038938812347e-06,
      "loss": 2.1713,
      "step": 123
    },
    {
      "epoch": 0.12032993692382339,
      "grad_norm": 0.1083984375,
      "learning_rate": 1.9450895245535508e-06,
      "loss": 2.1877,
      "step": 124
    },
    {
      "epoch": 0.121300339640951,
      "grad_norm": 0.1142578125,
      "learning_rate": 1.9441320934813205e-06,
      "loss": 2.2995,
      "step": 125
    },
    {
      "epoch": 0.1222707423580786,
      "grad_norm": 0.1044921875,
      "learning_rate": 1.9431666546781793e-06,
      "loss": 2.3268,
      "step": 126
    },
    {
      "epoch": 0.12324114507520621,
      "grad_norm": 0.10693359375,
      "learning_rate": 1.942193217302617e-06,
      "loss": 2.3007,
      "step": 127
    },
    {
      "epoch": 0.12421154779233382,
      "grad_norm": 0.12353515625,
      "learning_rate": 1.9412117905889995e-06,
      "loss": 2.1957,
      "step": 128
    },
    {
      "epoch": 0.12518195050946143,
      "grad_norm": 0.1123046875,
      "learning_rate": 1.9402223838474815e-06,
      "loss": 2.3008,
      "step": 129
    },
    {
      "epoch": 0.12615235322658905,
      "grad_norm": 0.103515625,
      "learning_rate": 1.93922500646392e-06,
      "loss": 2.3233,
      "step": 130
    },
    {
      "epoch": 0.12712275594371664,
      "grad_norm": 0.1240234375,
      "learning_rate": 1.9382196678997843e-06,
      "loss": 2.342,
      "step": 131
    },
    {
      "epoch": 0.12809315866084425,
      "grad_norm": 0.11962890625,
      "learning_rate": 1.937206377692066e-06,
      "loss": 2.2727,
      "step": 132
    },
    {
      "epoch": 0.12906356137797187,
      "grad_norm": 0.11474609375,
      "learning_rate": 1.936185145453189e-06,
      "loss": 2.3136,
      "step": 133
    },
    {
      "epoch": 0.13003396409509946,
      "grad_norm": 0.11376953125,
      "learning_rate": 1.9351559808709173e-06,
      "loss": 2.3071,
      "step": 134
    },
    {
      "epoch": 0.13100436681222707,
      "grad_norm": 0.11474609375,
      "learning_rate": 1.9341188937082645e-06,
      "loss": 2.3851,
      "step": 135
    },
    {
      "epoch": 0.1319747695293547,
      "grad_norm": 0.10888671875,
      "learning_rate": 1.9330738938034004e-06,
      "loss": 2.1945,
      "step": 136
    },
    {
      "epoch": 0.13294517224648228,
      "grad_norm": 0.1220703125,
      "learning_rate": 1.9320209910695587e-06,
      "loss": 2.3302,
      "step": 137
    },
    {
      "epoch": 0.1339155749636099,
      "grad_norm": 0.1083984375,
      "learning_rate": 1.9309601954949403e-06,
      "loss": 2.1342,
      "step": 138
    },
    {
      "epoch": 0.1348859776807375,
      "grad_norm": 0.1162109375,
      "learning_rate": 1.9298915171426217e-06,
      "loss": 2.3877,
      "step": 139
    },
    {
      "epoch": 0.1358563803978651,
      "grad_norm": 0.1064453125,
      "learning_rate": 1.9288149661504584e-06,
      "loss": 2.246,
      "step": 140
    },
    {
      "epoch": 0.13682678311499272,
      "grad_norm": 0.111328125,
      "learning_rate": 1.9277305527309865e-06,
      "loss": 2.2299,
      "step": 141
    },
    {
      "epoch": 0.13779718583212033,
      "grad_norm": 0.1123046875,
      "learning_rate": 1.9266382871713306e-06,
      "loss": 2.2701,
      "step": 142
    },
    {
      "epoch": 0.13876758854924795,
      "grad_norm": 0.107421875,
      "learning_rate": 1.9255381798331015e-06,
      "loss": 2.1286,
      "step": 143
    },
    {
      "epoch": 0.13973799126637554,
      "grad_norm": 0.11083984375,
      "learning_rate": 1.9244302411523e-06,
      "loss": 2.3215,
      "step": 144
    },
    {
      "epoch": 0.14070839398350315,
      "grad_norm": 0.107421875,
      "learning_rate": 1.9233144816392187e-06,
      "loss": 2.0745,
      "step": 145
    },
    {
      "epoch": 0.14167879670063077,
      "grad_norm": 0.10791015625,
      "learning_rate": 1.9221909118783407e-06,
      "loss": 2.3284,
      "step": 146
    },
    {
      "epoch": 0.14264919941775836,
      "grad_norm": 0.1142578125,
      "learning_rate": 1.92105954252824e-06,
      "loss": 2.3162,
      "step": 147
    },
    {
      "epoch": 0.14361960213488598,
      "grad_norm": 0.1103515625,
      "learning_rate": 1.9199203843214794e-06,
      "loss": 2.1617,
      "step": 148
    },
    {
      "epoch": 0.1445900048520136,
      "grad_norm": 0.1171875,
      "learning_rate": 1.918773448064511e-06,
      "loss": 2.3815,
      "step": 149
    },
    {
      "epoch": 0.14556040756914118,
      "grad_norm": 0.12158203125,
      "learning_rate": 1.9176187446375713e-06,
      "loss": 2.1546,
      "step": 150
    },
    {
      "epoch": 0.1465308102862688,
      "grad_norm": 0.11376953125,
      "learning_rate": 1.9164562849945785e-06,
      "loss": 2.2754,
      "step": 151
    },
    {
      "epoch": 0.14750121300339641,
      "grad_norm": 0.1240234375,
      "learning_rate": 1.9152860801630306e-06,
      "loss": 2.1837,
      "step": 152
    },
    {
      "epoch": 0.14847161572052403,
      "grad_norm": 0.11279296875,
      "learning_rate": 1.9141081412438966e-06,
      "loss": 2.2695,
      "step": 153
    },
    {
      "epoch": 0.14944201843765162,
      "grad_norm": 0.11474609375,
      "learning_rate": 1.912922479411516e-06,
      "loss": 2.2561,
      "step": 154
    },
    {
      "epoch": 0.15041242115477924,
      "grad_norm": 0.10986328125,
      "learning_rate": 1.9117291059134893e-06,
      "loss": 2.201,
      "step": 155
    },
    {
      "epoch": 0.15138282387190685,
      "grad_norm": 0.10986328125,
      "learning_rate": 1.9105280320705733e-06,
      "loss": 2.2406,
      "step": 156
    },
    {
      "epoch": 0.15235322658903444,
      "grad_norm": 0.11376953125,
      "learning_rate": 1.9093192692765728e-06,
      "loss": 2.2358,
      "step": 157
    },
    {
      "epoch": 0.15332362930616206,
      "grad_norm": 0.1171875,
      "learning_rate": 1.9081028289982322e-06,
      "loss": 2.2847,
      "step": 158
    },
    {
      "epoch": 0.15429403202328967,
      "grad_norm": 0.1171875,
      "learning_rate": 1.9068787227751273e-06,
      "loss": 2.23,
      "step": 159
    },
    {
      "epoch": 0.15526443474041726,
      "grad_norm": 0.10986328125,
      "learning_rate": 1.9056469622195561e-06,
      "loss": 2.1406,
      "step": 160
    },
    {
      "epoch": 0.15623483745754488,
      "grad_norm": 0.10986328125,
      "learning_rate": 1.904407559016428e-06,
      "loss": 2.3344,
      "step": 161
    },
    {
      "epoch": 0.1572052401746725,
      "grad_norm": 0.11474609375,
      "learning_rate": 1.9031605249231528e-06,
      "loss": 2.307,
      "step": 162
    },
    {
      "epoch": 0.15817564289180008,
      "grad_norm": 0.1201171875,
      "learning_rate": 1.9019058717695309e-06,
      "loss": 2.3153,
      "step": 163
    },
    {
      "epoch": 0.1591460456089277,
      "grad_norm": 0.10986328125,
      "learning_rate": 1.9006436114576385e-06,
      "loss": 2.2753,
      "step": 164
    },
    {
      "epoch": 0.16011644832605532,
      "grad_norm": 0.10791015625,
      "learning_rate": 1.8993737559617163e-06,
      "loss": 2.3172,
      "step": 165
    },
    {
      "epoch": 0.16108685104318293,
      "grad_norm": 0.11669921875,
      "learning_rate": 1.8980963173280556e-06,
      "loss": 2.419,
      "step": 166
    },
    {
      "epoch": 0.16205725376031052,
      "grad_norm": 0.11474609375,
      "learning_rate": 1.8968113076748841e-06,
      "loss": 2.3402,
      "step": 167
    },
    {
      "epoch": 0.16302765647743814,
      "grad_norm": 0.11669921875,
      "learning_rate": 1.8955187391922506e-06,
      "loss": 2.2971,
      "step": 168
    },
    {
      "epoch": 0.16399805919456575,
      "grad_norm": 0.1083984375,
      "learning_rate": 1.89421862414191e-06,
      "loss": 2.4186,
      "step": 169
    },
    {
      "epoch": 0.16496846191169334,
      "grad_norm": 0.1220703125,
      "learning_rate": 1.892910974857206e-06,
      "loss": 2.2934,
      "step": 170
    },
    {
      "epoch": 0.16593886462882096,
      "grad_norm": 0.10986328125,
      "learning_rate": 1.891595803742955e-06,
      "loss": 2.2529,
      "step": 171
    },
    {
      "epoch": 0.16690926734594858,
      "grad_norm": 0.11181640625,
      "learning_rate": 1.890273123275328e-06,
      "loss": 2.2834,
      "step": 172
    },
    {
      "epoch": 0.16787967006307616,
      "grad_norm": 0.115234375,
      "learning_rate": 1.8889429460017328e-06,
      "loss": 2.3267,
      "step": 173
    },
    {
      "epoch": 0.16885007278020378,
      "grad_norm": 0.1142578125,
      "learning_rate": 1.8876052845406932e-06,
      "loss": 2.3528,
      "step": 174
    },
    {
      "epoch": 0.1698204754973314,
      "grad_norm": 0.11669921875,
      "learning_rate": 1.8862601515817325e-06,
      "loss": 2.2583,
      "step": 175
    },
    {
      "epoch": 0.170790878214459,
      "grad_norm": 0.11328125,
      "learning_rate": 1.8849075598852497e-06,
      "loss": 2.0822,
      "step": 176
    },
    {
      "epoch": 0.1717612809315866,
      "grad_norm": 0.1142578125,
      "learning_rate": 1.8835475222824004e-06,
      "loss": 2.2797,
      "step": 177
    },
    {
      "epoch": 0.17273168364871422,
      "grad_norm": 0.1064453125,
      "learning_rate": 1.8821800516749753e-06,
      "loss": 2.1859,
      "step": 178
    },
    {
      "epoch": 0.17370208636584183,
      "grad_norm": 0.1064453125,
      "learning_rate": 1.8808051610352764e-06,
      "loss": 2.3002,
      "step": 179
    },
    {
      "epoch": 0.17467248908296942,
      "grad_norm": 0.12109375,
      "learning_rate": 1.8794228634059947e-06,
      "loss": 2.4352,
      "step": 180
    },
    {
      "epoch": 0.17564289180009704,
      "grad_norm": 0.11767578125,
      "learning_rate": 1.878033171900087e-06,
      "loss": 2.2839,
      "step": 181
    },
    {
      "epoch": 0.17661329451722466,
      "grad_norm": 0.11767578125,
      "learning_rate": 1.8766360997006506e-06,
      "loss": 2.3349,
      "step": 182
    },
    {
      "epoch": 0.17758369723435224,
      "grad_norm": 0.11669921875,
      "learning_rate": 1.8752316600607989e-06,
      "loss": 2.3102,
      "step": 183
    },
    {
      "epoch": 0.17855409995147986,
      "grad_norm": 0.11474609375,
      "learning_rate": 1.8738198663035351e-06,
      "loss": 2.2898,
      "step": 184
    },
    {
      "epoch": 0.17952450266860748,
      "grad_norm": 0.11474609375,
      "learning_rate": 1.8724007318216263e-06,
      "loss": 2.3098,
      "step": 185
    },
    {
      "epoch": 0.1804949053857351,
      "grad_norm": 0.1220703125,
      "learning_rate": 1.8709742700774764e-06,
      "loss": 2.6074,
      "step": 186
    },
    {
      "epoch": 0.18146530810286268,
      "grad_norm": 0.11083984375,
      "learning_rate": 1.869540494602998e-06,
      "loss": 2.3196,
      "step": 187
    },
    {
      "epoch": 0.1824357108199903,
      "grad_norm": 0.1162109375,
      "learning_rate": 1.8680994189994842e-06,
      "loss": 2.365,
      "step": 188
    },
    {
      "epoch": 0.18340611353711792,
      "grad_norm": 0.1142578125,
      "learning_rate": 1.8666510569374804e-06,
      "loss": 2.2361,
      "step": 189
    },
    {
      "epoch": 0.1843765162542455,
      "grad_norm": 0.11083984375,
      "learning_rate": 1.8651954221566527e-06,
      "loss": 2.2856,
      "step": 190
    },
    {
      "epoch": 0.18534691897137312,
      "grad_norm": 0.11181640625,
      "learning_rate": 1.8637325284656598e-06,
      "loss": 2.3472,
      "step": 191
    },
    {
      "epoch": 0.18631732168850074,
      "grad_norm": 0.10888671875,
      "learning_rate": 1.8622623897420201e-06,
      "loss": 2.2649,
      "step": 192
    },
    {
      "epoch": 0.18728772440562833,
      "grad_norm": 0.1103515625,
      "learning_rate": 1.860785019931982e-06,
      "loss": 2.281,
      "step": 193
    },
    {
      "epoch": 0.18825812712275594,
      "grad_norm": 0.12451171875,
      "learning_rate": 1.85930043305039e-06,
      "loss": 2.2947,
      "step": 194
    },
    {
      "epoch": 0.18922852983988356,
      "grad_norm": 0.1142578125,
      "learning_rate": 1.8578086431805507e-06,
      "loss": 2.193,
      "step": 195
    },
    {
      "epoch": 0.19019893255701115,
      "grad_norm": 0.111328125,
      "learning_rate": 1.8563096644741034e-06,
      "loss": 2.351,
      "step": 196
    },
    {
      "epoch": 0.19116933527413876,
      "grad_norm": 0.11083984375,
      "learning_rate": 1.8548035111508815e-06,
      "loss": 2.0055,
      "step": 197
    },
    {
      "epoch": 0.19213973799126638,
      "grad_norm": 0.11279296875,
      "learning_rate": 1.85329019749878e-06,
      "loss": 2.2949,
      "step": 198
    },
    {
      "epoch": 0.193110140708394,
      "grad_norm": 0.119140625,
      "learning_rate": 1.8517697378736188e-06,
      "loss": 2.0795,
      "step": 199
    },
    {
      "epoch": 0.19408054342552158,
      "grad_norm": 0.11181640625,
      "learning_rate": 1.8502421466990075e-06,
      "loss": 2.2709,
      "step": 200
    },
    {
      "epoch": 0.1950509461426492,
      "grad_norm": 0.11474609375,
      "learning_rate": 1.8487074384662076e-06,
      "loss": 2.2284,
      "step": 201
    },
    {
      "epoch": 0.19602134885977682,
      "grad_norm": 0.11328125,
      "learning_rate": 1.8471656277339956e-06,
      "loss": 2.2379,
      "step": 202
    },
    {
      "epoch": 0.1969917515769044,
      "grad_norm": 0.1240234375,
      "learning_rate": 1.845616729128525e-06,
      "loss": 2.3339,
      "step": 203
    },
    {
      "epoch": 0.19796215429403202,
      "grad_norm": 0.10693359375,
      "learning_rate": 1.844060757343187e-06,
      "loss": 2.2919,
      "step": 204
    },
    {
      "epoch": 0.19893255701115964,
      "grad_norm": 0.1142578125,
      "learning_rate": 1.842497727138472e-06,
      "loss": 2.1203,
      "step": 205
    },
    {
      "epoch": 0.19990295972828723,
      "grad_norm": 0.10693359375,
      "learning_rate": 1.8409276533418283e-06,
      "loss": 2.1531,
      "step": 206
    },
    {
      "epoch": 0.19990295972828723,
      "eval_loss": 2.299978494644165,
      "eval_runtime": 713.9402,
      "eval_samples_per_second": 0.908,
      "eval_steps_per_second": 0.227,
      "step": 206
    },
    {
      "epoch": 0.20087336244541484,
      "grad_norm": 0.11376953125,
      "learning_rate": 1.839350550847523e-06,
      "loss": 2.0748,
      "step": 207
    },
    {
      "epoch": 0.20184376516254246,
      "grad_norm": 0.115234375,
      "learning_rate": 1.837766434616499e-06,
      "loss": 2.2511,
      "step": 208
    },
    {
      "epoch": 0.20281416787967008,
      "grad_norm": 0.11279296875,
      "learning_rate": 1.836175319676234e-06,
      "loss": 2.2153,
      "step": 209
    },
    {
      "epoch": 0.20378457059679767,
      "grad_norm": 0.11083984375,
      "learning_rate": 1.8345772211205997e-06,
      "loss": 2.2422,
      "step": 210
    },
    {
      "epoch": 0.20475497331392528,
      "grad_norm": 0.1103515625,
      "learning_rate": 1.8329721541097135e-06,
      "loss": 2.1181,
      "step": 211
    },
    {
      "epoch": 0.2057253760310529,
      "grad_norm": 0.1162109375,
      "learning_rate": 1.831360133869801e-06,
      "loss": 2.4016,
      "step": 212
    },
    {
      "epoch": 0.2066957787481805,
      "grad_norm": 0.115234375,
      "learning_rate": 1.829741175693047e-06,
      "loss": 2.3543,
      "step": 213
    },
    {
      "epoch": 0.2076661814653081,
      "grad_norm": 0.11767578125,
      "learning_rate": 1.8281152949374526e-06,
      "loss": 2.2398,
      "step": 214
    },
    {
      "epoch": 0.20863658418243572,
      "grad_norm": 0.11083984375,
      "learning_rate": 1.8264825070266885e-06,
      "loss": 2.1783,
      "step": 215
    },
    {
      "epoch": 0.2096069868995633,
      "grad_norm": 0.11767578125,
      "learning_rate": 1.8248428274499493e-06,
      "loss": 2.2381,
      "step": 216
    },
    {
      "epoch": 0.21057738961669092,
      "grad_norm": 0.1123046875,
      "learning_rate": 1.823196271761806e-06,
      "loss": 2.0432,
      "step": 217
    },
    {
      "epoch": 0.21154779233381854,
      "grad_norm": 0.12060546875,
      "learning_rate": 1.8215428555820598e-06,
      "loss": 2.3025,
      "step": 218
    },
    {
      "epoch": 0.21251819505094613,
      "grad_norm": 0.1123046875,
      "learning_rate": 1.8198825945955917e-06,
      "loss": 2.1607,
      "step": 219
    },
    {
      "epoch": 0.21348859776807375,
      "grad_norm": 0.12255859375,
      "learning_rate": 1.8182155045522156e-06,
      "loss": 2.2584,
      "step": 220
    },
    {
      "epoch": 0.21445900048520136,
      "grad_norm": 0.11376953125,
      "learning_rate": 1.8165416012665275e-06,
      "loss": 2.2633,
      "step": 221
    },
    {
      "epoch": 0.21542940320232898,
      "grad_norm": 0.11279296875,
      "learning_rate": 1.8148609006177572e-06,
      "loss": 2.1962,
      "step": 222
    },
    {
      "epoch": 0.21639980591945657,
      "grad_norm": 0.111328125,
      "learning_rate": 1.8131734185496164e-06,
      "loss": 2.2501,
      "step": 223
    },
    {
      "epoch": 0.21737020863658418,
      "grad_norm": 0.11083984375,
      "learning_rate": 1.811479171070147e-06,
      "loss": 2.2471,
      "step": 224
    },
    {
      "epoch": 0.2183406113537118,
      "grad_norm": 0.109375,
      "learning_rate": 1.8097781742515703e-06,
      "loss": 2.3646,
      "step": 225
    },
    {
      "epoch": 0.2193110140708394,
      "grad_norm": 0.11767578125,
      "learning_rate": 1.8080704442301349e-06,
      "loss": 2.3021,
      "step": 226
    },
    {
      "epoch": 0.220281416787967,
      "grad_norm": 0.12109375,
      "learning_rate": 1.8063559972059617e-06,
      "loss": 2.2512,
      "step": 227
    },
    {
      "epoch": 0.22125181950509462,
      "grad_norm": 0.1083984375,
      "learning_rate": 1.8046348494428925e-06,
      "loss": 2.4135,
      "step": 228
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 0.1123046875,
      "learning_rate": 1.8029070172683338e-06,
      "loss": 2.2335,
      "step": 229
    },
    {
      "epoch": 0.22319262493934983,
      "grad_norm": 0.11572265625,
      "learning_rate": 1.8011725170731031e-06,
      "loss": 2.2618,
      "step": 230
    },
    {
      "epoch": 0.22416302765647744,
      "grad_norm": 0.10986328125,
      "learning_rate": 1.7994313653112738e-06,
      "loss": 2.3062,
      "step": 231
    },
    {
      "epoch": 0.22513343037360506,
      "grad_norm": 0.11572265625,
      "learning_rate": 1.7976835785000166e-06,
      "loss": 2.2639,
      "step": 232
    },
    {
      "epoch": 0.22610383309073265,
      "grad_norm": 0.1142578125,
      "learning_rate": 1.7959291732194468e-06,
      "loss": 2.1366,
      "step": 233
    },
    {
      "epoch": 0.22707423580786026,
      "grad_norm": 0.12158203125,
      "learning_rate": 1.7941681661124625e-06,
      "loss": 2.4276,
      "step": 234
    },
    {
      "epoch": 0.22804463852498788,
      "grad_norm": 0.1103515625,
      "learning_rate": 1.792400573884591e-06,
      "loss": 2.2464,
      "step": 235
    },
    {
      "epoch": 0.22901504124211547,
      "grad_norm": 0.1123046875,
      "learning_rate": 1.7906264133038272e-06,
      "loss": 2.3464,
      "step": 236
    },
    {
      "epoch": 0.22998544395924309,
      "grad_norm": 0.11572265625,
      "learning_rate": 1.788845701200476e-06,
      "loss": 2.2957,
      "step": 237
    },
    {
      "epoch": 0.2309558466763707,
      "grad_norm": 0.10595703125,
      "learning_rate": 1.7870584544669926e-06,
      "loss": 2.3,
      "step": 238
    },
    {
      "epoch": 0.2319262493934983,
      "grad_norm": 0.12109375,
      "learning_rate": 1.7852646900578213e-06,
      "loss": 2.2659,
      "step": 239
    },
    {
      "epoch": 0.2328966521106259,
      "grad_norm": 0.11279296875,
      "learning_rate": 1.7834644249892363e-06,
      "loss": 2.2278,
      "step": 240
    },
    {
      "epoch": 0.23386705482775352,
      "grad_norm": 0.11328125,
      "learning_rate": 1.7816576763391785e-06,
      "loss": 2.2099,
      "step": 241
    },
    {
      "epoch": 0.2348374575448811,
      "grad_norm": 0.1142578125,
      "learning_rate": 1.7798444612470942e-06,
      "loss": 2.3325,
      "step": 242
    },
    {
      "epoch": 0.23580786026200873,
      "grad_norm": 0.111328125,
      "learning_rate": 1.7780247969137735e-06,
      "loss": 2.2195,
      "step": 243
    },
    {
      "epoch": 0.23677826297913634,
      "grad_norm": 0.10791015625,
      "learning_rate": 1.776198700601186e-06,
      "loss": 2.2823,
      "step": 244
    },
    {
      "epoch": 0.23774866569626396,
      "grad_norm": 0.1171875,
      "learning_rate": 1.7743661896323164e-06,
      "loss": 2.2858,
      "step": 245
    },
    {
      "epoch": 0.23871906841339155,
      "grad_norm": 0.1142578125,
      "learning_rate": 1.7725272813910018e-06,
      "loss": 2.3179,
      "step": 246
    },
    {
      "epoch": 0.23968947113051917,
      "grad_norm": 0.1083984375,
      "learning_rate": 1.7706819933217663e-06,
      "loss": 2.2138,
      "step": 247
    },
    {
      "epoch": 0.24065987384764678,
      "grad_norm": 0.1083984375,
      "learning_rate": 1.7688303429296547e-06,
      "loss": 2.2176,
      "step": 248
    },
    {
      "epoch": 0.24163027656477437,
      "grad_norm": 0.11767578125,
      "learning_rate": 1.7669723477800673e-06,
      "loss": 2.2987,
      "step": 249
    },
    {
      "epoch": 0.242600679281902,
      "grad_norm": 0.10888671875,
      "learning_rate": 1.765108025498593e-06,
      "loss": 2.2912,
      "step": 250
    },
    {
      "epoch": 0.2435710819990296,
      "grad_norm": 0.10986328125,
      "learning_rate": 1.763237393770842e-06,
      "loss": 2.3544,
      "step": 251
    },
    {
      "epoch": 0.2445414847161572,
      "grad_norm": 0.11474609375,
      "learning_rate": 1.761360470342278e-06,
      "loss": 2.4314,
      "step": 252
    },
    {
      "epoch": 0.2455118874332848,
      "grad_norm": 0.10595703125,
      "learning_rate": 1.7594772730180504e-06,
      "loss": 2.1832,
      "step": 253
    },
    {
      "epoch": 0.24648229015041243,
      "grad_norm": 0.11279296875,
      "learning_rate": 1.7575878196628237e-06,
      "loss": 2.3464,
      "step": 254
    },
    {
      "epoch": 0.24745269286754004,
      "grad_norm": 0.11083984375,
      "learning_rate": 1.7556921282006113e-06,
      "loss": 2.2656,
      "step": 255
    },
    {
      "epoch": 0.24842309558466763,
      "grad_norm": 0.10888671875,
      "learning_rate": 1.7537902166146017e-06,
      "loss": 2.2514,
      "step": 256
    },
    {
      "epoch": 0.24939349830179525,
      "grad_norm": 0.10986328125,
      "learning_rate": 1.7518821029469907e-06,
      "loss": 2.2708,
      "step": 257
    },
    {
      "epoch": 0.25036390101892286,
      "grad_norm": 0.10888671875,
      "learning_rate": 1.7499678052988082e-06,
      "loss": 2.2224,
      "step": 258
    },
    {
      "epoch": 0.2513343037360505,
      "grad_norm": 0.115234375,
      "learning_rate": 1.748047341829749e-06,
      "loss": 2.2772,
      "step": 259
    },
    {
      "epoch": 0.2523047064531781,
      "grad_norm": 0.1083984375,
      "learning_rate": 1.7461207307579976e-06,
      "loss": 2.2778,
      "step": 260
    },
    {
      "epoch": 0.25327510917030566,
      "grad_norm": 0.1171875,
      "learning_rate": 1.7441879903600576e-06,
      "loss": 2.3843,
      "step": 261
    },
    {
      "epoch": 0.2542455118874333,
      "grad_norm": 0.10400390625,
      "learning_rate": 1.7422491389705771e-06,
      "loss": 2.0716,
      "step": 262
    },
    {
      "epoch": 0.2552159146045609,
      "grad_norm": 0.11328125,
      "learning_rate": 1.7403041949821752e-06,
      "loss": 2.4164,
      "step": 263
    },
    {
      "epoch": 0.2561863173216885,
      "grad_norm": 0.1044921875,
      "learning_rate": 1.738353176845268e-06,
      "loss": 2.1967,
      "step": 264
    },
    {
      "epoch": 0.2571567200388161,
      "grad_norm": 0.11083984375,
      "learning_rate": 1.7363961030678926e-06,
      "loss": 2.1584,
      "step": 265
    },
    {
      "epoch": 0.25812712275594374,
      "grad_norm": 0.10546875,
      "learning_rate": 1.7344329922155325e-06,
      "loss": 2.2014,
      "step": 266
    },
    {
      "epoch": 0.2590975254730713,
      "grad_norm": 0.11376953125,
      "learning_rate": 1.73246386291094e-06,
      "loss": 2.1515,
      "step": 267
    },
    {
      "epoch": 0.2600679281901989,
      "grad_norm": 0.11083984375,
      "learning_rate": 1.7304887338339618e-06,
      "loss": 2.2637,
      "step": 268
    },
    {
      "epoch": 0.26103833090732653,
      "grad_norm": 0.1123046875,
      "learning_rate": 1.7285076237213596e-06,
      "loss": 2.1817,
      "step": 269
    },
    {
      "epoch": 0.26200873362445415,
      "grad_norm": 0.1103515625,
      "learning_rate": 1.7265205513666342e-06,
      "loss": 2.2453,
      "step": 270
    },
    {
      "epoch": 0.26297913634158177,
      "grad_norm": 0.11474609375,
      "learning_rate": 1.7245275356198447e-06,
      "loss": 2.3512,
      "step": 271
    },
    {
      "epoch": 0.2639495390587094,
      "grad_norm": 0.10498046875,
      "learning_rate": 1.7225285953874332e-06,
      "loss": 2.2318,
      "step": 272
    },
    {
      "epoch": 0.264919941775837,
      "grad_norm": 0.11083984375,
      "learning_rate": 1.720523749632042e-06,
      "loss": 2.4672,
      "step": 273
    },
    {
      "epoch": 0.26589034449296456,
      "grad_norm": 0.11376953125,
      "learning_rate": 1.7185130173723362e-06,
      "loss": 2.1982,
      "step": 274
    },
    {
      "epoch": 0.2668607472100922,
      "grad_norm": 0.11572265625,
      "learning_rate": 1.7164964176828225e-06,
      "loss": 2.4835,
      "step": 275
    },
    {
      "epoch": 0.2678311499272198,
      "grad_norm": 0.10791015625,
      "learning_rate": 1.7144739696936668e-06,
      "loss": 2.2988,
      "step": 276
    },
    {
      "epoch": 0.2688015526443474,
      "grad_norm": 0.11181640625,
      "learning_rate": 1.7124456925905152e-06,
      "loss": 2.3901,
      "step": 277
    },
    {
      "epoch": 0.269771955361475,
      "grad_norm": 0.1162109375,
      "learning_rate": 1.7104116056143105e-06,
      "loss": 2.2672,
      "step": 278
    },
    {
      "epoch": 0.27074235807860264,
      "grad_norm": 0.10595703125,
      "learning_rate": 1.7083717280611096e-06,
      "loss": 2.3082,
      "step": 279
    },
    {
      "epoch": 0.2717127607957302,
      "grad_norm": 0.107421875,
      "learning_rate": 1.7063260792819014e-06,
      "loss": 2.2103,
      "step": 280
    },
    {
      "epoch": 0.2726831635128578,
      "grad_norm": 0.1181640625,
      "learning_rate": 1.7042746786824222e-06,
      "loss": 2.1556,
      "step": 281
    },
    {
      "epoch": 0.27365356622998543,
      "grad_norm": 0.11572265625,
      "learning_rate": 1.7022175457229725e-06,
      "loss": 2.3191,
      "step": 282
    },
    {
      "epoch": 0.27462396894711305,
      "grad_norm": 0.1123046875,
      "learning_rate": 1.7001546999182313e-06,
      "loss": 2.292,
      "step": 283
    },
    {
      "epoch": 0.27559437166424067,
      "grad_norm": 0.10595703125,
      "learning_rate": 1.698086160837072e-06,
      "loss": 2.2801,
      "step": 284
    },
    {
      "epoch": 0.2765647743813683,
      "grad_norm": 0.10400390625,
      "learning_rate": 1.6960119481023771e-06,
      "loss": 2.2532,
      "step": 285
    },
    {
      "epoch": 0.2775351770984959,
      "grad_norm": 0.1171875,
      "learning_rate": 1.6939320813908504e-06,
      "loss": 2.2095,
      "step": 286
    },
    {
      "epoch": 0.27850557981562346,
      "grad_norm": 0.1220703125,
      "learning_rate": 1.6918465804328313e-06,
      "loss": 2.3242,
      "step": 287
    },
    {
      "epoch": 0.2794759825327511,
      "grad_norm": 0.1162109375,
      "learning_rate": 1.6897554650121081e-06,
      "loss": 2.2221,
      "step": 288
    },
    {
      "epoch": 0.2804463852498787,
      "grad_norm": 0.1064453125,
      "learning_rate": 1.6876587549657298e-06,
      "loss": 2.2123,
      "step": 289
    },
    {
      "epoch": 0.2814167879670063,
      "grad_norm": 0.1142578125,
      "learning_rate": 1.6855564701838179e-06,
      "loss": 2.277,
      "step": 290
    },
    {
      "epoch": 0.2823871906841339,
      "grad_norm": 0.1103515625,
      "learning_rate": 1.6834486306093772e-06,
      "loss": 2.3219,
      "step": 291
    },
    {
      "epoch": 0.28335759340126154,
      "grad_norm": 0.12109375,
      "learning_rate": 1.6813352562381085e-06,
      "loss": 2.4264,
      "step": 292
    },
    {
      "epoch": 0.28432799611838916,
      "grad_norm": 0.1083984375,
      "learning_rate": 1.6792163671182163e-06,
      "loss": 2.1156,
      "step": 293
    },
    {
      "epoch": 0.2852983988355167,
      "grad_norm": 0.115234375,
      "learning_rate": 1.6770919833502204e-06,
      "loss": 2.3589,
      "step": 294
    },
    {
      "epoch": 0.28626880155264434,
      "grad_norm": 0.115234375,
      "learning_rate": 1.674962125086765e-06,
      "loss": 2.2588,
      "step": 295
    },
    {
      "epoch": 0.28723920426977195,
      "grad_norm": 0.10546875,
      "learning_rate": 1.6728268125324273e-06,
      "loss": 2.1033,
      "step": 296
    },
    {
      "epoch": 0.28820960698689957,
      "grad_norm": 0.1025390625,
      "learning_rate": 1.6706860659435251e-06,
      "loss": 2.144,
      "step": 297
    },
    {
      "epoch": 0.2891800097040272,
      "grad_norm": 0.1171875,
      "learning_rate": 1.6685399056279257e-06,
      "loss": 2.1544,
      "step": 298
    },
    {
      "epoch": 0.2901504124211548,
      "grad_norm": 0.11279296875,
      "learning_rate": 1.6663883519448536e-06,
      "loss": 2.2841,
      "step": 299
    },
    {
      "epoch": 0.29112081513828236,
      "grad_norm": 0.1201171875,
      "learning_rate": 1.6642314253046958e-06,
      "loss": 2.2148,
      "step": 300
    },
    {
      "epoch": 0.29209121785541,
      "grad_norm": 0.11083984375,
      "learning_rate": 1.6620691461688095e-06,
      "loss": 2.2841,
      "step": 301
    },
    {
      "epoch": 0.2930616205725376,
      "grad_norm": 0.12158203125,
      "learning_rate": 1.6599015350493267e-06,
      "loss": 2.1961,
      "step": 302
    },
    {
      "epoch": 0.2940320232896652,
      "grad_norm": 0.10791015625,
      "learning_rate": 1.6577286125089614e-06,
      "loss": 2.2598,
      "step": 303
    },
    {
      "epoch": 0.29500242600679283,
      "grad_norm": 0.11572265625,
      "learning_rate": 1.6555503991608135e-06,
      "loss": 2.3087,
      "step": 304
    },
    {
      "epoch": 0.29597282872392044,
      "grad_norm": 0.1181640625,
      "learning_rate": 1.6533669156681733e-06,
      "loss": 2.1518,
      "step": 305
    },
    {
      "epoch": 0.29694323144104806,
      "grad_norm": 0.119140625,
      "learning_rate": 1.651178182744325e-06,
      "loss": 2.2685,
      "step": 306
    },
    {
      "epoch": 0.2979136341581756,
      "grad_norm": 0.1171875,
      "learning_rate": 1.648984221152351e-06,
      "loss": 2.3493,
      "step": 307
    },
    {
      "epoch": 0.29888403687530324,
      "grad_norm": 0.11181640625,
      "learning_rate": 1.6467850517049353e-06,
      "loss": 2.2456,
      "step": 308
    },
    {
      "epoch": 0.29985443959243085,
      "grad_norm": 0.10888671875,
      "learning_rate": 1.6445806952641642e-06,
      "loss": 2.1538,
      "step": 309
    },
    {
      "epoch": 0.29985443959243085,
      "eval_loss": 2.2966904640197754,
      "eval_runtime": 714.9767,
      "eval_samples_per_second": 0.906,
      "eval_steps_per_second": 0.227,
      "step": 309
    }
  ],
  "logging_steps": 1,
  "max_steps": 1030,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 103,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 9.381926389143306e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}