|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.09995147986414361, |
|
"eval_steps": 103, |
|
"global_step": 103, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0009704027171276079, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 2e-07, |
|
"loss": 2.2674, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0009704027171276079, |
|
"eval_loss": 2.3277485370635986, |
|
"eval_runtime": 707.6734, |
|
"eval_samples_per_second": 0.916, |
|
"eval_steps_per_second": 0.229, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0019408054342552159, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 4e-07, |
|
"loss": 2.2913, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.002911208151382824, |
|
"grad_norm": 0.169921875, |
|
"learning_rate": 6e-07, |
|
"loss": 2.2295, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0038816108685104317, |
|
"grad_norm": 0.1611328125, |
|
"learning_rate": 8e-07, |
|
"loss": 2.3423, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0048520135856380394, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 1e-06, |
|
"loss": 2.4168, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.005822416302765648, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 1.2e-06, |
|
"loss": 2.1116, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.006792819019893256, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 1.4e-06, |
|
"loss": 2.3571, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0077632217370208634, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 1.6e-06, |
|
"loss": 2.336, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.008733624454148471, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 1.8e-06, |
|
"loss": 2.2734, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.009704027171276079, |
|
"grad_norm": 0.158203125, |
|
"learning_rate": 2e-06, |
|
"loss": 2.3191, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010674429888403688, |
|
"grad_norm": 0.140625, |
|
"learning_rate": 1.9999957311433394e-06, |
|
"loss": 2.1462, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.011644832605531296, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 1.999982924613854e-06, |
|
"loss": 2.3692, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.012615235322658904, |
|
"grad_norm": 0.134765625, |
|
"learning_rate": 1.999961580533031e-06, |
|
"loss": 2.1327, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.013585638039786511, |
|
"grad_norm": 0.130859375, |
|
"learning_rate": 1.9999316991033473e-06, |
|
"loss": 2.33, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01455604075691412, |
|
"grad_norm": 0.1357421875, |
|
"learning_rate": 1.999893280608269e-06, |
|
"loss": 2.1897, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.015526443474041727, |
|
"grad_norm": 0.125, |
|
"learning_rate": 1.9998463254122472e-06, |
|
"loss": 2.3865, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.016496846191169336, |
|
"grad_norm": 0.1240234375, |
|
"learning_rate": 1.9997908339607153e-06, |
|
"loss": 2.2324, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.017467248908296942, |
|
"grad_norm": 0.12353515625, |
|
"learning_rate": 1.9997268067800845e-06, |
|
"loss": 2.3878, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.018437651625424552, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 1.9996542444777386e-06, |
|
"loss": 2.2695, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.019408054342552158, |
|
"grad_norm": 0.1318359375, |
|
"learning_rate": 1.9995731477420292e-06, |
|
"loss": 2.2875, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.020378457059679767, |
|
"grad_norm": 0.1357421875, |
|
"learning_rate": 1.999483517342268e-06, |
|
"loss": 2.4014, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.021348859776807377, |
|
"grad_norm": 0.1376953125, |
|
"learning_rate": 1.9993853541287205e-06, |
|
"loss": 2.3639, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.022319262493934983, |
|
"grad_norm": 0.1376953125, |
|
"learning_rate": 1.999278659032597e-06, |
|
"loss": 2.2434, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.023289665211062592, |
|
"grad_norm": 0.1328125, |
|
"learning_rate": 1.9991634330660437e-06, |
|
"loss": 2.3897, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.024260067928190198, |
|
"grad_norm": 0.126953125, |
|
"learning_rate": 1.999039677322135e-06, |
|
"loss": 2.2402, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.025230470645317808, |
|
"grad_norm": 0.1279296875, |
|
"learning_rate": 1.998907392974861e-06, |
|
"loss": 2.401, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.026200873362445413, |
|
"grad_norm": 0.12890625, |
|
"learning_rate": 1.9987665812791164e-06, |
|
"loss": 2.2646, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.027171276079573023, |
|
"grad_norm": 0.12890625, |
|
"learning_rate": 1.9986172435706903e-06, |
|
"loss": 2.3168, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.028141678796700632, |
|
"grad_norm": 0.1279296875, |
|
"learning_rate": 1.9984593812662525e-06, |
|
"loss": 2.3874, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.02911208151382824, |
|
"grad_norm": 0.134765625, |
|
"learning_rate": 1.9982929958633397e-06, |
|
"loss": 2.3126, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.030082484230955848, |
|
"grad_norm": 0.11376953125, |
|
"learning_rate": 1.998118088940341e-06, |
|
"loss": 2.233, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.031052886948083454, |
|
"grad_norm": 0.125, |
|
"learning_rate": 1.9979346621564857e-06, |
|
"loss": 2.1122, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.03202328966521106, |
|
"grad_norm": 0.1279296875, |
|
"learning_rate": 1.9977427172518227e-06, |
|
"loss": 2.2075, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.03299369238233867, |
|
"grad_norm": 0.1259765625, |
|
"learning_rate": 1.9975422560472093e-06, |
|
"loss": 2.3974, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.033964095099466275, |
|
"grad_norm": 0.1181640625, |
|
"learning_rate": 1.9973332804442895e-06, |
|
"loss": 2.344, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.034934497816593885, |
|
"grad_norm": 0.11962890625, |
|
"learning_rate": 1.997115792425479e-06, |
|
"loss": 2.2733, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.035904900533721494, |
|
"grad_norm": 0.1318359375, |
|
"learning_rate": 1.996889794053945e-06, |
|
"loss": 2.2443, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.036875303250849104, |
|
"grad_norm": 0.1220703125, |
|
"learning_rate": 1.9966552874735863e-06, |
|
"loss": 2.3253, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.03784570596797671, |
|
"grad_norm": 0.1201171875, |
|
"learning_rate": 1.9964122749090145e-06, |
|
"loss": 2.393, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.038816108685104316, |
|
"grad_norm": 0.1142578125, |
|
"learning_rate": 1.996160758665531e-06, |
|
"loss": 2.234, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.039786511402231925, |
|
"grad_norm": 0.11181640625, |
|
"learning_rate": 1.9959007411291063e-06, |
|
"loss": 2.1832, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.040756914119359534, |
|
"grad_norm": 0.11962890625, |
|
"learning_rate": 1.995632224766358e-06, |
|
"loss": 2.2993, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.041727316836487144, |
|
"grad_norm": 0.12060546875, |
|
"learning_rate": 1.995355212124525e-06, |
|
"loss": 2.4293, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.04269771955361475, |
|
"grad_norm": 0.115234375, |
|
"learning_rate": 1.9950697058314457e-06, |
|
"loss": 2.1791, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.043668122270742356, |
|
"grad_norm": 0.11328125, |
|
"learning_rate": 1.994775708595533e-06, |
|
"loss": 2.402, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.044638524987869965, |
|
"grad_norm": 0.11181640625, |
|
"learning_rate": 1.9944732232057465e-06, |
|
"loss": 2.271, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.045608927704997575, |
|
"grad_norm": 0.10888671875, |
|
"learning_rate": 1.994162252531567e-06, |
|
"loss": 2.1897, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.046579330422125184, |
|
"grad_norm": 0.11279296875, |
|
"learning_rate": 1.9938427995229723e-06, |
|
"loss": 2.268, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.04754973313925279, |
|
"grad_norm": 0.125, |
|
"learning_rate": 1.993514867210404e-06, |
|
"loss": 2.2919, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.048520135856380396, |
|
"grad_norm": 0.126953125, |
|
"learning_rate": 1.9931784587047422e-06, |
|
"loss": 2.4426, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.049490538573508006, |
|
"grad_norm": 0.11767578125, |
|
"learning_rate": 1.9928335771972748e-06, |
|
"loss": 2.3823, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.050460941290635615, |
|
"grad_norm": 0.11572265625, |
|
"learning_rate": 1.9924802259596686e-06, |
|
"loss": 2.2299, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.051431344007763224, |
|
"grad_norm": 0.11767578125, |
|
"learning_rate": 1.9921184083439354e-06, |
|
"loss": 2.4699, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.05240174672489083, |
|
"grad_norm": 0.11083984375, |
|
"learning_rate": 1.991748127782404e-06, |
|
"loss": 2.2251, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.053372149442018436, |
|
"grad_norm": 0.1142578125, |
|
"learning_rate": 1.9913693877876844e-06, |
|
"loss": 2.204, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.054342552159146046, |
|
"grad_norm": 0.111328125, |
|
"learning_rate": 1.9909821919526363e-06, |
|
"loss": 2.228, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.055312954876273655, |
|
"grad_norm": 0.109375, |
|
"learning_rate": 1.9905865439503337e-06, |
|
"loss": 2.2021, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.056283357593401265, |
|
"grad_norm": 0.10888671875, |
|
"learning_rate": 1.9901824475340314e-06, |
|
"loss": 2.2048, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.05725376031052887, |
|
"grad_norm": 0.11279296875, |
|
"learning_rate": 1.9897699065371285e-06, |
|
"loss": 2.2993, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.05822416302765648, |
|
"grad_norm": 0.111328125, |
|
"learning_rate": 1.9893489248731336e-06, |
|
"loss": 2.2354, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.059194565744784086, |
|
"grad_norm": 0.123046875, |
|
"learning_rate": 1.9889195065356238e-06, |
|
"loss": 2.3262, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.060164968461911696, |
|
"grad_norm": 0.11767578125, |
|
"learning_rate": 1.988481655598212e-06, |
|
"loss": 2.4042, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.0611353711790393, |
|
"grad_norm": 0.1171875, |
|
"learning_rate": 1.988035376214504e-06, |
|
"loss": 2.2258, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.06210577389616691, |
|
"grad_norm": 0.11669921875, |
|
"learning_rate": 1.987580672618062e-06, |
|
"loss": 2.2404, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.06307617661329452, |
|
"grad_norm": 0.1142578125, |
|
"learning_rate": 1.987117549122363e-06, |
|
"loss": 2.398, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.06404657933042213, |
|
"grad_norm": 0.1181640625, |
|
"learning_rate": 1.986646010120756e-06, |
|
"loss": 2.264, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.06501698204754973, |
|
"grad_norm": 0.1162109375, |
|
"learning_rate": 1.986166060086425e-06, |
|
"loss": 2.3085, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.06598738476467735, |
|
"grad_norm": 0.11669921875, |
|
"learning_rate": 1.985677703572344e-06, |
|
"loss": 2.219, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.06695778748180495, |
|
"grad_norm": 0.1181640625, |
|
"learning_rate": 1.9851809452112317e-06, |
|
"loss": 2.2302, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.06792819019893255, |
|
"grad_norm": 0.1142578125, |
|
"learning_rate": 1.9846757897155116e-06, |
|
"loss": 2.3431, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06889859291606017, |
|
"grad_norm": 0.11669921875, |
|
"learning_rate": 1.984162241877264e-06, |
|
"loss": 2.2894, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.06986899563318777, |
|
"grad_norm": 0.11083984375, |
|
"learning_rate": 1.983640306568183e-06, |
|
"loss": 2.3387, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.07083939835031539, |
|
"grad_norm": 0.11669921875, |
|
"learning_rate": 1.9831099887395287e-06, |
|
"loss": 2.092, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.07180980106744299, |
|
"grad_norm": 0.123046875, |
|
"learning_rate": 1.98257129342208e-06, |
|
"loss": 2.3139, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.07278020378457059, |
|
"grad_norm": 0.12158203125, |
|
"learning_rate": 1.9820242257260884e-06, |
|
"loss": 2.3842, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.07375060650169821, |
|
"grad_norm": 0.111328125, |
|
"learning_rate": 1.981468790841229e-06, |
|
"loss": 2.2733, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.07472100921882581, |
|
"grad_norm": 0.1044921875, |
|
"learning_rate": 1.9809049940365504e-06, |
|
"loss": 2.1439, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.07569141193595343, |
|
"grad_norm": 0.10888671875, |
|
"learning_rate": 1.980332840660425e-06, |
|
"loss": 2.316, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.07666181465308103, |
|
"grad_norm": 0.12255859375, |
|
"learning_rate": 1.979752336140499e-06, |
|
"loss": 2.4238, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.07763221737020863, |
|
"grad_norm": 0.10986328125, |
|
"learning_rate": 1.9791634859836408e-06, |
|
"loss": 2.2081, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07860262008733625, |
|
"grad_norm": 0.11572265625, |
|
"learning_rate": 1.978566295775887e-06, |
|
"loss": 2.3303, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.07957302280446385, |
|
"grad_norm": 0.111328125, |
|
"learning_rate": 1.977960771182393e-06, |
|
"loss": 2.3657, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.08054342552159147, |
|
"grad_norm": 0.1103515625, |
|
"learning_rate": 1.9773469179473754e-06, |
|
"loss": 2.3921, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.08151382823871907, |
|
"grad_norm": 0.1123046875, |
|
"learning_rate": 1.9767247418940593e-06, |
|
"loss": 2.3947, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.08248423095584667, |
|
"grad_norm": 0.12109375, |
|
"learning_rate": 1.9760942489246236e-06, |
|
"loss": 2.2361, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.08345463367297429, |
|
"grad_norm": 0.1083984375, |
|
"learning_rate": 1.975455445020144e-06, |
|
"loss": 2.1025, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.08442503639010189, |
|
"grad_norm": 0.115234375, |
|
"learning_rate": 1.9748083362405373e-06, |
|
"loss": 2.4577, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0853954391072295, |
|
"grad_norm": 0.1171875, |
|
"learning_rate": 1.974152928724502e-06, |
|
"loss": 2.3706, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.08636584182435711, |
|
"grad_norm": 0.11181640625, |
|
"learning_rate": 1.973489228689463e-06, |
|
"loss": 2.4223, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.08733624454148471, |
|
"grad_norm": 0.12158203125, |
|
"learning_rate": 1.9728172424315087e-06, |
|
"loss": 2.1975, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08830664725861233, |
|
"grad_norm": 0.10986328125, |
|
"learning_rate": 1.9721369763253348e-06, |
|
"loss": 2.3638, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.08927704997573993, |
|
"grad_norm": 0.10986328125, |
|
"learning_rate": 1.9714484368241828e-06, |
|
"loss": 2.278, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.09024745269286755, |
|
"grad_norm": 0.1123046875, |
|
"learning_rate": 1.9707516304597783e-06, |
|
"loss": 2.3421, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.09121785540999515, |
|
"grad_norm": 0.1171875, |
|
"learning_rate": 1.9700465638422686e-06, |
|
"loss": 2.3418, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.09218825812712275, |
|
"grad_norm": 0.11181640625, |
|
"learning_rate": 1.9693332436601613e-06, |
|
"loss": 2.1814, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.09315866084425037, |
|
"grad_norm": 0.1259765625, |
|
"learning_rate": 1.96861167668026e-06, |
|
"loss": 2.299, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.09412906356137797, |
|
"grad_norm": 0.10791015625, |
|
"learning_rate": 1.9678818697476e-06, |
|
"loss": 2.1708, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.09509946627850557, |
|
"grad_norm": 0.1171875, |
|
"learning_rate": 1.9671438297853845e-06, |
|
"loss": 2.3062, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.09606986899563319, |
|
"grad_norm": 0.11669921875, |
|
"learning_rate": 1.9663975637949172e-06, |
|
"loss": 2.2044, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.09704027171276079, |
|
"grad_norm": 0.11181640625, |
|
"learning_rate": 1.9656430788555372e-06, |
|
"loss": 2.1654, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09801067442988841, |
|
"grad_norm": 0.1279296875, |
|
"learning_rate": 1.964880382124551e-06, |
|
"loss": 2.3448, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.09898107714701601, |
|
"grad_norm": 0.1103515625, |
|
"learning_rate": 1.964109480837165e-06, |
|
"loss": 2.2951, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.09995147986414361, |
|
"grad_norm": 0.10986328125, |
|
"learning_rate": 1.9633303823064186e-06, |
|
"loss": 2.2252, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.09995147986414361, |
|
"eval_loss": 2.3053860664367676, |
|
"eval_runtime": 714.5363, |
|
"eval_samples_per_second": 0.907, |
|
"eval_steps_per_second": 0.227, |
|
"step": 103 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1030, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 103, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.1285759393293926e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|