Boffl's picture
Upload trainer_state.json with huggingface_hub
46afe63 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997049277072882,
"eval_steps": 500,
"global_step": 847,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011802891708468575,
"grad_norm": 1.0318430662155151,
"learning_rate": 5.882352941176471e-06,
"loss": 2.0002,
"step": 10
},
{
"epoch": 0.02360578341693715,
"grad_norm": 0.7277640104293823,
"learning_rate": 1.1764705882352942e-05,
"loss": 1.5384,
"step": 20
},
{
"epoch": 0.03540867512540572,
"grad_norm": 0.5723221898078918,
"learning_rate": 1.7647058823529414e-05,
"loss": 1.2868,
"step": 30
},
{
"epoch": 0.0472115668338743,
"grad_norm": 0.5436434149742126,
"learning_rate": 2.3529411764705884e-05,
"loss": 1.195,
"step": 40
},
{
"epoch": 0.05901445854234287,
"grad_norm": 0.5486343502998352,
"learning_rate": 2.9411764705882354e-05,
"loss": 1.099,
"step": 50
},
{
"epoch": 0.07081735025081144,
"grad_norm": 0.540969967842102,
"learning_rate": 3.529411764705883e-05,
"loss": 1.0529,
"step": 60
},
{
"epoch": 0.08262024195928003,
"grad_norm": 0.5404631495475769,
"learning_rate": 4.11764705882353e-05,
"loss": 0.9962,
"step": 70
},
{
"epoch": 0.0944231336677486,
"grad_norm": 0.7088269591331482,
"learning_rate": 4.705882352941177e-05,
"loss": 1.0458,
"step": 80
},
{
"epoch": 0.10622602537621717,
"grad_norm": 0.5803569555282593,
"learning_rate": 4.9994688411216076e-05,
"loss": 1.0237,
"step": 90
},
{
"epoch": 0.11802891708468574,
"grad_norm": 0.5251590013504028,
"learning_rate": 4.99522092422138e-05,
"loss": 1.005,
"step": 100
},
{
"epoch": 0.1298318087931543,
"grad_norm": 0.5402054786682129,
"learning_rate": 4.986732309873992e-05,
"loss": 0.9771,
"step": 110
},
{
"epoch": 0.14163470050162288,
"grad_norm": 0.5150293111801147,
"learning_rate": 4.9740174247159156e-05,
"loss": 0.9695,
"step": 120
},
{
"epoch": 0.15343759221009148,
"grad_norm": 0.6688190698623657,
"learning_rate": 4.95709787804856e-05,
"loss": 0.9605,
"step": 130
},
{
"epoch": 0.16524048391856005,
"grad_norm": 0.4473928213119507,
"learning_rate": 4.936002425112657e-05,
"loss": 0.8765,
"step": 140
},
{
"epoch": 0.17704337562702863,
"grad_norm": 0.5843707919120789,
"learning_rate": 4.910766918217935e-05,
"loss": 0.9371,
"step": 150
},
{
"epoch": 0.1888462673354972,
"grad_norm": 0.5653939247131348,
"learning_rate": 4.881434245811115e-05,
"loss": 0.9091,
"step": 160
},
{
"epoch": 0.20064915904396577,
"grad_norm": 0.725297212600708,
"learning_rate": 4.8480542595858025e-05,
"loss": 0.9217,
"step": 170
},
{
"epoch": 0.21245205075243434,
"grad_norm": 0.4762970805168152,
"learning_rate": 4.810683689758147e-05,
"loss": 0.9,
"step": 180
},
{
"epoch": 0.2242549424609029,
"grad_norm": 0.6104872226715088,
"learning_rate": 4.7693860486522604e-05,
"loss": 0.8662,
"step": 190
},
{
"epoch": 0.23605783416937148,
"grad_norm": 0.5987859964370728,
"learning_rate": 4.7242315227592496e-05,
"loss": 0.8754,
"step": 200
},
{
"epoch": 0.24786072587784008,
"grad_norm": 0.5462383031845093,
"learning_rate": 4.675296853453326e-05,
"loss": 0.8838,
"step": 210
},
{
"epoch": 0.2596636175863086,
"grad_norm": 0.7473201155662537,
"learning_rate": 4.6226652065676974e-05,
"loss": 0.8798,
"step": 220
},
{
"epoch": 0.2714665092947772,
"grad_norm": 0.6000027656555176,
"learning_rate": 4.566426031051922e-05,
"loss": 0.9065,
"step": 230
},
{
"epoch": 0.28326940100324577,
"grad_norm": 0.6105000972747803,
"learning_rate": 4.506674906950929e-05,
"loss": 0.9111,
"step": 240
},
{
"epoch": 0.29507229271171437,
"grad_norm": 0.5564777851104736,
"learning_rate": 4.4435133829640645e-05,
"loss": 0.8646,
"step": 250
},
{
"epoch": 0.30687518442018297,
"grad_norm": 0.6954275369644165,
"learning_rate": 4.3770488038602555e-05,
"loss": 0.8485,
"step": 260
},
{
"epoch": 0.3186780761286515,
"grad_norm": 0.8191194534301758,
"learning_rate": 4.30739412804258e-05,
"loss": 0.826,
"step": 270
},
{
"epoch": 0.3304809678371201,
"grad_norm": 0.6449839472770691,
"learning_rate": 4.234667735572323e-05,
"loss": 0.8685,
"step": 280
},
{
"epoch": 0.34228385954558865,
"grad_norm": 0.7887718677520752,
"learning_rate": 4.158993226978757e-05,
"loss": 0.8229,
"step": 290
},
{
"epoch": 0.35408675125405725,
"grad_norm": 0.764539361000061,
"learning_rate": 4.080499213196607e-05,
"loss": 0.8303,
"step": 300
},
{
"epoch": 0.3658896429625258,
"grad_norm": 0.6305603384971619,
"learning_rate": 3.999319096988183e-05,
"loss": 0.829,
"step": 310
},
{
"epoch": 0.3776925346709944,
"grad_norm": 0.5482339859008789,
"learning_rate": 3.915590846221669e-05,
"loss": 0.8356,
"step": 320
},
{
"epoch": 0.389495426379463,
"grad_norm": 0.6555970311164856,
"learning_rate": 3.8294567593908915e-05,
"loss": 0.8281,
"step": 330
},
{
"epoch": 0.40129831808793154,
"grad_norm": 0.8127148151397705,
"learning_rate": 3.741063223775066e-05,
"loss": 0.8543,
"step": 340
},
{
"epoch": 0.41310120979640014,
"grad_norm": 0.8948593735694885,
"learning_rate": 3.650560466649538e-05,
"loss": 0.8639,
"step": 350
},
{
"epoch": 0.4249041015048687,
"grad_norm": 0.6402966976165771,
"learning_rate": 3.5581022999703464e-05,
"loss": 0.8324,
"step": 360
},
{
"epoch": 0.4367069932133373,
"grad_norm": 0.6675844192504883,
"learning_rate": 3.4638458589665194e-05,
"loss": 0.8265,
"step": 370
},
{
"epoch": 0.4485098849218058,
"grad_norm": 0.6756200194358826,
"learning_rate": 3.367951335084379e-05,
"loss": 0.7834,
"step": 380
},
{
"epoch": 0.4603127766302744,
"grad_norm": 0.7358006834983826,
"learning_rate": 3.270581703737716e-05,
"loss": 0.8107,
"step": 390
},
{
"epoch": 0.47211566833874297,
"grad_norm": 0.6496703028678894,
"learning_rate": 3.171902447326536e-05,
"loss": 0.8055,
"step": 400
},
{
"epoch": 0.48391856004721157,
"grad_norm": 0.6885930895805359,
"learning_rate": 3.07208127399511e-05,
"loss": 0.8249,
"step": 410
},
{
"epoch": 0.49572145175568016,
"grad_norm": 0.7303836941719055,
"learning_rate": 2.9712878326073168e-05,
"loss": 0.8054,
"step": 420
},
{
"epoch": 0.5075243434641488,
"grad_norm": 0.6711559295654297,
"learning_rate": 2.869693424423673e-05,
"loss": 0.7779,
"step": 430
},
{
"epoch": 0.5193272351726173,
"grad_norm": 0.6829948425292969,
"learning_rate": 2.767470711970067e-05,
"loss": 0.7729,
"step": 440
},
{
"epoch": 0.5311301268810859,
"grad_norm": 0.6073248386383057,
"learning_rate": 2.6647934255929933e-05,
"loss": 0.7867,
"step": 450
},
{
"epoch": 0.5429330185895545,
"grad_norm": 0.7291135787963867,
"learning_rate": 2.5618360681999876e-05,
"loss": 0.7751,
"step": 460
},
{
"epoch": 0.554735910298023,
"grad_norm": 0.6531949043273926,
"learning_rate": 2.4587736186870766e-05,
"loss": 0.7979,
"step": 470
},
{
"epoch": 0.5665388020064915,
"grad_norm": 0.5947457551956177,
"learning_rate": 2.3557812345572718e-05,
"loss": 0.7807,
"step": 480
},
{
"epoch": 0.5783416937149601,
"grad_norm": 0.7103855609893799,
"learning_rate": 2.2530339542355145e-05,
"loss": 0.8293,
"step": 490
},
{
"epoch": 0.5901445854234287,
"grad_norm": 1.0487534999847412,
"learning_rate": 2.150706399585999e-05,
"loss": 0.798,
"step": 500
},
{
"epoch": 0.6019474771318973,
"grad_norm": 0.8106992244720459,
"learning_rate": 2.048972479137449e-05,
"loss": 0.7426,
"step": 510
},
{
"epoch": 0.6137503688403659,
"grad_norm": 0.6543154120445251,
"learning_rate": 1.948005092520735e-05,
"loss": 0.7813,
"step": 520
},
{
"epoch": 0.6255532605488344,
"grad_norm": 0.6375657916069031,
"learning_rate": 1.8479758366211334e-05,
"loss": 0.7701,
"step": 530
},
{
"epoch": 0.637356152257303,
"grad_norm": 0.6001560091972351,
"learning_rate": 1.7490547139446407e-05,
"loss": 0.7777,
"step": 540
},
{
"epoch": 0.6491590439657716,
"grad_norm": 0.7287290096282959,
"learning_rate": 1.6514098436939835e-05,
"loss": 0.7693,
"step": 550
},
{
"epoch": 0.6609619356742402,
"grad_norm": 0.6269923448562622,
"learning_rate": 1.555207176045349e-05,
"loss": 0.7672,
"step": 560
},
{
"epoch": 0.6727648273827088,
"grad_norm": 0.622016966342926,
"learning_rate": 1.4606102101114391e-05,
"loss": 0.7504,
"step": 570
},
{
"epoch": 0.6845677190911773,
"grad_norm": 0.5838598012924194,
"learning_rate": 1.367779716070179e-05,
"loss": 0.7865,
"step": 580
},
{
"epoch": 0.6963706107996459,
"grad_norm": 0.656366765499115,
"learning_rate": 1.2768734619313147e-05,
"loss": 0.7696,
"step": 590
},
{
"epoch": 0.7081735025081145,
"grad_norm": 0.6976104378700256,
"learning_rate": 1.188045945405299e-05,
"loss": 0.7652,
"step": 600
},
{
"epoch": 0.7199763942165831,
"grad_norm": 0.7407099604606628,
"learning_rate": 1.1014481313301172e-05,
"loss": 0.7533,
"step": 610
},
{
"epoch": 0.7317792859250516,
"grad_norm": 0.5191411375999451,
"learning_rate": 1.017227195102352e-05,
"loss": 0.7578,
"step": 620
},
{
"epoch": 0.7435821776335202,
"grad_norm": 0.6771509051322937,
"learning_rate": 9.355262725484901e-06,
"loss": 0.7768,
"step": 630
},
{
"epoch": 0.7553850693419888,
"grad_norm": 0.6330916881561279,
"learning_rate": 8.564842166616047e-06,
"loss": 0.7071,
"step": 640
},
{
"epoch": 0.7671879610504574,
"grad_norm": 0.693899929523468,
"learning_rate": 7.802353616168229e-06,
"loss": 0.7544,
"step": 650
},
{
"epoch": 0.778990852758926,
"grad_norm": 0.6973963379859924,
"learning_rate": 7.069092944666586e-06,
"loss": 0.7418,
"step": 660
},
{
"epoch": 0.7907937444673945,
"grad_norm": 0.758264422416687,
"learning_rate": 6.3663063490420336e-06,
"loss": 0.7564,
"step": 670
},
{
"epoch": 0.8025966361758631,
"grad_norm": 0.6236333847045898,
"learning_rate": 5.695188234684898e-06,
"loss": 0.7431,
"step": 680
},
{
"epoch": 0.8143995278843317,
"grad_norm": 0.6301143169403076,
"learning_rate": 5.056879185519714e-06,
"loss": 0.7307,
"step": 690
},
{
"epoch": 0.8262024195928003,
"grad_norm": 0.5712493062019348,
"learning_rate": 4.452464025551037e-06,
"loss": 0.7157,
"step": 700
},
{
"epoch": 0.8380053113012688,
"grad_norm": 0.6849854588508606,
"learning_rate": 3.8829699751748885e-06,
"loss": 0.7367,
"step": 710
},
{
"epoch": 0.8498082030097374,
"grad_norm": 0.6399794816970825,
"learning_rate": 3.3493649053890326e-06,
"loss": 0.7288,
"step": 720
},
{
"epoch": 0.861611094718206,
"grad_norm": 0.8012081384658813,
"learning_rate": 2.8525556928693186e-06,
"loss": 0.7237,
"step": 730
},
{
"epoch": 0.8734139864266746,
"grad_norm": 0.7375155687332153,
"learning_rate": 2.3933866787074627e-06,
"loss": 0.7543,
"step": 740
},
{
"epoch": 0.8852168781351432,
"grad_norm": 0.6023644208908081,
"learning_rate": 1.9726382334298883e-06,
"loss": 0.74,
"step": 750
},
{
"epoch": 0.8970197698436116,
"grad_norm": 0.6464205980300903,
"learning_rate": 1.5910254307362705e-06,
"loss": 0.7578,
"step": 760
},
{
"epoch": 0.9088226615520802,
"grad_norm": 0.6287794709205627,
"learning_rate": 1.2491968322118685e-06,
"loss": 0.7513,
"step": 770
},
{
"epoch": 0.9206255532605488,
"grad_norm": 0.6092919707298279,
"learning_rate": 9.477333850790554e-07,
"loss": 0.7187,
"step": 780
},
{
"epoch": 0.9324284449690174,
"grad_norm": 0.6522098183631897,
"learning_rate": 6.871474348613266e-07,
"loss": 0.7519,
"step": 790
},
{
"epoch": 0.9442313366774859,
"grad_norm": 0.5721604228019714,
"learning_rate": 4.678818546378333e-07,
"loss": 0.7502,
"step": 800
},
{
"epoch": 0.9560342283859545,
"grad_norm": 0.8141267895698547,
"learning_rate": 2.903092923682266e-07,
"loss": 0.7512,
"step": 810
},
{
"epoch": 0.9678371200944231,
"grad_norm": 0.5925819277763367,
"learning_rate": 1.5473153756709046e-07,
"loss": 0.795,
"step": 820
},
{
"epoch": 0.9796400118028917,
"grad_norm": 0.599314272403717,
"learning_rate": 6.137900840425815e-08,
"loss": 0.7319,
"step": 830
},
{
"epoch": 0.9914429035113603,
"grad_norm": 0.7140465974807739,
"learning_rate": 1.0410360102702799e-08,
"loss": 0.7747,
"step": 840
},
{
"epoch": 0.9997049277072882,
"step": 847,
"total_flos": 6.067900108221972e+17,
"train_loss": 0.8547630963105941,
"train_runtime": 5769.0005,
"train_samples_per_second": 4.699,
"train_steps_per_second": 0.147
}
],
"logging_steps": 10,
"max_steps": 847,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.067900108221972e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}