{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9921259842519685,
"eval_steps": 500,
"global_step": 570,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05249343832020997,
"grad_norm": 5.856240742467741,
"learning_rate": 5e-06,
"loss": 0.9788,
"step": 10
},
{
"epoch": 0.10498687664041995,
"grad_norm": 1.504758475031017,
"learning_rate": 5e-06,
"loss": 0.8672,
"step": 20
},
{
"epoch": 0.15748031496062992,
"grad_norm": 0.9699579752356402,
"learning_rate": 5e-06,
"loss": 0.8312,
"step": 30
},
{
"epoch": 0.2099737532808399,
"grad_norm": 2.3401202479822167,
"learning_rate": 5e-06,
"loss": 0.8176,
"step": 40
},
{
"epoch": 0.26246719160104987,
"grad_norm": 1.4328785905158876,
"learning_rate": 5e-06,
"loss": 0.8021,
"step": 50
},
{
"epoch": 0.31496062992125984,
"grad_norm": 1.776027549071983,
"learning_rate": 5e-06,
"loss": 0.7924,
"step": 60
},
{
"epoch": 0.3674540682414698,
"grad_norm": 1.6731596205975654,
"learning_rate": 5e-06,
"loss": 0.7862,
"step": 70
},
{
"epoch": 0.4199475065616798,
"grad_norm": 0.7241122381033481,
"learning_rate": 5e-06,
"loss": 0.7745,
"step": 80
},
{
"epoch": 0.47244094488188976,
"grad_norm": 0.667641025148353,
"learning_rate": 5e-06,
"loss": 0.7684,
"step": 90
},
{
"epoch": 0.5249343832020997,
"grad_norm": 0.5504196367515327,
"learning_rate": 5e-06,
"loss": 0.7659,
"step": 100
},
{
"epoch": 0.5774278215223098,
"grad_norm": 0.5097698406084038,
"learning_rate": 5e-06,
"loss": 0.7644,
"step": 110
},
{
"epoch": 0.6299212598425197,
"grad_norm": 0.7982727448245113,
"learning_rate": 5e-06,
"loss": 0.7621,
"step": 120
},
{
"epoch": 0.6824146981627297,
"grad_norm": 0.7024212096277765,
"learning_rate": 5e-06,
"loss": 0.7587,
"step": 130
},
{
"epoch": 0.7349081364829396,
"grad_norm": 0.6368075359040738,
"learning_rate": 5e-06,
"loss": 0.7528,
"step": 140
},
{
"epoch": 0.7874015748031497,
"grad_norm": 0.5954968543585505,
"learning_rate": 5e-06,
"loss": 0.7523,
"step": 150
},
{
"epoch": 0.8398950131233596,
"grad_norm": 0.670163545471037,
"learning_rate": 5e-06,
"loss": 0.7534,
"step": 160
},
{
"epoch": 0.8923884514435696,
"grad_norm": 0.5996022754065202,
"learning_rate": 5e-06,
"loss": 0.7517,
"step": 170
},
{
"epoch": 0.9448818897637795,
"grad_norm": 0.8415836628541776,
"learning_rate": 5e-06,
"loss": 0.7459,
"step": 180
},
{
"epoch": 0.9973753280839895,
"grad_norm": 0.690137189983478,
"learning_rate": 5e-06,
"loss": 0.7488,
"step": 190
},
{
"epoch": 1.0498687664041995,
"grad_norm": 0.5870306499065036,
"learning_rate": 5e-06,
"loss": 0.7462,
"step": 200
},
{
"epoch": 1.1023622047244095,
"grad_norm": 0.6711935804641513,
"learning_rate": 5e-06,
"loss": 0.7045,
"step": 210
},
{
"epoch": 1.1548556430446195,
"grad_norm": 0.6793309667692513,
"learning_rate": 5e-06,
"loss": 0.7054,
"step": 220
},
{
"epoch": 1.2073490813648293,
"grad_norm": 0.4945921653765713,
"learning_rate": 5e-06,
"loss": 0.7007,
"step": 230
},
{
"epoch": 1.2598425196850394,
"grad_norm": 0.7994708362053626,
"learning_rate": 5e-06,
"loss": 0.7035,
"step": 240
},
{
"epoch": 1.3123359580052494,
"grad_norm": 0.6645996514564108,
"learning_rate": 5e-06,
"loss": 0.7029,
"step": 250
},
{
"epoch": 1.3648293963254594,
"grad_norm": 0.6524179166081678,
"learning_rate": 5e-06,
"loss": 0.7069,
"step": 260
},
{
"epoch": 1.4173228346456692,
"grad_norm": 0.8192112613994083,
"learning_rate": 5e-06,
"loss": 0.7058,
"step": 270
},
{
"epoch": 1.4698162729658792,
"grad_norm": 0.7359384256295017,
"learning_rate": 5e-06,
"loss": 0.7096,
"step": 280
},
{
"epoch": 1.5223097112860893,
"grad_norm": 0.8479895809019217,
"learning_rate": 5e-06,
"loss": 0.7014,
"step": 290
},
{
"epoch": 1.574803149606299,
"grad_norm": 0.5643654766970888,
"learning_rate": 5e-06,
"loss": 0.7027,
"step": 300
},
{
"epoch": 1.627296587926509,
"grad_norm": 0.5431958308145564,
"learning_rate": 5e-06,
"loss": 0.7014,
"step": 310
},
{
"epoch": 1.6797900262467191,
"grad_norm": 0.520988761182218,
"learning_rate": 5e-06,
"loss": 0.7004,
"step": 320
},
{
"epoch": 1.7322834645669292,
"grad_norm": 0.5293919501601649,
"learning_rate": 5e-06,
"loss": 0.7014,
"step": 330
},
{
"epoch": 1.7847769028871392,
"grad_norm": 0.5558746861224813,
"learning_rate": 5e-06,
"loss": 0.7051,
"step": 340
},
{
"epoch": 1.8372703412073492,
"grad_norm": 0.5864088226331533,
"learning_rate": 5e-06,
"loss": 0.7028,
"step": 350
},
{
"epoch": 1.889763779527559,
"grad_norm": 0.5203657185554663,
"learning_rate": 5e-06,
"loss": 0.7029,
"step": 360
},
{
"epoch": 1.942257217847769,
"grad_norm": 0.5249283309342107,
"learning_rate": 5e-06,
"loss": 0.6985,
"step": 370
},
{
"epoch": 1.9947506561679789,
"grad_norm": 0.5014731402840785,
"learning_rate": 5e-06,
"loss": 0.698,
"step": 380
},
{
"epoch": 2.047244094488189,
"grad_norm": 0.6606881961316096,
"learning_rate": 5e-06,
"loss": 0.701,
"step": 390
},
{
"epoch": 2.099737532808399,
"grad_norm": 0.7956310597917616,
"learning_rate": 5e-06,
"loss": 0.6552,
"step": 400
},
{
"epoch": 2.152230971128609,
"grad_norm": 0.7409823582769325,
"learning_rate": 5e-06,
"loss": 0.6529,
"step": 410
},
{
"epoch": 2.204724409448819,
"grad_norm": 0.5301522848133626,
"learning_rate": 5e-06,
"loss": 0.6583,
"step": 420
},
{
"epoch": 2.257217847769029,
"grad_norm": 0.5707394351699061,
"learning_rate": 5e-06,
"loss": 0.6567,
"step": 430
},
{
"epoch": 2.309711286089239,
"grad_norm": 0.8305197146184463,
"learning_rate": 5e-06,
"loss": 0.6591,
"step": 440
},
{
"epoch": 2.362204724409449,
"grad_norm": 0.6345171704808862,
"learning_rate": 5e-06,
"loss": 0.6586,
"step": 450
},
{
"epoch": 2.4146981627296586,
"grad_norm": 0.540143075416848,
"learning_rate": 5e-06,
"loss": 0.6577,
"step": 460
},
{
"epoch": 2.4671916010498687,
"grad_norm": 0.6047688082843984,
"learning_rate": 5e-06,
"loss": 0.6583,
"step": 470
},
{
"epoch": 2.5196850393700787,
"grad_norm": 0.5089956964635003,
"learning_rate": 5e-06,
"loss": 0.6597,
"step": 480
},
{
"epoch": 2.5721784776902887,
"grad_norm": 0.6189665519218218,
"learning_rate": 5e-06,
"loss": 0.6593,
"step": 490
},
{
"epoch": 2.6246719160104988,
"grad_norm": 0.6080333484551841,
"learning_rate": 5e-06,
"loss": 0.6586,
"step": 500
},
{
"epoch": 2.677165354330709,
"grad_norm": 0.6796282219494648,
"learning_rate": 5e-06,
"loss": 0.6613,
"step": 510
},
{
"epoch": 2.729658792650919,
"grad_norm": 0.6734905288037869,
"learning_rate": 5e-06,
"loss": 0.6593,
"step": 520
},
{
"epoch": 2.7821522309711284,
"grad_norm": 0.7251898875558835,
"learning_rate": 5e-06,
"loss": 0.6579,
"step": 530
},
{
"epoch": 2.8346456692913384,
"grad_norm": 0.5903651504718952,
"learning_rate": 5e-06,
"loss": 0.6613,
"step": 540
},
{
"epoch": 2.8871391076115485,
"grad_norm": 0.6998542288142676,
"learning_rate": 5e-06,
"loss": 0.6589,
"step": 550
},
{
"epoch": 2.9396325459317585,
"grad_norm": 0.6240795370124482,
"learning_rate": 5e-06,
"loss": 0.6594,
"step": 560
},
{
"epoch": 2.9921259842519685,
"grad_norm": 0.5469561259952066,
"learning_rate": 5e-06,
"loss": 0.6579,
"step": 570
},
{
"epoch": 2.9921259842519685,
"step": 570,
"total_flos": 954352470589440.0,
"train_loss": 0.718004734474316,
"train_runtime": 8541.7768,
"train_samples_per_second": 34.252,
"train_steps_per_second": 0.067
}
],
"logging_steps": 10,
"max_steps": 570,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 954352470589440.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}