{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9161704076958315,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01832340815391663,
"grad_norm": 0.06011037901043892,
"learning_rate": 4e-05,
"loss": 1.296,
"step": 10
},
{
"epoch": 0.03664681630783326,
"grad_norm": 0.05856110155582428,
"learning_rate": 8e-05,
"loss": 1.3316,
"step": 20
},
{
"epoch": 0.054970224461749886,
"grad_norm": 0.0607464499771595,
"learning_rate": 0.00012,
"loss": 1.2794,
"step": 30
},
{
"epoch": 0.07329363261566652,
"grad_norm": 0.06632011383771896,
"learning_rate": 0.00016,
"loss": 1.3129,
"step": 40
},
{
"epoch": 0.09161704076958314,
"grad_norm": 0.06631691753864288,
"learning_rate": 0.0002,
"loss": 1.2741,
"step": 50
},
{
"epoch": 0.10994044892349977,
"grad_norm": 0.056466877460479736,
"learning_rate": 0.00019998035748930052,
"loss": 1.2717,
"step": 60
},
{
"epoch": 0.1282638570774164,
"grad_norm": 0.05860245227813721,
"learning_rate": 0.00019992143767376668,
"loss": 1.2091,
"step": 70
},
{
"epoch": 0.14658726523133303,
"grad_norm": 0.06553175300359726,
"learning_rate": 0.00019982326370006058,
"loss": 1.1926,
"step": 80
},
{
"epoch": 0.16491067338524965,
"grad_norm": 0.07061401754617691,
"learning_rate": 0.00019968587413584876,
"loss": 1.1767,
"step": 90
},
{
"epoch": 0.1832340815391663,
"grad_norm": 0.07183243334293365,
"learning_rate": 0.000199509322954651,
"loss": 1.1183,
"step": 100
},
{
"epoch": 0.2015574896930829,
"grad_norm": 0.06944898515939713,
"learning_rate": 0.00019929367951463655,
"loss": 1.0868,
"step": 110
},
{
"epoch": 0.21988089784699955,
"grad_norm": 0.06642703711986542,
"learning_rate": 0.00019903902853137703,
"loss": 1.048,
"step": 120
},
{
"epoch": 0.23820430600091616,
"grad_norm": 0.06603793054819107,
"learning_rate": 0.00019874547004456562,
"loss": 1.0195,
"step": 130
},
{
"epoch": 0.2565277141548328,
"grad_norm": 0.06488285213708878,
"learning_rate": 0.00019841311937871675,
"loss": 1.0014,
"step": 140
},
{
"epoch": 0.2748511223087494,
"grad_norm": 0.05940372124314308,
"learning_rate": 0.0001980421070978606,
"loss": 0.9943,
"step": 150
},
{
"epoch": 0.29317453046266606,
"grad_norm": 0.059967171400785446,
"learning_rate": 0.00019763257895425113,
"loss": 0.9349,
"step": 160
},
{
"epoch": 0.3114979386165827,
"grad_norm": 0.0554397851228714,
"learning_rate": 0.0001971846958311071,
"loss": 0.9045,
"step": 170
},
{
"epoch": 0.3298213467704993,
"grad_norm": 0.055131904780864716,
"learning_rate": 0.00019669863367940935,
"loss": 0.8799,
"step": 180
},
{
"epoch": 0.34814475492441593,
"grad_norm": 0.04358826205134392,
"learning_rate": 0.00019617458344877816,
"loss": 0.8504,
"step": 190
},
{
"epoch": 0.3664681630783326,
"grad_norm": 0.04535752162337303,
"learning_rate": 0.00019561275101245883,
"loss": 0.828,
"step": 200
},
{
"epoch": 0.3847915712322492,
"grad_norm": 0.04672062397003174,
"learning_rate": 0.00019501335708644414,
"loss": 0.8114,
"step": 210
},
{
"epoch": 0.4031149793861658,
"grad_norm": 0.04161343351006508,
"learning_rate": 0.00019437663714276618,
"loss": 0.846,
"step": 220
},
{
"epoch": 0.42143838754008245,
"grad_norm": 0.03887801244854927,
"learning_rate": 0.0001937028413169911,
"loss": 0.7911,
"step": 230
},
{
"epoch": 0.4397617956939991,
"grad_norm": 0.03659196197986603,
"learning_rate": 0.00019299223430995323,
"loss": 0.7669,
"step": 240
},
{
"epoch": 0.45808520384791573,
"grad_norm": 0.03447382524609566,
"learning_rate": 0.00019224509528376738,
"loss": 0.782,
"step": 250
},
{
"epoch": 0.4764086120018323,
"grad_norm": 0.028725607320666313,
"learning_rate": 0.00019146171775215982,
"loss": 0.7183,
"step": 260
},
{
"epoch": 0.49473202015574896,
"grad_norm": 0.027673941105604172,
"learning_rate": 0.0001906424094651615,
"loss": 0.7018,
"step": 270
},
{
"epoch": 0.5130554283096656,
"grad_norm": 0.10227353870868683,
"learning_rate": 0.00018978749228820826,
"loss": 0.72,
"step": 280
},
{
"epoch": 0.5313788364635822,
"grad_norm": 0.022650673985481262,
"learning_rate": 0.00018889730207569607,
"loss": 0.6936,
"step": 290
},
{
"epoch": 0.5497022446174988,
"grad_norm": 0.023469725623726845,
"learning_rate": 0.00018797218853904037,
"loss": 0.6765,
"step": 300
},
{
"epoch": 0.5680256527714155,
"grad_norm": 0.018101360648870468,
"learning_rate": 0.000187012515109292,
"loss": 0.6799,
"step": 310
},
{
"epoch": 0.5863490609253321,
"grad_norm": 0.016794538125395775,
"learning_rate": 0.00018601865879436317,
"loss": 0.6732,
"step": 320
},
{
"epoch": 0.6046724690792488,
"grad_norm": 0.017263714224100113,
"learning_rate": 0.00018499101003091993,
"loss": 0.6695,
"step": 330
},
{
"epoch": 0.6229958772331654,
"grad_norm": 0.016381224617362022,
"learning_rate": 0.0001839299725309989,
"loss": 0.6928,
"step": 340
},
{
"epoch": 0.641319285387082,
"grad_norm": 0.015325487591326237,
"learning_rate": 0.00018283596312340891,
"loss": 0.6622,
"step": 350
},
{
"epoch": 0.6596426935409986,
"grad_norm": 0.014056784100830555,
"learning_rate": 0.0001817094115899799,
"loss": 0.7612,
"step": 360
},
{
"epoch": 0.6779661016949152,
"grad_norm": 0.015031951479613781,
"learning_rate": 0.00018055076049672283,
"loss": 0.6596,
"step": 370
},
{
"epoch": 0.6962895098488319,
"grad_norm": 0.01640532910823822,
"learning_rate": 0.00017936046501996762,
"loss": 0.6837,
"step": 380
},
{
"epoch": 0.7146129180027485,
"grad_norm": 0.01830482669174671,
"learning_rate": 0.000178138992767547,
"loss": 0.6812,
"step": 390
},
{
"epoch": 0.7329363261566652,
"grad_norm": 0.0472831092774868,
"learning_rate": 0.00017688682359509678,
"loss": 0.674,
"step": 400
},
{
"epoch": 0.7512597343105818,
"grad_norm": 0.012456170283257961,
"learning_rate": 0.00017560444941754427,
"loss": 0.6518,
"step": 410
},
{
"epoch": 0.7695831424644984,
"grad_norm": 0.01401186641305685,
"learning_rate": 0.0001742923740158595,
"loss": 0.6418,
"step": 420
},
{
"epoch": 0.7879065506184151,
"grad_norm": 0.015530922450125217,
"learning_rate": 0.00017295111283914487,
"loss": 0.6465,
"step": 430
},
{
"epoch": 0.8062299587723316,
"grad_norm": 0.01402275450527668,
"learning_rate": 0.0001715811928021406,
"loss": 0.6642,
"step": 440
},
{
"epoch": 0.8245533669262483,
"grad_norm": 0.01176263578236103,
"learning_rate": 0.0001701831520782264,
"loss": 0.6336,
"step": 450
},
{
"epoch": 0.8428767750801649,
"grad_norm": 0.013003438711166382,
"learning_rate": 0.00016875753988799982,
"loss": 0.6469,
"step": 460
},
{
"epoch": 0.8612001832340815,
"grad_norm": 0.011523702181875706,
"learning_rate": 0.00016730491628351487,
"loss": 0.6434,
"step": 470
},
{
"epoch": 0.8795235913879982,
"grad_norm": 0.011919384822249413,
"learning_rate": 0.00016582585192826543,
"loss": 0.6588,
"step": 480
},
{
"epoch": 0.8978469995419148,
"grad_norm": 0.013994649983942509,
"learning_rate": 0.00016432092787299992,
"loss": 0.6315,
"step": 490
},
{
"epoch": 0.9161704076958315,
"grad_norm": 0.013580686412751675,
"learning_rate": 0.00016279073532745553,
"loss": 0.6782,
"step": 500
}
],
"logging_steps": 10,
"max_steps": 1635,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.6890178748416e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}