Phi_2_finetune / trainer_state.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 1830,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.273224043715847,
      "grad_norm": 1.0141575336456299,
      "learning_rate": 0.00019995065603657316,
      "loss": 1.8945,
      "step": 50
    },
    {
      "epoch": 0.546448087431694,
      "grad_norm": 0.9003917574882507,
      "learning_rate": 0.00019980267284282717,
      "loss": 1.3737,
      "step": 100
    },
    {
      "epoch": 0.819672131147541,
      "grad_norm": 0.855492889881134,
      "learning_rate": 0.00019955619646030802,
      "loss": 1.3506,
      "step": 150
    },
    {
      "epoch": 1.092896174863388,
      "grad_norm": 1.1789681911468506,
      "learning_rate": 0.0001992114701314478,
      "loss": 1.2946,
      "step": 200
    },
    {
      "epoch": 1.366120218579235,
      "grad_norm": 0.9413456320762634,
      "learning_rate": 0.00019876883405951377,
      "loss": 1.2914,
      "step": 250
    },
    {
      "epoch": 1.639344262295082,
      "grad_norm": 0.8401021957397461,
      "learning_rate": 0.0001982287250728689,
      "loss": 1.2756,
      "step": 300
    },
    {
      "epoch": 1.9125683060109289,
      "grad_norm": 0.9392536878585815,
      "learning_rate": 0.00019759167619387476,
      "loss": 1.2785,
      "step": 350
    },
    {
      "epoch": 2.185792349726776,
      "grad_norm": 0.918136477470398,
      "learning_rate": 0.0001968583161128631,
      "loss": 1.2199,
      "step": 400
    },
    {
      "epoch": 2.459016393442623,
      "grad_norm": 0.9809663891792297,
      "learning_rate": 0.0001960293685676943,
      "loss": 1.201,
      "step": 450
    },
    {
      "epoch": 2.73224043715847,
      "grad_norm": 1.0254710912704468,
      "learning_rate": 0.00019510565162951537,
      "loss": 1.1842,
      "step": 500
    },
    {
      "epoch": 3.0054644808743167,
      "grad_norm": 1.1089431047439575,
      "learning_rate": 0.00019408807689542257,
      "loss": 1.1819,
      "step": 550
    },
    {
      "epoch": 3.278688524590164,
      "grad_norm": 1.2321062088012695,
      "learning_rate": 0.00019297764858882514,
      "loss": 1.1113,
      "step": 600
    },
    {
      "epoch": 3.551912568306011,
      "grad_norm": 1.0911256074905396,
      "learning_rate": 0.00019177546256839812,
      "loss": 1.1212,
      "step": 650
    },
    {
      "epoch": 3.8251366120218577,
      "grad_norm": 1.1500061750411987,
      "learning_rate": 0.00019048270524660196,
      "loss": 1.1247,
      "step": 700
    },
    {
      "epoch": 4.098360655737705,
      "grad_norm": 1.259513258934021,
      "learning_rate": 0.0001891006524188368,
      "loss": 1.0826,
      "step": 750
    },
    {
      "epoch": 4.371584699453552,
      "grad_norm": 1.377414345741272,
      "learning_rate": 0.00018763066800438636,
      "loss": 1.0593,
      "step": 800
    },
    {
      "epoch": 4.644808743169399,
      "grad_norm": 1.2397098541259766,
      "learning_rate": 0.0001860742027003944,
      "loss": 1.0414,
      "step": 850
    },
    {
      "epoch": 4.918032786885246,
      "grad_norm": 1.2820392847061157,
      "learning_rate": 0.00018443279255020152,
      "loss": 1.0601,
      "step": 900
    },
    {
      "epoch": 5.191256830601093,
      "grad_norm": 1.6708155870437622,
      "learning_rate": 0.00018270805742745617,
      "loss": 0.973,
      "step": 950
    },
    {
      "epoch": 5.46448087431694,
      "grad_norm": 1.546794056892395,
      "learning_rate": 0.00018090169943749476,
      "loss": 0.9904,
      "step": 1000
    },
    {
      "epoch": 5.737704918032787,
      "grad_norm": 1.437908411026001,
      "learning_rate": 0.00017901550123756906,
      "loss": 0.9863,
      "step": 1050
    },
    {
      "epoch": 6.0109289617486334,
      "grad_norm": 1.4555143117904663,
      "learning_rate": 0.00017705132427757895,
      "loss": 0.9768,
      "step": 1100
    },
    {
      "epoch": 6.284153005464481,
      "grad_norm": 1.494957447052002,
      "learning_rate": 0.00017501110696304596,
      "loss": 0.8969,
      "step": 1150
    },
    {
      "epoch": 6.557377049180328,
      "grad_norm": 1.4257054328918457,
      "learning_rate": 0.00017289686274214118,
      "loss": 0.9207,
      "step": 1200
    },
    {
      "epoch": 6.830601092896175,
      "grad_norm": 1.6431266069412231,
      "learning_rate": 0.00017071067811865476,
      "loss": 0.9116,
      "step": 1250
    },
    {
      "epoch": 7.103825136612022,
      "grad_norm": 1.4786570072174072,
      "learning_rate": 0.00016845471059286887,
      "loss": 0.8975,
      "step": 1300
    },
    {
      "epoch": 7.377049180327869,
      "grad_norm": 1.5059996843338013,
      "learning_rate": 0.00016613118653236518,
      "loss": 0.8519,
      "step": 1350
    },
    {
      "epoch": 7.6502732240437155,
      "grad_norm": 1.5110268592834473,
      "learning_rate": 0.000163742398974869,
      "loss": 0.8471,
      "step": 1400
    },
    {
      "epoch": 7.923497267759563,
      "grad_norm": 1.6930420398712158,
      "learning_rate": 0.00016129070536529766,
      "loss": 0.8544,
      "step": 1450
    },
    {
      "epoch": 8.19672131147541,
      "grad_norm": 1.8286707401275635,
      "learning_rate": 0.00015877852522924732,
      "loss": 0.8102,
      "step": 1500
    },
    {
      "epoch": 8.469945355191257,
      "grad_norm": 1.4673559665679932,
      "learning_rate": 0.00015620833778521307,
      "loss": 0.7986,
      "step": 1550
    },
    {
      "epoch": 8.743169398907105,
      "grad_norm": 1.6546106338500977,
      "learning_rate": 0.00015358267949789966,
      "loss": 0.7985,
      "step": 1600
    },
    {
      "epoch": 9.01639344262295,
      "grad_norm": 1.7138121128082275,
      "learning_rate": 0.00015090414157503714,
      "loss": 0.8194,
      "step": 1650
    },
    {
      "epoch": 9.289617486338798,
      "grad_norm": 1.5631183385849,
      "learning_rate": 0.00014817536741017152,
      "loss": 0.7317,
      "step": 1700
    },
    {
      "epoch": 9.562841530054644,
      "grad_norm": 1.936880111694336,
      "learning_rate": 0.00014539904997395468,
      "loss": 0.7479,
      "step": 1750
    },
    {
      "epoch": 9.836065573770492,
      "grad_norm": 1.6515196561813354,
      "learning_rate": 0.00014257792915650728,
      "loss": 0.7435,
      "step": 1800
    }
  ],
  "logging_steps": 50,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 28,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.968783346244608e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
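
For reference, a minimal sketch of how a saved trainer_state.json like the one above can be inspected offline. It uses only the Python standard library; the file path is an assumption and should point at the actual checkpoint directory.

import json

# Load the trainer state dumped by the Hugging Face Trainer.
# Path is an assumption; adjust to where the checkpoint was saved.
with open("trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry carries epoch, grad_norm, learning_rate, loss and step,
# recorded every `logging_steps` (50 here) optimizer steps.
for entry in state["log_history"]:
    print(f"step {entry['step']:>4}  epoch {entry['epoch']:5.2f}  "
          f"lr {entry['learning_rate']:.2e}  loss {entry['loss']:.4f}")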