{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 1830,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.273224043715847,
      "grad_norm": 1.0141575336456299,
      "learning_rate": 0.00019995065603657316,
      "loss": 1.8945,
      "step": 50
    },
    {
      "epoch": 0.546448087431694,
      "grad_norm": 0.9003917574882507,
      "learning_rate": 0.00019980267284282717,
      "loss": 1.3737,
      "step": 100
    },
    {
      "epoch": 0.819672131147541,
      "grad_norm": 0.855492889881134,
      "learning_rate": 0.00019955619646030802,
      "loss": 1.3506,
      "step": 150
    },
    {
      "epoch": 1.092896174863388,
      "grad_norm": 1.1789681911468506,
      "learning_rate": 0.0001992114701314478,
      "loss": 1.2946,
      "step": 200
    },
    {
      "epoch": 1.366120218579235,
      "grad_norm": 0.9413456320762634,
      "learning_rate": 0.00019876883405951377,
      "loss": 1.2914,
      "step": 250
    },
    {
      "epoch": 1.639344262295082,
      "grad_norm": 0.8401021957397461,
      "learning_rate": 0.0001982287250728689,
      "loss": 1.2756,
      "step": 300
    },
    {
      "epoch": 1.9125683060109289,
      "grad_norm": 0.9392536878585815,
      "learning_rate": 0.00019759167619387476,
      "loss": 1.2785,
      "step": 350
    },
    {
      "epoch": 2.185792349726776,
      "grad_norm": 0.918136477470398,
      "learning_rate": 0.0001968583161128631,
      "loss": 1.2199,
      "step": 400
    },
    {
      "epoch": 2.459016393442623,
      "grad_norm": 0.9809663891792297,
      "learning_rate": 0.0001960293685676943,
      "loss": 1.201,
      "step": 450
    },
    {
      "epoch": 2.73224043715847,
      "grad_norm": 1.0254710912704468,
      "learning_rate": 0.00019510565162951537,
      "loss": 1.1842,
      "step": 500
    },
    {
      "epoch": 3.0054644808743167,
      "grad_norm": 1.1089431047439575,
      "learning_rate": 0.00019408807689542257,
      "loss": 1.1819,
      "step": 550
    },
    {
      "epoch": 3.278688524590164,
      "grad_norm": 1.2321062088012695,
      "learning_rate": 0.00019297764858882514,
      "loss": 1.1113,
      "step": 600
    },
    {
      "epoch": 3.551912568306011,
      "grad_norm": 1.0911256074905396,
      "learning_rate": 0.00019177546256839812,
      "loss": 1.1212,
      "step": 650
    },
    {
      "epoch": 3.8251366120218577,
      "grad_norm": 1.1500061750411987,
      "learning_rate": 0.00019048270524660196,
      "loss": 1.1247,
      "step": 700
    },
    {
      "epoch": 4.098360655737705,
      "grad_norm": 1.259513258934021,
      "learning_rate": 0.0001891006524188368,
      "loss": 1.0826,
      "step": 750
    },
    {
      "epoch": 4.371584699453552,
      "grad_norm": 1.377414345741272,
      "learning_rate": 0.00018763066800438636,
      "loss": 1.0593,
      "step": 800
    },
    {
      "epoch": 4.644808743169399,
      "grad_norm": 1.2397098541259766,
      "learning_rate": 0.0001860742027003944,
      "loss": 1.0414,
      "step": 850
    },
    {
      "epoch": 4.918032786885246,
      "grad_norm": 1.2820392847061157,
      "learning_rate": 0.00018443279255020152,
      "loss": 1.0601,
      "step": 900
    },
    {
      "epoch": 5.191256830601093,
      "grad_norm": 1.6708155870437622,
      "learning_rate": 0.00018270805742745617,
      "loss": 0.973,
      "step": 950
    },
    {
      "epoch": 5.46448087431694,
      "grad_norm": 1.546794056892395,
      "learning_rate": 0.00018090169943749476,
      "loss": 0.9904,
      "step": 1000
    },
    {
      "epoch": 5.737704918032787,
      "grad_norm": 1.437908411026001,
      "learning_rate": 0.00017901550123756906,
      "loss": 0.9863,
      "step": 1050
    },
    {
      "epoch": 6.0109289617486334,
      "grad_norm": 1.4555143117904663,
      "learning_rate": 0.00017705132427757895,
      "loss": 0.9768,
      "step": 1100
    },
    {
      "epoch": 6.284153005464481,
      "grad_norm": 1.494957447052002,
      "learning_rate": 0.00017501110696304596,
      "loss": 0.8969,
      "step": 1150
    },
    {
      "epoch": 6.557377049180328,
      "grad_norm": 1.4257054328918457,
      "learning_rate": 0.00017289686274214118,
      "loss": 0.9207,
      "step": 1200
    },
    {
      "epoch": 6.830601092896175,
      "grad_norm": 1.6431266069412231,
      "learning_rate": 0.00017071067811865476,
      "loss": 0.9116,
      "step": 1250
    },
    {
      "epoch": 7.103825136612022,
      "grad_norm": 1.4786570072174072,
      "learning_rate": 0.00016845471059286887,
      "loss": 0.8975,
      "step": 1300
    },
    {
      "epoch": 7.377049180327869,
      "grad_norm": 1.5059996843338013,
      "learning_rate": 0.00016613118653236518,
      "loss": 0.8519,
      "step": 1350
    },
    {
      "epoch": 7.6502732240437155,
      "grad_norm": 1.5110268592834473,
      "learning_rate": 0.000163742398974869,
      "loss": 0.8471,
      "step": 1400
    },
    {
      "epoch": 7.923497267759563,
      "grad_norm": 1.6930420398712158,
      "learning_rate": 0.00016129070536529766,
      "loss": 0.8544,
      "step": 1450
    },
    {
      "epoch": 8.19672131147541,
      "grad_norm": 1.8286707401275635,
      "learning_rate": 0.00015877852522924732,
      "loss": 0.8102,
      "step": 1500
    },
    {
      "epoch": 8.469945355191257,
      "grad_norm": 1.4673559665679932,
      "learning_rate": 0.00015620833778521307,
      "loss": 0.7986,
      "step": 1550
    },
    {
      "epoch": 8.743169398907105,
      "grad_norm": 1.6546106338500977,
      "learning_rate": 0.00015358267949789966,
      "loss": 0.7985,
      "step": 1600
    },
    {
      "epoch": 9.01639344262295,
      "grad_norm": 1.7138121128082275,
      "learning_rate": 0.00015090414157503714,
      "loss": 0.8194,
      "step": 1650
    },
    {
      "epoch": 9.289617486338798,
      "grad_norm": 1.5631183385849,
      "learning_rate": 0.00014817536741017152,
      "loss": 0.7317,
      "step": 1700
    },
    {
      "epoch": 9.562841530054644,
      "grad_norm": 1.936880111694336,
      "learning_rate": 0.00014539904997395468,
      "loss": 0.7479,
      "step": 1750
    },
    {
      "epoch": 9.836065573770492,
      "grad_norm": 1.6515196561813354,
      "learning_rate": 0.00014257792915650728,
      "loss": 0.7435,
      "step": 1800
    }
  ],
  "logging_steps": 50,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 28,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.968783346244608e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}