llama-2-7b-fourierft-alpaca / trainer_state.json

Upload folder using huggingface_hub

3bd7913 verified 13 days ago

9.86 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.0,
	"eval_steps": 5,
	"global_step": 2588,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.019319938176197836,
	"grad_norm": 0.00098650180734694,
	"learning_rate": 0.009615384615384616,
	"loss": 0.9907,
	"step": 50
	},
	{
	"epoch": 0.03863987635239567,
	"grad_norm": 0.000779022928327322,
	"learning_rate": 0.019230769230769232,
	"loss": 0.9647,
	"step": 100
	},
	{
	"epoch": 0.05795981452859351,
	"grad_norm": 0.000611725845374167,
	"learning_rate": 0.028846153846153844,
	"loss": 0.9412,
	"step": 150
	},
	{
	"epoch": 0.07727975270479134,
	"grad_norm": 0.0005838441429659724,
	"learning_rate": 0.029457236842105262,
	"loss": 0.9322,
	"step": 200
	},
	{
	"epoch": 0.09659969088098919,
	"grad_norm": 0.0007691067876294255,
	"learning_rate": 0.028840460526315788,
	"loss": 0.9131,
	"step": 250
	},
	{
	"epoch": 0.11591962905718702,
	"grad_norm": 0.0005935626104474068,
	"learning_rate": 0.028223684210526314,
	"loss": 0.9104,
	"step": 300
	},
	{
	"epoch": 0.13523956723338484,
	"grad_norm": 0.0006890599033795297,
	"learning_rate": 0.02760690789473684,
	"loss": 0.9214,
	"step": 350
	},
	{
	"epoch": 0.1545595054095827,
	"grad_norm": 0.0006042916793376207,
	"learning_rate": 0.02699013157894737,
	"loss": 0.9,
	"step": 400
	},
	{
	"epoch": 0.17387944358578053,
	"grad_norm": 0.0005447549629025161,
	"learning_rate": 0.026373355263157892,
	"loss": 0.9097,
	"step": 450
	},
	{
	"epoch": 0.19319938176197837,
	"grad_norm": 0.0004888740368187428,
	"learning_rate": 0.02575657894736842,
	"loss": 0.9037,
	"step": 500
	},
	{
	"epoch": 0.2125193199381762,
	"grad_norm": 0.0008238813607022166,
	"learning_rate": 0.025139802631578945,
	"loss": 0.899,
	"step": 550
	},
	{
	"epoch": 0.23183925811437403,
	"grad_norm": 0.000727724633179605,
	"learning_rate": 0.024523026315789474,
	"loss": 0.923,
	"step": 600
	},
	{
	"epoch": 0.2511591962905719,
	"grad_norm": 0.0005605846527032554,
	"learning_rate": 0.02390625,
	"loss": 0.9031,
	"step": 650
	},
	{
	"epoch": 0.2704791344667697,
	"grad_norm": 0.0007705381722189486,
	"learning_rate": 0.023289473684210523,
	"loss": 0.9013,
	"step": 700
	},
	{
	"epoch": 0.28979907264296756,
	"grad_norm": 0.0007164838025346398,
	"learning_rate": 0.022672697368421053,
	"loss": 0.8971,
	"step": 750
	},
	{
	"epoch": 0.3091190108191654,
	"grad_norm": 0.000717374321538955,
	"learning_rate": 0.02205592105263158,
	"loss": 0.8866,
	"step": 800
	},
	{
	"epoch": 0.3284389489953632,
	"grad_norm": 0.0006394012016244233,
	"learning_rate": 0.021439144736842105,
	"loss": 0.899,
	"step": 850
	},
	{
	"epoch": 0.34775888717156106,
	"grad_norm": 0.0006252205348573625,
	"learning_rate": 0.02082236842105263,
	"loss": 0.894,
	"step": 900
	},
	{
	"epoch": 0.3670788253477589,
	"grad_norm": 0.0006903470493853092,
	"learning_rate": 0.020205592105263157,
	"loss": 0.8858,
	"step": 950
	},
	{
	"epoch": 0.38639876352395675,
	"grad_norm": 0.0008341589127667248,
	"learning_rate": 0.019588815789473683,
	"loss": 0.9168,
	"step": 1000
	},
	{
	"epoch": 0.40571870170015456,
	"grad_norm": 0.0005771280848421156,
	"learning_rate": 0.01897203947368421,
	"loss": 0.9117,
	"step": 1050
	},
	{
	"epoch": 0.4250386398763524,
	"grad_norm": 0.000522978079970926,
	"learning_rate": 0.018355263157894736,
	"loss": 0.8939,
	"step": 1100
	},
	{
	"epoch": 0.44435857805255025,
	"grad_norm": 0.0005450574099086225,
	"learning_rate": 0.017738486842105265,
	"loss": 0.9049,
	"step": 1150
	},
	{
	"epoch": 0.46367851622874806,
	"grad_norm": 0.0005660468013957143,
	"learning_rate": 0.017121710526315788,
	"loss": 0.8944,
	"step": 1200
	},
	{
	"epoch": 0.48299845440494593,
	"grad_norm": 0.0006663696258328855,
	"learning_rate": 0.016504934210526314,
	"loss": 0.8971,
	"step": 1250
	},
	{
	"epoch": 0.5023183925811437,
	"grad_norm": 0.0005968479672446847,
	"learning_rate": 0.01588815789473684,
	"loss": 0.8917,
	"step": 1300
	},
	{
	"epoch": 0.5216383307573416,
	"grad_norm": 0.0007491153082810342,
	"learning_rate": 0.01527138157894737,
	"loss": 0.8829,
	"step": 1350
	},
	{
	"epoch": 0.5409582689335394,
	"grad_norm": 0.0006275599589571357,
	"learning_rate": 0.014654605263157894,
	"loss": 0.9058,
	"step": 1400
	},
	{
	"epoch": 0.5602782071097373,
	"grad_norm": 0.0007617810624651611,
	"learning_rate": 0.01403782894736842,
	"loss": 0.9051,
	"step": 1450
	},
	{
	"epoch": 0.5795981452859351,
	"grad_norm": 0.0006214394234120846,
	"learning_rate": 0.013421052631578946,
	"loss": 0.8879,
	"step": 1500
	},
	{
	"epoch": 0.5989180834621329,
	"grad_norm": 0.0006560624460689723,
	"learning_rate": 0.012804276315789473,
	"loss": 0.8991,
	"step": 1550
	},
	{
	"epoch": 0.6182380216383307,
	"grad_norm": 0.0007683933363296092,
	"learning_rate": 0.0121875,
	"loss": 0.9081,
	"step": 1600
	},
	{
	"epoch": 0.6375579598145286,
	"grad_norm": 0.0005783849046565592,
	"learning_rate": 0.011570723684210527,
	"loss": 0.9067,
	"step": 1650
	},
	{
	"epoch": 0.6568778979907264,
	"grad_norm": 0.0007958198548294604,
	"learning_rate": 0.010953947368421053,
	"loss": 0.885,
	"step": 1700
	},
	{
	"epoch": 0.6761978361669243,
	"grad_norm": 0.0006095783319324255,
	"learning_rate": 0.010337171052631579,
	"loss": 0.8928,
	"step": 1750
	},
	{
	"epoch": 0.6955177743431221,
	"grad_norm": 0.000699816329870373,
	"learning_rate": 0.009720394736842105,
	"loss": 0.903,
	"step": 1800
	},
	{
	"epoch": 0.7148377125193199,
	"grad_norm": 0.0008128538611344993,
	"learning_rate": 0.009103618421052631,
	"loss": 0.9036,
	"step": 1850
	},
	{
	"epoch": 0.7341576506955177,
	"grad_norm": 0.0006495247362181544,
	"learning_rate": 0.008486842105263157,
	"loss": 0.8907,
	"step": 1900
	},
	{
	"epoch": 0.7534775888717156,
	"grad_norm": 0.0005265743238851428,
	"learning_rate": 0.007870065789473685,
	"loss": 0.8843,
	"step": 1950
	},
	{
	"epoch": 0.7727975270479135,
	"grad_norm": 0.0006601494387723505,
	"learning_rate": 0.0072532894736842095,
	"loss": 0.8925,
	"step": 2000
	},
	{
	"epoch": 0.7921174652241113,
	"grad_norm": 0.0005823367391712964,
	"learning_rate": 0.0066365131578947365,
	"loss": 0.8954,
	"step": 2050
	},
	{
	"epoch": 0.8114374034003091,
	"grad_norm": 0.0005229181842878461,
	"learning_rate": 0.0060197368421052635,
	"loss": 0.903,
	"step": 2100
	},
	{
	"epoch": 0.8307573415765069,
	"grad_norm": 0.0005145368631929159,
	"learning_rate": 0.00540296052631579,
	"loss": 0.8923,
	"step": 2150
	},
	{
	"epoch": 0.8500772797527048,
	"grad_norm": 0.0006071292445994914,
	"learning_rate": 0.004786184210526316,
	"loss": 0.8804,
	"step": 2200
	},
	{
	"epoch": 0.8693972179289027,
	"grad_norm": 0.0006730407476425171,
	"learning_rate": 0.004169407894736842,
	"loss": 0.8919,
	"step": 2250
	},
	{
	"epoch": 0.8887171561051005,
	"grad_norm": 0.0006455178954638541,
	"learning_rate": 0.003552631578947368,
	"loss": 0.896,
	"step": 2300
	},
	{
	"epoch": 0.9080370942812983,
	"grad_norm": 0.0004997382056899369,
	"learning_rate": 0.002935855263157895,
	"loss": 0.8921,
	"step": 2350
	},
	{
	"epoch": 0.9273570324574961,
	"grad_norm": 0.00045192165998741984,
	"learning_rate": 0.002319078947368421,
	"loss": 0.8839,
	"step": 2400
	},
	{
	"epoch": 0.9466769706336939,
	"grad_norm": 0.0004822098126169294,
	"learning_rate": 0.0017023026315789475,
	"loss": 0.8988,
	"step": 2450
	},
	{
	"epoch": 0.9659969088098919,
	"grad_norm": 0.0005721400957554579,
	"learning_rate": 0.0010855263157894736,
	"loss": 0.9045,
	"step": 2500
	},
	{
	"epoch": 0.9853168469860897,
	"grad_norm": 0.0005698847235180438,
	"learning_rate": 0.00046875,
	"loss": 0.893,
	"step": 2550
	},
	{
	"epoch": 1.0,
	"eval_loss": 0.8954795002937317,
	"eval_runtime": 1619.6937,
	"eval_samples_per_second": 6.391,
	"eval_steps_per_second": 0.799,
	"step": 2588
	}
	],
	"logging_steps": 50,
	"max_steps": 2588,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 50,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 6.394319248976609e+17,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}