{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9161704076958315,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01832340815391663,
      "grad_norm": 0.06011037901043892,
      "learning_rate": 4e-05,
      "loss": 1.296,
      "step": 10
    },
    {
      "epoch": 0.03664681630783326,
      "grad_norm": 0.05856110155582428,
      "learning_rate": 8e-05,
      "loss": 1.3316,
      "step": 20
    },
    {
      "epoch": 0.054970224461749886,
      "grad_norm": 0.0607464499771595,
      "learning_rate": 0.00012,
      "loss": 1.2794,
      "step": 30
    },
    {
      "epoch": 0.07329363261566652,
      "grad_norm": 0.06632011383771896,
      "learning_rate": 0.00016,
      "loss": 1.3129,
      "step": 40
    },
    {
      "epoch": 0.09161704076958314,
      "grad_norm": 0.06631691753864288,
      "learning_rate": 0.0002,
      "loss": 1.2741,
      "step": 50
    },
    {
      "epoch": 0.10994044892349977,
      "grad_norm": 0.056466877460479736,
      "learning_rate": 0.00019998035748930052,
      "loss": 1.2717,
      "step": 60
    },
    {
      "epoch": 0.1282638570774164,
      "grad_norm": 0.05860245227813721,
      "learning_rate": 0.00019992143767376668,
      "loss": 1.2091,
      "step": 70
    },
    {
      "epoch": 0.14658726523133303,
      "grad_norm": 0.06553175300359726,
      "learning_rate": 0.00019982326370006058,
      "loss": 1.1926,
      "step": 80
    },
    {
      "epoch": 0.16491067338524965,
      "grad_norm": 0.07061401754617691,
      "learning_rate": 0.00019968587413584876,
      "loss": 1.1767,
      "step": 90
    },
    {
      "epoch": 0.1832340815391663,
      "grad_norm": 0.07183243334293365,
      "learning_rate": 0.000199509322954651,
      "loss": 1.1183,
      "step": 100
    },
    {
      "epoch": 0.2015574896930829,
      "grad_norm": 0.06944898515939713,
      "learning_rate": 0.00019929367951463655,
      "loss": 1.0868,
      "step": 110
    },
    {
      "epoch": 0.21988089784699955,
      "grad_norm": 0.06642703711986542,
      "learning_rate": 0.00019903902853137703,
      "loss": 1.048,
      "step": 120
    },
    {
      "epoch": 0.23820430600091616,
      "grad_norm": 0.06603793054819107,
      "learning_rate": 0.00019874547004456562,
      "loss": 1.0195,
      "step": 130
    },
    {
      "epoch": 0.2565277141548328,
      "grad_norm": 0.06488285213708878,
      "learning_rate": 0.00019841311937871675,
      "loss": 1.0014,
      "step": 140
    },
    {
      "epoch": 0.2748511223087494,
      "grad_norm": 0.05940372124314308,
      "learning_rate": 0.0001980421070978606,
      "loss": 0.9943,
      "step": 150
    },
    {
      "epoch": 0.29317453046266606,
      "grad_norm": 0.059967171400785446,
      "learning_rate": 0.00019763257895425113,
      "loss": 0.9349,
      "step": 160
    },
    {
      "epoch": 0.3114979386165827,
      "grad_norm": 0.0554397851228714,
      "learning_rate": 0.0001971846958311071,
      "loss": 0.9045,
      "step": 170
    },
    {
      "epoch": 0.3298213467704993,
      "grad_norm": 0.055131904780864716,
      "learning_rate": 0.00019669863367940935,
      "loss": 0.8799,
      "step": 180
    },
    {
      "epoch": 0.34814475492441593,
      "grad_norm": 0.04358826205134392,
      "learning_rate": 0.00019617458344877816,
      "loss": 0.8504,
      "step": 190
    },
    {
      "epoch": 0.3664681630783326,
      "grad_norm": 0.04535752162337303,
      "learning_rate": 0.00019561275101245883,
      "loss": 0.828,
      "step": 200
    },
    {
      "epoch": 0.3847915712322492,
      "grad_norm": 0.04672062397003174,
      "learning_rate": 0.00019501335708644414,
      "loss": 0.8114,
      "step": 210
    },
    {
      "epoch": 0.4031149793861658,
      "grad_norm": 0.04161343351006508,
      "learning_rate": 0.00019437663714276618,
      "loss": 0.846,
      "step": 220
    },
    {
      "epoch": 0.42143838754008245,
      "grad_norm": 0.03887801244854927,
      "learning_rate": 0.0001937028413169911,
      "loss": 0.7911,
      "step": 230
    },
    {
      "epoch": 0.4397617956939991,
      "grad_norm": 0.03659196197986603,
      "learning_rate": 0.00019299223430995323,
      "loss": 0.7669,
      "step": 240
    },
    {
      "epoch": 0.45808520384791573,
      "grad_norm": 0.03447382524609566,
      "learning_rate": 0.00019224509528376738,
      "loss": 0.782,
      "step": 250
    },
    {
      "epoch": 0.4764086120018323,
      "grad_norm": 0.028725607320666313,
      "learning_rate": 0.00019146171775215982,
      "loss": 0.7183,
      "step": 260
    },
    {
      "epoch": 0.49473202015574896,
      "grad_norm": 0.027673941105604172,
      "learning_rate": 0.0001906424094651615,
      "loss": 0.7018,
      "step": 270
    },
    {
      "epoch": 0.5130554283096656,
      "grad_norm": 0.10227353870868683,
      "learning_rate": 0.00018978749228820826,
      "loss": 0.72,
      "step": 280
    },
    {
      "epoch": 0.5313788364635822,
      "grad_norm": 0.022650673985481262,
      "learning_rate": 0.00018889730207569607,
      "loss": 0.6936,
      "step": 290
    },
    {
      "epoch": 0.5497022446174988,
      "grad_norm": 0.023469725623726845,
      "learning_rate": 0.00018797218853904037,
      "loss": 0.6765,
      "step": 300
    },
    {
      "epoch": 0.5680256527714155,
      "grad_norm": 0.018101360648870468,
      "learning_rate": 0.000187012515109292,
      "loss": 0.6799,
      "step": 310
    },
    {
      "epoch": 0.5863490609253321,
      "grad_norm": 0.016794538125395775,
      "learning_rate": 0.00018601865879436317,
      "loss": 0.6732,
      "step": 320
    },
    {
      "epoch": 0.6046724690792488,
      "grad_norm": 0.017263714224100113,
      "learning_rate": 0.00018499101003091993,
      "loss": 0.6695,
      "step": 330
    },
    {
      "epoch": 0.6229958772331654,
      "grad_norm": 0.016381224617362022,
      "learning_rate": 0.0001839299725309989,
      "loss": 0.6928,
      "step": 340
    },
    {
      "epoch": 0.641319285387082,
      "grad_norm": 0.015325487591326237,
      "learning_rate": 0.00018283596312340891,
      "loss": 0.6622,
      "step": 350
    },
    {
      "epoch": 0.6596426935409986,
      "grad_norm": 0.014056784100830555,
      "learning_rate": 0.0001817094115899799,
      "loss": 0.7612,
      "step": 360
    },
    {
      "epoch": 0.6779661016949152,
      "grad_norm": 0.015031951479613781,
      "learning_rate": 0.00018055076049672283,
      "loss": 0.6596,
      "step": 370
    },
    {
      "epoch": 0.6962895098488319,
      "grad_norm": 0.01640532910823822,
      "learning_rate": 0.00017936046501996762,
      "loss": 0.6837,
      "step": 380
    },
    {
      "epoch": 0.7146129180027485,
      "grad_norm": 0.01830482669174671,
      "learning_rate": 0.000178138992767547,
      "loss": 0.6812,
      "step": 390
    },
    {
      "epoch": 0.7329363261566652,
      "grad_norm": 0.0472831092774868,
      "learning_rate": 0.00017688682359509678,
      "loss": 0.674,
      "step": 400
    },
    {
      "epoch": 0.7512597343105818,
      "grad_norm": 0.012456170283257961,
      "learning_rate": 0.00017560444941754427,
      "loss": 0.6518,
      "step": 410
    },
    {
      "epoch": 0.7695831424644984,
      "grad_norm": 0.01401186641305685,
      "learning_rate": 0.0001742923740158595,
      "loss": 0.6418,
      "step": 420
    },
    {
      "epoch": 0.7879065506184151,
      "grad_norm": 0.015530922450125217,
      "learning_rate": 0.00017295111283914487,
      "loss": 0.6465,
      "step": 430
    },
    {
      "epoch": 0.8062299587723316,
      "grad_norm": 0.01402275450527668,
      "learning_rate": 0.0001715811928021406,
      "loss": 0.6642,
      "step": 440
    },
    {
      "epoch": 0.8245533669262483,
      "grad_norm": 0.01176263578236103,
      "learning_rate": 0.0001701831520782264,
      "loss": 0.6336,
      "step": 450
    },
    {
      "epoch": 0.8428767750801649,
      "grad_norm": 0.013003438711166382,
      "learning_rate": 0.00016875753988799982,
      "loss": 0.6469,
      "step": 460
    },
    {
      "epoch": 0.8612001832340815,
      "grad_norm": 0.011523702181875706,
      "learning_rate": 0.00016730491628351487,
      "loss": 0.6434,
      "step": 470
    },
    {
      "epoch": 0.8795235913879982,
      "grad_norm": 0.011919384822249413,
      "learning_rate": 0.00016582585192826543,
      "loss": 0.6588,
      "step": 480
    },
    {
      "epoch": 0.8978469995419148,
      "grad_norm": 0.013994649983942509,
      "learning_rate": 0.00016432092787299992,
      "loss": 0.6315,
      "step": 490
    },
    {
      "epoch": 0.9161704076958315,
      "grad_norm": 0.013580686412751675,
      "learning_rate": 0.00016279073532745553,
      "loss": 0.6782,
      "step": 500
    }
  ],
  "logging_steps": 10,
  "max_steps": 1635,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.6890178748416e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}