gemma2_9b_multilabel_lora_adapter_ver1 / trainer_state.json

Upload folder using huggingface_hub

4f1d439 verified about 2 months ago

18.6 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.0,
	"eval_steps": 500,
	"global_step": 1036,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.009652509652509652,
	"grad_norm": 1.4631391763687134,
	"learning_rate": 0.0001,
	"loss": 0.664,
	"step": 10
	},
	{
	"epoch": 0.019305019305019305,
	"grad_norm": 1.8917441368103027,
	"learning_rate": 0.0002,
	"loss": 0.1575,
	"step": 20
	},
	{
	"epoch": 0.02895752895752896,
	"grad_norm": 1.5227961540222168,
	"learning_rate": 0.00019803149606299213,
	"loss": 0.1233,
	"step": 30
	},
	{
	"epoch": 0.03861003861003861,
	"grad_norm": 0.9390913248062134,
	"learning_rate": 0.00019606299212598428,
	"loss": 0.1106,
	"step": 40
	},
	{
	"epoch": 0.04826254826254826,
	"grad_norm": 0.8157206177711487,
	"learning_rate": 0.0001940944881889764,
	"loss": 0.0869,
	"step": 50
	},
	{
	"epoch": 0.05791505791505792,
	"grad_norm": 0.5291563868522644,
	"learning_rate": 0.0001921259842519685,
	"loss": 0.0903,
	"step": 60
	},
	{
	"epoch": 0.06756756756756757,
	"grad_norm": 1.155928373336792,
	"learning_rate": 0.00019015748031496065,
	"loss": 0.1186,
	"step": 70
	},
	{
	"epoch": 0.07722007722007722,
	"grad_norm": 0.5892627239227295,
	"learning_rate": 0.00018818897637795277,
	"loss": 0.0914,
	"step": 80
	},
	{
	"epoch": 0.08687258687258688,
	"grad_norm": 0.5989981889724731,
	"learning_rate": 0.0001862204724409449,
	"loss": 0.0917,
	"step": 90
	},
	{
	"epoch": 0.09652509652509653,
	"grad_norm": 0.9171364903450012,
	"learning_rate": 0.000184251968503937,
	"loss": 0.0888,
	"step": 100
	},
	{
	"epoch": 0.10617760617760617,
	"grad_norm": 0.37568220496177673,
	"learning_rate": 0.00018228346456692916,
	"loss": 0.0865,
	"step": 110
	},
	{
	"epoch": 0.11583011583011583,
	"grad_norm": 0.5606808662414551,
	"learning_rate": 0.00018031496062992125,
	"loss": 0.0898,
	"step": 120
	},
	{
	"epoch": 0.12548262548262548,
	"grad_norm": 0.6872594356536865,
	"learning_rate": 0.00017834645669291338,
	"loss": 0.0851,
	"step": 130
	},
	{
	"epoch": 0.13513513513513514,
	"grad_norm": 0.6313247680664062,
	"learning_rate": 0.00017637795275590552,
	"loss": 0.088,
	"step": 140
	},
	{
	"epoch": 0.14478764478764478,
	"grad_norm": 0.7841348648071289,
	"learning_rate": 0.00017440944881889765,
	"loss": 0.0966,
	"step": 150
	},
	{
	"epoch": 0.15444015444015444,
	"grad_norm": 0.8097236752510071,
	"learning_rate": 0.00017244094488188977,
	"loss": 0.0626,
	"step": 160
	},
	{
	"epoch": 0.1640926640926641,
	"grad_norm": 0.7307904362678528,
	"learning_rate": 0.00017047244094488192,
	"loss": 0.084,
	"step": 170
	},
	{
	"epoch": 0.17374517374517376,
	"grad_norm": 0.5725339651107788,
	"learning_rate": 0.000168503937007874,
	"loss": 0.071,
	"step": 180
	},
	{
	"epoch": 0.1833976833976834,
	"grad_norm": 0.7641273736953735,
	"learning_rate": 0.00016653543307086613,
	"loss": 0.0958,
	"step": 190
	},
	{
	"epoch": 0.19305019305019305,
	"grad_norm": 0.6539800763130188,
	"learning_rate": 0.00016456692913385828,
	"loss": 0.0803,
	"step": 200
	},
	{
	"epoch": 0.20270270270270271,
	"grad_norm": 0.6908950805664062,
	"learning_rate": 0.0001625984251968504,
	"loss": 0.0885,
	"step": 210
	},
	{
	"epoch": 0.21235521235521235,
	"grad_norm": 0.31019431352615356,
	"learning_rate": 0.00016062992125984252,
	"loss": 0.0588,
	"step": 220
	},
	{
	"epoch": 0.222007722007722,
	"grad_norm": 0.7413542866706848,
	"learning_rate": 0.00015866141732283467,
	"loss": 0.0827,
	"step": 230
	},
	{
	"epoch": 0.23166023166023167,
	"grad_norm": 0.7324005365371704,
	"learning_rate": 0.0001566929133858268,
	"loss": 0.0926,
	"step": 240
	},
	{
	"epoch": 0.2413127413127413,
	"grad_norm": 0.5140088796615601,
	"learning_rate": 0.0001547244094488189,
	"loss": 0.0768,
	"step": 250
	},
	{
	"epoch": 0.25096525096525096,
	"grad_norm": 0.3749833405017853,
	"learning_rate": 0.00015275590551181104,
	"loss": 0.0741,
	"step": 260
	},
	{
	"epoch": 0.2606177606177606,
	"grad_norm": 0.5479316711425781,
	"learning_rate": 0.00015078740157480316,
	"loss": 0.068,
	"step": 270
	},
	{
	"epoch": 0.2702702702702703,
	"grad_norm": 0.5078131556510925,
	"learning_rate": 0.00014881889763779528,
	"loss": 0.0763,
	"step": 280
	},
	{
	"epoch": 0.2799227799227799,
	"grad_norm": 0.5565307140350342,
	"learning_rate": 0.0001468503937007874,
	"loss": 0.0657,
	"step": 290
	},
	{
	"epoch": 0.28957528957528955,
	"grad_norm": 0.5333523750305176,
	"learning_rate": 0.00014488188976377955,
	"loss": 0.0467,
	"step": 300
	},
	{
	"epoch": 0.29922779922779924,
	"grad_norm": 0.9170491695404053,
	"learning_rate": 0.00014291338582677165,
	"loss": 0.0677,
	"step": 310
	},
	{
	"epoch": 0.3088803088803089,
	"grad_norm": 0.6521899104118347,
	"learning_rate": 0.00014094488188976377,
	"loss": 0.0692,
	"step": 320
	},
	{
	"epoch": 0.3185328185328185,
	"grad_norm": 0.5061705708503723,
	"learning_rate": 0.00013897637795275592,
	"loss": 0.086,
	"step": 330
	},
	{
	"epoch": 0.3281853281853282,
	"grad_norm": 0.643752932548523,
	"learning_rate": 0.00013700787401574804,
	"loss": 0.0621,
	"step": 340
	},
	{
	"epoch": 0.33783783783783783,
	"grad_norm": 0.5780313014984131,
	"learning_rate": 0.00013503937007874016,
	"loss": 0.0691,
	"step": 350
	},
	{
	"epoch": 0.3474903474903475,
	"grad_norm": 0.36350390315055847,
	"learning_rate": 0.0001330708661417323,
	"loss": 0.0679,
	"step": 360
	},
	{
	"epoch": 0.35714285714285715,
	"grad_norm": 0.6011677384376526,
	"learning_rate": 0.0001311023622047244,
	"loss": 0.0707,
	"step": 370
	},
	{
	"epoch": 0.3667953667953668,
	"grad_norm": 0.5352160930633545,
	"learning_rate": 0.00012913385826771653,
	"loss": 0.0543,
	"step": 380
	},
	{
	"epoch": 0.3764478764478765,
	"grad_norm": 0.4793628752231598,
	"learning_rate": 0.00012716535433070867,
	"loss": 0.058,
	"step": 390
	},
	{
	"epoch": 0.3861003861003861,
	"grad_norm": 0.6923650503158569,
	"learning_rate": 0.0001251968503937008,
	"loss": 0.0691,
	"step": 400
	},
	{
	"epoch": 0.39575289575289574,
	"grad_norm": 0.49190232157707214,
	"learning_rate": 0.00012322834645669292,
	"loss": 0.0579,
	"step": 410
	},
	{
	"epoch": 0.40540540540540543,
	"grad_norm": 0.23852688074111938,
	"learning_rate": 0.00012125984251968505,
	"loss": 0.0524,
	"step": 420
	},
	{
	"epoch": 0.41505791505791506,
	"grad_norm": 0.6561082005500793,
	"learning_rate": 0.00011929133858267719,
	"loss": 0.057,
	"step": 430
	},
	{
	"epoch": 0.4247104247104247,
	"grad_norm": 0.613944411277771,
	"learning_rate": 0.00011732283464566928,
	"loss": 0.0575,
	"step": 440
	},
	{
	"epoch": 0.4343629343629344,
	"grad_norm": 0.3899982273578644,
	"learning_rate": 0.00011535433070866142,
	"loss": 0.0527,
	"step": 450
	},
	{
	"epoch": 0.444015444015444,
	"grad_norm": 0.5278599262237549,
	"learning_rate": 0.00011338582677165355,
	"loss": 0.0589,
	"step": 460
	},
	{
	"epoch": 0.45366795366795365,
	"grad_norm": 0.29473400115966797,
	"learning_rate": 0.00011141732283464567,
	"loss": 0.0271,
	"step": 470
	},
	{
	"epoch": 0.46332046332046334,
	"grad_norm": 0.6821677088737488,
	"learning_rate": 0.00010944881889763781,
	"loss": 0.0772,
	"step": 480
	},
	{
	"epoch": 0.47297297297297297,
	"grad_norm": 0.3420783281326294,
	"learning_rate": 0.00010748031496062993,
	"loss": 0.0543,
	"step": 490
	},
	{
	"epoch": 0.4826254826254826,
	"grad_norm": 0.5966827869415283,
	"learning_rate": 0.00010551181102362204,
	"loss": 0.0552,
	"step": 500
	},
	{
	"epoch": 0.4922779922779923,
	"grad_norm": 0.656173825263977,
	"learning_rate": 0.00010354330708661417,
	"loss": 0.0692,
	"step": 510
	},
	{
	"epoch": 0.5019305019305019,
	"grad_norm": 0.46832337975502014,
	"learning_rate": 0.0001015748031496063,
	"loss": 0.0456,
	"step": 520
	},
	{
	"epoch": 0.5115830115830116,
	"grad_norm": 0.5552840828895569,
	"learning_rate": 9.960629921259843e-05,
	"loss": 0.0581,
	"step": 530
	},
	{
	"epoch": 0.5212355212355212,
	"grad_norm": 0.7662914395332336,
	"learning_rate": 9.763779527559055e-05,
	"loss": 0.0542,
	"step": 540
	},
	{
	"epoch": 0.5308880308880309,
	"grad_norm": 0.5977205634117126,
	"learning_rate": 9.566929133858268e-05,
	"loss": 0.0432,
	"step": 550
	},
	{
	"epoch": 0.5405405405405406,
	"grad_norm": 0.41738152503967285,
	"learning_rate": 9.370078740157481e-05,
	"loss": 0.0606,
	"step": 560
	},
	{
	"epoch": 0.5501930501930502,
	"grad_norm": 0.3176082670688629,
	"learning_rate": 9.173228346456693e-05,
	"loss": 0.053,
	"step": 570
	},
	{
	"epoch": 0.5598455598455598,
	"grad_norm": 0.5319856405258179,
	"learning_rate": 8.976377952755905e-05,
	"loss": 0.0548,
	"step": 580
	},
	{
	"epoch": 0.5694980694980695,
	"grad_norm": 0.6625571250915527,
	"learning_rate": 8.779527559055119e-05,
	"loss": 0.0735,
	"step": 590
	},
	{
	"epoch": 0.5791505791505791,
	"grad_norm": 0.6264726519584656,
	"learning_rate": 8.582677165354331e-05,
	"loss": 0.0574,
	"step": 600
	},
	{
	"epoch": 0.5888030888030888,
	"grad_norm": 0.49182403087615967,
	"learning_rate": 8.385826771653543e-05,
	"loss": 0.046,
	"step": 610
	},
	{
	"epoch": 0.5984555984555985,
	"grad_norm": 0.712940514087677,
	"learning_rate": 8.188976377952757e-05,
	"loss": 0.0451,
	"step": 620
	},
	{
	"epoch": 0.6081081081081081,
	"grad_norm": 0.5095399022102356,
	"learning_rate": 7.992125984251969e-05,
	"loss": 0.0527,
	"step": 630
	},
	{
	"epoch": 0.6177606177606177,
	"grad_norm": 0.4520854949951172,
	"learning_rate": 7.795275590551181e-05,
	"loss": 0.0498,
	"step": 640
	},
	{
	"epoch": 0.6274131274131274,
	"grad_norm": 0.7156594395637512,
	"learning_rate": 7.598425196850393e-05,
	"loss": 0.0482,
	"step": 650
	},
	{
	"epoch": 0.637065637065637,
	"grad_norm": 0.3221661150455475,
	"learning_rate": 7.401574803149607e-05,
	"loss": 0.0399,
	"step": 660
	},
	{
	"epoch": 0.6467181467181468,
	"grad_norm": 0.22723270952701569,
	"learning_rate": 7.20472440944882e-05,
	"loss": 0.0388,
	"step": 670
	},
	{
	"epoch": 0.6563706563706564,
	"grad_norm": 0.5212529301643372,
	"learning_rate": 7.007874015748031e-05,
	"loss": 0.0389,
	"step": 680
	},
	{
	"epoch": 0.666023166023166,
	"grad_norm": 0.5126072764396667,
	"learning_rate": 6.811023622047245e-05,
	"loss": 0.0525,
	"step": 690
	},
	{
	"epoch": 0.6756756756756757,
	"grad_norm": 0.7248953580856323,
	"learning_rate": 6.614173228346457e-05,
	"loss": 0.0619,
	"step": 700
	},
	{
	"epoch": 0.6853281853281853,
	"grad_norm": 0.3085402548313141,
	"learning_rate": 6.417322834645669e-05,
	"loss": 0.053,
	"step": 710
	},
	{
	"epoch": 0.694980694980695,
	"grad_norm": 0.3684390187263489,
	"learning_rate": 6.220472440944882e-05,
	"loss": 0.0352,
	"step": 720
	},
	{
	"epoch": 0.7046332046332047,
	"grad_norm": 0.5736101269721985,
	"learning_rate": 6.0236220472440953e-05,
	"loss": 0.0524,
	"step": 730
	},
	{
	"epoch": 0.7142857142857143,
	"grad_norm": 0.24105365574359894,
	"learning_rate": 5.826771653543307e-05,
	"loss": 0.0361,
	"step": 740
	},
	{
	"epoch": 0.7239382239382239,
	"grad_norm": 0.4142036736011505,
	"learning_rate": 5.62992125984252e-05,
	"loss": 0.0423,
	"step": 750
	},
	{
	"epoch": 0.7335907335907336,
	"grad_norm": 0.43454453349113464,
	"learning_rate": 5.433070866141733e-05,
	"loss": 0.0573,
	"step": 760
	},
	{
	"epoch": 0.7432432432432432,
	"grad_norm": 0.6893749833106995,
	"learning_rate": 5.236220472440945e-05,
	"loss": 0.0578,
	"step": 770
	},
	{
	"epoch": 0.752895752895753,
	"grad_norm": 0.38158169388771057,
	"learning_rate": 5.0393700787401575e-05,
	"loss": 0.042,
	"step": 780
	},
	{
	"epoch": 0.7625482625482626,
	"grad_norm": 0.6105143427848816,
	"learning_rate": 4.84251968503937e-05,
	"loss": 0.0632,
	"step": 790
	},
	{
	"epoch": 0.7722007722007722,
	"grad_norm": 0.28586989641189575,
	"learning_rate": 4.645669291338583e-05,
	"loss": 0.0453,
	"step": 800
	},
	{
	"epoch": 0.7818532818532818,
	"grad_norm": 0.4869031012058258,
	"learning_rate": 4.4488188976377954e-05,
	"loss": 0.0426,
	"step": 810
	},
	{
	"epoch": 0.7915057915057915,
	"grad_norm": 0.09411308914422989,
	"learning_rate": 4.251968503937008e-05,
	"loss": 0.034,
	"step": 820
	},
	{
	"epoch": 0.8011583011583011,
	"grad_norm": 0.43962639570236206,
	"learning_rate": 4.0551181102362204e-05,
	"loss": 0.0528,
	"step": 830
	},
	{
	"epoch": 0.8108108108108109,
	"grad_norm": 0.33696189522743225,
	"learning_rate": 3.858267716535433e-05,
	"loss": 0.0331,
	"step": 840
	},
	{
	"epoch": 0.8204633204633205,
	"grad_norm": 0.4021511673927307,
	"learning_rate": 3.661417322834646e-05,
	"loss": 0.0406,
	"step": 850
	},
	{
	"epoch": 0.8301158301158301,
	"grad_norm": 0.6140969395637512,
	"learning_rate": 3.464566929133858e-05,
	"loss": 0.0466,
	"step": 860
	},
	{
	"epoch": 0.8397683397683398,
	"grad_norm": 0.4614850878715515,
	"learning_rate": 3.2677165354330704e-05,
	"loss": 0.0285,
	"step": 870
	},
	{
	"epoch": 0.8494208494208494,
	"grad_norm": 0.6398894786834717,
	"learning_rate": 3.070866141732284e-05,
	"loss": 0.0515,
	"step": 880
	},
	{
	"epoch": 0.859073359073359,
	"grad_norm": 0.3512854278087616,
	"learning_rate": 2.874015748031496e-05,
	"loss": 0.0336,
	"step": 890
	},
	{
	"epoch": 0.8687258687258688,
	"grad_norm": 0.2678048610687256,
	"learning_rate": 2.677165354330709e-05,
	"loss": 0.0286,
	"step": 900
	},
	{
	"epoch": 0.8783783783783784,
	"grad_norm": 0.5848315358161926,
	"learning_rate": 2.4803149606299215e-05,
	"loss": 0.0384,
	"step": 910
	},
	{
	"epoch": 0.888030888030888,
	"grad_norm": 0.2881620228290558,
	"learning_rate": 2.283464566929134e-05,
	"loss": 0.0424,
	"step": 920
	},
	{
	"epoch": 0.8976833976833977,
	"grad_norm": 0.3219210207462311,
	"learning_rate": 2.0866141732283465e-05,
	"loss": 0.0488,
	"step": 930
	},
	{
	"epoch": 0.9073359073359073,
	"grad_norm": 0.408877432346344,
	"learning_rate": 1.889763779527559e-05,
	"loss": 0.0345,
	"step": 940
	},
	{
	"epoch": 0.916988416988417,
	"grad_norm": 0.20342448353767395,
	"learning_rate": 1.692913385826772e-05,
	"loss": 0.049,
	"step": 950
	},
	{
	"epoch": 0.9266409266409267,
	"grad_norm": 0.4167528748512268,
	"learning_rate": 1.4960629921259845e-05,
	"loss": 0.043,
	"step": 960
	},
	{
	"epoch": 0.9362934362934363,
	"grad_norm": 0.49826258420944214,
	"learning_rate": 1.2992125984251968e-05,
	"loss": 0.0412,
	"step": 970
	},
	{
	"epoch": 0.9459459459459459,
	"grad_norm": 0.5426783561706543,
	"learning_rate": 1.1023622047244095e-05,
	"loss": 0.0538,
	"step": 980
	},
	{
	"epoch": 0.9555984555984556,
	"grad_norm": 0.170461505651474,
	"learning_rate": 9.055118110236222e-06,
	"loss": 0.0483,
	"step": 990
	},
	{
	"epoch": 0.9652509652509652,
	"grad_norm": 0.3618116080760956,
	"learning_rate": 7.086614173228347e-06,
	"loss": 0.038,
	"step": 1000
	},
	{
	"epoch": 0.974903474903475,
	"grad_norm": 0.417107492685318,
	"learning_rate": 5.118110236220473e-06,
	"loss": 0.0373,
	"step": 1010
	},
	{
	"epoch": 0.9845559845559846,
	"grad_norm": 0.4384624660015106,
	"learning_rate": 3.1496062992125985e-06,
	"loss": 0.0534,
	"step": 1020
	},
	{
	"epoch": 0.9942084942084942,
	"grad_norm": 0.7793737649917603,
	"learning_rate": 1.1811023622047244e-06,
	"loss": 0.0406,
	"step": 1030
	}
	],
	"logging_steps": 10,
	"max_steps": 1036,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 50,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 1.0789424666431488e+16,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}