lora_fulldata_chk1000 / trainer_state.json

upload_model

aac4c04 verified 4 months ago

No virus

4.82 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.461607677094324,
	"eval_steps": 500,
	"global_step": 10000,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.0001461607677094324,
	"grad_norm": 6.625,
	"learning_rate": 4.8721071863581e-08,
	"loss": 1.8813,
	"step": 1
	},
	{
	"epoch": 0.05846430708377296,
	"grad_norm": 0.3515625,
	"learning_rate": 1.94884287454324e-05,
	"loss": 1.2825,
	"step": 400
	},
	{
	"epoch": 0.11692861416754592,
	"grad_norm": 0.287109375,
	"learning_rate": 3.89768574908648e-05,
	"loss": 1.1339,
	"step": 800
	},
	{
	"epoch": 0.17539292125131886,
	"grad_norm": 0.234375,
	"learning_rate": 5.84652862362972e-05,
	"loss": 1.0996,
	"step": 1200
	},
	{
	"epoch": 0.23385722833509184,
	"grad_norm": 0.2177734375,
	"learning_rate": 7.79537149817296e-05,
	"loss": 1.0737,
	"step": 1600
	},
	{
	"epoch": 0.2923215354188648,
	"grad_norm": 0.2294921875,
	"learning_rate": 9.7442143727162e-05,
	"loss": 1.054,
	"step": 2000
	},
	{
	"epoch": 0.3507858425026377,
	"grad_norm": 0.2080078125,
	"learning_rate": 0.0001169305724725944,
	"loss": 1.0348,
	"step": 2400
	},
	{
	"epoch": 0.4092501495864107,
	"grad_norm": 0.224609375,
	"learning_rate": 0.0001364190012180268,
	"loss": 1.0185,
	"step": 2800
	},
	{
	"epoch": 0.4677144566701837,
	"grad_norm": 0.2275390625,
	"learning_rate": 0.0001559074299634592,
	"loss": 1.0078,
	"step": 3200
	},
	{
	"epoch": 0.5261787637539567,
	"grad_norm": 0.2314453125,
	"learning_rate": 0.0001753958587088916,
	"loss": 0.9962,
	"step": 3600
	},
	{
	"epoch": 0.5846430708377296,
	"grad_norm": 0.251953125,
	"learning_rate": 0.000194884287454324,
	"loss": 0.9884,
	"step": 4000
	},
	{
	"epoch": 0.6431073779215025,
	"grad_norm": 0.2431640625,
	"learning_rate": 0.00019996853166916095,
	"loss": 0.9802,
	"step": 4400
	},
	{
	"epoch": 0.7015716850052754,
	"grad_norm": 0.232421875,
	"learning_rate": 0.00019982537933878626,
	"loss": 0.9697,
	"step": 4800
	},
	{
	"epoch": 0.7600359920890485,
	"grad_norm": 0.236328125,
	"learning_rate": 0.0001995667218123705,
	"loss": 0.9594,
	"step": 5200
	},
	{
	"epoch": 0.8185002991728214,
	"grad_norm": 0.244140625,
	"learning_rate": 0.00019919285837541084,
	"loss": 0.9493,
	"step": 5600
	},
	{
	"epoch": 0.8769646062565943,
	"grad_norm": 0.255859375,
	"learning_rate": 0.00019870422161498958,
	"loss": 0.9399,
	"step": 6000
	},
	{
	"epoch": 0.9354289133403674,
	"grad_norm": 0.255859375,
	"learning_rate": 0.00019810137691923923,
	"loss": 0.9322,
	"step": 6400
	},
	{
	"epoch": 0.9938932204241403,
	"grad_norm": 0.24609375,
	"learning_rate": 0.00019738502182314765,
	"loss": 0.9238,
	"step": 6800
	},
	{
	"epoch": 1.0523575275079133,
	"grad_norm": 0.25390625,
	"learning_rate": 0.00019655598520145953,
	"loss": 0.8823,
	"step": 7200
	},
	{
	"epoch": 1.1108218345916863,
	"grad_norm": 0.251953125,
	"learning_rate": 0.00019561522630960813,
	"loss": 0.8803,
	"step": 7600
	},
	{
	"epoch": 1.1692861416754592,
	"grad_norm": 0.2578125,
	"learning_rate": 0.00019456383367378742,
	"loss": 0.8727,
	"step": 8000
	},
	{
	"epoch": 1.2277504487592321,
	"grad_norm": 0.265625,
	"learning_rate": 0.00019340302383144832,
	"loss": 0.8737,
	"step": 8400
	},
	{
	"epoch": 1.286214755843005,
	"grad_norm": 0.263671875,
	"learning_rate": 0.00019213413992367673,
	"loss": 0.8672,
	"step": 8800
	},
	{
	"epoch": 1.344679062926778,
	"grad_norm": 0.275390625,
	"learning_rate": 0.00019075865014108194,
	"loss": 0.8652,
	"step": 9200
	},
	{
	"epoch": 1.4031433700105511,
	"grad_norm": 0.26171875,
	"learning_rate": 0.00018927814602499394,
	"loss": 0.8605,
	"step": 9600
	},
	{
	"epoch": 1.461607677094324,
	"grad_norm": 0.275390625,
	"learning_rate": 0.00018769434062593454,
	"loss": 0.8569,
	"step": 10000
	}
	],
	"logging_steps": 400,
	"max_steps": 41046,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 6,
	"save_steps": 5000,
	"total_flos": 2.2714013468356038e+20,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}