{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.934306569343065,
  "eval_steps": 500,
  "global_step": 340,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.145985401459854,
      "grad_norm": 0.1969512701034546,
      "learning_rate": 0.0002,
      "loss": 1.6345,
      "step": 10
    },
    {
      "epoch": 0.291970802919708,
      "grad_norm": 0.17513811588287354,
      "learning_rate": 0.0002,
      "loss": 1.4433,
      "step": 20
    },
    {
      "epoch": 0.43795620437956206,
      "grad_norm": 0.17957448959350586,
      "learning_rate": 0.0002,
      "loss": 1.4281,
      "step": 30
    },
    {
      "epoch": 0.583941605839416,
      "grad_norm": 0.16534289717674255,
      "learning_rate": 0.0002,
      "loss": 1.3108,
      "step": 40
    },
    {
      "epoch": 0.7299270072992701,
      "grad_norm": 0.1696271151304245,
      "learning_rate": 0.0002,
      "loss": 1.3387,
      "step": 50
    },
    {
      "epoch": 0.8759124087591241,
      "grad_norm": 0.14049474895000458,
      "learning_rate": 0.0002,
      "loss": 1.3062,
      "step": 60
    },
    {
      "epoch": 1.0145985401459854,
      "grad_norm": 0.14177313446998596,
      "learning_rate": 0.0002,
      "loss": 1.3354,
      "step": 70
    },
    {
      "epoch": 1.1605839416058394,
      "grad_norm": 0.18519847095012665,
      "learning_rate": 0.0002,
      "loss": 1.1708,
      "step": 80
    },
    {
      "epoch": 1.3065693430656935,
      "grad_norm": 0.1618432253599167,
      "learning_rate": 0.0002,
      "loss": 1.09,
      "step": 90
    },
    {
      "epoch": 1.4525547445255476,
      "grad_norm": 0.22861599922180176,
      "learning_rate": 0.0002,
      "loss": 1.1921,
      "step": 100
    },
    {
      "epoch": 1.5985401459854014,
      "grad_norm": 0.18863703310489655,
      "learning_rate": 0.0002,
      "loss": 1.1883,
      "step": 110
    },
    {
      "epoch": 1.7445255474452555,
      "grad_norm": 0.21330659091472626,
      "learning_rate": 0.0002,
      "loss": 1.1134,
      "step": 120
    },
    {
      "epoch": 1.8905109489051095,
      "grad_norm": 0.16281643509864807,
      "learning_rate": 0.0002,
      "loss": 1.1218,
      "step": 130
    },
    {
      "epoch": 2.0291970802919708,
      "grad_norm": 0.1913510113954544,
      "learning_rate": 0.0002,
      "loss": 1.0937,
      "step": 140
    },
    {
      "epoch": 2.1751824817518246,
      "grad_norm": 0.258828729391098,
      "learning_rate": 0.0002,
      "loss": 0.844,
      "step": 150
    },
    {
      "epoch": 2.321167883211679,
      "grad_norm": 0.2650935649871826,
      "learning_rate": 0.0002,
      "loss": 0.9038,
      "step": 160
    },
    {
      "epoch": 2.4671532846715327,
      "grad_norm": 0.21534548699855804,
      "learning_rate": 0.0002,
      "loss": 0.8955,
      "step": 170
    },
    {
      "epoch": 2.613138686131387,
      "grad_norm": 0.28936412930488586,
      "learning_rate": 0.0002,
      "loss": 0.9623,
      "step": 180
    },
    {
      "epoch": 2.759124087591241,
      "grad_norm": 0.22461599111557007,
      "learning_rate": 0.0002,
      "loss": 0.9206,
      "step": 190
    },
    {
      "epoch": 2.905109489051095,
      "grad_norm": 0.2333170771598816,
      "learning_rate": 0.0002,
      "loss": 0.9143,
      "step": 200
    },
    {
      "epoch": 3.0437956204379564,
      "grad_norm": 0.23609858751296997,
      "learning_rate": 0.0002,
      "loss": 0.8417,
      "step": 210
    },
    {
      "epoch": 3.18978102189781,
      "grad_norm": 0.2533261477947235,
      "learning_rate": 0.0002,
      "loss": 0.6803,
      "step": 220
    },
    {
      "epoch": 3.335766423357664,
      "grad_norm": 0.28708019852638245,
      "learning_rate": 0.0002,
      "loss": 0.6947,
      "step": 230
    },
    {
      "epoch": 3.4817518248175183,
      "grad_norm": 0.2757578194141388,
      "learning_rate": 0.0002,
      "loss": 0.6416,
      "step": 240
    },
    {
      "epoch": 3.627737226277372,
      "grad_norm": 0.29800841212272644,
      "learning_rate": 0.0002,
      "loss": 0.6616,
      "step": 250
    },
    {
      "epoch": 3.7737226277372264,
      "grad_norm": 0.28990235924720764,
      "learning_rate": 0.0002,
      "loss": 0.6788,
      "step": 260
    },
    {
      "epoch": 3.9197080291970803,
      "grad_norm": 0.2645922303199768,
      "learning_rate": 0.0002,
      "loss": 0.6891,
      "step": 270
    },
    {
      "epoch": 4.0583941605839415,
      "grad_norm": 0.2718052864074707,
      "learning_rate": 0.0002,
      "loss": 0.6122,
      "step": 280
    },
    {
      "epoch": 4.204379562043796,
      "grad_norm": 0.2716304063796997,
      "learning_rate": 0.0002,
      "loss": 0.4449,
      "step": 290
    },
    {
      "epoch": 4.350364963503649,
      "grad_norm": 0.30169153213500977,
      "learning_rate": 0.0002,
      "loss": 0.4474,
      "step": 300
    },
    {
      "epoch": 4.4963503649635035,
      "grad_norm": 0.29635927081108093,
      "learning_rate": 0.0002,
      "loss": 0.4637,
      "step": 310
    },
    {
      "epoch": 4.642335766423358,
      "grad_norm": 0.3702072501182556,
      "learning_rate": 0.0002,
      "loss": 0.4961,
      "step": 320
    },
    {
      "epoch": 4.788321167883212,
      "grad_norm": 0.3127199709415436,
      "learning_rate": 0.0002,
      "loss": 0.4497,
      "step": 330
    },
    {
      "epoch": 4.934306569343065,
      "grad_norm": 0.2875048816204071,
      "learning_rate": 0.0002,
      "loss": 0.4823,
      "step": 340
    }
  ],
  "logging_steps": 10,
  "max_steps": 340,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.364208997702451e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}