Qwen2.5-32B-Lora-HQ-e-1 / trainer_state.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.7866273352999017,
  "eval_steps": 100,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03933136676499508,
      "grad_norm": 0.481609046459198,
      "learning_rate": 0.00015,
      "loss": 2.0722,
      "step": 5
    },
    {
      "epoch": 0.07866273352999016,
      "grad_norm": 0.15720224380493164,
      "learning_rate": 0.0003,
      "loss": 1.4825,
      "step": 10
    },
    {
      "epoch": 0.11799410029498525,
      "grad_norm": 0.06716315448284149,
      "learning_rate": 0.00029759999999999997,
      "loss": 1.3333,
      "step": 15
    },
    {
      "epoch": 0.15732546705998032,
      "grad_norm": 0.06133478134870529,
      "learning_rate": 0.00029519999999999997,
      "loss": 1.2341,
      "step": 20
    },
    {
      "epoch": 0.19665683382497542,
      "grad_norm": 0.07264667749404907,
      "learning_rate": 0.00029279999999999996,
      "loss": 1.1756,
      "step": 25
    },
    {
      "epoch": 0.2359882005899705,
      "grad_norm": 0.07928217202425003,
      "learning_rate": 0.00029039999999999996,
      "loss": 1.1197,
      "step": 30
    },
    {
      "epoch": 0.2753195673549656,
      "grad_norm": 0.09420346468687057,
      "learning_rate": 0.00028799999999999995,
      "loss": 1.0834,
      "step": 35
    },
    {
      "epoch": 0.31465093411996065,
      "grad_norm": 0.0862259566783905,
      "learning_rate": 0.00028559999999999995,
      "loss": 1.044,
      "step": 40
    },
    {
      "epoch": 0.35398230088495575,
      "grad_norm": 0.09086894243955612,
      "learning_rate": 0.00028319999999999994,
      "loss": 1.0205,
      "step": 45
    },
    {
      "epoch": 0.39331366764995085,
      "grad_norm": 0.08469890058040619,
      "learning_rate": 0.0002808,
      "loss": 0.9798,
      "step": 50
    },
    {
      "epoch": 0.4326450344149459,
      "grad_norm": 0.10012397915124893,
      "learning_rate": 0.0002784,
      "loss": 0.9811,
      "step": 55
    },
    {
      "epoch": 0.471976401179941,
      "grad_norm": 0.08633492887020111,
      "learning_rate": 0.000276,
      "loss": 0.9556,
      "step": 60
    },
    {
      "epoch": 0.511307767944936,
      "grad_norm": 0.09879346191883087,
      "learning_rate": 0.0002736,
      "loss": 0.9446,
      "step": 65
    },
    {
      "epoch": 0.5506391347099312,
      "grad_norm": 0.08795857429504395,
      "learning_rate": 0.0002712,
      "loss": 0.9228,
      "step": 70
    },
    {
      "epoch": 0.5899705014749262,
      "grad_norm": 0.0837111845612526,
      "learning_rate": 0.0002688,
      "loss": 0.9279,
      "step": 75
    },
    {
      "epoch": 0.6293018682399213,
      "grad_norm": 0.08551318198442459,
      "learning_rate": 0.00026639999999999997,
      "loss": 0.9267,
      "step": 80
    },
    {
      "epoch": 0.6686332350049164,
      "grad_norm": 0.08481767773628235,
      "learning_rate": 0.00026399999999999997,
      "loss": 0.9082,
      "step": 85
    },
    {
      "epoch": 0.7079646017699115,
      "grad_norm": 0.100365050137043,
      "learning_rate": 0.00026159999999999996,
      "loss": 0.9028,
      "step": 90
    },
    {
      "epoch": 0.7472959685349065,
      "grad_norm": 0.08463772386312485,
      "learning_rate": 0.00025919999999999996,
      "loss": 0.8866,
      "step": 95
    },
    {
      "epoch": 0.7866273352999017,
      "grad_norm": 0.09628409892320633,
      "learning_rate": 0.00025679999999999995,
      "loss": 0.8787,
      "step": 100
    },
    {
      "epoch": 0.7866273352999017,
      "eval_loss": 0.8853636980056763,
      "eval_runtime": 24.3719,
      "eval_samples_per_second": 6.729,
      "eval_steps_per_second": 0.862,
      "step": 100
    }
  ],
  "logging_steps": 5,
  "max_steps": 635,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.073075182302659e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
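
The log_history list above is plain JSON, so it can be inspected without any Hugging Face tooling. Below is a minimal Python sketch, assuming a local copy of the file saved as trainer_state.json (the path is hypothetical); it loads the state, separates training entries (which carry "loss") from evaluation entries (which carry "eval_loss"), and prints the loss curve.

import json

# Hypothetical local path; point it at a downloaded copy of this file.
STATE_PATH = "trainer_state.json"

with open(STATE_PATH, "r", encoding="utf-8") as f:
    state = json.load(f)

# Training-log entries have a "loss" key; evaluation entries have "eval_loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"global_step={state['global_step']}  epoch={state['epoch']:.4f}  "
      f"max_steps={state['max_steps']}")

for entry in train_logs:
    print(f"step {entry['step']:>4}  lr {entry['learning_rate']:.6f}  "
          f"loss {entry['loss']:.4f}  grad_norm {entry['grad_norm']:.4f}")

for entry in eval_logs:
    print(f"eval @ step {entry['step']}: eval_loss {entry['eval_loss']:.4f}")

Run against the values logged above, this would show the training loss falling from about 2.07 at step 5 to 0.88 at step 100, with an eval_loss of roughly 0.885 at the step-100 checkpoint.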