alpaca / trainer_state.json
esfrankel17's picture
Upload folder using huggingface_hub
3dbedcb verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9993815708101422,
"eval_steps": 500,
"global_step": 404,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.024737167594310452,
"grad_norm": 3.8928613137780697,
"learning_rate": 5e-06,
"loss": 0.8929,
"step": 10
},
{
"epoch": 0.049474335188620905,
"grad_norm": 4.590030022211483,
"learning_rate": 5e-06,
"loss": 0.7739,
"step": 20
},
{
"epoch": 0.07421150278293136,
"grad_norm": 2.0445731869289467,
"learning_rate": 5e-06,
"loss": 0.7286,
"step": 30
},
{
"epoch": 0.09894867037724181,
"grad_norm": 1.4578350201244652,
"learning_rate": 5e-06,
"loss": 0.6964,
"step": 40
},
{
"epoch": 0.12368583797155226,
"grad_norm": 1.2041164043429626,
"learning_rate": 5e-06,
"loss": 0.6808,
"step": 50
},
{
"epoch": 0.14842300556586271,
"grad_norm": 0.9753852138770908,
"learning_rate": 5e-06,
"loss": 0.6577,
"step": 60
},
{
"epoch": 0.17316017316017315,
"grad_norm": 0.8981690468785772,
"learning_rate": 5e-06,
"loss": 0.6476,
"step": 70
},
{
"epoch": 0.19789734075448362,
"grad_norm": 0.7458796589430092,
"learning_rate": 5e-06,
"loss": 0.6395,
"step": 80
},
{
"epoch": 0.22263450834879406,
"grad_norm": 0.7140456505991135,
"learning_rate": 5e-06,
"loss": 0.6376,
"step": 90
},
{
"epoch": 0.24737167594310452,
"grad_norm": 0.9200299620458595,
"learning_rate": 5e-06,
"loss": 0.6307,
"step": 100
},
{
"epoch": 0.272108843537415,
"grad_norm": 0.5761268046084219,
"learning_rate": 5e-06,
"loss": 0.6175,
"step": 110
},
{
"epoch": 0.29684601113172543,
"grad_norm": 0.5836896660196662,
"learning_rate": 5e-06,
"loss": 0.6177,
"step": 120
},
{
"epoch": 0.32158317872603587,
"grad_norm": 1.038827197557366,
"learning_rate": 5e-06,
"loss": 0.6132,
"step": 130
},
{
"epoch": 0.3463203463203463,
"grad_norm": 0.6673252980412175,
"learning_rate": 5e-06,
"loss": 0.6091,
"step": 140
},
{
"epoch": 0.37105751391465674,
"grad_norm": 0.6307785927320235,
"learning_rate": 5e-06,
"loss": 0.6046,
"step": 150
},
{
"epoch": 0.39579468150896724,
"grad_norm": 0.5244651264271686,
"learning_rate": 5e-06,
"loss": 0.601,
"step": 160
},
{
"epoch": 0.4205318491032777,
"grad_norm": 0.5705407579445089,
"learning_rate": 5e-06,
"loss": 0.6082,
"step": 170
},
{
"epoch": 0.4452690166975881,
"grad_norm": 0.5554154594013059,
"learning_rate": 5e-06,
"loss": 0.6003,
"step": 180
},
{
"epoch": 0.47000618429189855,
"grad_norm": 0.6019873008818303,
"learning_rate": 5e-06,
"loss": 0.5994,
"step": 190
},
{
"epoch": 0.49474335188620905,
"grad_norm": 0.7034894074017951,
"learning_rate": 5e-06,
"loss": 0.5986,
"step": 200
},
{
"epoch": 0.5194805194805194,
"grad_norm": 0.5638344674323469,
"learning_rate": 5e-06,
"loss": 0.6023,
"step": 210
},
{
"epoch": 0.54421768707483,
"grad_norm": 0.5443793743216905,
"learning_rate": 5e-06,
"loss": 0.5927,
"step": 220
},
{
"epoch": 0.5689548546691404,
"grad_norm": 0.5802674598015297,
"learning_rate": 5e-06,
"loss": 0.5925,
"step": 230
},
{
"epoch": 0.5936920222634509,
"grad_norm": 0.592738891502665,
"learning_rate": 5e-06,
"loss": 0.5928,
"step": 240
},
{
"epoch": 0.6184291898577613,
"grad_norm": 0.5388550762260421,
"learning_rate": 5e-06,
"loss": 0.5858,
"step": 250
},
{
"epoch": 0.6431663574520717,
"grad_norm": 0.5593031272628818,
"learning_rate": 5e-06,
"loss": 0.5879,
"step": 260
},
{
"epoch": 0.6679035250463822,
"grad_norm": 0.6608335560611281,
"learning_rate": 5e-06,
"loss": 0.5844,
"step": 270
},
{
"epoch": 0.6926406926406926,
"grad_norm": 0.6327217733233739,
"learning_rate": 5e-06,
"loss": 0.5755,
"step": 280
},
{
"epoch": 0.717377860235003,
"grad_norm": 0.5769636309953428,
"learning_rate": 5e-06,
"loss": 0.5843,
"step": 290
},
{
"epoch": 0.7421150278293135,
"grad_norm": 0.5532053151787545,
"learning_rate": 5e-06,
"loss": 0.5864,
"step": 300
},
{
"epoch": 0.766852195423624,
"grad_norm": 0.6400898941077486,
"learning_rate": 5e-06,
"loss": 0.5822,
"step": 310
},
{
"epoch": 0.7915893630179345,
"grad_norm": 0.602629447160874,
"learning_rate": 5e-06,
"loss": 0.5727,
"step": 320
},
{
"epoch": 0.8163265306122449,
"grad_norm": 0.5999318227987905,
"learning_rate": 5e-06,
"loss": 0.5794,
"step": 330
},
{
"epoch": 0.8410636982065554,
"grad_norm": 0.5332757259893975,
"learning_rate": 5e-06,
"loss": 0.5793,
"step": 340
},
{
"epoch": 0.8658008658008658,
"grad_norm": 0.5492421058512896,
"learning_rate": 5e-06,
"loss": 0.5744,
"step": 350
},
{
"epoch": 0.8905380333951762,
"grad_norm": 0.6007108771595042,
"learning_rate": 5e-06,
"loss": 0.5707,
"step": 360
},
{
"epoch": 0.9152752009894867,
"grad_norm": 0.5866994201925174,
"learning_rate": 5e-06,
"loss": 0.5765,
"step": 370
},
{
"epoch": 0.9400123685837971,
"grad_norm": 0.4956333122054928,
"learning_rate": 5e-06,
"loss": 0.5714,
"step": 380
},
{
"epoch": 0.9647495361781077,
"grad_norm": 0.5508918734344029,
"learning_rate": 5e-06,
"loss": 0.5682,
"step": 390
},
{
"epoch": 0.9894867037724181,
"grad_norm": 0.599971023810852,
"learning_rate": 5e-06,
"loss": 0.5719,
"step": 400
},
{
"epoch": 0.9993815708101422,
"eval_loss": 0.5685587525367737,
"eval_runtime": 289.4782,
"eval_samples_per_second": 37.626,
"eval_steps_per_second": 0.591,
"step": 404
}
],
"logging_steps": 10,
"max_steps": 1212,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 676924426813440.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}