llama-2-7b-fourierft-alpaca / trainer_state.json
vantaa32's picture
Upload folder using huggingface_hub
3bd7913 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 5,
"global_step": 2588,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.019319938176197836,
"grad_norm": 0.00098650180734694,
"learning_rate": 0.009615384615384616,
"loss": 0.9907,
"step": 50
},
{
"epoch": 0.03863987635239567,
"grad_norm": 0.000779022928327322,
"learning_rate": 0.019230769230769232,
"loss": 0.9647,
"step": 100
},
{
"epoch": 0.05795981452859351,
"grad_norm": 0.000611725845374167,
"learning_rate": 0.028846153846153844,
"loss": 0.9412,
"step": 150
},
{
"epoch": 0.07727975270479134,
"grad_norm": 0.0005838441429659724,
"learning_rate": 0.029457236842105262,
"loss": 0.9322,
"step": 200
},
{
"epoch": 0.09659969088098919,
"grad_norm": 0.0007691067876294255,
"learning_rate": 0.028840460526315788,
"loss": 0.9131,
"step": 250
},
{
"epoch": 0.11591962905718702,
"grad_norm": 0.0005935626104474068,
"learning_rate": 0.028223684210526314,
"loss": 0.9104,
"step": 300
},
{
"epoch": 0.13523956723338484,
"grad_norm": 0.0006890599033795297,
"learning_rate": 0.02760690789473684,
"loss": 0.9214,
"step": 350
},
{
"epoch": 0.1545595054095827,
"grad_norm": 0.0006042916793376207,
"learning_rate": 0.02699013157894737,
"loss": 0.9,
"step": 400
},
{
"epoch": 0.17387944358578053,
"grad_norm": 0.0005447549629025161,
"learning_rate": 0.026373355263157892,
"loss": 0.9097,
"step": 450
},
{
"epoch": 0.19319938176197837,
"grad_norm": 0.0004888740368187428,
"learning_rate": 0.02575657894736842,
"loss": 0.9037,
"step": 500
},
{
"epoch": 0.2125193199381762,
"grad_norm": 0.0008238813607022166,
"learning_rate": 0.025139802631578945,
"loss": 0.899,
"step": 550
},
{
"epoch": 0.23183925811437403,
"grad_norm": 0.000727724633179605,
"learning_rate": 0.024523026315789474,
"loss": 0.923,
"step": 600
},
{
"epoch": 0.2511591962905719,
"grad_norm": 0.0005605846527032554,
"learning_rate": 0.02390625,
"loss": 0.9031,
"step": 650
},
{
"epoch": 0.2704791344667697,
"grad_norm": 0.0007705381722189486,
"learning_rate": 0.023289473684210523,
"loss": 0.9013,
"step": 700
},
{
"epoch": 0.28979907264296756,
"grad_norm": 0.0007164838025346398,
"learning_rate": 0.022672697368421053,
"loss": 0.8971,
"step": 750
},
{
"epoch": 0.3091190108191654,
"grad_norm": 0.000717374321538955,
"learning_rate": 0.02205592105263158,
"loss": 0.8866,
"step": 800
},
{
"epoch": 0.3284389489953632,
"grad_norm": 0.0006394012016244233,
"learning_rate": 0.021439144736842105,
"loss": 0.899,
"step": 850
},
{
"epoch": 0.34775888717156106,
"grad_norm": 0.0006252205348573625,
"learning_rate": 0.02082236842105263,
"loss": 0.894,
"step": 900
},
{
"epoch": 0.3670788253477589,
"grad_norm": 0.0006903470493853092,
"learning_rate": 0.020205592105263157,
"loss": 0.8858,
"step": 950
},
{
"epoch": 0.38639876352395675,
"grad_norm": 0.0008341589127667248,
"learning_rate": 0.019588815789473683,
"loss": 0.9168,
"step": 1000
},
{
"epoch": 0.40571870170015456,
"grad_norm": 0.0005771280848421156,
"learning_rate": 0.01897203947368421,
"loss": 0.9117,
"step": 1050
},
{
"epoch": 0.4250386398763524,
"grad_norm": 0.000522978079970926,
"learning_rate": 0.018355263157894736,
"loss": 0.8939,
"step": 1100
},
{
"epoch": 0.44435857805255025,
"grad_norm": 0.0005450574099086225,
"learning_rate": 0.017738486842105265,
"loss": 0.9049,
"step": 1150
},
{
"epoch": 0.46367851622874806,
"grad_norm": 0.0005660468013957143,
"learning_rate": 0.017121710526315788,
"loss": 0.8944,
"step": 1200
},
{
"epoch": 0.48299845440494593,
"grad_norm": 0.0006663696258328855,
"learning_rate": 0.016504934210526314,
"loss": 0.8971,
"step": 1250
},
{
"epoch": 0.5023183925811437,
"grad_norm": 0.0005968479672446847,
"learning_rate": 0.01588815789473684,
"loss": 0.8917,
"step": 1300
},
{
"epoch": 0.5216383307573416,
"grad_norm": 0.0007491153082810342,
"learning_rate": 0.01527138157894737,
"loss": 0.8829,
"step": 1350
},
{
"epoch": 0.5409582689335394,
"grad_norm": 0.0006275599589571357,
"learning_rate": 0.014654605263157894,
"loss": 0.9058,
"step": 1400
},
{
"epoch": 0.5602782071097373,
"grad_norm": 0.0007617810624651611,
"learning_rate": 0.01403782894736842,
"loss": 0.9051,
"step": 1450
},
{
"epoch": 0.5795981452859351,
"grad_norm": 0.0006214394234120846,
"learning_rate": 0.013421052631578946,
"loss": 0.8879,
"step": 1500
},
{
"epoch": 0.5989180834621329,
"grad_norm": 0.0006560624460689723,
"learning_rate": 0.012804276315789473,
"loss": 0.8991,
"step": 1550
},
{
"epoch": 0.6182380216383307,
"grad_norm": 0.0007683933363296092,
"learning_rate": 0.0121875,
"loss": 0.9081,
"step": 1600
},
{
"epoch": 0.6375579598145286,
"grad_norm": 0.0005783849046565592,
"learning_rate": 0.011570723684210527,
"loss": 0.9067,
"step": 1650
},
{
"epoch": 0.6568778979907264,
"grad_norm": 0.0007958198548294604,
"learning_rate": 0.010953947368421053,
"loss": 0.885,
"step": 1700
},
{
"epoch": 0.6761978361669243,
"grad_norm": 0.0006095783319324255,
"learning_rate": 0.010337171052631579,
"loss": 0.8928,
"step": 1750
},
{
"epoch": 0.6955177743431221,
"grad_norm": 0.000699816329870373,
"learning_rate": 0.009720394736842105,
"loss": 0.903,
"step": 1800
},
{
"epoch": 0.7148377125193199,
"grad_norm": 0.0008128538611344993,
"learning_rate": 0.009103618421052631,
"loss": 0.9036,
"step": 1850
},
{
"epoch": 0.7341576506955177,
"grad_norm": 0.0006495247362181544,
"learning_rate": 0.008486842105263157,
"loss": 0.8907,
"step": 1900
},
{
"epoch": 0.7534775888717156,
"grad_norm": 0.0005265743238851428,
"learning_rate": 0.007870065789473685,
"loss": 0.8843,
"step": 1950
},
{
"epoch": 0.7727975270479135,
"grad_norm": 0.0006601494387723505,
"learning_rate": 0.0072532894736842095,
"loss": 0.8925,
"step": 2000
},
{
"epoch": 0.7921174652241113,
"grad_norm": 0.0005823367391712964,
"learning_rate": 0.0066365131578947365,
"loss": 0.8954,
"step": 2050
},
{
"epoch": 0.8114374034003091,
"grad_norm": 0.0005229181842878461,
"learning_rate": 0.0060197368421052635,
"loss": 0.903,
"step": 2100
},
{
"epoch": 0.8307573415765069,
"grad_norm": 0.0005145368631929159,
"learning_rate": 0.00540296052631579,
"loss": 0.8923,
"step": 2150
},
{
"epoch": 0.8500772797527048,
"grad_norm": 0.0006071292445994914,
"learning_rate": 0.004786184210526316,
"loss": 0.8804,
"step": 2200
},
{
"epoch": 0.8693972179289027,
"grad_norm": 0.0006730407476425171,
"learning_rate": 0.004169407894736842,
"loss": 0.8919,
"step": 2250
},
{
"epoch": 0.8887171561051005,
"grad_norm": 0.0006455178954638541,
"learning_rate": 0.003552631578947368,
"loss": 0.896,
"step": 2300
},
{
"epoch": 0.9080370942812983,
"grad_norm": 0.0004997382056899369,
"learning_rate": 0.002935855263157895,
"loss": 0.8921,
"step": 2350
},
{
"epoch": 0.9273570324574961,
"grad_norm": 0.00045192165998741984,
"learning_rate": 0.002319078947368421,
"loss": 0.8839,
"step": 2400
},
{
"epoch": 0.9466769706336939,
"grad_norm": 0.0004822098126169294,
"learning_rate": 0.0017023026315789475,
"loss": 0.8988,
"step": 2450
},
{
"epoch": 0.9659969088098919,
"grad_norm": 0.0005721400957554579,
"learning_rate": 0.0010855263157894736,
"loss": 0.9045,
"step": 2500
},
{
"epoch": 0.9853168469860897,
"grad_norm": 0.0005698847235180438,
"learning_rate": 0.00046875,
"loss": 0.893,
"step": 2550
},
{
"epoch": 1.0,
"eval_loss": 0.8954795002937317,
"eval_runtime": 1619.6937,
"eval_samples_per_second": 6.391,
"eval_steps_per_second": 0.799,
"step": 2588
}
],
"logging_steps": 50,
"max_steps": 2588,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.394319248976609e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}