{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.4661369655694287,
"eval_steps": 500,
"global_step": 308,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.0303030303030305e-07,
"loss": 1.7199,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 1.5151515151515152e-06,
"loss": 1.6612,
"step": 5
},
{
"epoch": 0.02,
"learning_rate": 3.0303030303030305e-06,
"loss": 1.3333,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 4.5454545454545455e-06,
"loss": 1.1785,
"step": 15
},
{
"epoch": 0.03,
"learning_rate": 6.060606060606061e-06,
"loss": 1.0964,
"step": 20
},
{
"epoch": 0.04,
"learning_rate": 7.5757575757575764e-06,
"loss": 1.0473,
"step": 25
},
{
"epoch": 0.05,
"learning_rate": 9.090909090909091e-06,
"loss": 1.0209,
"step": 30
},
{
"epoch": 0.05,
"learning_rate": 1.0606060606060606e-05,
"loss": 0.9926,
"step": 35
},
{
"epoch": 0.06,
"learning_rate": 1.2121212121212122e-05,
"loss": 0.9776,
"step": 40
},
{
"epoch": 0.07,
"learning_rate": 1.3636363636363637e-05,
"loss": 0.9621,
"step": 45
},
{
"epoch": 0.08,
"learning_rate": 1.5151515151515153e-05,
"loss": 0.97,
"step": 50
},
{
"epoch": 0.08,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.9582,
"step": 55
},
{
"epoch": 0.09,
"learning_rate": 1.8181818181818182e-05,
"loss": 0.9425,
"step": 60
},
{
"epoch": 0.1,
"learning_rate": 1.96969696969697e-05,
"loss": 0.9514,
"step": 65
},
{
"epoch": 0.11,
"learning_rate": 1.999776230627102e-05,
"loss": 0.948,
"step": 70
},
{
"epoch": 0.11,
"learning_rate": 1.9988673391830082e-05,
"loss": 0.9323,
"step": 75
},
{
"epoch": 0.12,
"learning_rate": 1.9972599751485225e-05,
"loss": 0.9316,
"step": 80
},
{
"epoch": 0.13,
"learning_rate": 1.994955262496446e-05,
"loss": 0.9449,
"step": 85
},
{
"epoch": 0.14,
"learning_rate": 1.9919548128307954e-05,
"loss": 0.9339,
"step": 90
},
{
"epoch": 0.14,
"learning_rate": 1.9882607242598663e-05,
"loss": 0.9338,
"step": 95
},
{
"epoch": 0.15,
"learning_rate": 1.9838755799290993e-05,
"loss": 0.9266,
"step": 100
},
{
"epoch": 0.16,
"learning_rate": 1.978802446214779e-05,
"loss": 0.9271,
"step": 105
},
{
"epoch": 0.17,
"learning_rate": 1.973044870579824e-05,
"loss": 0.9191,
"step": 110
},
{
"epoch": 0.17,
"learning_rate": 1.9666068790931733e-05,
"loss": 0.9168,
"step": 115
},
{
"epoch": 0.18,
"learning_rate": 1.9594929736144978e-05,
"loss": 0.9268,
"step": 120
},
{
"epoch": 0.19,
"learning_rate": 1.9517081286462082e-05,
"loss": 0.9216,
"step": 125
},
{
"epoch": 0.2,
"learning_rate": 1.9432577878549635e-05,
"loss": 0.9196,
"step": 130
},
{
"epoch": 0.2,
"learning_rate": 1.9341478602651068e-05,
"loss": 0.912,
"step": 135
},
{
"epoch": 0.21,
"learning_rate": 1.9243847161266924e-05,
"loss": 0.9076,
"step": 140
},
{
"epoch": 0.22,
"learning_rate": 1.913975182460996e-05,
"loss": 0.9104,
"step": 145
},
{
"epoch": 0.23,
"learning_rate": 1.9029265382866216e-05,
"loss": 0.9097,
"step": 150
},
{
"epoch": 0.23,
"learning_rate": 1.891246509529539e-05,
"loss": 0.9129,
"step": 155
},
{
"epoch": 0.24,
"learning_rate": 1.8789432636206197e-05,
"loss": 0.9069,
"step": 160
},
{
"epoch": 0.25,
"learning_rate": 1.866025403784439e-05,
"loss": 0.9116,
"step": 165
},
{
"epoch": 0.26,
"learning_rate": 1.8525019630233463e-05,
"loss": 0.899,
"step": 170
},
{
"epoch": 0.26,
"learning_rate": 1.8383823978010077e-05,
"loss": 0.9107,
"step": 175
},
{
"epoch": 0.27,
"learning_rate": 1.8236765814298328e-05,
"loss": 0.9037,
"step": 180
},
{
"epoch": 0.28,
"learning_rate": 1.808394797166919e-05,
"loss": 0.9085,
"step": 185
},
{
"epoch": 0.29,
"learning_rate": 1.792547731023332e-05,
"loss": 0.9084,
"step": 190
},
{
"epoch": 0.3,
"learning_rate": 1.776146464291757e-05,
"loss": 0.8987,
"step": 195
},
{
"epoch": 0.3,
"learning_rate": 1.7592024657977432e-05,
"loss": 0.9144,
"step": 200
},
{
"epoch": 0.31,
"learning_rate": 1.7417275838799596e-05,
"loss": 0.913,
"step": 205
},
{
"epoch": 0.32,
"learning_rate": 1.72373403810507e-05,
"loss": 0.9063,
"step": 210
},
{
"epoch": 0.33,
"learning_rate": 1.7052344107230244e-05,
"loss": 0.905,
"step": 215
},
{
"epoch": 0.33,
"learning_rate": 1.686241637868734e-05,
"loss": 0.901,
"step": 220
},
{
"epoch": 0.34,
"learning_rate": 1.666769000516292e-05,
"loss": 0.8992,
"step": 225
},
{
"epoch": 0.35,
"learning_rate": 1.6468301151920576e-05,
"loss": 0.8965,
"step": 230
},
{
"epoch": 0.36,
"learning_rate": 1.6264389244531015e-05,
"loss": 0.9033,
"step": 235
},
{
"epoch": 0.36,
"learning_rate": 1.6056096871376667e-05,
"loss": 0.9095,
"step": 240
},
{
"epoch": 0.37,
"learning_rate": 1.584356968394471e-05,
"loss": 0.8989,
"step": 245
},
{
"epoch": 0.38,
"learning_rate": 1.5626956294978103e-05,
"loss": 0.8845,
"step": 250
},
{
"epoch": 0.39,
"learning_rate": 1.5406408174555978e-05,
"loss": 0.8935,
"step": 255
},
{
"epoch": 0.39,
"learning_rate": 1.5182079544175957e-05,
"loss": 0.8885,
"step": 260
},
{
"epoch": 0.4,
"learning_rate": 1.4954127268912525e-05,
"loss": 0.8837,
"step": 265
},
{
"epoch": 0.41,
"learning_rate": 1.472271074772683e-05,
"loss": 0.8817,
"step": 270
},
{
"epoch": 0.42,
"learning_rate": 1.4487991802004625e-05,
"loss": 0.8915,
"step": 275
},
{
"epoch": 0.42,
"learning_rate": 1.4250134562400301e-05,
"loss": 0.8914,
"step": 280
},
{
"epoch": 0.43,
"learning_rate": 1.4009305354066138e-05,
"loss": 0.8957,
"step": 285
},
{
"epoch": 0.44,
"learning_rate": 1.3765672580346986e-05,
"loss": 0.8861,
"step": 290
},
{
"epoch": 0.45,
"learning_rate": 1.3519406605021797e-05,
"loss": 0.8835,
"step": 295
},
{
"epoch": 0.45,
"learning_rate": 1.3270679633174219e-05,
"loss": 0.8872,
"step": 300
},
{
"epoch": 0.46,
"learning_rate": 1.3019665590775717e-05,
"loss": 0.8776,
"step": 305
},
{
"epoch": 0.47,
"eval_loss": 0.8892367482185364,
"eval_runtime": 215.6654,
"eval_samples_per_second": 170.769,
"eval_steps_per_second": 0.668,
"step": 308
},
{
"epoch": 0.47,
"step": 308,
"total_flos": 516068506337280.0,
"train_loss": 0.9461887153712186,
"train_runtime": 7026.0194,
"train_samples_per_second": 48.135,
"train_steps_per_second": 0.094
}
],
"logging_steps": 5,
"max_steps": 660,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 516068506337280.0,
"trial_name": null,
"trial_params": null
}