Mistral-7B-v0.1_mbe_no / trainer_state.json
zlucia's picture
End of training
d455384 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.003338898163606,
"eval_steps": 10,
"global_step": 300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07,
"learning_rate": 3e-05,
"loss": 1.5245,
"step": 10
},
{
"epoch": 0.07,
"eval_accuracy": 0.3355263157894737,
"eval_loss": 0.6506821513175964,
"eval_runtime": 16.9243,
"eval_samples_per_second": 17.962,
"eval_steps_per_second": 4.491,
"step": 10
},
{
"epoch": 0.13,
"learning_rate": 3e-05,
"loss": 0.6666,
"step": 20
},
{
"epoch": 0.13,
"eval_accuracy": 0.3815789473684211,
"eval_loss": 0.6464425325393677,
"eval_runtime": 16.9195,
"eval_samples_per_second": 17.967,
"eval_steps_per_second": 4.492,
"step": 20
},
{
"epoch": 0.2,
"learning_rate": 3e-05,
"loss": 0.6527,
"step": 30
},
{
"epoch": 0.2,
"eval_accuracy": 0.3684210526315789,
"eval_loss": 0.6426967978477478,
"eval_runtime": 16.9282,
"eval_samples_per_second": 17.958,
"eval_steps_per_second": 4.49,
"step": 30
},
{
"epoch": 0.27,
"learning_rate": 3e-05,
"loss": 0.6168,
"step": 40
},
{
"epoch": 0.27,
"eval_accuracy": 0.3980263157894737,
"eval_loss": 0.6321499943733215,
"eval_runtime": 17.0092,
"eval_samples_per_second": 17.873,
"eval_steps_per_second": 4.468,
"step": 40
},
{
"epoch": 0.33,
"learning_rate": 3e-05,
"loss": 0.6584,
"step": 50
},
{
"epoch": 0.33,
"eval_accuracy": 0.39144736842105265,
"eval_loss": 0.6181844472885132,
"eval_runtime": 16.9419,
"eval_samples_per_second": 17.944,
"eval_steps_per_second": 4.486,
"step": 50
},
{
"epoch": 0.4,
"learning_rate": 3e-05,
"loss": 0.586,
"step": 60
},
{
"epoch": 0.4,
"eval_accuracy": 0.4144736842105263,
"eval_loss": 0.6244160532951355,
"eval_runtime": 16.9269,
"eval_samples_per_second": 17.96,
"eval_steps_per_second": 4.49,
"step": 60
},
{
"epoch": 0.47,
"learning_rate": 3e-05,
"loss": 0.5924,
"step": 70
},
{
"epoch": 0.47,
"eval_accuracy": 0.4342105263157895,
"eval_loss": 0.6033625602722168,
"eval_runtime": 16.9236,
"eval_samples_per_second": 17.963,
"eval_steps_per_second": 4.491,
"step": 70
},
{
"epoch": 0.53,
"learning_rate": 3e-05,
"loss": 0.6069,
"step": 80
},
{
"epoch": 0.53,
"eval_accuracy": 0.4375,
"eval_loss": 0.6096391677856445,
"eval_runtime": 16.9238,
"eval_samples_per_second": 17.963,
"eval_steps_per_second": 4.491,
"step": 80
},
{
"epoch": 0.6,
"learning_rate": 3e-05,
"loss": 0.5999,
"step": 90
},
{
"epoch": 0.6,
"eval_accuracy": 0.4407894736842105,
"eval_loss": 0.6095999479293823,
"eval_runtime": 16.9342,
"eval_samples_per_second": 17.952,
"eval_steps_per_second": 4.488,
"step": 90
},
{
"epoch": 0.67,
"learning_rate": 3e-05,
"loss": 0.6206,
"step": 100
},
{
"epoch": 0.67,
"eval_accuracy": 0.45723684210526316,
"eval_loss": 0.607021152973175,
"eval_runtime": 16.9304,
"eval_samples_per_second": 17.956,
"eval_steps_per_second": 4.489,
"step": 100
},
{
"epoch": 0.73,
"learning_rate": 3e-05,
"loss": 0.5793,
"step": 110
},
{
"epoch": 0.73,
"eval_accuracy": 0.45723684210526316,
"eval_loss": 0.601601243019104,
"eval_runtime": 16.9375,
"eval_samples_per_second": 17.948,
"eval_steps_per_second": 4.487,
"step": 110
},
{
"epoch": 0.8,
"learning_rate": 3e-05,
"loss": 0.6208,
"step": 120
},
{
"epoch": 0.8,
"eval_accuracy": 0.4605263157894737,
"eval_loss": 0.5902404189109802,
"eval_runtime": 16.924,
"eval_samples_per_second": 17.963,
"eval_steps_per_second": 4.491,
"step": 120
},
{
"epoch": 0.87,
"learning_rate": 3e-05,
"loss": 0.5622,
"step": 130
},
{
"epoch": 0.87,
"eval_accuracy": 0.4769736842105263,
"eval_loss": 0.5775408744812012,
"eval_runtime": 16.9329,
"eval_samples_per_second": 17.953,
"eval_steps_per_second": 4.488,
"step": 130
},
{
"epoch": 0.93,
"learning_rate": 3e-05,
"loss": 0.5502,
"step": 140
},
{
"epoch": 0.93,
"eval_accuracy": 0.46710526315789475,
"eval_loss": 0.57607102394104,
"eval_runtime": 16.9226,
"eval_samples_per_second": 17.964,
"eval_steps_per_second": 4.491,
"step": 140
},
{
"epoch": 1.0,
"learning_rate": 3e-05,
"loss": 0.5958,
"step": 150
},
{
"epoch": 1.0,
"eval_accuracy": 0.4901315789473684,
"eval_loss": 0.5606401562690735,
"eval_runtime": 16.929,
"eval_samples_per_second": 17.957,
"eval_steps_per_second": 4.489,
"step": 150
},
{
"epoch": 1.07,
"learning_rate": 3e-05,
"loss": 0.4558,
"step": 160
},
{
"epoch": 1.07,
"eval_accuracy": 0.47368421052631576,
"eval_loss": 0.5839833617210388,
"eval_runtime": 16.9304,
"eval_samples_per_second": 17.956,
"eval_steps_per_second": 4.489,
"step": 160
},
{
"epoch": 1.14,
"learning_rate": 3e-05,
"loss": 0.4411,
"step": 170
},
{
"epoch": 1.14,
"eval_accuracy": 0.4901315789473684,
"eval_loss": 0.5631235837936401,
"eval_runtime": 16.9238,
"eval_samples_per_second": 17.963,
"eval_steps_per_second": 4.491,
"step": 170
},
{
"epoch": 1.2,
"learning_rate": 3e-05,
"loss": 0.4144,
"step": 180
},
{
"epoch": 1.2,
"eval_accuracy": 0.5,
"eval_loss": 0.5744868516921997,
"eval_runtime": 16.9382,
"eval_samples_per_second": 17.948,
"eval_steps_per_second": 4.487,
"step": 180
},
{
"epoch": 1.27,
"learning_rate": 3e-05,
"loss": 0.4647,
"step": 190
},
{
"epoch": 1.27,
"eval_accuracy": 0.4605263157894737,
"eval_loss": 0.593177080154419,
"eval_runtime": 16.932,
"eval_samples_per_second": 17.954,
"eval_steps_per_second": 4.489,
"step": 190
},
{
"epoch": 1.34,
"learning_rate": 3e-05,
"loss": 0.4504,
"step": 200
},
{
"epoch": 1.34,
"eval_accuracy": 0.5098684210526315,
"eval_loss": 0.5798581838607788,
"eval_runtime": 16.9337,
"eval_samples_per_second": 17.952,
"eval_steps_per_second": 4.488,
"step": 200
},
{
"epoch": 1.4,
"learning_rate": 3e-05,
"loss": 0.4299,
"step": 210
},
{
"epoch": 1.4,
"eval_accuracy": 0.4934210526315789,
"eval_loss": 0.64882493019104,
"eval_runtime": 16.9391,
"eval_samples_per_second": 17.947,
"eval_steps_per_second": 4.487,
"step": 210
},
{
"epoch": 1.47,
"learning_rate": 3e-05,
"loss": 0.425,
"step": 220
},
{
"epoch": 1.47,
"eval_accuracy": 0.5131578947368421,
"eval_loss": 0.5704348683357239,
"eval_runtime": 16.9325,
"eval_samples_per_second": 17.954,
"eval_steps_per_second": 4.488,
"step": 220
},
{
"epoch": 1.54,
"learning_rate": 3e-05,
"loss": 0.4152,
"step": 230
},
{
"epoch": 1.54,
"eval_accuracy": 0.506578947368421,
"eval_loss": 0.5582014322280884,
"eval_runtime": 16.9258,
"eval_samples_per_second": 17.961,
"eval_steps_per_second": 4.49,
"step": 230
},
{
"epoch": 1.6,
"learning_rate": 3e-05,
"loss": 0.425,
"step": 240
},
{
"epoch": 1.6,
"eval_accuracy": 0.5328947368421053,
"eval_loss": 0.5488855838775635,
"eval_runtime": 16.9288,
"eval_samples_per_second": 17.958,
"eval_steps_per_second": 4.489,
"step": 240
},
{
"epoch": 1.67,
"learning_rate": 3e-05,
"loss": 0.446,
"step": 250
},
{
"epoch": 1.67,
"eval_accuracy": 0.5197368421052632,
"eval_loss": 0.5479023456573486,
"eval_runtime": 16.9319,
"eval_samples_per_second": 17.954,
"eval_steps_per_second": 4.489,
"step": 250
},
{
"epoch": 1.74,
"learning_rate": 3e-05,
"loss": 0.3908,
"step": 260
},
{
"epoch": 1.74,
"eval_accuracy": 0.5164473684210527,
"eval_loss": 0.5564107894897461,
"eval_runtime": 16.9414,
"eval_samples_per_second": 17.944,
"eval_steps_per_second": 4.486,
"step": 260
},
{
"epoch": 1.8,
"learning_rate": 3e-05,
"loss": 0.443,
"step": 270
},
{
"epoch": 1.8,
"eval_accuracy": 0.5032894736842105,
"eval_loss": 0.5418796539306641,
"eval_runtime": 16.9208,
"eval_samples_per_second": 17.966,
"eval_steps_per_second": 4.492,
"step": 270
},
{
"epoch": 1.87,
"learning_rate": 3e-05,
"loss": 0.4081,
"step": 280
},
{
"epoch": 1.87,
"eval_accuracy": 0.506578947368421,
"eval_loss": 0.5948407053947449,
"eval_runtime": 16.9289,
"eval_samples_per_second": 17.957,
"eval_steps_per_second": 4.489,
"step": 280
},
{
"epoch": 1.94,
"learning_rate": 3e-05,
"loss": 0.3944,
"step": 290
},
{
"epoch": 1.94,
"eval_accuracy": 0.5394736842105263,
"eval_loss": 0.554680347442627,
"eval_runtime": 16.9311,
"eval_samples_per_second": 17.955,
"eval_steps_per_second": 4.489,
"step": 290
},
{
"epoch": 2.0,
"learning_rate": 3e-05,
"loss": 0.4005,
"step": 300
},
{
"epoch": 2.0,
"eval_accuracy": 0.5361842105263158,
"eval_loss": 0.5615983009338379,
"eval_runtime": 16.9277,
"eval_samples_per_second": 17.959,
"eval_steps_per_second": 4.49,
"step": 300
},
{
"epoch": 2.0,
"step": 300,
"total_flos": 7.380159778455552e+16,
"train_loss": 0.5479170862833659,
"train_runtime": 1462.1781,
"train_samples_per_second": 3.283,
"train_steps_per_second": 0.205
}
],
"logging_steps": 10,
"max_steps": 300,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 150,
"total_flos": 7.380159778455552e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}