nllb-200-distilled-600M-lo / trainer_state.json
Xmm's picture
Upload 8 files
538c5ac verified
{
"best_metric": 0.26939964294433594,
"best_model_checkpoint": "./checkpoint/checkpoint-4000",
"epoch": 4.790692369111441,
"eval_steps": 500,
"global_step": 10500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4e-08,
"loss": 12.0623,
"step": 1
},
{
"epoch": 0.23,
"learning_rate": 1.9960000000000002e-05,
"loss": 7.4437,
"step": 500
},
{
"epoch": 0.23,
"eval_bleu": 33.6574,
"eval_gen_len": 31.2317,
"eval_loss": 3.4493963718414307,
"eval_runtime": 576.3536,
"eval_samples_per_second": 1.73,
"eval_steps_per_second": 1.73,
"step": 500
},
{
"epoch": 0.46,
"learning_rate": 1.904543280726925e-05,
"loss": 1.3107,
"step": 1000
},
{
"epoch": 0.46,
"eval_bleu": 33.3513,
"eval_gen_len": 31.7091,
"eval_loss": 0.2903362810611725,
"eval_runtime": 476.1637,
"eval_samples_per_second": 2.094,
"eval_steps_per_second": 2.094,
"step": 1000
},
{
"epoch": 0.68,
"learning_rate": 1.8088952654232427e-05,
"loss": 0.1582,
"step": 1500
},
{
"epoch": 0.68,
"eval_bleu": 33.7009,
"eval_gen_len": 31.7202,
"eval_loss": 0.2747056484222412,
"eval_runtime": 477.4288,
"eval_samples_per_second": 2.088,
"eval_steps_per_second": 2.088,
"step": 1500
},
{
"epoch": 0.91,
"learning_rate": 1.71324725011956e-05,
"loss": 0.1477,
"step": 2000
},
{
"epoch": 0.91,
"eval_bleu": 34.2274,
"eval_gen_len": 31.5557,
"eval_loss": 0.2713315188884735,
"eval_runtime": 473.8787,
"eval_samples_per_second": 2.104,
"eval_steps_per_second": 2.104,
"step": 2000
},
{
"epoch": 1.14,
"learning_rate": 1.617599234815878e-05,
"loss": 0.1413,
"step": 2500
},
{
"epoch": 1.14,
"eval_bleu": 34.0664,
"eval_gen_len": 31.674,
"eval_loss": 0.2716849446296692,
"eval_runtime": 477.2223,
"eval_samples_per_second": 2.089,
"eval_steps_per_second": 2.089,
"step": 2500
},
{
"epoch": 1.37,
"learning_rate": 1.5219512195121952e-05,
"loss": 0.1355,
"step": 3000
},
{
"epoch": 1.37,
"eval_bleu": 34.1168,
"eval_gen_len": 31.8506,
"eval_loss": 0.2718922197818756,
"eval_runtime": 479.68,
"eval_samples_per_second": 2.078,
"eval_steps_per_second": 2.078,
"step": 3000
},
{
"epoch": 1.6,
"learning_rate": 1.4263032042085128e-05,
"loss": 0.136,
"step": 3500
},
{
"epoch": 1.6,
"eval_bleu": 34.2638,
"eval_gen_len": 31.7523,
"eval_loss": 0.270623117685318,
"eval_runtime": 479.0203,
"eval_samples_per_second": 2.081,
"eval_steps_per_second": 2.081,
"step": 3500
},
{
"epoch": 1.83,
"learning_rate": 1.3306551889048302e-05,
"loss": 0.1316,
"step": 4000
},
{
"epoch": 1.83,
"eval_bleu": 34.1582,
"eval_gen_len": 31.6931,
"eval_loss": 0.26939964294433594,
"eval_runtime": 477.4424,
"eval_samples_per_second": 2.088,
"eval_steps_per_second": 2.088,
"step": 4000
},
{
"epoch": 2.05,
"learning_rate": 1.235007173601148e-05,
"loss": 0.1312,
"step": 4500
},
{
"epoch": 2.05,
"eval_bleu": 34.4277,
"eval_gen_len": 31.662,
"eval_loss": 0.2704804539680481,
"eval_runtime": 479.1942,
"eval_samples_per_second": 2.081,
"eval_steps_per_second": 2.081,
"step": 4500
},
{
"epoch": 2.28,
"learning_rate": 1.1393591582974655e-05,
"loss": 0.1258,
"step": 5000
},
{
"epoch": 2.28,
"eval_bleu": 34.3594,
"eval_gen_len": 31.651,
"eval_loss": 0.27046987414360046,
"eval_runtime": 478.936,
"eval_samples_per_second": 2.082,
"eval_steps_per_second": 2.082,
"step": 5000
},
{
"epoch": 2.51,
"learning_rate": 1.043711142993783e-05,
"loss": 0.1271,
"step": 5500
},
{
"epoch": 2.51,
"eval_bleu": 34.3412,
"eval_gen_len": 31.8094,
"eval_loss": 0.27054643630981445,
"eval_runtime": 481.7151,
"eval_samples_per_second": 2.07,
"eval_steps_per_second": 2.07,
"step": 5500
},
{
"epoch": 2.74,
"learning_rate": 9.480631276901005e-06,
"loss": 0.1249,
"step": 6000
},
{
"epoch": 2.74,
"eval_bleu": 34.2387,
"eval_gen_len": 31.7212,
"eval_loss": 0.2704330086708069,
"eval_runtime": 479.9945,
"eval_samples_per_second": 2.077,
"eval_steps_per_second": 2.077,
"step": 6000
},
{
"epoch": 2.97,
"learning_rate": 8.52415112386418e-06,
"loss": 0.1245,
"step": 6500
},
{
"epoch": 2.97,
"eval_bleu": 34.3033,
"eval_gen_len": 31.8616,
"eval_loss": 0.27082785964012146,
"eval_runtime": 482.2741,
"eval_samples_per_second": 2.067,
"eval_steps_per_second": 2.067,
"step": 6500
},
{
"epoch": 3.19,
"learning_rate": 7.5676709708273554e-06,
"loss": 0.1195,
"step": 7000
},
{
"epoch": 3.19,
"eval_bleu": 34.2748,
"eval_gen_len": 31.9017,
"eval_loss": 0.27176010608673096,
"eval_runtime": 484.0841,
"eval_samples_per_second": 2.06,
"eval_steps_per_second": 2.06,
"step": 7000
},
{
"epoch": 3.42,
"learning_rate": 6.611190817790531e-06,
"loss": 0.1198,
"step": 7500
},
{
"epoch": 3.42,
"eval_bleu": 34.2897,
"eval_gen_len": 31.7312,
"eval_loss": 0.27175214886665344,
"eval_runtime": 479.5665,
"eval_samples_per_second": 2.079,
"eval_steps_per_second": 2.079,
"step": 7500
},
{
"epoch": 3.65,
"learning_rate": 5.654710664753707e-06,
"loss": 0.1209,
"step": 8000
},
{
"epoch": 3.65,
"eval_bleu": 34.4446,
"eval_gen_len": 31.7272,
"eval_loss": 0.2709992527961731,
"eval_runtime": 478.4748,
"eval_samples_per_second": 2.084,
"eval_steps_per_second": 2.084,
"step": 8000
},
{
"epoch": 3.88,
"learning_rate": 4.6982305117168825e-06,
"loss": 0.1201,
"step": 8500
},
{
"epoch": 3.88,
"eval_bleu": 34.3571,
"eval_gen_len": 31.7432,
"eval_loss": 0.2712614834308624,
"eval_runtime": 478.6295,
"eval_samples_per_second": 2.083,
"eval_steps_per_second": 2.083,
"step": 8500
},
{
"epoch": 4.11,
"learning_rate": 3.7417503586800574e-06,
"loss": 0.1201,
"step": 9000
},
{
"epoch": 4.11,
"eval_bleu": 34.4398,
"eval_gen_len": 31.7613,
"eval_loss": 0.27223262190818787,
"eval_runtime": 478.6225,
"eval_samples_per_second": 2.083,
"eval_steps_per_second": 2.083,
"step": 9000
},
{
"epoch": 4.33,
"learning_rate": 2.785270205643233e-06,
"loss": 0.1178,
"step": 9500
},
{
"epoch": 4.33,
"eval_bleu": 34.4074,
"eval_gen_len": 31.7753,
"eval_loss": 0.27177131175994873,
"eval_runtime": 479.0762,
"eval_samples_per_second": 2.081,
"eval_steps_per_second": 2.081,
"step": 9500
},
{
"epoch": 4.56,
"learning_rate": 1.8287900526064088e-06,
"loss": 0.1181,
"step": 10000
},
{
"epoch": 4.56,
"eval_bleu": 34.4628,
"eval_gen_len": 31.8034,
"eval_loss": 0.2723881006240845,
"eval_runtime": 479.5721,
"eval_samples_per_second": 2.079,
"eval_steps_per_second": 2.079,
"step": 10000
},
{
"epoch": 4.79,
"learning_rate": 8.72309899569584e-07,
"loss": 0.1169,
"step": 10500
},
{
"epoch": 4.79,
"eval_bleu": 34.563,
"eval_gen_len": 31.7442,
"eval_loss": 0.2720402777194977,
"eval_runtime": 478.0313,
"eval_samples_per_second": 2.086,
"eval_steps_per_second": 2.086,
"step": 10500
}
],
"logging_steps": 500,
"max_steps": 10955,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 1.820292017113006e+17,
"trial_name": null,
"trial_params": null
}