german-jeopardy-mt5-large-128 / trainer_state.json
Marvin
Initial commit
8875396 unverified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.789564097058193,
"eval_steps": 500,
"global_step": 1440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.99,
"learning_rate": 0.0001,
"loss": 3.9659,
"step": 72
},
{
"epoch": 0.99,
"eval_bleu": 10.8438,
"eval_bp": 0.7379,
"eval_counts_1": 7244,
"eval_counts_2": 2547,
"eval_counts_3": 1183,
"eval_counts_4": 565,
"eval_exact_match": 0.0136,
"eval_f1": 0.3139,
"eval_gen_len": 11.7786,
"eval_loss": 1.4144511222839355,
"eval_precisions_1": 44.4526,
"eval_precisions_2": 18.0741,
"eval_precisions_3": 9.9512,
"eval_precisions_4": 5.8344,
"eval_ref_len": 21250,
"eval_rouge1": 0.3213,
"eval_rouge2": 0.1608,
"eval_rougeL": 0.3091,
"eval_rougeLsum": 0.309,
"eval_runtime": 2106.9539,
"eval_samples_per_second": 1.046,
"eval_steps_per_second": 1.046,
"eval_sys_len": 16296,
"eval_totals_1": 16296,
"eval_totals_2": 14092,
"eval_totals_3": 11888,
"eval_totals_4": 9684,
"step": 72
},
{
"epoch": 1.99,
"learning_rate": 0.0001,
"loss": 1.7081,
"step": 145
},
{
"epoch": 1.99,
"eval_bleu": 13.2044,
"eval_bp": 0.7697,
"eval_counts_1": 7865,
"eval_counts_2": 3037,
"eval_counts_3": 1498,
"eval_counts_4": 759,
"eval_exact_match": 0.0181,
"eval_f1": 0.3481,
"eval_gen_len": 12.225,
"eval_loss": 1.263157844543457,
"eval_precisions_1": 46.7015,
"eval_precisions_2": 20.7488,
"eval_precisions_3": 12.0486,
"eval_precisions_4": 7.4201,
"eval_ref_len": 21250,
"eval_rouge1": 0.3577,
"eval_rouge2": 0.189,
"eval_rougeL": 0.3438,
"eval_rougeLsum": 0.3439,
"eval_runtime": 3942.8178,
"eval_samples_per_second": 0.559,
"eval_steps_per_second": 0.559,
"eval_sys_len": 16841,
"eval_totals_1": 16841,
"eval_totals_2": 14637,
"eval_totals_3": 12433,
"eval_totals_4": 10229,
"step": 145
},
{
"epoch": 3.0,
"learning_rate": 0.0001,
"loss": 1.4856,
"step": 218
},
{
"epoch": 3.0,
"eval_bleu": 15.6014,
"eval_bp": 0.8142,
"eval_counts_1": 8608,
"eval_counts_2": 3519,
"eval_counts_3": 1818,
"eval_counts_4": 969,
"eval_exact_match": 0.0268,
"eval_f1": 0.3882,
"eval_gen_len": 13.0027,
"eval_loss": 1.1974213123321533,
"eval_precisions_1": 48.8342,
"eval_precisions_2": 22.8166,
"eval_precisions_3": 13.7529,
"eval_precisions_4": 8.7971,
"eval_ref_len": 21250,
"eval_rouge1": 0.3969,
"eval_rouge2": 0.2181,
"eval_rougeL": 0.381,
"eval_rougeLsum": 0.3812,
"eval_runtime": 4069.754,
"eval_samples_per_second": 0.542,
"eval_steps_per_second": 0.542,
"eval_sys_len": 17627,
"eval_totals_1": 17627,
"eval_totals_2": 15423,
"eval_totals_3": 13219,
"eval_totals_4": 11015,
"step": 218
},
{
"epoch": 4.0,
"learning_rate": 0.0001,
"loss": 1.3277,
"step": 291
},
{
"epoch": 4.0,
"eval_bleu": 16.4313,
"eval_bp": 0.8052,
"eval_counts_1": 9018,
"eval_counts_2": 3702,
"eval_counts_3": 1907,
"eval_counts_4": 1029,
"eval_exact_match": 0.0313,
"eval_f1": 0.4156,
"eval_gen_len": 12.8716,
"eval_loss": 1.1393847465515137,
"eval_precisions_1": 51.6347,
"eval_precisions_2": 24.2579,
"eval_precisions_3": 14.6052,
"eval_precisions_4": 9.4812,
"eval_ref_len": 21250,
"eval_rouge1": 0.424,
"eval_rouge2": 0.2321,
"eval_rougeL": 0.4087,
"eval_rougeLsum": 0.4085,
"eval_runtime": 4037.7601,
"eval_samples_per_second": 0.546,
"eval_steps_per_second": 0.546,
"eval_sys_len": 17465,
"eval_totals_1": 17465,
"eval_totals_2": 15261,
"eval_totals_3": 13057,
"eval_totals_4": 10853,
"step": 291
},
{
"epoch": 4.99,
"learning_rate": 0.0001,
"loss": 1.2314,
"step": 363
},
{
"epoch": 4.99,
"eval_bleu": 17.0718,
"eval_bp": 0.8235,
"eval_counts_1": 9240,
"eval_counts_2": 3869,
"eval_counts_3": 1994,
"eval_counts_4": 1076,
"eval_exact_match": 0.0363,
"eval_f1": 0.4256,
"eval_gen_len": 13.2137,
"eval_loss": 1.1193382740020752,
"eval_precisions_1": 51.9276,
"eval_precisions_2": 24.8172,
"eval_precisions_3": 14.8962,
"eval_precisions_4": 9.6226,
"eval_ref_len": 21250,
"eval_rouge1": 0.4336,
"eval_rouge2": 0.2413,
"eval_rougeL": 0.4183,
"eval_rougeLsum": 0.418,
"eval_runtime": 4116.6581,
"eval_samples_per_second": 0.535,
"eval_steps_per_second": 0.535,
"eval_sys_len": 17794,
"eval_totals_1": 17794,
"eval_totals_2": 15590,
"eval_totals_3": 13386,
"eval_totals_4": 11182,
"step": 363
},
{
"epoch": 5.99,
"learning_rate": 0.0001,
"loss": 1.1264,
"step": 436
},
{
"epoch": 5.99,
"eval_bleu": 17.4744,
"eval_bp": 0.8072,
"eval_counts_1": 9263,
"eval_counts_2": 3908,
"eval_counts_3": 2055,
"eval_counts_4": 1127,
"eval_exact_match": 0.0372,
"eval_f1": 0.4309,
"eval_gen_len": 13.034,
"eval_loss": 1.1085509061813354,
"eval_precisions_1": 52.9254,
"eval_precisions_2": 25.5458,
"eval_precisions_3": 15.6942,
"eval_precisions_4": 10.3489,
"eval_ref_len": 21250,
"eval_rouge1": 0.4383,
"eval_rouge2": 0.2452,
"eval_rougeL": 0.4239,
"eval_rougeLsum": 0.4237,
"eval_runtime": 3709.3886,
"eval_samples_per_second": 0.594,
"eval_steps_per_second": 0.594,
"eval_sys_len": 17502,
"eval_totals_1": 17502,
"eval_totals_2": 15298,
"eval_totals_3": 13094,
"eval_totals_4": 10890,
"step": 436
},
{
"epoch": 7.0,
"learning_rate": 0.0001,
"loss": 1.0469,
"step": 509
},
{
"epoch": 7.0,
"eval_bleu": 18.0906,
"eval_bp": 0.8363,
"eval_counts_1": 9434,
"eval_counts_2": 4034,
"eval_counts_3": 2146,
"eval_counts_4": 1189,
"eval_exact_match": 0.039,
"eval_f1": 0.4348,
"eval_gen_len": 13.422,
"eval_loss": 1.103752851486206,
"eval_precisions_1": 52.3297,
"eval_precisions_2": 25.4929,
"eval_precisions_3": 15.7562,
"eval_precisions_4": 10.4152,
"eval_ref_len": 21250,
"eval_rouge1": 0.4433,
"eval_rouge2": 0.2505,
"eval_rougeL": 0.4286,
"eval_rougeLsum": 0.4282,
"eval_runtime": 4081.2971,
"eval_samples_per_second": 0.54,
"eval_steps_per_second": 0.54,
"eval_sys_len": 18028,
"eval_totals_1": 18028,
"eval_totals_2": 15824,
"eval_totals_3": 13620,
"eval_totals_4": 11416,
"step": 509
},
{
"epoch": 8.0,
"learning_rate": 0.0001,
"loss": 0.9874,
"step": 582
},
{
"epoch": 8.0,
"eval_bleu": 19.1287,
"eval_bp": 0.8539,
"eval_counts_1": 9746,
"eval_counts_2": 4265,
"eval_counts_3": 2287,
"eval_counts_4": 1285,
"eval_exact_match": 0.0454,
"eval_f1": 0.4498,
"eval_gen_len": 13.6466,
"eval_loss": 1.0989724397659302,
"eval_precisions_1": 53.1088,
"eval_precisions_2": 26.4136,
"eval_precisions_3": 16.4025,
"eval_precisions_4": 10.9464,
"eval_ref_len": 21250,
"eval_rouge1": 0.457,
"eval_rouge2": 0.2627,
"eval_rougeL": 0.4417,
"eval_rougeLsum": 0.4416,
"eval_runtime": 2875.9709,
"eval_samples_per_second": 0.766,
"eval_steps_per_second": 0.766,
"eval_sys_len": 18351,
"eval_totals_1": 18351,
"eval_totals_2": 16147,
"eval_totals_3": 13943,
"eval_totals_4": 11739,
"step": 582
},
{
"epoch": 8.99,
"learning_rate": 0.0001,
"loss": 0.9488,
"step": 654
},
{
"epoch": 8.99,
"eval_bleu": 18.2172,
"eval_bp": 0.8255,
"eval_counts_1": 9484,
"eval_counts_2": 4062,
"eval_counts_3": 2158,
"eval_counts_4": 1197,
"eval_exact_match": 0.0431,
"eval_f1": 0.4399,
"eval_gen_len": 13.2763,
"eval_loss": 1.1175453662872314,
"eval_precisions_1": 53.1883,
"eval_precisions_2": 25.9935,
"eval_precisions_3": 16.0769,
"eval_precisions_4": 10.6694,
"eval_ref_len": 21250,
"eval_rouge1": 0.4482,
"eval_rouge2": 0.2548,
"eval_rougeL": 0.4338,
"eval_rougeLsum": 0.4333,
"eval_runtime": 4231.6184,
"eval_samples_per_second": 0.521,
"eval_steps_per_second": 0.521,
"eval_sys_len": 17831,
"eval_totals_1": 17831,
"eval_totals_2": 15627,
"eval_totals_3": 13423,
"eval_totals_4": 11219,
"step": 654
},
{
"epoch": 9.99,
"learning_rate": 0.0001,
"loss": 0.8893,
"step": 727
},
{
"epoch": 9.99,
"eval_bleu": 19.064,
"eval_bp": 0.8357,
"eval_counts_1": 9650,
"eval_counts_2": 4205,
"eval_counts_3": 2289,
"eval_counts_4": 1289,
"eval_exact_match": 0.0463,
"eval_f1": 0.4472,
"eval_gen_len": 13.4251,
"eval_loss": 1.1221915483474731,
"eval_precisions_1": 53.5605,
"eval_precisions_2": 26.592,
"eval_precisions_3": 16.8198,
"eval_precisions_4": 11.3021,
"eval_ref_len": 21250,
"eval_rouge1": 0.4543,
"eval_rouge2": 0.262,
"eval_rougeL": 0.4396,
"eval_rougeLsum": 0.4394,
"eval_runtime": 4369.7974,
"eval_samples_per_second": 0.504,
"eval_steps_per_second": 0.504,
"eval_sys_len": 18017,
"eval_totals_1": 18017,
"eval_totals_2": 15813,
"eval_totals_3": 13609,
"eval_totals_4": 11405,
"step": 727
},
{
"epoch": 10.99,
"learning_rate": 0.0001,
"loss": 0.8362,
"step": 800
},
{
"epoch": 10.99,
"eval_bleu": 19.052,
"eval_bp": 0.8474,
"eval_counts_1": 9706,
"eval_counts_2": 4232,
"eval_counts_3": 2279,
"eval_counts_4": 1281,
"eval_exact_match": 0.0472,
"eval_f1": 0.4473,
"eval_gen_len": 13.6021,
"eval_loss": 1.1342219114303589,
"eval_precisions_1": 53.2361,
"eval_precisions_2": 26.4038,
"eval_precisions_3": 16.4858,
"eval_precisions_4": 11.0241,
"eval_ref_len": 21250,
"eval_rouge1": 0.4551,
"eval_rouge2": 0.2632,
"eval_rougeL": 0.4395,
"eval_rougeLsum": 0.4393,
"eval_runtime": 4741.4712,
"eval_samples_per_second": 0.465,
"eval_steps_per_second": 0.465,
"eval_sys_len": 18232,
"eval_totals_1": 18232,
"eval_totals_2": 16028,
"eval_totals_3": 13824,
"eval_totals_4": 11620,
"step": 800
},
{
"epoch": 12.0,
"learning_rate": 0.0001,
"loss": 0.7835,
"step": 873
},
{
"epoch": 12.0,
"eval_bleu": 19.169,
"eval_bp": 0.8614,
"eval_counts_1": 9802,
"eval_counts_2": 4280,
"eval_counts_3": 2292,
"eval_counts_4": 1285,
"eval_exact_match": 0.0472,
"eval_f1": 0.4497,
"eval_gen_len": 14.0168,
"eval_loss": 1.1426819562911987,
"eval_precisions_1": 53.0096,
"eval_precisions_2": 26.2786,
"eval_precisions_3": 16.2749,
"eval_precisions_4": 10.8174,
"eval_ref_len": 21250,
"eval_rouge1": 0.458,
"eval_rouge2": 0.2634,
"eval_rougeL": 0.4414,
"eval_rougeLsum": 0.4412,
"eval_runtime": 2858.9204,
"eval_samples_per_second": 0.771,
"eval_steps_per_second": 0.771,
"eval_sys_len": 18491,
"eval_totals_1": 18491,
"eval_totals_2": 16287,
"eval_totals_3": 14083,
"eval_totals_4": 11879,
"step": 873
},
{
"epoch": 12.99,
"learning_rate": 0.0001,
"loss": 0.7441,
"step": 945
},
{
"epoch": 12.99,
"eval_bleu": 19.3443,
"eval_bp": 0.8618,
"eval_counts_1": 9816,
"eval_counts_2": 4323,
"eval_counts_3": 2334,
"eval_counts_4": 1294,
"eval_exact_match": 0.0463,
"eval_f1": 0.4493,
"eval_gen_len": 13.8348,
"eval_loss": 1.1669002771377563,
"eval_precisions_1": 53.0652,
"eval_precisions_2": 26.5312,
"eval_precisions_3": 16.5649,
"eval_precisions_4": 10.8868,
"eval_ref_len": 21250,
"eval_rouge1": 0.4577,
"eval_rouge2": 0.2659,
"eval_rougeL": 0.4418,
"eval_rougeLsum": 0.4417,
"eval_runtime": 2130.8,
"eval_samples_per_second": 1.034,
"eval_steps_per_second": 1.034,
"eval_sys_len": 18498,
"eval_totals_1": 18498,
"eval_totals_2": 16294,
"eval_totals_3": 14090,
"eval_totals_4": 11886,
"step": 945
},
{
"epoch": 13.99,
"learning_rate": 0.0001,
"loss": 0.7012,
"step": 1018
},
{
"epoch": 13.99,
"eval_bleu": 19.7341,
"eval_bp": 0.8639,
"eval_counts_1": 9856,
"eval_counts_2": 4364,
"eval_counts_3": 2375,
"eval_counts_4": 1360,
"eval_exact_match": 0.0476,
"eval_f1": 0.4514,
"eval_gen_len": 13.976,
"eval_loss": 1.1739834547042847,
"eval_precisions_1": 53.1693,
"eval_precisions_2": 26.7189,
"eval_precisions_3": 16.8094,
"eval_precisions_4": 11.4046,
"eval_ref_len": 21250,
"eval_rouge1": 0.4591,
"eval_rouge2": 0.2653,
"eval_rougeL": 0.443,
"eval_rougeLsum": 0.4428,
"eval_runtime": 2149.1056,
"eval_samples_per_second": 1.026,
"eval_steps_per_second": 1.026,
"eval_sys_len": 18537,
"eval_totals_1": 18537,
"eval_totals_2": 16333,
"eval_totals_3": 14129,
"eval_totals_4": 11925,
"step": 1018
},
{
"epoch": 14.99,
"learning_rate": 0.0001,
"loss": 0.6597,
"step": 1091
},
{
"epoch": 14.99,
"eval_bleu": 19.3289,
"eval_bp": 0.8602,
"eval_counts_1": 9780,
"eval_counts_2": 4292,
"eval_counts_3": 2336,
"eval_counts_4": 1302,
"eval_exact_match": 0.0485,
"eval_f1": 0.4492,
"eval_gen_len": 13.8802,
"eval_loss": 1.1987030506134033,
"eval_precisions_1": 52.9565,
"eval_precisions_2": 26.3896,
"eval_precisions_3": 16.6145,
"eval_precisions_4": 10.9818,
"eval_ref_len": 21250,
"eval_rouge1": 0.457,
"eval_rouge2": 0.2633,
"eval_rougeL": 0.4418,
"eval_rougeLsum": 0.4416,
"eval_runtime": 2149.2833,
"eval_samples_per_second": 1.025,
"eval_steps_per_second": 1.025,
"eval_sys_len": 18468,
"eval_totals_1": 18468,
"eval_totals_2": 16264,
"eval_totals_3": 14060,
"eval_totals_4": 11856,
"step": 1091
},
{
"epoch": 16.0,
"learning_rate": 0.0001,
"loss": 0.6236,
"step": 1164
},
{
"epoch": 16.0,
"eval_bleu": 19.8055,
"eval_bp": 0.8734,
"eval_counts_1": 9931,
"eval_counts_2": 4388,
"eval_counts_3": 2390,
"eval_counts_4": 1359,
"eval_exact_match": 0.0495,
"eval_f1": 0.4538,
"eval_gen_len": 14.044,
"eval_loss": 1.2135030031204224,
"eval_precisions_1": 53.0587,
"eval_precisions_2": 26.573,
"eval_precisions_3": 16.7028,
"eval_precisions_4": 11.2268,
"eval_ref_len": 21250,
"eval_rouge1": 0.4618,
"eval_rouge2": 0.2682,
"eval_rougeL": 0.4452,
"eval_rougeLsum": 0.445,
"eval_runtime": 2168.1341,
"eval_samples_per_second": 1.017,
"eval_steps_per_second": 1.017,
"eval_sys_len": 18717,
"eval_totals_1": 18717,
"eval_totals_2": 16513,
"eval_totals_3": 14309,
"eval_totals_4": 12105,
"step": 1164
},
{
"epoch": 17.0,
"learning_rate": 0.0001,
"loss": 0.5933,
"step": 1237
},
{
"epoch": 17.0,
"eval_bleu": 19.5893,
"eval_bp": 0.8654,
"eval_counts_1": 9806,
"eval_counts_2": 4316,
"eval_counts_3": 2366,
"eval_counts_4": 1348,
"eval_exact_match": 0.049,
"eval_f1": 0.4485,
"eval_gen_len": 14.0622,
"eval_loss": 1.2305341958999634,
"eval_precisions_1": 52.817,
"eval_precisions_2": 26.3782,
"eval_precisions_3": 16.7114,
"eval_precisions_4": 11.2766,
"eval_ref_len": 21250,
"eval_rouge1": 0.4571,
"eval_rouge2": 0.2628,
"eval_rougeL": 0.4407,
"eval_rougeLsum": 0.4409,
"eval_runtime": 2171.7325,
"eval_samples_per_second": 1.015,
"eval_steps_per_second": 1.015,
"eval_sys_len": 18566,
"eval_totals_1": 18566,
"eval_totals_2": 16362,
"eval_totals_3": 14158,
"eval_totals_4": 11954,
"step": 1237
},
{
"epoch": 17.99,
"learning_rate": 0.0001,
"loss": 0.5622,
"step": 1309
},
{
"epoch": 17.99,
"eval_bleu": 19.4914,
"eval_bp": 0.865,
"eval_counts_1": 9787,
"eval_counts_2": 4306,
"eval_counts_3": 2346,
"eval_counts_4": 1338,
"eval_exact_match": 0.0476,
"eval_f1": 0.447,
"eval_gen_len": 13.7763,
"eval_loss": 1.2796473503112793,
"eval_precisions_1": 52.7345,
"eval_precisions_2": 26.3283,
"eval_precisions_3": 16.5783,
"eval_precisions_4": 11.1995,
"eval_ref_len": 21250,
"eval_rouge1": 0.4549,
"eval_rouge2": 0.2609,
"eval_rougeL": 0.4383,
"eval_rougeLsum": 0.4382,
"eval_runtime": 2158.5699,
"eval_samples_per_second": 1.021,
"eval_steps_per_second": 1.021,
"eval_sys_len": 18559,
"eval_totals_1": 18559,
"eval_totals_2": 16355,
"eval_totals_3": 14151,
"eval_totals_4": 11947,
"step": 1309
},
{
"epoch": 18.99,
"learning_rate": 0.0001,
"loss": 0.5275,
"step": 1382
},
{
"epoch": 18.99,
"eval_bleu": 19.6947,
"eval_bp": 0.8857,
"eval_counts_1": 9918,
"eval_counts_2": 4363,
"eval_counts_3": 2374,
"eval_counts_4": 1355,
"eval_exact_match": 0.0508,
"eval_f1": 0.4499,
"eval_gen_len": 14.1647,
"eval_loss": 1.2833356857299805,
"eval_precisions_1": 52.3377,
"eval_precisions_2": 26.054,
"eval_precisions_3": 16.3251,
"eval_precisions_4": 10.9823,
"eval_ref_len": 21250,
"eval_rouge1": 0.4573,
"eval_rouge2": 0.2624,
"eval_rougeL": 0.441,
"eval_rougeLsum": 0.4408,
"eval_runtime": 2190.1704,
"eval_samples_per_second": 1.006,
"eval_steps_per_second": 1.006,
"eval_sys_len": 18950,
"eval_totals_1": 18950,
"eval_totals_2": 16746,
"eval_totals_3": 14542,
"eval_totals_4": 12338,
"step": 1382
},
{
"epoch": 19.79,
"learning_rate": 0.0001,
"loss": 0.4986,
"step": 1440
},
{
"epoch": 19.79,
"eval_bleu": 19.4544,
"eval_bp": 0.8847,
"eval_counts_1": 9879,
"eval_counts_2": 4315,
"eval_counts_3": 2347,
"eval_counts_4": 1324,
"eval_exact_match": 0.0495,
"eval_f1": 0.4478,
"eval_gen_len": 14.2827,
"eval_loss": 1.3059108257293701,
"eval_precisions_1": 52.1842,
"eval_precisions_2": 25.7966,
"eval_precisions_3": 16.1606,
"eval_precisions_4": 10.7476,
"eval_ref_len": 21250,
"eval_rouge1": 0.4564,
"eval_rouge2": 0.2622,
"eval_rougeL": 0.4407,
"eval_rougeLsum": 0.4403,
"eval_runtime": 3646.8693,
"eval_samples_per_second": 0.604,
"eval_steps_per_second": 0.604,
"eval_sys_len": 18931,
"eval_totals_1": 18931,
"eval_totals_2": 16727,
"eval_totals_3": 14523,
"eval_totals_4": 12319,
"step": 1440
},
{
"epoch": 19.79,
"step": 1440,
"total_flos": 1.102412878184448e+18,
"train_loss": 1.0667428798145717,
"train_runtime": 140813.6912,
"train_samples_per_second": 1.323,
"train_steps_per_second": 0.01
}
],
"logging_steps": 500,
"max_steps": 1440,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 1.102412878184448e+18,
"trial_name": null,
"trial_params": null
}