{
  "best_metric": 0.7115575075149536,
  "best_model_checkpoint": "./Marian-Training/checkpoint-123440",
  "epoch": 10.0,
  "global_step": 123440,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 8.020090732339598e-07,
      "loss": 5.3537,
      "step": 500
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.6121192482177577e-06,
      "loss": 3.8139,
      "step": 1000
    },
    {
      "epoch": 0.12,
      "learning_rate": 2.4222294232015557e-06,
      "loss": 3.1041,
      "step": 1500
    },
    {
      "epoch": 0.16,
      "learning_rate": 3.2323395981853535e-06,
      "loss": 2.8508,
      "step": 2000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.042449773169151e-06,
      "loss": 2.608,
      "step": 2500
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.852559948152949e-06,
      "loss": 2.431,
      "step": 3000
    },
    {
      "epoch": 0.28,
      "learning_rate": 5.662670123136748e-06,
      "loss": 2.2877,
      "step": 3500
    },
    {
      "epoch": 0.32,
      "learning_rate": 6.472780298120544e-06,
      "loss": 2.1334,
      "step": 4000
    },
    {
      "epoch": 0.36,
      "learning_rate": 7.2828904731043424e-06,
      "loss": 2.0944,
      "step": 4500
    },
    {
      "epoch": 0.41,
      "learning_rate": 8.09300064808814e-06,
      "loss": 2.0048,
      "step": 5000
    },
    {
      "epoch": 0.45,
      "learning_rate": 8.903110823071939e-06,
      "loss": 1.9408,
      "step": 5500
    },
    {
      "epoch": 0.49,
      "learning_rate": 9.713220998055737e-06,
      "loss": 1.8732,
      "step": 6000
    },
    {
      "epoch": 0.53,
      "learning_rate": 9.972541528805813e-06,
      "loss": 1.7753,
      "step": 6500
    },
    {
      "epoch": 0.57,
      "learning_rate": 9.929904151175087e-06,
      "loss": 1.7068,
      "step": 7000
    },
    {
      "epoch": 0.61,
      "learning_rate": 9.88726677354436e-06,
      "loss": 1.7098,
      "step": 7500
    },
    {
      "epoch": 0.65,
      "learning_rate": 9.844629395913635e-06,
      "loss": 1.6999,
      "step": 8000
    },
    {
      "epoch": 0.69,
      "learning_rate": 9.802077293038171e-06,
      "loss": 1.6395,
      "step": 8500
    },
    {
      "epoch": 0.73,
      "learning_rate": 9.759525190162705e-06,
      "loss": 1.6286,
      "step": 9000
    },
    {
      "epoch": 0.77,
      "learning_rate": 9.716887812531979e-06,
      "loss": 1.5806,
      "step": 9500
    },
    {
      "epoch": 0.81,
      "learning_rate": 9.674250434901252e-06,
      "loss": 1.5524,
      "step": 10000
    },
    {
      "epoch": 0.85,
      "learning_rate": 9.631613057270527e-06,
      "loss": 1.539,
      "step": 10500
    },
    {
      "epoch": 0.89,
      "learning_rate": 9.5889756796398e-06,
      "loss": 1.4994,
      "step": 11000
    },
    {
      "epoch": 0.93,
      "learning_rate": 9.546338302009073e-06,
      "loss": 1.4842,
      "step": 11500
    },
    {
      "epoch": 0.97,
      "learning_rate": 9.50378619913361e-06,
      "loss": 1.4552,
      "step": 12000
    },
    {
      "epoch": 1.0,
      "eval_BLEU": 13.80898407426215,
      "eval_BLEU-Bigram-Precision": 17.21054465218906,
      "eval_BLEU-Trigram-Precision": 9.359245405757035,
      "eval_BLEU-Unigram-Precision": 36.29109715552271,
      "eval_ROUGE-2": 12.17499077134061,
      "eval_ROUGE-L": 24.485794311989313,
      "eval_Sacre-Bigram-Precision": 14.59220505333171,
      "eval_Sacre-Trigram-Precision": 8.842869342442357,
      "eval_Sacre-Unigram-Precision": 32.78751834401792,
      "eval_SacreBLEU": 12.548344635447839,
      "eval_loss": 1.3949493169784546,
      "eval_runtime": 492.6032,
      "eval_samples_per_second": 2.511,
      "eval_steps_per_second": 2.511,
      "step": 12344
    },
    {
      "epoch": 1.01,
      "learning_rate": 9.461148821502884e-06,
      "loss": 1.4572,
      "step": 12500
    },
    {
      "epoch": 1.05,
      "learning_rate": 9.418511443872157e-06,
      "loss": 1.3886,
      "step": 13000
    },
    {
      "epoch": 1.09,
      "learning_rate": 9.37587406624143e-06,
      "loss": 1.3534,
      "step": 13500
    },
    {
      "epoch": 1.13,
      "learning_rate": 9.333236688610704e-06,
      "loss": 1.3521,
      "step": 14000
    },
    {
      "epoch": 1.17,
      "learning_rate": 9.290599310979979e-06,
      "loss": 1.358,
      "step": 14500
    },
    {
      "epoch": 1.22,
      "learning_rate": 9.247961933349252e-06,
      "loss": 1.37,
      "step": 15000
    },
    {
      "epoch": 1.26,
      "learning_rate": 9.205324555718525e-06,
      "loss": 1.3668,
      "step": 15500
    },
    {
      "epoch": 1.3,
      "learning_rate": 9.1626871780878e-06,
      "loss": 1.2571,
      "step": 16000
    },
    {
      "epoch": 1.34,
      "learning_rate": 9.120135075212335e-06,
      "loss": 1.3178,
      "step": 16500
    },
    {
      "epoch": 1.38,
      "learning_rate": 9.077497697581608e-06,
      "loss": 1.2597,
      "step": 17000
    },
    {
      "epoch": 1.42,
      "learning_rate": 9.034860319950883e-06,
      "loss": 1.2609,
      "step": 17500
    },
    {
      "epoch": 1.46,
      "learning_rate": 8.992222942320156e-06,
      "loss": 1.2334,
      "step": 18000
    },
    {
      "epoch": 1.5,
      "learning_rate": 8.949585564689431e-06,
      "loss": 1.2459,
      "step": 18500
    },
    {
      "epoch": 1.54,
      "learning_rate": 8.907033461813965e-06,
      "loss": 1.2724,
      "step": 19000
    },
    {
      "epoch": 1.58,
      "learning_rate": 8.864396084183239e-06,
      "loss": 1.2892,
      "step": 19500
    },
    {
      "epoch": 1.62,
      "learning_rate": 8.821758706552514e-06,
      "loss": 1.2363,
      "step": 20000
    },
    {
      "epoch": 1.66,
      "learning_rate": 8.779121328921787e-06,
      "loss": 1.197,
      "step": 20500
    },
    {
      "epoch": 1.7,
      "learning_rate": 8.73648395129106e-06,
      "loss": 1.1751,
      "step": 21000
    },
    {
      "epoch": 1.74,
      "learning_rate": 8.693931848415596e-06,
      "loss": 1.1762,
      "step": 21500
    },
    {
      "epoch": 1.78,
      "learning_rate": 8.65129447078487e-06,
      "loss": 1.2173,
      "step": 22000
    },
    {
      "epoch": 1.82,
      "learning_rate": 8.608657093154144e-06,
      "loss": 1.201,
      "step": 22500
    },
    {
      "epoch": 1.86,
      "learning_rate": 8.566019715523418e-06,
      "loss": 1.1811,
      "step": 23000
    },
    {
      "epoch": 1.9,
      "learning_rate": 8.52338233789269e-06,
      "loss": 1.1282,
      "step": 23500
    },
    {
      "epoch": 1.94,
      "learning_rate": 8.480830235017227e-06,
      "loss": 1.1472,
      "step": 24000
    },
    {
      "epoch": 1.98,
      "learning_rate": 8.4381928573865e-06,
      "loss": 1.1326,
      "step": 24500
    },
    {
      "epoch": 2.0,
      "eval_BLEU": 21.670590139777346,
      "eval_BLEU-Bigram-Precision": 28.321056642113284,
      "eval_BLEU-Trigram-Precision": 17.486990459670427,
      "eval_BLEU-Unigram-Precision": 52.16518497275595,
      "eval_ROUGE-2": 16.34953956013002,
      "eval_ROUGE-L": 29.922494808680927,
      "eval_Sacre-Bigram-Precision": 24.960306597317274,
      "eval_Sacre-Trigram-Precision": 16.90744655860935,
      "eval_Sacre-Unigram-Precision": 48.938570403035584,
      "eval_SacreBLEU": 19.634211590885588,
      "eval_loss": 1.1314517259597778,
      "eval_runtime": 388.1931,
      "eval_samples_per_second": 3.187,
      "eval_steps_per_second": 3.187,
      "step": 24688
    },
    {
      "epoch": 2.03,
      "learning_rate": 8.395555479755773e-06,
      "loss": 1.0742,
      "step": 25000
    },
    {
      "epoch": 2.07,
      "learning_rate": 8.352918102125047e-06,
      "loss": 1.1024,
      "step": 25500
    },
    {
      "epoch": 2.11,
      "learning_rate": 8.310280724494321e-06,
      "loss": 1.053,
      "step": 26000
    },
    {
      "epoch": 2.15,
      "learning_rate": 8.267643346863595e-06,
      "loss": 1.0807,
      "step": 26500
    },
    {
      "epoch": 2.19,
      "learning_rate": 8.22509124398813e-06,
      "loss": 1.0717,
      "step": 27000
    },
    {
      "epoch": 2.23,
      "learning_rate": 8.182453866357404e-06,
      "loss": 1.0766,
      "step": 27500
    },
    {
      "epoch": 2.27,
      "learning_rate": 8.139816488726679e-06,
      "loss": 1.0646,
      "step": 28000
    },
    {
      "epoch": 2.31,
      "learning_rate": 8.097179111095952e-06,
      "loss": 1.0151,
      "step": 28500
    },
    {
      "epoch": 2.35,
      "learning_rate": 8.054627008220487e-06,
      "loss": 1.0718,
      "step": 29000
    },
    {
      "epoch": 2.39,
      "learning_rate": 8.01198963058976e-06,
      "loss": 1.0117,
      "step": 29500
    },
    {
      "epoch": 2.43,
      "learning_rate": 7.969352252959035e-06,
      "loss": 1.0445,
      "step": 30000
    },
    {
      "epoch": 2.47,
      "learning_rate": 7.92680015008357e-06,
      "loss": 1.0253,
      "step": 30500
    },
    {
      "epoch": 2.51,
      "learning_rate": 7.884162772452844e-06,
      "loss": 1.0,
      "step": 31000
    },
    {
      "epoch": 2.55,
      "learning_rate": 7.841525394822117e-06,
      "loss": 1.0218,
      "step": 31500
    },
    {
      "epoch": 2.59,
      "learning_rate": 7.798888017191392e-06,
      "loss": 1.0237,
      "step": 32000
    },
    {
      "epoch": 2.63,
      "learning_rate": 7.756250639560665e-06,
      "loss": 1.0485,
      "step": 32500
    },
    {
      "epoch": 2.67,
      "learning_rate": 7.713613261929939e-06,
      "loss": 1.0115,
      "step": 33000
    },
    {
      "epoch": 2.71,
      "learning_rate": 7.670975884299212e-06,
      "loss": 0.9875,
      "step": 33500
    },
    {
      "epoch": 2.75,
      "learning_rate": 7.628338506668487e-06,
      "loss": 0.9991,
      "step": 34000
    },
    {
      "epoch": 2.79,
      "learning_rate": 7.58570112903776e-06,
      "loss": 0.9866,
      "step": 34500
    },
    {
      "epoch": 2.84,
      "learning_rate": 7.543149026162295e-06,
      "loss": 1.0031,
      "step": 35000
    },
    {
      "epoch": 2.88,
      "learning_rate": 7.50051164853157e-06,
      "loss": 0.9531,
      "step": 35500
    },
    {
      "epoch": 2.92,
      "learning_rate": 7.4578742709008435e-06,
      "loss": 0.9882,
      "step": 36000
    },
    {
      "epoch": 2.96,
      "learning_rate": 7.415236893270117e-06,
      "loss": 0.9695,
      "step": 36500
    },
    {
      "epoch": 3.0,
      "learning_rate": 7.372684790394652e-06,
      "loss": 0.9998,
      "step": 37000
    },
    {
      "epoch": 3.0,
      "eval_BLEU": 24.46985618625074,
      "eval_BLEU-Bigram-Precision": 30.066847510172444,
      "eval_BLEU-Trigram-Precision": 19.431133096305455,
      "eval_BLEU-Unigram-Precision": 52.068004204561035,
      "eval_ROUGE-2": 19.0673225300493,
      "eval_ROUGE-L": 32.35584155291452,
      "eval_Sacre-Bigram-Precision": 27.059490669141148,
      "eval_Sacre-Trigram-Precision": 18.9141567094323,
      "eval_Sacre-Unigram-Precision": 48.92658105161134,
      "eval_SacreBLEU": 22.68244879199697,
      "eval_loss": 0.9878150820732117,
      "eval_runtime": 400.1699,
      "eval_samples_per_second": 3.091,
      "eval_steps_per_second": 3.091,
      "step": 37032
    },
    {
      "epoch": 3.04,
      "learning_rate": 7.330132687519187e-06,
      "loss": 0.8451,
      "step": 37500
    },
    {
      "epoch": 3.08,
      "learning_rate": 7.287495309888461e-06,
      "loss": 0.9014,
      "step": 38000
    },
    {
      "epoch": 3.12,
      "learning_rate": 7.244857932257735e-06,
      "loss": 0.9325,
      "step": 38500
    },
    {
      "epoch": 3.16,
      "learning_rate": 7.202220554627009e-06,
      "loss": 0.8954,
      "step": 39000
    },
    {
      "epoch": 3.2,
      "learning_rate": 7.1595831769962835e-06,
      "loss": 0.9068,
      "step": 39500
    },
    {
      "epoch": 3.24,
      "learning_rate": 7.116945799365557e-06,
      "loss": 0.9003,
      "step": 40000
    },
    {
      "epoch": 3.28,
      "learning_rate": 7.07430842173483e-06,
      "loss": 0.9159,
      "step": 40500
    },
    {
      "epoch": 3.32,
      "learning_rate": 7.031671044104103e-06,
      "loss": 0.8778,
      "step": 41000
    },
    {
      "epoch": 3.36,
      "learning_rate": 6.989033666473378e-06,
      "loss": 0.8932,
      "step": 41500
    },
    {
      "epoch": 3.4,
      "learning_rate": 6.9463962888426514e-06,
      "loss": 0.9176,
      "step": 42000
    },
    {
      "epoch": 3.44,
      "learning_rate": 6.903844185967187e-06,
      "loss": 0.8882,
      "step": 42500
    },
    {
      "epoch": 3.48,
      "learning_rate": 6.86120680833646e-06,
      "loss": 0.8761,
      "step": 43000
    },
    {
      "epoch": 3.52,
      "learning_rate": 6.818569430705735e-06,
      "loss": 0.9093,
      "step": 43500
    },
    {
      "epoch": 3.56,
      "learning_rate": 6.775932053075008e-06,
      "loss": 0.904,
      "step": 44000
    },
    {
      "epoch": 3.6,
      "learning_rate": 6.733294675444282e-06,
      "loss": 0.8823,
      "step": 44500
    },
    {
      "epoch": 3.65,
      "learning_rate": 6.6907425725688165e-06,
      "loss": 0.8619,
      "step": 45000
    },
    {
      "epoch": 3.69,
      "learning_rate": 6.6481051949380915e-06,
      "loss": 0.8634,
      "step": 45500
    },
    {
      "epoch": 3.73,
      "learning_rate": 6.605467817307365e-06,
      "loss": 0.8398,
      "step": 46000
    },
    {
      "epoch": 3.77,
      "learning_rate": 6.562830439676639e-06,
      "loss": 0.8783,
      "step": 46500
    },
    {
      "epoch": 3.81,
      "learning_rate": 6.520193062045912e-06,
      "loss": 0.8331,
      "step": 47000
    },
    {
      "epoch": 3.85,
      "learning_rate": 6.477555684415187e-06,
      "loss": 0.8587,
      "step": 47500
    },
    {
      "epoch": 3.89,
      "learning_rate": 6.435003581539721e-06,
      "loss": 0.8672,
      "step": 48000
    },
    {
      "epoch": 3.93,
      "learning_rate": 6.392366203908995e-06,
      "loss": 0.8561,
      "step": 48500
    },
    {
      "epoch": 3.97,
      "learning_rate": 6.349728826278269e-06,
      "loss": 0.877,
      "step": 49000
    },
    {
      "epoch": 4.0,
      "eval_BLEU": 27.332441014627662,
      "eval_BLEU-Bigram-Precision": 32.294467921628886,
      "eval_BLEU-Trigram-Precision": 22.247179539537495,
      "eval_BLEU-Unigram-Precision": 53.48805584515661,
      "eval_ROUGE-2": 21.864275273004786,
      "eval_ROUGE-L": 35.28369042116672,
      "eval_Sacre-Bigram-Precision": 29.44255297525281,
      "eval_Sacre-Trigram-Precision": 21.635397462314284,
      "eval_Sacre-Unigram-Precision": 50.406387454580226,
      "eval_SacreBLEU": 25.615100588685642,
      "eval_loss": 0.8949469923973083,
      "eval_runtime": 398.8764,
      "eval_samples_per_second": 3.101,
      "eval_steps_per_second": 3.101,
      "step": 49376
    },
    {
      "epoch": 4.01,
      "learning_rate": 6.307091448647544e-06,
      "loss": 0.8618,
      "step": 49500
    },
    {
      "epoch": 4.05,
      "learning_rate": 6.264454071016817e-06,
      "loss": 0.8095,
      "step": 50000
    },
    {
      "epoch": 4.09,
      "learning_rate": 6.22181669338609e-06,
      "loss": 0.7941,
      "step": 50500
    },
    {
      "epoch": 4.13,
      "learning_rate": 6.179264590510625e-06,
      "loss": 0.788,
      "step": 51000
    },
    {
      "epoch": 4.17,
      "learning_rate": 6.1366272128799e-06,
      "loss": 0.8061,
      "step": 51500
    },
    {
      "epoch": 4.21,
      "learning_rate": 6.0939898352491735e-06,
      "loss": 0.7797,
      "step": 52000
    },
    {
      "epoch": 4.25,
      "learning_rate": 6.051352457618447e-06,
      "loss": 0.8,
      "step": 52500
    },
    {
      "epoch": 4.29,
      "learning_rate": 6.008800354742982e-06,
      "loss": 0.7896,
      "step": 53000
    },
    {
      "epoch": 4.33,
      "learning_rate": 5.966162977112257e-06,
      "loss": 0.7732,
      "step": 53500
    },
    {
      "epoch": 4.37,
      "learning_rate": 5.92352559948153e-06,
      "loss": 0.7747,
      "step": 54000
    },
    {
      "epoch": 4.42,
      "learning_rate": 5.880888221850803e-06,
      "loss": 0.751,
      "step": 54500
    },
    {
      "epoch": 4.46,
      "learning_rate": 5.838250844220078e-06,
      "loss": 0.7639,
      "step": 55000
    },
    {
      "epoch": 4.5,
      "learning_rate": 5.7956134665893515e-06,
      "loss": 0.7889,
      "step": 55500
    },
    {
      "epoch": 4.54,
      "learning_rate": 5.752976088958625e-06,
      "loss": 0.7936,
      "step": 56000
    },
    {
      "epoch": 4.58,
      "learning_rate": 5.710338711327899e-06,
      "loss": 0.776,
      "step": 56500
    },
    {
      "epoch": 4.62,
      "learning_rate": 5.667701333697173e-06,
      "loss": 0.799,
      "step": 57000
    },
    {
      "epoch": 4.66,
      "learning_rate": 5.625063956066447e-06,
      "loss": 0.7859,
      "step": 57500
    },
    {
      "epoch": 4.7,
      "learning_rate": 5.58242657843572e-06,
      "loss": 0.7894,
      "step": 58000
    },
    {
      "epoch": 4.74,
      "learning_rate": 5.5398744755602555e-06,
      "loss": 0.769,
      "step": 58500
    },
    {
      "epoch": 4.78,
      "learning_rate": 5.4972370979295296e-06,
      "loss": 0.7695,
      "step": 59000
    },
    {
      "epoch": 4.82,
      "learning_rate": 5.454599720298804e-06,
      "loss": 0.7525,
      "step": 59500
    },
    {
      "epoch": 4.86,
      "learning_rate": 5.411962342668077e-06,
      "loss": 0.811,
      "step": 60000
    },
    {
      "epoch": 4.9,
      "learning_rate": 5.369410239792612e-06,
      "loss": 0.7797,
      "step": 60500
    },
    {
      "epoch": 4.94,
      "learning_rate": 5.326772862161886e-06,
      "loss": 0.7392,
      "step": 61000
    },
    {
      "epoch": 4.98,
      "learning_rate": 5.28413548453116e-06,
      "loss": 0.7361,
      "step": 61500
    },
    {
      "epoch": 5.0,
      "eval_BLEU": 29.41273385332665,
      "eval_BLEU-Bigram-Precision": 34.164481525625746,
      "eval_BLEU-Trigram-Precision": 23.999392035667242,
      "eval_BLEU-Unigram-Precision": 55.1953898793445,
      "eval_ROUGE-2": 23.905521720555196,
      "eval_ROUGE-L": 37.268890137733166,
      "eval_Sacre-Bigram-Precision": 31.466626305433632,
      "eval_Sacre-Trigram-Precision": 23.62247094274645,
      "eval_Sacre-Unigram-Precision": 52.30316269351315,
      "eval_SacreBLEU": 27.853036673062565,
      "eval_loss": 0.8335286378860474,
      "eval_runtime": 390.3818,
      "eval_samples_per_second": 3.169,
      "eval_steps_per_second": 3.169,
      "step": 61720
    },
    {
      "epoch": 5.02,
      "learning_rate": 5.241583381655695e-06,
      "loss": 0.7361,
      "step": 62000
    },
    {
      "epoch": 5.06,
      "learning_rate": 5.198946004024969e-06,
      "loss": 0.7126,
      "step": 62500
    },
    {
      "epoch": 5.1,
      "learning_rate": 5.156308626394243e-06,
      "loss": 0.6857,
      "step": 63000
    },
    {
      "epoch": 5.14,
      "learning_rate": 5.113671248763517e-06,
      "loss": 0.688,
      "step": 63500
    },
    {
      "epoch": 5.18,
      "learning_rate": 5.07103387113279e-06,
      "loss": 0.7319,
      "step": 64000
    },
    {
      "epoch": 5.23,
      "learning_rate": 5.028396493502064e-06,
      "loss": 0.7158,
      "step": 64500
    },
    {
      "epoch": 5.27,
      "learning_rate": 4.985759115871338e-06,
      "loss": 0.7076,
      "step": 65000
    },
    {
      "epoch": 5.31,
      "learning_rate": 4.943121738240612e-06,
      "loss": 0.7048,
      "step": 65500
    },
    {
      "epoch": 5.35,
      "learning_rate": 4.900569635365147e-06,
      "loss": 0.742,
      "step": 66000
    },
    {
      "epoch": 5.39,
      "learning_rate": 4.857932257734421e-06,
      "loss": 0.6841,
      "step": 66500
    },
    {
      "epoch": 5.43,
      "learning_rate": 4.815294880103695e-06,
      "loss": 0.7293,
      "step": 67000
    },
    {
      "epoch": 5.47,
      "learning_rate": 4.772657502472968e-06,
      "loss": 0.7025,
      "step": 67500
    },
    {
      "epoch": 5.51,
      "learning_rate": 4.730020124842242e-06,
      "loss": 0.7436,
      "step": 68000
    },
    {
      "epoch": 5.55,
      "learning_rate": 4.6874680219667775e-06,
      "loss": 0.687,
      "step": 68500
    },
    {
      "epoch": 5.59,
      "learning_rate": 4.644830644336052e-06,
      "loss": 0.7248,
      "step": 69000
    },
    {
      "epoch": 5.63,
      "learning_rate": 4.602193266705325e-06,
      "loss": 0.684,
      "step": 69500
    },
    {
      "epoch": 5.67,
      "learning_rate": 4.559555889074599e-06,
      "loss": 0.7064,
      "step": 70000
    },
    {
      "epoch": 5.71,
      "learning_rate": 4.516918511443872e-06,
      "loss": 0.7013,
      "step": 70500
    },
    {
      "epoch": 5.75,
      "learning_rate": 4.474281133813146e-06,
      "loss": 0.6804,
      "step": 71000
    },
    {
      "epoch": 5.79,
      "learning_rate": 4.4317290309376815e-06,
      "loss": 0.7065,
      "step": 71500
    },
    {
      "epoch": 5.83,
      "learning_rate": 4.3890916533069556e-06,
      "loss": 0.7256,
      "step": 72000
    },
    {
      "epoch": 5.87,
      "learning_rate": 4.346454275676229e-06,
      "loss": 0.6758,
      "step": 72500
    },
    {
      "epoch": 5.91,
      "learning_rate": 4.303816898045503e-06,
      "loss": 0.6763,
      "step": 73000
    },
    {
      "epoch": 5.95,
      "learning_rate": 4.261179520414776e-06,
      "loss": 0.6821,
      "step": 73500
    },
    {
      "epoch": 5.99,
      "learning_rate": 4.21854214278405e-06,
      "loss": 0.6605,
      "step": 74000
    },
    {
      "epoch": 6.0,
      "eval_BLEU": 32.72417295832384,
      "eval_BLEU-Bigram-Precision": 37.61842927908431,
      "eval_BLEU-Trigram-Precision": 27.745960319083657,
      "eval_BLEU-Unigram-Precision": 57.63504312301407,
      "eval_ROUGE-2": 27.224503232983654,
      "eval_ROUGE-L": 40.47406685392675,
      "eval_Sacre-Bigram-Precision": 35.30222585256279,
      "eval_Sacre-Trigram-Precision": 27.50803770911667,
      "eval_Sacre-Unigram-Precision": 55.02040816326531,
      "eval_SacreBLEU": 31.1982138624902,
      "eval_loss": 0.7896061539649963,
      "eval_runtime": 391.9845,
      "eval_samples_per_second": 3.156,
      "eval_steps_per_second": 3.156,
      "step": 74064
    },
    {
      "epoch": 6.04,
      "learning_rate": 4.175904765153324e-06,
      "loss": 0.6441,
      "step": 74500
    },
    {
      "epoch": 6.08,
      "learning_rate": 4.133267387522598e-06,
      "loss": 0.6353,
      "step": 75000
    },
    {
      "epoch": 6.12,
      "learning_rate": 4.0906300098918725e-06,
      "loss": 0.6078,
      "step": 75500
    },
    {
      "epoch": 6.16,
      "learning_rate": 4.048163181771669e-06,
      "loss": 0.6786,
      "step": 76000
    },
    {
      "epoch": 6.2,
      "learning_rate": 4.005611078896204e-06,
      "loss": 0.6697,
      "step": 76500
    },
    {
      "epoch": 6.24,
      "learning_rate": 3.962973701265478e-06,
      "loss": 0.6403,
      "step": 77000
    },
    {
      "epoch": 6.28,
      "learning_rate": 3.920336323634751e-06,
      "loss": 0.6237,
      "step": 77500
    },
    {
      "epoch": 6.32,
      "learning_rate": 3.8776989460040255e-06,
      "loss": 0.6746,
      "step": 78000
    },
    {
      "epoch": 6.36,
      "learning_rate": 3.835061568373299e-06,
      "loss": 0.6245,
      "step": 78500
    },
    {
      "epoch": 6.4,
      "learning_rate": 3.792424190742573e-06,
      "loss": 0.6516,
      "step": 79000
    },
    {
      "epoch": 6.44,
      "learning_rate": 3.7497868131118465e-06,
      "loss": 0.6345,
      "step": 79500
    },
    {
      "epoch": 6.48,
      "learning_rate": 3.7071494354811206e-06,
      "loss": 0.613,
      "step": 80000
    },
    {
      "epoch": 6.52,
      "learning_rate": 3.6645120578503946e-06,
      "loss": 0.6364,
      "step": 80500
    },
    {
      "epoch": 6.56,
      "learning_rate": 3.621874680219668e-06,
      "loss": 0.6429,
      "step": 81000
    },
    {
      "epoch": 6.6,
      "learning_rate": 3.579237302588942e-06,
      "loss": 0.6234,
      "step": 81500
    },
    {
      "epoch": 6.64,
      "learning_rate": 3.536685199713477e-06,
      "loss": 0.6536,
      "step": 82000
    },
    {
      "epoch": 6.68,
      "learning_rate": 3.4940478220827513e-06,
      "loss": 0.6707,
      "step": 82500
    },
    {
      "epoch": 6.72,
      "learning_rate": 3.4514104444520245e-06,
      "loss": 0.6509,
      "step": 83000
    },
    {
      "epoch": 6.76,
      "learning_rate": 3.4087730668212986e-06,
      "loss": 0.6466,
      "step": 83500
    },
    {
      "epoch": 6.8,
      "learning_rate": 3.3661356891905723e-06,
      "loss": 0.6406,
      "step": 84000
    },
    {
      "epoch": 6.85,
      "learning_rate": 3.3234983115598464e-06,
      "loss": 0.6632,
      "step": 84500
    },
    {
      "epoch": 6.89,
      "learning_rate": 3.2808609339291196e-06,
      "loss": 0.6659,
      "step": 85000
    },
    {
      "epoch": 6.93,
      "learning_rate": 3.2382235562983937e-06,
      "loss": 0.6162,
      "step": 85500
    },
    {
      "epoch": 6.97,
      "learning_rate": 3.195671453422929e-06,
      "loss": 0.6443,
      "step": 86000
    },
    {
      "epoch": 7.0,
      "eval_BLEU": 35.38370123074148,
      "eval_BLEU-Bigram-Precision": 39.67419614528579,
      "eval_BLEU-Trigram-Precision": 30.140845070422532,
      "eval_BLEU-Unigram-Precision": 58.794846559899796,
      "eval_ROUGE-2": 29.810110522896206,
      "eval_ROUGE-L": 42.83661338697426,
      "eval_Sacre-Bigram-Precision": 37.397885029820074,
      "eval_Sacre-Trigram-Precision": 29.77666167984612,
      "eval_Sacre-Unigram-Precision": 56.32373761208117,
      "eval_SacreBLEU": 33.81865169489427,
      "eval_loss": 0.7533065676689148,
      "eval_runtime": 395.5312,
      "eval_samples_per_second": 3.127,
      "eval_steps_per_second": 3.127,
      "step": 86408
    },
    {
      "epoch": 7.01,
      "learning_rate": 3.153034075792203e-06,
      "loss": 0.6477,
      "step": 86500
    },
    {
      "epoch": 7.05,
      "learning_rate": 3.1104819729167378e-06,
      "loss": 0.6331,
      "step": 87000
    },
    {
      "epoch": 7.09,
      "learning_rate": 3.067844595286012e-06,
      "loss": 0.6005,
      "step": 87500
    },
    {
      "epoch": 7.13,
      "learning_rate": 3.0252072176552855e-06,
      "loss": 0.6052,
      "step": 88000
    },
    {
      "epoch": 7.17,
      "learning_rate": 2.9825698400245596e-06,
      "loss": 0.6162,
      "step": 88500
    },
    {
      "epoch": 7.21,
      "learning_rate": 2.939932462393833e-06,
      "loss": 0.641,
      "step": 89000
    },
    {
      "epoch": 7.25,
      "learning_rate": 2.897295084763107e-06,
      "loss": 0.6031,
      "step": 89500
    },
    {
      "epoch": 7.29,
      "learning_rate": 2.8546577071323806e-06,
      "loss": 0.6136,
      "step": 90000
    },
    {
      "epoch": 7.33,
      "learning_rate": 2.8120203295016547e-06,
      "loss": 0.5797,
      "step": 90500
    },
    {
      "epoch": 7.37,
      "learning_rate": 2.769382951870928e-06,
      "loss": 0.609,
      "step": 91000
    },
    {
      "epoch": 7.41,
      "learning_rate": 2.726745574240202e-06,
      "loss": 0.5878,
      "step": 91500
    },
    {
      "epoch": 7.45,
      "learning_rate": 2.6841934713647373e-06,
      "loss": 0.5932,
      "step": 92000
    },
    {
      "epoch": 7.49,
      "learning_rate": 2.6415560937340113e-06,
      "loss": 0.5731,
      "step": 92500
    },
    {
      "epoch": 7.53,
      "learning_rate": 2.5989187161032846e-06,
      "loss": 0.5829,
      "step": 93000
    },
    {
      "epoch": 7.57,
      "learning_rate": 2.5562813384725587e-06,
      "loss": 0.5742,
      "step": 93500
    },
    {
      "epoch": 7.62,
      "learning_rate": 2.5136439608418328e-06,
      "loss": 0.5848,
      "step": 94000
    },
    {
      "epoch": 7.66,
      "learning_rate": 2.4710065832111064e-06,
      "loss": 0.6118,
      "step": 94500
    },
    {
      "epoch": 7.7,
      "learning_rate": 2.42836920558038e-06,
      "loss": 0.5972,
      "step": 95000
    },
    {
      "epoch": 7.74,
      "learning_rate": 2.3857318279496538e-06,
      "loss": 0.6177,
      "step": 95500
    },
    {
      "epoch": 7.78,
      "learning_rate": 2.343179725074189e-06,
      "loss": 0.6164,
      "step": 96000
    },
    {
      "epoch": 7.82,
      "learning_rate": 2.300542347443463e-06,
      "loss": 0.6025,
      "step": 96500
    },
    {
      "epoch": 7.86,
      "learning_rate": 2.2579049698127367e-06,
      "loss": 0.602,
      "step": 97000
    },
    {
      "epoch": 7.9,
      "learning_rate": 2.2152675921820104e-06,
      "loss": 0.6002,
      "step": 97500
    },
    {
      "epoch": 7.94,
      "learning_rate": 2.172800764061807e-06,
      "loss": 0.582,
      "step": 98000
    },
    {
      "epoch": 7.98,
      "learning_rate": 2.1301633864310812e-06,
      "loss": 0.5614,
      "step": 98500
    },
    {
      "epoch": 8.0,
      "eval_BLEU": 36.070748388519384,
      "eval_BLEU-Bigram-Precision": 39.989641209153405,
      "eval_BLEU-Trigram-Precision": 30.633468326583667,
      "eval_BLEU-Unigram-Precision": 59.06562847608454,
      "eval_ROUGE-2": 30.543248365770637,
      "eval_ROUGE-L": 43.305001563308394,
      "eval_Sacre-Bigram-Precision": 37.6866967697117,
      "eval_Sacre-Trigram-Precision": 30.228378092620005,
      "eval_Sacre-Unigram-Precision": 56.45628798503974,
      "eval_SacreBLEU": 34.57110367444417,
      "eval_loss": 0.730923056602478,
      "eval_runtime": 400.0156,
      "eval_samples_per_second": 3.092,
      "eval_steps_per_second": 3.092,
      "step": 98752
    },
    {
      "epoch": 8.02,
      "learning_rate": 2.087526008800355e-06,
      "loss": 0.6152,
      "step": 99000
    },
    {
      "epoch": 8.06,
      "learning_rate": 2.0448886311696286e-06,
      "loss": 0.5414,
      "step": 99500
    },
    {
      "epoch": 8.1,
      "learning_rate": 2.0022512535389022e-06,
      "loss": 0.5572,
      "step": 100000
    },
    {
      "epoch": 8.14,
      "learning_rate": 1.9596138759081763e-06,
      "loss": 0.5659,
      "step": 100500
    },
    {
      "epoch": 8.18,
      "learning_rate": 1.91697649827745e-06,
      "loss": 0.5767,
      "step": 101000
    },
    {
      "epoch": 8.22,
      "learning_rate": 1.8744243954019852e-06,
      "loss": 0.5579,
      "step": 101500
    },
    {
      "epoch": 8.26,
      "learning_rate": 1.831787017771259e-06,
      "loss": 0.587,
      "step": 102000
    },
    {
      "epoch": 8.3,
      "learning_rate": 1.7891496401405327e-06,
      "loss": 0.5458,
      "step": 102500
    },
    {
      "epoch": 8.34,
      "learning_rate": 1.7465122625098068e-06,
      "loss": 0.5766,
      "step": 103000
    },
    {
      "epoch": 8.38,
      "learning_rate": 1.7038748848790807e-06,
      "loss": 0.5818,
      "step": 103500
    },
    {
      "epoch": 8.43,
      "learning_rate": 1.6612375072483544e-06,
      "loss": 0.5768,
      "step": 104000
    },
    {
      "epoch": 8.47,
      "learning_rate": 1.6186001296176283e-06,
      "loss": 0.5563,
      "step": 104500
    },
    {
      "epoch": 8.51,
      "learning_rate": 1.5760480267421635e-06,
      "loss": 0.5931,
      "step": 105000
    },
    {
      "epoch": 8.55,
      "learning_rate": 1.5334106491114373e-06,
      "loss": 0.5756,
      "step": 105500
    },
    {
      "epoch": 8.59,
      "learning_rate": 1.490773271480711e-06,
      "loss": 0.5468,
      "step": 106000
    },
    {
      "epoch": 8.63,
      "learning_rate": 1.4481358938499849e-06,
      "loss": 0.5834,
      "step": 106500
    },
    {
      "epoch": 8.67,
      "learning_rate": 1.4054985162192586e-06,
      "loss": 0.5638,
      "step": 107000
    },
    {
      "epoch": 8.71,
      "learning_rate": 1.3628611385885324e-06,
      "loss": 0.5723,
      "step": 107500
    },
    {
      "epoch": 8.75,
      "learning_rate": 1.320223760957806e-06,
      "loss": 0.5676,
      "step": 108000
    },
    {
      "epoch": 8.79,
      "learning_rate": 1.27758638332708e-06,
      "loss": 0.5726,
      "step": 108500
    },
    {
      "epoch": 8.83,
      "learning_rate": 1.2349490056963539e-06,
      "loss": 0.5876,
      "step": 109000
    },
    {
      "epoch": 8.87,
      "learning_rate": 1.192396902820889e-06,
      "loss": 0.5986,
      "step": 109500
    },
    {
      "epoch": 8.91,
      "learning_rate": 1.1498447999454243e-06,
      "loss": 0.5614,
      "step": 110000
    },
    {
      "epoch": 8.95,
      "learning_rate": 1.1072074223146981e-06,
      "loss": 0.5698,
      "step": 110500
    },
    {
      "epoch": 8.99,
      "learning_rate": 1.0645700446839718e-06,
      "loss": 0.5609,
      "step": 111000
    },
    {
      "epoch": 9.0,
      "eval_BLEU": 36.67611797546187,
      "eval_BLEU-Bigram-Precision": 40.95097852935588,
      "eval_BLEU-Trigram-Precision": 31.551854655563965,
      "eval_BLEU-Unigram-Precision": 59.832204226299964,
      "eval_ROUGE-2": 31.384718934506893,
      "eval_ROUGE-L": 43.88977571967912,
      "eval_Sacre-Bigram-Precision": 38.729744644559275,
      "eval_Sacre-Trigram-Precision": 31.1724433033804,
      "eval_Sacre-Unigram-Precision": 57.373641946150215,
      "eval_SacreBLEU": 35.152191088822335,
      "eval_loss": 0.7173283696174622,
      "eval_runtime": 396.278,
      "eval_samples_per_second": 3.122,
      "eval_steps_per_second": 3.122,
      "step": 111096
    },
    {
      "epoch": 9.03,
      "learning_rate": 1.0219326670532457e-06,
      "loss": 0.5586,
      "step": 111500
    },
    {
      "epoch": 9.07,
      "learning_rate": 9.792952894225194e-07,
      "loss": 0.546,
      "step": 112000
    },
    {
      "epoch": 9.11,
      "learning_rate": 9.366579117917933e-07,
      "loss": 0.5307,
      "step": 112500
    },
    {
      "epoch": 9.15,
      "learning_rate": 8.941058089163284e-07,
      "loss": 0.5357,
      "step": 113000
    },
    {
      "epoch": 9.19,
      "learning_rate": 8.515537060408638e-07,
      "loss": 0.5383,
      "step": 113500
    },
    {
      "epoch": 9.24,
      "learning_rate": 8.089163284101375e-07,
      "loss": 0.547,
      "step": 114000
    },
    {
      "epoch": 9.28,
      "learning_rate": 7.662789507794113e-07,
      "loss": 0.5516,
      "step": 114500
    },
    {
      "epoch": 9.32,
      "learning_rate": 7.236415731486851e-07,
      "loss": 0.5494,
      "step": 115000
    },
    {
      "epoch": 9.36,
      "learning_rate": 6.810041955179588e-07,
      "loss": 0.5406,
      "step": 115500
    },
    {
      "epoch": 9.4,
      "learning_rate": 6.383668178872326e-07,
      "loss": 0.561,
      "step": 116000
    },
    {
      "epoch": 9.44,
      "learning_rate": 5.957294402565065e-07,
      "loss": 0.5327,
      "step": 116500
    },
    {
      "epoch": 9.48,
      "learning_rate": 5.530920626257804e-07,
      "loss": 0.5389,
      "step": 117000
    },
    {
      "epoch": 9.52,
      "learning_rate": 5.104546849950541e-07,
      "loss": 0.5639,
      "step": 117500
    },
    {
      "epoch": 9.56,
      "learning_rate": 4.6781730736432786e-07,
      "loss": 0.5789,
      "step": 118000
    },
    {
      "epoch": 9.6,
      "learning_rate": 4.251799297336017e-07,
      "loss": 0.554,
      "step": 118500
    },
    {
      "epoch": 9.64,
      "learning_rate": 3.8262782685813695e-07,
      "loss": 0.5561,
      "step": 119000
    },
    {
      "epoch": 9.68,
      "learning_rate": 3.399904492274107e-07,
      "loss": 0.5556,
      "step": 119500
    },
    {
      "epoch": 9.72,
      "learning_rate": 2.9735307159668455e-07,
      "loss": 0.5547,
      "step": 120000
    },
    {
      "epoch": 9.76,
      "learning_rate": 2.547156939659583e-07,
      "loss": 0.5532,
      "step": 120500
    },
    {
      "epoch": 9.8,
      "learning_rate": 2.1207831633523215e-07,
      "loss": 0.5583,
      "step": 121000
    },
    {
      "epoch": 9.84,
      "learning_rate": 1.6944093870450592e-07,
      "loss": 0.5478,
      "step": 121500
    },
    {
      "epoch": 9.88,
      "learning_rate": 1.2680356107377972e-07,
      "loss": 0.5428,
      "step": 122000
    },
    {
      "epoch": 9.92,
      "learning_rate": 8.416618344305353e-08,
      "loss": 0.5796,
      "step": 122500
    },
    {
      "epoch": 9.96,
      "learning_rate": 4.1528805812327316e-08,
      "loss": 0.5344,
      "step": 123000
    },
    {
      "epoch": 10.0,
      "eval_BLEU": 37.900474787417906,
      "eval_BLEU-Bigram-Precision": 42.22908745247148,
      "eval_BLEU-Trigram-Precision": 32.8435085593092,
      "eval_BLEU-Unigram-Precision": 60.860977689994165,
      "eval_ROUGE-2": 32.21090536054051,
      "eval_ROUGE-L": 44.882010376945814,
      "eval_Sacre-Bigram-Precision": 40.09940757104127,
      "eval_Sacre-Trigram-Precision": 32.514319361918524,
      "eval_Sacre-Unigram-Precision": 58.50153627983928,
      "eval_SacreBLEU": 36.403184600709295,
      "eval_loss": 0.7115575075149536,
      "eval_runtime": 395.6181,
      "eval_samples_per_second": 3.127,
      "eval_steps_per_second": 3.127,
      "step": 123440
    },
    {
      "epoch": 10.0,
      "step": 123440,
      "total_flos": 2092122337443840.0,
      "train_loss": 0.9148468588670198,
      "train_runtime": 16454.4516,
      "train_samples_per_second": 15.003,
      "train_steps_per_second": 7.502
    }
  ],
  "max_steps": 123440,
  "num_train_epochs": 10,
  "total_flos": 2092122337443840.0,
  "trial_name": null,
  "trial_params": null
}