{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.7339522697499574, |
|
"eval_steps": 200, |
|
"global_step": 24000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.1643640249967575, |
|
"learning_rate": 0.0001988891104338166, |
|
"loss": 1.7673, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_bertscore": 0.7312520742416382, |
|
"eval_loss": 1.7944419384002686, |
|
"eval_rouge1": 0.645726048132668, |
|
"eval_rouge2": 0.342840307585653, |
|
"eval_rougeL": 0.5174784271125388, |
|
"eval_rougeLsum": 0.6359911842715976, |
|
"eval_runtime": 67.7968, |
|
"eval_samples_per_second": 0.147, |
|
"eval_steps_per_second": 0.074, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.17238478362560272, |
|
"learning_rate": 0.00019774973651978238, |
|
"loss": 1.6985, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_bertscore": 0.733666718006134, |
|
"eval_loss": 1.7791178226470947, |
|
"eval_rouge1": 0.6540909153028596, |
|
"eval_rouge2": 0.3548819059818129, |
|
"eval_rougeL": 0.527257232694246, |
|
"eval_rougeLsum": 0.6442799950994005, |
|
"eval_runtime": 15.1267, |
|
"eval_samples_per_second": 0.661, |
|
"eval_steps_per_second": 0.331, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.19368696212768555, |
|
"learning_rate": 0.00019661036260574814, |
|
"loss": 1.6962, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_bertscore": 0.7339462041854858, |
|
"eval_loss": 1.7609882354736328, |
|
"eval_rouge1": 0.6384337329686338, |
|
"eval_rouge2": 0.3415514270662107, |
|
"eval_rougeL": 0.51206080148464, |
|
"eval_rougeLsum": 0.6261968614666548, |
|
"eval_runtime": 15.2068, |
|
"eval_samples_per_second": 0.658, |
|
"eval_steps_per_second": 0.329, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.18629203736782074, |
|
"learning_rate": 0.00019547098869171392, |
|
"loss": 1.6825, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_bertscore": 0.7363594174385071, |
|
"eval_loss": 1.7610784769058228, |
|
"eval_rouge1": 0.6461624591922237, |
|
"eval_rouge2": 0.3477371388439609, |
|
"eval_rougeL": 0.5187429174752844, |
|
"eval_rougeLsum": 0.6361089823008282, |
|
"eval_runtime": 15.173, |
|
"eval_samples_per_second": 0.659, |
|
"eval_steps_per_second": 0.33, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.1799013316631317, |
|
"learning_rate": 0.00019433161477767967, |
|
"loss": 1.6848, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_bertscore": 0.7334067225456238, |
|
"eval_loss": 1.7576347589492798, |
|
"eval_rouge1": 0.6345119236349537, |
|
"eval_rouge2": 0.3422519149071803, |
|
"eval_rougeL": 0.5111983101326238, |
|
"eval_rougeLsum": 0.6244653120436832, |
|
"eval_runtime": 15.2847, |
|
"eval_samples_per_second": 0.654, |
|
"eval_steps_per_second": 0.327, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.22036150097846985, |
|
"learning_rate": 0.00019319224086364545, |
|
"loss": 1.6714, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_bertscore": 0.7323788404464722, |
|
"eval_loss": 1.7521806955337524, |
|
"eval_rouge1": 0.6452540184557478, |
|
"eval_rouge2": 0.3465145726476423, |
|
"eval_rougeL": 0.516711757588783, |
|
"eval_rougeLsum": 0.6341049885677059, |
|
"eval_runtime": 15.1247, |
|
"eval_samples_per_second": 0.661, |
|
"eval_steps_per_second": 0.331, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.21381086111068726, |
|
"learning_rate": 0.0001920528669496112, |
|
"loss": 1.6669, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_bertscore": 0.7313202619552612, |
|
"eval_loss": 1.7520482540130615, |
|
"eval_rouge1": 0.6397526546254797, |
|
"eval_rouge2": 0.3452671288110514, |
|
"eval_rougeL": 0.5176580626678706, |
|
"eval_rougeLsum": 0.6296746647539768, |
|
"eval_runtime": 15.183, |
|
"eval_samples_per_second": 0.659, |
|
"eval_steps_per_second": 0.329, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.20332874357700348, |
|
"learning_rate": 0.00019091349303557696, |
|
"loss": 1.671, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_bertscore": 0.7349230647087097, |
|
"eval_loss": 1.7473630905151367, |
|
"eval_rouge1": 0.637439872504459, |
|
"eval_rouge2": 0.34307164454056094, |
|
"eval_rougeL": 0.5129717676228565, |
|
"eval_rougeLsum": 0.6272190896182391, |
|
"eval_runtime": 15.5672, |
|
"eval_samples_per_second": 0.642, |
|
"eval_steps_per_second": 0.321, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.2025599479675293, |
|
"learning_rate": 0.00018977411912154274, |
|
"loss": 1.6721, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_bertscore": 0.7357184290885925, |
|
"eval_loss": 1.7516342401504517, |
|
"eval_rouge1": 0.6387615819926658, |
|
"eval_rouge2": 0.34366787517105574, |
|
"eval_rougeL": 0.5129026911770751, |
|
"eval_rougeLsum": 0.6289314118258257, |
|
"eval_runtime": 15.9574, |
|
"eval_samples_per_second": 0.627, |
|
"eval_steps_per_second": 0.313, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.20457112789154053, |
|
"learning_rate": 0.0001886347452075085, |
|
"loss": 1.671, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_bertscore": 0.733718752861023, |
|
"eval_loss": 1.7501707077026367, |
|
"eval_rouge1": 0.6346207681220664, |
|
"eval_rouge2": 0.33748369437614106, |
|
"eval_rougeL": 0.5085159047705141, |
|
"eval_rougeLsum": 0.6239953154441167, |
|
"eval_runtime": 15.0863, |
|
"eval_samples_per_second": 0.663, |
|
"eval_steps_per_second": 0.331, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.22552740573883057, |
|
"learning_rate": 0.00018749537129347424, |
|
"loss": 1.6496, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_bertscore": 0.7368552684783936, |
|
"eval_loss": 1.7437107563018799, |
|
"eval_rouge1": 0.6490756387878311, |
|
"eval_rouge2": 0.3448817738175175, |
|
"eval_rougeL": 0.5235187045706321, |
|
"eval_rougeLsum": 0.6377780857890332, |
|
"eval_runtime": 15.07, |
|
"eval_samples_per_second": 0.664, |
|
"eval_steps_per_second": 0.332, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.22573673725128174, |
|
"learning_rate": 0.00018635599737944, |
|
"loss": 1.6629, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_bertscore": 0.7314499616622925, |
|
"eval_loss": 1.7462828159332275, |
|
"eval_rouge1": 0.6511482369678803, |
|
"eval_rouge2": 0.34632544827771805, |
|
"eval_rougeL": 0.5212417191003778, |
|
"eval_rougeLsum": 0.6415391907940229, |
|
"eval_runtime": 15.2078, |
|
"eval_samples_per_second": 0.658, |
|
"eval_steps_per_second": 0.329, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.26426687836647034, |
|
"learning_rate": 0.00018521662346540575, |
|
"loss": 1.6644, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_bertscore": 0.7363359928131104, |
|
"eval_loss": 1.7505037784576416, |
|
"eval_rouge1": 0.6498296552335481, |
|
"eval_rouge2": 0.34873833589761183, |
|
"eval_rougeL": 0.5194028620820592, |
|
"eval_rougeLsum": 0.6404603087578984, |
|
"eval_runtime": 14.8403, |
|
"eval_samples_per_second": 0.674, |
|
"eval_steps_per_second": 0.337, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.20142091810703278, |
|
"learning_rate": 0.00018407724955137153, |
|
"loss": 1.6535, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_bertscore": 0.7304679155349731, |
|
"eval_loss": 1.7511039972305298, |
|
"eval_rouge1": 0.6475130585388738, |
|
"eval_rouge2": 0.34648331046897884, |
|
"eval_rougeL": 0.5218042284020985, |
|
"eval_rougeLsum": 0.6382749834402862, |
|
"eval_runtime": 15.0162, |
|
"eval_samples_per_second": 0.666, |
|
"eval_steps_per_second": 0.333, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.23283220827579498, |
|
"learning_rate": 0.00018293787563733728, |
|
"loss": 1.6477, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_bertscore": 0.7327049374580383, |
|
"eval_loss": 1.7461665868759155, |
|
"eval_rouge1": 0.6309349586871908, |
|
"eval_rouge2": 0.3387882478990309, |
|
"eval_rougeL": 0.5042059192403674, |
|
"eval_rougeLsum": 0.6210432469674847, |
|
"eval_runtime": 15.463, |
|
"eval_samples_per_second": 0.647, |
|
"eval_steps_per_second": 0.323, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.21316750347614288, |
|
"learning_rate": 0.00018179850172330306, |
|
"loss": 1.6614, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_bertscore": 0.7314620018005371, |
|
"eval_loss": 1.7468239068984985, |
|
"eval_rouge1": 0.6480904534152265, |
|
"eval_rouge2": 0.3479530168963481, |
|
"eval_rougeL": 0.5193148273848067, |
|
"eval_rougeLsum": 0.6366010767207634, |
|
"eval_runtime": 15.0381, |
|
"eval_samples_per_second": 0.665, |
|
"eval_steps_per_second": 0.332, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.26080408692359924, |
|
"learning_rate": 0.00018065912780926882, |
|
"loss": 1.6591, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_bertscore": 0.7327477335929871, |
|
"eval_loss": 1.7442594766616821, |
|
"eval_rouge1": 0.6424613378037144, |
|
"eval_rouge2": 0.34731770322974903, |
|
"eval_rougeL": 0.5160705879794565, |
|
"eval_rougeLsum": 0.6327006420281607, |
|
"eval_runtime": 15.5373, |
|
"eval_samples_per_second": 0.644, |
|
"eval_steps_per_second": 0.322, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.23274216055870056, |
|
"learning_rate": 0.0001795197538952346, |
|
"loss": 1.6613, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_bertscore": 0.736956000328064, |
|
"eval_loss": 1.7429373264312744, |
|
"eval_rouge1": 0.6514574160666677, |
|
"eval_rouge2": 0.3556199242231646, |
|
"eval_rougeL": 0.5249726237675663, |
|
"eval_rougeLsum": 0.6406261097623661, |
|
"eval_runtime": 14.9415, |
|
"eval_samples_per_second": 0.669, |
|
"eval_steps_per_second": 0.335, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.23616766929626465, |
|
"learning_rate": 0.00017838037998120035, |
|
"loss": 1.6479, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_bertscore": 0.7349627614021301, |
|
"eval_loss": 1.7420669794082642, |
|
"eval_rouge1": 0.655851684949526, |
|
"eval_rouge2": 0.35254590691865084, |
|
"eval_rougeL": 0.5248980956621441, |
|
"eval_rougeLsum": 0.6449637270581419, |
|
"eval_runtime": 15.4178, |
|
"eval_samples_per_second": 0.649, |
|
"eval_steps_per_second": 0.324, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.23260319232940674, |
|
"learning_rate": 0.0001772410060671661, |
|
"loss": 1.6569, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_bertscore": 0.7332885265350342, |
|
"eval_loss": 1.7401313781738281, |
|
"eval_rouge1": 0.6669483634140105, |
|
"eval_rouge2": 0.35873988835161297, |
|
"eval_rougeL": 0.5343868725007427, |
|
"eval_rougeLsum": 0.6555353134690931, |
|
"eval_runtime": 15.0822, |
|
"eval_samples_per_second": 0.663, |
|
"eval_steps_per_second": 0.332, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.2366473525762558, |
|
"learning_rate": 0.00017610163215313186, |
|
"loss": 1.6599, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_bertscore": 0.7335314750671387, |
|
"eval_loss": 1.7385823726654053, |
|
"eval_rouge1": 0.6559297063578133, |
|
"eval_rouge2": 0.35483499789990636, |
|
"eval_rougeL": 0.5297939800986089, |
|
"eval_rougeLsum": 0.6454544372491222, |
|
"eval_runtime": 15.1029, |
|
"eval_samples_per_second": 0.662, |
|
"eval_steps_per_second": 0.331, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.20628753304481506, |
|
"learning_rate": 0.0001749622582390976, |
|
"loss": 1.6454, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_bertscore": 0.7342795133590698, |
|
"eval_loss": 1.7422058582305908, |
|
"eval_rouge1": 0.660746519614568, |
|
"eval_rouge2": 0.3633965895561597, |
|
"eval_rougeL": 0.5369036980876734, |
|
"eval_rougeLsum": 0.650338328328998, |
|
"eval_runtime": 15.4434, |
|
"eval_samples_per_second": 0.648, |
|
"eval_steps_per_second": 0.324, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.2239149957895279, |
|
"learning_rate": 0.0001738228843250634, |
|
"loss": 1.6594, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_bertscore": 0.7313543558120728, |
|
"eval_loss": 1.740854263305664, |
|
"eval_rouge1": 0.6591645132619427, |
|
"eval_rouge2": 0.35766117432431743, |
|
"eval_rougeL": 0.532710255034635, |
|
"eval_rougeLsum": 0.6479428185884644, |
|
"eval_runtime": 14.9436, |
|
"eval_samples_per_second": 0.669, |
|
"eval_steps_per_second": 0.335, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.24808338284492493, |
|
"learning_rate": 0.00017268351041102914, |
|
"loss": 1.6604, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_bertscore": 0.7333321571350098, |
|
"eval_loss": 1.7385585308074951, |
|
"eval_rouge1": 0.6532115808232871, |
|
"eval_rouge2": 0.35333788022501567, |
|
"eval_rougeL": 0.5284071547874328, |
|
"eval_rougeLsum": 0.6410472452797623, |
|
"eval_runtime": 15.0277, |
|
"eval_samples_per_second": 0.665, |
|
"eval_steps_per_second": 0.333, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.2555364966392517, |
|
"learning_rate": 0.0001715441364969949, |
|
"loss": 1.6493, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_bertscore": 0.7318152189254761, |
|
"eval_loss": 1.7357494831085205, |
|
"eval_rouge1": 0.6476755890502586, |
|
"eval_rouge2": 0.35312778275949164, |
|
"eval_rougeL": 0.5227601228049905, |
|
"eval_rougeLsum": 0.6371331138372852, |
|
"eval_runtime": 14.8807, |
|
"eval_samples_per_second": 0.672, |
|
"eval_steps_per_second": 0.336, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.21518155932426453, |
|
"learning_rate": 0.00017040476258296068, |
|
"loss": 1.644, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_bertscore": 0.734805703163147, |
|
"eval_loss": 1.74032723903656, |
|
"eval_rouge1": 0.6476813733451636, |
|
"eval_rouge2": 0.3509259728617576, |
|
"eval_rougeL": 0.5221334872800274, |
|
"eval_rougeLsum": 0.636892384667733, |
|
"eval_runtime": 15.1271, |
|
"eval_samples_per_second": 0.661, |
|
"eval_steps_per_second": 0.331, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.26086658239364624, |
|
"learning_rate": 0.00016926538866892643, |
|
"loss": 1.6449, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_bertscore": 0.7339995503425598, |
|
"eval_loss": 1.7338205575942993, |
|
"eval_rouge1": 0.6416889902864902, |
|
"eval_rouge2": 0.3479045880347737, |
|
"eval_rougeL": 0.5160577838468976, |
|
"eval_rougeLsum": 0.6317983411796093, |
|
"eval_runtime": 14.9992, |
|
"eval_samples_per_second": 0.667, |
|
"eval_steps_per_second": 0.333, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.25449469685554504, |
|
"learning_rate": 0.0001681260147548922, |
|
"loss": 1.6299, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_bertscore": 0.7306328415870667, |
|
"eval_loss": 1.7369228601455688, |
|
"eval_rouge1": 0.6390760905985684, |
|
"eval_rouge2": 0.3409328272828699, |
|
"eval_rougeL": 0.5111832543685331, |
|
"eval_rougeLsum": 0.6285753423407665, |
|
"eval_runtime": 15.483, |
|
"eval_samples_per_second": 0.646, |
|
"eval_steps_per_second": 0.323, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.24706102907657623, |
|
"learning_rate": 0.00016698664084085796, |
|
"loss": 1.6374, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_bertscore": 0.732075572013855, |
|
"eval_loss": 1.7343876361846924, |
|
"eval_rouge1": 0.6378821977913272, |
|
"eval_rouge2": 0.34619427775171585, |
|
"eval_rougeL": 0.5120186953041237, |
|
"eval_rougeLsum": 0.6284056323839109, |
|
"eval_runtime": 15.7341, |
|
"eval_samples_per_second": 0.636, |
|
"eval_steps_per_second": 0.318, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.24373260140419006, |
|
"learning_rate": 0.00016584726692682372, |
|
"loss": 1.6427, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_bertscore": 0.7350374460220337, |
|
"eval_loss": 1.729591965675354, |
|
"eval_rouge1": 0.6516545226356616, |
|
"eval_rouge2": 0.35485762033878543, |
|
"eval_rougeL": 0.5249054193354852, |
|
"eval_rougeLsum": 0.6411016821651583, |
|
"eval_runtime": 15.5199, |
|
"eval_samples_per_second": 0.644, |
|
"eval_steps_per_second": 0.322, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.24616578221321106, |
|
"learning_rate": 0.0001647078930127895, |
|
"loss": 1.6296, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_bertscore": 0.7335461378097534, |
|
"eval_loss": 1.7302274703979492, |
|
"eval_rouge1": 0.6531411706717427, |
|
"eval_rouge2": 0.35003053174601517, |
|
"eval_rougeL": 0.5212483686089053, |
|
"eval_rougeLsum": 0.6438454124825417, |
|
"eval_runtime": 15.3058, |
|
"eval_samples_per_second": 0.653, |
|
"eval_steps_per_second": 0.327, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.24500492215156555, |
|
"learning_rate": 0.00016356851909875522, |
|
"loss": 1.6251, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_bertscore": 0.7347471714019775, |
|
"eval_loss": 1.7292228937149048, |
|
"eval_rouge1": 0.6565322485502556, |
|
"eval_rouge2": 0.35887540291607073, |
|
"eval_rougeL": 0.5284326132878907, |
|
"eval_rougeLsum": 0.6469750895866724, |
|
"eval_runtime": 14.9708, |
|
"eval_samples_per_second": 0.668, |
|
"eval_steps_per_second": 0.334, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.26575228571891785, |
|
"learning_rate": 0.000162429145184721, |
|
"loss": 1.6389, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_bertscore": 0.7319897413253784, |
|
"eval_loss": 1.7287580966949463, |
|
"eval_rouge1": 0.6458608801881298, |
|
"eval_rouge2": 0.3503480901452204, |
|
"eval_rougeL": 0.519626708150005, |
|
"eval_rougeLsum": 0.6362405734928169, |
|
"eval_runtime": 15.3509, |
|
"eval_samples_per_second": 0.651, |
|
"eval_steps_per_second": 0.326, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.27144965529441833, |
|
"learning_rate": 0.00016128977127068676, |
|
"loss": 1.6476, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_bertscore": 0.7392772436141968, |
|
"eval_loss": 1.7257907390594482, |
|
"eval_rouge1": 0.6543238897579965, |
|
"eval_rouge2": 0.3606726049451984, |
|
"eval_rougeL": 0.5317585887753791, |
|
"eval_rougeLsum": 0.6452028420624081, |
|
"eval_runtime": 15.0268, |
|
"eval_samples_per_second": 0.665, |
|
"eval_steps_per_second": 0.333, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.2579711079597473, |
|
"learning_rate": 0.00016015039735665254, |
|
"loss": 1.6316, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_bertscore": 0.7375612854957581, |
|
"eval_loss": 1.7296981811523438, |
|
"eval_rouge1": 0.658906725408624, |
|
"eval_rouge2": 0.35825094165644866, |
|
"eval_rougeL": 0.5323299193377959, |
|
"eval_rougeLsum": 0.6500364347290426, |
|
"eval_runtime": 14.9244, |
|
"eval_samples_per_second": 0.67, |
|
"eval_steps_per_second": 0.335, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.2589207589626312, |
|
"learning_rate": 0.0001590110234426183, |
|
"loss": 1.6432, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_bertscore": 0.735145092010498, |
|
"eval_loss": 1.72720205783844, |
|
"eval_rouge1": 0.6678646250245518, |
|
"eval_rouge2": 0.36332843150846983, |
|
"eval_rougeL": 0.537576430733886, |
|
"eval_rougeLsum": 0.6579789388660506, |
|
"eval_runtime": 14.9067, |
|
"eval_samples_per_second": 0.671, |
|
"eval_steps_per_second": 0.335, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.26652559638023376, |
|
"learning_rate": 0.00015787164952858404, |
|
"loss": 1.6488, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_bertscore": 0.7320755124092102, |
|
"eval_loss": 1.7282931804656982, |
|
"eval_rouge1": 0.6325633297780734, |
|
"eval_rouge2": 0.34505856555703185, |
|
"eval_rougeL": 0.5100006743383693, |
|
"eval_rougeLsum": 0.6230385336341938, |
|
"eval_runtime": 14.9075, |
|
"eval_samples_per_second": 0.671, |
|
"eval_steps_per_second": 0.335, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.27353721857070923, |
|
"learning_rate": 0.00015673227561454982, |
|
"loss": 1.6486, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_bertscore": 0.7367390394210815, |
|
"eval_loss": 1.7290785312652588, |
|
"eval_rouge1": 0.639487116874423, |
|
"eval_rouge2": 0.3466574229736927, |
|
"eval_rougeL": 0.515038120249177, |
|
"eval_rougeLsum": 0.6301157215372983, |
|
"eval_runtime": 15.0876, |
|
"eval_samples_per_second": 0.663, |
|
"eval_steps_per_second": 0.331, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.24777938425540924, |
|
"learning_rate": 0.00015559290170051558, |
|
"loss": 1.6271, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_bertscore": 0.735866904258728, |
|
"eval_loss": 1.7264015674591064, |
|
"eval_rouge1": 0.64939597901302, |
|
"eval_rouge2": 0.3554282813944538, |
|
"eval_rougeL": 0.5247953329477759, |
|
"eval_rougeLsum": 0.6405524812915908, |
|
"eval_runtime": 14.8892, |
|
"eval_samples_per_second": 0.672, |
|
"eval_steps_per_second": 0.336, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.2703794538974762, |
|
"learning_rate": 0.00015445352778648136, |
|
"loss": 1.6415, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_bertscore": 0.7325771450996399, |
|
"eval_loss": 1.7271970510482788, |
|
"eval_rouge1": 0.6659432894288253, |
|
"eval_rouge2": 0.35962933912652617, |
|
"eval_rougeL": 0.5385420432813512, |
|
"eval_rougeLsum": 0.6557027031484046, |
|
"eval_runtime": 14.8319, |
|
"eval_samples_per_second": 0.674, |
|
"eval_steps_per_second": 0.337, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.28753793239593506, |
|
"learning_rate": 0.0001533141538724471, |
|
"loss": 1.6239, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_bertscore": 0.7350013852119446, |
|
"eval_loss": 1.7266199588775635, |
|
"eval_rouge1": 0.6549282561414593, |
|
"eval_rouge2": 0.35694530595734475, |
|
"eval_rougeL": 0.5301601006964574, |
|
"eval_rougeLsum": 0.6441779306137909, |
|
"eval_runtime": 14.9005, |
|
"eval_samples_per_second": 0.671, |
|
"eval_steps_per_second": 0.336, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.23870150744915009, |
|
"learning_rate": 0.00015217477995841286, |
|
"loss": 1.6293, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_bertscore": 0.7271261811256409, |
|
"eval_loss": 1.7256368398666382, |
|
"eval_rouge1": 0.6515513829901936, |
|
"eval_rouge2": 0.35217616104918836, |
|
"eval_rougeL": 0.5236553509227138, |
|
"eval_rougeLsum": 0.6411473505324752, |
|
"eval_runtime": 14.9938, |
|
"eval_samples_per_second": 0.667, |
|
"eval_steps_per_second": 0.333, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.28276997804641724, |
|
"learning_rate": 0.00015103540604437861, |
|
"loss": 1.6242, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_bertscore": 0.7347334027290344, |
|
"eval_loss": 1.717627763748169, |
|
"eval_rouge1": 0.6350112495847634, |
|
"eval_rouge2": 0.3477570751550898, |
|
"eval_rougeL": 0.5146616989899861, |
|
"eval_rougeLsum": 0.6246669376525157, |
|
"eval_runtime": 15.7032, |
|
"eval_samples_per_second": 0.637, |
|
"eval_steps_per_second": 0.318, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.24915842711925507, |
|
"learning_rate": 0.00014989603213034437, |
|
"loss": 1.6245, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bertscore": 0.7313701510429382, |
|
"eval_loss": 1.7292964458465576, |
|
"eval_rouge1": 0.6479528105367669, |
|
"eval_rouge2": 0.35020983244262877, |
|
"eval_rougeL": 0.5200907337780047, |
|
"eval_rougeLsum": 0.6372896614836894, |
|
"eval_runtime": 15.0043, |
|
"eval_samples_per_second": 0.666, |
|
"eval_steps_per_second": 0.333, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.24036027491092682, |
|
"learning_rate": 0.00014875665821631015, |
|
"loss": 1.5364, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_bertscore": 0.7327737808227539, |
|
"eval_loss": 1.7339435815811157, |
|
"eval_rouge1": 0.6568524178922349, |
|
"eval_rouge2": 0.35560270713543163, |
|
"eval_rougeL": 0.5310443670833082, |
|
"eval_rougeLsum": 0.6480993679097387, |
|
"eval_runtime": 14.9303, |
|
"eval_samples_per_second": 0.67, |
|
"eval_steps_per_second": 0.335, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.2729027271270752, |
|
"learning_rate": 0.0001476172843022759, |
|
"loss": 1.5182, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_bertscore": 0.7334672212600708, |
|
"eval_loss": 1.739061713218689, |
|
"eval_rouge1": 0.6552962187824329, |
|
"eval_rouge2": 0.35210314124279196, |
|
"eval_rougeL": 0.5272039052368354, |
|
"eval_rougeLsum": 0.6437473533492806, |
|
"eval_runtime": 15.5418, |
|
"eval_samples_per_second": 0.643, |
|
"eval_steps_per_second": 0.322, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.2909716069698334, |
|
"learning_rate": 0.00014647791038824168, |
|
"loss": 1.5276, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_bertscore": 0.7288902997970581, |
|
"eval_loss": 1.736944556236267, |
|
"eval_rouge1": 0.6533271685598301, |
|
"eval_rouge2": 0.35279315321532184, |
|
"eval_rougeL": 0.5262688234671329, |
|
"eval_rougeLsum": 0.6424084937151033, |
|
"eval_runtime": 15.1115, |
|
"eval_samples_per_second": 0.662, |
|
"eval_steps_per_second": 0.331, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.3035859763622284, |
|
"learning_rate": 0.00014533853647420743, |
|
"loss": 1.5445, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_bertscore": 0.7308284044265747, |
|
"eval_loss": 1.737762689590454, |
|
"eval_rouge1": 0.6619725777891359, |
|
"eval_rouge2": 0.3611963714506864, |
|
"eval_rougeL": 0.5363802967084452, |
|
"eval_rougeLsum": 0.6516690557971352, |
|
"eval_runtime": 14.9932, |
|
"eval_samples_per_second": 0.667, |
|
"eval_steps_per_second": 0.333, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.26574915647506714, |
|
"learning_rate": 0.0001441991625601732, |
|
"loss": 1.5342, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_bertscore": 0.7328712344169617, |
|
"eval_loss": 1.7393991947174072, |
|
"eval_rouge1": 0.6856504003396884, |
|
"eval_rouge2": 0.3761098841062477, |
|
"eval_rougeL": 0.555477293163325, |
|
"eval_rougeLsum": 0.6757574283262289, |
|
"eval_runtime": 14.8121, |
|
"eval_samples_per_second": 0.675, |
|
"eval_steps_per_second": 0.338, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.315468430519104, |
|
"learning_rate": 0.00014305978864613897, |
|
"loss": 1.543, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_bertscore": 0.7349387407302856, |
|
"eval_loss": 1.7352710962295532, |
|
"eval_rouge1": 0.6749953128982036, |
|
"eval_rouge2": 0.3720385250530084, |
|
"eval_rougeL": 0.5472261566474382, |
|
"eval_rougeLsum": 0.6657643539219252, |
|
"eval_runtime": 14.909, |
|
"eval_samples_per_second": 0.671, |
|
"eval_steps_per_second": 0.335, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.29815027117729187, |
|
"learning_rate": 0.00014192041473210472, |
|
"loss": 1.5547, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_bertscore": 0.7359883189201355, |
|
"eval_loss": 1.7269136905670166, |
|
"eval_rouge1": 0.6561141614088863, |
|
"eval_rouge2": 0.3606175666303814, |
|
"eval_rougeL": 0.5302270771032793, |
|
"eval_rougeLsum": 0.6446912079521883, |
|
"eval_runtime": 14.9527, |
|
"eval_samples_per_second": 0.669, |
|
"eval_steps_per_second": 0.334, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 0.3595702350139618, |
|
"learning_rate": 0.00014078104081807047, |
|
"loss": 1.5567, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_bertscore": 0.7328116297721863, |
|
"eval_loss": 1.7341644763946533, |
|
"eval_rouge1": 0.6420332714823549, |
|
"eval_rouge2": 0.35094864549032, |
|
"eval_rougeL": 0.5179556761398367, |
|
"eval_rougeLsum": 0.631987397226809, |
|
"eval_runtime": 15.1438, |
|
"eval_samples_per_second": 0.66, |
|
"eval_steps_per_second": 0.33, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.2718666195869446, |
|
"learning_rate": 0.00013964166690403623, |
|
"loss": 1.5408, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_bertscore": 0.7337731122970581, |
|
"eval_loss": 1.7330901622772217, |
|
"eval_rouge1": 0.661681342484687, |
|
"eval_rouge2": 0.3626833509973693, |
|
"eval_rougeL": 0.5329424447373774, |
|
"eval_rougeLsum": 0.6519750177144633, |
|
"eval_runtime": 14.8142, |
|
"eval_samples_per_second": 0.675, |
|
"eval_steps_per_second": 0.338, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.29183274507522583, |
|
"learning_rate": 0.00013850229299000198, |
|
"loss": 1.5422, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_bertscore": 0.7331051230430603, |
|
"eval_loss": 1.7297636270523071, |
|
"eval_rouge1": 0.6655497978238063, |
|
"eval_rouge2": 0.3614235788441926, |
|
"eval_rougeL": 0.5327210061667442, |
|
"eval_rougeLsum": 0.6548836840483913, |
|
"eval_runtime": 15.1359, |
|
"eval_samples_per_second": 0.661, |
|
"eval_steps_per_second": 0.33, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.30979740619659424, |
|
"learning_rate": 0.00013736291907596776, |
|
"loss": 1.5372, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_bertscore": 0.7312799692153931, |
|
"eval_loss": 1.732444167137146, |
|
"eval_rouge1": 0.6568292865033993, |
|
"eval_rouge2": 0.35876682221562006, |
|
"eval_rougeL": 0.5300878844981931, |
|
"eval_rougeLsum": 0.6461751645858989, |
|
"eval_runtime": 14.819, |
|
"eval_samples_per_second": 0.675, |
|
"eval_steps_per_second": 0.337, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.31343138217926025, |
|
"learning_rate": 0.0001362235451619335, |
|
"loss": 1.5301, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_bertscore": 0.7317885160446167, |
|
"eval_loss": 1.7358499765396118, |
|
"eval_rouge1": 0.6548673097943329, |
|
"eval_rouge2": 0.3609116081432997, |
|
"eval_rougeL": 0.5279887650752133, |
|
"eval_rougeLsum": 0.6466232329097188, |
|
"eval_runtime": 14.8259, |
|
"eval_samples_per_second": 0.674, |
|
"eval_steps_per_second": 0.337, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.36181533336639404, |
|
"learning_rate": 0.0001350841712478993, |
|
"loss": 1.5421, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_bertscore": 0.7316756248474121, |
|
"eval_loss": 1.7282969951629639, |
|
"eval_rouge1": 0.6551882964480251, |
|
"eval_rouge2": 0.3580708921400697, |
|
"eval_rougeL": 0.5255367305995147, |
|
"eval_rougeLsum": 0.6449192953008009, |
|
"eval_runtime": 14.8816, |
|
"eval_samples_per_second": 0.672, |
|
"eval_steps_per_second": 0.336, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.30600836873054504, |
|
"learning_rate": 0.00013394479733386505, |
|
"loss": 1.5538, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_bertscore": 0.7311854362487793, |
|
"eval_loss": 1.7313562631607056, |
|
"eval_rouge1": 0.6592751199424156, |
|
"eval_rouge2": 0.35802855072854206, |
|
"eval_rougeL": 0.5297288176377084, |
|
"eval_rougeLsum": 0.6489455314962717, |
|
"eval_runtime": 15.0693, |
|
"eval_samples_per_second": 0.664, |
|
"eval_steps_per_second": 0.332, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.29904893040657043, |
|
"learning_rate": 0.0001328054234198308, |
|
"loss": 1.5328, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_bertscore": 0.7312635183334351, |
|
"eval_loss": 1.7318429946899414, |
|
"eval_rouge1": 0.6577169369077195, |
|
"eval_rouge2": 0.3582474830918887, |
|
"eval_rougeL": 0.5314990647771975, |
|
"eval_rougeLsum": 0.6454785220479866, |
|
"eval_runtime": 15.0235, |
|
"eval_samples_per_second": 0.666, |
|
"eval_steps_per_second": 0.333, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.3025416433811188, |
|
"learning_rate": 0.00013166604950579658, |
|
"loss": 1.5349, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_bertscore": 0.7325812578201294, |
|
"eval_loss": 1.7309118509292603, |
|
"eval_rouge1": 0.6629133074951261, |
|
"eval_rouge2": 0.3678158453940578, |
|
"eval_rougeL": 0.5380936907276155, |
|
"eval_rougeLsum": 0.654883061928214, |
|
"eval_runtime": 14.7666, |
|
"eval_samples_per_second": 0.677, |
|
"eval_steps_per_second": 0.339, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.34982389211654663, |
|
"learning_rate": 0.00013052667559176233, |
|
"loss": 1.5513, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_bertscore": 0.7340582609176636, |
|
"eval_loss": 1.7363474369049072, |
|
"eval_rouge1": 0.6555817937418287, |
|
"eval_rouge2": 0.35630500078358396, |
|
"eval_rougeL": 0.5272412353478366, |
|
"eval_rougeLsum": 0.6445837479327643, |
|
"eval_runtime": 14.9664, |
|
"eval_samples_per_second": 0.668, |
|
"eval_steps_per_second": 0.334, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.35809043049812317, |
|
"learning_rate": 0.0001293873016777281, |
|
"loss": 1.5444, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_bertscore": 0.7334069013595581, |
|
"eval_loss": 1.7328729629516602, |
|
"eval_rouge1": 0.6617971237669364, |
|
"eval_rouge2": 0.35951260376512423, |
|
"eval_rougeL": 0.5345512305507059, |
|
"eval_rougeLsum": 0.648363531132752, |
|
"eval_runtime": 15.2146, |
|
"eval_samples_per_second": 0.657, |
|
"eval_steps_per_second": 0.329, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.2954196631908417, |
|
"learning_rate": 0.00012824792776369387, |
|
"loss": 1.5406, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_bertscore": 0.7321678400039673, |
|
"eval_loss": 1.7335160970687866, |
|
"eval_rouge1": 0.6573625593086756, |
|
"eval_rouge2": 0.36210525247389347, |
|
"eval_rougeL": 0.5379361120230158, |
|
"eval_rougeLsum": 0.6459787883452857, |
|
"eval_runtime": 14.8942, |
|
"eval_samples_per_second": 0.671, |
|
"eval_steps_per_second": 0.336, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 0.32190588116645813, |
|
"learning_rate": 0.00012710855384965962, |
|
"loss": 1.5491, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_bertscore": 0.7346011400222778, |
|
"eval_loss": 1.7364966869354248, |
|
"eval_rouge1": 0.6481210247390559, |
|
"eval_rouge2": 0.3521173896017687, |
|
"eval_rougeL": 0.5240500581372636, |
|
"eval_rougeLsum": 0.63706442433335, |
|
"eval_runtime": 14.923, |
|
"eval_samples_per_second": 0.67, |
|
"eval_steps_per_second": 0.335, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.33323267102241516, |
|
"learning_rate": 0.00012596917993562537, |
|
"loss": 1.5596, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_bertscore": 0.7331587076187134, |
|
"eval_loss": 1.7332260608673096, |
|
"eval_rouge1": 0.6561257401878793, |
|
"eval_rouge2": 0.3548063723792664, |
|
"eval_rougeL": 0.527807776001489, |
|
"eval_rougeLsum": 0.6451911907984706, |
|
"eval_runtime": 15.4703, |
|
"eval_samples_per_second": 0.646, |
|
"eval_steps_per_second": 0.323, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.3564057946205139, |
|
"learning_rate": 0.00012482980602159113, |
|
"loss": 1.5261, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_bertscore": 0.7306063771247864, |
|
"eval_loss": 1.7368810176849365, |
|
"eval_rouge1": 0.637722723890071, |
|
"eval_rouge2": 0.3455358728458236, |
|
"eval_rougeL": 0.5136372690435154, |
|
"eval_rougeLsum": 0.6273570573595115, |
|
"eval_runtime": 15.3533, |
|
"eval_samples_per_second": 0.651, |
|
"eval_steps_per_second": 0.326, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 0.29219934344291687, |
|
"learning_rate": 0.0001236904321075569, |
|
"loss": 1.519, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_bertscore": 0.7338696122169495, |
|
"eval_loss": 1.734724998474121, |
|
"eval_rouge1": 0.6442107446420164, |
|
"eval_rouge2": 0.3494748457109431, |
|
"eval_rougeL": 0.5207483892007314, |
|
"eval_rougeLsum": 0.632886404907802, |
|
"eval_runtime": 15.3077, |
|
"eval_samples_per_second": 0.653, |
|
"eval_steps_per_second": 0.327, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.34681758284568787, |
|
"learning_rate": 0.00012255105819352266, |
|
"loss": 1.5419, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_bertscore": 0.7350045442581177, |
|
"eval_loss": 1.7329858541488647, |
|
"eval_rouge1": 0.6606839869796519, |
|
"eval_rouge2": 0.362188561160822, |
|
"eval_rougeL": 0.5342033818317451, |
|
"eval_rougeLsum": 0.6493340000068861, |
|
"eval_runtime": 15.44, |
|
"eval_samples_per_second": 0.648, |
|
"eval_steps_per_second": 0.324, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.3043666481971741, |
|
"learning_rate": 0.00012141168427948844, |
|
"loss": 1.5402, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_bertscore": 0.7363221645355225, |
|
"eval_loss": 1.7308530807495117, |
|
"eval_rouge1": 0.6638252384356028, |
|
"eval_rouge2": 0.3643237697892826, |
|
"eval_rougeL": 0.5403775887381331, |
|
"eval_rougeLsum": 0.6537260000827279, |
|
"eval_runtime": 14.7668, |
|
"eval_samples_per_second": 0.677, |
|
"eval_steps_per_second": 0.339, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 0.4073585867881775, |
|
"learning_rate": 0.00012027231036545419, |
|
"loss": 1.5256, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_bertscore": 0.7310279607772827, |
|
"eval_loss": 1.7326784133911133, |
|
"eval_rouge1": 0.6609594314120198, |
|
"eval_rouge2": 0.3601530714440473, |
|
"eval_rougeL": 0.5344452687135626, |
|
"eval_rougeLsum": 0.6480936554342305, |
|
"eval_runtime": 14.8565, |
|
"eval_samples_per_second": 0.673, |
|
"eval_steps_per_second": 0.337, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.3211813271045685, |
|
"learning_rate": 0.00011913293645141995, |
|
"loss": 1.5366, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_bertscore": 0.7356667518615723, |
|
"eval_loss": 1.7280094623565674, |
|
"eval_rouge1": 0.6519353227375031, |
|
"eval_rouge2": 0.3587025716186173, |
|
"eval_rougeL": 0.5306356200586075, |
|
"eval_rougeLsum": 0.6408870347994059, |
|
"eval_runtime": 14.9264, |
|
"eval_samples_per_second": 0.67, |
|
"eval_steps_per_second": 0.335, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.32776832580566406, |
|
"learning_rate": 0.00011799356253738571, |
|
"loss": 1.5504, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_bertscore": 0.7331353425979614, |
|
"eval_loss": 1.7308950424194336, |
|
"eval_rouge1": 0.6627702292814652, |
|
"eval_rouge2": 0.36117793957379707, |
|
"eval_rougeL": 0.5369305446079228, |
|
"eval_rougeLsum": 0.6516924083980089, |
|
"eval_runtime": 16.0138, |
|
"eval_samples_per_second": 0.624, |
|
"eval_steps_per_second": 0.312, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.3209726810455322, |
|
"learning_rate": 0.00011685418862335147, |
|
"loss": 1.5473, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_bertscore": 0.732498824596405, |
|
"eval_loss": 1.7328402996063232, |
|
"eval_rouge1": 0.6482679740803596, |
|
"eval_rouge2": 0.3538726087405498, |
|
"eval_rougeL": 0.5267677183598017, |
|
"eval_rougeLsum": 0.6366529460029322, |
|
"eval_runtime": 15.0195, |
|
"eval_samples_per_second": 0.666, |
|
"eval_steps_per_second": 0.333, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.3174591064453125, |
|
"learning_rate": 0.00011571481470931725, |
|
"loss": 1.5568, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_bertscore": 0.7335298657417297, |
|
"eval_loss": 1.7310253381729126, |
|
"eval_rouge1": 0.6560468577439627, |
|
"eval_rouge2": 0.36039371229175, |
|
"eval_rougeL": 0.5318708569729291, |
|
"eval_rougeLsum": 0.6444857558837042, |
|
"eval_runtime": 14.9774, |
|
"eval_samples_per_second": 0.668, |
|
"eval_steps_per_second": 0.334, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.2936408817768097, |
|
"learning_rate": 0.000114575440795283, |
|
"loss": 1.5345, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_bertscore": 0.7322725057601929, |
|
"eval_loss": 1.7270629405975342, |
|
"eval_rouge1": 0.6387060930656672, |
|
"eval_rouge2": 0.3480508127989137, |
|
"eval_rougeL": 0.5148670834213287, |
|
"eval_rougeLsum": 0.6273654952601909, |
|
"eval_runtime": 15.6687, |
|
"eval_samples_per_second": 0.638, |
|
"eval_steps_per_second": 0.319, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.32960689067840576, |
|
"learning_rate": 0.00011343606688124875, |
|
"loss": 1.5362, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_bertscore": 0.7337037920951843, |
|
"eval_loss": 1.7287395000457764, |
|
"eval_rouge1": 0.6476816970229771, |
|
"eval_rouge2": 0.3532248216683249, |
|
"eval_rougeL": 0.5253136618838716, |
|
"eval_rougeLsum": 0.6347493764394183, |
|
"eval_runtime": 15.0045, |
|
"eval_samples_per_second": 0.666, |
|
"eval_steps_per_second": 0.333, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.33265602588653564, |
|
"learning_rate": 0.00011229669296721452, |
|
"loss": 1.5215, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_bertscore": 0.7330806851387024, |
|
"eval_loss": 1.7265052795410156, |
|
"eval_rouge1": 0.6529393512177359, |
|
"eval_rouge2": 0.36182153145062224, |
|
"eval_rougeL": 0.5317061134915853, |
|
"eval_rougeLsum": 0.6413066299251913, |
|
"eval_runtime": 15.0256, |
|
"eval_samples_per_second": 0.666, |
|
"eval_steps_per_second": 0.333, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.3436201512813568, |
|
"learning_rate": 0.00011115731905318027, |
|
"loss": 1.539, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_bertscore": 0.7335551977157593, |
|
"eval_loss": 1.7254730463027954, |
|
"eval_rouge1": 0.6388518781767971, |
|
"eval_rouge2": 0.3501853846588857, |
|
"eval_rougeL": 0.5196828245794569, |
|
"eval_rougeLsum": 0.629333993884722, |
|
"eval_runtime": 15.2595, |
|
"eval_samples_per_second": 0.655, |
|
"eval_steps_per_second": 0.328, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.3428190350532532, |
|
"learning_rate": 0.00011001794513914605, |
|
"loss": 1.5273, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_bertscore": 0.7331770658493042, |
|
"eval_loss": 1.7286018133163452, |
|
"eval_rouge1": 0.6581941047310954, |
|
"eval_rouge2": 0.36277983926897583, |
|
"eval_rougeL": 0.5336464680120501, |
|
"eval_rougeLsum": 0.6489239720278894, |
|
"eval_runtime": 14.8252, |
|
"eval_samples_per_second": 0.675, |
|
"eval_steps_per_second": 0.337, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 0.363164484500885, |
|
"learning_rate": 0.0001088785712251118, |
|
"loss": 1.5445, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_bertscore": 0.7377282977104187, |
|
"eval_loss": 1.7363064289093018, |
|
"eval_rouge1": 0.6547011011872876, |
|
"eval_rouge2": 0.3553220826957326, |
|
"eval_rougeL": 0.5256073814411315, |
|
"eval_rougeLsum": 0.6420095316923398, |
|
"eval_runtime": 14.8599, |
|
"eval_samples_per_second": 0.673, |
|
"eval_steps_per_second": 0.336, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.3098333775997162, |
|
"learning_rate": 0.00010773919731107757, |
|
"loss": 1.5319, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_bertscore": 0.7324053645133972, |
|
"eval_loss": 1.7284066677093506, |
|
"eval_rouge1": 0.6477379941950916, |
|
"eval_rouge2": 0.3535918140554809, |
|
"eval_rougeL": 0.5226838544730126, |
|
"eval_rougeLsum": 0.6373271915355557, |
|
"eval_runtime": 14.9318, |
|
"eval_samples_per_second": 0.67, |
|
"eval_steps_per_second": 0.335, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 0.3637208938598633, |
|
"learning_rate": 0.00010659982339704332, |
|
"loss": 1.5442, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_bertscore": 0.7347462773323059, |
|
"eval_loss": 1.7252963781356812, |
|
"eval_rouge1": 0.6494449840449819, |
|
"eval_rouge2": 0.3586550050575282, |
|
"eval_rougeL": 0.5275675395159809, |
|
"eval_rougeLsum": 0.6396738714026391, |
|
"eval_runtime": 15.2218, |
|
"eval_samples_per_second": 0.657, |
|
"eval_steps_per_second": 0.328, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.35197457671165466, |
|
"learning_rate": 0.00010546044948300908, |
|
"loss": 1.5131, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_bertscore": 0.7329785227775574, |
|
"eval_loss": 1.7285687923431396, |
|
"eval_rouge1": 0.6582047811143328, |
|
"eval_rouge2": 0.3637700686094697, |
|
"eval_rougeL": 0.5355021948480279, |
|
"eval_rougeLsum": 0.6483245595148677, |
|
"eval_runtime": 14.8772, |
|
"eval_samples_per_second": 0.672, |
|
"eval_steps_per_second": 0.336, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 0.3406757116317749, |
|
"learning_rate": 0.00010432107556897486, |
|
"loss": 1.5394, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_bertscore": 0.7345961332321167, |
|
"eval_loss": 1.7324028015136719, |
|
"eval_rouge1": 0.6408293615351552, |
|
"eval_rouge2": 0.3520120690778129, |
|
"eval_rougeL": 0.5145218014745592, |
|
"eval_rougeLsum": 0.6297802607384266, |
|
"eval_runtime": 15.1044, |
|
"eval_samples_per_second": 0.662, |
|
"eval_steps_per_second": 0.331, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.3417683243751526, |
|
"learning_rate": 0.00010318170165494061, |
|
"loss": 1.526, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_bertscore": 0.735752522945404, |
|
"eval_loss": 1.7288110256195068, |
|
"eval_rouge1": 0.641158513352794, |
|
"eval_rouge2": 0.3544166440855814, |
|
"eval_rougeL": 0.5215201980495414, |
|
"eval_rougeLsum": 0.630550065494593, |
|
"eval_runtime": 15.0797, |
|
"eval_samples_per_second": 0.663, |
|
"eval_steps_per_second": 0.332, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.3256611227989197, |
|
"learning_rate": 0.00010204232774090639, |
|
"loss": 1.5484, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_bertscore": 0.7356327772140503, |
|
"eval_loss": 1.7305186986923218, |
|
"eval_rouge1": 0.6400269226515611, |
|
"eval_rouge2": 0.3502884634173268, |
|
"eval_rougeL": 0.517312321281175, |
|
"eval_rougeLsum": 0.6284556997614409, |
|
"eval_runtime": 15.4097, |
|
"eval_samples_per_second": 0.649, |
|
"eval_steps_per_second": 0.324, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.4035187363624573, |
|
"learning_rate": 0.00010090295382687213, |
|
"loss": 1.5261, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_bertscore": 0.7339992523193359, |
|
"eval_loss": 1.7282793521881104, |
|
"eval_rouge1": 0.6335770390416183, |
|
"eval_rouge2": 0.34592404578075897, |
|
"eval_rougeL": 0.5109045259792113, |
|
"eval_rougeLsum": 0.6218413683710426, |
|
"eval_runtime": 15.1959, |
|
"eval_samples_per_second": 0.658, |
|
"eval_steps_per_second": 0.329, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.34987062215805054, |
|
"learning_rate": 9.97635799128379e-05, |
|
"loss": 1.5199, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bertscore": 0.7326329946517944, |
|
"eval_loss": 1.7544715404510498, |
|
"eval_rouge1": 0.6451558045750205, |
|
"eval_rouge2": 0.35565806935653943, |
|
"eval_rougeL": 0.5217034865840529, |
|
"eval_rougeLsum": 0.6329869715356753, |
|
"eval_runtime": 15.0351, |
|
"eval_samples_per_second": 0.665, |
|
"eval_steps_per_second": 0.333, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.37184038758277893, |
|
"learning_rate": 9.862420599880366e-05, |
|
"loss": 1.41, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_bertscore": 0.7315141558647156, |
|
"eval_loss": 1.7585878372192383, |
|
"eval_rouge1": 0.6469319193583706, |
|
"eval_rouge2": 0.3514447211469598, |
|
"eval_rougeL": 0.524755857688278, |
|
"eval_rougeLsum": 0.6350164781858667, |
|
"eval_runtime": 14.9583, |
|
"eval_samples_per_second": 0.669, |
|
"eval_steps_per_second": 0.334, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.3812776803970337, |
|
"learning_rate": 9.748483208476943e-05, |
|
"loss": 1.4132, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_bertscore": 0.7335561513900757, |
|
"eval_loss": 1.764611840248108, |
|
"eval_rouge1": 0.6381916780473581, |
|
"eval_rouge2": 0.3482510604092539, |
|
"eval_rougeL": 0.5162105225823392, |
|
"eval_rougeLsum": 0.627150245441782, |
|
"eval_runtime": 15.8515, |
|
"eval_samples_per_second": 0.631, |
|
"eval_steps_per_second": 0.315, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.45525220036506653, |
|
"learning_rate": 9.634545817073518e-05, |
|
"loss": 1.4, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_bertscore": 0.73627769947052, |
|
"eval_loss": 1.7585163116455078, |
|
"eval_rouge1": 0.6670097658027134, |
|
"eval_rouge2": 0.3658295359911405, |
|
"eval_rougeL": 0.5429667657900548, |
|
"eval_rougeLsum": 0.6543501745791419, |
|
"eval_runtime": 15.0301, |
|
"eval_samples_per_second": 0.665, |
|
"eval_steps_per_second": 0.333, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.37322184443473816, |
|
"learning_rate": 9.520608425670095e-05, |
|
"loss": 1.4293, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_bertscore": 0.730435848236084, |
|
"eval_loss": 1.764052391052246, |
|
"eval_rouge1": 0.6640215078213034, |
|
"eval_rouge2": 0.3625932287322054, |
|
"eval_rougeL": 0.5379978391335138, |
|
"eval_rougeLsum": 0.6542054656293199, |
|
"eval_runtime": 15.0762, |
|
"eval_samples_per_second": 0.663, |
|
"eval_steps_per_second": 0.332, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.4260891079902649, |
|
"learning_rate": 9.40667103426667e-05, |
|
"loss": 1.4077, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_bertscore": 0.7309869527816772, |
|
"eval_loss": 1.762108564376831, |
|
"eval_rouge1": 0.6571171081737958, |
|
"eval_rouge2": 0.35780421333141865, |
|
"eval_rougeL": 0.5320129270967632, |
|
"eval_rougeLsum": 0.64587787409523, |
|
"eval_runtime": 14.9004, |
|
"eval_samples_per_second": 0.671, |
|
"eval_steps_per_second": 0.336, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 0.39479926228523254, |
|
"learning_rate": 9.292733642863247e-05, |
|
"loss": 1.4165, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_bertscore": 0.7324444651603699, |
|
"eval_loss": 1.7607113122940063, |
|
"eval_rouge1": 0.6628398862884018, |
|
"eval_rouge2": 0.3627259806721216, |
|
"eval_rougeL": 0.5366106483832656, |
|
"eval_rougeLsum": 0.6528364858807157, |
|
"eval_runtime": 15.5766, |
|
"eval_samples_per_second": 0.642, |
|
"eval_steps_per_second": 0.321, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.39267703890800476, |
|
"learning_rate": 9.178796251459824e-05, |
|
"loss": 1.4123, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_bertscore": 0.7298994064331055, |
|
"eval_loss": 1.7668545246124268, |
|
"eval_rouge1": 0.6490850022857569, |
|
"eval_rouge2": 0.3532323419511264, |
|
"eval_rougeL": 0.5212823000193295, |
|
"eval_rougeLsum": 0.636442724466695, |
|
"eval_runtime": 14.9094, |
|
"eval_samples_per_second": 0.671, |
|
"eval_steps_per_second": 0.335, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.38221287727355957, |
|
"learning_rate": 9.0648588600564e-05, |
|
"loss": 1.401, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_bertscore": 0.7316875457763672, |
|
"eval_loss": 1.764147400856018, |
|
"eval_rouge1": 0.6490326710625849, |
|
"eval_rouge2": 0.3510351037900723, |
|
"eval_rougeL": 0.5239165028795836, |
|
"eval_rougeLsum": 0.6373687316421427, |
|
"eval_runtime": 15.1192, |
|
"eval_samples_per_second": 0.661, |
|
"eval_steps_per_second": 0.331, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 0.3653150200843811, |
|
"learning_rate": 8.950921468652976e-05, |
|
"loss": 1.4109, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_bertscore": 0.7348155975341797, |
|
"eval_loss": 1.7640550136566162, |
|
"eval_rouge1": 0.6462152873276823, |
|
"eval_rouge2": 0.3483599145461069, |
|
"eval_rougeL": 0.5193372430687719, |
|
"eval_rougeLsum": 0.6334254357511564, |
|
"eval_runtime": 14.9291, |
|
"eval_samples_per_second": 0.67, |
|
"eval_steps_per_second": 0.335, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.38049009442329407, |
|
"learning_rate": 8.836984077249551e-05, |
|
"loss": 1.4189, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_bertscore": 0.7357938885688782, |
|
"eval_loss": 1.7696326971054077, |
|
"eval_rouge1": 0.6377276221057538, |
|
"eval_rouge2": 0.3455397190390045, |
|
"eval_rougeL": 0.5118069428064842, |
|
"eval_rougeLsum": 0.6264501633078481, |
|
"eval_runtime": 14.8653, |
|
"eval_samples_per_second": 0.673, |
|
"eval_steps_per_second": 0.336, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 2.26, |
      "grad_norm": 0.42111098766326904,
      "learning_rate": 8.723046685846128e-05,
      "loss": 1.4152,
      "step": 19800
    },
    {
      "epoch": 2.26,
      "eval_bertscore": 0.7339056134223938,
      "eval_loss": 1.7658218145370483,
      "eval_rouge1": 0.6494820372695989,
      "eval_rouge2": 0.34691658128805236,
      "eval_rougeL": 0.5193228965163086,
      "eval_rougeLsum": 0.6365347065687565,
      "eval_runtime": 15.3562,
      "eval_samples_per_second": 0.651,
      "eval_steps_per_second": 0.326,
      "step": 19800
    },
    {
      "epoch": 2.28,
      "grad_norm": 0.4452258050441742,
      "learning_rate": 8.609109294442704e-05,
      "loss": 1.4101,
      "step": 20000
    },
    {
      "epoch": 2.28,
      "eval_bertscore": 0.7296434640884399,
      "eval_loss": 1.7714240550994873,
      "eval_rouge1": 0.6565600824405751,
      "eval_rouge2": 0.3533618655201594,
      "eval_rougeL": 0.5263318202066467,
      "eval_rougeLsum": 0.6444964824298407,
      "eval_runtime": 14.8578,
      "eval_samples_per_second": 0.673,
      "eval_steps_per_second": 0.337,
      "step": 20000
    },
    {
      "epoch": 2.3,
      "grad_norm": 0.4030967652797699,
      "learning_rate": 8.495171903039281e-05,
      "loss": 1.4049,
      "step": 20200
    },
    {
      "epoch": 2.3,
      "eval_bertscore": 0.7307097315788269,
      "eval_loss": 1.774444580078125,
      "eval_rouge1": 0.6517204836155526,
      "eval_rouge2": 0.3521339653276223,
      "eval_rougeL": 0.5223211728244184,
      "eval_rougeLsum": 0.6398710531932736,
      "eval_runtime": 15.8543,
      "eval_samples_per_second": 0.631,
      "eval_steps_per_second": 0.315,
      "step": 20200
    },
    {
      "epoch": 2.32,
      "grad_norm": 0.33409813046455383,
      "learning_rate": 8.381234511635858e-05,
      "loss": 1.4243,
      "step": 20400
    },
    {
      "epoch": 2.32,
      "eval_bertscore": 0.7312101721763611,
      "eval_loss": 1.7654094696044922,
      "eval_rouge1": 0.6607249126293291,
      "eval_rouge2": 0.3545993249716188,
      "eval_rougeL": 0.5320161007986739,
      "eval_rougeLsum": 0.6503315335963733,
      "eval_runtime": 14.8739,
      "eval_samples_per_second": 0.672,
      "eval_steps_per_second": 0.336,
      "step": 20400
    },
    {
      "epoch": 2.35,
      "grad_norm": 0.4044789671897888,
      "learning_rate": 8.267297120232433e-05,
      "loss": 1.413,
      "step": 20600
    },
    {
      "epoch": 2.35,
      "eval_bertscore": 0.7342169880867004,
      "eval_loss": 1.769879937171936,
      "eval_rouge1": 0.6442777880355144,
      "eval_rouge2": 0.35006080708477183,
      "eval_rougeL": 0.5218799478770955,
      "eval_rougeLsum": 0.6332700294558067,
      "eval_runtime": 14.9089,
      "eval_samples_per_second": 0.671,
      "eval_steps_per_second": 0.335,
      "step": 20600
    },
    {
      "epoch": 2.37,
      "grad_norm": 0.39801183342933655,
      "learning_rate": 8.153359728829008e-05,
      "loss": 1.4177,
      "step": 20800
    },
    {
      "epoch": 2.37,
      "eval_bertscore": 0.7343758344650269,
      "eval_loss": 1.7737929821014404,
      "eval_rouge1": 0.6495678172205896,
      "eval_rouge2": 0.3505195734345703,
      "eval_rougeL": 0.5263025592812188,
      "eval_rougeLsum": 0.6390057749428748,
      "eval_runtime": 15.2148,
      "eval_samples_per_second": 0.657,
      "eval_steps_per_second": 0.329,
      "step": 20800
    },
    {
      "epoch": 2.39,
      "grad_norm": 0.36868759989738464,
      "learning_rate": 8.039422337425585e-05,
      "loss": 1.421,
      "step": 21000
    },
    {
      "epoch": 2.39,
      "eval_bertscore": 0.7333502173423767,
      "eval_loss": 1.7708820104599,
      "eval_rouge1": 0.656319412860679,
      "eval_rouge2": 0.3557406341135577,
      "eval_rougeL": 0.5293456110466322,
      "eval_rougeLsum": 0.6421819358163285,
      "eval_runtime": 14.9091,
      "eval_samples_per_second": 0.671,
      "eval_steps_per_second": 0.335,
      "step": 21000
    },
    {
      "epoch": 2.41,
      "grad_norm": 0.46111443638801575,
      "learning_rate": 7.925484946022162e-05,
      "loss": 1.4102,
      "step": 21200
    },
    {
      "epoch": 2.41,
      "eval_bertscore": 0.736262321472168,
      "eval_loss": 1.768972635269165,
      "eval_rouge1": 0.6582574071278393,
      "eval_rouge2": 0.3557625250443591,
      "eval_rougeL": 0.5322500342922363,
      "eval_rougeLsum": 0.646623827844921,
      "eval_runtime": 15.4088,
      "eval_samples_per_second": 0.649,
      "eval_steps_per_second": 0.324,
      "step": 21200
    },
    {
      "epoch": 2.44,
      "grad_norm": 0.41794517636299133,
      "learning_rate": 7.811547554618738e-05,
      "loss": 1.4231,
      "step": 21400
    },
    {
      "epoch": 2.44,
      "eval_bertscore": 0.7349900603294373,
      "eval_loss": 1.7673609256744385,
      "eval_rouge1": 0.6599777278993147,
      "eval_rouge2": 0.35744569380532043,
      "eval_rougeL": 0.5359850821835463,
      "eval_rougeLsum": 0.6469206354455653,
      "eval_runtime": 14.8837,
      "eval_samples_per_second": 0.672,
      "eval_steps_per_second": 0.336,
      "step": 21400
    },
    {
      "epoch": 2.46,
      "grad_norm": 0.3874039351940155,
      "learning_rate": 7.697610163215314e-05,
      "loss": 1.4158,
      "step": 21600
    },
    {
      "epoch": 2.46,
      "eval_bertscore": 0.7362676858901978,
      "eval_loss": 1.764347791671753,
      "eval_rouge1": 0.6576168971663054,
      "eval_rouge2": 0.36010190798950537,
      "eval_rougeL": 0.5365592740576962,
      "eval_rougeLsum": 0.6455601225938818,
      "eval_runtime": 15.4178,
      "eval_samples_per_second": 0.649,
      "eval_steps_per_second": 0.324,
      "step": 21600
    },
    {
      "epoch": 2.48,
      "grad_norm": 0.4013253450393677,
      "learning_rate": 7.583672771811889e-05,
      "loss": 1.418,
      "step": 21800
    },
    {
      "epoch": 2.48,
      "eval_bertscore": 0.7303592562675476,
      "eval_loss": 1.765144944190979,
      "eval_rouge1": 0.6610480012685163,
      "eval_rouge2": 0.36479831105715255,
      "eval_rougeL": 0.5375415216439376,
      "eval_rougeLsum": 0.6504955320897916,
      "eval_runtime": 15.0677,
      "eval_samples_per_second": 0.664,
      "eval_steps_per_second": 0.332,
      "step": 21800
    },
    {
      "epoch": 2.51,
      "grad_norm": 0.4189004898071289,
      "learning_rate": 7.469735380408466e-05,
      "loss": 1.4199,
      "step": 22000
    },
    {
      "epoch": 2.51,
      "eval_bertscore": 0.7319179773330688,
      "eval_loss": 1.7685811519622803,
      "eval_rouge1": 0.6589314825298751,
      "eval_rouge2": 0.36092809773515727,
      "eval_rougeL": 0.5359034256928837,
      "eval_rougeLsum": 0.647695652924998,
      "eval_runtime": 15.0325,
      "eval_samples_per_second": 0.665,
      "eval_steps_per_second": 0.333,
      "step": 22000
    },
    {
      "epoch": 2.53,
      "grad_norm": 0.39296436309814453,
      "learning_rate": 7.355797989005042e-05,
      "loss": 1.4353,
      "step": 22200
    },
    {
      "epoch": 2.53,
      "eval_bertscore": 0.734102725982666,
      "eval_loss": 1.7720457315444946,
      "eval_rouge1": 0.652136441919871,
      "eval_rouge2": 0.35394856883334874,
      "eval_rougeL": 0.5257845140699575,
      "eval_rougeLsum": 0.6411232244792167,
      "eval_runtime": 14.9943,
      "eval_samples_per_second": 0.667,
      "eval_steps_per_second": 0.333,
      "step": 22200
    },
    {
      "epoch": 2.55,
      "grad_norm": 0.3997296392917633,
      "learning_rate": 7.241860597601619e-05,
      "loss": 1.4224,
      "step": 22400
    },
    {
      "epoch": 2.55,
      "eval_bertscore": 0.7339878082275391,
      "eval_loss": 1.7666336297988892,
      "eval_rouge1": 0.6537340121878514,
      "eval_rouge2": 0.3570961026063757,
      "eval_rougeL": 0.529937130767685,
      "eval_rougeLsum": 0.6435060914147177,
      "eval_runtime": 15.0439,
      "eval_samples_per_second": 0.665,
      "eval_steps_per_second": 0.332,
      "step": 22400
    },
    {
      "epoch": 2.57,
      "grad_norm": 0.45447298884391785,
      "learning_rate": 7.127923206198196e-05,
      "loss": 1.4195,
      "step": 22600
    },
    {
      "epoch": 2.57,
      "eval_bertscore": 0.7328049540519714,
      "eval_loss": 1.767970085144043,
      "eval_rouge1": 0.6525615870662755,
      "eval_rouge2": 0.3548658659692201,
      "eval_rougeL": 0.5278612681579985,
      "eval_rougeLsum": 0.6426018669254849,
      "eval_runtime": 14.8509,
      "eval_samples_per_second": 0.673,
      "eval_steps_per_second": 0.337,
      "step": 22600
    },
    {
      "epoch": 2.6,
      "grad_norm": 0.37068402767181396,
      "learning_rate": 7.01398581479477e-05,
      "loss": 1.4174,
      "step": 22800
    },
    {
      "epoch": 2.6,
      "eval_bertscore": 0.7338019609451294,
      "eval_loss": 1.763349175453186,
      "eval_rouge1": 0.6536530437395975,
      "eval_rouge2": 0.3564778360043106,
      "eval_rougeL": 0.5285382022264695,
      "eval_rougeLsum": 0.6425723229746058,
      "eval_runtime": 14.8293,
      "eval_samples_per_second": 0.674,
      "eval_steps_per_second": 0.337,
      "step": 22800
    },
    {
      "epoch": 2.62,
      "grad_norm": 0.35103845596313477,
      "learning_rate": 6.900048423391346e-05,
      "loss": 1.4176,
      "step": 23000
    },
    {
      "epoch": 2.62,
      "eval_bertscore": 0.7325159311294556,
      "eval_loss": 1.7628978490829468,
      "eval_rouge1": 0.6623030759220351,
      "eval_rouge2": 0.3604109991839185,
      "eval_rougeL": 0.5322677462077166,
      "eval_rougeLsum": 0.6516312160764892,
      "eval_runtime": 14.923,
      "eval_samples_per_second": 0.67,
      "eval_steps_per_second": 0.335,
      "step": 23000
    },
    {
      "epoch": 2.64,
      "grad_norm": 0.39424487948417664,
      "learning_rate": 6.786111031987923e-05,
      "loss": 1.4141,
      "step": 23200
    },
    {
      "epoch": 2.64,
      "eval_bertscore": 0.7374362945556641,
      "eval_loss": 1.7650716304779053,
      "eval_rouge1": 0.6574628250156043,
      "eval_rouge2": 0.36049809448726045,
      "eval_rougeL": 0.5312753147070929,
      "eval_rougeLsum": 0.6452805224085838,
      "eval_runtime": 14.8888,
      "eval_samples_per_second": 0.672,
      "eval_steps_per_second": 0.336,
      "step": 23200
    },
    {
      "epoch": 2.67,
      "grad_norm": 0.43388617038726807,
      "learning_rate": 6.6721736405845e-05,
      "loss": 1.4162,
      "step": 23400
    },
    {
      "epoch": 2.67,
      "eval_bertscore": 0.7336270809173584,
      "eval_loss": 1.7658464908599854,
      "eval_rouge1": 0.6573821610105346,
      "eval_rouge2": 0.3554905858433707,
      "eval_rougeL": 0.5301171280694805,
      "eval_rougeLsum": 0.6452831484327366,
      "eval_runtime": 14.9065,
      "eval_samples_per_second": 0.671,
      "eval_steps_per_second": 0.335,
      "step": 23400
    },
    {
      "epoch": 2.69,
      "grad_norm": 0.41646161675453186,
      "learning_rate": 6.558236249181076e-05,
      "loss": 1.4199,
      "step": 23600
    },
    {
      "epoch": 2.69,
      "eval_bertscore": 0.7336153388023376,
      "eval_loss": 1.767188310623169,
      "eval_rouge1": 0.6571676655188523,
      "eval_rouge2": 0.3572385458490631,
      "eval_rougeL": 0.5296070447249894,
      "eval_rougeLsum": 0.6457403843177614,
      "eval_runtime": 14.7731,
      "eval_samples_per_second": 0.677,
      "eval_steps_per_second": 0.338,
      "step": 23600
    },
    {
      "epoch": 2.71,
      "grad_norm": 0.38818874955177307,
      "learning_rate": 6.444298857777651e-05,
      "loss": 1.416,
      "step": 23800
    },
    {
      "epoch": 2.71,
      "eval_bertscore": 0.7327283620834351,
      "eval_loss": 1.7616260051727295,
      "eval_rouge1": 0.656883853658625,
      "eval_rouge2": 0.3566053754349987,
      "eval_rougeL": 0.5314518531110131,
      "eval_rougeLsum": 0.6446071370588691,
      "eval_runtime": 15.1059,
      "eval_samples_per_second": 0.662,
      "eval_steps_per_second": 0.331,
      "step": 23800
    },
    {
      "epoch": 2.73,
      "grad_norm": 0.39458197355270386,
      "learning_rate": 6.330361466374227e-05,
      "loss": 1.4243,
      "step": 24000
    },
    {
      "epoch": 2.73,
      "eval_bertscore": 0.7317964434623718,
      "eval_loss": 1.7638084888458252,
      "eval_rouge1": 0.6571606334248314,
      "eval_rouge2": 0.3569373186268652,
      "eval_rougeL": 0.5293554693809414,
      "eval_rougeLsum": 0.645057709779701,
      "eval_runtime": 14.8582,
      "eval_samples_per_second": 0.673,
      "eval_steps_per_second": 0.337,
      "step": 24000
    }
  ],
  "logging_steps": 200,
  "max_steps": 35112,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 800,
  "total_flos": 2.430740494923436e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}