gsarti's picture
Initial commit
8b88827
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"global_step": 89859,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 0.0002983307181250626,
"loss": 3.5314,
"step": 500
},
{
"epoch": 0.08,
"learning_rate": 0.00029666143625012515,
"loss": 3.2698,
"step": 1000
},
{
"epoch": 0.12,
"learning_rate": 0.00029499215437518776,
"loss": 3.1741,
"step": 1500
},
{
"epoch": 0.16,
"learning_rate": 0.0002933228725002504,
"loss": 3.1591,
"step": 2000
},
{
"epoch": 0.19,
"learning_rate": 0.000291653590625313,
"loss": 3.0795,
"step": 2500
},
{
"epoch": 0.23,
"learning_rate": 0.00028998430875037555,
"loss": 3.0526,
"step": 3000
},
{
"epoch": 0.27,
"learning_rate": 0.00028831502687543816,
"loss": 3.0228,
"step": 3500
},
{
"epoch": 0.31,
"learning_rate": 0.0002866457450005008,
"loss": 2.9966,
"step": 4000
},
{
"epoch": 0.35,
"learning_rate": 0.0002849764631255634,
"loss": 2.9799,
"step": 4500
},
{
"epoch": 0.39,
"learning_rate": 0.00028330718125062595,
"loss": 2.9632,
"step": 5000
},
{
"epoch": 0.39,
"eval_gen_len": 18.7655,
"eval_loss": 2.559084892272949,
"eval_rouge1": 25.8521,
"eval_rouge2": 11.6024,
"eval_rougeL": 21.0887,
"eval_rougeLsum": 22.857,
"eval_runtime": 625.5093,
"eval_samples_per_second": 20.521,
"eval_steps_per_second": 2.566,
"step": 5000
},
{
"epoch": 0.43,
"learning_rate": 0.00028163789937568856,
"loss": 2.9402,
"step": 5500
},
{
"epoch": 0.47,
"learning_rate": 0.0002799686175007512,
"loss": 2.9157,
"step": 6000
},
{
"epoch": 0.51,
"learning_rate": 0.00027829933562581373,
"loss": 2.9143,
"step": 6500
},
{
"epoch": 0.55,
"learning_rate": 0.00027663005375087635,
"loss": 2.8869,
"step": 7000
},
{
"epoch": 0.58,
"learning_rate": 0.0002749607718759389,
"loss": 2.9195,
"step": 7500
},
{
"epoch": 0.62,
"learning_rate": 0.0002732914900010015,
"loss": 2.8558,
"step": 8000
},
{
"epoch": 0.66,
"learning_rate": 0.00027162220812606414,
"loss": 2.8594,
"step": 8500
},
{
"epoch": 0.7,
"learning_rate": 0.00026995292625112675,
"loss": 2.8485,
"step": 9000
},
{
"epoch": 0.74,
"learning_rate": 0.0002682836443761893,
"loss": 2.8491,
"step": 9500
},
{
"epoch": 0.78,
"learning_rate": 0.0002666143625012519,
"loss": 2.8327,
"step": 10000
},
{
"epoch": 0.78,
"eval_gen_len": 18.8331,
"eval_loss": 2.4890213012695312,
"eval_rouge1": 26.508,
"eval_rouge2": 12.2564,
"eval_rougeL": 21.7467,
"eval_rougeLsum": 23.5474,
"eval_runtime": 624.7506,
"eval_samples_per_second": 20.546,
"eval_steps_per_second": 2.569,
"step": 10000
},
{
"epoch": 0.82,
"learning_rate": 0.00026494508062631454,
"loss": 2.8157,
"step": 10500
},
{
"epoch": 0.86,
"learning_rate": 0.00026327579875137715,
"loss": 2.8001,
"step": 11000
},
{
"epoch": 0.9,
"learning_rate": 0.0002616065168764397,
"loss": 2.84,
"step": 11500
},
{
"epoch": 0.93,
"learning_rate": 0.0002599372350015023,
"loss": 2.8051,
"step": 12000
},
{
"epoch": 0.97,
"learning_rate": 0.00025826795312656494,
"loss": 2.8138,
"step": 12500
},
{
"epoch": 1.01,
"learning_rate": 0.00025659867125162755,
"loss": 2.7677,
"step": 13000
},
{
"epoch": 1.05,
"learning_rate": 0.0002549293893766901,
"loss": 2.6905,
"step": 13500
},
{
"epoch": 1.09,
"learning_rate": 0.0002532601075017527,
"loss": 2.709,
"step": 14000
},
{
"epoch": 1.13,
"learning_rate": 0.00025159082562681534,
"loss": 2.7136,
"step": 14500
},
{
"epoch": 1.17,
"learning_rate": 0.00024992154375187795,
"loss": 2.6873,
"step": 15000
},
{
"epoch": 1.17,
"eval_gen_len": 18.8424,
"eval_loss": 2.4520416259765625,
"eval_rouge1": 26.8614,
"eval_rouge2": 12.7032,
"eval_rougeL": 22.0965,
"eval_rougeLsum": 23.832,
"eval_runtime": 623.6597,
"eval_samples_per_second": 20.582,
"eval_steps_per_second": 2.574,
"step": 15000
},
{
"epoch": 1.21,
"learning_rate": 0.0002482522618769405,
"loss": 2.6962,
"step": 15500
},
{
"epoch": 1.25,
"learning_rate": 0.0002465829800020031,
"loss": 2.6779,
"step": 16000
},
{
"epoch": 1.29,
"learning_rate": 0.00024491369812706574,
"loss": 2.6964,
"step": 16500
},
{
"epoch": 1.32,
"learning_rate": 0.00024324441625212832,
"loss": 2.7048,
"step": 17000
},
{
"epoch": 1.36,
"learning_rate": 0.00024157513437719094,
"loss": 2.6687,
"step": 17500
},
{
"epoch": 1.4,
"learning_rate": 0.00023990585250225352,
"loss": 2.6869,
"step": 18000
},
{
"epoch": 1.44,
"learning_rate": 0.00023823657062731608,
"loss": 2.678,
"step": 18500
},
{
"epoch": 1.48,
"learning_rate": 0.0002365672887523787,
"loss": 2.6544,
"step": 19000
},
{
"epoch": 1.52,
"learning_rate": 0.00023489800687744128,
"loss": 2.6591,
"step": 19500
},
{
"epoch": 1.56,
"learning_rate": 0.0002332287250025039,
"loss": 2.6572,
"step": 20000
},
{
"epoch": 1.56,
"eval_gen_len": 18.727,
"eval_loss": 2.4031243324279785,
"eval_rouge1": 27.0114,
"eval_rouge2": 12.8148,
"eval_rougeL": 22.2407,
"eval_rougeLsum": 24.0602,
"eval_runtime": 626.2755,
"eval_samples_per_second": 20.496,
"eval_steps_per_second": 2.563,
"step": 20000
},
{
"epoch": 1.6,
"learning_rate": 0.00023155944312756648,
"loss": 2.628,
"step": 20500
},
{
"epoch": 1.64,
"learning_rate": 0.0002298901612526291,
"loss": 2.6544,
"step": 21000
},
{
"epoch": 1.67,
"learning_rate": 0.00022822087937769168,
"loss": 2.6792,
"step": 21500
},
{
"epoch": 1.71,
"learning_rate": 0.0002265515975027543,
"loss": 2.6493,
"step": 22000
},
{
"epoch": 1.75,
"learning_rate": 0.00022488231562781688,
"loss": 2.6385,
"step": 22500
},
{
"epoch": 1.79,
"learning_rate": 0.0002232130337528795,
"loss": 2.6368,
"step": 23000
},
{
"epoch": 1.83,
"learning_rate": 0.00022154375187794208,
"loss": 2.6319,
"step": 23500
},
{
"epoch": 1.87,
"learning_rate": 0.0002198744700030047,
"loss": 2.6764,
"step": 24000
},
{
"epoch": 1.91,
"learning_rate": 0.00021820518812806729,
"loss": 2.6729,
"step": 24500
},
{
"epoch": 1.95,
"learning_rate": 0.0002165359062531299,
"loss": 2.6461,
"step": 25000
},
{
"epoch": 1.95,
"eval_gen_len": 18.7416,
"eval_loss": 2.391615629196167,
"eval_rouge1": 27.2287,
"eval_rouge2": 12.9935,
"eval_rougeL": 22.4718,
"eval_rougeLsum": 24.2517,
"eval_runtime": 622.8089,
"eval_samples_per_second": 20.61,
"eval_steps_per_second": 2.577,
"step": 25000
},
{
"epoch": 1.99,
"learning_rate": 0.00021486662437819249,
"loss": 2.6244,
"step": 25500
},
{
"epoch": 2.03,
"learning_rate": 0.0002131973425032551,
"loss": 2.5805,
"step": 26000
},
{
"epoch": 2.06,
"learning_rate": 0.00021152806062831769,
"loss": 2.5407,
"step": 26500
},
{
"epoch": 2.1,
"learning_rate": 0.00020985877875338027,
"loss": 2.5537,
"step": 27000
},
{
"epoch": 2.14,
"learning_rate": 0.00020818949687844289,
"loss": 2.5253,
"step": 27500
},
{
"epoch": 2.18,
"learning_rate": 0.00020652021500350547,
"loss": 2.5401,
"step": 28000
},
{
"epoch": 2.22,
"learning_rate": 0.00020485093312856809,
"loss": 2.5245,
"step": 28500
},
{
"epoch": 2.26,
"learning_rate": 0.00020318165125363067,
"loss": 2.547,
"step": 29000
},
{
"epoch": 2.3,
"learning_rate": 0.00020151236937869329,
"loss": 2.5377,
"step": 29500
},
{
"epoch": 2.34,
"learning_rate": 0.00019984308750375587,
"loss": 2.5374,
"step": 30000
},
{
"epoch": 2.34,
"eval_gen_len": 18.8003,
"eval_loss": 2.3686139583587646,
"eval_rouge1": 27.5061,
"eval_rouge2": 13.241,
"eval_rougeL": 22.6877,
"eval_rougeLsum": 24.4465,
"eval_runtime": 629.1948,
"eval_samples_per_second": 20.401,
"eval_steps_per_second": 2.551,
"step": 30000
},
{
"epoch": 2.38,
"learning_rate": 0.00019817380562881846,
"loss": 2.5457,
"step": 30500
},
{
"epoch": 2.41,
"learning_rate": 0.00019650452375388105,
"loss": 2.5105,
"step": 31000
},
{
"epoch": 2.45,
"learning_rate": 0.00019483524187894366,
"loss": 2.5367,
"step": 31500
},
{
"epoch": 2.49,
"learning_rate": 0.00019316596000400625,
"loss": 2.5471,
"step": 32000
},
{
"epoch": 2.53,
"learning_rate": 0.00019149667812906886,
"loss": 2.5488,
"step": 32500
},
{
"epoch": 2.57,
"learning_rate": 0.00018982739625413145,
"loss": 2.5402,
"step": 33000
},
{
"epoch": 2.61,
"learning_rate": 0.00018815811437919406,
"loss": 2.5437,
"step": 33500
},
{
"epoch": 2.65,
"learning_rate": 0.00018648883250425665,
"loss": 2.5244,
"step": 34000
},
{
"epoch": 2.69,
"learning_rate": 0.00018481955062931923,
"loss": 2.5389,
"step": 34500
},
{
"epoch": 2.73,
"learning_rate": 0.00018315026875438185,
"loss": 2.5081,
"step": 35000
},
{
"epoch": 2.73,
"eval_gen_len": 18.7821,
"eval_loss": 2.3596315383911133,
"eval_rouge1": 27.4715,
"eval_rouge2": 13.2862,
"eval_rougeL": 22.7022,
"eval_rougeLsum": 24.4252,
"eval_runtime": 624.9994,
"eval_samples_per_second": 20.538,
"eval_steps_per_second": 2.568,
"step": 35000
},
{
"epoch": 2.77,
"learning_rate": 0.00018148098687944443,
"loss": 2.5425,
"step": 35500
},
{
"epoch": 2.8,
"learning_rate": 0.00017981170500450705,
"loss": 2.5245,
"step": 36000
},
{
"epoch": 2.84,
"learning_rate": 0.00017814242312956963,
"loss": 2.506,
"step": 36500
},
{
"epoch": 2.88,
"learning_rate": 0.00017647314125463225,
"loss": 2.542,
"step": 37000
},
{
"epoch": 2.92,
"learning_rate": 0.00017480385937969483,
"loss": 2.5252,
"step": 37500
},
{
"epoch": 2.96,
"learning_rate": 0.00017313457750475745,
"loss": 2.5175,
"step": 38000
},
{
"epoch": 3.0,
"learning_rate": 0.00017146529562982003,
"loss": 2.5091,
"step": 38500
},
{
"epoch": 3.04,
"learning_rate": 0.00016979601375488265,
"loss": 2.4468,
"step": 39000
},
{
"epoch": 3.08,
"learning_rate": 0.00016812673187994523,
"loss": 2.447,
"step": 39500
},
{
"epoch": 3.12,
"learning_rate": 0.00016645745000500785,
"loss": 2.4152,
"step": 40000
},
{
"epoch": 3.12,
"eval_gen_len": 18.8203,
"eval_loss": 2.326728105545044,
"eval_rouge1": 27.9595,
"eval_rouge2": 13.5813,
"eval_rougeL": 23.0493,
"eval_rougeLsum": 24.9203,
"eval_runtime": 625.9414,
"eval_samples_per_second": 20.507,
"eval_steps_per_second": 2.564,
"step": 40000
},
{
"epoch": 3.15,
"learning_rate": 0.00016478816813007043,
"loss": 2.4379,
"step": 40500
},
{
"epoch": 3.19,
"learning_rate": 0.00016311888625513305,
"loss": 2.4489,
"step": 41000
},
{
"epoch": 3.23,
"learning_rate": 0.00016144960438019564,
"loss": 2.4212,
"step": 41500
},
{
"epoch": 3.27,
"learning_rate": 0.00015978032250525825,
"loss": 2.4203,
"step": 42000
},
{
"epoch": 3.31,
"learning_rate": 0.0001581110406303208,
"loss": 2.433,
"step": 42500
},
{
"epoch": 3.35,
"learning_rate": 0.0001564417587553834,
"loss": 2.432,
"step": 43000
},
{
"epoch": 3.39,
"learning_rate": 0.000154772476880446,
"loss": 2.4299,
"step": 43500
},
{
"epoch": 3.43,
"learning_rate": 0.0001531031950055086,
"loss": 2.4201,
"step": 44000
},
{
"epoch": 3.47,
"learning_rate": 0.0001514339131305712,
"loss": 2.4314,
"step": 44500
},
{
"epoch": 3.51,
"learning_rate": 0.00014976463125563382,
"loss": 2.4387,
"step": 45000
},
{
"epoch": 3.51,
"eval_gen_len": 18.8376,
"eval_loss": 2.317692518234253,
"eval_rouge1": 28.1616,
"eval_rouge2": 13.668,
"eval_rougeL": 23.1738,
"eval_rougeLsum": 25.0342,
"eval_runtime": 622.4609,
"eval_samples_per_second": 20.621,
"eval_steps_per_second": 2.578,
"step": 45000
},
{
"epoch": 3.54,
"learning_rate": 0.0001480953493806964,
"loss": 2.4389,
"step": 45500
},
{
"epoch": 3.58,
"learning_rate": 0.000146426067505759,
"loss": 2.4363,
"step": 46000
},
{
"epoch": 3.62,
"learning_rate": 0.0001447567856308216,
"loss": 2.4395,
"step": 46500
},
{
"epoch": 3.66,
"learning_rate": 0.0001430875037558842,
"loss": 2.4418,
"step": 47000
},
{
"epoch": 3.7,
"learning_rate": 0.0001414182218809468,
"loss": 2.4201,
"step": 47500
},
{
"epoch": 3.74,
"learning_rate": 0.0001397489400060094,
"loss": 2.4169,
"step": 48000
},
{
"epoch": 3.78,
"learning_rate": 0.000138079658131072,
"loss": 2.4009,
"step": 48500
},
{
"epoch": 3.82,
"learning_rate": 0.0001364103762561346,
"loss": 2.4279,
"step": 49000
},
{
"epoch": 3.86,
"learning_rate": 0.0001347410943811972,
"loss": 2.4307,
"step": 49500
},
{
"epoch": 3.89,
"learning_rate": 0.0001330718125062598,
"loss": 2.4387,
"step": 50000
},
{
"epoch": 3.89,
"eval_gen_len": 18.7872,
"eval_loss": 2.3016672134399414,
"eval_rouge1": 28.1111,
"eval_rouge2": 13.569,
"eval_rougeL": 23.1312,
"eval_rougeLsum": 24.9912,
"eval_runtime": 628.1204,
"eval_samples_per_second": 20.436,
"eval_steps_per_second": 2.555,
"step": 50000
},
{
"epoch": 3.93,
"learning_rate": 0.0001314025306313224,
"loss": 2.4143,
"step": 50500
},
{
"epoch": 3.97,
"learning_rate": 0.000129733248756385,
"loss": 2.4197,
"step": 51000
},
{
"epoch": 4.01,
"learning_rate": 0.00012806396688144758,
"loss": 2.4084,
"step": 51500
},
{
"epoch": 4.05,
"learning_rate": 0.00012639468500651017,
"loss": 2.3544,
"step": 52000
},
{
"epoch": 4.09,
"learning_rate": 0.00012472540313157278,
"loss": 2.332,
"step": 52500
},
{
"epoch": 4.13,
"learning_rate": 0.00012305612125663537,
"loss": 2.3597,
"step": 53000
},
{
"epoch": 4.17,
"learning_rate": 0.00012138683938169798,
"loss": 2.3492,
"step": 53500
},
{
"epoch": 4.21,
"learning_rate": 0.00011971755750676058,
"loss": 2.3679,
"step": 54000
},
{
"epoch": 4.25,
"learning_rate": 0.00011804827563182318,
"loss": 2.3633,
"step": 54500
},
{
"epoch": 4.28,
"learning_rate": 0.00011637899375688578,
"loss": 2.3467,
"step": 55000
},
{
"epoch": 4.28,
"eval_gen_len": 18.8334,
"eval_loss": 2.3123602867126465,
"eval_rouge1": 28.0679,
"eval_rouge2": 13.7123,
"eval_rougeL": 23.1516,
"eval_rougeLsum": 25.0002,
"eval_runtime": 622.7792,
"eval_samples_per_second": 20.611,
"eval_steps_per_second": 2.577,
"step": 55000
},
{
"epoch": 4.32,
"learning_rate": 0.00011470971188194838,
"loss": 2.3579,
"step": 55500
},
{
"epoch": 4.36,
"learning_rate": 0.00011304043000701098,
"loss": 2.3501,
"step": 56000
},
{
"epoch": 4.4,
"learning_rate": 0.00011137114813207358,
"loss": 2.3592,
"step": 56500
},
{
"epoch": 4.44,
"learning_rate": 0.00010970186625713618,
"loss": 2.344,
"step": 57000
},
{
"epoch": 4.48,
"learning_rate": 0.00010803258438219876,
"loss": 2.3578,
"step": 57500
},
{
"epoch": 4.52,
"learning_rate": 0.00010636330250726136,
"loss": 2.3407,
"step": 58000
},
{
"epoch": 4.56,
"learning_rate": 0.00010469402063232396,
"loss": 2.3452,
"step": 58500
},
{
"epoch": 4.6,
"learning_rate": 0.00010302473875738656,
"loss": 2.3465,
"step": 59000
},
{
"epoch": 4.64,
"learning_rate": 0.00010135545688244916,
"loss": 2.3687,
"step": 59500
},
{
"epoch": 4.67,
"learning_rate": 9.968617500751176e-05,
"loss": 2.3367,
"step": 60000
},
{
"epoch": 4.67,
"eval_gen_len": 18.835,
"eval_loss": 2.2966153621673584,
"eval_rouge1": 28.293,
"eval_rouge2": 13.9084,
"eval_rougeL": 23.3359,
"eval_rougeLsum": 25.1789,
"eval_runtime": 625.9275,
"eval_samples_per_second": 20.507,
"eval_steps_per_second": 2.564,
"step": 60000
},
{
"epoch": 4.71,
"learning_rate": 9.801689313257436e-05,
"loss": 2.3306,
"step": 60500
},
{
"epoch": 4.75,
"learning_rate": 9.634761125763696e-05,
"loss": 2.3497,
"step": 61000
},
{
"epoch": 4.79,
"learning_rate": 9.467832938269956e-05,
"loss": 2.3313,
"step": 61500
},
{
"epoch": 4.83,
"learning_rate": 9.300904750776216e-05,
"loss": 2.3427,
"step": 62000
},
{
"epoch": 4.87,
"learning_rate": 9.133976563282476e-05,
"loss": 2.3259,
"step": 62500
},
{
"epoch": 4.91,
"learning_rate": 8.967048375788736e-05,
"loss": 2.3544,
"step": 63000
},
{
"epoch": 4.95,
"learning_rate": 8.800120188294995e-05,
"loss": 2.3307,
"step": 63500
},
{
"epoch": 4.99,
"learning_rate": 8.633192000801255e-05,
"loss": 2.3477,
"step": 64000
},
{
"epoch": 5.02,
"learning_rate": 8.466263813307513e-05,
"loss": 2.3064,
"step": 64500
},
{
"epoch": 5.06,
"learning_rate": 8.299335625813773e-05,
"loss": 2.2882,
"step": 65000
},
{
"epoch": 5.06,
"eval_gen_len": 18.7974,
"eval_loss": 2.2921857833862305,
"eval_rouge1": 28.3828,
"eval_rouge2": 14.0129,
"eval_rougeL": 23.443,
"eval_rougeLsum": 25.3001,
"eval_runtime": 621.9943,
"eval_samples_per_second": 20.637,
"eval_steps_per_second": 2.58,
"step": 65000
},
{
"epoch": 5.1,
"learning_rate": 8.132407438320033e-05,
"loss": 2.2733,
"step": 65500
},
{
"epoch": 5.14,
"learning_rate": 7.965479250826293e-05,
"loss": 2.2869,
"step": 66000
},
{
"epoch": 5.18,
"learning_rate": 7.798551063332553e-05,
"loss": 2.2904,
"step": 66500
},
{
"epoch": 5.22,
"learning_rate": 7.631622875838813e-05,
"loss": 2.2516,
"step": 67000
},
{
"epoch": 5.26,
"learning_rate": 7.464694688345073e-05,
"loss": 2.2835,
"step": 67500
},
{
"epoch": 5.3,
"learning_rate": 7.297766500851333e-05,
"loss": 2.2802,
"step": 68000
},
{
"epoch": 5.34,
"learning_rate": 7.130838313357593e-05,
"loss": 2.2779,
"step": 68500
},
{
"epoch": 5.38,
"learning_rate": 6.963910125863853e-05,
"loss": 2.274,
"step": 69000
},
{
"epoch": 5.41,
"learning_rate": 6.796981938370112e-05,
"loss": 2.2908,
"step": 69500
},
{
"epoch": 5.45,
"learning_rate": 6.630053750876372e-05,
"loss": 2.2782,
"step": 70000
},
{
"epoch": 5.45,
"eval_gen_len": 18.8495,
"eval_loss": 2.2865357398986816,
"eval_rouge1": 28.3987,
"eval_rouge2": 13.9705,
"eval_rougeL": 23.4227,
"eval_rougeLsum": 25.3263,
"eval_runtime": 625.1399,
"eval_samples_per_second": 20.533,
"eval_steps_per_second": 2.567,
"step": 70000
},
{
"epoch": 5.49,
"learning_rate": 6.463125563382632e-05,
"loss": 2.2882,
"step": 70500
},
{
"epoch": 5.53,
"learning_rate": 6.296197375888892e-05,
"loss": 2.2802,
"step": 71000
},
{
"epoch": 5.57,
"learning_rate": 6.129269188395152e-05,
"loss": 2.2774,
"step": 71500
},
{
"epoch": 5.61,
"learning_rate": 5.9623410009014114e-05,
"loss": 2.31,
"step": 72000
},
{
"epoch": 5.65,
"learning_rate": 5.7954128134076714e-05,
"loss": 2.2829,
"step": 72500
},
{
"epoch": 5.69,
"learning_rate": 5.6284846259139314e-05,
"loss": 2.2971,
"step": 73000
},
{
"epoch": 5.73,
"learning_rate": 5.4615564384201915e-05,
"loss": 2.2824,
"step": 73500
},
{
"epoch": 5.76,
"learning_rate": 5.294628250926451e-05,
"loss": 2.2754,
"step": 74000
},
{
"epoch": 5.8,
"learning_rate": 5.127700063432711e-05,
"loss": 2.2893,
"step": 74500
},
{
"epoch": 5.84,
"learning_rate": 4.960771875938971e-05,
"loss": 2.2788,
"step": 75000
},
{
"epoch": 5.84,
"eval_gen_len": 18.7967,
"eval_loss": 2.278130531311035,
"eval_rouge1": 28.4256,
"eval_rouge2": 14.0668,
"eval_rougeL": 23.4947,
"eval_rougeLsum": 25.3403,
"eval_runtime": 622.5919,
"eval_samples_per_second": 20.617,
"eval_steps_per_second": 2.578,
"step": 75000
},
{
"epoch": 5.88,
"learning_rate": 4.793843688445231e-05,
"loss": 2.2898,
"step": 75500
},
{
"epoch": 5.92,
"learning_rate": 4.62691550095149e-05,
"loss": 2.281,
"step": 76000
},
{
"epoch": 5.96,
"learning_rate": 4.45998731345775e-05,
"loss": 2.28,
"step": 76500
},
{
"epoch": 6.0,
"learning_rate": 4.2930591259640095e-05,
"loss": 2.269,
"step": 77000
},
{
"epoch": 6.04,
"learning_rate": 4.1261309384702695e-05,
"loss": 2.2428,
"step": 77500
},
{
"epoch": 6.08,
"learning_rate": 3.9592027509765295e-05,
"loss": 2.2627,
"step": 78000
},
{
"epoch": 6.12,
"learning_rate": 3.7922745634827896e-05,
"loss": 2.2219,
"step": 78500
},
{
"epoch": 6.15,
"learning_rate": 3.625346375989049e-05,
"loss": 2.218,
"step": 79000
},
{
"epoch": 6.19,
"learning_rate": 3.458418188495309e-05,
"loss": 2.2205,
"step": 79500
},
{
"epoch": 6.23,
"learning_rate": 3.291490001001569e-05,
"loss": 2.239,
"step": 80000
},
{
"epoch": 6.23,
"eval_gen_len": 18.8384,
"eval_loss": 2.2816860675811768,
"eval_rouge1": 28.4681,
"eval_rouge2": 14.0835,
"eval_rougeL": 23.5141,
"eval_rougeLsum": 25.4023,
"eval_runtime": 624.366,
"eval_samples_per_second": 20.558,
"eval_steps_per_second": 2.571,
"step": 80000
},
{
"epoch": 6.27,
"learning_rate": 3.124561813507828e-05,
"loss": 2.2095,
"step": 80500
},
{
"epoch": 6.31,
"learning_rate": 2.9576336260140883e-05,
"loss": 2.2441,
"step": 81000
},
{
"epoch": 6.35,
"learning_rate": 2.7907054385203483e-05,
"loss": 2.2292,
"step": 81500
},
{
"epoch": 6.39,
"learning_rate": 2.623777251026608e-05,
"loss": 2.2482,
"step": 82000
},
{
"epoch": 6.43,
"learning_rate": 2.456849063532868e-05,
"loss": 2.2243,
"step": 82500
},
{
"epoch": 6.47,
"learning_rate": 2.289920876039128e-05,
"loss": 2.2177,
"step": 83000
},
{
"epoch": 6.5,
"learning_rate": 2.1229926885453873e-05,
"loss": 2.2317,
"step": 83500
},
{
"epoch": 6.54,
"learning_rate": 1.9560645010516473e-05,
"loss": 2.2311,
"step": 84000
},
{
"epoch": 6.58,
"learning_rate": 1.7891363135579073e-05,
"loss": 2.2446,
"step": 84500
},
{
"epoch": 6.62,
"learning_rate": 1.622208126064167e-05,
"loss": 2.2324,
"step": 85000
},
{
"epoch": 6.62,
"eval_gen_len": 18.8369,
"eval_loss": 2.27907395362854,
"eval_rouge1": 28.5356,
"eval_rouge2": 14.1871,
"eval_rougeL": 23.5477,
"eval_rougeLsum": 25.4279,
"eval_runtime": 621.3714,
"eval_samples_per_second": 20.658,
"eval_steps_per_second": 2.583,
"step": 85000
},
{
"epoch": 6.66,
"learning_rate": 1.4552799385704269e-05,
"loss": 2.241,
"step": 85500
},
{
"epoch": 6.7,
"learning_rate": 1.2883517510766869e-05,
"loss": 2.2262,
"step": 86000
},
{
"epoch": 6.74,
"learning_rate": 1.1214235635829465e-05,
"loss": 2.2456,
"step": 86500
},
{
"epoch": 6.78,
"learning_rate": 9.544953760892062e-06,
"loss": 2.2237,
"step": 87000
},
{
"epoch": 6.82,
"learning_rate": 7.875671885954662e-06,
"loss": 2.2294,
"step": 87500
},
{
"epoch": 6.86,
"learning_rate": 6.20639001101726e-06,
"loss": 2.2184,
"step": 88000
},
{
"epoch": 6.89,
"learning_rate": 4.537108136079858e-06,
"loss": 2.2167,
"step": 88500
},
{
"epoch": 6.93,
"learning_rate": 2.867826261142456e-06,
"loss": 2.2478,
"step": 89000
},
{
"epoch": 6.97,
"learning_rate": 1.1985443862050543e-06,
"loss": 2.2596,
"step": 89500
},
{
"epoch": 7.0,
"step": 89859,
"total_flos": 2.6510315204815258e+17,
"train_loss": 2.493601185993256,
"train_runtime": 53594.8094,
"train_samples_per_second": 13.413,
"train_steps_per_second": 1.677
}
],
"max_steps": 89859,
"num_train_epochs": 7,
"total_flos": 2.6510315204815258e+17,
"trial_name": null,
"trial_params": null
}