adapters-mistral-gptq-QLORA-super_glue-multirc / trainer_state-mistral-gptq-QLORA-super_glue-multirc-sequence_classification.json
RMHalak's picture
Task: SequenceClassification
14c1f2e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.984,
"eval_steps": 1,
"global_step": 124,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016,
"grad_norm": 81.46387481689453,
"learning_rate": 2.5e-05,
"loss": 1.2913,
"step": 1
},
{
"epoch": 0.016,
"eval_exact_match": 0.12244897959183673,
"eval_f1_a": 0.5525291828793775,
"eval_f1_m": 0.4947961335716436,
"eval_loss": 1.4936190843582153,
"eval_runtime": 14.237,
"eval_samples_per_second": 17.56,
"eval_steps_per_second": 2.248,
"step": 1
},
{
"epoch": 0.032,
"grad_norm": 188.8806610107422,
"learning_rate": 5e-05,
"loss": 1.3198,
"step": 2
},
{
"epoch": 0.032,
"eval_exact_match": 0.10204081632653061,
"eval_f1_a": 0.46632124352331605,
"eval_f1_m": 0.4837873860833043,
"eval_loss": 1.2441954612731934,
"eval_runtime": 14.2318,
"eval_samples_per_second": 17.566,
"eval_steps_per_second": 2.248,
"step": 2
},
{
"epoch": 0.048,
"grad_norm": 35.330894470214844,
"learning_rate": 4.959016393442623e-05,
"loss": 1.189,
"step": 3
},
{
"epoch": 0.048,
"eval_exact_match": 0.061224489795918366,
"eval_f1_a": 0.3120567375886525,
"eval_f1_m": 0.4296016841635089,
"eval_loss": 1.4960094690322876,
"eval_runtime": 14.2468,
"eval_samples_per_second": 17.548,
"eval_steps_per_second": 2.246,
"step": 3
},
{
"epoch": 0.064,
"grad_norm": 128.71548461914062,
"learning_rate": 4.918032786885246e-05,
"loss": 2.0627,
"step": 4
},
{
"epoch": 0.064,
"eval_exact_match": 0.14285714285714285,
"eval_f1_a": 0.4739884393063584,
"eval_f1_m": 0.5152380432692557,
"eval_loss": 1.1382358074188232,
"eval_runtime": 14.238,
"eval_samples_per_second": 17.559,
"eval_steps_per_second": 2.248,
"step": 4
},
{
"epoch": 0.08,
"grad_norm": 125.15596008300781,
"learning_rate": 4.8770491803278687e-05,
"loss": 1.3357,
"step": 5
},
{
"epoch": 0.08,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6304347826086957,
"eval_f1_m": 0.5950331980944225,
"eval_loss": 1.429607629776001,
"eval_runtime": 14.236,
"eval_samples_per_second": 17.561,
"eval_steps_per_second": 2.248,
"step": 5
},
{
"epoch": 0.096,
"grad_norm": 157.35198974609375,
"learning_rate": 4.836065573770492e-05,
"loss": 1.5012,
"step": 6
},
{
"epoch": 0.096,
"eval_exact_match": 0.1836734693877551,
"eval_f1_a": 0.6148648648648648,
"eval_f1_m": 0.5295975000056632,
"eval_loss": 1.8408384323120117,
"eval_runtime": 14.334,
"eval_samples_per_second": 17.441,
"eval_steps_per_second": 2.232,
"step": 6
},
{
"epoch": 0.112,
"grad_norm": 20.72323989868164,
"learning_rate": 4.795081967213115e-05,
"loss": 0.263,
"step": 7
},
{
"epoch": 0.112,
"eval_exact_match": 0.16326530612244897,
"eval_f1_a": 0.6158940397350993,
"eval_f1_m": 0.5160586578953926,
"eval_loss": 2.023961067199707,
"eval_runtime": 14.2426,
"eval_samples_per_second": 17.553,
"eval_steps_per_second": 2.247,
"step": 7
},
{
"epoch": 0.128,
"grad_norm": 137.9950408935547,
"learning_rate": 4.754098360655738e-05,
"loss": 0.7805,
"step": 8
},
{
"epoch": 0.128,
"eval_exact_match": 0.2653061224489796,
"eval_f1_a": 0.6363636363636364,
"eval_f1_m": 0.6154686300094462,
"eval_loss": 1.3389190435409546,
"eval_runtime": 14.3383,
"eval_samples_per_second": 17.436,
"eval_steps_per_second": 2.232,
"step": 8
},
{
"epoch": 0.144,
"grad_norm": 99.50145721435547,
"learning_rate": 4.713114754098361e-05,
"loss": 1.4556,
"step": 9
},
{
"epoch": 0.144,
"eval_exact_match": 0.16326530612244897,
"eval_f1_a": 0.5207100591715976,
"eval_f1_m": 0.5594476385292712,
"eval_loss": 1.061840295791626,
"eval_runtime": 14.2288,
"eval_samples_per_second": 17.57,
"eval_steps_per_second": 2.249,
"step": 9
},
{
"epoch": 0.16,
"grad_norm": 86.7911376953125,
"learning_rate": 4.672131147540984e-05,
"loss": 1.6426,
"step": 10
},
{
"epoch": 0.16,
"eval_exact_match": 0.10204081632653061,
"eval_f1_a": 0.41726618705035967,
"eval_f1_m": 0.5063550961510145,
"eval_loss": 1.4055088758468628,
"eval_runtime": 14.2353,
"eval_samples_per_second": 17.562,
"eval_steps_per_second": 2.248,
"step": 10
},
{
"epoch": 0.176,
"grad_norm": 48.242271423339844,
"learning_rate": 4.631147540983607e-05,
"loss": 0.6148,
"step": 11
},
{
"epoch": 0.176,
"eval_exact_match": 0.10204081632653061,
"eval_f1_a": 0.41428571428571426,
"eval_f1_m": 0.5043761227434697,
"eval_loss": 1.3665776252746582,
"eval_runtime": 14.2332,
"eval_samples_per_second": 17.565,
"eval_steps_per_second": 2.248,
"step": 11
},
{
"epoch": 0.192,
"grad_norm": 86.30690002441406,
"learning_rate": 4.59016393442623e-05,
"loss": 0.9029,
"step": 12
},
{
"epoch": 0.192,
"eval_exact_match": 0.14285714285714285,
"eval_f1_a": 0.5294117647058824,
"eval_f1_m": 0.5558578043271921,
"eval_loss": 1.0262727737426758,
"eval_runtime": 14.2392,
"eval_samples_per_second": 17.557,
"eval_steps_per_second": 2.247,
"step": 12
},
{
"epoch": 0.208,
"grad_norm": 95.64641571044922,
"learning_rate": 4.549180327868853e-05,
"loss": 0.3356,
"step": 13
},
{
"epoch": 0.208,
"eval_exact_match": 0.12244897959183673,
"eval_f1_a": 0.5030674846625767,
"eval_f1_m": 0.5387398315969746,
"eval_loss": 1.0579854249954224,
"eval_runtime": 14.2396,
"eval_samples_per_second": 17.557,
"eval_steps_per_second": 2.247,
"step": 13
},
{
"epoch": 0.224,
"grad_norm": 120.39337921142578,
"learning_rate": 4.508196721311476e-05,
"loss": 0.5427,
"step": 14
},
{
"epoch": 0.224,
"eval_exact_match": 0.1836734693877551,
"eval_f1_a": 0.5894736842105263,
"eval_f1_m": 0.5906226992961685,
"eval_loss": 0.9290003776550293,
"eval_runtime": 14.2383,
"eval_samples_per_second": 17.558,
"eval_steps_per_second": 2.247,
"step": 14
},
{
"epoch": 0.24,
"grad_norm": 59.97566604614258,
"learning_rate": 4.467213114754098e-05,
"loss": 2.5016,
"step": 15
},
{
"epoch": 0.24,
"eval_exact_match": 0.2857142857142857,
"eval_f1_a": 0.6551724137931034,
"eval_f1_m": 0.6429105191860293,
"eval_loss": 1.0172419548034668,
"eval_runtime": 14.2349,
"eval_samples_per_second": 17.562,
"eval_steps_per_second": 2.248,
"step": 15
},
{
"epoch": 0.256,
"grad_norm": 90.46813201904297,
"learning_rate": 4.426229508196721e-05,
"loss": 0.5741,
"step": 16
},
{
"epoch": 0.256,
"eval_exact_match": 0.30612244897959184,
"eval_f1_a": 0.6608695652173914,
"eval_f1_m": 0.6547261978384425,
"eval_loss": 1.0083348751068115,
"eval_runtime": 14.2226,
"eval_samples_per_second": 17.578,
"eval_steps_per_second": 2.25,
"step": 16
},
{
"epoch": 0.272,
"grad_norm": 272.53857421875,
"learning_rate": 4.3852459016393444e-05,
"loss": 2.131,
"step": 17
},
{
"epoch": 0.272,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6224489795918368,
"eval_f1_m": 0.6273816886061783,
"eval_loss": 0.9200563430786133,
"eval_runtime": 14.2293,
"eval_samples_per_second": 17.569,
"eval_steps_per_second": 2.249,
"step": 17
},
{
"epoch": 0.288,
"grad_norm": 97.75645446777344,
"learning_rate": 4.3442622950819674e-05,
"loss": 1.6872,
"step": 18
},
{
"epoch": 0.288,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6326530612244898,
"eval_f1_m": 0.6421047206761491,
"eval_loss": 0.9017351269721985,
"eval_runtime": 14.2331,
"eval_samples_per_second": 17.565,
"eval_steps_per_second": 2.248,
"step": 18
},
{
"epoch": 0.304,
"grad_norm": 89.68392181396484,
"learning_rate": 4.3032786885245904e-05,
"loss": 0.7496,
"step": 19
},
{
"epoch": 0.304,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.5942857142857144,
"eval_f1_m": 0.6143906546967771,
"eval_loss": 0.9645715355873108,
"eval_runtime": 14.2324,
"eval_samples_per_second": 17.565,
"eval_steps_per_second": 2.248,
"step": 19
},
{
"epoch": 0.32,
"grad_norm": 81.89313507080078,
"learning_rate": 4.262295081967213e-05,
"loss": 0.3902,
"step": 20
},
{
"epoch": 0.32,
"eval_exact_match": 0.12244897959183673,
"eval_f1_a": 0.406015037593985,
"eval_f1_m": 0.5054664723032071,
"eval_loss": 1.4447712898254395,
"eval_runtime": 14.3411,
"eval_samples_per_second": 17.432,
"eval_steps_per_second": 2.231,
"step": 20
},
{
"epoch": 0.336,
"grad_norm": 71.31260681152344,
"learning_rate": 4.2213114754098365e-05,
"loss": 1.2667,
"step": 21
},
{
"epoch": 0.336,
"eval_exact_match": 0.08163265306122448,
"eval_f1_a": 0.3305785123966942,
"eval_f1_m": 0.46700963435657317,
"eval_loss": 1.6687042713165283,
"eval_runtime": 14.2325,
"eval_samples_per_second": 17.565,
"eval_steps_per_second": 2.248,
"step": 21
},
{
"epoch": 0.352,
"grad_norm": 189.01861572265625,
"learning_rate": 4.1803278688524595e-05,
"loss": 2.3478,
"step": 22
},
{
"epoch": 0.352,
"eval_exact_match": 0.12244897959183673,
"eval_f1_a": 0.4179104477611941,
"eval_f1_m": 0.5093007332803252,
"eval_loss": 1.451232671737671,
"eval_runtime": 14.2331,
"eval_samples_per_second": 17.565,
"eval_steps_per_second": 2.248,
"step": 22
},
{
"epoch": 0.368,
"grad_norm": 86.25955200195312,
"learning_rate": 4.1393442622950826e-05,
"loss": 1.33,
"step": 23
},
{
"epoch": 0.368,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.5443037974683544,
"eval_f1_m": 0.5908616326983674,
"eval_loss": 1.052020788192749,
"eval_runtime": 14.2347,
"eval_samples_per_second": 17.563,
"eval_steps_per_second": 2.248,
"step": 23
},
{
"epoch": 0.384,
"grad_norm": 52.738563537597656,
"learning_rate": 4.098360655737705e-05,
"loss": 1.236,
"step": 24
},
{
"epoch": 0.384,
"eval_exact_match": 0.2653061224489796,
"eval_f1_a": 0.6494845360824741,
"eval_f1_m": 0.6442331478045764,
"eval_loss": 0.8382735848426819,
"eval_runtime": 14.2369,
"eval_samples_per_second": 17.56,
"eval_steps_per_second": 2.248,
"step": 24
},
{
"epoch": 0.4,
"grad_norm": 40.95921325683594,
"learning_rate": 4.057377049180328e-05,
"loss": 0.3315,
"step": 25
},
{
"epoch": 0.4,
"eval_exact_match": 0.30612244897959184,
"eval_f1_a": 0.6899563318777292,
"eval_f1_m": 0.6908583779832279,
"eval_loss": 0.9057127833366394,
"eval_runtime": 14.2385,
"eval_samples_per_second": 17.558,
"eval_steps_per_second": 2.247,
"step": 25
},
{
"epoch": 0.416,
"grad_norm": 32.647369384765625,
"learning_rate": 4.016393442622951e-05,
"loss": 1.2426,
"step": 26
},
{
"epoch": 0.416,
"eval_exact_match": 0.2857142857142857,
"eval_f1_a": 0.6772908366533864,
"eval_f1_m": 0.6716122596224635,
"eval_loss": 1.041088581085205,
"eval_runtime": 14.2297,
"eval_samples_per_second": 17.569,
"eval_steps_per_second": 2.249,
"step": 26
},
{
"epoch": 0.432,
"grad_norm": 215.0525665283203,
"learning_rate": 3.975409836065574e-05,
"loss": 1.6515,
"step": 27
},
{
"epoch": 0.432,
"eval_exact_match": 0.2857142857142857,
"eval_f1_a": 0.674698795180723,
"eval_f1_m": 0.6731278658649605,
"eval_loss": 1.025064468383789,
"eval_runtime": 14.2342,
"eval_samples_per_second": 17.563,
"eval_steps_per_second": 2.248,
"step": 27
},
{
"epoch": 0.448,
"grad_norm": 75.84857177734375,
"learning_rate": 3.934426229508197e-05,
"loss": 0.8826,
"step": 28
},
{
"epoch": 0.448,
"eval_exact_match": 0.2857142857142857,
"eval_f1_a": 0.6515837104072398,
"eval_f1_m": 0.6577141679182493,
"eval_loss": 0.8711889386177063,
"eval_runtime": 14.3415,
"eval_samples_per_second": 17.432,
"eval_steps_per_second": 2.231,
"step": 28
},
{
"epoch": 0.464,
"grad_norm": 104.8478775024414,
"learning_rate": 3.89344262295082e-05,
"loss": 1.3364,
"step": 29
},
{
"epoch": 0.464,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6111111111111112,
"eval_f1_m": 0.6279861295167416,
"eval_loss": 0.8534930348396301,
"eval_runtime": 14.3389,
"eval_samples_per_second": 17.435,
"eval_steps_per_second": 2.232,
"step": 29
},
{
"epoch": 0.48,
"grad_norm": 141.2751922607422,
"learning_rate": 3.8524590163934424e-05,
"loss": 0.5787,
"step": 30
},
{
"epoch": 0.48,
"eval_exact_match": 0.1836734693877551,
"eval_f1_a": 0.49315068493150677,
"eval_f1_m": 0.5514150248844125,
"eval_loss": 1.225917935371399,
"eval_runtime": 14.2404,
"eval_samples_per_second": 17.556,
"eval_steps_per_second": 2.247,
"step": 30
},
{
"epoch": 0.496,
"grad_norm": 132.69497680664062,
"learning_rate": 3.8114754098360655e-05,
"loss": 1.8268,
"step": 31
},
{
"epoch": 0.496,
"eval_exact_match": 0.10204081632653061,
"eval_f1_a": 0.3548387096774194,
"eval_f1_m": 0.48598142220591195,
"eval_loss": 1.5571385622024536,
"eval_runtime": 14.2363,
"eval_samples_per_second": 17.561,
"eval_steps_per_second": 2.248,
"step": 31
},
{
"epoch": 0.512,
"grad_norm": 193.7267608642578,
"learning_rate": 3.7704918032786885e-05,
"loss": 1.176,
"step": 32
},
{
"epoch": 0.512,
"eval_exact_match": 0.10204081632653061,
"eval_f1_a": 0.3414634146341463,
"eval_f1_m": 0.48029629101057675,
"eval_loss": 1.5923832654953003,
"eval_runtime": 14.3471,
"eval_samples_per_second": 17.425,
"eval_steps_per_second": 2.23,
"step": 32
},
{
"epoch": 0.528,
"grad_norm": 157.8994903564453,
"learning_rate": 3.729508196721312e-05,
"loss": 1.1536,
"step": 33
},
{
"epoch": 0.528,
"eval_exact_match": 0.14285714285714285,
"eval_f1_a": 0.43076923076923085,
"eval_f1_m": 0.5186639324394426,
"eval_loss": 1.3835409879684448,
"eval_runtime": 14.3412,
"eval_samples_per_second": 17.432,
"eval_steps_per_second": 2.231,
"step": 33
},
{
"epoch": 0.544,
"grad_norm": 106.80976867675781,
"learning_rate": 3.6885245901639346e-05,
"loss": 0.6749,
"step": 34
},
{
"epoch": 0.544,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.5228758169934641,
"eval_f1_m": 0.5656035574402921,
"eval_loss": 1.0968397855758667,
"eval_runtime": 14.2314,
"eval_samples_per_second": 17.567,
"eval_steps_per_second": 2.249,
"step": 34
},
{
"epoch": 0.56,
"grad_norm": 41.265201568603516,
"learning_rate": 3.6475409836065576e-05,
"loss": 1.1197,
"step": 35
},
{
"epoch": 0.56,
"eval_exact_match": 0.1836734693877551,
"eval_f1_a": 0.5595238095238095,
"eval_f1_m": 0.5912371075636381,
"eval_loss": 0.9161210656166077,
"eval_runtime": 14.2362,
"eval_samples_per_second": 17.561,
"eval_steps_per_second": 2.248,
"step": 35
},
{
"epoch": 0.576,
"grad_norm": 129.1635284423828,
"learning_rate": 3.6065573770491806e-05,
"loss": 1.4224,
"step": 36
},
{
"epoch": 0.576,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.61,
"eval_f1_m": 0.6188252790293606,
"eval_loss": 0.8555943369865417,
"eval_runtime": 14.241,
"eval_samples_per_second": 17.555,
"eval_steps_per_second": 2.247,
"step": 36
},
{
"epoch": 0.592,
"grad_norm": 37.038997650146484,
"learning_rate": 3.5655737704918037e-05,
"loss": 1.1437,
"step": 37
},
{
"epoch": 0.592,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.646288209606987,
"eval_f1_m": 0.6307418205377388,
"eval_loss": 0.9305726289749146,
"eval_runtime": 14.2339,
"eval_samples_per_second": 17.564,
"eval_steps_per_second": 2.248,
"step": 37
},
{
"epoch": 0.608,
"grad_norm": 148.70489501953125,
"learning_rate": 3.524590163934427e-05,
"loss": 1.2494,
"step": 38
},
{
"epoch": 0.608,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.639344262295082,
"eval_f1_m": 0.6150846052706795,
"eval_loss": 0.9929665327072144,
"eval_runtime": 14.2304,
"eval_samples_per_second": 17.568,
"eval_steps_per_second": 2.249,
"step": 38
},
{
"epoch": 0.624,
"grad_norm": 105.3074722290039,
"learning_rate": 3.483606557377049e-05,
"loss": 0.86,
"step": 39
},
{
"epoch": 0.624,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.652542372881356,
"eval_f1_m": 0.6313492063492062,
"eval_loss": 0.9553629159927368,
"eval_runtime": 14.2397,
"eval_samples_per_second": 17.557,
"eval_steps_per_second": 2.247,
"step": 39
},
{
"epoch": 0.64,
"grad_norm": 117.94893646240234,
"learning_rate": 3.442622950819672e-05,
"loss": 0.9122,
"step": 40
},
{
"epoch": 0.64,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6190476190476191,
"eval_f1_m": 0.6127175545542892,
"eval_loss": 0.8853461742401123,
"eval_runtime": 14.3327,
"eval_samples_per_second": 17.443,
"eval_steps_per_second": 2.233,
"step": 40
},
{
"epoch": 0.656,
"grad_norm": 59.64809036254883,
"learning_rate": 3.401639344262295e-05,
"loss": 0.7094,
"step": 41
},
{
"epoch": 0.656,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.5863874345549739,
"eval_f1_m": 0.6034632034632034,
"eval_loss": 0.8550831079483032,
"eval_runtime": 14.3356,
"eval_samples_per_second": 17.439,
"eval_steps_per_second": 2.232,
"step": 41
},
{
"epoch": 0.672,
"grad_norm": 26.82879066467285,
"learning_rate": 3.360655737704918e-05,
"loss": 0.3156,
"step": 42
},
{
"epoch": 0.672,
"eval_exact_match": 0.16326530612244897,
"eval_f1_a": 0.5505617977528091,
"eval_f1_m": 0.5863309026574332,
"eval_loss": 0.908556342124939,
"eval_runtime": 14.3422,
"eval_samples_per_second": 17.431,
"eval_steps_per_second": 2.231,
"step": 42
},
{
"epoch": 0.688,
"grad_norm": 37.69074630737305,
"learning_rate": 3.319672131147541e-05,
"loss": 0.9358,
"step": 43
},
{
"epoch": 0.688,
"eval_exact_match": 0.1836734693877551,
"eval_f1_a": 0.5432098765432098,
"eval_f1_m": 0.5794387018876814,
"eval_loss": 1.004197597503662,
"eval_runtime": 14.3302,
"eval_samples_per_second": 17.446,
"eval_steps_per_second": 2.233,
"step": 43
},
{
"epoch": 0.704,
"grad_norm": 85.96589660644531,
"learning_rate": 3.2786885245901635e-05,
"loss": 1.2701,
"step": 44
},
{
"epoch": 0.704,
"eval_exact_match": 0.16326530612244897,
"eval_f1_a": 0.5128205128205128,
"eval_f1_m": 0.5589686957033895,
"eval_loss": 1.0330990552902222,
"eval_runtime": 14.2281,
"eval_samples_per_second": 17.571,
"eval_steps_per_second": 2.249,
"step": 44
},
{
"epoch": 0.72,
"grad_norm": 73.4930191040039,
"learning_rate": 3.237704918032787e-05,
"loss": 0.6213,
"step": 45
},
{
"epoch": 0.72,
"eval_exact_match": 0.14285714285714285,
"eval_f1_a": 0.46896551724137925,
"eval_f1_m": 0.5310531305429266,
"eval_loss": 1.1586450338363647,
"eval_runtime": 14.3344,
"eval_samples_per_second": 17.441,
"eval_steps_per_second": 2.232,
"step": 45
},
{
"epoch": 0.736,
"grad_norm": 25.196025848388672,
"learning_rate": 3.19672131147541e-05,
"loss": 0.2137,
"step": 46
},
{
"epoch": 0.736,
"eval_exact_match": 0.14285714285714285,
"eval_f1_a": 0.46808510638297873,
"eval_f1_m": 0.5367949510806654,
"eval_loss": 1.2649037837982178,
"eval_runtime": 14.23,
"eval_samples_per_second": 17.569,
"eval_steps_per_second": 2.249,
"step": 46
},
{
"epoch": 0.752,
"grad_norm": 149.361328125,
"learning_rate": 3.155737704918033e-05,
"loss": 1.0182,
"step": 47
},
{
"epoch": 0.752,
"eval_exact_match": 0.14285714285714285,
"eval_f1_a": 0.4647887323943662,
"eval_f1_m": 0.5342844230599333,
"eval_loss": 1.2592852115631104,
"eval_runtime": 14.235,
"eval_samples_per_second": 17.562,
"eval_steps_per_second": 2.248,
"step": 47
},
{
"epoch": 0.768,
"grad_norm": 65.61959075927734,
"learning_rate": 3.114754098360656e-05,
"loss": 0.6999,
"step": 48
},
{
"epoch": 0.768,
"eval_exact_match": 0.16326530612244897,
"eval_f1_a": 0.4697986577181208,
"eval_f1_m": 0.5299296508480181,
"eval_loss": 1.143484115600586,
"eval_runtime": 14.2312,
"eval_samples_per_second": 17.567,
"eval_steps_per_second": 2.249,
"step": 48
},
{
"epoch": 0.784,
"grad_norm": 27.904254913330078,
"learning_rate": 3.073770491803279e-05,
"loss": 0.6342,
"step": 49
},
{
"epoch": 0.784,
"eval_exact_match": 0.16326530612244897,
"eval_f1_a": 0.5389221556886228,
"eval_f1_m": 0.5721706864564007,
"eval_loss": 1.0147384405136108,
"eval_runtime": 14.2319,
"eval_samples_per_second": 17.566,
"eval_steps_per_second": 2.248,
"step": 49
},
{
"epoch": 0.8,
"grad_norm": 206.67816162109375,
"learning_rate": 3.0327868852459017e-05,
"loss": 2.0752,
"step": 50
},
{
"epoch": 0.8,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.6021505376344086,
"eval_f1_m": 0.6184708148993864,
"eval_loss": 0.90333092212677,
"eval_runtime": 14.234,
"eval_samples_per_second": 17.564,
"eval_steps_per_second": 2.248,
"step": 50
},
{
"epoch": 0.816,
"grad_norm": 32.51238250732422,
"learning_rate": 2.9918032786885248e-05,
"loss": 0.6363,
"step": 51
},
{
"epoch": 0.816,
"eval_exact_match": 0.30612244897959184,
"eval_f1_a": 0.6346153846153846,
"eval_f1_m": 0.6513524457402007,
"eval_loss": 0.9025561809539795,
"eval_runtime": 14.3356,
"eval_samples_per_second": 17.439,
"eval_steps_per_second": 2.232,
"step": 51
},
{
"epoch": 0.832,
"grad_norm": 88.53801727294922,
"learning_rate": 2.9508196721311478e-05,
"loss": 0.6725,
"step": 52
},
{
"epoch": 0.832,
"eval_exact_match": 0.30612244897959184,
"eval_f1_a": 0.6542056074766355,
"eval_f1_m": 0.6610168742821804,
"eval_loss": 0.9258018136024475,
"eval_runtime": 14.337,
"eval_samples_per_second": 17.437,
"eval_steps_per_second": 2.232,
"step": 52
},
{
"epoch": 0.848,
"grad_norm": 210.28028869628906,
"learning_rate": 2.9098360655737705e-05,
"loss": 1.4444,
"step": 53
},
{
"epoch": 0.848,
"eval_exact_match": 0.30612244897959184,
"eval_f1_a": 0.6478873239436621,
"eval_f1_m": 0.6559332390965044,
"eval_loss": 0.9136560559272766,
"eval_runtime": 14.3332,
"eval_samples_per_second": 17.442,
"eval_steps_per_second": 2.233,
"step": 53
},
{
"epoch": 0.864,
"grad_norm": 130.76451110839844,
"learning_rate": 2.8688524590163935e-05,
"loss": 1.5007,
"step": 54
},
{
"epoch": 0.864,
"eval_exact_match": 0.2857142857142857,
"eval_f1_a": 0.6435643564356436,
"eval_f1_m": 0.6596710545690138,
"eval_loss": 0.8902440667152405,
"eval_runtime": 14.2297,
"eval_samples_per_second": 17.569,
"eval_steps_per_second": 2.249,
"step": 54
},
{
"epoch": 0.88,
"grad_norm": 59.906551361083984,
"learning_rate": 2.8278688524590162e-05,
"loss": 1.4569,
"step": 55
},
{
"epoch": 0.88,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.5901639344262295,
"eval_f1_m": 0.6132119581099174,
"eval_loss": 0.9152215719223022,
"eval_runtime": 14.2381,
"eval_samples_per_second": 17.558,
"eval_steps_per_second": 2.247,
"step": 55
},
{
"epoch": 0.896,
"grad_norm": 73.04712677001953,
"learning_rate": 2.7868852459016392e-05,
"loss": 1.4627,
"step": 56
},
{
"epoch": 0.896,
"eval_exact_match": 0.16326530612244897,
"eval_f1_a": 0.5408805031446541,
"eval_f1_m": 0.581694156183952,
"eval_loss": 1.0332040786743164,
"eval_runtime": 14.235,
"eval_samples_per_second": 17.562,
"eval_steps_per_second": 2.248,
"step": 56
},
{
"epoch": 0.912,
"grad_norm": 41.93537902832031,
"learning_rate": 2.7459016393442626e-05,
"loss": 0.6447,
"step": 57
},
{
"epoch": 0.912,
"eval_exact_match": 0.1836734693877551,
"eval_f1_a": 0.5,
"eval_f1_m": 0.5578105001574389,
"eval_loss": 1.1393637657165527,
"eval_runtime": 14.2337,
"eval_samples_per_second": 17.564,
"eval_steps_per_second": 2.248,
"step": 57
},
{
"epoch": 0.928,
"grad_norm": 225.17127990722656,
"learning_rate": 2.7049180327868856e-05,
"loss": 2.1825,
"step": 58
},
{
"epoch": 0.928,
"eval_exact_match": 0.1836734693877551,
"eval_f1_a": 0.5170068027210885,
"eval_f1_m": 0.5664545205361532,
"eval_loss": 1.1429673433303833,
"eval_runtime": 14.3356,
"eval_samples_per_second": 17.439,
"eval_steps_per_second": 2.232,
"step": 58
},
{
"epoch": 0.944,
"grad_norm": 218.0926513671875,
"learning_rate": 2.6639344262295087e-05,
"loss": 1.5474,
"step": 59
},
{
"epoch": 0.944,
"eval_exact_match": 0.16326530612244897,
"eval_f1_a": 0.5100671140939597,
"eval_f1_m": 0.5603939144755471,
"eval_loss": 1.0793055295944214,
"eval_runtime": 14.3623,
"eval_samples_per_second": 17.407,
"eval_steps_per_second": 2.228,
"step": 59
},
{
"epoch": 0.96,
"grad_norm": 214.265625,
"learning_rate": 2.6229508196721314e-05,
"loss": 2.0897,
"step": 60
},
{
"epoch": 0.96,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.5748502994011976,
"eval_f1_m": 0.6078411950860929,
"eval_loss": 0.9570010900497437,
"eval_runtime": 14.3353,
"eval_samples_per_second": 17.439,
"eval_steps_per_second": 2.232,
"step": 60
},
{
"epoch": 0.976,
"grad_norm": 30.67082405090332,
"learning_rate": 2.5819672131147544e-05,
"loss": 0.5137,
"step": 61
},
{
"epoch": 0.976,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.5989304812834225,
"eval_f1_m": 0.6199340342197485,
"eval_loss": 0.8710112571716309,
"eval_runtime": 14.2317,
"eval_samples_per_second": 17.566,
"eval_steps_per_second": 2.249,
"step": 61
},
{
"epoch": 0.992,
"grad_norm": 44.715511322021484,
"learning_rate": 2.540983606557377e-05,
"loss": 0.9146,
"step": 62
},
{
"epoch": 0.992,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6336633663366337,
"eval_f1_m": 0.643560974173219,
"eval_loss": 0.8580593466758728,
"eval_runtime": 14.2244,
"eval_samples_per_second": 17.575,
"eval_steps_per_second": 2.25,
"step": 62
},
{
"epoch": 1.008,
"grad_norm": 47.77970886230469,
"learning_rate": 2.5e-05,
"loss": 0.8187,
"step": 63
},
{
"epoch": 1.008,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6283185840707965,
"eval_f1_m": 0.6231513384574607,
"eval_loss": 0.9097387194633484,
"eval_runtime": 14.2275,
"eval_samples_per_second": 17.572,
"eval_steps_per_second": 2.249,
"step": 63
},
{
"epoch": 1.024,
"grad_norm": 62.07459259033203,
"learning_rate": 2.459016393442623e-05,
"loss": 0.389,
"step": 64
},
{
"epoch": 1.024,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6416666666666667,
"eval_f1_m": 0.6224541331684187,
"eval_loss": 0.9766004085540771,
"eval_runtime": 14.328,
"eval_samples_per_second": 17.448,
"eval_steps_per_second": 2.233,
"step": 64
},
{
"epoch": 1.04,
"grad_norm": 27.772306442260742,
"learning_rate": 2.418032786885246e-05,
"loss": 0.5394,
"step": 65
},
{
"epoch": 1.04,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.6422764227642277,
"eval_f1_m": 0.6168094943605147,
"eval_loss": 1.0415246486663818,
"eval_runtime": 14.235,
"eval_samples_per_second": 17.562,
"eval_steps_per_second": 2.248,
"step": 65
},
{
"epoch": 1.056,
"grad_norm": 72.29753875732422,
"learning_rate": 2.377049180327869e-05,
"loss": 1.1039,
"step": 66
},
{
"epoch": 1.056,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.6448979591836735,
"eval_f1_m": 0.6215713991224195,
"eval_loss": 1.0369302034378052,
"eval_runtime": 14.2353,
"eval_samples_per_second": 17.562,
"eval_steps_per_second": 2.248,
"step": 66
},
{
"epoch": 1.072,
"grad_norm": 79.86347198486328,
"learning_rate": 2.336065573770492e-05,
"loss": 0.3747,
"step": 67
},
{
"epoch": 1.072,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6440677966101694,
"eval_f1_m": 0.6234590806019376,
"eval_loss": 0.971021831035614,
"eval_runtime": 14.335,
"eval_samples_per_second": 17.44,
"eval_steps_per_second": 2.232,
"step": 67
},
{
"epoch": 1.088,
"grad_norm": 98.71847534179688,
"learning_rate": 2.295081967213115e-05,
"loss": 0.6412,
"step": 68
},
{
"epoch": 1.088,
"eval_exact_match": 0.2653061224489796,
"eval_f1_a": 0.6301369863013699,
"eval_f1_m": 0.6385634773389874,
"eval_loss": 0.89739990234375,
"eval_runtime": 14.2391,
"eval_samples_per_second": 17.557,
"eval_steps_per_second": 2.247,
"step": 68
},
{
"epoch": 1.104,
"grad_norm": 37.62075424194336,
"learning_rate": 2.254098360655738e-05,
"loss": 0.1785,
"step": 69
},
{
"epoch": 1.104,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.6178010471204188,
"eval_f1_m": 0.6330241187384045,
"eval_loss": 0.8888271450996399,
"eval_runtime": 14.2337,
"eval_samples_per_second": 17.564,
"eval_steps_per_second": 2.248,
"step": 69
},
{
"epoch": 1.12,
"grad_norm": 39.140846252441406,
"learning_rate": 2.2131147540983607e-05,
"loss": 0.8809,
"step": 70
},
{
"epoch": 1.12,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.5952380952380953,
"eval_f1_m": 0.6151768299727483,
"eval_loss": 0.9461753964424133,
"eval_runtime": 14.2294,
"eval_samples_per_second": 17.569,
"eval_steps_per_second": 2.249,
"step": 70
},
{
"epoch": 1.1360000000000001,
"grad_norm": 24.488187789916992,
"learning_rate": 2.1721311475409837e-05,
"loss": 0.3331,
"step": 71
},
{
"epoch": 1.1360000000000001,
"eval_exact_match": 0.1836734693877551,
"eval_f1_a": 0.5548387096774193,
"eval_f1_m": 0.5858735935266548,
"eval_loss": 1.0686767101287842,
"eval_runtime": 14.2405,
"eval_samples_per_second": 17.556,
"eval_steps_per_second": 2.247,
"step": 71
},
{
"epoch": 1.152,
"grad_norm": 159.9743194580078,
"learning_rate": 2.1311475409836064e-05,
"loss": 0.7266,
"step": 72
},
{
"epoch": 1.152,
"eval_exact_match": 0.16326530612244897,
"eval_f1_a": 0.5205479452054794,
"eval_f1_m": 0.5669536812393955,
"eval_loss": 1.139631986618042,
"eval_runtime": 14.236,
"eval_samples_per_second": 17.561,
"eval_steps_per_second": 2.248,
"step": 72
},
{
"epoch": 1.168,
"grad_norm": 180.88392639160156,
"learning_rate": 2.0901639344262298e-05,
"loss": 2.2863,
"step": 73
},
{
"epoch": 1.168,
"eval_exact_match": 0.1836734693877551,
"eval_f1_a": 0.5466666666666666,
"eval_f1_m": 0.5785720515312353,
"eval_loss": 1.1217446327209473,
"eval_runtime": 14.3346,
"eval_samples_per_second": 17.44,
"eval_steps_per_second": 2.232,
"step": 73
},
{
"epoch": 1.184,
"grad_norm": 152.32852172851562,
"learning_rate": 2.0491803278688525e-05,
"loss": 1.3959,
"step": 74
},
{
"epoch": 1.184,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.5822784810126582,
"eval_f1_m": 0.6006187123534062,
"eval_loss": 1.045620083808899,
"eval_runtime": 14.2336,
"eval_samples_per_second": 17.564,
"eval_steps_per_second": 2.248,
"step": 74
},
{
"epoch": 1.2,
"grad_norm": 147.99668884277344,
"learning_rate": 2.0081967213114755e-05,
"loss": 1.1065,
"step": 75
},
{
"epoch": 1.2,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6071428571428572,
"eval_f1_m": 0.630640221456548,
"eval_loss": 0.9323554635047913,
"eval_runtime": 14.3399,
"eval_samples_per_second": 17.434,
"eval_steps_per_second": 2.232,
"step": 75
},
{
"epoch": 1.216,
"grad_norm": 44.00741958618164,
"learning_rate": 1.9672131147540985e-05,
"loss": 0.8284,
"step": 76
},
{
"epoch": 1.216,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6526315789473685,
"eval_f1_m": 0.6551741908884765,
"eval_loss": 0.8580492734909058,
"eval_runtime": 14.2326,
"eval_samples_per_second": 17.565,
"eval_steps_per_second": 2.248,
"step": 76
},
{
"epoch": 1.232,
"grad_norm": 84.2724609375,
"learning_rate": 1.9262295081967212e-05,
"loss": 1.4605,
"step": 77
},
{
"epoch": 1.232,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6542056074766355,
"eval_f1_m": 0.6638233648437729,
"eval_loss": 0.8490340709686279,
"eval_runtime": 14.3306,
"eval_samples_per_second": 17.445,
"eval_steps_per_second": 2.233,
"step": 77
},
{
"epoch": 1.248,
"grad_norm": 57.137474060058594,
"learning_rate": 1.8852459016393442e-05,
"loss": 0.7637,
"step": 78
},
{
"epoch": 1.248,
"eval_exact_match": 0.2857142857142857,
"eval_f1_a": 0.6666666666666667,
"eval_f1_m": 0.6427098978119384,
"eval_loss": 0.9433552026748657,
"eval_runtime": 14.2366,
"eval_samples_per_second": 17.56,
"eval_steps_per_second": 2.248,
"step": 78
},
{
"epoch": 1.264,
"grad_norm": 81.78334045410156,
"learning_rate": 1.8442622950819673e-05,
"loss": 1.0805,
"step": 79
},
{
"epoch": 1.264,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6459143968871595,
"eval_f1_m": 0.6244088111435049,
"eval_loss": 1.1826504468917847,
"eval_runtime": 14.3352,
"eval_samples_per_second": 17.44,
"eval_steps_per_second": 2.232,
"step": 79
},
{
"epoch": 1.28,
"grad_norm": 168.3184814453125,
"learning_rate": 1.8032786885245903e-05,
"loss": 0.8076,
"step": 80
},
{
"epoch": 1.28,
"eval_exact_match": 0.1836734693877551,
"eval_f1_a": 0.6472727272727273,
"eval_f1_m": 0.6001591112635529,
"eval_loss": 1.4046131372451782,
"eval_runtime": 14.3393,
"eval_samples_per_second": 17.435,
"eval_steps_per_second": 2.232,
"step": 80
},
{
"epoch": 1.296,
"grad_norm": 165.74830627441406,
"learning_rate": 1.7622950819672133e-05,
"loss": 0.934,
"step": 81
},
{
"epoch": 1.296,
"eval_exact_match": 0.1836734693877551,
"eval_f1_a": 0.6408450704225352,
"eval_f1_m": 0.5587504049238743,
"eval_loss": 1.519930124282837,
"eval_runtime": 14.3365,
"eval_samples_per_second": 17.438,
"eval_steps_per_second": 2.232,
"step": 81
},
{
"epoch": 1.312,
"grad_norm": 137.4995880126953,
"learning_rate": 1.721311475409836e-05,
"loss": 0.9774,
"step": 82
},
{
"epoch": 1.312,
"eval_exact_match": 0.14285714285714285,
"eval_f1_a": 0.6363636363636364,
"eval_f1_m": 0.5380992873339812,
"eval_loss": 1.5469399690628052,
"eval_runtime": 14.4478,
"eval_samples_per_second": 17.304,
"eval_steps_per_second": 2.215,
"step": 82
},
{
"epoch": 1.328,
"grad_norm": 280.535400390625,
"learning_rate": 1.680327868852459e-05,
"loss": 2.8829,
"step": 83
},
{
"epoch": 1.328,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.6451612903225806,
"eval_f1_m": 0.5875740772499476,
"eval_loss": 1.444277286529541,
"eval_runtime": 14.2369,
"eval_samples_per_second": 17.56,
"eval_steps_per_second": 2.248,
"step": 83
},
{
"epoch": 1.3439999999999999,
"grad_norm": 92.20207214355469,
"learning_rate": 1.6393442622950818e-05,
"loss": 1.0067,
"step": 84
},
{
"epoch": 1.3439999999999999,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6616541353383458,
"eval_f1_m": 0.6289330017721373,
"eval_loss": 1.285833477973938,
"eval_runtime": 14.2358,
"eval_samples_per_second": 17.561,
"eval_steps_per_second": 2.248,
"step": 84
},
{
"epoch": 1.3599999999999999,
"grad_norm": 51.42112731933594,
"learning_rate": 1.598360655737705e-05,
"loss": 0.6843,
"step": 85
},
{
"epoch": 1.3599999999999999,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6535433070866141,
"eval_f1_m": 0.6334548104956267,
"eval_loss": 1.1037089824676514,
"eval_runtime": 14.2366,
"eval_samples_per_second": 17.56,
"eval_steps_per_second": 2.248,
"step": 85
},
{
"epoch": 1.376,
"grad_norm": 108.26708221435547,
"learning_rate": 1.557377049180328e-05,
"loss": 0.6217,
"step": 86
},
{
"epoch": 1.376,
"eval_exact_match": 0.2653061224489796,
"eval_f1_a": 0.6611570247933884,
"eval_f1_m": 0.6427356656948492,
"eval_loss": 0.946870744228363,
"eval_runtime": 14.2288,
"eval_samples_per_second": 17.57,
"eval_steps_per_second": 2.249,
"step": 86
},
{
"epoch": 1.392,
"grad_norm": 54.97236251831055,
"learning_rate": 1.5163934426229509e-05,
"loss": 0.7171,
"step": 87
},
{
"epoch": 1.392,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6363636363636364,
"eval_f1_m": 0.6192088523721175,
"eval_loss": 0.8331412672996521,
"eval_runtime": 14.2345,
"eval_samples_per_second": 17.563,
"eval_steps_per_second": 2.248,
"step": 87
},
{
"epoch": 1.408,
"grad_norm": 199.80453491210938,
"learning_rate": 1.4754098360655739e-05,
"loss": 0.9963,
"step": 88
},
{
"epoch": 1.408,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.6461538461538462,
"eval_f1_m": 0.6503011161174427,
"eval_loss": 0.8008266091346741,
"eval_runtime": 14.3423,
"eval_samples_per_second": 17.431,
"eval_steps_per_second": 2.231,
"step": 88
},
{
"epoch": 1.424,
"grad_norm": 103.70662689208984,
"learning_rate": 1.4344262295081968e-05,
"loss": 0.8336,
"step": 89
},
{
"epoch": 1.424,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6035502958579881,
"eval_f1_m": 0.6173385005017658,
"eval_loss": 0.8721192479133606,
"eval_runtime": 14.3376,
"eval_samples_per_second": 17.437,
"eval_steps_per_second": 2.232,
"step": 89
},
{
"epoch": 1.44,
"grad_norm": 151.86329650878906,
"learning_rate": 1.3934426229508196e-05,
"loss": 0.7439,
"step": 90
},
{
"epoch": 1.44,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.5897435897435898,
"eval_f1_m": 0.6109928620132702,
"eval_loss": 0.977756679058075,
"eval_runtime": 14.3382,
"eval_samples_per_second": 17.436,
"eval_steps_per_second": 2.232,
"step": 90
},
{
"epoch": 1.456,
"grad_norm": 42.42748260498047,
"learning_rate": 1.3524590163934428e-05,
"loss": 0.4346,
"step": 91
},
{
"epoch": 1.456,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.5562913907284768,
"eval_f1_m": 0.5869466134772258,
"eval_loss": 1.0586659908294678,
"eval_runtime": 14.2345,
"eval_samples_per_second": 17.563,
"eval_steps_per_second": 2.248,
"step": 91
},
{
"epoch": 1.472,
"grad_norm": 148.7361602783203,
"learning_rate": 1.3114754098360657e-05,
"loss": 0.6824,
"step": 92
},
{
"epoch": 1.472,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.5277777777777778,
"eval_f1_m": 0.5762801824026313,
"eval_loss": 1.0946743488311768,
"eval_runtime": 14.3359,
"eval_samples_per_second": 17.439,
"eval_steps_per_second": 2.232,
"step": 92
},
{
"epoch": 1.488,
"grad_norm": 231.66839599609375,
"learning_rate": 1.2704918032786885e-05,
"loss": 1.486,
"step": 93
},
{
"epoch": 1.488,
"eval_exact_match": 0.1836734693877551,
"eval_f1_a": 0.527027027027027,
"eval_f1_m": 0.5733919708409505,
"eval_loss": 1.0785720348358154,
"eval_runtime": 14.3409,
"eval_samples_per_second": 17.433,
"eval_steps_per_second": 2.231,
"step": 93
},
{
"epoch": 1.504,
"grad_norm": 78.1316909790039,
"learning_rate": 1.2295081967213116e-05,
"loss": 1.3813,
"step": 94
},
{
"epoch": 1.504,
"eval_exact_match": 0.1836734693877551,
"eval_f1_a": 0.527027027027027,
"eval_f1_m": 0.5735782358231338,
"eval_loss": 1.045979380607605,
"eval_runtime": 14.2374,
"eval_samples_per_second": 17.559,
"eval_steps_per_second": 2.248,
"step": 94
},
{
"epoch": 1.52,
"grad_norm": 82.6171875,
"learning_rate": 1.1885245901639344e-05,
"loss": 0.4523,
"step": 95
},
{
"epoch": 1.52,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.5859872611464968,
"eval_f1_m": 0.606911229360209,
"eval_loss": 0.9884187579154968,
"eval_runtime": 14.2353,
"eval_samples_per_second": 17.562,
"eval_steps_per_second": 2.248,
"step": 95
},
{
"epoch": 1.536,
"grad_norm": 174.6714630126953,
"learning_rate": 1.1475409836065575e-05,
"loss": 0.6107,
"step": 96
},
{
"epoch": 1.536,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.5925925925925926,
"eval_f1_m": 0.6086355821049698,
"eval_loss": 0.9135745763778687,
"eval_runtime": 14.2341,
"eval_samples_per_second": 17.563,
"eval_steps_per_second": 2.248,
"step": 96
},
{
"epoch": 1.552,
"grad_norm": 30.871902465820312,
"learning_rate": 1.1065573770491803e-05,
"loss": 0.6965,
"step": 97
},
{
"epoch": 1.552,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.608187134502924,
"eval_f1_m": 0.6191871620443049,
"eval_loss": 0.8789020776748657,
"eval_runtime": 14.2294,
"eval_samples_per_second": 17.569,
"eval_steps_per_second": 2.249,
"step": 97
},
{
"epoch": 1.568,
"grad_norm": 64.87429809570312,
"learning_rate": 1.0655737704918032e-05,
"loss": 0.2532,
"step": 98
},
{
"epoch": 1.568,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6101694915254237,
"eval_f1_m": 0.6249433106575963,
"eval_loss": 0.8408387899398804,
"eval_runtime": 14.2324,
"eval_samples_per_second": 17.566,
"eval_steps_per_second": 2.248,
"step": 98
},
{
"epoch": 1.584,
"grad_norm": 29.755355834960938,
"learning_rate": 1.0245901639344262e-05,
"loss": 0.3168,
"step": 99
},
{
"epoch": 1.584,
"eval_exact_match": 0.1836734693877551,
"eval_f1_a": 0.6162162162162163,
"eval_f1_m": 0.6253806284418529,
"eval_loss": 0.8142973780632019,
"eval_runtime": 14.3427,
"eval_samples_per_second": 17.43,
"eval_steps_per_second": 2.231,
"step": 99
},
{
"epoch": 1.6,
"grad_norm": 50.73051834106445,
"learning_rate": 9.836065573770493e-06,
"loss": 0.3733,
"step": 100
},
{
"epoch": 1.6,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6217616580310881,
"eval_f1_m": 0.6374355802927232,
"eval_loss": 0.8038433194160461,
"eval_runtime": 14.2306,
"eval_samples_per_second": 17.568,
"eval_steps_per_second": 2.249,
"step": 100
},
{
"epoch": 1.616,
"grad_norm": 59.93278884887695,
"learning_rate": 9.426229508196721e-06,
"loss": 0.6704,
"step": 101
},
{
"epoch": 1.616,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6354166666666666,
"eval_f1_m": 0.6522800895249876,
"eval_loss": 0.8013660311698914,
"eval_runtime": 14.3343,
"eval_samples_per_second": 17.441,
"eval_steps_per_second": 2.232,
"step": 101
},
{
"epoch": 1.6320000000000001,
"grad_norm": 86.40263366699219,
"learning_rate": 9.016393442622952e-06,
"loss": 0.3565,
"step": 102
},
{
"epoch": 1.6320000000000001,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6321243523316061,
"eval_f1_m": 0.6478826162499632,
"eval_loss": 0.7988218665122986,
"eval_runtime": 14.3368,
"eval_samples_per_second": 17.438,
"eval_steps_per_second": 2.232,
"step": 102
},
{
"epoch": 1.6480000000000001,
"grad_norm": 48.909366607666016,
"learning_rate": 8.60655737704918e-06,
"loss": 0.4804,
"step": 103
},
{
"epoch": 1.6480000000000001,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6461538461538462,
"eval_f1_m": 0.6624841711576407,
"eval_loss": 0.8011977672576904,
"eval_runtime": 14.3334,
"eval_samples_per_second": 17.442,
"eval_steps_per_second": 2.233,
"step": 103
},
{
"epoch": 1.6640000000000001,
"grad_norm": 45.205810546875,
"learning_rate": 8.196721311475409e-06,
"loss": 0.4073,
"step": 104
},
{
"epoch": 1.6640000000000001,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6288659793814433,
"eval_f1_m": 0.6442383013811586,
"eval_loss": 0.8024572730064392,
"eval_runtime": 14.2326,
"eval_samples_per_second": 17.565,
"eval_steps_per_second": 2.248,
"step": 104
},
{
"epoch": 1.6800000000000002,
"grad_norm": 40.28095245361328,
"learning_rate": 7.78688524590164e-06,
"loss": 0.555,
"step": 105
},
{
"epoch": 1.6800000000000002,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.6282722513089005,
"eval_f1_m": 0.6386746473481169,
"eval_loss": 0.7961085438728333,
"eval_runtime": 14.2384,
"eval_samples_per_second": 17.558,
"eval_steps_per_second": 2.247,
"step": 105
},
{
"epoch": 1.696,
"grad_norm": 33.259437561035156,
"learning_rate": 7.3770491803278695e-06,
"loss": 1.0566,
"step": 106
},
{
"epoch": 1.696,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.6170212765957447,
"eval_f1_m": 0.6252878640633743,
"eval_loss": 0.7974104285240173,
"eval_runtime": 14.2436,
"eval_samples_per_second": 17.552,
"eval_steps_per_second": 2.247,
"step": 106
},
{
"epoch": 1.712,
"grad_norm": 47.35837936401367,
"learning_rate": 6.967213114754098e-06,
"loss": 0.5672,
"step": 107
},
{
"epoch": 1.712,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.6120218579234973,
"eval_f1_m": 0.626776511470389,
"eval_loss": 0.801659345626831,
"eval_runtime": 14.3356,
"eval_samples_per_second": 17.439,
"eval_steps_per_second": 2.232,
"step": 107
},
{
"epoch": 1.728,
"grad_norm": 54.06736373901367,
"learning_rate": 6.557377049180328e-06,
"loss": 1.3321,
"step": 108
},
{
"epoch": 1.728,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.6187845303867403,
"eval_f1_m": 0.6321215066113025,
"eval_loss": 0.8101469874382019,
"eval_runtime": 14.2407,
"eval_samples_per_second": 17.555,
"eval_steps_per_second": 2.247,
"step": 108
},
{
"epoch": 1.744,
"grad_norm": 28.87479019165039,
"learning_rate": 6.147540983606558e-06,
"loss": 0.6863,
"step": 109
},
{
"epoch": 1.744,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6179775280898876,
"eval_f1_m": 0.6296433725005154,
"eval_loss": 0.823810875415802,
"eval_runtime": 14.3328,
"eval_samples_per_second": 17.442,
"eval_steps_per_second": 2.233,
"step": 109
},
{
"epoch": 1.76,
"grad_norm": 31.916561126708984,
"learning_rate": 5.737704918032787e-06,
"loss": 0.4661,
"step": 110
},
{
"epoch": 1.76,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6171428571428571,
"eval_f1_m": 0.629597726536502,
"eval_loss": 0.8390248417854309,
"eval_runtime": 14.3379,
"eval_samples_per_second": 17.436,
"eval_steps_per_second": 2.232,
"step": 110
},
{
"epoch": 1.776,
"grad_norm": 23.416114807128906,
"learning_rate": 5.327868852459016e-06,
"loss": 0.4288,
"step": 111
},
{
"epoch": 1.776,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.6046511627906977,
"eval_f1_m": 0.6097097347097348,
"eval_loss": 0.8482766151428223,
"eval_runtime": 14.2326,
"eval_samples_per_second": 17.565,
"eval_steps_per_second": 2.248,
"step": 111
},
{
"epoch": 1.792,
"grad_norm": 61.64879608154297,
"learning_rate": 4.918032786885246e-06,
"loss": 0.7776,
"step": 112
},
{
"epoch": 1.792,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6024096385542169,
"eval_f1_m": 0.6138944388944387,
"eval_loss": 0.8711744546890259,
"eval_runtime": 14.2329,
"eval_samples_per_second": 17.565,
"eval_steps_per_second": 2.248,
"step": 112
},
{
"epoch": 1.808,
"grad_norm": 36.72175979614258,
"learning_rate": 4.508196721311476e-06,
"loss": 0.7535,
"step": 113
},
{
"epoch": 1.808,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6049382716049383,
"eval_f1_m": 0.622810862606781,
"eval_loss": 0.8896200656890869,
"eval_runtime": 14.3388,
"eval_samples_per_second": 17.435,
"eval_steps_per_second": 2.232,
"step": 113
},
{
"epoch": 1.8239999999999998,
"grad_norm": 97.84061431884766,
"learning_rate": 4.098360655737704e-06,
"loss": 0.9935,
"step": 114
},
{
"epoch": 1.8239999999999998,
"eval_exact_match": 0.2653061224489796,
"eval_f1_a": 0.6097560975609756,
"eval_f1_m": 0.6331121259692688,
"eval_loss": 0.8925275802612305,
"eval_runtime": 14.2301,
"eval_samples_per_second": 17.568,
"eval_steps_per_second": 2.249,
"step": 114
},
{
"epoch": 1.8399999999999999,
"grad_norm": 28.994890213012695,
"learning_rate": 3.6885245901639347e-06,
"loss": 0.6005,
"step": 115
},
{
"epoch": 1.8399999999999999,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6097560975609756,
"eval_f1_m": 0.62264079457957,
"eval_loss": 0.902693510055542,
"eval_runtime": 14.324,
"eval_samples_per_second": 17.453,
"eval_steps_per_second": 2.234,
"step": 115
},
{
"epoch": 1.8559999999999999,
"grad_norm": 69.83122253417969,
"learning_rate": 3.278688524590164e-06,
"loss": 0.3714,
"step": 116
},
{
"epoch": 1.8559999999999999,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6086956521739131,
"eval_f1_m": 0.6247434538250866,
"eval_loss": 0.90312659740448,
"eval_runtime": 14.24,
"eval_samples_per_second": 17.556,
"eval_steps_per_second": 2.247,
"step": 116
},
{
"epoch": 1.8719999999999999,
"grad_norm": 45.33132553100586,
"learning_rate": 2.8688524590163937e-06,
"loss": 0.2872,
"step": 117
},
{
"epoch": 1.8719999999999999,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6012269938650306,
"eval_f1_m": 0.6208672280100851,
"eval_loss": 0.900492250919342,
"eval_runtime": 14.2312,
"eval_samples_per_second": 17.567,
"eval_steps_per_second": 2.249,
"step": 117
},
{
"epoch": 1.888,
"grad_norm": 40.021827697753906,
"learning_rate": 2.459016393442623e-06,
"loss": 1.0036,
"step": 118
},
{
"epoch": 1.888,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6097560975609756,
"eval_f1_m": 0.6235397255805418,
"eval_loss": 0.8864443898200989,
"eval_runtime": 14.2356,
"eval_samples_per_second": 17.562,
"eval_steps_per_second": 2.248,
"step": 118
},
{
"epoch": 1.904,
"grad_norm": 34.89374923706055,
"learning_rate": 2.049180327868852e-06,
"loss": 0.4085,
"step": 119
},
{
"epoch": 1.904,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6097560975609756,
"eval_f1_m": 0.62264079457957,
"eval_loss": 0.8801668286323547,
"eval_runtime": 14.2335,
"eval_samples_per_second": 17.564,
"eval_steps_per_second": 2.248,
"step": 119
},
{
"epoch": 1.92,
"grad_norm": 40.872169494628906,
"learning_rate": 1.639344262295082e-06,
"loss": 1.2969,
"step": 120
},
{
"epoch": 1.92,
"eval_exact_match": 0.24489795918367346,
"eval_f1_a": 0.6024096385542169,
"eval_f1_m": 0.6187181865753294,
"eval_loss": 0.8752056956291199,
"eval_runtime": 14.2287,
"eval_samples_per_second": 17.57,
"eval_steps_per_second": 2.249,
"step": 120
},
{
"epoch": 1.936,
"grad_norm": 29.49952507019043,
"learning_rate": 1.2295081967213116e-06,
"loss": 0.3546,
"step": 121
},
{
"epoch": 1.936,
"eval_exact_match": 0.20408163265306123,
"eval_f1_a": 0.6071428571428572,
"eval_f1_m": 0.6135057119750997,
"eval_loss": 0.866171658039093,
"eval_runtime": 14.2372,
"eval_samples_per_second": 17.56,
"eval_steps_per_second": 2.248,
"step": 121
},
{
"epoch": 1.952,
"grad_norm": 26.981924057006836,
"learning_rate": 8.19672131147541e-07,
"loss": 0.6466,
"step": 122
},
{
"epoch": 1.952,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6107784431137724,
"eval_f1_m": 0.6170042542491522,
"eval_loss": 0.8648313879966736,
"eval_runtime": 14.2434,
"eval_samples_per_second": 17.552,
"eval_steps_per_second": 2.247,
"step": 122
},
{
"epoch": 1.968,
"grad_norm": 34.26253128051758,
"learning_rate": 4.098360655737705e-07,
"loss": 0.1163,
"step": 123
},
{
"epoch": 1.968,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6190476190476191,
"eval_f1_m": 0.6196767518196089,
"eval_loss": 0.858651876449585,
"eval_runtime": 14.231,
"eval_samples_per_second": 17.567,
"eval_steps_per_second": 2.249,
"step": 123
},
{
"epoch": 1.984,
"grad_norm": 22.19408416748047,
"learning_rate": 0.0,
"loss": 0.2309,
"step": 124
},
{
"epoch": 1.984,
"eval_exact_match": 0.22448979591836735,
"eval_f1_a": 0.6107784431137724,
"eval_f1_m": 0.6170042542491522,
"eval_loss": 0.8643447756767273,
"eval_runtime": 14.3408,
"eval_samples_per_second": 17.433,
"eval_steps_per_second": 2.231,
"step": 124
},
{
"epoch": 1.984,
"step": 124,
"total_flos": 133624123228160.0,
"train_loss": 0.9533988147853844,
"train_runtime": 2319.6694,
"train_samples_per_second": 0.862,
"train_steps_per_second": 0.053
}
],
"logging_steps": 1,
"max_steps": 124,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 133624123228160.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}