adapters-mistral-gptq-QLORA-super_glue-multirc
/
trainer_state-mistral-gptq-QLORA-super_glue-multirc-sequence_classification.json
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 1.984, | |
"eval_steps": 1, | |
"global_step": 124, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.016, | |
"grad_norm": 81.46387481689453, | |
"learning_rate": 2.5e-05, | |
"loss": 1.2913, | |
"step": 1 | |
}, | |
{ | |
"epoch": 0.016, | |
"eval_exact_match": 0.12244897959183673, | |
"eval_f1_a": 0.5525291828793775, | |
"eval_f1_m": 0.4947961335716436, | |
"eval_loss": 1.4936190843582153, | |
"eval_runtime": 14.237, | |
"eval_samples_per_second": 17.56, | |
"eval_steps_per_second": 2.248, | |
"step": 1 | |
}, | |
{ | |
"epoch": 0.032, | |
"grad_norm": 188.8806610107422, | |
"learning_rate": 5e-05, | |
"loss": 1.3198, | |
"step": 2 | |
}, | |
{ | |
"epoch": 0.032, | |
"eval_exact_match": 0.10204081632653061, | |
"eval_f1_a": 0.46632124352331605, | |
"eval_f1_m": 0.4837873860833043, | |
"eval_loss": 1.2441954612731934, | |
"eval_runtime": 14.2318, | |
"eval_samples_per_second": 17.566, | |
"eval_steps_per_second": 2.248, | |
"step": 2 | |
}, | |
{ | |
"epoch": 0.048, | |
"grad_norm": 35.330894470214844, | |
"learning_rate": 4.959016393442623e-05, | |
"loss": 1.189, | |
"step": 3 | |
}, | |
{ | |
"epoch": 0.048, | |
"eval_exact_match": 0.061224489795918366, | |
"eval_f1_a": 0.3120567375886525, | |
"eval_f1_m": 0.4296016841635089, | |
"eval_loss": 1.4960094690322876, | |
"eval_runtime": 14.2468, | |
"eval_samples_per_second": 17.548, | |
"eval_steps_per_second": 2.246, | |
"step": 3 | |
}, | |
{ | |
"epoch": 0.064, | |
"grad_norm": 128.71548461914062, | |
"learning_rate": 4.918032786885246e-05, | |
"loss": 2.0627, | |
"step": 4 | |
}, | |
{ | |
"epoch": 0.064, | |
"eval_exact_match": 0.14285714285714285, | |
"eval_f1_a": 0.4739884393063584, | |
"eval_f1_m": 0.5152380432692557, | |
"eval_loss": 1.1382358074188232, | |
"eval_runtime": 14.238, | |
"eval_samples_per_second": 17.559, | |
"eval_steps_per_second": 2.248, | |
"step": 4 | |
}, | |
{ | |
"epoch": 0.08, | |
"grad_norm": 125.15596008300781, | |
"learning_rate": 4.8770491803278687e-05, | |
"loss": 1.3357, | |
"step": 5 | |
}, | |
{ | |
"epoch": 0.08, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6304347826086957, | |
"eval_f1_m": 0.5950331980944225, | |
"eval_loss": 1.429607629776001, | |
"eval_runtime": 14.236, | |
"eval_samples_per_second": 17.561, | |
"eval_steps_per_second": 2.248, | |
"step": 5 | |
}, | |
{ | |
"epoch": 0.096, | |
"grad_norm": 157.35198974609375, | |
"learning_rate": 4.836065573770492e-05, | |
"loss": 1.5012, | |
"step": 6 | |
}, | |
{ | |
"epoch": 0.096, | |
"eval_exact_match": 0.1836734693877551, | |
"eval_f1_a": 0.6148648648648648, | |
"eval_f1_m": 0.5295975000056632, | |
"eval_loss": 1.8408384323120117, | |
"eval_runtime": 14.334, | |
"eval_samples_per_second": 17.441, | |
"eval_steps_per_second": 2.232, | |
"step": 6 | |
}, | |
{ | |
"epoch": 0.112, | |
"grad_norm": 20.72323989868164, | |
"learning_rate": 4.795081967213115e-05, | |
"loss": 0.263, | |
"step": 7 | |
}, | |
{ | |
"epoch": 0.112, | |
"eval_exact_match": 0.16326530612244897, | |
"eval_f1_a": 0.6158940397350993, | |
"eval_f1_m": 0.5160586578953926, | |
"eval_loss": 2.023961067199707, | |
"eval_runtime": 14.2426, | |
"eval_samples_per_second": 17.553, | |
"eval_steps_per_second": 2.247, | |
"step": 7 | |
}, | |
{ | |
"epoch": 0.128, | |
"grad_norm": 137.9950408935547, | |
"learning_rate": 4.754098360655738e-05, | |
"loss": 0.7805, | |
"step": 8 | |
}, | |
{ | |
"epoch": 0.128, | |
"eval_exact_match": 0.2653061224489796, | |
"eval_f1_a": 0.6363636363636364, | |
"eval_f1_m": 0.6154686300094462, | |
"eval_loss": 1.3389190435409546, | |
"eval_runtime": 14.3383, | |
"eval_samples_per_second": 17.436, | |
"eval_steps_per_second": 2.232, | |
"step": 8 | |
}, | |
{ | |
"epoch": 0.144, | |
"grad_norm": 99.50145721435547, | |
"learning_rate": 4.713114754098361e-05, | |
"loss": 1.4556, | |
"step": 9 | |
}, | |
{ | |
"epoch": 0.144, | |
"eval_exact_match": 0.16326530612244897, | |
"eval_f1_a": 0.5207100591715976, | |
"eval_f1_m": 0.5594476385292712, | |
"eval_loss": 1.061840295791626, | |
"eval_runtime": 14.2288, | |
"eval_samples_per_second": 17.57, | |
"eval_steps_per_second": 2.249, | |
"step": 9 | |
}, | |
{ | |
"epoch": 0.16, | |
"grad_norm": 86.7911376953125, | |
"learning_rate": 4.672131147540984e-05, | |
"loss": 1.6426, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.16, | |
"eval_exact_match": 0.10204081632653061, | |
"eval_f1_a": 0.41726618705035967, | |
"eval_f1_m": 0.5063550961510145, | |
"eval_loss": 1.4055088758468628, | |
"eval_runtime": 14.2353, | |
"eval_samples_per_second": 17.562, | |
"eval_steps_per_second": 2.248, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.176, | |
"grad_norm": 48.242271423339844, | |
"learning_rate": 4.631147540983607e-05, | |
"loss": 0.6148, | |
"step": 11 | |
}, | |
{ | |
"epoch": 0.176, | |
"eval_exact_match": 0.10204081632653061, | |
"eval_f1_a": 0.41428571428571426, | |
"eval_f1_m": 0.5043761227434697, | |
"eval_loss": 1.3665776252746582, | |
"eval_runtime": 14.2332, | |
"eval_samples_per_second": 17.565, | |
"eval_steps_per_second": 2.248, | |
"step": 11 | |
}, | |
{ | |
"epoch": 0.192, | |
"grad_norm": 86.30690002441406, | |
"learning_rate": 4.59016393442623e-05, | |
"loss": 0.9029, | |
"step": 12 | |
}, | |
{ | |
"epoch": 0.192, | |
"eval_exact_match": 0.14285714285714285, | |
"eval_f1_a": 0.5294117647058824, | |
"eval_f1_m": 0.5558578043271921, | |
"eval_loss": 1.0262727737426758, | |
"eval_runtime": 14.2392, | |
"eval_samples_per_second": 17.557, | |
"eval_steps_per_second": 2.247, | |
"step": 12 | |
}, | |
{ | |
"epoch": 0.208, | |
"grad_norm": 95.64641571044922, | |
"learning_rate": 4.549180327868853e-05, | |
"loss": 0.3356, | |
"step": 13 | |
}, | |
{ | |
"epoch": 0.208, | |
"eval_exact_match": 0.12244897959183673, | |
"eval_f1_a": 0.5030674846625767, | |
"eval_f1_m": 0.5387398315969746, | |
"eval_loss": 1.0579854249954224, | |
"eval_runtime": 14.2396, | |
"eval_samples_per_second": 17.557, | |
"eval_steps_per_second": 2.247, | |
"step": 13 | |
}, | |
{ | |
"epoch": 0.224, | |
"grad_norm": 120.39337921142578, | |
"learning_rate": 4.508196721311476e-05, | |
"loss": 0.5427, | |
"step": 14 | |
}, | |
{ | |
"epoch": 0.224, | |
"eval_exact_match": 0.1836734693877551, | |
"eval_f1_a": 0.5894736842105263, | |
"eval_f1_m": 0.5906226992961685, | |
"eval_loss": 0.9290003776550293, | |
"eval_runtime": 14.2383, | |
"eval_samples_per_second": 17.558, | |
"eval_steps_per_second": 2.247, | |
"step": 14 | |
}, | |
{ | |
"epoch": 0.24, | |
"grad_norm": 59.97566604614258, | |
"learning_rate": 4.467213114754098e-05, | |
"loss": 2.5016, | |
"step": 15 | |
}, | |
{ | |
"epoch": 0.24, | |
"eval_exact_match": 0.2857142857142857, | |
"eval_f1_a": 0.6551724137931034, | |
"eval_f1_m": 0.6429105191860293, | |
"eval_loss": 1.0172419548034668, | |
"eval_runtime": 14.2349, | |
"eval_samples_per_second": 17.562, | |
"eval_steps_per_second": 2.248, | |
"step": 15 | |
}, | |
{ | |
"epoch": 0.256, | |
"grad_norm": 90.46813201904297, | |
"learning_rate": 4.426229508196721e-05, | |
"loss": 0.5741, | |
"step": 16 | |
}, | |
{ | |
"epoch": 0.256, | |
"eval_exact_match": 0.30612244897959184, | |
"eval_f1_a": 0.6608695652173914, | |
"eval_f1_m": 0.6547261978384425, | |
"eval_loss": 1.0083348751068115, | |
"eval_runtime": 14.2226, | |
"eval_samples_per_second": 17.578, | |
"eval_steps_per_second": 2.25, | |
"step": 16 | |
}, | |
{ | |
"epoch": 0.272, | |
"grad_norm": 272.53857421875, | |
"learning_rate": 4.3852459016393444e-05, | |
"loss": 2.131, | |
"step": 17 | |
}, | |
{ | |
"epoch": 0.272, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6224489795918368, | |
"eval_f1_m": 0.6273816886061783, | |
"eval_loss": 0.9200563430786133, | |
"eval_runtime": 14.2293, | |
"eval_samples_per_second": 17.569, | |
"eval_steps_per_second": 2.249, | |
"step": 17 | |
}, | |
{ | |
"epoch": 0.288, | |
"grad_norm": 97.75645446777344, | |
"learning_rate": 4.3442622950819674e-05, | |
"loss": 1.6872, | |
"step": 18 | |
}, | |
{ | |
"epoch": 0.288, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6326530612244898, | |
"eval_f1_m": 0.6421047206761491, | |
"eval_loss": 0.9017351269721985, | |
"eval_runtime": 14.2331, | |
"eval_samples_per_second": 17.565, | |
"eval_steps_per_second": 2.248, | |
"step": 18 | |
}, | |
{ | |
"epoch": 0.304, | |
"grad_norm": 89.68392181396484, | |
"learning_rate": 4.3032786885245904e-05, | |
"loss": 0.7496, | |
"step": 19 | |
}, | |
{ | |
"epoch": 0.304, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.5942857142857144, | |
"eval_f1_m": 0.6143906546967771, | |
"eval_loss": 0.9645715355873108, | |
"eval_runtime": 14.2324, | |
"eval_samples_per_second": 17.565, | |
"eval_steps_per_second": 2.248, | |
"step": 19 | |
}, | |
{ | |
"epoch": 0.32, | |
"grad_norm": 81.89313507080078, | |
"learning_rate": 4.262295081967213e-05, | |
"loss": 0.3902, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.32, | |
"eval_exact_match": 0.12244897959183673, | |
"eval_f1_a": 0.406015037593985, | |
"eval_f1_m": 0.5054664723032071, | |
"eval_loss": 1.4447712898254395, | |
"eval_runtime": 14.3411, | |
"eval_samples_per_second": 17.432, | |
"eval_steps_per_second": 2.231, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.336, | |
"grad_norm": 71.31260681152344, | |
"learning_rate": 4.2213114754098365e-05, | |
"loss": 1.2667, | |
"step": 21 | |
}, | |
{ | |
"epoch": 0.336, | |
"eval_exact_match": 0.08163265306122448, | |
"eval_f1_a": 0.3305785123966942, | |
"eval_f1_m": 0.46700963435657317, | |
"eval_loss": 1.6687042713165283, | |
"eval_runtime": 14.2325, | |
"eval_samples_per_second": 17.565, | |
"eval_steps_per_second": 2.248, | |
"step": 21 | |
}, | |
{ | |
"epoch": 0.352, | |
"grad_norm": 189.01861572265625, | |
"learning_rate": 4.1803278688524595e-05, | |
"loss": 2.3478, | |
"step": 22 | |
}, | |
{ | |
"epoch": 0.352, | |
"eval_exact_match": 0.12244897959183673, | |
"eval_f1_a": 0.4179104477611941, | |
"eval_f1_m": 0.5093007332803252, | |
"eval_loss": 1.451232671737671, | |
"eval_runtime": 14.2331, | |
"eval_samples_per_second": 17.565, | |
"eval_steps_per_second": 2.248, | |
"step": 22 | |
}, | |
{ | |
"epoch": 0.368, | |
"grad_norm": 86.25955200195312, | |
"learning_rate": 4.1393442622950826e-05, | |
"loss": 1.33, | |
"step": 23 | |
}, | |
{ | |
"epoch": 0.368, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.5443037974683544, | |
"eval_f1_m": 0.5908616326983674, | |
"eval_loss": 1.052020788192749, | |
"eval_runtime": 14.2347, | |
"eval_samples_per_second": 17.563, | |
"eval_steps_per_second": 2.248, | |
"step": 23 | |
}, | |
{ | |
"epoch": 0.384, | |
"grad_norm": 52.738563537597656, | |
"learning_rate": 4.098360655737705e-05, | |
"loss": 1.236, | |
"step": 24 | |
}, | |
{ | |
"epoch": 0.384, | |
"eval_exact_match": 0.2653061224489796, | |
"eval_f1_a": 0.6494845360824741, | |
"eval_f1_m": 0.6442331478045764, | |
"eval_loss": 0.8382735848426819, | |
"eval_runtime": 14.2369, | |
"eval_samples_per_second": 17.56, | |
"eval_steps_per_second": 2.248, | |
"step": 24 | |
}, | |
{ | |
"epoch": 0.4, | |
"grad_norm": 40.95921325683594, | |
"learning_rate": 4.057377049180328e-05, | |
"loss": 0.3315, | |
"step": 25 | |
}, | |
{ | |
"epoch": 0.4, | |
"eval_exact_match": 0.30612244897959184, | |
"eval_f1_a": 0.6899563318777292, | |
"eval_f1_m": 0.6908583779832279, | |
"eval_loss": 0.9057127833366394, | |
"eval_runtime": 14.2385, | |
"eval_samples_per_second": 17.558, | |
"eval_steps_per_second": 2.247, | |
"step": 25 | |
}, | |
{ | |
"epoch": 0.416, | |
"grad_norm": 32.647369384765625, | |
"learning_rate": 4.016393442622951e-05, | |
"loss": 1.2426, | |
"step": 26 | |
}, | |
{ | |
"epoch": 0.416, | |
"eval_exact_match": 0.2857142857142857, | |
"eval_f1_a": 0.6772908366533864, | |
"eval_f1_m": 0.6716122596224635, | |
"eval_loss": 1.041088581085205, | |
"eval_runtime": 14.2297, | |
"eval_samples_per_second": 17.569, | |
"eval_steps_per_second": 2.249, | |
"step": 26 | |
}, | |
{ | |
"epoch": 0.432, | |
"grad_norm": 215.0525665283203, | |
"learning_rate": 3.975409836065574e-05, | |
"loss": 1.6515, | |
"step": 27 | |
}, | |
{ | |
"epoch": 0.432, | |
"eval_exact_match": 0.2857142857142857, | |
"eval_f1_a": 0.674698795180723, | |
"eval_f1_m": 0.6731278658649605, | |
"eval_loss": 1.025064468383789, | |
"eval_runtime": 14.2342, | |
"eval_samples_per_second": 17.563, | |
"eval_steps_per_second": 2.248, | |
"step": 27 | |
}, | |
{ | |
"epoch": 0.448, | |
"grad_norm": 75.84857177734375, | |
"learning_rate": 3.934426229508197e-05, | |
"loss": 0.8826, | |
"step": 28 | |
}, | |
{ | |
"epoch": 0.448, | |
"eval_exact_match": 0.2857142857142857, | |
"eval_f1_a": 0.6515837104072398, | |
"eval_f1_m": 0.6577141679182493, | |
"eval_loss": 0.8711889386177063, | |
"eval_runtime": 14.3415, | |
"eval_samples_per_second": 17.432, | |
"eval_steps_per_second": 2.231, | |
"step": 28 | |
}, | |
{ | |
"epoch": 0.464, | |
"grad_norm": 104.8478775024414, | |
"learning_rate": 3.89344262295082e-05, | |
"loss": 1.3364, | |
"step": 29 | |
}, | |
{ | |
"epoch": 0.464, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6111111111111112, | |
"eval_f1_m": 0.6279861295167416, | |
"eval_loss": 0.8534930348396301, | |
"eval_runtime": 14.3389, | |
"eval_samples_per_second": 17.435, | |
"eval_steps_per_second": 2.232, | |
"step": 29 | |
}, | |
{ | |
"epoch": 0.48, | |
"grad_norm": 141.2751922607422, | |
"learning_rate": 3.8524590163934424e-05, | |
"loss": 0.5787, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.48, | |
"eval_exact_match": 0.1836734693877551, | |
"eval_f1_a": 0.49315068493150677, | |
"eval_f1_m": 0.5514150248844125, | |
"eval_loss": 1.225917935371399, | |
"eval_runtime": 14.2404, | |
"eval_samples_per_second": 17.556, | |
"eval_steps_per_second": 2.247, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.496, | |
"grad_norm": 132.69497680664062, | |
"learning_rate": 3.8114754098360655e-05, | |
"loss": 1.8268, | |
"step": 31 | |
}, | |
{ | |
"epoch": 0.496, | |
"eval_exact_match": 0.10204081632653061, | |
"eval_f1_a": 0.3548387096774194, | |
"eval_f1_m": 0.48598142220591195, | |
"eval_loss": 1.5571385622024536, | |
"eval_runtime": 14.2363, | |
"eval_samples_per_second": 17.561, | |
"eval_steps_per_second": 2.248, | |
"step": 31 | |
}, | |
{ | |
"epoch": 0.512, | |
"grad_norm": 193.7267608642578, | |
"learning_rate": 3.7704918032786885e-05, | |
"loss": 1.176, | |
"step": 32 | |
}, | |
{ | |
"epoch": 0.512, | |
"eval_exact_match": 0.10204081632653061, | |
"eval_f1_a": 0.3414634146341463, | |
"eval_f1_m": 0.48029629101057675, | |
"eval_loss": 1.5923832654953003, | |
"eval_runtime": 14.3471, | |
"eval_samples_per_second": 17.425, | |
"eval_steps_per_second": 2.23, | |
"step": 32 | |
}, | |
{ | |
"epoch": 0.528, | |
"grad_norm": 157.8994903564453, | |
"learning_rate": 3.729508196721312e-05, | |
"loss": 1.1536, | |
"step": 33 | |
}, | |
{ | |
"epoch": 0.528, | |
"eval_exact_match": 0.14285714285714285, | |
"eval_f1_a": 0.43076923076923085, | |
"eval_f1_m": 0.5186639324394426, | |
"eval_loss": 1.3835409879684448, | |
"eval_runtime": 14.3412, | |
"eval_samples_per_second": 17.432, | |
"eval_steps_per_second": 2.231, | |
"step": 33 | |
}, | |
{ | |
"epoch": 0.544, | |
"grad_norm": 106.80976867675781, | |
"learning_rate": 3.6885245901639346e-05, | |
"loss": 0.6749, | |
"step": 34 | |
}, | |
{ | |
"epoch": 0.544, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.5228758169934641, | |
"eval_f1_m": 0.5656035574402921, | |
"eval_loss": 1.0968397855758667, | |
"eval_runtime": 14.2314, | |
"eval_samples_per_second": 17.567, | |
"eval_steps_per_second": 2.249, | |
"step": 34 | |
}, | |
{ | |
"epoch": 0.56, | |
"grad_norm": 41.265201568603516, | |
"learning_rate": 3.6475409836065576e-05, | |
"loss": 1.1197, | |
"step": 35 | |
}, | |
{ | |
"epoch": 0.56, | |
"eval_exact_match": 0.1836734693877551, | |
"eval_f1_a": 0.5595238095238095, | |
"eval_f1_m": 0.5912371075636381, | |
"eval_loss": 0.9161210656166077, | |
"eval_runtime": 14.2362, | |
"eval_samples_per_second": 17.561, | |
"eval_steps_per_second": 2.248, | |
"step": 35 | |
}, | |
{ | |
"epoch": 0.576, | |
"grad_norm": 129.1635284423828, | |
"learning_rate": 3.6065573770491806e-05, | |
"loss": 1.4224, | |
"step": 36 | |
}, | |
{ | |
"epoch": 0.576, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.61, | |
"eval_f1_m": 0.6188252790293606, | |
"eval_loss": 0.8555943369865417, | |
"eval_runtime": 14.241, | |
"eval_samples_per_second": 17.555, | |
"eval_steps_per_second": 2.247, | |
"step": 36 | |
}, | |
{ | |
"epoch": 0.592, | |
"grad_norm": 37.038997650146484, | |
"learning_rate": 3.5655737704918037e-05, | |
"loss": 1.1437, | |
"step": 37 | |
}, | |
{ | |
"epoch": 0.592, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.646288209606987, | |
"eval_f1_m": 0.6307418205377388, | |
"eval_loss": 0.9305726289749146, | |
"eval_runtime": 14.2339, | |
"eval_samples_per_second": 17.564, | |
"eval_steps_per_second": 2.248, | |
"step": 37 | |
}, | |
{ | |
"epoch": 0.608, | |
"grad_norm": 148.70489501953125, | |
"learning_rate": 3.524590163934427e-05, | |
"loss": 1.2494, | |
"step": 38 | |
}, | |
{ | |
"epoch": 0.608, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.639344262295082, | |
"eval_f1_m": 0.6150846052706795, | |
"eval_loss": 0.9929665327072144, | |
"eval_runtime": 14.2304, | |
"eval_samples_per_second": 17.568, | |
"eval_steps_per_second": 2.249, | |
"step": 38 | |
}, | |
{ | |
"epoch": 0.624, | |
"grad_norm": 105.3074722290039, | |
"learning_rate": 3.483606557377049e-05, | |
"loss": 0.86, | |
"step": 39 | |
}, | |
{ | |
"epoch": 0.624, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.652542372881356, | |
"eval_f1_m": 0.6313492063492062, | |
"eval_loss": 0.9553629159927368, | |
"eval_runtime": 14.2397, | |
"eval_samples_per_second": 17.557, | |
"eval_steps_per_second": 2.247, | |
"step": 39 | |
}, | |
{ | |
"epoch": 0.64, | |
"grad_norm": 117.94893646240234, | |
"learning_rate": 3.442622950819672e-05, | |
"loss": 0.9122, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.64, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6190476190476191, | |
"eval_f1_m": 0.6127175545542892, | |
"eval_loss": 0.8853461742401123, | |
"eval_runtime": 14.3327, | |
"eval_samples_per_second": 17.443, | |
"eval_steps_per_second": 2.233, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.656, | |
"grad_norm": 59.64809036254883, | |
"learning_rate": 3.401639344262295e-05, | |
"loss": 0.7094, | |
"step": 41 | |
}, | |
{ | |
"epoch": 0.656, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.5863874345549739, | |
"eval_f1_m": 0.6034632034632034, | |
"eval_loss": 0.8550831079483032, | |
"eval_runtime": 14.3356, | |
"eval_samples_per_second": 17.439, | |
"eval_steps_per_second": 2.232, | |
"step": 41 | |
}, | |
{ | |
"epoch": 0.672, | |
"grad_norm": 26.82879066467285, | |
"learning_rate": 3.360655737704918e-05, | |
"loss": 0.3156, | |
"step": 42 | |
}, | |
{ | |
"epoch": 0.672, | |
"eval_exact_match": 0.16326530612244897, | |
"eval_f1_a": 0.5505617977528091, | |
"eval_f1_m": 0.5863309026574332, | |
"eval_loss": 0.908556342124939, | |
"eval_runtime": 14.3422, | |
"eval_samples_per_second": 17.431, | |
"eval_steps_per_second": 2.231, | |
"step": 42 | |
}, | |
{ | |
"epoch": 0.688, | |
"grad_norm": 37.69074630737305, | |
"learning_rate": 3.319672131147541e-05, | |
"loss": 0.9358, | |
"step": 43 | |
}, | |
{ | |
"epoch": 0.688, | |
"eval_exact_match": 0.1836734693877551, | |
"eval_f1_a": 0.5432098765432098, | |
"eval_f1_m": 0.5794387018876814, | |
"eval_loss": 1.004197597503662, | |
"eval_runtime": 14.3302, | |
"eval_samples_per_second": 17.446, | |
"eval_steps_per_second": 2.233, | |
"step": 43 | |
}, | |
{ | |
"epoch": 0.704, | |
"grad_norm": 85.96589660644531, | |
"learning_rate": 3.2786885245901635e-05, | |
"loss": 1.2701, | |
"step": 44 | |
}, | |
{ | |
"epoch": 0.704, | |
"eval_exact_match": 0.16326530612244897, | |
"eval_f1_a": 0.5128205128205128, | |
"eval_f1_m": 0.5589686957033895, | |
"eval_loss": 1.0330990552902222, | |
"eval_runtime": 14.2281, | |
"eval_samples_per_second": 17.571, | |
"eval_steps_per_second": 2.249, | |
"step": 44 | |
}, | |
{ | |
"epoch": 0.72, | |
"grad_norm": 73.4930191040039, | |
"learning_rate": 3.237704918032787e-05, | |
"loss": 0.6213, | |
"step": 45 | |
}, | |
{ | |
"epoch": 0.72, | |
"eval_exact_match": 0.14285714285714285, | |
"eval_f1_a": 0.46896551724137925, | |
"eval_f1_m": 0.5310531305429266, | |
"eval_loss": 1.1586450338363647, | |
"eval_runtime": 14.3344, | |
"eval_samples_per_second": 17.441, | |
"eval_steps_per_second": 2.232, | |
"step": 45 | |
}, | |
{ | |
"epoch": 0.736, | |
"grad_norm": 25.196025848388672, | |
"learning_rate": 3.19672131147541e-05, | |
"loss": 0.2137, | |
"step": 46 | |
}, | |
{ | |
"epoch": 0.736, | |
"eval_exact_match": 0.14285714285714285, | |
"eval_f1_a": 0.46808510638297873, | |
"eval_f1_m": 0.5367949510806654, | |
"eval_loss": 1.2649037837982178, | |
"eval_runtime": 14.23, | |
"eval_samples_per_second": 17.569, | |
"eval_steps_per_second": 2.249, | |
"step": 46 | |
}, | |
{ | |
"epoch": 0.752, | |
"grad_norm": 149.361328125, | |
"learning_rate": 3.155737704918033e-05, | |
"loss": 1.0182, | |
"step": 47 | |
}, | |
{ | |
"epoch": 0.752, | |
"eval_exact_match": 0.14285714285714285, | |
"eval_f1_a": 0.4647887323943662, | |
"eval_f1_m": 0.5342844230599333, | |
"eval_loss": 1.2592852115631104, | |
"eval_runtime": 14.235, | |
"eval_samples_per_second": 17.562, | |
"eval_steps_per_second": 2.248, | |
"step": 47 | |
}, | |
{ | |
"epoch": 0.768, | |
"grad_norm": 65.61959075927734, | |
"learning_rate": 3.114754098360656e-05, | |
"loss": 0.6999, | |
"step": 48 | |
}, | |
{ | |
"epoch": 0.768, | |
"eval_exact_match": 0.16326530612244897, | |
"eval_f1_a": 0.4697986577181208, | |
"eval_f1_m": 0.5299296508480181, | |
"eval_loss": 1.143484115600586, | |
"eval_runtime": 14.2312, | |
"eval_samples_per_second": 17.567, | |
"eval_steps_per_second": 2.249, | |
"step": 48 | |
}, | |
{ | |
"epoch": 0.784, | |
"grad_norm": 27.904254913330078, | |
"learning_rate": 3.073770491803279e-05, | |
"loss": 0.6342, | |
"step": 49 | |
}, | |
{ | |
"epoch": 0.784, | |
"eval_exact_match": 0.16326530612244897, | |
"eval_f1_a": 0.5389221556886228, | |
"eval_f1_m": 0.5721706864564007, | |
"eval_loss": 1.0147384405136108, | |
"eval_runtime": 14.2319, | |
"eval_samples_per_second": 17.566, | |
"eval_steps_per_second": 2.248, | |
"step": 49 | |
}, | |
{ | |
"epoch": 0.8, | |
"grad_norm": 206.67816162109375, | |
"learning_rate": 3.0327868852459017e-05, | |
"loss": 2.0752, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.8, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.6021505376344086, | |
"eval_f1_m": 0.6184708148993864, | |
"eval_loss": 0.90333092212677, | |
"eval_runtime": 14.234, | |
"eval_samples_per_second": 17.564, | |
"eval_steps_per_second": 2.248, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.816, | |
"grad_norm": 32.51238250732422, | |
"learning_rate": 2.9918032786885248e-05, | |
"loss": 0.6363, | |
"step": 51 | |
}, | |
{ | |
"epoch": 0.816, | |
"eval_exact_match": 0.30612244897959184, | |
"eval_f1_a": 0.6346153846153846, | |
"eval_f1_m": 0.6513524457402007, | |
"eval_loss": 0.9025561809539795, | |
"eval_runtime": 14.3356, | |
"eval_samples_per_second": 17.439, | |
"eval_steps_per_second": 2.232, | |
"step": 51 | |
}, | |
{ | |
"epoch": 0.832, | |
"grad_norm": 88.53801727294922, | |
"learning_rate": 2.9508196721311478e-05, | |
"loss": 0.6725, | |
"step": 52 | |
}, | |
{ | |
"epoch": 0.832, | |
"eval_exact_match": 0.30612244897959184, | |
"eval_f1_a": 0.6542056074766355, | |
"eval_f1_m": 0.6610168742821804, | |
"eval_loss": 0.9258018136024475, | |
"eval_runtime": 14.337, | |
"eval_samples_per_second": 17.437, | |
"eval_steps_per_second": 2.232, | |
"step": 52 | |
}, | |
{ | |
"epoch": 0.848, | |
"grad_norm": 210.28028869628906, | |
"learning_rate": 2.9098360655737705e-05, | |
"loss": 1.4444, | |
"step": 53 | |
}, | |
{ | |
"epoch": 0.848, | |
"eval_exact_match": 0.30612244897959184, | |
"eval_f1_a": 0.6478873239436621, | |
"eval_f1_m": 0.6559332390965044, | |
"eval_loss": 0.9136560559272766, | |
"eval_runtime": 14.3332, | |
"eval_samples_per_second": 17.442, | |
"eval_steps_per_second": 2.233, | |
"step": 53 | |
}, | |
{ | |
"epoch": 0.864, | |
"grad_norm": 130.76451110839844, | |
"learning_rate": 2.8688524590163935e-05, | |
"loss": 1.5007, | |
"step": 54 | |
}, | |
{ | |
"epoch": 0.864, | |
"eval_exact_match": 0.2857142857142857, | |
"eval_f1_a": 0.6435643564356436, | |
"eval_f1_m": 0.6596710545690138, | |
"eval_loss": 0.8902440667152405, | |
"eval_runtime": 14.2297, | |
"eval_samples_per_second": 17.569, | |
"eval_steps_per_second": 2.249, | |
"step": 54 | |
}, | |
{ | |
"epoch": 0.88, | |
"grad_norm": 59.906551361083984, | |
"learning_rate": 2.8278688524590162e-05, | |
"loss": 1.4569, | |
"step": 55 | |
}, | |
{ | |
"epoch": 0.88, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.5901639344262295, | |
"eval_f1_m": 0.6132119581099174, | |
"eval_loss": 0.9152215719223022, | |
"eval_runtime": 14.2381, | |
"eval_samples_per_second": 17.558, | |
"eval_steps_per_second": 2.247, | |
"step": 55 | |
}, | |
{ | |
"epoch": 0.896, | |
"grad_norm": 73.04712677001953, | |
"learning_rate": 2.7868852459016392e-05, | |
"loss": 1.4627, | |
"step": 56 | |
}, | |
{ | |
"epoch": 0.896, | |
"eval_exact_match": 0.16326530612244897, | |
"eval_f1_a": 0.5408805031446541, | |
"eval_f1_m": 0.581694156183952, | |
"eval_loss": 1.0332040786743164, | |
"eval_runtime": 14.235, | |
"eval_samples_per_second": 17.562, | |
"eval_steps_per_second": 2.248, | |
"step": 56 | |
}, | |
{ | |
"epoch": 0.912, | |
"grad_norm": 41.93537902832031, | |
"learning_rate": 2.7459016393442626e-05, | |
"loss": 0.6447, | |
"step": 57 | |
}, | |
{ | |
"epoch": 0.912, | |
"eval_exact_match": 0.1836734693877551, | |
"eval_f1_a": 0.5, | |
"eval_f1_m": 0.5578105001574389, | |
"eval_loss": 1.1393637657165527, | |
"eval_runtime": 14.2337, | |
"eval_samples_per_second": 17.564, | |
"eval_steps_per_second": 2.248, | |
"step": 57 | |
}, | |
{ | |
"epoch": 0.928, | |
"grad_norm": 225.17127990722656, | |
"learning_rate": 2.7049180327868856e-05, | |
"loss": 2.1825, | |
"step": 58 | |
}, | |
{ | |
"epoch": 0.928, | |
"eval_exact_match": 0.1836734693877551, | |
"eval_f1_a": 0.5170068027210885, | |
"eval_f1_m": 0.5664545205361532, | |
"eval_loss": 1.1429673433303833, | |
"eval_runtime": 14.3356, | |
"eval_samples_per_second": 17.439, | |
"eval_steps_per_second": 2.232, | |
"step": 58 | |
}, | |
{ | |
"epoch": 0.944, | |
"grad_norm": 218.0926513671875, | |
"learning_rate": 2.6639344262295087e-05, | |
"loss": 1.5474, | |
"step": 59 | |
}, | |
{ | |
"epoch": 0.944, | |
"eval_exact_match": 0.16326530612244897, | |
"eval_f1_a": 0.5100671140939597, | |
"eval_f1_m": 0.5603939144755471, | |
"eval_loss": 1.0793055295944214, | |
"eval_runtime": 14.3623, | |
"eval_samples_per_second": 17.407, | |
"eval_steps_per_second": 2.228, | |
"step": 59 | |
}, | |
{ | |
"epoch": 0.96, | |
"grad_norm": 214.265625, | |
"learning_rate": 2.6229508196721314e-05, | |
"loss": 2.0897, | |
"step": 60 | |
}, | |
{ | |
"epoch": 0.96, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.5748502994011976, | |
"eval_f1_m": 0.6078411950860929, | |
"eval_loss": 0.9570010900497437, | |
"eval_runtime": 14.3353, | |
"eval_samples_per_second": 17.439, | |
"eval_steps_per_second": 2.232, | |
"step": 60 | |
}, | |
{ | |
"epoch": 0.976, | |
"grad_norm": 30.67082405090332, | |
"learning_rate": 2.5819672131147544e-05, | |
"loss": 0.5137, | |
"step": 61 | |
}, | |
{ | |
"epoch": 0.976, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.5989304812834225, | |
"eval_f1_m": 0.6199340342197485, | |
"eval_loss": 0.8710112571716309, | |
"eval_runtime": 14.2317, | |
"eval_samples_per_second": 17.566, | |
"eval_steps_per_second": 2.249, | |
"step": 61 | |
}, | |
{ | |
"epoch": 0.992, | |
"grad_norm": 44.715511322021484, | |
"learning_rate": 2.540983606557377e-05, | |
"loss": 0.9146, | |
"step": 62 | |
}, | |
{ | |
"epoch": 0.992, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6336633663366337, | |
"eval_f1_m": 0.643560974173219, | |
"eval_loss": 0.8580593466758728, | |
"eval_runtime": 14.2244, | |
"eval_samples_per_second": 17.575, | |
"eval_steps_per_second": 2.25, | |
"step": 62 | |
}, | |
{ | |
"epoch": 1.008, | |
"grad_norm": 47.77970886230469, | |
"learning_rate": 2.5e-05, | |
"loss": 0.8187, | |
"step": 63 | |
}, | |
{ | |
"epoch": 1.008, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6283185840707965, | |
"eval_f1_m": 0.6231513384574607, | |
"eval_loss": 0.9097387194633484, | |
"eval_runtime": 14.2275, | |
"eval_samples_per_second": 17.572, | |
"eval_steps_per_second": 2.249, | |
"step": 63 | |
}, | |
{ | |
"epoch": 1.024, | |
"grad_norm": 62.07459259033203, | |
"learning_rate": 2.459016393442623e-05, | |
"loss": 0.389, | |
"step": 64 | |
}, | |
{ | |
"epoch": 1.024, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6416666666666667, | |
"eval_f1_m": 0.6224541331684187, | |
"eval_loss": 0.9766004085540771, | |
"eval_runtime": 14.328, | |
"eval_samples_per_second": 17.448, | |
"eval_steps_per_second": 2.233, | |
"step": 64 | |
}, | |
{ | |
"epoch": 1.04, | |
"grad_norm": 27.772306442260742, | |
"learning_rate": 2.418032786885246e-05, | |
"loss": 0.5394, | |
"step": 65 | |
}, | |
{ | |
"epoch": 1.04, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.6422764227642277, | |
"eval_f1_m": 0.6168094943605147, | |
"eval_loss": 1.0415246486663818, | |
"eval_runtime": 14.235, | |
"eval_samples_per_second": 17.562, | |
"eval_steps_per_second": 2.248, | |
"step": 65 | |
}, | |
{ | |
"epoch": 1.056, | |
"grad_norm": 72.29753875732422, | |
"learning_rate": 2.377049180327869e-05, | |
"loss": 1.1039, | |
"step": 66 | |
}, | |
{ | |
"epoch": 1.056, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.6448979591836735, | |
"eval_f1_m": 0.6215713991224195, | |
"eval_loss": 1.0369302034378052, | |
"eval_runtime": 14.2353, | |
"eval_samples_per_second": 17.562, | |
"eval_steps_per_second": 2.248, | |
"step": 66 | |
}, | |
{ | |
"epoch": 1.072, | |
"grad_norm": 79.86347198486328, | |
"learning_rate": 2.336065573770492e-05, | |
"loss": 0.3747, | |
"step": 67 | |
}, | |
{ | |
"epoch": 1.072, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6440677966101694, | |
"eval_f1_m": 0.6234590806019376, | |
"eval_loss": 0.971021831035614, | |
"eval_runtime": 14.335, | |
"eval_samples_per_second": 17.44, | |
"eval_steps_per_second": 2.232, | |
"step": 67 | |
}, | |
{ | |
"epoch": 1.088, | |
"grad_norm": 98.71847534179688, | |
"learning_rate": 2.295081967213115e-05, | |
"loss": 0.6412, | |
"step": 68 | |
}, | |
{ | |
"epoch": 1.088, | |
"eval_exact_match": 0.2653061224489796, | |
"eval_f1_a": 0.6301369863013699, | |
"eval_f1_m": 0.6385634773389874, | |
"eval_loss": 0.89739990234375, | |
"eval_runtime": 14.2391, | |
"eval_samples_per_second": 17.557, | |
"eval_steps_per_second": 2.247, | |
"step": 68 | |
}, | |
{ | |
"epoch": 1.104, | |
"grad_norm": 37.62075424194336, | |
"learning_rate": 2.254098360655738e-05, | |
"loss": 0.1785, | |
"step": 69 | |
}, | |
{ | |
"epoch": 1.104, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.6178010471204188, | |
"eval_f1_m": 0.6330241187384045, | |
"eval_loss": 0.8888271450996399, | |
"eval_runtime": 14.2337, | |
"eval_samples_per_second": 17.564, | |
"eval_steps_per_second": 2.248, | |
"step": 69 | |
}, | |
{ | |
"epoch": 1.12, | |
"grad_norm": 39.140846252441406, | |
"learning_rate": 2.2131147540983607e-05, | |
"loss": 0.8809, | |
"step": 70 | |
}, | |
{ | |
"epoch": 1.12, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.5952380952380953, | |
"eval_f1_m": 0.6151768299727483, | |
"eval_loss": 0.9461753964424133, | |
"eval_runtime": 14.2294, | |
"eval_samples_per_second": 17.569, | |
"eval_steps_per_second": 2.249, | |
"step": 70 | |
}, | |
{ | |
"epoch": 1.1360000000000001, | |
"grad_norm": 24.488187789916992, | |
"learning_rate": 2.1721311475409837e-05, | |
"loss": 0.3331, | |
"step": 71 | |
}, | |
{ | |
"epoch": 1.1360000000000001, | |
"eval_exact_match": 0.1836734693877551, | |
"eval_f1_a": 0.5548387096774193, | |
"eval_f1_m": 0.5858735935266548, | |
"eval_loss": 1.0686767101287842, | |
"eval_runtime": 14.2405, | |
"eval_samples_per_second": 17.556, | |
"eval_steps_per_second": 2.247, | |
"step": 71 | |
}, | |
{ | |
"epoch": 1.152, | |
"grad_norm": 159.9743194580078, | |
"learning_rate": 2.1311475409836064e-05, | |
"loss": 0.7266, | |
"step": 72 | |
}, | |
{ | |
"epoch": 1.152, | |
"eval_exact_match": 0.16326530612244897, | |
"eval_f1_a": 0.5205479452054794, | |
"eval_f1_m": 0.5669536812393955, | |
"eval_loss": 1.139631986618042, | |
"eval_runtime": 14.236, | |
"eval_samples_per_second": 17.561, | |
"eval_steps_per_second": 2.248, | |
"step": 72 | |
}, | |
{ | |
"epoch": 1.168, | |
"grad_norm": 180.88392639160156, | |
"learning_rate": 2.0901639344262298e-05, | |
"loss": 2.2863, | |
"step": 73 | |
}, | |
{ | |
"epoch": 1.168, | |
"eval_exact_match": 0.1836734693877551, | |
"eval_f1_a": 0.5466666666666666, | |
"eval_f1_m": 0.5785720515312353, | |
"eval_loss": 1.1217446327209473, | |
"eval_runtime": 14.3346, | |
"eval_samples_per_second": 17.44, | |
"eval_steps_per_second": 2.232, | |
"step": 73 | |
}, | |
{ | |
"epoch": 1.184, | |
"grad_norm": 152.32852172851562, | |
"learning_rate": 2.0491803278688525e-05, | |
"loss": 1.3959, | |
"step": 74 | |
}, | |
{ | |
"epoch": 1.184, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.5822784810126582, | |
"eval_f1_m": 0.6006187123534062, | |
"eval_loss": 1.045620083808899, | |
"eval_runtime": 14.2336, | |
"eval_samples_per_second": 17.564, | |
"eval_steps_per_second": 2.248, | |
"step": 74 | |
}, | |
{ | |
"epoch": 1.2, | |
"grad_norm": 147.99668884277344, | |
"learning_rate": 2.0081967213114755e-05, | |
"loss": 1.1065, | |
"step": 75 | |
}, | |
{ | |
"epoch": 1.2, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6071428571428572, | |
"eval_f1_m": 0.630640221456548, | |
"eval_loss": 0.9323554635047913, | |
"eval_runtime": 14.3399, | |
"eval_samples_per_second": 17.434, | |
"eval_steps_per_second": 2.232, | |
"step": 75 | |
}, | |
{ | |
"epoch": 1.216, | |
"grad_norm": 44.00741958618164, | |
"learning_rate": 1.9672131147540985e-05, | |
"loss": 0.8284, | |
"step": 76 | |
}, | |
{ | |
"epoch": 1.216, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6526315789473685, | |
"eval_f1_m": 0.6551741908884765, | |
"eval_loss": 0.8580492734909058, | |
"eval_runtime": 14.2326, | |
"eval_samples_per_second": 17.565, | |
"eval_steps_per_second": 2.248, | |
"step": 76 | |
}, | |
{ | |
"epoch": 1.232, | |
"grad_norm": 84.2724609375, | |
"learning_rate": 1.9262295081967212e-05, | |
"loss": 1.4605, | |
"step": 77 | |
}, | |
{ | |
"epoch": 1.232, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6542056074766355, | |
"eval_f1_m": 0.6638233648437729, | |
"eval_loss": 0.8490340709686279, | |
"eval_runtime": 14.3306, | |
"eval_samples_per_second": 17.445, | |
"eval_steps_per_second": 2.233, | |
"step": 77 | |
}, | |
{ | |
"epoch": 1.248, | |
"grad_norm": 57.137474060058594, | |
"learning_rate": 1.8852459016393442e-05, | |
"loss": 0.7637, | |
"step": 78 | |
}, | |
{ | |
"epoch": 1.248, | |
"eval_exact_match": 0.2857142857142857, | |
"eval_f1_a": 0.6666666666666667, | |
"eval_f1_m": 0.6427098978119384, | |
"eval_loss": 0.9433552026748657, | |
"eval_runtime": 14.2366, | |
"eval_samples_per_second": 17.56, | |
"eval_steps_per_second": 2.248, | |
"step": 78 | |
}, | |
{ | |
"epoch": 1.264, | |
"grad_norm": 81.78334045410156, | |
"learning_rate": 1.8442622950819673e-05, | |
"loss": 1.0805, | |
"step": 79 | |
}, | |
{ | |
"epoch": 1.264, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6459143968871595, | |
"eval_f1_m": 0.6244088111435049, | |
"eval_loss": 1.1826504468917847, | |
"eval_runtime": 14.3352, | |
"eval_samples_per_second": 17.44, | |
"eval_steps_per_second": 2.232, | |
"step": 79 | |
}, | |
{ | |
"epoch": 1.28, | |
"grad_norm": 168.3184814453125, | |
"learning_rate": 1.8032786885245903e-05, | |
"loss": 0.8076, | |
"step": 80 | |
}, | |
{ | |
"epoch": 1.28, | |
"eval_exact_match": 0.1836734693877551, | |
"eval_f1_a": 0.6472727272727273, | |
"eval_f1_m": 0.6001591112635529, | |
"eval_loss": 1.4046131372451782, | |
"eval_runtime": 14.3393, | |
"eval_samples_per_second": 17.435, | |
"eval_steps_per_second": 2.232, | |
"step": 80 | |
}, | |
{ | |
"epoch": 1.296, | |
"grad_norm": 165.74830627441406, | |
"learning_rate": 1.7622950819672133e-05, | |
"loss": 0.934, | |
"step": 81 | |
}, | |
{ | |
"epoch": 1.296, | |
"eval_exact_match": 0.1836734693877551, | |
"eval_f1_a": 0.6408450704225352, | |
"eval_f1_m": 0.5587504049238743, | |
"eval_loss": 1.519930124282837, | |
"eval_runtime": 14.3365, | |
"eval_samples_per_second": 17.438, | |
"eval_steps_per_second": 2.232, | |
"step": 81 | |
}, | |
{ | |
"epoch": 1.312, | |
"grad_norm": 137.4995880126953, | |
"learning_rate": 1.721311475409836e-05, | |
"loss": 0.9774, | |
"step": 82 | |
}, | |
{ | |
"epoch": 1.312, | |
"eval_exact_match": 0.14285714285714285, | |
"eval_f1_a": 0.6363636363636364, | |
"eval_f1_m": 0.5380992873339812, | |
"eval_loss": 1.5469399690628052, | |
"eval_runtime": 14.4478, | |
"eval_samples_per_second": 17.304, | |
"eval_steps_per_second": 2.215, | |
"step": 82 | |
}, | |
{ | |
"epoch": 1.328, | |
"grad_norm": 280.535400390625, | |
"learning_rate": 1.680327868852459e-05, | |
"loss": 2.8829, | |
"step": 83 | |
}, | |
{ | |
"epoch": 1.328, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.6451612903225806, | |
"eval_f1_m": 0.5875740772499476, | |
"eval_loss": 1.444277286529541, | |
"eval_runtime": 14.2369, | |
"eval_samples_per_second": 17.56, | |
"eval_steps_per_second": 2.248, | |
"step": 83 | |
}, | |
{ | |
"epoch": 1.3439999999999999, | |
"grad_norm": 92.20207214355469, | |
"learning_rate": 1.6393442622950818e-05, | |
"loss": 1.0067, | |
"step": 84 | |
}, | |
{ | |
"epoch": 1.3439999999999999, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6616541353383458, | |
"eval_f1_m": 0.6289330017721373, | |
"eval_loss": 1.285833477973938, | |
"eval_runtime": 14.2358, | |
"eval_samples_per_second": 17.561, | |
"eval_steps_per_second": 2.248, | |
"step": 84 | |
}, | |
{ | |
"epoch": 1.3599999999999999, | |
"grad_norm": 51.42112731933594, | |
"learning_rate": 1.598360655737705e-05, | |
"loss": 0.6843, | |
"step": 85 | |
}, | |
{ | |
"epoch": 1.3599999999999999, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6535433070866141, | |
"eval_f1_m": 0.6334548104956267, | |
"eval_loss": 1.1037089824676514, | |
"eval_runtime": 14.2366, | |
"eval_samples_per_second": 17.56, | |
"eval_steps_per_second": 2.248, | |
"step": 85 | |
}, | |
{ | |
"epoch": 1.376, | |
"grad_norm": 108.26708221435547, | |
"learning_rate": 1.557377049180328e-05, | |
"loss": 0.6217, | |
"step": 86 | |
}, | |
{ | |
"epoch": 1.376, | |
"eval_exact_match": 0.2653061224489796, | |
"eval_f1_a": 0.6611570247933884, | |
"eval_f1_m": 0.6427356656948492, | |
"eval_loss": 0.946870744228363, | |
"eval_runtime": 14.2288, | |
"eval_samples_per_second": 17.57, | |
"eval_steps_per_second": 2.249, | |
"step": 86 | |
}, | |
{ | |
"epoch": 1.392, | |
"grad_norm": 54.97236251831055, | |
"learning_rate": 1.5163934426229509e-05, | |
"loss": 0.7171, | |
"step": 87 | |
}, | |
{ | |
"epoch": 1.392, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6363636363636364, | |
"eval_f1_m": 0.6192088523721175, | |
"eval_loss": 0.8331412672996521, | |
"eval_runtime": 14.2345, | |
"eval_samples_per_second": 17.563, | |
"eval_steps_per_second": 2.248, | |
"step": 87 | |
}, | |
{ | |
"epoch": 1.408, | |
"grad_norm": 199.80453491210938, | |
"learning_rate": 1.4754098360655739e-05, | |
"loss": 0.9963, | |
"step": 88 | |
}, | |
{ | |
"epoch": 1.408, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.6461538461538462, | |
"eval_f1_m": 0.6503011161174427, | |
"eval_loss": 0.8008266091346741, | |
"eval_runtime": 14.3423, | |
"eval_samples_per_second": 17.431, | |
"eval_steps_per_second": 2.231, | |
"step": 88 | |
}, | |
{ | |
"epoch": 1.424, | |
"grad_norm": 103.70662689208984, | |
"learning_rate": 1.4344262295081968e-05, | |
"loss": 0.8336, | |
"step": 89 | |
}, | |
{ | |
"epoch": 1.424, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6035502958579881, | |
"eval_f1_m": 0.6173385005017658, | |
"eval_loss": 0.8721192479133606, | |
"eval_runtime": 14.3376, | |
"eval_samples_per_second": 17.437, | |
"eval_steps_per_second": 2.232, | |
"step": 89 | |
}, | |
{ | |
"epoch": 1.44, | |
"grad_norm": 151.86329650878906, | |
"learning_rate": 1.3934426229508196e-05, | |
"loss": 0.7439, | |
"step": 90 | |
}, | |
{ | |
"epoch": 1.44, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.5897435897435898, | |
"eval_f1_m": 0.6109928620132702, | |
"eval_loss": 0.977756679058075, | |
"eval_runtime": 14.3382, | |
"eval_samples_per_second": 17.436, | |
"eval_steps_per_second": 2.232, | |
"step": 90 | |
}, | |
{ | |
"epoch": 1.456, | |
"grad_norm": 42.42748260498047, | |
"learning_rate": 1.3524590163934428e-05, | |
"loss": 0.4346, | |
"step": 91 | |
}, | |
{ | |
"epoch": 1.456, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.5562913907284768, | |
"eval_f1_m": 0.5869466134772258, | |
"eval_loss": 1.0586659908294678, | |
"eval_runtime": 14.2345, | |
"eval_samples_per_second": 17.563, | |
"eval_steps_per_second": 2.248, | |
"step": 91 | |
}, | |
{ | |
"epoch": 1.472, | |
"grad_norm": 148.7361602783203, | |
"learning_rate": 1.3114754098360657e-05, | |
"loss": 0.6824, | |
"step": 92 | |
}, | |
{ | |
"epoch": 1.472, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.5277777777777778, | |
"eval_f1_m": 0.5762801824026313, | |
"eval_loss": 1.0946743488311768, | |
"eval_runtime": 14.3359, | |
"eval_samples_per_second": 17.439, | |
"eval_steps_per_second": 2.232, | |
"step": 92 | |
}, | |
{ | |
"epoch": 1.488, | |
"grad_norm": 231.66839599609375, | |
"learning_rate": 1.2704918032786885e-05, | |
"loss": 1.486, | |
"step": 93 | |
}, | |
{ | |
"epoch": 1.488, | |
"eval_exact_match": 0.1836734693877551, | |
"eval_f1_a": 0.527027027027027, | |
"eval_f1_m": 0.5733919708409505, | |
"eval_loss": 1.0785720348358154, | |
"eval_runtime": 14.3409, | |
"eval_samples_per_second": 17.433, | |
"eval_steps_per_second": 2.231, | |
"step": 93 | |
}, | |
{ | |
"epoch": 1.504, | |
"grad_norm": 78.1316909790039, | |
"learning_rate": 1.2295081967213116e-05, | |
"loss": 1.3813, | |
"step": 94 | |
}, | |
{ | |
"epoch": 1.504, | |
"eval_exact_match": 0.1836734693877551, | |
"eval_f1_a": 0.527027027027027, | |
"eval_f1_m": 0.5735782358231338, | |
"eval_loss": 1.045979380607605, | |
"eval_runtime": 14.2374, | |
"eval_samples_per_second": 17.559, | |
"eval_steps_per_second": 2.248, | |
"step": 94 | |
}, | |
{ | |
"epoch": 1.52, | |
"grad_norm": 82.6171875, | |
"learning_rate": 1.1885245901639344e-05, | |
"loss": 0.4523, | |
"step": 95 | |
}, | |
{ | |
"epoch": 1.52, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.5859872611464968, | |
"eval_f1_m": 0.606911229360209, | |
"eval_loss": 0.9884187579154968, | |
"eval_runtime": 14.2353, | |
"eval_samples_per_second": 17.562, | |
"eval_steps_per_second": 2.248, | |
"step": 95 | |
}, | |
{ | |
"epoch": 1.536, | |
"grad_norm": 174.6714630126953, | |
"learning_rate": 1.1475409836065575e-05, | |
"loss": 0.6107, | |
"step": 96 | |
}, | |
{ | |
"epoch": 1.536, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.5925925925925926, | |
"eval_f1_m": 0.6086355821049698, | |
"eval_loss": 0.9135745763778687, | |
"eval_runtime": 14.2341, | |
"eval_samples_per_second": 17.563, | |
"eval_steps_per_second": 2.248, | |
"step": 96 | |
}, | |
{ | |
"epoch": 1.552, | |
"grad_norm": 30.871902465820312, | |
"learning_rate": 1.1065573770491803e-05, | |
"loss": 0.6965, | |
"step": 97 | |
}, | |
{ | |
"epoch": 1.552, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.608187134502924, | |
"eval_f1_m": 0.6191871620443049, | |
"eval_loss": 0.8789020776748657, | |
"eval_runtime": 14.2294, | |
"eval_samples_per_second": 17.569, | |
"eval_steps_per_second": 2.249, | |
"step": 97 | |
}, | |
{ | |
"epoch": 1.568, | |
"grad_norm": 64.87429809570312, | |
"learning_rate": 1.0655737704918032e-05, | |
"loss": 0.2532, | |
"step": 98 | |
}, | |
{ | |
"epoch": 1.568, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6101694915254237, | |
"eval_f1_m": 0.6249433106575963, | |
"eval_loss": 0.8408387899398804, | |
"eval_runtime": 14.2324, | |
"eval_samples_per_second": 17.566, | |
"eval_steps_per_second": 2.248, | |
"step": 98 | |
}, | |
{ | |
"epoch": 1.584, | |
"grad_norm": 29.755355834960938, | |
"learning_rate": 1.0245901639344262e-05, | |
"loss": 0.3168, | |
"step": 99 | |
}, | |
{ | |
"epoch": 1.584, | |
"eval_exact_match": 0.1836734693877551, | |
"eval_f1_a": 0.6162162162162163, | |
"eval_f1_m": 0.6253806284418529, | |
"eval_loss": 0.8142973780632019, | |
"eval_runtime": 14.3427, | |
"eval_samples_per_second": 17.43, | |
"eval_steps_per_second": 2.231, | |
"step": 99 | |
}, | |
{ | |
"epoch": 1.6, | |
"grad_norm": 50.73051834106445, | |
"learning_rate": 9.836065573770493e-06, | |
"loss": 0.3733, | |
"step": 100 | |
}, | |
{ | |
"epoch": 1.6, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6217616580310881, | |
"eval_f1_m": 0.6374355802927232, | |
"eval_loss": 0.8038433194160461, | |
"eval_runtime": 14.2306, | |
"eval_samples_per_second": 17.568, | |
"eval_steps_per_second": 2.249, | |
"step": 100 | |
}, | |
{ | |
"epoch": 1.616, | |
"grad_norm": 59.93278884887695, | |
"learning_rate": 9.426229508196721e-06, | |
"loss": 0.6704, | |
"step": 101 | |
}, | |
{ | |
"epoch": 1.616, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6354166666666666, | |
"eval_f1_m": 0.6522800895249876, | |
"eval_loss": 0.8013660311698914, | |
"eval_runtime": 14.3343, | |
"eval_samples_per_second": 17.441, | |
"eval_steps_per_second": 2.232, | |
"step": 101 | |
}, | |
{ | |
"epoch": 1.6320000000000001, | |
"grad_norm": 86.40263366699219, | |
"learning_rate": 9.016393442622952e-06, | |
"loss": 0.3565, | |
"step": 102 | |
}, | |
{ | |
"epoch": 1.6320000000000001, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6321243523316061, | |
"eval_f1_m": 0.6478826162499632, | |
"eval_loss": 0.7988218665122986, | |
"eval_runtime": 14.3368, | |
"eval_samples_per_second": 17.438, | |
"eval_steps_per_second": 2.232, | |
"step": 102 | |
}, | |
{ | |
"epoch": 1.6480000000000001, | |
"grad_norm": 48.909366607666016, | |
"learning_rate": 8.60655737704918e-06, | |
"loss": 0.4804, | |
"step": 103 | |
}, | |
{ | |
"epoch": 1.6480000000000001, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6461538461538462, | |
"eval_f1_m": 0.6624841711576407, | |
"eval_loss": 0.8011977672576904, | |
"eval_runtime": 14.3334, | |
"eval_samples_per_second": 17.442, | |
"eval_steps_per_second": 2.233, | |
"step": 103 | |
}, | |
{ | |
"epoch": 1.6640000000000001, | |
"grad_norm": 45.205810546875, | |
"learning_rate": 8.196721311475409e-06, | |
"loss": 0.4073, | |
"step": 104 | |
}, | |
{ | |
"epoch": 1.6640000000000001, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6288659793814433, | |
"eval_f1_m": 0.6442383013811586, | |
"eval_loss": 0.8024572730064392, | |
"eval_runtime": 14.2326, | |
"eval_samples_per_second": 17.565, | |
"eval_steps_per_second": 2.248, | |
"step": 104 | |
}, | |
{ | |
"epoch": 1.6800000000000002, | |
"grad_norm": 40.28095245361328, | |
"learning_rate": 7.78688524590164e-06, | |
"loss": 0.555, | |
"step": 105 | |
}, | |
{ | |
"epoch": 1.6800000000000002, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.6282722513089005, | |
"eval_f1_m": 0.6386746473481169, | |
"eval_loss": 0.7961085438728333, | |
"eval_runtime": 14.2384, | |
"eval_samples_per_second": 17.558, | |
"eval_steps_per_second": 2.247, | |
"step": 105 | |
}, | |
{ | |
"epoch": 1.696, | |
"grad_norm": 33.259437561035156, | |
"learning_rate": 7.3770491803278695e-06, | |
"loss": 1.0566, | |
"step": 106 | |
}, | |
{ | |
"epoch": 1.696, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.6170212765957447, | |
"eval_f1_m": 0.6252878640633743, | |
"eval_loss": 0.7974104285240173, | |
"eval_runtime": 14.2436, | |
"eval_samples_per_second": 17.552, | |
"eval_steps_per_second": 2.247, | |
"step": 106 | |
}, | |
{ | |
"epoch": 1.712, | |
"grad_norm": 47.35837936401367, | |
"learning_rate": 6.967213114754098e-06, | |
"loss": 0.5672, | |
"step": 107 | |
}, | |
{ | |
"epoch": 1.712, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.6120218579234973, | |
"eval_f1_m": 0.626776511470389, | |
"eval_loss": 0.801659345626831, | |
"eval_runtime": 14.3356, | |
"eval_samples_per_second": 17.439, | |
"eval_steps_per_second": 2.232, | |
"step": 107 | |
}, | |
{ | |
"epoch": 1.728, | |
"grad_norm": 54.06736373901367, | |
"learning_rate": 6.557377049180328e-06, | |
"loss": 1.3321, | |
"step": 108 | |
}, | |
{ | |
"epoch": 1.728, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.6187845303867403, | |
"eval_f1_m": 0.6321215066113025, | |
"eval_loss": 0.8101469874382019, | |
"eval_runtime": 14.2407, | |
"eval_samples_per_second": 17.555, | |
"eval_steps_per_second": 2.247, | |
"step": 108 | |
}, | |
{ | |
"epoch": 1.744, | |
"grad_norm": 28.87479019165039, | |
"learning_rate": 6.147540983606558e-06, | |
"loss": 0.6863, | |
"step": 109 | |
}, | |
{ | |
"epoch": 1.744, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6179775280898876, | |
"eval_f1_m": 0.6296433725005154, | |
"eval_loss": 0.823810875415802, | |
"eval_runtime": 14.3328, | |
"eval_samples_per_second": 17.442, | |
"eval_steps_per_second": 2.233, | |
"step": 109 | |
}, | |
{ | |
"epoch": 1.76, | |
"grad_norm": 31.916561126708984, | |
"learning_rate": 5.737704918032787e-06, | |
"loss": 0.4661, | |
"step": 110 | |
}, | |
{ | |
"epoch": 1.76, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6171428571428571, | |
"eval_f1_m": 0.629597726536502, | |
"eval_loss": 0.8390248417854309, | |
"eval_runtime": 14.3379, | |
"eval_samples_per_second": 17.436, | |
"eval_steps_per_second": 2.232, | |
"step": 110 | |
}, | |
{ | |
"epoch": 1.776, | |
"grad_norm": 23.416114807128906, | |
"learning_rate": 5.327868852459016e-06, | |
"loss": 0.4288, | |
"step": 111 | |
}, | |
{ | |
"epoch": 1.776, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.6046511627906977, | |
"eval_f1_m": 0.6097097347097348, | |
"eval_loss": 0.8482766151428223, | |
"eval_runtime": 14.2326, | |
"eval_samples_per_second": 17.565, | |
"eval_steps_per_second": 2.248, | |
"step": 111 | |
}, | |
{ | |
"epoch": 1.792, | |
"grad_norm": 61.64879608154297, | |
"learning_rate": 4.918032786885246e-06, | |
"loss": 0.7776, | |
"step": 112 | |
}, | |
{ | |
"epoch": 1.792, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6024096385542169, | |
"eval_f1_m": 0.6138944388944387, | |
"eval_loss": 0.8711744546890259, | |
"eval_runtime": 14.2329, | |
"eval_samples_per_second": 17.565, | |
"eval_steps_per_second": 2.248, | |
"step": 112 | |
}, | |
{ | |
"epoch": 1.808, | |
"grad_norm": 36.72175979614258, | |
"learning_rate": 4.508196721311476e-06, | |
"loss": 0.7535, | |
"step": 113 | |
}, | |
{ | |
"epoch": 1.808, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6049382716049383, | |
"eval_f1_m": 0.622810862606781, | |
"eval_loss": 0.8896200656890869, | |
"eval_runtime": 14.3388, | |
"eval_samples_per_second": 17.435, | |
"eval_steps_per_second": 2.232, | |
"step": 113 | |
}, | |
{ | |
"epoch": 1.8239999999999998, | |
"grad_norm": 97.84061431884766, | |
"learning_rate": 4.098360655737704e-06, | |
"loss": 0.9935, | |
"step": 114 | |
}, | |
{ | |
"epoch": 1.8239999999999998, | |
"eval_exact_match": 0.2653061224489796, | |
"eval_f1_a": 0.6097560975609756, | |
"eval_f1_m": 0.6331121259692688, | |
"eval_loss": 0.8925275802612305, | |
"eval_runtime": 14.2301, | |
"eval_samples_per_second": 17.568, | |
"eval_steps_per_second": 2.249, | |
"step": 114 | |
}, | |
{ | |
"epoch": 1.8399999999999999, | |
"grad_norm": 28.994890213012695, | |
"learning_rate": 3.6885245901639347e-06, | |
"loss": 0.6005, | |
"step": 115 | |
}, | |
{ | |
"epoch": 1.8399999999999999, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6097560975609756, | |
"eval_f1_m": 0.62264079457957, | |
"eval_loss": 0.902693510055542, | |
"eval_runtime": 14.324, | |
"eval_samples_per_second": 17.453, | |
"eval_steps_per_second": 2.234, | |
"step": 115 | |
}, | |
{ | |
"epoch": 1.8559999999999999, | |
"grad_norm": 69.83122253417969, | |
"learning_rate": 3.278688524590164e-06, | |
"loss": 0.3714, | |
"step": 116 | |
}, | |
{ | |
"epoch": 1.8559999999999999, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6086956521739131, | |
"eval_f1_m": 0.6247434538250866, | |
"eval_loss": 0.90312659740448, | |
"eval_runtime": 14.24, | |
"eval_samples_per_second": 17.556, | |
"eval_steps_per_second": 2.247, | |
"step": 116 | |
}, | |
{ | |
"epoch": 1.8719999999999999, | |
"grad_norm": 45.33132553100586, | |
"learning_rate": 2.8688524590163937e-06, | |
"loss": 0.2872, | |
"step": 117 | |
}, | |
{ | |
"epoch": 1.8719999999999999, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6012269938650306, | |
"eval_f1_m": 0.6208672280100851, | |
"eval_loss": 0.900492250919342, | |
"eval_runtime": 14.2312, | |
"eval_samples_per_second": 17.567, | |
"eval_steps_per_second": 2.249, | |
"step": 117 | |
}, | |
{ | |
"epoch": 1.888, | |
"grad_norm": 40.021827697753906, | |
"learning_rate": 2.459016393442623e-06, | |
"loss": 1.0036, | |
"step": 118 | |
}, | |
{ | |
"epoch": 1.888, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6097560975609756, | |
"eval_f1_m": 0.6235397255805418, | |
"eval_loss": 0.8864443898200989, | |
"eval_runtime": 14.2356, | |
"eval_samples_per_second": 17.562, | |
"eval_steps_per_second": 2.248, | |
"step": 118 | |
}, | |
{ | |
"epoch": 1.904, | |
"grad_norm": 34.89374923706055, | |
"learning_rate": 2.049180327868852e-06, | |
"loss": 0.4085, | |
"step": 119 | |
}, | |
{ | |
"epoch": 1.904, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6097560975609756, | |
"eval_f1_m": 0.62264079457957, | |
"eval_loss": 0.8801668286323547, | |
"eval_runtime": 14.2335, | |
"eval_samples_per_second": 17.564, | |
"eval_steps_per_second": 2.248, | |
"step": 119 | |
}, | |
{ | |
"epoch": 1.92, | |
"grad_norm": 40.872169494628906, | |
"learning_rate": 1.639344262295082e-06, | |
"loss": 1.2969, | |
"step": 120 | |
}, | |
{ | |
"epoch": 1.92, | |
"eval_exact_match": 0.24489795918367346, | |
"eval_f1_a": 0.6024096385542169, | |
"eval_f1_m": 0.6187181865753294, | |
"eval_loss": 0.8752056956291199, | |
"eval_runtime": 14.2287, | |
"eval_samples_per_second": 17.57, | |
"eval_steps_per_second": 2.249, | |
"step": 120 | |
}, | |
{ | |
"epoch": 1.936, | |
"grad_norm": 29.49952507019043, | |
"learning_rate": 1.2295081967213116e-06, | |
"loss": 0.3546, | |
"step": 121 | |
}, | |
{ | |
"epoch": 1.936, | |
"eval_exact_match": 0.20408163265306123, | |
"eval_f1_a": 0.6071428571428572, | |
"eval_f1_m": 0.6135057119750997, | |
"eval_loss": 0.866171658039093, | |
"eval_runtime": 14.2372, | |
"eval_samples_per_second": 17.56, | |
"eval_steps_per_second": 2.248, | |
"step": 121 | |
}, | |
{ | |
"epoch": 1.952, | |
"grad_norm": 26.981924057006836, | |
"learning_rate": 8.19672131147541e-07, | |
"loss": 0.6466, | |
"step": 122 | |
}, | |
{ | |
"epoch": 1.952, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6107784431137724, | |
"eval_f1_m": 0.6170042542491522, | |
"eval_loss": 0.8648313879966736, | |
"eval_runtime": 14.2434, | |
"eval_samples_per_second": 17.552, | |
"eval_steps_per_second": 2.247, | |
"step": 122 | |
}, | |
{ | |
"epoch": 1.968, | |
"grad_norm": 34.26253128051758, | |
"learning_rate": 4.098360655737705e-07, | |
"loss": 0.1163, | |
"step": 123 | |
}, | |
{ | |
"epoch": 1.968, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6190476190476191, | |
"eval_f1_m": 0.6196767518196089, | |
"eval_loss": 0.858651876449585, | |
"eval_runtime": 14.231, | |
"eval_samples_per_second": 17.567, | |
"eval_steps_per_second": 2.249, | |
"step": 123 | |
}, | |
{ | |
"epoch": 1.984, | |
"grad_norm": 22.19408416748047, | |
"learning_rate": 0.0, | |
"loss": 0.2309, | |
"step": 124 | |
}, | |
{ | |
"epoch": 1.984, | |
"eval_exact_match": 0.22448979591836735, | |
"eval_f1_a": 0.6107784431137724, | |
"eval_f1_m": 0.6170042542491522, | |
"eval_loss": 0.8643447756767273, | |
"eval_runtime": 14.3408, | |
"eval_samples_per_second": 17.433, | |
"eval_steps_per_second": 2.231, | |
"step": 124 | |
}, | |
{ | |
"epoch": 1.984, | |
"step": 124, | |
"total_flos": 133624123228160.0, | |
"train_loss": 0.9533988147853844, | |
"train_runtime": 2319.6694, | |
"train_samples_per_second": 0.862, | |
"train_steps_per_second": 0.053 | |
} | |
], | |
"logging_steps": 1, | |
"max_steps": 124, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 2, | |
"save_steps": 500, | |
"total_flos": 133624123228160.0, | |
"train_batch_size": 2, | |
"trial_name": null, | |
"trial_params": null | |
} | |