{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9999072097986452,
  "eval_steps": 500,
  "global_step": 1347,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007423216108378956,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 1.6654,
      "step": 1
    },
    {
      "epoch": 0.0014846432216757911,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 1.6541,
      "step": 2
    },
    {
      "epoch": 0.0022269648325136866,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 1.4815,
      "step": 3
    },
    {
      "epoch": 0.0029692864433515822,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 1.5496,
      "step": 4
    },
    {
      "epoch": 0.0037116080541894775,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 1.477,
      "step": 5
    },
    {
      "epoch": 0.004453929665027373,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 1.5704,
      "step": 6
    },
    {
      "epoch": 0.005196251275865269,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 1.7545,
      "step": 7
    },
    {
      "epoch": 0.0059385728867031645,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 1.5284,
      "step": 8
    },
    {
      "epoch": 0.006680894497541059,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 1.4984,
      "step": 9
    },
    {
      "epoch": 0.007423216108378955,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 1.576,
      "step": 10
    },
    {
      "epoch": 0.008165537719216851,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 1.8177,
      "step": 11
    },
    {
      "epoch": 0.008907859330054746,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 1.6046,
      "step": 12
    },
    {
      "epoch": 0.009650180940892641,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 1.6048,
      "step": 13
    },
    {
      "epoch": 0.010392502551730538,
      "grad_norm": 2.162328004837036,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 1.6391,
      "step": 14
    },
    {
      "epoch": 0.011134824162568432,
      "grad_norm": 2.17216420173645,
      "learning_rate": 8.000000000000001e-07,
      "loss": 1.5029,
      "step": 15
    },
    {
      "epoch": 0.011877145773406329,
      "grad_norm": 2.3341081142425537,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 1.6207,
      "step": 16
    },
    {
      "epoch": 0.012619467384244224,
      "grad_norm": 2.0773301124572754,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 1.5508,
      "step": 17
    },
    {
      "epoch": 0.013361788995082119,
      "grad_norm": 2.2292685508728027,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.5605,
      "step": 18
    },
    {
      "epoch": 0.014104110605920015,
      "grad_norm": 1.7689887285232544,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 1.4014,
      "step": 19
    },
    {
      "epoch": 0.01484643221675791,
      "grad_norm": 2.2630300521850586,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 1.6371,
      "step": 20
    },
    {
      "epoch": 0.015588753827595806,
      "grad_norm": 1.9840623140335083,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 1.5566,
      "step": 21
    },
    {
      "epoch": 0.016331075438433703,
      "grad_norm": 2.096895933151245,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 1.57,
      "step": 22
    },
    {
      "epoch": 0.017073397049271598,
      "grad_norm": 1.6705613136291504,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.5249,
      "step": 23
    },
    {
      "epoch": 0.017815718660109493,
      "grad_norm": 1.7144721746444702,
      "learning_rate": 4.4e-06,
      "loss": 1.4803,
      "step": 24
    },
    {
      "epoch": 0.018558040270947387,
      "grad_norm": 1.4625688791275024,
      "learning_rate": 4.800000000000001e-06,
      "loss": 1.3866,
      "step": 25
    },
    {
      "epoch": 0.019300361881785282,
      "grad_norm": 1.5206489562988281,
      "learning_rate": 5.2e-06,
      "loss": 1.5484,
      "step": 26
    },
    {
      "epoch": 0.02004268349262318,
      "grad_norm": 1.3581645488739014,
      "learning_rate": 5.600000000000001e-06,
      "loss": 1.4105,
      "step": 27
    },
    {
      "epoch": 0.020785005103461075,
      "grad_norm": 1.4587604999542236,
      "learning_rate": 6e-06,
      "loss": 1.5549,
      "step": 28
    },
    {
      "epoch": 0.02152732671429897,
      "grad_norm": 1.3189691305160522,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 1.3761,
      "step": 29
    },
    {
      "epoch": 0.022269648325136865,
      "grad_norm": 1.3762433528900146,
      "learning_rate": 6.800000000000001e-06,
      "loss": 1.4088,
      "step": 30
    },
    {
      "epoch": 0.02301196993597476,
      "grad_norm": 1.269411325454712,
      "learning_rate": 7.2000000000000005e-06,
      "loss": 1.4693,
      "step": 31
    },
    {
      "epoch": 0.023754291546812658,
      "grad_norm": 1.4452253580093384,
      "learning_rate": 7.600000000000001e-06,
      "loss": 1.3392,
      "step": 32
    },
    {
      "epoch": 0.024496613157650553,
      "grad_norm": 1.1886940002441406,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.3498,
      "step": 33
    },
    {
      "epoch": 0.025238934768488448,
      "grad_norm": 1.0324665307998657,
      "learning_rate": 8.400000000000001e-06,
      "loss": 1.1602,
      "step": 34
    },
    {
      "epoch": 0.025981256379326342,
      "grad_norm": 1.0723854303359985,
      "learning_rate": 8.8e-06,
      "loss": 1.2315,
      "step": 35
    },
    {
      "epoch": 0.026723577990164237,
      "grad_norm": 1.045143961906433,
      "learning_rate": 9.200000000000002e-06,
      "loss": 1.1834,
      "step": 36
    },
    {
      "epoch": 0.027465899601002135,
      "grad_norm": 0.9035683870315552,
      "learning_rate": 9.600000000000001e-06,
      "loss": 1.2346,
      "step": 37
    },
    {
      "epoch": 0.02820822121184003,
      "grad_norm": 0.9809949994087219,
      "learning_rate": 1e-05,
      "loss": 1.161,
      "step": 38
    },
    {
      "epoch": 0.028950542822677925,
      "grad_norm": 0.7753032445907593,
      "learning_rate": 1.04e-05,
      "loss": 1.1423,
      "step": 39
    },
    {
      "epoch": 0.02969286443351582,
      "grad_norm": 0.821992814540863,
      "learning_rate": 1.0800000000000002e-05,
      "loss": 1.0866,
      "step": 40
    },
    {
      "epoch": 0.030435186044353715,
      "grad_norm": 0.768587052822113,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 1.1209,
      "step": 41
    },
    {
      "epoch": 0.031177507655191613,
      "grad_norm": 0.7687053084373474,
      "learning_rate": 1.16e-05,
      "loss": 1.0443,
      "step": 42
    },
    {
      "epoch": 0.03191982926602951,
      "grad_norm": 0.7157124280929565,
      "learning_rate": 1.2e-05,
      "loss": 1.0,
      "step": 43
    },
    {
      "epoch": 0.032662150876867406,
      "grad_norm": 0.6085985898971558,
      "learning_rate": 1.2400000000000002e-05,
      "loss": 1.1363,
      "step": 44
    },
    {
      "epoch": 0.0334044724877053,
      "grad_norm": 0.5863650441169739,
      "learning_rate": 1.2800000000000001e-05,
      "loss": 1.0818,
      "step": 45
    },
    {
      "epoch": 0.034146794098543196,
      "grad_norm": 0.5280351638793945,
      "learning_rate": 1.3200000000000002e-05,
      "loss": 1.0964,
      "step": 46
    },
    {
      "epoch": 0.03488911570938109,
      "grad_norm": 0.5869213938713074,
      "learning_rate": 1.3600000000000002e-05,
      "loss": 0.9784,
      "step": 47
    },
    {
      "epoch": 0.035631437320218985,
      "grad_norm": 0.5950367450714111,
      "learning_rate": 1.4e-05,
      "loss": 1.1197,
      "step": 48
    },
    {
      "epoch": 0.03637375893105688,
      "grad_norm": 0.5516757369041443,
      "learning_rate": 1.4400000000000001e-05,
      "loss": 1.0141,
      "step": 49
    },
    {
      "epoch": 0.037116080541894775,
      "grad_norm": 0.4884754717350006,
      "learning_rate": 1.48e-05,
      "loss": 1.0251,
      "step": 50
    },
    {
      "epoch": 0.03785840215273267,
      "grad_norm": 0.570762038230896,
      "learning_rate": 1.5200000000000002e-05,
      "loss": 0.9775,
      "step": 51
    },
    {
      "epoch": 0.038600723763570564,
      "grad_norm": 0.5657237768173218,
      "learning_rate": 1.5600000000000003e-05,
      "loss": 1.0276,
      "step": 52
    },
    {
      "epoch": 0.03934304537440846,
      "grad_norm": 0.584648609161377,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.9453,
      "step": 53
    },
    {
      "epoch": 0.04008536698524636,
      "grad_norm": 0.4895365238189697,
      "learning_rate": 1.64e-05,
      "loss": 1.0402,
      "step": 54
    },
    {
      "epoch": 0.04082768859608425,
      "grad_norm": 0.5166955590248108,
      "learning_rate": 1.6800000000000002e-05,
      "loss": 0.9825,
      "step": 55
    },
    {
      "epoch": 0.04157001020692215,
      "grad_norm": 0.5778055787086487,
      "learning_rate": 1.72e-05,
      "loss": 0.9668,
      "step": 56
    },
    {
      "epoch": 0.04231233181776004,
      "grad_norm": 0.4417908489704132,
      "learning_rate": 1.76e-05,
      "loss": 0.9147,
      "step": 57
    },
    {
      "epoch": 0.04305465342859794,
      "grad_norm": 0.5314612984657288,
      "learning_rate": 1.8e-05,
      "loss": 0.9739,
      "step": 58
    },
    {
      "epoch": 0.04379697503943584,
      "grad_norm": 0.44156596064567566,
      "learning_rate": 1.8400000000000003e-05,
      "loss": 0.9353,
      "step": 59
    },
    {
      "epoch": 0.04453929665027373,
      "grad_norm": 0.46377748250961304,
      "learning_rate": 1.88e-05,
      "loss": 1.0251,
      "step": 60
    },
    {
      "epoch": 0.04528161826111163,
      "grad_norm": 0.5635647177696228,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 0.9475,
      "step": 61
    },
    {
      "epoch": 0.04602393987194952,
      "grad_norm": 0.5183905959129333,
      "learning_rate": 1.9600000000000002e-05,
      "loss": 0.9855,
      "step": 62
    },
    {
      "epoch": 0.04676626148278742,
      "grad_norm": 0.43670588731765747,
      "learning_rate": 2e-05,
      "loss": 0.9497,
      "step": 63
    },
    {
      "epoch": 0.047508583093625316,
      "grad_norm": 0.42160764336586,
      "learning_rate": 2.04e-05,
      "loss": 0.9568,
      "step": 64
    },
    {
      "epoch": 0.04825090470446321,
      "grad_norm": 0.46232110261917114,
      "learning_rate": 2.08e-05,
      "loss": 1.019,
      "step": 65
    },
    {
      "epoch": 0.048993226315301106,
      "grad_norm": 0.46109461784362793,
      "learning_rate": 2.1200000000000004e-05,
      "loss": 0.9295,
      "step": 66
    },
    {
      "epoch": 0.049735547926139,
      "grad_norm": 0.4705204665660858,
      "learning_rate": 2.1600000000000003e-05,
      "loss": 0.9813,
      "step": 67
    },
    {
      "epoch": 0.050477869536976895,
      "grad_norm": 0.4682143032550812,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 0.9802,
      "step": 68
    },
    {
      "epoch": 0.05122019114781479,
      "grad_norm": 0.5252828598022461,
      "learning_rate": 2.2400000000000002e-05,
      "loss": 1.002,
      "step": 69
    },
    {
      "epoch": 0.051962512758652685,
      "grad_norm": 0.38845106959342957,
      "learning_rate": 2.28e-05,
      "loss": 0.8624,
      "step": 70
    },
    {
      "epoch": 0.05270483436949058,
      "grad_norm": 0.4243197739124298,
      "learning_rate": 2.32e-05,
      "loss": 0.8991,
      "step": 71
    },
    {
      "epoch": 0.053447155980328474,
      "grad_norm": 0.45470067858695984,
      "learning_rate": 2.36e-05,
      "loss": 0.8961,
      "step": 72
    },
    {
      "epoch": 0.05418947759116637,
      "grad_norm": 0.5030398964881897,
      "learning_rate": 2.4e-05,
      "loss": 1.0106,
      "step": 73
    },
    {
      "epoch": 0.05493179920200427,
      "grad_norm": 0.4755796194076538,
      "learning_rate": 2.44e-05,
      "loss": 0.9399,
      "step": 74
    },
    {
      "epoch": 0.05567412081284216,
      "grad_norm": 0.5100265741348267,
      "learning_rate": 2.4800000000000003e-05,
      "loss": 0.9387,
      "step": 75
    },
    {
      "epoch": 0.05641644242368006,
      "grad_norm": 0.4337320327758789,
      "learning_rate": 2.5200000000000003e-05,
      "loss": 0.92,
      "step": 76
    },
    {
      "epoch": 0.05715876403451795,
      "grad_norm": 0.44164350628852844,
      "learning_rate": 2.5600000000000002e-05,
      "loss": 0.8263,
      "step": 77
    },
    {
      "epoch": 0.05790108564535585,
      "grad_norm": 0.3856760561466217,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 0.9932,
      "step": 78
    },
    {
      "epoch": 0.05864340725619375,
      "grad_norm": 0.3697056472301483,
      "learning_rate": 2.6400000000000005e-05,
      "loss": 0.9092,
      "step": 79
    },
    {
      "epoch": 0.05938572886703164,
      "grad_norm": 0.4331798255443573,
      "learning_rate": 2.6800000000000004e-05,
      "loss": 0.8742,
      "step": 80
    },
    {
      "epoch": 0.06012805047786954,
      "grad_norm": 0.49398401379585266,
      "learning_rate": 2.7200000000000004e-05,
      "loss": 0.9422,
      "step": 81
    },
    {
      "epoch": 0.06087037208870743,
      "grad_norm": 0.3899066150188446,
      "learning_rate": 2.76e-05,
      "loss": 0.9762,
      "step": 82
    },
    {
      "epoch": 0.06161269369954533,
      "grad_norm": 0.4635457992553711,
      "learning_rate": 2.8e-05,
      "loss": 0.7991,
      "step": 83
    },
    {
      "epoch": 0.062355015310383226,
      "grad_norm": 0.4666687548160553,
      "learning_rate": 2.8400000000000003e-05,
      "loss": 0.8868,
      "step": 84
    },
    {
      "epoch": 0.06309733692122112,
      "grad_norm": 0.3857990801334381,
      "learning_rate": 2.8800000000000002e-05,
      "loss": 0.8029,
      "step": 85
    },
    {
      "epoch": 0.06383965853205902,
      "grad_norm": 0.44243311882019043,
      "learning_rate": 2.92e-05,
      "loss": 0.9814,
      "step": 86
    },
    {
      "epoch": 0.06458198014289691,
      "grad_norm": 0.45594194531440735,
      "learning_rate": 2.96e-05,
      "loss": 0.9913,
      "step": 87
    },
    {
      "epoch": 0.06532430175373481,
      "grad_norm": 0.43604806065559387,
      "learning_rate": 3.0000000000000004e-05,
      "loss": 0.8859,
      "step": 88
    },
    {
      "epoch": 0.0660666233645727,
      "grad_norm": 0.41517373919487,
      "learning_rate": 3.0400000000000004e-05,
      "loss": 0.8824,
      "step": 89
    },
    {
      "epoch": 0.0668089449754106,
      "grad_norm": 0.4230550229549408,
      "learning_rate": 3.08e-05,
      "loss": 0.9572,
      "step": 90
    },
    {
      "epoch": 0.06755126658624849,
      "grad_norm": 0.42611706256866455,
      "learning_rate": 3.1200000000000006e-05,
      "loss": 0.9215,
      "step": 91
    },
    {
      "epoch": 0.06829358819708639,
      "grad_norm": 0.39391592144966125,
      "learning_rate": 3.16e-05,
      "loss": 0.8478,
      "step": 92
    },
    {
      "epoch": 0.06903590980792429,
      "grad_norm": 0.39002394676208496,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 0.9072,
      "step": 93
    },
    {
      "epoch": 0.06977823141876217,
      "grad_norm": 0.4524936079978943,
      "learning_rate": 3.24e-05,
      "loss": 0.899,
      "step": 94
    },
    {
      "epoch": 0.07052055302960007,
      "grad_norm": 0.4666615128517151,
      "learning_rate": 3.28e-05,
      "loss": 0.9768,
      "step": 95
    },
    {
      "epoch": 0.07126287464043797,
      "grad_norm": 0.4445357322692871,
      "learning_rate": 3.32e-05,
      "loss": 0.9929,
      "step": 96
    },
    {
      "epoch": 0.07200519625127587,
      "grad_norm": 0.5923082232475281,
      "learning_rate": 3.3600000000000004e-05,
      "loss": 0.86,
      "step": 97
    },
    {
      "epoch": 0.07274751786211377,
      "grad_norm": 0.41096585988998413,
      "learning_rate": 3.4e-05,
      "loss": 1.0012,
      "step": 98
    },
    {
      "epoch": 0.07348983947295165,
      "grad_norm": 0.4832744896411896,
      "learning_rate": 3.44e-05,
      "loss": 0.8828,
      "step": 99
    },
    {
      "epoch": 0.07423216108378955,
      "grad_norm": 0.4651799499988556,
      "learning_rate": 3.4800000000000006e-05,
      "loss": 0.9219,
      "step": 100
    },
    {
      "epoch": 0.07497448269462745,
      "grad_norm": 0.4387566149234772,
      "learning_rate": 3.52e-05,
      "loss": 0.9121,
      "step": 101
    },
    {
      "epoch": 0.07571680430546535,
      "grad_norm": 0.3984525501728058,
      "learning_rate": 3.5600000000000005e-05,
      "loss": 0.8894,
      "step": 102
    },
    {
      "epoch": 0.07645912591630324,
      "grad_norm": 0.4379989206790924,
      "learning_rate": 3.6e-05,
      "loss": 0.9295,
      "step": 103
    },
    {
      "epoch": 0.07720144752714113,
      "grad_norm": 0.44189852476119995,
      "learning_rate": 3.6400000000000004e-05,
      "loss": 0.8774,
      "step": 104
    },
    {
      "epoch": 0.07794376913797903,
      "grad_norm": 0.4428102672100067,
      "learning_rate": 3.680000000000001e-05,
      "loss": 0.9494,
      "step": 105
    },
    {
      "epoch": 0.07868609074881693,
      "grad_norm": 0.42305949330329895,
      "learning_rate": 3.72e-05,
      "loss": 0.8043,
      "step": 106
    },
    {
      "epoch": 0.07942841235965482,
      "grad_norm": 0.44839444756507874,
      "learning_rate": 3.76e-05,
      "loss": 0.896,
      "step": 107
    },
    {
      "epoch": 0.08017073397049272,
      "grad_norm": 0.5253618955612183,
      "learning_rate": 3.8e-05,
      "loss": 0.8899,
      "step": 108
    },
    {
      "epoch": 0.0809130555813306,
      "grad_norm": 0.4362352192401886,
      "learning_rate": 3.8400000000000005e-05,
      "loss": 0.8541,
      "step": 109
    },
    {
      "epoch": 0.0816553771921685,
      "grad_norm": 0.4516163468360901,
      "learning_rate": 3.88e-05,
      "loss": 0.9253,
      "step": 110
    },
    {
      "epoch": 0.0823976988030064,
      "grad_norm": 0.5379387736320496,
      "learning_rate": 3.9200000000000004e-05,
      "loss": 0.9011,
      "step": 111
    },
    {
      "epoch": 0.0831400204138443,
      "grad_norm": 0.48964133858680725,
      "learning_rate": 3.96e-05,
      "loss": 0.8902,
      "step": 112
    },
    {
      "epoch": 0.0838823420246822,
      "grad_norm": 0.5008836984634399,
      "learning_rate": 4e-05,
      "loss": 0.8841,
      "step": 113
    },
    {
      "epoch": 0.08462466363552008,
      "grad_norm": 0.47988569736480713,
      "learning_rate": 3.9999985332376666e-05,
      "loss": 0.9657,
      "step": 114
    },
    {
      "epoch": 0.08536698524635798,
      "grad_norm": 0.4180363714694977,
      "learning_rate": 3.999994132952817e-05,
      "loss": 0.8685,
      "step": 115
    },
    {
      "epoch": 0.08610930685719588,
      "grad_norm": 0.42892715334892273,
      "learning_rate": 3.999986799151905e-05,
      "loss": 0.8255,
      "step": 116
    },
    {
      "epoch": 0.08685162846803378,
      "grad_norm": 0.5795795917510986,
      "learning_rate": 3.999976531845688e-05,
      "loss": 0.8694,
      "step": 117
    },
    {
      "epoch": 0.08759395007887168,
      "grad_norm": 0.4624157249927521,
      "learning_rate": 3.9999633310492266e-05,
      "loss": 0.9485,
      "step": 118
    },
    {
      "epoch": 0.08833627168970956,
      "grad_norm": 0.4330659806728363,
      "learning_rate": 3.999947196781881e-05,
      "loss": 0.9166,
      "step": 119
    },
    {
      "epoch": 0.08907859330054746,
      "grad_norm": 0.46188855171203613,
      "learning_rate": 3.999928129067319e-05,
      "loss": 0.8938,
      "step": 120
    },
    {
      "epoch": 0.08982091491138536,
      "grad_norm": 0.45563969016075134,
      "learning_rate": 3.999906127933506e-05,
      "loss": 0.9488,
      "step": 121
    },
    {
      "epoch": 0.09056323652222326,
      "grad_norm": 0.46216803789138794,
      "learning_rate": 3.999881193412714e-05,
      "loss": 0.9242,
      "step": 122
    },
    {
      "epoch": 0.09130555813306115,
      "grad_norm": 0.40392884612083435,
      "learning_rate": 3.999853325541516e-05,
      "loss": 1.0076,
      "step": 123
    },
    {
      "epoch": 0.09204787974389904,
      "grad_norm": 0.5220507979393005,
      "learning_rate": 3.999822524360787e-05,
      "loss": 0.8663,
      "step": 124
    },
    {
      "epoch": 0.09279020135473694,
      "grad_norm": 0.49559637904167175,
      "learning_rate": 3.999788789915705e-05,
      "loss": 0.8733,
      "step": 125
    },
    {
      "epoch": 0.09353252296557484,
      "grad_norm": 0.5094253420829773,
      "learning_rate": 3.9997521222557496e-05,
      "loss": 0.9381,
      "step": 126
    },
    {
      "epoch": 0.09427484457641273,
      "grad_norm": 0.4276386797428131,
      "learning_rate": 3.999712521434705e-05,
      "loss": 0.8812,
      "step": 127
    },
    {
      "epoch": 0.09501716618725063,
      "grad_norm": 0.49965962767601013,
      "learning_rate": 3.999669987510656e-05,
      "loss": 1.0346,
      "step": 128
    },
    {
      "epoch": 0.09575948779808852,
      "grad_norm": 0.442796528339386,
      "learning_rate": 3.9996245205459894e-05,
      "loss": 0.8995,
      "step": 129
    },
    {
      "epoch": 0.09650180940892641,
      "grad_norm": 0.47127169370651245,
      "learning_rate": 3.999576120607394e-05,
      "loss": 0.925,
      "step": 130
    },
    {
      "epoch": 0.09724413101976431,
      "grad_norm": 0.42022350430488586,
      "learning_rate": 3.999524787765862e-05,
      "loss": 0.9539,
      "step": 131
    },
    {
      "epoch": 0.09798645263060221,
      "grad_norm": 0.4752717614173889,
      "learning_rate": 3.999470522096685e-05,
      "loss": 0.8577,
      "step": 132
    },
    {
      "epoch": 0.09872877424144011,
      "grad_norm": 0.5515152812004089,
      "learning_rate": 3.99941332367946e-05,
      "loss": 0.8861,
      "step": 133
    },
    {
      "epoch": 0.099471095852278,
      "grad_norm": 0.49411219358444214,
      "learning_rate": 3.9993531925980816e-05,
      "loss": 0.9313,
      "step": 134
    },
    {
      "epoch": 0.10021341746311589,
      "grad_norm": 0.49033278226852417,
      "learning_rate": 3.9992901289407486e-05,
      "loss": 0.9737,
      "step": 135
    },
    {
      "epoch": 0.10095573907395379,
      "grad_norm": 0.5231755375862122,
      "learning_rate": 3.9992241327999596e-05,
      "loss": 0.9684,
      "step": 136
    },
    {
      "epoch": 0.10169806068479169,
      "grad_norm": 0.443928062915802,
      "learning_rate": 3.999155204272517e-05,
      "loss": 0.8833,
      "step": 137
    },
    {
      "epoch": 0.10244038229562959,
      "grad_norm": 0.4927554726600647,
      "learning_rate": 3.9990833434595204e-05,
      "loss": 0.9229,
      "step": 138
    },
    {
      "epoch": 0.10318270390646747,
      "grad_norm": 0.4793432950973511,
      "learning_rate": 3.999008550466374e-05,
      "loss": 0.8628,
      "step": 139
    },
    {
      "epoch": 0.10392502551730537,
      "grad_norm": 0.44405823945999146,
      "learning_rate": 3.998930825402781e-05,
      "loss": 0.9081,
      "step": 140
    },
    {
      "epoch": 0.10466734712814327,
      "grad_norm": 0.46231722831726074,
      "learning_rate": 3.998850168382746e-05,
      "loss": 0.9286,
      "step": 141
    },
    {
      "epoch": 0.10540966873898117,
      "grad_norm": 0.42269936203956604,
      "learning_rate": 3.9987665795245727e-05,
      "loss": 0.9151,
      "step": 142
    },
    {
      "epoch": 0.10615199034981906,
      "grad_norm": 0.44719186425209045,
      "learning_rate": 3.998680058950867e-05,
      "loss": 0.9411,
      "step": 143
    },
    {
      "epoch": 0.10689431196065695,
      "grad_norm": 0.4178447127342224,
      "learning_rate": 3.998590606788533e-05,
      "loss": 0.9347,
      "step": 144
    },
    {
      "epoch": 0.10763663357149485,
      "grad_norm": 0.5274525284767151,
      "learning_rate": 3.9984982231687765e-05,
      "loss": 0.9379,
      "step": 145
    },
    {
      "epoch": 0.10837895518233275,
      "grad_norm": 0.4062984883785248,
      "learning_rate": 3.9984029082271024e-05,
      "loss": 0.7217,
      "step": 146
    },
    {
      "epoch": 0.10912127679317064,
      "grad_norm": 0.42657554149627686,
      "learning_rate": 3.998304662103315e-05,
      "loss": 0.9075,
      "step": 147
    },
    {
      "epoch": 0.10986359840400854,
      "grad_norm": 0.38112106919288635,
      "learning_rate": 3.9982034849415174e-05,
      "loss": 0.891,
      "step": 148
    },
    {
      "epoch": 0.11060592001484643,
      "grad_norm": 0.4420554041862488,
      "learning_rate": 3.998099376890114e-05,
      "loss": 0.8228,
      "step": 149
    },
    {
      "epoch": 0.11134824162568432,
      "grad_norm": 0.4574986696243286,
      "learning_rate": 3.997992338101805e-05,
      "loss": 0.8934,
      "step": 150
    },
    {
      "epoch": 0.11209056323652222,
      "grad_norm": 0.5493448376655579,
      "learning_rate": 3.997882368733591e-05,
      "loss": 0.8621,
      "step": 151
    },
    {
      "epoch": 0.11283288484736012,
      "grad_norm": 0.46607592701911926,
      "learning_rate": 3.9977694689467714e-05,
      "loss": 0.9219,
      "step": 152
    },
    {
      "epoch": 0.11357520645819802,
      "grad_norm": 0.3952650725841522,
      "learning_rate": 3.997653638906943e-05,
      "loss": 0.9321,
      "step": 153
    },
    {
      "epoch": 0.1143175280690359,
      "grad_norm": 0.45397239923477173,
      "learning_rate": 3.997534878784002e-05,
      "loss": 0.9325,
      "step": 154
    },
    {
      "epoch": 0.1150598496798738,
      "grad_norm": 0.4392563998699188,
      "learning_rate": 3.99741318875214e-05,
      "loss": 0.9588,
      "step": 155
    },
    {
      "epoch": 0.1158021712907117,
      "grad_norm": 0.44695863127708435,
      "learning_rate": 3.997288568989848e-05,
      "loss": 0.8417,
      "step": 156
    },
    {
      "epoch": 0.1165444929015496,
      "grad_norm": 0.42836833000183105,
      "learning_rate": 3.997161019679913e-05,
      "loss": 0.9387,
      "step": 157
    },
    {
      "epoch": 0.1172868145123875,
      "grad_norm": 0.493874728679657,
      "learning_rate": 3.9970305410094206e-05,
      "loss": 0.8795,
      "step": 158
    },
    {
      "epoch": 0.11802913612322538,
      "grad_norm": 0.4306289851665497,
      "learning_rate": 3.99689713316975e-05,
      "loss": 0.9509,
      "step": 159
    },
    {
      "epoch": 0.11877145773406328,
      "grad_norm": 0.512447714805603,
      "learning_rate": 3.996760796356581e-05,
      "loss": 1.0612,
      "step": 160
    },
    {
      "epoch": 0.11951377934490118,
      "grad_norm": 0.42709997296333313,
      "learning_rate": 3.9966215307698865e-05,
      "loss": 0.8967,
      "step": 161
    },
    {
      "epoch": 0.12025610095573908,
      "grad_norm": 0.4356139600276947,
      "learning_rate": 3.996479336613936e-05,
      "loss": 0.9122,
      "step": 162
    },
    {
      "epoch": 0.12099842256657697,
      "grad_norm": 0.447262167930603,
      "learning_rate": 3.996334214097294e-05,
      "loss": 0.8335,
      "step": 163
    },
    {
      "epoch": 0.12174074417741486,
      "grad_norm": 0.45617246627807617,
      "learning_rate": 3.996186163432822e-05,
      "loss": 0.9416,
      "step": 164
    },
    {
      "epoch": 0.12248306578825276,
      "grad_norm": 0.4708889424800873,
      "learning_rate": 3.996035184837674e-05,
      "loss": 0.9881,
      "step": 165
    },
    {
      "epoch": 0.12322538739909066,
      "grad_norm": 0.43020421266555786,
      "learning_rate": 3.9958812785332994e-05,
      "loss": 0.9952,
      "step": 166
    },
    {
      "epoch": 0.12396770900992855,
      "grad_norm": 0.4495503306388855,
      "learning_rate": 3.9957244447454436e-05,
      "loss": 0.8973,
      "step": 167
    },
    {
      "epoch": 0.12471003062076645,
      "grad_norm": 0.54659503698349,
      "learning_rate": 3.9955646837041435e-05,
      "loss": 0.9545,
      "step": 168
    },
    {
      "epoch": 0.12545235223160434,
      "grad_norm": 0.5163118839263916,
      "learning_rate": 3.9954019956437304e-05,
      "loss": 0.9693,
      "step": 169
    },
    {
      "epoch": 0.12619467384244223,
      "grad_norm": 0.4061867594718933,
      "learning_rate": 3.99523638080283e-05,
      "loss": 0.884,
      "step": 170
    },
    {
      "epoch": 0.12693699545328013,
      "grad_norm": 0.3987230658531189,
      "learning_rate": 3.995067839424359e-05,
      "loss": 0.9333,
      "step": 171
    },
    {
      "epoch": 0.12767931706411803,
      "grad_norm": 0.41483163833618164,
      "learning_rate": 3.994896371755528e-05,
      "loss": 0.8577,
      "step": 172
    },
    {
      "epoch": 0.12842163867495593,
      "grad_norm": 0.5029232501983643,
      "learning_rate": 3.9947219780478385e-05,
      "loss": 0.9523,
      "step": 173
    },
    {
      "epoch": 0.12916396028579383,
      "grad_norm": 0.5120161175727844,
      "learning_rate": 3.994544658557086e-05,
      "loss": 0.9828,
      "step": 174
    },
    {
      "epoch": 0.12990628189663173,
      "grad_norm": 0.4548787474632263,
      "learning_rate": 3.994364413543356e-05,
      "loss": 0.9701,
      "step": 175
    },
    {
      "epoch": 0.13064860350746962,
      "grad_norm": 0.5206537246704102,
      "learning_rate": 3.9941812432710234e-05,
      "loss": 0.9157,
      "step": 176
    },
    {
      "epoch": 0.1313909251183075,
      "grad_norm": 0.3944074511528015,
      "learning_rate": 3.993995148008757e-05,
      "loss": 0.8267,
      "step": 177
    },
    {
      "epoch": 0.1321332467291454,
      "grad_norm": 0.4675633907318115,
      "learning_rate": 3.993806128029513e-05,
      "loss": 0.8274,
      "step": 178
    },
    {
      "epoch": 0.1328755683399833,
      "grad_norm": 0.5009557008743286,
      "learning_rate": 3.9936141836105407e-05,
      "loss": 0.9023,
      "step": 179
    },
    {
      "epoch": 0.1336178899508212,
      "grad_norm": 0.43380340933799744,
      "learning_rate": 3.9934193150333754e-05,
      "loss": 0.7749,
      "step": 180
    },
    {
      "epoch": 0.1343602115616591,
      "grad_norm": 0.4866674542427063,
      "learning_rate": 3.9932215225838436e-05,
      "loss": 0.9514,
      "step": 181
    },
    {
      "epoch": 0.13510253317249699,
      "grad_norm": 0.40783071517944336,
      "learning_rate": 3.9930208065520595e-05,
      "loss": 0.8255,
      "step": 182
    },
    {
      "epoch": 0.13584485478333488,
      "grad_norm": 0.46471208333969116,
      "learning_rate": 3.992817167232426e-05,
      "loss": 0.9077,
      "step": 183
    },
    {
      "epoch": 0.13658717639417278,
      "grad_norm": 0.4536527395248413,
      "learning_rate": 3.9926106049236345e-05,
      "loss": 0.8559,
      "step": 184
    },
    {
      "epoch": 0.13732949800501068,
      "grad_norm": 0.38513433933258057,
      "learning_rate": 3.99240111992866e-05,
      "loss": 0.8146,
      "step": 185
    },
    {
      "epoch": 0.13807181961584858,
      "grad_norm": 0.4751570522785187,
      "learning_rate": 3.9921887125547704e-05,
      "loss": 0.9198,
      "step": 186
    },
    {
      "epoch": 0.13881414122668645,
      "grad_norm": 0.4330991804599762,
      "learning_rate": 3.9919733831135156e-05,
      "loss": 0.9203,
      "step": 187
    },
    {
      "epoch": 0.13955646283752435,
      "grad_norm": 0.4104817807674408,
      "learning_rate": 3.991755131920732e-05,
      "loss": 0.8185,
      "step": 188
    },
    {
      "epoch": 0.14029878444836225,
      "grad_norm": 0.44837674498558044,
      "learning_rate": 3.9915339592965436e-05,
      "loss": 0.9184,
      "step": 189
    },
    {
      "epoch": 0.14104110605920014,
      "grad_norm": 0.44174924492836,
      "learning_rate": 3.991309865565357e-05,
      "loss": 0.8141,
      "step": 190
    },
    {
      "epoch": 0.14178342767003804,
      "grad_norm": 0.5452368855476379,
      "learning_rate": 3.991082851055864e-05,
      "loss": 0.9376,
      "step": 191
    },
    {
      "epoch": 0.14252574928087594,
      "grad_norm": 0.5171647071838379,
      "learning_rate": 3.9908529161010425e-05,
      "loss": 0.9844,
      "step": 192
    },
    {
      "epoch": 0.14326807089171384,
      "grad_norm": 0.46090564131736755,
      "learning_rate": 3.990620061038152e-05,
      "loss": 0.8576,
      "step": 193
    },
    {
      "epoch": 0.14401039250255174,
      "grad_norm": 0.4665868580341339,
      "learning_rate": 3.990384286208734e-05,
      "loss": 0.9254,
      "step": 194
    },
    {
      "epoch": 0.14475271411338964,
      "grad_norm": 0.41708534955978394,
      "learning_rate": 3.9901455919586165e-05,
      "loss": 0.865,
      "step": 195
    },
    {
      "epoch": 0.14549503572422753,
      "grad_norm": 0.4729004502296448,
      "learning_rate": 3.989903978637905e-05,
      "loss": 0.9742,
      "step": 196
    },
    {
      "epoch": 0.1462373573350654,
      "grad_norm": 0.45266857743263245,
      "learning_rate": 3.989659446600991e-05,
      "loss": 0.8797,
      "step": 197
    },
    {
      "epoch": 0.1469796789459033,
      "grad_norm": 0.45862093567848206,
      "learning_rate": 3.989411996206543e-05,
      "loss": 0.8965,
      "step": 198
    },
    {
      "epoch": 0.1477220005567412,
      "grad_norm": 0.4977729618549347,
      "learning_rate": 3.9891616278175134e-05,
      "loss": 0.8884,
      "step": 199
    },
    {
      "epoch": 0.1484643221675791,
      "grad_norm": 0.43955883383750916,
      "learning_rate": 3.988908341801131e-05,
      "loss": 0.9651,
      "step": 200
    },
    {
      "epoch": 0.149206643778417,
      "grad_norm": 0.4800843894481659,
      "learning_rate": 3.9886521385289086e-05,
      "loss": 0.836,
      "step": 201
    },
    {
      "epoch": 0.1499489653892549,
      "grad_norm": 0.4543246924877167,
      "learning_rate": 3.988393018376635e-05,
      "loss": 0.975,
      "step": 202
    },
    {
      "epoch": 0.1506912870000928,
      "grad_norm": 0.44506338238716125,
      "learning_rate": 3.9881309817243766e-05,
      "loss": 0.8872,
      "step": 203
    },
    {
      "epoch": 0.1514336086109307,
      "grad_norm": 0.45602262020111084,
      "learning_rate": 3.98786602895648e-05,
      "loss": 0.8382,
      "step": 204
    },
    {
      "epoch": 0.1521759302217686,
      "grad_norm": 0.4463014602661133,
      "learning_rate": 3.9875981604615666e-05,
      "loss": 0.8192,
      "step": 205
    },
    {
      "epoch": 0.1529182518326065,
      "grad_norm": 0.42010006308555603,
      "learning_rate": 3.9873273766325374e-05,
      "loss": 0.9253,
      "step": 206
    },
    {
      "epoch": 0.15366057344344436,
      "grad_norm": 0.4275091588497162,
      "learning_rate": 3.987053677866568e-05,
      "loss": 0.791,
      "step": 207
    },
    {
      "epoch": 0.15440289505428226,
      "grad_norm": 0.505247175693512,
      "learning_rate": 3.9867770645651084e-05,
      "loss": 0.8528,
      "step": 208
    },
    {
      "epoch": 0.15514521666512016,
      "grad_norm": 0.42931032180786133,
      "learning_rate": 3.986497537133885e-05,
      "loss": 0.8356,
      "step": 209
    },
    {
      "epoch": 0.15588753827595805,
      "grad_norm": 0.6309205293655396,
      "learning_rate": 3.986215095982897e-05,
      "loss": 0.946,
      "step": 210
    },
    {
      "epoch": 0.15662985988679595,
      "grad_norm": 0.44027337431907654,
      "learning_rate": 3.985929741526421e-05,
      "loss": 0.8109,
      "step": 211
    },
    {
      "epoch": 0.15737218149763385,
      "grad_norm": 0.44372624158859253,
      "learning_rate": 3.985641474183002e-05,
      "loss": 0.9662,
      "step": 212
    },
    {
      "epoch": 0.15811450310847175,
      "grad_norm": 0.4606941044330597,
      "learning_rate": 3.985350294375461e-05,
      "loss": 0.8919,
      "step": 213
    },
    {
      "epoch": 0.15885682471930965,
      "grad_norm": 0.3926544189453125,
      "learning_rate": 3.9850562025308874e-05,
      "loss": 0.8643,
      "step": 214
    },
    {
      "epoch": 0.15959914633014755,
      "grad_norm": 0.4373493492603302,
      "learning_rate": 3.9847591990806466e-05,
      "loss": 0.9673,
      "step": 215
    },
    {
      "epoch": 0.16034146794098544,
      "grad_norm": 0.4592650830745697,
      "learning_rate": 3.984459284460371e-05,
      "loss": 0.9397,
      "step": 216
    },
    {
      "epoch": 0.16108378955182331,
      "grad_norm": 0.43196824193000793,
      "learning_rate": 3.984156459109965e-05,
      "loss": 0.887,
      "step": 217
    },
    {
      "epoch": 0.1618261111626612,
      "grad_norm": 0.44357168674468994,
      "learning_rate": 3.983850723473599e-05,
      "loss": 0.9653,
      "step": 218
    },
    {
      "epoch": 0.1625684327734991,
      "grad_norm": 0.39339280128479004,
      "learning_rate": 3.983542077999717e-05,
      "loss": 0.7815,
      "step": 219
    },
    {
      "epoch": 0.163310754384337,
      "grad_norm": 0.3603425920009613,
      "learning_rate": 3.983230523141027e-05,
      "loss": 0.8622,
      "step": 220
    },
    {
      "epoch": 0.1640530759951749,
      "grad_norm": 0.4492432177066803,
      "learning_rate": 3.982916059354507e-05,
      "loss": 0.8553,
      "step": 221
    },
    {
      "epoch": 0.1647953976060128,
      "grad_norm": 0.4893001317977905,
      "learning_rate": 3.9825986871014e-05,
      "loss": 0.8892,
      "step": 222
    },
    {
      "epoch": 0.1655377192168507,
      "grad_norm": 0.4576875865459442,
      "learning_rate": 3.9822784068472155e-05,
      "loss": 0.8574,
      "step": 223
    },
    {
      "epoch": 0.1662800408276886,
      "grad_norm": 0.47724637389183044,
      "learning_rate": 3.981955219061729e-05,
      "loss": 0.8827,
      "step": 224
    },
    {
      "epoch": 0.1670223624385265,
      "grad_norm": 0.4396674931049347,
      "learning_rate": 3.98162912421898e-05,
      "loss": 0.9252,
      "step": 225
    },
    {
      "epoch": 0.1677646840493644,
      "grad_norm": 0.4329068958759308,
      "learning_rate": 3.981300122797273e-05,
      "loss": 0.9277,
      "step": 226
    },
    {
      "epoch": 0.16850700566020227,
      "grad_norm": 0.4214411973953247,
      "learning_rate": 3.980968215279173e-05,
      "loss": 0.8663,
      "step": 227
    },
    {
      "epoch": 0.16924932727104017,
      "grad_norm": 0.4411622881889343,
      "learning_rate": 3.980633402151511e-05,
      "loss": 0.9531,
      "step": 228
    },
    {
      "epoch": 0.16999164888187807,
      "grad_norm": 0.48397454619407654,
      "learning_rate": 3.980295683905378e-05,
      "loss": 0.8853,
      "step": 229
    },
    {
      "epoch": 0.17073397049271596,
      "grad_norm": 0.4269099831581116,
      "learning_rate": 3.979955061036125e-05,
      "loss": 0.7328,
      "step": 230
    },
    {
      "epoch": 0.17147629210355386,
      "grad_norm": 0.42758798599243164,
      "learning_rate": 3.979611534043367e-05,
      "loss": 0.8891,
      "step": 231
    },
    {
      "epoch": 0.17221861371439176,
      "grad_norm": 0.42514568567276,
      "learning_rate": 3.979265103430975e-05,
      "loss": 0.7428,
      "step": 232
    },
    {
      "epoch": 0.17296093532522966,
      "grad_norm": 0.40256327390670776,
      "learning_rate": 3.978915769707081e-05,
      "loss": 0.8206,
      "step": 233
    },
    {
      "epoch": 0.17370325693606756,
      "grad_norm": 0.4632292687892914,
      "learning_rate": 3.9785635333840746e-05,
      "loss": 0.8407,
      "step": 234
    },
    {
      "epoch": 0.17444557854690546,
      "grad_norm": 0.41627851128578186,
      "learning_rate": 3.9782083949786026e-05,
      "loss": 0.9274,
      "step": 235
    },
    {
      "epoch": 0.17518790015774335,
      "grad_norm": 0.416707843542099,
      "learning_rate": 3.977850355011568e-05,
      "loss": 0.8109,
      "step": 236
    },
    {
      "epoch": 0.17593022176858122,
      "grad_norm": 0.4402737319469452,
      "learning_rate": 3.977489414008131e-05,
      "loss": 0.7994,
      "step": 237
    },
    {
      "epoch": 0.17667254337941912,
      "grad_norm": 0.4560914635658264,
      "learning_rate": 3.9771255724977065e-05,
      "loss": 0.8921,
      "step": 238
    },
    {
      "epoch": 0.17741486499025702,
      "grad_norm": 0.42798957228660583,
      "learning_rate": 3.9767588310139625e-05,
      "loss": 0.9176,
      "step": 239
    },
    {
      "epoch": 0.17815718660109492,
      "grad_norm": 0.5003705024719238,
      "learning_rate": 3.976389190094823e-05,
      "loss": 0.8734,
      "step": 240
    },
    {
      "epoch": 0.17889950821193282,
      "grad_norm": 0.45051515102386475,
      "learning_rate": 3.976016650282462e-05,
      "loss": 0.8313,
      "step": 241
    },
    {
      "epoch": 0.17964182982277072,
      "grad_norm": 0.4295995831489563,
      "learning_rate": 3.975641212123308e-05,
      "loss": 0.9299,
      "step": 242
    },
    {
      "epoch": 0.1803841514336086,
      "grad_norm": 0.451457679271698,
      "learning_rate": 3.975262876168039e-05,
      "loss": 0.9284,
      "step": 243
    },
    {
      "epoch": 0.1811264730444465,
      "grad_norm": 0.4843652546405792,
      "learning_rate": 3.974881642971584e-05,
      "loss": 0.861,
      "step": 244
    },
    {
      "epoch": 0.1818687946552844,
      "grad_norm": 0.4577447474002838,
      "learning_rate": 3.9744975130931214e-05,
      "loss": 0.8976,
      "step": 245
    },
    {
      "epoch": 0.1826111162661223,
      "grad_norm": 0.40924352407455444,
      "learning_rate": 3.974110487096079e-05,
      "loss": 0.7861,
      "step": 246
    },
    {
      "epoch": 0.18335343787696018,
      "grad_norm": 0.49304714798927307,
      "learning_rate": 3.973720565548131e-05,
      "loss": 0.8693,
      "step": 247
    },
    {
      "epoch": 0.18409575948779808,
      "grad_norm": 0.4253219664096832,
      "learning_rate": 3.9733277490212e-05,
      "loss": 0.8902,
      "step": 248
    },
    {
      "epoch": 0.18483808109863598,
      "grad_norm": 0.44300004839897156,
      "learning_rate": 3.9729320380914546e-05,
      "loss": 0.8948,
      "step": 249
    },
    {
      "epoch": 0.18558040270947387,
      "grad_norm": 0.4048704504966736,
      "learning_rate": 3.972533433339309e-05,
      "loss": 0.8657,
      "step": 250
    },
    {
      "epoch": 0.18632272432031177,
      "grad_norm": 0.4248516261577606,
      "learning_rate": 3.972131935349421e-05,
      "loss": 0.8626,
      "step": 251
    },
    {
      "epoch": 0.18706504593114967,
      "grad_norm": 0.41515034437179565,
      "learning_rate": 3.9717275447106936e-05,
      "loss": 0.8483,
      "step": 252
    },
    {
      "epoch": 0.18780736754198757,
      "grad_norm": 0.4110088348388672,
      "learning_rate": 3.9713202620162704e-05,
      "loss": 0.8291,
      "step": 253
    },
    {
      "epoch": 0.18854968915282547,
      "grad_norm": 0.4878021478652954,
      "learning_rate": 3.9709100878635397e-05,
      "loss": 0.88,
      "step": 254
    },
    {
      "epoch": 0.18929201076366337,
      "grad_norm": 0.4222434163093567,
      "learning_rate": 3.970497022854129e-05,
      "loss": 0.855,
      "step": 255
    },
    {
      "epoch": 0.19003433237450126,
      "grad_norm": 0.40751904249191284,
      "learning_rate": 3.9700810675939067e-05,
      "loss": 0.8315,
      "step": 256
    },
    {
      "epoch": 0.19077665398533916,
      "grad_norm": 0.431059330701828,
      "learning_rate": 3.969662222692979e-05,
      "loss": 0.7909,
      "step": 257
    },
    {
      "epoch": 0.19151897559617703,
      "grad_norm": 0.43389439582824707,
      "learning_rate": 3.9692404887656936e-05,
      "loss": 0.9269,
      "step": 258
    },
    {
      "epoch": 0.19226129720701493,
      "grad_norm": 0.4089961051940918,
      "learning_rate": 3.9688158664306333e-05,
      "loss": 0.8458,
      "step": 259
    },
    {
      "epoch": 0.19300361881785283,
      "grad_norm": 0.43871837854385376,
      "learning_rate": 3.968388356310618e-05,
      "loss": 0.8475,
      "step": 260
    },
    {
      "epoch": 0.19374594042869073,
      "grad_norm": 0.4958783686161041,
      "learning_rate": 3.967957959032703e-05,
      "loss": 0.9589,
      "step": 261
    },
    {
      "epoch": 0.19448826203952863,
      "grad_norm": 0.47255924344062805,
      "learning_rate": 3.96752467522818e-05,
      "loss": 0.8113,
      "step": 262
    },
    {
      "epoch": 0.19523058365036652,
      "grad_norm": 0.39204686880111694,
      "learning_rate": 3.967088505532572e-05,
      "loss": 0.8786,
      "step": 263
    },
    {
      "epoch": 0.19597290526120442,
      "grad_norm": 0.4337204396724701,
      "learning_rate": 3.966649450585637e-05,
      "loss": 0.8074,
      "step": 264
    },
    {
      "epoch": 0.19671522687204232,
      "grad_norm": 0.44740915298461914,
      "learning_rate": 3.966207511031365e-05,
      "loss": 0.9015,
      "step": 265
    },
    {
      "epoch": 0.19745754848288022,
      "grad_norm": 0.4619744122028351,
      "learning_rate": 3.9657626875179746e-05,
      "loss": 0.8584,
      "step": 266
    },
    {
      "epoch": 0.19819987009371812,
      "grad_norm": 0.4195767641067505,
      "learning_rate": 3.9653149806979174e-05,
      "loss": 0.9042,
      "step": 267
    },
    {
      "epoch": 0.198942191704556,
      "grad_norm": 0.3937821388244629,
      "learning_rate": 3.9648643912278726e-05,
      "loss": 0.9548,
      "step": 268
    },
    {
      "epoch": 0.19968451331539389,
      "grad_norm": 0.4471302032470703,
      "learning_rate": 3.9644109197687475e-05,
      "loss": 0.9643,
      "step": 269
    },
    {
      "epoch": 0.20042683492623178,
      "grad_norm": 0.48815852403640747,
      "learning_rate": 3.963954566985678e-05,
      "loss": 1.0094,
      "step": 270
    },
    {
      "epoch": 0.20116915653706968,
      "grad_norm": 0.43544676899909973,
      "learning_rate": 3.963495333548024e-05,
      "loss": 0.7804,
      "step": 271
    },
    {
      "epoch": 0.20191147814790758,
      "grad_norm": 0.4542047679424286,
      "learning_rate": 3.963033220129372e-05,
      "loss": 0.7921,
      "step": 272
    },
    {
      "epoch": 0.20265379975874548,
      "grad_norm": 0.39049360156059265,
      "learning_rate": 3.962568227407533e-05,
      "loss": 0.9492,
      "step": 273
    },
    {
      "epoch": 0.20339612136958338,
      "grad_norm": 0.4454337954521179,
      "learning_rate": 3.962100356064541e-05,
      "loss": 0.9064,
      "step": 274
    },
    {
      "epoch": 0.20413844298042128,
      "grad_norm": 0.41931623220443726,
      "learning_rate": 3.961629606786652e-05,
      "loss": 0.7879,
      "step": 275
    },
    {
      "epoch": 0.20488076459125917,
      "grad_norm": 0.4287397265434265,
      "learning_rate": 3.9611559802643427e-05,
      "loss": 0.846,
      "step": 276
    },
    {
      "epoch": 0.20562308620209707,
      "grad_norm": 0.5089631676673889,
      "learning_rate": 3.9606794771923106e-05,
      "loss": 0.864,
      "step": 277
    },
    {
      "epoch": 0.20636540781293494,
      "grad_norm": 0.4220036268234253,
      "learning_rate": 3.960200098269473e-05,
      "loss": 0.8548,
      "step": 278
    },
    {
      "epoch": 0.20710772942377284,
      "grad_norm": 0.4368970990180969,
      "learning_rate": 3.9597178441989646e-05,
      "loss": 0.8912,
      "step": 279
    },
    {
      "epoch": 0.20785005103461074,
      "grad_norm": 0.4061053395271301,
      "learning_rate": 3.9592327156881375e-05,
      "loss": 0.8345,
      "step": 280
    },
    {
      "epoch": 0.20859237264544864,
      "grad_norm": 0.40495994687080383,
      "learning_rate": 3.9587447134485605e-05,
      "loss": 0.9221,
      "step": 281
    },
    {
      "epoch": 0.20933469425628654,
      "grad_norm": 0.4269440472126007,
      "learning_rate": 3.958253838196016e-05,
      "loss": 0.8588,
      "step": 282
    },
    {
      "epoch": 0.21007701586712443,
      "grad_norm": 0.5028578042984009,
      "learning_rate": 3.957760090650501e-05,
      "loss": 0.9026,
      "step": 283
    },
    {
      "epoch": 0.21081933747796233,
      "grad_norm": 0.40312886238098145,
      "learning_rate": 3.957263471536227e-05,
      "loss": 0.9118,
      "step": 284
    },
    {
      "epoch": 0.21156165908880023,
      "grad_norm": 0.4545218050479889,
      "learning_rate": 3.956763981581617e-05,
      "loss": 0.8419,
      "step": 285
    },
    {
      "epoch": 0.21230398069963813,
      "grad_norm": 0.37066158652305603,
      "learning_rate": 3.956261621519302e-05,
      "loss": 0.8473,
      "step": 286
    },
    {
      "epoch": 0.21304630231047603,
      "grad_norm": 0.45726296305656433,
      "learning_rate": 3.955756392086125e-05,
      "loss": 0.8868,
      "step": 287
    },
    {
      "epoch": 0.2137886239213139,
      "grad_norm": 0.38632458448410034,
      "learning_rate": 3.955248294023139e-05,
      "loss": 0.7743,
      "step": 288
    },
    {
      "epoch": 0.2145309455321518,
      "grad_norm": 0.399452269077301,
      "learning_rate": 3.9547373280756016e-05,
      "loss": 0.8737,
      "step": 289
    },
    {
      "epoch": 0.2152732671429897,
      "grad_norm": 0.42522764205932617,
      "learning_rate": 3.9542234949929793e-05,
      "loss": 0.9165,
      "step": 290
    },
    {
      "epoch": 0.2160155887538276,
      "grad_norm": 0.4133375883102417,
      "learning_rate": 3.9537067955289424e-05,
      "loss": 0.8659,
      "step": 291
    },
    {
      "epoch": 0.2167579103646655,
      "grad_norm": 0.3964545428752899,
      "learning_rate": 3.953187230441367e-05,
      "loss": 0.8724,
      "step": 292
    },
    {
      "epoch": 0.2175002319755034,
      "grad_norm": 0.41253530979156494,
      "learning_rate": 3.952664800492331e-05,
      "loss": 0.8626,
      "step": 293
    },
    {
      "epoch": 0.2182425535863413,
      "grad_norm": 0.42898398637771606,
      "learning_rate": 3.952139506448116e-05,
      "loss": 0.8405,
      "step": 294
    },
    {
      "epoch": 0.21898487519717919,
      "grad_norm": 0.4509212374687195,
      "learning_rate": 3.951611349079202e-05,
      "loss": 0.9459,
      "step": 295
    },
    {
      "epoch": 0.21972719680801708,
      "grad_norm": 0.42532891035079956,
      "learning_rate": 3.951080329160271e-05,
      "loss": 0.8979,
      "step": 296
    },
    {
      "epoch": 0.22046951841885498,
      "grad_norm": 0.40851664543151855,
      "learning_rate": 3.950546447470203e-05,
      "loss": 0.7875,
      "step": 297
    },
    {
      "epoch": 0.22121184002969285,
      "grad_norm": 0.38995298743247986,
      "learning_rate": 3.9500097047920764e-05,
      "loss": 0.7943,
      "step": 298
    },
    {
      "epoch": 0.22195416164053075,
      "grad_norm": 0.4160335063934326,
      "learning_rate": 3.949470101913163e-05,
      "loss": 0.8773,
      "step": 299
    },
    {
      "epoch": 0.22269648325136865,
      "grad_norm": 0.4847520887851715,
      "learning_rate": 3.948927639624935e-05,
      "loss": 0.9078,
      "step": 300
    },
    {
      "epoch": 0.22343880486220655,
      "grad_norm": 0.4614812135696411,
      "learning_rate": 3.9483823187230534e-05,
      "loss": 0.9538,
      "step": 301
    },
    {
      "epoch": 0.22418112647304445,
      "grad_norm": 0.478161096572876,
      "learning_rate": 3.947834140007375e-05,
      "loss": 0.9021,
      "step": 302
    },
    {
      "epoch": 0.22492344808388234,
      "grad_norm": 0.4866395890712738,
      "learning_rate": 3.947283104281947e-05,
      "loss": 0.9058,
      "step": 303
    },
    {
      "epoch": 0.22566576969472024,
      "grad_norm": 0.47090503573417664,
      "learning_rate": 3.946729212355009e-05,
      "loss": 0.805,
      "step": 304
    },
    {
      "epoch": 0.22640809130555814,
      "grad_norm": 0.44924086332321167,
      "learning_rate": 3.9461724650389886e-05,
      "loss": 0.9648,
      "step": 305
    },
    {
      "epoch": 0.22715041291639604,
      "grad_norm": 0.43780702352523804,
      "learning_rate": 3.9456128631505014e-05,
      "loss": 0.9912,
      "step": 306
    },
    {
      "epoch": 0.22789273452723394,
      "grad_norm": 0.4452776312828064,
      "learning_rate": 3.9450504075103507e-05,
      "loss": 0.9099,
      "step": 307
    },
    {
      "epoch": 0.2286350561380718,
      "grad_norm": 0.41327065229415894,
      "learning_rate": 3.944485098943524e-05,
      "loss": 0.8384,
      "step": 308
    },
    {
      "epoch": 0.2293773777489097,
      "grad_norm": 0.43018415570259094,
      "learning_rate": 3.9439169382791965e-05,
      "loss": 0.9611,
      "step": 309
    },
    {
      "epoch": 0.2301196993597476,
      "grad_norm": 0.414542555809021,
      "learning_rate": 3.9433459263507236e-05,
      "loss": 0.8249,
      "step": 310
    },
    {
      "epoch": 0.2308620209705855,
      "grad_norm": 0.4167710542678833,
      "learning_rate": 3.942772063995645e-05,
      "loss": 0.8588,
      "step": 311
    },
    {
      "epoch": 0.2316043425814234,
      "grad_norm": 0.5119878649711609,
      "learning_rate": 3.94219535205568e-05,
      "loss": 0.9008,
      "step": 312
    },
    {
      "epoch": 0.2323466641922613,
      "grad_norm": 0.4627315402030945,
      "learning_rate": 3.941615791376727e-05,
      "loss": 0.7732,
      "step": 313
    },
    {
      "epoch": 0.2330889858030992,
      "grad_norm": 0.3760371208190918,
      "learning_rate": 3.941033382808865e-05,
      "loss": 0.9079,
      "step": 314
    },
    {
      "epoch": 0.2338313074139371,
      "grad_norm": 0.38762086629867554,
      "learning_rate": 3.9404481272063486e-05,
      "loss": 0.8229,
      "step": 315
    },
    {
      "epoch": 0.234573629024775,
      "grad_norm": 0.38175562024116516,
      "learning_rate": 3.9398600254276085e-05,
      "loss": 0.8135,
      "step": 316
    },
    {
      "epoch": 0.2353159506356129,
      "grad_norm": 0.4348665177822113,
      "learning_rate": 3.939269078335251e-05,
      "loss": 0.8682,
      "step": 317
    },
    {
      "epoch": 0.23605827224645076,
      "grad_norm": 0.3956005871295929,
      "learning_rate": 3.938675286796054e-05,
      "loss": 0.7935,
      "step": 318
    },
    {
      "epoch": 0.23680059385728866,
      "grad_norm": 0.402208149433136,
      "learning_rate": 3.938078651680969e-05,
      "loss": 0.9668,
      "step": 319
    },
    {
      "epoch": 0.23754291546812656,
      "grad_norm": 0.44131597876548767,
      "learning_rate": 3.9374791738651175e-05,
      "loss": 0.8008,
      "step": 320
    },
    {
      "epoch": 0.23828523707896446,
      "grad_norm": 0.43316978216171265,
      "learning_rate": 3.936876854227792e-05,
      "loss": 0.8541,
      "step": 321
    },
    {
      "epoch": 0.23902755868980236,
      "grad_norm": 0.4488168954849243,
      "learning_rate": 3.936271693652451e-05,
      "loss": 0.8773,
      "step": 322
    },
    {
      "epoch": 0.23976988030064025,
      "grad_norm": 0.4164707362651825,
      "learning_rate": 3.935663693026722e-05,
      "loss": 0.9193,
      "step": 323
    },
    {
      "epoch": 0.24051220191147815,
      "grad_norm": 0.4314461648464203,
      "learning_rate": 3.935052853242398e-05,
      "loss": 0.8206,
      "step": 324
    },
    {
      "epoch": 0.24125452352231605,
      "grad_norm": 0.4680315852165222,
      "learning_rate": 3.934439175195434e-05,
      "loss": 0.9033,
      "step": 325
    },
    {
      "epoch": 0.24199684513315395,
      "grad_norm": 0.4575926959514618,
      "learning_rate": 3.933822659785951e-05,
      "loss": 0.8849,
      "step": 326
    },
    {
      "epoch": 0.24273916674399185,
      "grad_norm": 0.4294450879096985,
      "learning_rate": 3.933203307918231e-05,
      "loss": 0.7656,
      "step": 327
    },
    {
      "epoch": 0.24348148835482972,
      "grad_norm": 0.4561205506324768,
      "learning_rate": 3.932581120500715e-05,
      "loss": 0.8032,
      "step": 328
    },
    {
      "epoch": 0.24422380996566762,
      "grad_norm": 0.39646250009536743,
      "learning_rate": 3.931956098446006e-05,
      "loss": 0.7969,
      "step": 329
    },
    {
      "epoch": 0.2449661315765055,
      "grad_norm": 0.41189101338386536,
      "learning_rate": 3.9313282426708594e-05,
      "loss": 0.8546,
      "step": 330
    },
    {
      "epoch": 0.2457084531873434,
      "grad_norm": 0.4370476305484772,
      "learning_rate": 3.9306975540961935e-05,
      "loss": 0.8269,
      "step": 331
    },
    {
      "epoch": 0.2464507747981813,
      "grad_norm": 0.44512081146240234,
      "learning_rate": 3.930064033647077e-05,
      "loss": 0.884,
      "step": 332
    },
    {
      "epoch": 0.2471930964090192,
      "grad_norm": 0.3812503516674042,
      "learning_rate": 3.9294276822527344e-05,
      "loss": 0.8196,
      "step": 333
    },
    {
      "epoch": 0.2479354180198571,
      "grad_norm": 0.39937490224838257,
      "learning_rate": 3.9287885008465416e-05,
      "loss": 0.8481,
      "step": 334
    },
    {
      "epoch": 0.248677739630695,
      "grad_norm": 0.5105488300323486,
      "learning_rate": 3.9281464903660266e-05,
      "loss": 0.8751,
      "step": 335
    },
    {
      "epoch": 0.2494200612415329,
      "grad_norm": 0.3878211975097656,
      "learning_rate": 3.927501651752865e-05,
      "loss": 0.8023,
      "step": 336
    },
    {
      "epoch": 0.2501623828523708,
      "grad_norm": 0.413700133562088,
      "learning_rate": 3.926853985952883e-05,
      "loss": 0.8475,
      "step": 337
    },
    {
      "epoch": 0.25090470446320867,
      "grad_norm": 0.3983979821205139,
      "learning_rate": 3.926203493916051e-05,
      "loss": 0.7672,
      "step": 338
    },
    {
      "epoch": 0.2516470260740466,
      "grad_norm": 0.41137751936912537,
      "learning_rate": 3.9255501765964874e-05,
      "loss": 0.8201,
      "step": 339
    },
    {
      "epoch": 0.25238934768488447,
      "grad_norm": 0.46660321950912476,
      "learning_rate": 3.9248940349524526e-05,
      "loss": 0.8046,
      "step": 340
    },
    {
      "epoch": 0.2531316692957224,
      "grad_norm": 0.42749106884002686,
      "learning_rate": 3.9242350699463516e-05,
      "loss": 0.8447,
      "step": 341
    },
    {
      "epoch": 0.25387399090656027,
      "grad_norm": 0.3725341856479645,
      "learning_rate": 3.9235732825447284e-05,
      "loss": 0.8874,
      "step": 342
    },
    {
      "epoch": 0.25461631251739814,
      "grad_norm": 0.36658021807670593,
      "learning_rate": 3.9229086737182676e-05,
      "loss": 0.7429,
      "step": 343
    },
    {
      "epoch": 0.25535863412823606,
      "grad_norm": 0.4336754381656647,
      "learning_rate": 3.922241244441794e-05,
      "loss": 0.8506,
      "step": 344
    },
    {
      "epoch": 0.25610095573907393,
      "grad_norm": 0.4874718487262726,
      "learning_rate": 3.921570995694266e-05,
      "loss": 0.9424,
      "step": 345
    },
    {
      "epoch": 0.25684327734991186,
      "grad_norm": 0.4024730920791626,
      "learning_rate": 3.92089792845878e-05,
      "loss": 0.8936,
      "step": 346
    },
    {
      "epoch": 0.25758559896074973,
      "grad_norm": 0.4135879874229431,
      "learning_rate": 3.9202220437225665e-05,
      "loss": 0.8154,
      "step": 347
    },
    {
      "epoch": 0.25832792057158765,
      "grad_norm": 0.3991898000240326,
      "learning_rate": 3.9195433424769857e-05,
      "loss": 0.8992,
      "step": 348
    },
    {
      "epoch": 0.2590702421824255,
      "grad_norm": 0.4020022749900818,
      "learning_rate": 3.9188618257175326e-05,
      "loss": 0.9374,
      "step": 349
    },
    {
      "epoch": 0.25981256379326345,
      "grad_norm": 0.4002429246902466,
      "learning_rate": 3.9181774944438294e-05,
      "loss": 0.8036,
      "step": 350
    },
    {
      "epoch": 0.2605548854041013,
      "grad_norm": 0.4645288288593292,
      "learning_rate": 3.9174903496596286e-05,
      "loss": 0.9058,
      "step": 351
    },
    {
      "epoch": 0.26129720701493925,
      "grad_norm": 0.5104494690895081,
      "learning_rate": 3.916800392372807e-05,
      "loss": 0.858,
      "step": 352
    },
    {
      "epoch": 0.2620395286257771,
      "grad_norm": 0.42878013849258423,
      "learning_rate": 3.9161076235953693e-05,
      "loss": 0.8975,
      "step": 353
    },
    {
      "epoch": 0.262781850236615,
      "grad_norm": 0.47523993253707886,
      "learning_rate": 3.915412044343441e-05,
      "loss": 0.8762,
      "step": 354
    },
    {
      "epoch": 0.2635241718474529,
      "grad_norm": 0.4574279189109802,
      "learning_rate": 3.914713655637273e-05,
      "loss": 0.9237,
      "step": 355
    },
    {
      "epoch": 0.2642664934582908,
      "grad_norm": 0.4427787959575653,
      "learning_rate": 3.914012458501235e-05,
      "loss": 0.9013,
      "step": 356
    },
    {
      "epoch": 0.2650088150691287,
      "grad_norm": 0.4818330407142639,
      "learning_rate": 3.913308453963817e-05,
      "loss": 0.8158,
      "step": 357
    },
    {
      "epoch": 0.2657511366799666,
      "grad_norm": 0.3826821744441986,
      "learning_rate": 3.912601643057625e-05,
      "loss": 0.8757,
      "step": 358
    },
    {
      "epoch": 0.2664934582908045,
      "grad_norm": 0.447723388671875,
      "learning_rate": 3.9118920268193844e-05,
      "loss": 0.8342,
      "step": 359
    },
    {
      "epoch": 0.2672357799016424,
      "grad_norm": 0.42185550928115845,
      "learning_rate": 3.911179606289932e-05,
      "loss": 0.9412,
      "step": 360
    },
{ |
|
"epoch": 0.2679781015124803, |
|
"grad_norm": 0.471258282661438, |
|
"learning_rate": 3.91046438251422e-05, |
|
"loss": 0.85, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.2687204231233182, |
|
"grad_norm": 0.42661038041114807, |
|
"learning_rate": 3.909746356541312e-05, |
|
"loss": 0.7643, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.26946274473415605, |
|
"grad_norm": 0.4222320020198822, |
|
"learning_rate": 3.909025529424382e-05, |
|
"loss": 0.83, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.27020506634499397, |
|
"grad_norm": 0.388431578874588, |
|
"learning_rate": 3.90830190222071e-05, |
|
"loss": 0.7993, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.27094738795583184, |
|
"grad_norm": 0.41494980454444885, |
|
"learning_rate": 3.907575475991687e-05, |
|
"loss": 0.9685, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.27168970956666977, |
|
"grad_norm": 0.4347587823867798, |
|
"learning_rate": 3.9068462518028074e-05, |
|
"loss": 0.8882, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.27243203117750764, |
|
"grad_norm": 0.38997599482536316, |
|
"learning_rate": 3.906114230723669e-05, |
|
"loss": 0.9071, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.27317435278834556, |
|
"grad_norm": 0.413625031709671, |
|
"learning_rate": 3.9053794138279734e-05, |
|
"loss": 0.8533, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.27391667439918344, |
|
"grad_norm": 0.397564560174942, |
|
"learning_rate": 3.9046418021935214e-05, |
|
"loss": 0.8478, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.27465899601002136, |
|
"grad_norm": 0.38332417607307434, |
|
"learning_rate": 3.903901396902216e-05, |
|
"loss": 0.884, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.27540131762085923, |
|
"grad_norm": 0.4055768847465515, |
|
"learning_rate": 3.903158199040053e-05, |
|
"loss": 0.81, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.27614363923169716, |
|
"grad_norm": 0.41119036078453064, |
|
"learning_rate": 3.902412209697129e-05, |
|
"loss": 0.8914, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.27688596084253503, |
|
"grad_norm": 0.4107328951358795, |
|
"learning_rate": 3.901663429967633e-05, |
|
"loss": 0.8504, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.2776282824533729, |
|
"grad_norm": 0.46609365940093994, |
|
"learning_rate": 3.900911860949847e-05, |
|
"loss": 0.842, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.2783706040642108, |
|
"grad_norm": 0.4163331687450409, |
|
"learning_rate": 3.900157503746142e-05, |
|
"loss": 0.7783, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.2791129256750487, |
|
"grad_norm": 0.4238855838775635, |
|
"learning_rate": 3.899400359462983e-05, |
|
"loss": 0.9216, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.2798552472858866, |
|
"grad_norm": 0.4086857736110687, |
|
"learning_rate": 3.8986404292109206e-05, |
|
"loss": 0.836, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.2805975688967245, |
|
"grad_norm": 0.47745761275291443, |
|
"learning_rate": 3.8978777141045904e-05, |
|
"loss": 0.7913, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.2813398905075624, |
|
"grad_norm": 0.39942634105682373, |
|
"learning_rate": 3.897112215262716e-05, |
|
"loss": 0.9627, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.2820822121184003, |
|
"grad_norm": 0.38598352670669556, |
|
"learning_rate": 3.896343933808101e-05, |
|
"loss": 0.9239, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2828245337292382, |
|
"grad_norm": 0.49593856930732727, |
|
"learning_rate": 3.895572870867632e-05, |
|
"loss": 0.898, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.2835668553400761, |
|
"grad_norm": 0.40926507115364075, |
|
"learning_rate": 3.8947990275722756e-05, |
|
"loss": 0.8174, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.284309176950914, |
|
"grad_norm": 0.37032049894332886, |
|
"learning_rate": 3.894022405057075e-05, |
|
"loss": 0.8367, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.2850514985617519, |
|
"grad_norm": 0.3458698093891144, |
|
"learning_rate": 3.893243004461151e-05, |
|
"loss": 0.7966, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.28579382017258975, |
|
"grad_norm": 0.47391876578330994, |
|
"learning_rate": 3.8924608269277004e-05, |
|
"loss": 0.943, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.2865361417834277, |
|
"grad_norm": 0.4300226867198944, |
|
"learning_rate": 3.89167587360399e-05, |
|
"loss": 0.8706, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.28727846339426555, |
|
"grad_norm": 0.37660089135169983, |
|
"learning_rate": 3.890888145641361e-05, |
|
"loss": 0.8721, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.2880207850051035, |
|
"grad_norm": 0.35884180665016174, |
|
"learning_rate": 3.890097644195223e-05, |
|
"loss": 0.8307, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.28876310661594135, |
|
"grad_norm": 0.42722952365875244, |
|
"learning_rate": 3.889304370425053e-05, |
|
"loss": 0.9278, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.28950542822677927, |
|
"grad_norm": 0.4369671046733856, |
|
"learning_rate": 3.888508325494395e-05, |
|
"loss": 0.7752, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.29024774983761714, |
|
"grad_norm": 0.40296581387519836, |
|
"learning_rate": 3.887709510570859e-05, |
|
"loss": 0.8253, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.29099007144845507, |
|
"grad_norm": 0.37896981835365295, |
|
"learning_rate": 3.8869079268261155e-05, |
|
"loss": 0.758, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.29173239305929294, |
|
"grad_norm": 0.4341178834438324, |
|
"learning_rate": 3.886103575435897e-05, |
|
"loss": 0.8982, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.2924747146701308, |
|
"grad_norm": 0.36294007301330566, |
|
"learning_rate": 3.885296457579998e-05, |
|
"loss": 0.7998, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.29321703628096873, |
|
"grad_norm": 0.4262610971927643, |
|
"learning_rate": 3.884486574442265e-05, |
|
"loss": 0.8212, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.2939593578918066, |
|
"grad_norm": 0.41790515184402466, |
|
"learning_rate": 3.883673927210608e-05, |
|
"loss": 0.8739, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.29470167950264453, |
|
"grad_norm": 0.4500531554222107, |
|
"learning_rate": 3.8828585170769854e-05, |
|
"loss": 0.8168, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.2954440011134824, |
|
"grad_norm": 0.424883633852005, |
|
"learning_rate": 3.8820403452374093e-05, |
|
"loss": 0.8356, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.29618632272432033, |
|
"grad_norm": 0.4678550958633423, |
|
"learning_rate": 3.881219412891945e-05, |
|
"loss": 0.9249, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.2969286443351582, |
|
"grad_norm": 0.4226338565349579, |
|
"learning_rate": 3.880395721244704e-05, |
|
"loss": 0.8317, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2976709659459961, |
|
"grad_norm": 0.44711941480636597, |
|
"learning_rate": 3.8795692715038465e-05, |
|
"loss": 0.8585, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.298413287556834, |
|
"grad_norm": 0.3793635666370392, |
|
"learning_rate": 3.878740064881578e-05, |
|
"loss": 0.8116, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.2991556091676719, |
|
"grad_norm": 0.41916918754577637, |
|
"learning_rate": 3.877908102594147e-05, |
|
"loss": 0.838, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.2998979307785098, |
|
"grad_norm": 0.44144803285598755, |
|
"learning_rate": 3.877073385861846e-05, |
|
"loss": 0.8794, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.30064025238934766, |
|
"grad_norm": 0.41615739464759827, |
|
"learning_rate": 3.876235915909004e-05, |
|
"loss": 0.8119, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.3013825740001856, |
|
"grad_norm": 0.38933223485946655, |
|
"learning_rate": 3.8753956939639915e-05, |
|
"loss": 0.9161, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.30212489561102346, |
|
"grad_norm": 0.38489091396331787, |
|
"learning_rate": 3.874552721259215e-05, |
|
"loss": 0.7344, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.3028672172218614, |
|
"grad_norm": 0.405105859041214, |
|
"learning_rate": 3.873706999031113e-05, |
|
"loss": 0.9039, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.30360953883269926, |
|
"grad_norm": 0.4045158624649048, |
|
"learning_rate": 3.872858528520161e-05, |
|
"loss": 0.883, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.3043518604435372, |
|
"grad_norm": 0.4408639967441559, |
|
"learning_rate": 3.872007310970864e-05, |
|
"loss": 0.8605, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.30509418205437505, |
|
"grad_norm": 0.39430099725723267, |
|
"learning_rate": 3.871153347631753e-05, |
|
"loss": 0.7924, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.305836503665213, |
|
"grad_norm": 0.4196453392505646, |
|
"learning_rate": 3.8702966397553917e-05, |
|
"loss": 0.8715, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.30657882527605085, |
|
"grad_norm": 0.3732720613479614, |
|
"learning_rate": 3.869437188598366e-05, |
|
"loss": 0.8603, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.3073211468868887, |
|
"grad_norm": 0.4107831120491028, |
|
"learning_rate": 3.868574995421288e-05, |
|
"loss": 0.7688, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.30806346849772664, |
|
"grad_norm": 0.43467453122138977, |
|
"learning_rate": 3.867710061488788e-05, |
|
"loss": 0.8764, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.3088057901085645, |
|
"grad_norm": 0.3707616329193115, |
|
"learning_rate": 3.866842388069519e-05, |
|
"loss": 0.8257, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.30954811171940244, |
|
"grad_norm": 0.4145166575908661, |
|
"learning_rate": 3.8659719764361526e-05, |
|
"loss": 0.8701, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.3102904333302403, |
|
"grad_norm": 0.33687037229537964, |
|
"learning_rate": 3.8650988278653755e-05, |
|
"loss": 0.8158, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.31103275494107824, |
|
"grad_norm": 0.40490809082984924, |
|
"learning_rate": 3.864222943637889e-05, |
|
"loss": 0.7536, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.3117750765519161, |
|
"grad_norm": 0.4449961483478546, |
|
"learning_rate": 3.863344325038407e-05, |
|
"loss": 0.8911, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.31251739816275403, |
|
"grad_norm": 0.47596439719200134, |
|
"learning_rate": 3.862462973355654e-05, |
|
"loss": 0.869, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.3132597197735919, |
|
"grad_norm": 0.3893236517906189, |
|
"learning_rate": 3.861578889882364e-05, |
|
"loss": 0.8048, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.31400204138442983, |
|
"grad_norm": 0.4097529351711273, |
|
"learning_rate": 3.860692075915277e-05, |
|
"loss": 0.8832, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.3147443629952677, |
|
"grad_norm": 0.4777495563030243, |
|
"learning_rate": 3.859802532755139e-05, |
|
"loss": 0.8201, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.31548668460610557, |
|
"grad_norm": 0.40158170461654663, |
|
"learning_rate": 3.858910261706696e-05, |
|
"loss": 0.9073, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.3162290062169435, |
|
"grad_norm": 0.4094659090042114, |
|
"learning_rate": 3.8580152640787014e-05, |
|
"loss": 0.8032, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.31697132782778137, |
|
"grad_norm": 0.4358592629432678, |
|
"learning_rate": 3.8571175411839006e-05, |
|
"loss": 0.8377, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.3177136494386193, |
|
"grad_norm": 0.45116010308265686, |
|
"learning_rate": 3.856217094339041e-05, |
|
"loss": 0.7675, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.31845597104945716, |
|
"grad_norm": 0.3857732117176056, |
|
"learning_rate": 3.8553139248648645e-05, |
|
"loss": 0.8392, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.3191982926602951, |
|
"grad_norm": 0.39530789852142334, |
|
"learning_rate": 3.854408034086106e-05, |
|
"loss": 0.8753, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.31994061427113296, |
|
"grad_norm": 0.3955869972705841, |
|
"learning_rate": 3.853499423331492e-05, |
|
"loss": 0.8537, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.3206829358819709, |
|
"grad_norm": 0.40758398175239563, |
|
"learning_rate": 3.8525880939337375e-05, |
|
"loss": 0.9214, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.32142525749280876, |
|
"grad_norm": 0.4302966892719269, |
|
"learning_rate": 3.8516740472295474e-05, |
|
"loss": 0.9284, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.32216757910364663, |
|
"grad_norm": 0.3887743651866913, |
|
"learning_rate": 3.8507572845596095e-05, |
|
"loss": 0.8329, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.32290990071448455, |
|
"grad_norm": 0.38320812582969666, |
|
"learning_rate": 3.849837807268599e-05, |
|
"loss": 0.8195, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.3236522223253224, |
|
"grad_norm": 0.4431188404560089, |
|
"learning_rate": 3.8489156167051686e-05, |
|
"loss": 0.9228, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.32439454393616035, |
|
"grad_norm": 0.4172087609767914, |
|
"learning_rate": 3.847990714221953e-05, |
|
"loss": 0.9269, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.3251368655469982, |
|
"grad_norm": 0.41571882367134094, |
|
"learning_rate": 3.8470631011755654e-05, |
|
"loss": 0.8732, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.32587918715783615, |
|
"grad_norm": 0.4024905860424042, |
|
"learning_rate": 3.846132778926593e-05, |
|
"loss": 0.7738, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.326621508768674, |
|
"grad_norm": 0.4435541331768036, |
|
"learning_rate": 3.845199748839597e-05, |
|
"loss": 0.9251, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.32736383037951194, |
|
"grad_norm": 0.38982489705085754, |
|
"learning_rate": 3.844264012283111e-05, |
|
"loss": 0.7196, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.3281061519903498, |
|
"grad_norm": 0.37881848216056824, |
|
"learning_rate": 3.8433255706296384e-05, |
|
"loss": 0.8207, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.32884847360118774, |
|
"grad_norm": 0.35831478238105774, |
|
"learning_rate": 3.8423844252556504e-05, |
|
"loss": 0.8365, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.3295907952120256, |
|
"grad_norm": 0.4337356984615326, |
|
"learning_rate": 3.841440577541583e-05, |
|
"loss": 0.8655, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.3303331168228635, |
|
"grad_norm": 0.41039177775382996, |
|
"learning_rate": 3.840494028871836e-05, |
|
"loss": 0.818, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.3310754384337014, |
|
"grad_norm": 0.3991888165473938, |
|
"learning_rate": 3.839544780634772e-05, |
|
"loss": 0.7963, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.3318177600445393, |
|
"grad_norm": 0.34694334864616394, |
|
"learning_rate": 3.838592834222714e-05, |
|
"loss": 0.8754, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.3325600816553772, |
|
"grad_norm": 0.3922919034957886, |
|
"learning_rate": 3.837638191031938e-05, |
|
"loss": 0.9201, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.3333024032662151, |
|
"grad_norm": 0.3862178921699524, |
|
"learning_rate": 3.836680852462681e-05, |
|
"loss": 0.7506, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.334044724877053, |
|
"grad_norm": 0.4066505432128906, |
|
"learning_rate": 3.835720819919131e-05, |
|
"loss": 0.8283, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.33478704648789087, |
|
"grad_norm": 0.4454757273197174, |
|
"learning_rate": 3.834758094809426e-05, |
|
"loss": 0.8575, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.3355293680987288, |
|
"grad_norm": 0.49195176362991333, |
|
"learning_rate": 3.8337926785456575e-05, |
|
"loss": 0.8578, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.33627168970956667, |
|
"grad_norm": 0.44131457805633545, |
|
"learning_rate": 3.832824572543859e-05, |
|
"loss": 0.7825, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.33701401132040454, |
|
"grad_norm": 0.45455294847488403, |
|
"learning_rate": 3.8318537782240135e-05, |
|
"loss": 0.8687, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.33775633293124246, |
|
"grad_norm": 0.40335503220558167, |
|
"learning_rate": 3.830880297010045e-05, |
|
"loss": 0.7981, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.33849865454208033, |
|
"grad_norm": 0.3739373981952667, |
|
"learning_rate": 3.829904130329819e-05, |
|
"loss": 0.8766, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.33924097615291826, |
|
"grad_norm": 0.4278407692909241, |
|
"learning_rate": 3.828925279615141e-05, |
|
"loss": 0.8353, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.33998329776375613, |
|
"grad_norm": 0.4283519387245178, |
|
"learning_rate": 3.8279437463017516e-05, |
|
"loss": 0.9147, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.34072561937459406, |
|
"grad_norm": 0.3937970995903015, |
|
"learning_rate": 3.826959531829326e-05, |
|
"loss": 0.885, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.34146794098543193, |
|
"grad_norm": 0.40536752343177795, |
|
"learning_rate": 3.8259726376414754e-05, |
|
"loss": 0.9063, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.34221026259626985, |
|
"grad_norm": 0.3611074388027191, |
|
"learning_rate": 3.8249830651857355e-05, |
|
"loss": 0.8843, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.3429525842071077, |
|
"grad_norm": 0.3790298402309418, |
|
"learning_rate": 3.8239908159135774e-05, |
|
"loss": 0.912, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.34369490581794565, |
|
"grad_norm": 0.4086690843105316, |
|
"learning_rate": 3.822995891280393e-05, |
|
"loss": 0.8264, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.3444372274287835, |
|
"grad_norm": 0.3959694802761078, |
|
"learning_rate": 3.8219982927455006e-05, |
|
"loss": 0.8477, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.3451795490396214, |
|
"grad_norm": 0.35375314950942993, |
|
"learning_rate": 3.82099802177214e-05, |
|
"loss": 0.9514, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3459218706504593, |
|
"grad_norm": 0.38223370909690857, |
|
"learning_rate": 3.819995079827472e-05, |
|
"loss": 0.7945, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.3466641922612972, |
|
"grad_norm": 0.46122410893440247, |
|
"learning_rate": 3.8189894683825726e-05, |
|
"loss": 0.9686, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.3474065138721351, |
|
"grad_norm": 0.4506250023841858, |
|
"learning_rate": 3.817981188912436e-05, |
|
"loss": 0.8485, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.348148835482973, |
|
"grad_norm": 0.4149094223976135, |
|
"learning_rate": 3.816970242895968e-05, |
|
"loss": 0.8154, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.3488911570938109, |
|
"grad_norm": 0.40498077869415283, |
|
"learning_rate": 3.815956631815985e-05, |
|
"loss": 0.8611, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3496334787046488, |
|
"grad_norm": 0.3880930542945862, |
|
"learning_rate": 3.8149403571592164e-05, |
|
"loss": 0.8212, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.3503758003154867, |
|
"grad_norm": 0.43260103464126587, |
|
"learning_rate": 3.8139214204162934e-05, |
|
"loss": 0.8884, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.3511181219263246, |
|
"grad_norm": 0.4028635621070862, |
|
"learning_rate": 3.8128998230817544e-05, |
|
"loss": 0.8843, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.35186044353716245, |
|
"grad_norm": 0.3990902304649353, |
|
"learning_rate": 3.81187556665404e-05, |
|
"loss": 0.8498, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.3526027651480004, |
|
"grad_norm": 0.3879133462905884, |
|
"learning_rate": 3.810848652635491e-05, |
|
"loss": 0.7933, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.35334508675883824, |
|
"grad_norm": 0.3822309076786041, |
|
"learning_rate": 3.8098190825323466e-05, |
|
"loss": 0.8271, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.35408740836967617, |
|
"grad_norm": 0.3867788314819336, |
|
"learning_rate": 3.808786857854741e-05, |
|
"loss": 0.761, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.35482972998051404, |
|
"grad_norm": 0.4349990487098694, |
|
"learning_rate": 3.807751980116702e-05, |
|
"loss": 0.9026, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.35557205159135197, |
|
"grad_norm": 0.4421882927417755, |
|
"learning_rate": 3.80671445083615e-05, |
|
"loss": 0.8214, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.35631437320218984, |
|
"grad_norm": 0.4466465711593628, |
|
"learning_rate": 3.805674271534894e-05, |
|
"loss": 0.9893, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.35705669481302776, |
|
"grad_norm": 0.4131970703601837, |
|
"learning_rate": 3.8046314437386286e-05, |
|
"loss": 0.8921, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.35779901642386563, |
|
"grad_norm": 0.36232057213783264, |
|
"learning_rate": 3.803585968976936e-05, |
|
"loss": 0.816, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.35854133803470356, |
|
"grad_norm": 0.36747488379478455, |
|
"learning_rate": 3.8025378487832786e-05, |
|
"loss": 0.863, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.35928365964554143, |
|
"grad_norm": 0.4169939160346985, |
|
"learning_rate": 3.801487084695e-05, |
|
"loss": 0.8733, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.3600259812563793, |
|
"grad_norm": 0.40697571635246277, |
|
"learning_rate": 3.8004336782533194e-05, |
|
"loss": 0.8453, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.3607683028672172, |
|
"grad_norm": 0.4079035818576813, |
|
"learning_rate": 3.799377631003336e-05, |
|
"loss": 0.7795, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.3615106244780551, |
|
"grad_norm": 0.35980504751205444, |
|
"learning_rate": 3.7983189444940196e-05, |
|
"loss": 0.8452, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.362252946088893, |
|
"grad_norm": 0.3967490494251251, |
|
"learning_rate": 3.7972576202782104e-05, |
|
"loss": 0.8732, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.3629952676997309, |
|
"grad_norm": 0.3711526393890381, |
|
"learning_rate": 3.796193659912621e-05, |
|
"loss": 0.8941, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.3637375893105688, |
|
"grad_norm": 0.358286589384079, |
|
"learning_rate": 3.795127064957825e-05, |
|
"loss": 0.8332, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3644799109214067, |
|
"grad_norm": 0.3899291455745697, |
|
"learning_rate": 3.7940578369782676e-05, |
|
"loss": 0.8518, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.3652222325322446, |
|
"grad_norm": 0.4205942451953888, |
|
"learning_rate": 3.79298597754225e-05, |
|
"loss": 0.7851, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.3659645541430825, |
|
"grad_norm": 0.443820059299469, |
|
"learning_rate": 3.791911488221936e-05, |
|
"loss": 0.847, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.36670687575392036, |
|
"grad_norm": 0.3978535234928131, |
|
"learning_rate": 3.790834370593345e-05, |
|
"loss": 0.8564, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.3674491973647583, |
|
"grad_norm": 0.4479997754096985, |
|
"learning_rate": 3.789754626236353e-05, |
|
"loss": 0.8226, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.36819151897559615, |
|
"grad_norm": 0.3979516923427582, |
|
"learning_rate": 3.78867225673469e-05, |
|
"loss": 0.7801, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.3689338405864341, |
|
"grad_norm": 0.4172329306602478, |
|
"learning_rate": 3.7875872636759327e-05, |
|
"loss": 0.7681, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.36967616219727195, |
|
"grad_norm": 0.3862856924533844, |
|
"learning_rate": 3.786499648651508e-05, |
|
"loss": 0.7748, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.3704184838081099, |
|
"grad_norm": 0.3918432295322418, |
|
"learning_rate": 3.78540941325669e-05, |
|
"loss": 0.9062, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.37116080541894775, |
|
"grad_norm": 0.4164000153541565, |
|
"learning_rate": 3.784316559090594e-05, |
|
"loss": 0.8612, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3719031270297857, |
|
"grad_norm": 0.40305987000465393, |
|
"learning_rate": 3.783221087756178e-05, |
|
"loss": 0.9142, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.37264544864062354, |
|
"grad_norm": 0.4271204173564911, |
|
"learning_rate": 3.782123000860238e-05, |
|
"loss": 0.795, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.37338777025146147, |
|
"grad_norm": 0.4040716290473938, |
|
"learning_rate": 3.781022300013406e-05, |
|
"loss": 0.8365, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.37413009186229934, |
|
"grad_norm": 0.41757476329803467, |
|
"learning_rate": 3.779918986830148e-05, |
|
"loss": 0.8417, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.3748724134731372, |
|
"grad_norm": 0.36810386180877686, |
|
"learning_rate": 3.7788130629287645e-05, |
|
"loss": 0.8355, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.37561473508397514, |
|
"grad_norm": 0.42814144492149353, |
|
"learning_rate": 3.777704529931381e-05, |
|
"loss": 0.8722, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.376357056694813, |
|
"grad_norm": 0.44470280408859253, |
|
"learning_rate": 3.776593389463952e-05, |
|
"loss": 0.8156, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.37709937830565093, |
|
"grad_norm": 0.3996254801750183, |
|
"learning_rate": 3.775479643156257e-05, |
|
"loss": 0.8909, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.3778416999164888, |
|
"grad_norm": 0.4491524398326874, |
|
"learning_rate": 3.774363292641897e-05, |
|
"loss": 0.8144, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.37858402152732673, |
|
"grad_norm": 0.4341849088668823, |
|
"learning_rate": 3.7732443395582935e-05, |
|
"loss": 0.9167, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3793263431381646, |
|
"grad_norm": 0.38755953311920166, |
|
"learning_rate": 3.772122785546684e-05, |
|
"loss": 0.8664, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.3800686647490025, |
|
"grad_norm": 0.39869973063468933, |
|
"learning_rate": 3.7709986322521217e-05, |
|
"loss": 0.8358, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.3808109863598404, |
|
"grad_norm": 0.3752814531326294, |
|
"learning_rate": 3.769871881323473e-05, |
|
"loss": 0.8321, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.3815533079706783, |
|
"grad_norm": 0.4040491282939911, |
|
"learning_rate": 3.768742534413413e-05, |
|
"loss": 0.8743, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.3822956295815162, |
|
"grad_norm": 0.4491027295589447, |
|
"learning_rate": 3.7676105931784256e-05, |
|
"loss": 0.8705, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.38303795119235406, |
|
"grad_norm": 0.4306964874267578, |
|
"learning_rate": 3.766476059278799e-05, |
|
"loss": 0.892, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.383780272803192, |
|
"grad_norm": 0.43329861760139465, |
|
"learning_rate": 3.765338934378625e-05, |
|
"loss": 0.8469, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.38452259441402986, |
|
"grad_norm": 0.36683329939842224, |
|
"learning_rate": 3.764199220145796e-05, |
|
"loss": 0.8179, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.3852649160248678, |
|
"grad_norm": 0.3505510985851288, |
|
"learning_rate": 3.7630569182520015e-05, |
|
"loss": 0.797, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.38600723763570566, |
|
"grad_norm": 0.4275372624397278, |
|
"learning_rate": 3.7619120303727274e-05, |
|
"loss": 0.8307, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3867495592465436, |
|
"grad_norm": 0.3772055208683014, |
|
"learning_rate": 3.760764558187252e-05, |
|
"loss": 0.8697, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.38749188085738145, |
|
"grad_norm": 0.348351389169693, |
|
"learning_rate": 3.759614503378644e-05, |
|
"loss": 0.7987, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.3882342024682194, |
|
"grad_norm": 0.40751683712005615, |
|
"learning_rate": 3.75846186763376e-05, |
|
"loss": 0.7577, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.38897652407905725, |
|
"grad_norm": 0.413409948348999, |
|
"learning_rate": 3.7573066526432435e-05, |
|
"loss": 0.9066, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.3897188456898951, |
|
"grad_norm": 0.39468032121658325, |
|
"learning_rate": 3.7561488601015206e-05, |
|
"loss": 0.8524, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.39046116730073305, |
|
"grad_norm": 0.38360586762428284, |
|
"learning_rate": 3.754988491706796e-05, |
|
"loss": 0.8282, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.3912034889115709, |
|
"grad_norm": 0.38212329149246216, |
|
"learning_rate": 3.7538255491610567e-05, |
|
"loss": 0.8489, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.39194581052240884, |
|
"grad_norm": 0.37275058031082153, |
|
"learning_rate": 3.752660034170062e-05, |
|
"loss": 0.8327, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.3926881321332467, |
|
"grad_norm": 0.39423465728759766, |
|
"learning_rate": 3.7514919484433444e-05, |
|
"loss": 0.8281, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.39343045374408464, |
|
"grad_norm": 0.40932732820510864, |
|
"learning_rate": 3.75032129369421e-05, |
|
"loss": 0.8683, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3941727753549225, |
|
"grad_norm": 0.41477513313293457, |
|
"learning_rate": 3.7491480716397296e-05, |
|
"loss": 0.8427, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.39491509696576044, |
|
"grad_norm": 0.38719847798347473, |
|
"learning_rate": 3.747972284000741e-05, |
|
"loss": 0.8367, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.3956574185765983, |
|
"grad_norm": 0.39567679166793823, |
|
"learning_rate": 3.746793932501847e-05, |
|
"loss": 0.9394, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.39639974018743623, |
|
"grad_norm": 0.42474374175071716, |
|
"learning_rate": 3.745613018871407e-05, |
|
"loss": 0.8696, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.3971420617982741, |
|
"grad_norm": 0.39733052253723145, |
|
"learning_rate": 3.7444295448415423e-05, |
|
"loss": 0.7553, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.397884383409112, |
|
"grad_norm": 0.4539112150669098, |
|
"learning_rate": 3.743243512148127e-05, |
|
"loss": 0.791, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.3986267050199499, |
|
"grad_norm": 0.40971097350120544, |
|
"learning_rate": 3.74205492253079e-05, |
|
"loss": 0.9272, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.39936902663078777, |
|
"grad_norm": 0.3799310624599457, |
|
"learning_rate": 3.740863777732909e-05, |
|
"loss": 0.8347, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.4001113482416257, |
|
"grad_norm": 0.3867487609386444, |
|
"learning_rate": 3.7396700795016106e-05, |
|
"loss": 0.8936, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.40085366985246357, |
|
"grad_norm": 0.39778730273246765, |
|
"learning_rate": 3.738473829587766e-05, |
|
"loss": 0.8159, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4015959914633015, |
|
"grad_norm": 0.3948611915111542, |
|
"learning_rate": 3.7372750297459904e-05, |
|
"loss": 0.7943, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.40233831307413936, |
|
"grad_norm": 0.3824806809425354, |
|
"learning_rate": 3.736073681734638e-05, |
|
"loss": 0.8733, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.4030806346849773, |
|
"grad_norm": 0.413933664560318, |
|
"learning_rate": 3.7348697873158e-05, |
|
"loss": 0.8837, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.40382295629581516, |
|
"grad_norm": 0.4327161908149719, |
|
"learning_rate": 3.733663348255305e-05, |
|
"loss": 0.8503, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.40456527790665303, |
|
"grad_norm": 0.4562986195087433, |
|
"learning_rate": 3.7324543663227105e-05, |
|
"loss": 0.7086, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.40530759951749096, |
|
"grad_norm": 0.40474218130111694, |
|
"learning_rate": 3.731242843291307e-05, |
|
"loss": 0.8736, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.40604992112832883, |
|
"grad_norm": 0.4206937849521637, |
|
"learning_rate": 3.73002878093811e-05, |
|
"loss": 0.9176, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.40679224273916675, |
|
"grad_norm": 0.40525129437446594, |
|
"learning_rate": 3.7288121810438614e-05, |
|
"loss": 0.9027, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.4075345643500046, |
|
"grad_norm": 0.39779412746429443, |
|
"learning_rate": 3.727593045393024e-05, |
|
"loss": 0.8264, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.40827688596084255, |
|
"grad_norm": 0.3574381470680237, |
|
"learning_rate": 3.726371375773778e-05, |
|
"loss": 0.8602, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4090192075716804, |
|
"grad_norm": 0.37428656220436096, |
|
"learning_rate": 3.725147173978025e-05, |
|
"loss": 0.9414, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.40976152918251835, |
|
"grad_norm": 0.3972627818584442, |
|
"learning_rate": 3.7239204418013767e-05, |
|
"loss": 0.9304, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.4105038507933562, |
|
"grad_norm": 0.43916741013526917, |
|
"learning_rate": 3.722691181043158e-05, |
|
"loss": 0.8894, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.41124617240419414, |
|
"grad_norm": 0.42011746764183044, |
|
"learning_rate": 3.7214593935064026e-05, |
|
"loss": 0.7993, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.411988494015032, |
|
"grad_norm": 0.4282603859901428, |
|
"learning_rate": 3.72022508099785e-05, |
|
"loss": 0.8025, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.4127308156258699, |
|
"grad_norm": 0.4019373953342438, |
|
"learning_rate": 3.718988245327942e-05, |
|
"loss": 0.7938, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.4134731372367078, |
|
"grad_norm": 0.3733516335487366, |
|
"learning_rate": 3.7177488883108245e-05, |
|
"loss": 0.8268, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.4142154588475457, |
|
"grad_norm": 0.41670674085617065, |
|
"learning_rate": 3.716507011764338e-05, |
|
"loss": 0.8477, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.4149577804583836, |
|
"grad_norm": 0.40059223771095276, |
|
"learning_rate": 3.715262617510022e-05, |
|
"loss": 0.9232, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.4157001020692215, |
|
"grad_norm": 0.3981556296348572, |
|
"learning_rate": 3.714015707373106e-05, |
|
"loss": 0.7917, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4164424236800594, |
|
"grad_norm": 0.42250776290893555, |
|
"learning_rate": 3.7127662831825104e-05, |
|
"loss": 0.831, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.4171847452908973, |
|
"grad_norm": 0.3976747393608093, |
|
"learning_rate": 3.711514346770844e-05, |
|
"loss": 0.9161, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.4179270669017352, |
|
"grad_norm": 0.45880788564682007, |
|
"learning_rate": 3.710259899974401e-05, |
|
"loss": 0.926, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.41866938851257307, |
|
"grad_norm": 0.4061499536037445, |
|
"learning_rate": 3.7090029446331555e-05, |
|
"loss": 0.9172, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.41941171012341094, |
|
"grad_norm": 0.5111361145973206, |
|
"learning_rate": 3.707743482590762e-05, |
|
"loss": 0.8568, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.42015403173424887, |
|
"grad_norm": 0.3791842758655548, |
|
"learning_rate": 3.7064815156945534e-05, |
|
"loss": 0.8478, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.42089635334508674, |
|
"grad_norm": 0.3697012960910797, |
|
"learning_rate": 3.705217045795534e-05, |
|
"loss": 0.807, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.42163867495592466, |
|
"grad_norm": 0.3967568874359131, |
|
"learning_rate": 3.703950074748381e-05, |
|
"loss": 0.7994, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.42238099656676253, |
|
"grad_norm": 0.379448801279068, |
|
"learning_rate": 3.7026806044114396e-05, |
|
"loss": 0.8293, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.42312331817760046, |
|
"grad_norm": 0.40183165669441223, |
|
"learning_rate": 3.701408636646721e-05, |
|
"loss": 0.9044, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.42386563978843833, |
|
"grad_norm": 0.4314356744289398, |
|
"learning_rate": 3.7001341733199003e-05, |
|
"loss": 0.782, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.42460796139927626, |
|
"grad_norm": 0.4023379385471344, |
|
"learning_rate": 3.698857216300312e-05, |
|
"loss": 0.9449, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.4253502830101141, |
|
"grad_norm": 0.37457001209259033, |
|
"learning_rate": 3.6975777674609473e-05, |
|
"loss": 0.8054, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.42609260462095205, |
|
"grad_norm": 0.3759816288948059, |
|
"learning_rate": 3.696295828678456e-05, |
|
"loss": 0.8421, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.4268349262317899, |
|
"grad_norm": 0.383489727973938, |
|
"learning_rate": 3.695011401833136e-05, |
|
"loss": 0.901, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.4275772478426278, |
|
"grad_norm": 0.41077226400375366, |
|
"learning_rate": 3.693724488808936e-05, |
|
"loss": 0.8094, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.4283195694534657, |
|
"grad_norm": 0.36784303188323975, |
|
"learning_rate": 3.692435091493453e-05, |
|
"loss": 0.7936, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.4290618910643036, |
|
"grad_norm": 0.39439964294433594, |
|
"learning_rate": 3.691143211777925e-05, |
|
"loss": 0.8754, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.4298042126751415, |
|
"grad_norm": 0.4088978171348572, |
|
"learning_rate": 3.6898488515572326e-05, |
|
"loss": 0.9492, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.4305465342859794, |
|
"grad_norm": 0.364327609539032, |
|
"learning_rate": 3.6885520127298955e-05, |
|
"loss": 0.9537, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4312888558968173, |
|
"grad_norm": 0.4293886125087738, |
|
"learning_rate": 3.6872526971980676e-05, |
|
"loss": 0.8285, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.4320311775076552, |
|
"grad_norm": 0.3976035416126251, |
|
"learning_rate": 3.685950906867535e-05, |
|
"loss": 0.8095, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.4327734991184931, |
|
"grad_norm": 0.4648852050304413, |
|
"learning_rate": 3.684646643647717e-05, |
|
"loss": 0.897, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.433515820729331, |
|
"grad_norm": 0.36569103598594666, |
|
"learning_rate": 3.683339909451656e-05, |
|
"loss": 0.8027, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.43425814234016885, |
|
"grad_norm": 0.4245463013648987, |
|
"learning_rate": 3.6820307061960206e-05, |
|
"loss": 0.8928, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.4350004639510068, |
|
"grad_norm": 0.3979533910751343, |
|
"learning_rate": 3.680719035801102e-05, |
|
"loss": 0.9094, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.43574278556184465, |
|
"grad_norm": 0.3963554799556732, |
|
"learning_rate": 3.679404900190807e-05, |
|
"loss": 0.7871, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.4364851071726826, |
|
"grad_norm": 0.38191401958465576, |
|
"learning_rate": 3.678088301292662e-05, |
|
"loss": 0.7979, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.43722742878352044, |
|
"grad_norm": 0.3779836893081665, |
|
"learning_rate": 3.676769241037803e-05, |
|
"loss": 0.76, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.43796975039435837, |
|
"grad_norm": 0.4065380394458771, |
|
"learning_rate": 3.67544772136098e-05, |
|
"loss": 0.8759, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.43871207200519624, |
|
"grad_norm": 0.43756797909736633, |
|
"learning_rate": 3.674123744200547e-05, |
|
"loss": 0.9037, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.43945439361603417, |
|
"grad_norm": 0.36865559220314026, |
|
"learning_rate": 3.672797311498464e-05, |
|
"loss": 0.7835, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.44019671522687204, |
|
"grad_norm": 0.45453381538391113, |
|
"learning_rate": 3.671468425200292e-05, |
|
"loss": 0.8964, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.44093903683770996, |
|
"grad_norm": 0.35778266191482544, |
|
"learning_rate": 3.670137087255192e-05, |
|
"loss": 0.8124, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.44168135844854783, |
|
"grad_norm": 0.41455769538879395, |
|
"learning_rate": 3.66880329961592e-05, |
|
"loss": 0.8156, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.4424236800593857, |
|
"grad_norm": 0.38713696599006653, |
|
"learning_rate": 3.6674670642388264e-05, |
|
"loss": 0.9086, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.44316600167022363, |
|
"grad_norm": 0.36686089634895325, |
|
"learning_rate": 3.66612838308385e-05, |
|
"loss": 0.825, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.4439083232810615, |
|
"grad_norm": 0.4371680021286011, |
|
"learning_rate": 3.664787258114518e-05, |
|
"loss": 0.8598, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.4446506448918994, |
|
"grad_norm": 0.40336138010025024, |
|
"learning_rate": 3.663443691297942e-05, |
|
"loss": 0.8545, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.4453929665027373, |
|
"grad_norm": 0.4215971827507019, |
|
"learning_rate": 3.662097684604815e-05, |
|
"loss": 0.905, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4461352881135752, |
|
"grad_norm": 0.42287206649780273, |
|
"learning_rate": 3.66074924000941e-05, |
|
"loss": 0.9016, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.4468776097244131, |
|
"grad_norm": 0.4127437472343445, |
|
"learning_rate": 3.659398359489574e-05, |
|
"loss": 0.8348, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.447619931335251, |
|
"grad_norm": 0.42018598318099976, |
|
"learning_rate": 3.658045045026727e-05, |
|
"loss": 0.8192, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.4483622529460889, |
|
"grad_norm": 0.429340660572052, |
|
"learning_rate": 3.65668929860586e-05, |
|
"loss": 0.8346, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.44910457455692676, |
|
"grad_norm": 0.4273694157600403, |
|
"learning_rate": 3.6553311222155313e-05, |
|
"loss": 0.8744, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.4498468961677647, |
|
"grad_norm": 0.38480550050735474, |
|
"learning_rate": 3.6539705178478636e-05, |
|
"loss": 0.8466, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.45058921777860256, |
|
"grad_norm": 0.38357681035995483, |
|
"learning_rate": 3.6526074874985385e-05, |
|
"loss": 0.7869, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.4513315393894405, |
|
"grad_norm": 0.4218459129333496, |
|
"learning_rate": 3.651242033166798e-05, |
|
"loss": 0.8489, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.45207386100027835, |
|
"grad_norm": 0.3404082953929901, |
|
"learning_rate": 3.64987415685544e-05, |
|
"loss": 0.8556, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.4528161826111163, |
|
"grad_norm": 0.38693150877952576, |
|
"learning_rate": 3.648503860570813e-05, |
|
"loss": 0.8234, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.45355850422195415, |
|
"grad_norm": 0.39517828822135925, |
|
"learning_rate": 3.6471311463228164e-05, |
|
"loss": 0.9296, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.4543008258327921, |
|
"grad_norm": 0.4109130799770355, |
|
"learning_rate": 3.6457560161248956e-05, |
|
"loss": 0.8357, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.45504314744362995, |
|
"grad_norm": 0.39871203899383545, |
|
"learning_rate": 3.64437847199404e-05, |
|
"loss": 0.9087, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.4557854690544679, |
|
"grad_norm": 0.3984218239784241, |
|
"learning_rate": 3.642998515950779e-05, |
|
"loss": 0.7781, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.45652779066530574, |
|
"grad_norm": 0.37950950860977173, |
|
"learning_rate": 3.6416161500191806e-05, |
|
"loss": 0.8539, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4572701122761436, |
|
"grad_norm": 0.3692931830883026, |
|
"learning_rate": 3.640231376226847e-05, |
|
"loss": 0.8544, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.45801243388698154, |
|
"grad_norm": 0.3967374265193939, |
|
"learning_rate": 3.638844196604911e-05, |
|
"loss": 0.8591, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.4587547554978194, |
|
"grad_norm": 0.3724336624145508, |
|
"learning_rate": 3.6374546131880375e-05, |
|
"loss": 0.8421, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.45949707710865734, |
|
"grad_norm": 0.398163765668869, |
|
"learning_rate": 3.636062628014414e-05, |
|
"loss": 0.8612, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.4602393987194952, |
|
"grad_norm": 0.3659161329269409, |
|
"learning_rate": 3.634668243125752e-05, |
|
"loss": 0.8387, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.46098172033033313, |
|
"grad_norm": 0.4477706551551819, |
|
"learning_rate": 3.633271460567284e-05, |
|
"loss": 0.9355, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.461724041941171, |
|
"grad_norm": 0.4127034842967987, |
|
"learning_rate": 3.6318722823877555e-05, |
|
"loss": 0.8034, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.46246636355200893, |
|
"grad_norm": 0.4307912290096283, |
|
"learning_rate": 3.6304707106394295e-05, |
|
"loss": 0.874, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.4632086851628468, |
|
"grad_norm": 0.4227927029132843, |
|
"learning_rate": 3.6290667473780796e-05, |
|
"loss": 0.8189, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.4639510067736847, |
|
"grad_norm": 0.3703235983848572, |
|
"learning_rate": 3.627660394662986e-05, |
|
"loss": 0.8073, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.4646933283845226, |
|
"grad_norm": 0.46729329228401184, |
|
"learning_rate": 3.6262516545569325e-05, |
|
"loss": 0.8373, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.46543564999536047, |
|
"grad_norm": 0.41088923811912537, |
|
"learning_rate": 3.624840529126207e-05, |
|
"loss": 0.8126, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.4661779716061984, |
|
"grad_norm": 0.3681904673576355, |
|
"learning_rate": 3.623427020440595e-05, |
|
"loss": 0.8599, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.46692029321703626, |
|
"grad_norm": 0.3651503324508667, |
|
"learning_rate": 3.622011130573378e-05, |
|
"loss": 0.8385, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.4676626148278742, |
|
"grad_norm": 0.3684818148612976, |
|
"learning_rate": 3.62059286160133e-05, |
|
"loss": 0.8341, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.46840493643871206, |
|
"grad_norm": 0.37057459354400635, |
|
"learning_rate": 3.619172215604714e-05, |
|
"loss": 0.9151, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.46914725804955, |
|
"grad_norm": 0.4190065264701843, |
|
"learning_rate": 3.61774919466728e-05, |
|
"loss": 0.8517, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.46988957966038786, |
|
"grad_norm": 0.42590153217315674, |
|
"learning_rate": 3.616323800876262e-05, |
|
"loss": 0.7829, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.4706319012712258, |
|
"grad_norm": 0.407207190990448, |
|
"learning_rate": 3.614896036322374e-05, |
|
"loss": 0.7631, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.47137422288206365, |
|
"grad_norm": 0.4581393897533417, |
|
"learning_rate": 3.613465903099807e-05, |
|
"loss": 0.8206, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.4721165444929015, |
|
"grad_norm": 0.39499345421791077, |
|
"learning_rate": 3.6120334033062264e-05, |
|
"loss": 0.8448, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.47285886610373945, |
|
"grad_norm": 0.35499414801597595, |
|
"learning_rate": 3.6105985390427696e-05, |
|
"loss": 0.7845, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.4736011877145773, |
|
"grad_norm": 0.38710108399391174, |
|
"learning_rate": 3.6091613124140404e-05, |
|
"loss": 0.901, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.47434350932541525, |
|
"grad_norm": 0.4199868142604828, |
|
"learning_rate": 3.6077217255281094e-05, |
|
"loss": 0.8268, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.4750858309362531, |
|
"grad_norm": 0.384318470954895, |
|
"learning_rate": 3.606279780496508e-05, |
|
"loss": 0.8498, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.47582815254709104, |
|
"grad_norm": 0.38041505217552185, |
|
"learning_rate": 3.604835479434227e-05, |
|
"loss": 0.7877, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.4765704741579289, |
|
"grad_norm": 0.3970247209072113, |
|
"learning_rate": 3.6033888244597136e-05, |
|
"loss": 0.8143, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.47731279576876684, |
|
"grad_norm": 0.40868648886680603, |
|
"learning_rate": 3.6019398176948656e-05, |
|
"loss": 0.8718, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.4780551173796047, |
|
"grad_norm": 0.3927657902240753, |
|
"learning_rate": 3.6004884612650326e-05, |
|
"loss": 0.8719, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.47879743899044264, |
|
"grad_norm": 0.37920475006103516, |
|
"learning_rate": 3.599034757299009e-05, |
|
"loss": 0.8496, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.4795397606012805, |
|
"grad_norm": 0.3785390257835388, |
|
"learning_rate": 3.597578707929033e-05, |
|
"loss": 0.7623, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.4802820822121184, |
|
"grad_norm": 0.3962065279483795, |
|
"learning_rate": 3.596120315290782e-05, |
|
"loss": 0.7983, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.4810244038229563, |
|
"grad_norm": 0.40997880697250366, |
|
"learning_rate": 3.594659581523374e-05, |
|
"loss": 0.7733, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.4817667254337942, |
|
"grad_norm": 0.3517996072769165, |
|
"learning_rate": 3.593196508769355e-05, |
|
"loss": 0.85, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.4825090470446321, |
|
"grad_norm": 0.3954550325870514, |
|
"learning_rate": 3.591731099174708e-05, |
|
"loss": 0.837, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.48325136865546997, |
|
"grad_norm": 0.3854595124721527, |
|
"learning_rate": 3.590263354888839e-05, |
|
"loss": 0.8456, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.4839936902663079, |
|
"grad_norm": 0.3883722126483917, |
|
"learning_rate": 3.58879327806458e-05, |
|
"loss": 0.7988, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.48473601187714577, |
|
"grad_norm": 0.432900607585907, |
|
"learning_rate": 3.5873208708581844e-05, |
|
"loss": 0.8776, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.4854783334879837, |
|
"grad_norm": 0.37475481629371643, |
|
"learning_rate": 3.5858461354293244e-05, |
|
"loss": 0.8021, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.48622065509882156, |
|
"grad_norm": 0.4133627116680145, |
|
"learning_rate": 3.584369073941086e-05, |
|
"loss": 0.8003, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.48696297670965943, |
|
"grad_norm": 0.3836146891117096, |
|
"learning_rate": 3.5828896885599666e-05, |
|
"loss": 0.8419, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.48770529832049736, |
|
"grad_norm": 0.40715518593788147, |
|
"learning_rate": 3.5814079814558737e-05, |
|
"loss": 0.8652, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.48844761993133523, |
|
"grad_norm": 0.4351827800273895, |
|
"learning_rate": 3.57992395480212e-05, |
|
"loss": 0.9617, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.48918994154217316, |
|
"grad_norm": 0.3888394832611084, |
|
"learning_rate": 3.578437610775418e-05, |
|
"loss": 0.8957, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.489932263153011, |
|
"grad_norm": 0.3931278586387634, |
|
"learning_rate": 3.5769489515558835e-05, |
|
"loss": 0.9177, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.49067458476384895, |
|
"grad_norm": 0.3771383464336395, |
|
"learning_rate": 3.575457979327024e-05, |
|
"loss": 0.8871, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.4914169063746868, |
|
"grad_norm": 0.43624410033226013, |
|
"learning_rate": 3.5739646962757426e-05, |
|
"loss": 0.8575, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.49215922798552475, |
|
"grad_norm": 0.38663193583488464, |
|
"learning_rate": 3.5724691045923296e-05, |
|
"loss": 0.7904, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.4929015495963626, |
|
"grad_norm": 0.40553534030914307, |
|
"learning_rate": 3.570971206470463e-05, |
|
"loss": 0.8474, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.49364387120720055, |
|
"grad_norm": 0.37837696075439453, |
|
"learning_rate": 3.5694710041072034e-05, |
|
"loss": 0.7521, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.4943861928180384, |
|
"grad_norm": 0.3879217505455017, |
|
"learning_rate": 3.567968499702991e-05, |
|
"loss": 0.9081, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.4951285144288763, |
|
"grad_norm": 0.32567813992500305, |
|
"learning_rate": 3.566463695461643e-05, |
|
"loss": 0.7584, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.4958708360397142, |
|
"grad_norm": 0.40887537598609924, |
|
"learning_rate": 3.564956593590349e-05, |
|
"loss": 0.7644, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.4966131576505521, |
|
"grad_norm": 0.3539418876171112, |
|
"learning_rate": 3.5634471962996704e-05, |
|
"loss": 0.7637, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.49735547926139, |
|
"grad_norm": 0.43159979581832886, |
|
"learning_rate": 3.5619355058035326e-05, |
|
"loss": 0.8638, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4980978008722279, |
|
"grad_norm": 0.3916085958480835, |
|
"learning_rate": 3.560421524319227e-05, |
|
"loss": 0.8409, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.4988401224830658, |
|
"grad_norm": 0.36610978841781616, |
|
"learning_rate": 3.558905254067405e-05, |
|
"loss": 0.7831, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.4995824440939037, |
|
"grad_norm": 0.3962661623954773, |
|
"learning_rate": 3.5573866972720746e-05, |
|
"loss": 0.8764, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.5003247657047416, |
|
"grad_norm": 0.5491713881492615, |
|
"learning_rate": 3.555865856160598e-05, |
|
"loss": 0.8311, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.5010670873155795, |
|
"grad_norm": 0.34136977791786194, |
|
"learning_rate": 3.554342732963687e-05, |
|
"loss": 0.7774, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.5018094089264173, |
|
"grad_norm": 0.3658745586872101, |
|
"learning_rate": 3.5528173299154015e-05, |
|
"loss": 0.7664, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.5025517305372552, |
|
"grad_norm": 0.40912461280822754, |
|
"learning_rate": 3.551289649253144e-05, |
|
"loss": 0.7996, |
|
"step": 677 |
|
}, |
|
{
"epoch": 0.5032940521480932,
"grad_norm": 0.4073297083377838,
"learning_rate": 3.5497596932176624e-05,
"loss": 0.8359,
"step": 678
},
{
"epoch": 0.5040363737589311,
"grad_norm": 0.3738069534301758,
"learning_rate": 3.548227464053035e-05,
"loss": 0.928,
"step": 679
},
{
"epoch": 0.5047786953697689,
"grad_norm": 0.37688982486724854,
"learning_rate": 3.54669296400668e-05,
"loss": 0.8951,
"step": 680
},
{
"epoch": 0.5055210169806068,
"grad_norm": 0.3885107934474945,
"learning_rate": 3.545156195329343e-05,
"loss": 0.8279,
"step": 681
},
{
"epoch": 0.5062633385914448,
"grad_norm": 0.35046374797821045,
"learning_rate": 3.5436171602750995e-05,
"loss": 0.7523,
"step": 682
},
{
"epoch": 0.5070056602022827,
"grad_norm": 0.3633882403373718,
"learning_rate": 3.542075861101347e-05,
"loss": 0.8077,
"step": 683
},
{
"epoch": 0.5077479818131205,
"grad_norm": 0.3783382475376129,
"learning_rate": 3.5405323000688056e-05,
"loss": 0.7674,
"step": 684
},
{
"epoch": 0.5084903034239584,
"grad_norm": 0.38828450441360474,
"learning_rate": 3.538986479441513e-05,
"loss": 0.7737,
"step": 685
},
{
"epoch": 0.5092326250347963,
"grad_norm": 0.4310080409049988,
"learning_rate": 3.53743840148682e-05,
"loss": 0.8116,
"step": 686
},
{
"epoch": 0.5099749466456343,
"grad_norm": 0.38372164964675903,
"learning_rate": 3.53588806847539e-05,
"loss": 0.7766,
"step": 687
},
{
"epoch": 0.5107172682564721,
"grad_norm": 0.3972887098789215,
"learning_rate": 3.534335482681192e-05,
"loss": 0.7485,
"step": 688
},
{
"epoch": 0.51145958986731,
"grad_norm": 0.3987019956111908,
"learning_rate": 3.5327806463815e-05,
"loss": 0.7545,
"step": 689
},
{
"epoch": 0.5122019114781479,
"grad_norm": 0.36997538805007935,
"learning_rate": 3.53122356185689e-05,
"loss": 0.8385,
"step": 690
},
{
"epoch": 0.5129442330889858,
"grad_norm": 0.40917664766311646,
"learning_rate": 3.529664231391236e-05,
"loss": 0.9488,
"step": 691
},
{
"epoch": 0.5136865546998237,
"grad_norm": 0.3602358102798462,
"learning_rate": 3.5281026572717025e-05,
"loss": 0.7981,
"step": 692
},
{
"epoch": 0.5144288763106616,
"grad_norm": 0.44117608666419983,
"learning_rate": 3.52653884178875e-05,
"loss": 0.7832,
"step": 693
},
{
"epoch": 0.5151711979214995,
"grad_norm": 0.3614718019962311,
"learning_rate": 3.524972787236124e-05,
"loss": 0.8741,
"step": 694
},
{
"epoch": 0.5159135195323374,
"grad_norm": 0.3987668752670288,
"learning_rate": 3.5234044959108534e-05,
"loss": 0.8139,
"step": 695
},
{
"epoch": 0.5166558411431753,
"grad_norm": 0.37257644534111023,
"learning_rate": 3.5218339701132486e-05,
"loss": 0.8719,
"step": 696
},
{
"epoch": 0.5173981627540132,
"grad_norm": 0.43900442123413086,
"learning_rate": 3.520261212146899e-05,
"loss": 0.797,
"step": 697
},
{
"epoch": 0.518140484364851,
"grad_norm": 0.3919014632701874,
"learning_rate": 3.518686224318665e-05,
"loss": 0.8669,
"step": 698
},
{
"epoch": 0.5188828059756889,
"grad_norm": 0.3852793574333191,
"learning_rate": 3.5171090089386816e-05,
"loss": 0.8776,
"step": 699
},
{
"epoch": 0.5196251275865269,
"grad_norm": 0.3601354956626892,
"learning_rate": 3.515529568320347e-05,
"loss": 0.8386,
"step": 700
},
{
"epoch": 0.5203674491973648,
"grad_norm": 0.4443444609642029,
"learning_rate": 3.5139479047803254e-05,
"loss": 0.8605,
"step": 701
},
{
"epoch": 0.5211097708082026,
"grad_norm": 0.3493961989879608,
"learning_rate": 3.5123640206385425e-05,
"loss": 0.7663,
"step": 702
},
{
"epoch": 0.5218520924190405,
"grad_norm": 0.4082690179347992,
"learning_rate": 3.510777918218179e-05,
"loss": 0.7387,
"step": 703
},
{
"epoch": 0.5225944140298785,
"grad_norm": 0.41370537877082825,
"learning_rate": 3.509189599845671e-05,
"loss": 0.8461,
"step": 704
},
{
"epoch": 0.5233367356407164,
"grad_norm": 0.39665308594703674,
"learning_rate": 3.507599067850703e-05,
"loss": 0.8814,
"step": 705
},
{
"epoch": 0.5240790572515542,
"grad_norm": 0.37258079648017883,
"learning_rate": 3.5060063245662085e-05,
"loss": 0.9054,
"step": 706
},
{
"epoch": 0.5248213788623921,
"grad_norm": 0.3601747751235962,
"learning_rate": 3.504411372328362e-05,
"loss": 0.8276,
"step": 707
},
{
"epoch": 0.52556370047323,
"grad_norm": 0.3841734826564789,
"learning_rate": 3.5028142134765794e-05,
"loss": 0.8039,
"step": 708
},
{
"epoch": 0.526306022084068,
"grad_norm": 0.432404100894928,
"learning_rate": 3.501214850353515e-05,
"loss": 0.8261,
"step": 709
},
{
"epoch": 0.5270483436949058,
"grad_norm": 0.42703214287757874,
"learning_rate": 3.499613285305053e-05,
"loss": 0.8143,
"step": 710
},
{
"epoch": 0.5277906653057437,
"grad_norm": 0.4320433735847473,
"learning_rate": 3.498009520680309e-05,
"loss": 0.9653,
"step": 711
},
{
"epoch": 0.5285329869165816,
"grad_norm": 0.37552034854888916,
"learning_rate": 3.496403558831625e-05,
"loss": 0.8471,
"step": 712
},
{
"epoch": 0.5292753085274196,
"grad_norm": 0.4038369953632355,
"learning_rate": 3.494795402114564e-05,
"loss": 0.9201,
"step": 713
},
{
"epoch": 0.5300176301382574,
"grad_norm": 0.41206666827201843,
"learning_rate": 3.4931850528879105e-05,
"loss": 0.9038,
"step": 714
},
{
"epoch": 0.5307599517490953,
"grad_norm": 0.4328741431236267,
"learning_rate": 3.491572513513664e-05,
"loss": 0.7807,
"step": 715
},
{
"epoch": 0.5315022733599332,
"grad_norm": 0.39441853761672974,
"learning_rate": 3.489957786357037e-05,
"loss": 0.8205,
"step": 716
},
{
"epoch": 0.532244594970771,
"grad_norm": 0.38545891642570496,
"learning_rate": 3.488340873786451e-05,
"loss": 0.8253,
},
{
"epoch": 0.532986916581609,
"grad_norm": 0.3683164417743683,
"learning_rate": 3.4867217781735296e-05,
"loss": 0.7755,
"step": 718
},
{
"epoch": 0.5337292381924469,
"grad_norm": 0.3887660503387451,
"learning_rate": 3.485100501893105e-05,
"loss": 0.8543,
"step": 719
},
{
"epoch": 0.5344715598032848,
"grad_norm": 0.44087541103363037,
"learning_rate": 3.4834770473232014e-05,
"loss": 0.8477,
"step": 720
},
{
"epoch": 0.5352138814141226,
"grad_norm": 0.3935564458370209,
"learning_rate": 3.481851416845042e-05,
"loss": 0.7372,
"step": 721
},
{
"epoch": 0.5359562030249606,
"grad_norm": 0.3968266546726227,
"learning_rate": 3.4802236128430404e-05,
"loss": 0.8384,
"step": 722
},
{
"epoch": 0.5366985246357985,
"grad_norm": 0.4122190773487091,
"learning_rate": 3.4785936377047975e-05,
"loss": 0.9467,
"step": 723
},
{
"epoch": 0.5374408462466364,
"grad_norm": 0.40589356422424316,
"learning_rate": 3.476961493821101e-05,
"loss": 0.849,
"step": 724
},
{
"epoch": 0.5381831678574742,
"grad_norm": 0.37581416964530945,
"learning_rate": 3.475327183585916e-05,
"loss": 0.8122,
"step": 725
},
{
"epoch": 0.5389254894683121,
"grad_norm": 0.41619259119033813,
"learning_rate": 3.473690709396389e-05,
"loss": 0.8647,
"step": 726
},
{
"epoch": 0.5396678110791501,
"grad_norm": 0.3226104974746704,
"learning_rate": 3.472052073652837e-05,
"loss": 0.7746,
"step": 727
},
{
"epoch": 0.5404101326899879,
"grad_norm": 0.37998801469802856,
"learning_rate": 3.4704112787587496e-05,
"loss": 0.869,
"step": 728
},
{
"epoch": 0.5411524543008258,
"grad_norm": 0.3882545530796051,
"learning_rate": 3.468768327120784e-05,
"loss": 0.8667,
"step": 729
},
{
"epoch": 0.5418947759116637,
"grad_norm": 0.3522816002368927,
"learning_rate": 3.4671232211487595e-05,
"loss": 0.7567,
"step": 730
},
{
"epoch": 0.5426370975225017,
"grad_norm": 0.4263498783111572,
"learning_rate": 3.4654759632556555e-05,
"loss": 0.8234,
"step": 731
},
{
"epoch": 0.5433794191333395,
"grad_norm": 0.34016892313957214,
"learning_rate": 3.463826555857607e-05,
"loss": 0.8479,
"step": 732
},
{
"epoch": 0.5441217407441774,
"grad_norm": 0.4033363461494446,
"learning_rate": 3.4621750013739035e-05,
"loss": 0.8393,
"step": 733
},
{
"epoch": 0.5448640623550153,
"grad_norm": 0.3858112394809723,
"learning_rate": 3.4605213022269825e-05,
"loss": 0.8389,
"step": 734
},
{
"epoch": 0.5456063839658533,
"grad_norm": 0.3489677608013153,
"learning_rate": 3.458865460842428e-05,
"loss": 0.7695,
"step": 735
},
{
"epoch": 0.5463487055766911,
"grad_norm": 0.39663171768188477,
"learning_rate": 3.457207479648965e-05,
"loss": 0.8842,
"step": 736
},
{
"epoch": 0.547091027187529,
"grad_norm": 0.3862421214580536,
"learning_rate": 3.455547361078459e-05,
"loss": 0.849,
"step": 737
},
{
"epoch": 0.5478333487983669,
"grad_norm": 0.3735657036304474,
"learning_rate": 3.453885107565908e-05,
"loss": 0.7996,
"step": 738
},
{
"epoch": 0.5485756704092047,
"grad_norm": 0.4164029359817505,
"learning_rate": 3.4522207215494444e-05,
"loss": 0.8688,
"step": 739
},
{
"epoch": 0.5493179920200427,
"grad_norm": 0.3550073802471161,
"learning_rate": 3.4505542054703256e-05,
"loss": 0.7474,
"step": 740
},
{
"epoch": 0.5500603136308806,
"grad_norm": 0.3763352036476135,
"learning_rate": 3.4488855617729356e-05,
"loss": 0.7542,
"step": 741
},
{
"epoch": 0.5508026352417185,
"grad_norm": 0.41175779700279236,
"learning_rate": 3.447214792904778e-05,
"loss": 0.7904,
"step": 742
},
{
"epoch": 0.5515449568525563,
"grad_norm": 0.40034011006355286,
"learning_rate": 3.445541901316473e-05,
"loss": 0.8647,
"step": 743
},
{
"epoch": 0.5522872784633943,
"grad_norm": 0.3602341115474701,
"learning_rate": 3.443866889461755e-05,
"loss": 0.8479,
"step": 744
},
{
"epoch": 0.5530296000742322,
"grad_norm": 0.4051341712474823,
"learning_rate": 3.442189759797469e-05,
"loss": 0.8418,
"step": 745
},
{
"epoch": 0.5537719216850701,
"grad_norm": 0.4291275441646576,
"learning_rate": 3.440510514783565e-05,
"loss": 0.8203,
"step": 746
},
{
"epoch": 0.5545142432959079,
"grad_norm": 0.38578981161117554,
"learning_rate": 3.4388291568830965e-05,
"loss": 0.8703,
"step": 747
},
{
"epoch": 0.5552565649067458,
"grad_norm": 0.48124009370803833,
"learning_rate": 3.437145688562216e-05,
"loss": 0.8084,
"step": 748
},
{
"epoch": 0.5559988865175838,
"grad_norm": 0.41740983724594116,
"learning_rate": 3.435460112290172e-05,
"loss": 0.7703,
"step": 749
},
{
"epoch": 0.5567412081284216,
"grad_norm": 0.4573703110218048,
"learning_rate": 3.433772430539303e-05,
"loss": 0.9169,
"step": 750
},
{
"epoch": 0.5574835297392595,
"grad_norm": 0.4573703110218048,
"learning_rate": 3.433772430539303e-05,
"loss": 0.7791,
"step": 751
},
{
"epoch": 0.5582258513500974,
"grad_norm": 0.4058649241924286,
"learning_rate": 3.432082645785038e-05,
"loss": 0.8604,
"step": 752
},
{
"epoch": 0.5589681729609354,
"grad_norm": 0.3486841917037964,
"learning_rate": 3.430390760505889e-05,
"loss": 0.7316,
"step": 753
},
{
"epoch": 0.5597104945717732,
"grad_norm": 0.36052748560905457,
"learning_rate": 3.428696777183451e-05,
"loss": 0.6917,
"step": 754
},
{
"epoch": 0.5604528161826111,
"grad_norm": 0.40812695026397705,
"learning_rate": 3.427000698302393e-05,
"loss": 0.9129,
"step": 755
},
{
"epoch": 0.561195137793449,
"grad_norm": 0.402180939912796,
"learning_rate": 3.4253025263504605e-05,
"loss": 0.8565,
"step": 756
},
{
"epoch": 0.5619374594042869,
"grad_norm": 0.3584100604057312,
"learning_rate": 3.4236022638184684e-05,
"loss": 0.926,
"step": 757
},
{
"epoch": 0.5626797810151248,
"grad_norm": 0.4021860957145691,
"learning_rate": 3.421899913200297e-05,
"loss": 0.8613,
"step": 758
},
{
"epoch": 0.5634221026259627,
"grad_norm": 0.4699723422527313,
"learning_rate": 3.420195476992892e-05,
"loss": 0.8572,
"step": 759
},
{
"epoch": 0.5641644242368006,
"grad_norm": 0.37108978629112244,
"learning_rate": 3.418488957696253e-05,
"loss": 0.8848,
"step": 760
},
{
"epoch": 0.5649067458476384,
"grad_norm": 0.4078989326953888,
"learning_rate": 3.4167803578134406e-05,
"loss": 0.9227,
"step": 761
},
{
"epoch": 0.5656490674584764,
"grad_norm": 0.399107426404953,
"learning_rate": 3.4150696798505644e-05,
"loss": 0.7975,
"step": 762
},
{
"epoch": 0.5663913890693143,
"grad_norm": 0.3846902549266815,
"learning_rate": 3.413356926316782e-05,
"loss": 0.7224,
"step": 763
},
{
"epoch": 0.5671337106801522,
"grad_norm": 0.37825313210487366,
"learning_rate": 3.4116420997242964e-05,
"loss": 0.8094,
"step": 764
},
{
"epoch": 0.56787603229099,
"grad_norm": 0.37176600098609924,
"learning_rate": 3.40992520258835e-05,
"loss": 0.8543,
"step": 765
},
{
"epoch": 0.568618353901828,
"grad_norm": 0.3635413646697998,
"learning_rate": 3.408206237427223e-05,
"loss": 0.8697,
"step": 766
},
{
"epoch": 0.5693606755126659,
"grad_norm": 0.4106957018375397,
"learning_rate": 3.406485206762229e-05,
"loss": 0.8703,
"step": 767
},
{
"epoch": 0.5701029971235038,
"grad_norm": 0.35687169432640076,
"learning_rate": 3.40476211311771e-05,
"loss": 0.9626,
"step": 768
},
{
"epoch": 0.5708453187343416,
"grad_norm": 0.4074583351612091,
"learning_rate": 3.403036959021036e-05,
"loss": 0.888,
"step": 769
},
{
"epoch": 0.5715876403451795,
"grad_norm": 0.33353114128112793,
"learning_rate": 3.401309747002598e-05,
"loss": 0.8315,
"step": 770
},
{
"epoch": 0.5723299619560175,
"grad_norm": 0.3577583432197571,
"learning_rate": 3.3995804795958055e-05,
"loss": 0.8436,
"step": 771
},
{
"epoch": 0.5730722835668554,
"grad_norm": 0.4142861068248749,
"learning_rate": 3.3978491593370814e-05,
"loss": 0.9741,
"step": 772
},
{
"epoch": 0.5738146051776932,
"grad_norm": 0.4031902253627777,
"learning_rate": 3.3961157887658624e-05,
"loss": 0.8151,
"step": 773
},
{
"epoch": 0.5745569267885311,
"grad_norm": 0.45672792196273804,
"learning_rate": 3.394380370424592e-05,
"loss": 0.8195,
"step": 774
},
{
"epoch": 0.5752992483993691,
"grad_norm": 0.3539574444293976,
"learning_rate": 3.392642906858714e-05,
"loss": 0.753,
"step": 775
},
{
"epoch": 0.576041570010207,
"grad_norm": 0.4349236488342285,
"learning_rate": 3.390903400616677e-05,
"loss": 0.7667,
"step": 776
},
{
"epoch": 0.5767838916210448,
"grad_norm": 0.42073139548301697,
"learning_rate": 3.389161854249921e-05,
"loss": 0.8512,
"step": 777
},
{
"epoch": 0.5775262132318827,
"grad_norm": 0.39800146222114563,
"learning_rate": 3.3874182703128825e-05,
"loss": 0.8022,
"step": 778
},
{
"epoch": 0.5782685348427206,
"grad_norm": 0.47772639989852905,
"learning_rate": 3.385672651362984e-05,
"loss": 0.9928,
"step": 779
},
{
"epoch": 0.5790108564535585,
"grad_norm": 0.35686033964157104,
"learning_rate": 3.383924999960633e-05,
"loss": 0.7766,
"step": 780
},
{
"epoch": 0.5797531780643964,
"grad_norm": 0.38933950662612915,
"learning_rate": 3.3821753186692194e-05,
"loss": 0.7371,
"step": 781
},
{
"epoch": 0.5804954996752343,
"grad_norm": 0.3493152856826782,
"learning_rate": 3.38042361005511e-05,
"loss": 0.8075,
"step": 782
},
{
"epoch": 0.5812378212860722,
"grad_norm": 0.3548669219017029,
"learning_rate": 3.378669876687645e-05,
"loss": 0.7585,
"step": 783
},
{
"epoch": 0.5819801428969101,
"grad_norm": 0.4344249665737152,
"learning_rate": 3.3769141211391336e-05,
"loss": 0.818,
"step": 784
},
{
"epoch": 0.582722464507748,
"grad_norm": 0.3780219852924347,
"learning_rate": 3.375156345984853e-05,
"loss": 0.8477,
"step": 785
},
{
"epoch": 0.5834647861185859,
"grad_norm": 0.3857625722885132,
"learning_rate": 3.373396553803041e-05,
"loss": 0.9338,
"step": 786
},
{
"epoch": 0.5842071077294237,
"grad_norm": 0.4386730194091797,
"learning_rate": 3.371634747174895e-05,
"loss": 0.8064,
"step": 787
},
{
"epoch": 0.5849494293402616,
"grad_norm": 0.39791983366012573,
"learning_rate": 3.3698709286845655e-05,
"loss": 0.9154,
"step": 788
},
{
"epoch": 0.5856917509510996,
"grad_norm": 0.3847380578517914,
"learning_rate": 3.368105100919156e-05,
"loss": 0.8217,
"step": 789
},
{
"epoch": 0.5864340725619375,
"grad_norm": 0.36979803442955017,
"learning_rate": 3.3663372664687154e-05,
"loss": 0.7634,
"step": 790
},
{
"epoch": 0.5871763941727753,
"grad_norm": 0.38267242908477783,
"learning_rate": 3.3645674279262376e-05,
"loss": 0.7904,
"step": 791
},
{
"epoch": 0.5879187157836132,
"grad_norm": 0.47703686356544495,
"learning_rate": 3.362795587887655e-05,
"loss": 0.8899,
"step": 792
},
{
"epoch": 0.5886610373944512,
"grad_norm": 0.3864832818508148,
"learning_rate": 3.361021748951835e-05,
"loss": 0.8206,
"step": 793
},
{
"epoch": 0.5894033590052891,
"grad_norm": 0.367291659116745,
"learning_rate": 3.359245913720578e-05,
"loss": 0.7678,
"step": 794
},
{
"epoch": 0.5901456806161269,
"grad_norm": 0.3677731454372406,
"learning_rate": 3.357468084798613e-05,
"loss": 0.7437,
"step": 795
},
{
"epoch": 0.5908880022269648,
"grad_norm": 0.3390463888645172,
"learning_rate": 3.355688264793592e-05,
"loss": 0.8462,
"step": 796
},
{
"epoch": 0.5916303238378027,
"grad_norm": 0.3929853141307831,
"learning_rate": 3.3539064563160874e-05,
"loss": 0.8486,
"step": 797
},
{
"epoch": 0.5923726454486407,
"grad_norm": 0.42393356561660767,
"learning_rate": 3.35212266197959e-05,
"loss": 0.9258,
"step": 798
},
{
"epoch": 0.5931149670594785,
"grad_norm": 0.4595308005809784,
"learning_rate": 3.350336884400501e-05,
"loss": 0.8199,
"step": 799
},
{
"epoch": 0.5938572886703164,
"grad_norm": 0.39049914479255676,
"learning_rate": 3.348549126198132e-05,
"loss": 0.8381,
"step": 800
},
{
"epoch": 0.5945996102811543,
"grad_norm": 0.3757772147655487,
"learning_rate": 3.346759389994699e-05,
"loss": 0.7859,
"step": 801
},
{
"epoch": 0.5953419318919922,
"grad_norm": 0.3846074938774109,
"learning_rate": 3.344967678415321e-05,
"loss": 0.9707,
"step": 802
},
{
"epoch": 0.5960842535028301,
"grad_norm": 0.3639870285987854,
"learning_rate": 3.343173994088012e-05,
"loss": 0.8917,
"step": 803
},
{
"epoch": 0.596826575113668,
"grad_norm": 0.4003622531890869,
"learning_rate": 3.3413783396436805e-05,
"loss": 0.8328,
"step": 804
},
{
"epoch": 0.5975688967245059,
"grad_norm": 0.37097591161727905,
"learning_rate": 3.339580717716126e-05,
"loss": 0.8419,
"step": 805
},
{
"epoch": 0.5983112183353438,
"grad_norm": 0.377591997385025,
"learning_rate": 3.337781130942031e-05,
"loss": 0.7994,
"step": 806
},
{
"epoch": 0.5990535399461817,
"grad_norm": 0.3893373906612396,
"learning_rate": 3.3359795819609624e-05,
"loss": 0.8612,
"step": 807
},
{
"epoch": 0.5997958615570196,
"grad_norm": 0.43286728858947754,
"learning_rate": 3.334176073415364e-05,
"loss": 0.804,
"step": 808
},
{
"epoch": 0.6005381831678575,
"grad_norm": 0.38128551840782166,
"learning_rate": 3.332370607950555e-05,
"loss": 0.8117,
"step": 809
},
{
"epoch": 0.6012805047786953,
"grad_norm": 0.42448368668556213,
"learning_rate": 3.330563188214724e-05,
"loss": 0.9025,
"step": 810
},
{
"epoch": 0.6020228263895333,
"grad_norm": 0.38770368695259094,
"learning_rate": 3.328753816858925e-05,
"loss": 0.8392,
"step": 811
},
{
"epoch": 0.6027651480003712,
"grad_norm": 0.39272114634513855,
"learning_rate": 3.326942496537077e-05,
"loss": 0.8368,
"step": 812
},
{
"epoch": 0.603507469611209,
"grad_norm": 0.49103134870529175,
"learning_rate": 3.325129229905956e-05,
"loss": 0.9017,
"step": 813
},
{
"epoch": 0.6042497912220469,
"grad_norm": 0.3955812454223633,
"learning_rate": 3.323314019625193e-05,
"loss": 0.8856,
"step": 814
},
{
"epoch": 0.6049921128328849,
"grad_norm": 0.34001222252845764,
"learning_rate": 3.321496868357272e-05,
"loss": 0.7656,
"step": 815
},
{
"epoch": 0.6057344344437228,
"grad_norm": 0.43722572922706604,
"learning_rate": 3.319677778767518e-05,
"loss": 0.8833,
"step": 816
},
{
"epoch": 0.6064767560545606,
"grad_norm": 0.42245838046073914,
"learning_rate": 3.3178567535241066e-05,
"loss": 0.8043,
"step": 817
},
{
"epoch": 0.6072190776653985,
"grad_norm": 0.3846532106399536,
"learning_rate": 3.3160337952980475e-05,
"loss": 0.8255,
"step": 818
},
{
"epoch": 0.6079613992762364,
"grad_norm": 0.34817036986351013,
"learning_rate": 3.314208906763188e-05,
"loss": 0.8635,
"step": 819
},
{
"epoch": 0.6087037208870744,
"grad_norm": 0.3706674575805664,
"learning_rate": 3.312382090596205e-05,
"loss": 0.7863,
"step": 820
},
{
"epoch": 0.6094460424979122,
"grad_norm": 0.39172613620758057,
"learning_rate": 3.310553349476604e-05,
"loss": 0.7935,
"step": 821
},
{
"epoch": 0.6101883641087501,
"grad_norm": 0.401347279548645,
"learning_rate": 3.308722686086714e-05,
"loss": 0.8842,
"step": 822
},
{
"epoch": 0.610930685719588,
"grad_norm": 0.384919136762619,
"learning_rate": 3.306890103111682e-05,
"loss": 0.8038,
"step": 823
},
{
"epoch": 0.611673007330426,
"grad_norm": 0.4191407859325409,
"learning_rate": 3.305055603239473e-05,
"loss": 0.9038,
"step": 824
},
{
"epoch": 0.6124153289412638,
"grad_norm": 0.41282200813293457,
"learning_rate": 3.303219189160862e-05,
"loss": 0.768,
"step": 825
},
{
"epoch": 0.6131576505521017,
"grad_norm": 0.3830862045288086,
"learning_rate": 3.301380863569431e-05,
"loss": 0.8179,
"step": 826
},
{
"epoch": 0.6138999721629396,
"grad_norm": 0.3940469026565552,
"learning_rate": 3.299540629161568e-05,
"loss": 0.8294,
"step": 827
},
{
"epoch": 0.6146422937737774,
"grad_norm": 0.38618114590644836,
"learning_rate": 3.2976984886364586e-05,
"loss": 0.8532,
"step": 828
},
{
"epoch": 0.6153846153846154,
"grad_norm": 0.394499272108078,
"learning_rate": 3.295854444696086e-05,
"loss": 0.8044,
"step": 829
},
{
"epoch": 0.6161269369954533,
"grad_norm": 0.4039349853992462,
"learning_rate": 3.294008500045223e-05,
"loss": 0.8388,
"step": 830
},
{
"epoch": 0.6168692586062912,
"grad_norm": 0.38985222578048706,
"learning_rate": 3.292160657391433e-05,
"loss": 0.8092,
"step": 831
},
{
"epoch": 0.617611580217129,
"grad_norm": 0.416485458612442,
"learning_rate": 3.290310919445062e-05,
"loss": 0.8467,
"step": 832
},
{
"epoch": 0.618353901827967,
"grad_norm": 0.4300270676612854,
"learning_rate": 3.288459288919236e-05,
"loss": 0.8217,
"step": 833
},
{
"epoch": 0.6190962234388049,
"grad_norm": 0.3902897238731384,
"learning_rate": 3.286605768529856e-05,
"loss": 0.8181,
"step": 834
},
{
"epoch": 0.6198385450496428,
"grad_norm": 0.4227379262447357,
"learning_rate": 3.2847503609955955e-05,
"loss": 0.8944,
"step": 835
},
{
"epoch": 0.6205808666604806,
"grad_norm": 0.4002430737018585,
"learning_rate": 3.2828930690378976e-05,
"loss": 0.7856,
"step": 836
},
{
"epoch": 0.6213231882713185,
"grad_norm": 0.42526379227638245,
"learning_rate": 3.281033895380969e-05,
"loss": 0.8948,
"step": 837
},
{
"epoch": 0.6220655098821565,
"grad_norm": 0.37326619029045105,
"learning_rate": 3.279172842751773e-05,
"loss": 0.8338,
"step": 838
},
{
"epoch": 0.6228078314929943,
"grad_norm": 0.3695979714393616,
"learning_rate": 3.277309913880033e-05,
"loss": 0.8823,
"step": 839
},
{
"epoch": 0.6235501531038322,
"grad_norm": 0.3828786015510559,
"learning_rate": 3.2754451114982235e-05,
"loss": 0.9321,
"step": 840
},
{
"epoch": 0.6242924747146701,
"grad_norm": 0.3889092803001404,
"learning_rate": 3.273578438341566e-05,
"loss": 0.8804,
"step": 841
},
{
"epoch": 0.6250347963255081,
"grad_norm": 0.35321757197380066,
"learning_rate": 3.271709897148025e-05,
"loss": 0.8862,
"step": 842
},
{
"epoch": 0.6257771179363459,
"grad_norm": 0.3559962213039398,
"learning_rate": 3.269839490658308e-05,
"loss": 0.8031,
"step": 843
},
{
"epoch": 0.6265194395471838,
"grad_norm": 0.3700624108314514,
"learning_rate": 3.267967221615856e-05,
"loss": 0.7905,
"step": 844
},
{
"epoch": 0.6272617611580217,
"grad_norm": 0.3585308790206909,
"learning_rate": 3.266093092766843e-05,
"loss": 0.7957,
"step": 845
},
{
"epoch": 0.6280040827688597,
"grad_norm": 0.36834481358528137,
"learning_rate": 3.2642171068601705e-05,
"loss": 0.8328,
"step": 846
},
{
"epoch": 0.6287464043796975,
"grad_norm": 0.38698065280914307,
"learning_rate": 3.262339266647464e-05,
"loss": 0.8505,
"step": 847
},
{
"epoch": 0.6294887259905354,
"grad_norm": 0.39008861780166626,
"learning_rate": 3.260459574883069e-05,
"loss": 0.8754,
"step": 848
},
{
"epoch": 0.6302310476013733,
"grad_norm": 0.3963302671909332,
"learning_rate": 3.258578034324046e-05,
"loss": 0.8419,
"step": 849
},
{
"epoch": 0.6309733692122111,
"grad_norm": 0.39984002709388733,
"learning_rate": 3.2566946477301684e-05,
"loss": 0.8469,
"step": 850
},
{
"epoch": 0.6317156908230491,
"grad_norm": 0.39819765090942383,
"learning_rate": 3.254809417863916e-05,
"loss": 0.8328,
"step": 851
},
{
"epoch": 0.632458012433887,
"grad_norm": 0.4232073128223419,
"learning_rate": 3.252922347490474e-05,
"loss": 0.8927,
"step": 852
},
{
"epoch": 0.6332003340447249,
"grad_norm": 0.3462536334991455,
"learning_rate": 3.2510334393777254e-05,
"loss": 0.8007,
"step": 853
},
{
"epoch": 0.6339426556555627,
"grad_norm": 0.3509646952152252,
"learning_rate": 3.24914269629625e-05,
"loss": 0.8285,
"step": 854
},
{
"epoch": 0.6346849772664007,
"grad_norm": 0.38063347339630127,
"learning_rate": 3.247250121019318e-05,
"loss": 0.8141,
"step": 855
},
{
"epoch": 0.6354272988772386,
"grad_norm": 0.4435429871082306,
"learning_rate": 3.245355716322887e-05,
"loss": 0.8522,
"step": 856
},
{
"epoch": 0.6361696204880765,
"grad_norm": 0.41526708006858826,
"learning_rate": 3.2434594849856006e-05,
"loss": 0.8535,
"step": 857
},
{
"epoch": 0.6369119420989143,
"grad_norm": 0.43962687253952026,
"learning_rate": 3.241561429788777e-05,
"loss": 0.8415,
"step": 858
},
{
"epoch": 0.6376542637097522,
"grad_norm": 0.3527955710887909,
"learning_rate": 3.239661553516414e-05,
"loss": 0.9008,
"step": 859
},
{
"epoch": 0.6383965853205902,
"grad_norm": 0.36493077874183655,
"learning_rate": 3.237759858955177e-05,
"loss": 0.7132,
"step": 860
},
{
"epoch": 0.639138906931428,
"grad_norm": 0.38288044929504395,
"learning_rate": 3.235856348894401e-05,
"loss": 0.8561,
"step": 861
},
{
"epoch": 0.6398812285422659,
"grad_norm": 0.4172011911869049,
"learning_rate": 3.2339510261260826e-05,
"loss": 0.8374,
"step": 862
},
{
"epoch": 0.6406235501531038,
"grad_norm": 0.4358551502227783,
"learning_rate": 3.2320438934448774e-05,
"loss": 0.9169,
"step": 863
},
{
"epoch": 0.6413658717639418,
"grad_norm": 0.35578832030296326,
"learning_rate": 3.230134953648096e-05,
"loss": 0.8731,
"step": 864
},
{
"epoch": 0.6421081933747796,
"grad_norm": 0.4214344620704651,
"learning_rate": 3.228224209535698e-05,
"loss": 0.8212,
"step": 865
},
{
"epoch": 0.6428505149856175,
"grad_norm": 0.40245211124420166,
"learning_rate": 3.226311663910293e-05,
"loss": 0.8055,
"step": 866
},
{
"epoch": 0.6435928365964554,
"grad_norm": 0.3829837739467621,
"learning_rate": 3.22439731957713e-05,
"loss": 0.8086,
"step": 867
},
{
"epoch": 0.6443351582072933,
"grad_norm": 0.43586400151252747,
"learning_rate": 3.222481179344096e-05,
"loss": 0.8499,
"step": 868
},
{
"epoch": 0.6450774798181312,
"grad_norm": 0.3507193326950073,
"learning_rate": 3.220563246021716e-05,
"loss": 0.8941,
"step": 869
},
{
"epoch": 0.6458198014289691,
"grad_norm": 0.3587462306022644,
"learning_rate": 3.21864352242314e-05,
"loss": 0.7786,
"step": 870
},
{
"epoch": 0.646562123039807,
"grad_norm": 0.37669169902801514,
"learning_rate": 3.216722011364147e-05,
"loss": 0.7859,
"step": 871
},
{
"epoch": 0.6473044446506449,
"grad_norm": 0.4303325414657593,
"learning_rate": 3.2147987156631377e-05,
"loss": 0.8269,
"step": 872
},
{
"epoch": 0.6480467662614828,
"grad_norm": 0.45145362615585327,
"learning_rate": 3.212873638141129e-05,
"loss": 0.8287,
"step": 873
},
{
"epoch": 0.6487890878723207,
"grad_norm": 0.4037097692489624,
"learning_rate": 3.210946781621752e-05,
"loss": 0.7869,
"step": 874
},
{
"epoch": 0.6495314094831586,
"grad_norm": 0.37830254435539246,
"learning_rate": 3.2090181489312476e-05,
"loss": 0.8967,
"step": 875
},
{
"epoch": 0.6502737310939964,
"grad_norm": 0.39029765129089355,
"learning_rate": 3.207087742898461e-05,
"loss": 0.7591,
"step": 876
},
{
"epoch": 0.6510160527048344,
"grad_norm": 0.35120412707328796,
"learning_rate": 3.20515556635484e-05,
"loss": 0.8334,
"step": 877
},
{
"epoch": 0.6517583743156723,
"grad_norm": 0.36138907074928284,
"learning_rate": 3.203221622134428e-05,
"loss": 0.7944,
"step": 878
},
{
"epoch": 0.6525006959265102,
"grad_norm": 0.3629201650619507,
"learning_rate": 3.2012859130738615e-05,
"loss": 0.852,
"step": 879
},
{
"epoch": 0.653243017537348,
"grad_norm": 0.4027819037437439,
"learning_rate": 3.199348442012366e-05,
"loss": 0.9275,
"step": 880
},
{
"epoch": 0.6539853391481859,
"grad_norm": 0.36106258630752563,
"learning_rate": 3.1974092117917504e-05,
"loss": 0.7495,
"step": 881
},
{
"epoch": 0.6547276607590239,
"grad_norm": 0.49002131819725037,
"learning_rate": 3.1954682252564054e-05,
"loss": 0.818,
"step": 882
},
{
"epoch": 0.6554699823698618,
"grad_norm": 0.36758434772491455,
"learning_rate": 3.1935254852532956e-05,
"loss": 0.8609,
"step": 883
},
{
"epoch": 0.6562123039806996,
"grad_norm": 0.4289117157459259,
"learning_rate": 3.19158099463196e-05,
"loss": 0.8596,
"step": 884
},
{
"epoch": 0.6569546255915375,
"grad_norm": 0.3696727752685547,
"learning_rate": 3.189634756244504e-05,
"loss": 0.8629,
"step": 885
},
{
"epoch": 0.6576969472023755,
"grad_norm": 0.3562766909599304,
"learning_rate": 3.187686772945597e-05,
"loss": 0.7651,
"step": 886
},
{
"epoch": 0.6584392688132134,
"grad_norm": 0.38660067319869995,
"learning_rate": 3.185737047592467e-05,
"loss": 0.9487,
"step": 887
},
{
"epoch": 0.6591815904240512,
"grad_norm": 0.421682208776474,
"learning_rate": 3.1837855830448976e-05,
"loss": 0.8885,
"step": 888
},
{
"epoch": 0.6599239120348891,
"grad_norm": 0.366974800825119,
"learning_rate": 3.181832382165223e-05,
"loss": 0.8434,
"step": 889
},
{
"epoch": 0.660666233645727,
"grad_norm": 0.4101247787475586,
"learning_rate": 3.179877447818326e-05,
"loss": 0.8329,
"step": 890
},
{
"epoch": 0.661408555256565,
"grad_norm": 0.3925361931324005,
"learning_rate": 3.17792078287163e-05,
"loss": 0.866,
"step": 891
},
{
"epoch": 0.6621508768674028,
"grad_norm": 0.39480844140052795,
"learning_rate": 3.175962390195098e-05,
"loss": 0.8342,
"step": 892
},
{
"epoch": 0.6628931984782407,
"grad_norm": 0.42709940671920776,
"learning_rate": 3.174002272661226e-05,
"loss": 0.8079,
"step": 893
},
{
"epoch": 0.6636355200890786,
"grad_norm": 0.3921566307544708,
"learning_rate": 3.172040433145041e-05,
"loss": 0.8701,
"step": 894
},
{
"epoch": 0.6643778416999165,
"grad_norm": 0.37820595502853394,
"learning_rate": 3.1700768745240945e-05,
"loss": 0.8168,
"step": 895
},
{
"epoch": 0.6651201633107544,
"grad_norm": 0.4000493884086609,
"learning_rate": 3.1681115996784617e-05,
"loss": 0.7657,
"step": 896
},
{
"epoch": 0.6658624849215923,
"grad_norm": 0.4290461838245392,
"learning_rate": 3.1661446114907325e-05,
"loss": 0.7515,
"step": 897
},
{
"epoch": 0.6666048065324301,
"grad_norm": 0.3985341191291809,
"learning_rate": 3.164175912846011e-05,
"loss": 0.7869,
"step": 898
},
{
"epoch": 0.667347128143268,
"grad_norm": 0.3736097812652588,
"learning_rate": 3.162205506631911e-05,
"loss": 0.8461,
"step": 899
},
{
"epoch": 0.668089449754106,
"grad_norm": 0.4430498480796814,
"learning_rate": 3.1602333957385495e-05,
"loss": 0.88,
"step": 900
},
{
"epoch": 0.6688317713649439,
"grad_norm": 0.4275624454021454,
"learning_rate": 3.158259583058545e-05,
"loss": 0.833,
"step": 901
},
{
"epoch": 0.6695740929757817,
"grad_norm": 0.42715683579444885,
"learning_rate": 3.156284071487012e-05,
"loss": 0.8321,
"step": 902
},
{
"epoch": 0.6703164145866196,
"grad_norm": 0.46634772419929504,
"learning_rate": 3.154306863921555e-05,
"loss": 0.8799,
"step": 903
},
{
"epoch": 0.6710587361974576,
"grad_norm": 0.38095808029174805,
"learning_rate": 3.1523279632622684e-05,
"loss": 0.805,
"step": 904
},
{
"epoch": 0.6718010578082955,
"grad_norm": 0.3953753709793091,
"learning_rate": 3.150347372411729e-05,
"loss": 0.9367,
"step": 905
},
{
"epoch": 0.6725433794191333,
"grad_norm": 0.40556415915489197,
"learning_rate": 3.148365094274994e-05,
"loss": 0.8616,
"step": 906
},
{
"epoch": 0.6732857010299712,
"grad_norm": 0.36998236179351807,
"learning_rate": 3.1463811317595924e-05,
"loss": 0.8632,
"step": 907
},
{
"epoch": 0.6740280226408091,
"grad_norm": 0.5273242592811584,
"learning_rate": 3.144395487775527e-05,
"loss": 0.7877,
"step": 908
},
{
"epoch": 0.6747703442516471,
"grad_norm": 0.43254518508911133,
"learning_rate": 3.142408165235266e-05,
"loss": 0.7899,
"step": 909
},
{
"epoch": 0.6755126658624849,
"grad_norm": 0.41343677043914795,
"learning_rate": 3.140419167053738e-05,
"loss": 0.8354,
"step": 910
},
{
"epoch": 0.6762549874733228,
"grad_norm": 0.3693787157535553,
"learning_rate": 3.13842849614833e-05,
"loss": 0.7484,
"step": 911
},
{
"epoch": 0.6769973090841607,
"grad_norm": 0.34831342101097107,
"learning_rate": 3.136436155438885e-05,
"loss": 0.8299,
"step": 912
},
{
"epoch": 0.6777396306949987,
"grad_norm": 0.3597358167171478,
"learning_rate": 3.1344421478476926e-05,
"loss": 0.8714,
"step": 913
},
{
"epoch": 0.6784819523058365,
"grad_norm": 0.36480751633644104,
"learning_rate": 3.132446476299488e-05,
"loss": 0.8386,
"step": 914
},
{
"epoch": 0.6792242739166744,
"grad_norm": 0.3980991542339325,
"learning_rate": 3.130449143721447e-05,
"loss": 0.8226,
"step": 915
},
{
"epoch": 0.6799665955275123,
"grad_norm": 0.39509183168411255,
"learning_rate": 3.128450153043181e-05,
"loss": 0.7421,
"step": 916
},
{
"epoch": 0.6807089171383502,
"grad_norm": 0.40358635783195496,
"learning_rate": 3.126449507196736e-05,
"loss": 0.8075,
"step": 917
},
{
"epoch": 0.6814512387491881,
"grad_norm": 0.42663317918777466,
"learning_rate": 3.124447209116583e-05,
"loss": 0.8733,
"step": 918
},
{
"epoch": 0.682193560360026,
"grad_norm": 0.4373573064804077,
"learning_rate": 3.122443261739616e-05,
"loss": 0.8067,
"step": 919
},
{
"epoch": 0.6829358819708639,
"grad_norm": 0.3898407518863678,
"learning_rate": 3.1204376680051525e-05,
"loss": 0.9386,
"step": 920
},
{
"epoch": 0.6836782035817017,
"grad_norm": 0.3621942698955536,
"learning_rate": 3.11843043085492e-05,
"loss": 0.8037,
"step": 921
},
{
"epoch": 0.6844205251925397,
"grad_norm": 0.3760909140110016,
"learning_rate": 3.1164215532330585e-05,
"loss": 0.8503,
"step": 922
},
{
"epoch": 0.6851628468033776,
"grad_norm": 0.37526753544807434,
"learning_rate": 3.1144110380861134e-05,
"loss": 0.8107,
"step": 923
},
{
"epoch": 0.6859051684142154,
"grad_norm": 0.40608811378479004,
"learning_rate": 3.1123988883630337e-05,
"loss": 0.8461,
"step": 924
},
{
"epoch": 0.6866474900250533,
"grad_norm": 0.32391631603240967,
"learning_rate": 3.1103851070151646e-05,
"loss": 0.8786,
"step": 925
},
{
"epoch": 0.6873898116358913,
"grad_norm": 0.42110610008239746,
"learning_rate": 3.108369696996245e-05,
"loss": 0.8611,
"step": 926
},
{
"epoch": 0.6881321332467292,
"grad_norm": 0.4104180932044983,
"learning_rate": 3.1063526612624015e-05,
"loss": 0.8632,
"step": 927
},
{
"epoch": 0.688874454857567,
"grad_norm": 0.401500403881073,
"learning_rate": 3.104334002772146e-05,
"loss": 0.8179,
"step": 928
},
{
"epoch": 0.6896167764684049,
"grad_norm": 0.3811194598674774,
"learning_rate": 3.1023137244863726e-05,
"loss": 0.7376,
"step": 929
},
{
"epoch": 0.6903590980792428,
"grad_norm": 0.34555327892303467,
"learning_rate": 3.100291829368348e-05,
"loss": 0.8595,
"step": 930
},
{
"epoch": 0.6911014196900808,
"grad_norm": 0.3416372835636139,
"learning_rate": 3.098268320383711e-05,
"loss": 0.9128,
"step": 931
},
{
"epoch": 0.6918437413009186,
"grad_norm": 0.36647018790245056,
"learning_rate": 3.09624320050047e-05,
"loss": 0.7548,
"step": 932
},
{
"epoch": 0.6925860629117565,
"grad_norm": 0.3531283140182495,
"learning_rate": 3.094216472688994e-05,
"loss": 0.8315,
"step": 933
},
{
"epoch": 0.6933283845225944,
"grad_norm": 0.42129501700401306,
"learning_rate": 3.092188139922011e-05,
"loss": 0.8368,
"step": 934
},
{
"epoch": 0.6940707061334324,
"grad_norm": 0.3378678858280182,
"learning_rate": 3.090158205174603e-05,
"loss": 0.9293,
"step": 935
},
{
"epoch": 0.6948130277442702,
"grad_norm": 0.38736778497695923,
"learning_rate": 3.0881266714242005e-05,
"loss": 0.8752,
"step": 936
},
{
"epoch": 0.6955553493551081,
"grad_norm": 0.34928473830223083,
"learning_rate": 3.086093541650583e-05,
"loss": 0.7956,
"step": 937
},
{
"epoch": 0.696297670965946,
"grad_norm": 0.3697172701358795,
"learning_rate": 3.0840588188358685e-05,
"loss": 0.8248,
"step": 938
},
{
"epoch": 0.6970399925767838,
"grad_norm": 0.4416426420211792,
"learning_rate": 3.08202250596451e-05,
"loss": 0.8146,
"step": 939
},
{
"epoch": 0.6977823141876218,
"grad_norm": 0.4334758520126343,
"learning_rate": 3.0799846060232955e-05,
"loss": 0.7391,
"step": 940
},
{
"epoch": 0.6985246357984597,
"grad_norm": 0.3890143632888794,
"learning_rate": 3.07794512200134e-05,
"loss": 0.7769,
"step": 941
},
{
"epoch": 0.6992669574092976,
"grad_norm": 0.411953330039978,
"learning_rate": 3.075904056890082e-05,
"loss": 0.9243,
"step": 942
},
{
"epoch": 0.7000092790201354,
"grad_norm": 0.45792150497436523,
"learning_rate": 3.073861413683278e-05,
"loss": 0.9176,
"step": 943
},
{
"epoch": 0.7007516006309734,
"grad_norm": 0.37603330612182617,
"learning_rate": 3.0718171953770016e-05,
"loss": 0.8958,
"step": 944
},
{
"epoch": 0.7014939222418113,
"grad_norm": 0.387465238571167,
"learning_rate": 3.069771404969633e-05,
"loss": 0.7828,
"step": 945
},
{
"epoch": 0.7022362438526492,
"grad_norm": 0.3826201558113098,
"learning_rate": 3.067724045461864e-05,
"loss": 0.8544,
"step": 946
},
{
"epoch": 0.702978565463487,
"grad_norm": 0.40352147817611694,
"learning_rate": 3.0656751198566803e-05,
"loss": 0.8082,
"step": 947
},
{
"epoch": 0.7037208870743249,
"grad_norm": 0.4045282304286957,
"learning_rate": 3.0636246311593717e-05,
"loss": 0.8526,
"step": 948
},
{
"epoch": 0.7044632086851629,
"grad_norm": 0.40600502490997314,
"learning_rate": 3.061572582377517e-05,
"loss": 0.9317,
"step": 949
},
{
"epoch": 0.7052055302960007,
"grad_norm": 0.3995624780654907,
"learning_rate": 3.0595189765209834e-05,
"loss": 0.8922,
"step": 950
},
{
"epoch": 0.7059478519068386,
"grad_norm": 0.4132879376411438,
"learning_rate": 3.057463816601924e-05,
"loss": 0.867,
"step": 951
},
{
"epoch": 0.7066901735176765,
"grad_norm": 0.39816924929618835,
"learning_rate": 3.0554071056347674e-05,
"loss": 0.8102,
"step": 952
},
{
"epoch": 0.7074324951285145,
"grad_norm": 0.44873765110969543,
"learning_rate": 3.0533488466362235e-05,
"loss": 0.8293,
"step": 953
},
{
"epoch": 0.7081748167393523,
"grad_norm": 0.3815595507621765,
"learning_rate": 3.051289042625266e-05,
"loss": 0.8287,
"step": 954
},
{
"epoch": 0.7089171383501902,
"grad_norm": 0.39245954155921936,
"learning_rate": 3.0492276966231388e-05,
"loss": 0.8116,
"step": 955
},
{
"epoch": 0.7096594599610281,
"grad_norm": 0.3837089538574219,
"learning_rate": 3.047164811653347e-05,
"loss": 0.8383,
"step": 956
},
{
"epoch": 0.7104017815718661,
"grad_norm": 0.3879731297492981,
"learning_rate": 3.0451003907416522e-05,
"loss": 0.8636,
"step": 957
},
{
"epoch": 0.7111441031827039,
"grad_norm": 0.44654250144958496,
"learning_rate": 3.0430344369160695e-05,
"loss": 0.862,
"step": 958
},
{
"epoch": 0.7118864247935418,
"grad_norm": 0.38784080743789673,
"learning_rate": 3.0409669532068618e-05,
"loss": 0.7992,
"step": 959
},
{
"epoch": 0.7126287464043797,
"grad_norm": 0.38243433833122253,
"learning_rate": 3.038897942646536e-05,
"loss": 0.9515,
"step": 960
},
{
"epoch": 0.7133710680152175,
"grad_norm": 0.36308029294013977,
"learning_rate": 3.0368274082698402e-05,
"loss": 0.7527,
"step": 961
},
{
"epoch": 0.7141133896260555,
"grad_norm": 0.3840027153491974,
"learning_rate": 3.034755353113755e-05,
"loss": 0.8217,
"step": 962
},
{
"epoch": 0.7148557112368934,
"grad_norm": 0.33255666494369507,
"learning_rate": 3.0326817802174928e-05,
"loss": 0.7772,
"step": 963
},
{
"epoch": 0.7155980328477313,
"grad_norm": 0.35044682025909424,
"learning_rate": 3.0306066926224925e-05,
"loss": 0.7885,
"step": 964
},
{
"epoch": 0.7163403544585691,
"grad_norm": 0.4197009801864624,
"learning_rate": 3.028530093372415e-05,
"loss": 0.9473,
"step": 965
},
{
"epoch": 0.7170826760694071,
"grad_norm": 0.36847788095474243,
"learning_rate": 3.026451985513138e-05,
"loss": 0.8673,
"step": 966
},
{
"epoch": 0.717824997680245,
"grad_norm": 0.3890594244003296,
"learning_rate": 3.02437237209275e-05,
"loss": 0.8133,
"step": 967
},
{
"epoch": 0.7185673192910829,
"grad_norm": 0.43013694882392883,
"learning_rate": 3.0222912561615517e-05,
"loss": 0.906,
"step": 968
},
{
"epoch": 0.7193096409019207,
"grad_norm": 0.4074924886226654,
"learning_rate": 3.020208640772045e-05,
"loss": 0.8783,
"step": 969
},
{
"epoch": 0.7200519625127586,
"grad_norm": 0.4189145863056183,
"learning_rate": 3.018124528978931e-05,
"loss": 0.895,
"step": 970
},
{
"epoch": 0.7207942841235966,
"grad_norm": 0.3682161271572113,
"learning_rate": 3.0160389238391067e-05,
"loss": 0.7772,
"step": 971
},
{
"epoch": 0.7215366057344345,
"grad_norm": 0.3887183666229248,
"learning_rate": 3.01395182841166e-05,
"loss": 0.9046,
"step": 972
},
{
"epoch": 0.7222789273452723,
"grad_norm": 0.35502782464027405,
"learning_rate": 3.0118632457578624e-05,
"loss": 0.778,
"step": 973
},
{
"epoch": 0.7230212489561102,
"grad_norm": 0.3781295120716095,
"learning_rate": 3.0097731789411688e-05,
"loss": 0.7921,
"step": 974
},
{
"epoch": 0.7237635705669482,
"grad_norm": 0.436229407787323,
"learning_rate": 3.007681631027212e-05,
"loss": 0.9466,
"step": 975
},
{
"epoch": 0.724505892177786,
"grad_norm": 0.37810221314430237,
"learning_rate": 3.0055886050837935e-05,
"loss": 0.7668,
"step": 976
},
{
"epoch": 0.7252482137886239,
"grad_norm": 0.43310630321502686,
"learning_rate": 3.0034941041808853e-05,
"loss": 0.7256,
"step": 977
},
{
"epoch": 0.7259905353994618,
"grad_norm": 0.4197729825973511,
"learning_rate": 3.0013981313906235e-05,
"loss": 0.8872,
"step": 978
},
{
"epoch": 0.7267328570102997,
"grad_norm": 0.4396391808986664,
"learning_rate": 2.999300689787302e-05,
"loss": 0.7703,
"step": 979
},
{
"epoch": 0.7274751786211376,
"grad_norm": 0.37335899472236633,
"learning_rate": 2.9972017824473682e-05,
"loss": 0.8266,
"step": 980
},
{
"epoch": 0.7282175002319755,
"grad_norm": 0.38292384147644043,
"learning_rate": 2.9951014124494206e-05,
"loss": 0.7715,
"step": 981
},
{
"epoch": 0.7289598218428134,
"grad_norm": 0.3634425401687622,
"learning_rate": 2.9929995828742032e-05,
"loss": 0.9531,
"step": 982
},
{
"epoch": 0.7297021434536513,
"grad_norm": 0.3410262167453766,
"learning_rate": 2.9908962968046e-05,
"loss": 0.7864,
"step": 983
},
{
"epoch": 0.7304444650644892,
"grad_norm": 0.4278801381587982,
"learning_rate": 2.988791557325632e-05,
"loss": 0.8387,
"step": 984
},
{
"epoch": 0.7311867866753271,
"grad_norm": 0.394827276468277,
"learning_rate": 2.9866853675244523e-05,
"loss": 0.8465,
"step": 985
},
{
"epoch": 0.731929108286165,
"grad_norm": 0.3513793349266052,
"learning_rate": 2.98457773049034e-05,
"loss": 0.792,
"step": 986
},
{
"epoch": 0.7326714298970028,
"grad_norm": 0.33131977915763855,
"learning_rate": 2.9824686493146977e-05,
"loss": 0.9048,
"step": 987
},
{
"epoch": 0.7334137515078407,
"grad_norm": 0.36972224712371826,
"learning_rate": 2.9803581270910466e-05,
"loss": 0.7708,
"step": 988
},
{
"epoch": 0.7341560731186787,
"grad_norm": 0.4358534812927246,
"learning_rate": 2.9782461669150212e-05,
"loss": 0.781,
"step": 989
},
{
"epoch": 0.7348983947295166,
"grad_norm": 0.37654241919517517,
"learning_rate": 2.9761327718843648e-05,
"loss": 0.8687,
"step": 990
},
{
"epoch": 0.7356407163403544,
"grad_norm": 0.409423291683197,
"learning_rate": 2.9740179450989265e-05,
"loss": 0.8757,
"step": 991
},
{
"epoch": 0.7363830379511923,
"grad_norm": 0.36852940917015076,
"learning_rate": 2.9719016896606535e-05,
"loss": 0.7978,
"step": 992
},
{
"epoch": 0.7371253595620303,
"grad_norm": 0.4487001299858093,
"learning_rate": 2.9697840086735898e-05,
"loss": 0.8977,
"step": 993
},
{
"epoch": 0.7378676811728682,
"grad_norm": 0.4240976870059967,
"learning_rate": 2.9676649052438706e-05,
"loss": 0.8973,
"step": 994
},
{
"epoch": 0.738610002783706,
"grad_norm": 0.4073665142059326,
"learning_rate": 2.965544382479717e-05,
"loss": 0.8245,
"step": 995
},
{
"epoch": 0.7393523243945439,
"grad_norm": 0.40183866024017334,
"learning_rate": 2.963422443491431e-05,
"loss": 0.8173,
"step": 996
},
{
"epoch": 0.7400946460053819,
"grad_norm": 0.4195357859134674,
"learning_rate": 2.9612990913913935e-05,
"loss": 0.8614,
"step": 997
},
{
"epoch": 0.7408369676162198,
"grad_norm": 0.3881988823413849,
"learning_rate": 2.959174329294058e-05,
"loss": 0.8538,
"step": 998
},
{
"epoch": 0.7415792892270576,
"grad_norm": 0.4063870906829834,
"learning_rate": 2.957048160315944e-05,
"loss": 0.7881,
"step": 999
},
{
"epoch": 0.7423216108378955,
"grad_norm": 0.3842681646347046,
"learning_rate": 2.954920587575637e-05,
"loss": 0.8986,
"step": 1000
},
{
"epoch": 0.7430639324487334,
"grad_norm": 0.379730224609375,
"learning_rate": 2.952791614193781e-05,
"loss": 0.7682,
"step": 1001
},
{
"epoch": 0.7438062540595713,
"grad_norm": 0.36264169216156006,
"learning_rate": 2.9506612432930722e-05,
"loss": 0.7856,
"step": 1002
},
{
"epoch": 0.7445485756704092,
"grad_norm": 0.3864235579967499,
"learning_rate": 2.948529477998261e-05,
"loss": 0.7882,
"step": 1003
},
{
"epoch": 0.7452908972812471,
"grad_norm": 0.42890578508377075,
"learning_rate": 2.946396321436138e-05,
"loss": 0.8788,
"step": 1004
},
{
"epoch": 0.746033218892085,
"grad_norm": 0.3864159882068634,
"learning_rate": 2.944261776735539e-05,
"loss": 0.8518,
"step": 1005
},
{
"epoch": 0.7467755405029229,
"grad_norm": 0.3886793255805969,
"learning_rate": 2.9421258470273317e-05,
"loss": 0.842,
"step": 1006
},
{
"epoch": 0.7475178621137608,
"grad_norm": 0.4735707938671112,
"learning_rate": 2.9399885354444184e-05,
"loss": 0.9169,
"step": 1007
},
{
"epoch": 0.7482601837245987,
"grad_norm": 0.3890320956707001,
"learning_rate": 2.9378498451217275e-05,
"loss": 0.8453,
"step": 1008
},
{
"epoch": 0.7490025053354366,
"grad_norm": 0.36628207564353943,
"learning_rate": 2.9357097791962093e-05,
"loss": 0.7281,
"step": 1009
},
{
"epoch": 0.7497448269462744,
"grad_norm": 0.35675758123397827,
"learning_rate": 2.933568340806831e-05,
"loss": 0.8212,
"step": 1010
},
{
"epoch": 0.7504871485571124,
"grad_norm": 0.3940046429634094,
"learning_rate": 2.931425533094575e-05,
"loss": 0.833,
"step": 1011
},
{
"epoch": 0.7512294701679503,
"grad_norm": 0.3780806362628937,
"learning_rate": 2.9292813592024304e-05,
"loss": 0.8061,
"step": 1012
},
{
"epoch": 0.7519717917787881,
"grad_norm": 0.4200505316257477,
"learning_rate": 2.92713582227539e-05,
"loss": 0.804,
"step": 1013
},
{
"epoch": 0.752714113389626,
"grad_norm": 0.4011412262916565,
"learning_rate": 2.924988925460448e-05,
"loss": 0.8795,
"step": 1014
},
{
"epoch": 0.753456435000464,
"grad_norm": 0.42554283142089844,
"learning_rate": 2.92284067190659e-05,
"loss": 0.872,
"step": 1015
},
{
"epoch": 0.7541987566113019,
"grad_norm": 0.44452059268951416,
"learning_rate": 2.920691064764795e-05,
"loss": 0.8406,
"step": 1016
},
{
"epoch": 0.7549410782221397,
"grad_norm": 0.36996594071388245,
"learning_rate": 2.9185401071880256e-05,
"loss": 0.801,
"step": 1017
},
{
"epoch": 0.7556833998329776,
"grad_norm": 0.3992551267147064,
"learning_rate": 2.9163878023312248e-05,
"loss": 0.8065,
"step": 1018
},
{
"epoch": 0.7564257214438155,
"grad_norm": 0.4441946744918823,
"learning_rate": 2.914234153351312e-05,
"loss": 0.8438,
"step": 1019
},
{
"epoch": 0.7571680430546535,
"grad_norm": 0.367318332195282,
"learning_rate": 2.912079163407179e-05,
"loss": 0.8188,
"step": 1020
},
{
"epoch": 0.7579103646654913,
"grad_norm": 0.47029224038124084,
"learning_rate": 2.909922835659684e-05,
"loss": 0.8777,
"step": 1021
},
{
"epoch": 0.7586526862763292,
"grad_norm": 0.351092129945755,
"learning_rate": 2.9077651732716466e-05,
"loss": 0.8203,
"step": 1022
},
{
"epoch": 0.7593950078871671,
"grad_norm": 0.40848031640052795,
"learning_rate": 2.9056061794078454e-05,
"loss": 0.9871,
"step": 1023
},
{
"epoch": 0.760137329498005,
"grad_norm": 0.3885229527950287,
"learning_rate": 2.9034458572350113e-05,
"loss": 0.9423,
"step": 1024
},
{
"epoch": 0.7608796511088429,
"grad_norm": 0.4087800979614258,
"learning_rate": 2.9012842099218227e-05,
"loss": 0.8842,
"step": 1025
},
{
"epoch": 0.7616219727196808,
"grad_norm": 0.42746204137802124,
"learning_rate": 2.8991212406389034e-05,
"loss": 0.8461,
"step": 1026
},
{
"epoch": 0.7623642943305187,
"grad_norm": 0.34393808245658875,
"learning_rate": 2.8969569525588145e-05,
"loss": 0.8116,
"step": 1027
},
{
"epoch": 0.7631066159413566,
"grad_norm": 0.36236944794654846,
"learning_rate": 2.8947913488560523e-05,
"loss": 0.9132,
"step": 1028
},
{
"epoch": 0.7638489375521945,
"grad_norm": 0.4048609435558319,
"learning_rate": 2.8926244327070444e-05,
"loss": 0.8502,
"step": 1029
},
{
"epoch": 0.7645912591630324,
"grad_norm": 0.39788907766342163,
"learning_rate": 2.8904562072901395e-05,
"loss": 0.8751,
"step": 1030
},
{
"epoch": 0.7653335807738703,
"grad_norm": 0.3588632345199585,
"learning_rate": 2.88828667578561e-05,
"loss": 0.7647,
"step": 1031
},
{
"epoch": 0.7660759023847081,
"grad_norm": 0.3718065917491913,
"learning_rate": 2.886115841375643e-05,
"loss": 0.88,
"step": 1032
},
{
"epoch": 0.7668182239955461,
"grad_norm": 0.3948284983634949,
"learning_rate": 2.8839437072443362e-05,
"loss": 0.88,
"step": 1033
},
{
"epoch": 0.767560545606384,
"grad_norm": 0.36646491289138794,
"learning_rate": 2.8817702765776947e-05,
"loss": 0.8244,
"step": 1034
},
{
"epoch": 0.7683028672172219,
"grad_norm": 0.41700711846351624,
"learning_rate": 2.8795955525636247e-05,
"loss": 0.8149,
"step": 1035
},
{
"epoch": 0.7690451888280597,
"grad_norm": 0.36176496744155884,
"learning_rate": 2.877419538391929e-05,
"loss": 0.7405,
"step": 1036
},
{
"epoch": 0.7697875104388977,
"grad_norm": 0.3812231123447418,
"learning_rate": 2.8752422372543037e-05,
"loss": 0.8124,
"step": 1037
},
{
"epoch": 0.7705298320497356,
"grad_norm": 0.3683227300643921,
"learning_rate": 2.8730636523443313e-05,
"loss": 0.8463,
"step": 1038
},
{
"epoch": 0.7712721536605734,
"grad_norm": 0.37682121992111206,
"learning_rate": 2.870883786857479e-05,
"loss": 0.8515,
"step": 1039
},
{
"epoch": 0.7720144752714113,
"grad_norm": 0.40752267837524414,
"learning_rate": 2.868702643991091e-05,
"loss": 0.9872,
"step": 1040
},
{
"epoch": 0.7727567968822492,
"grad_norm": 0.34598663449287415,
"learning_rate": 2.866520226944386e-05,
"loss": 0.8201,
"step": 1041
},
{
"epoch": 0.7734991184930872,
"grad_norm": 0.39901018142700195,
"learning_rate": 2.864336538918451e-05,
"loss": 0.7867,
"step": 1042
},
{
"epoch": 0.774241440103925,
"grad_norm": 0.40626272559165955,
"learning_rate": 2.8621515831162363e-05,
"loss": 0.8817,
"step": 1043
},
{
"epoch": 0.7749837617147629,
"grad_norm": 0.4141016900539398,
"learning_rate": 2.859965362742554e-05,
"loss": 0.8937,
"step": 1044
},
{
"epoch": 0.7757260833256008,
"grad_norm": 0.43257343769073486,
"learning_rate": 2.8577778810040687e-05,
"loss": 0.8147,
"step": 1045
},
{
"epoch": 0.7764684049364388,
"grad_norm": 0.40944743156433105,
"learning_rate": 2.855589141109297e-05,
"loss": 0.8747,
"step": 1046
},
{
"epoch": 0.7772107265472766,
"grad_norm": 0.4060775935649872,
"learning_rate": 2.8533991462686005e-05,
"loss": 0.9374,
"step": 1047
},
{
"epoch": 0.7779530481581145,
"grad_norm": 0.37721845507621765,
"learning_rate": 2.8512078996941805e-05,
"loss": 0.837,
"step": 1048
},
{
"epoch": 0.7786953697689524,
"grad_norm": 0.3883989155292511,
"learning_rate": 2.849015404600074e-05,
"loss": 0.8522,
"step": 1049
},
{
"epoch": 0.7794376913797902,
"grad_norm": 0.3714500367641449,
"learning_rate": 2.8468216642021524e-05,
"loss": 0.7972,
"step": 1050
},
{
"epoch": 0.7801800129906282,
"grad_norm": 0.37216463685035706,
"learning_rate": 2.84462668171811e-05,
"loss": 0.7885,
"step": 1051
},
{
"epoch": 0.7809223346014661,
"grad_norm": 0.343142032623291,
"learning_rate": 2.8424304603674645e-05,
"loss": 0.8198,
"step": 1052
},
{
"epoch": 0.781664656212304,
"grad_norm": 0.39523279666900635,
"learning_rate": 2.8402330033715512e-05,
"loss": 0.8359,
"step": 1053
},
{
"epoch": 0.7824069778231418,
"grad_norm": 0.425644189119339,
"learning_rate": 2.838034313953517e-05,
"loss": 0.873,
"step": 1054
},
{
"epoch": 0.7831492994339798,
"grad_norm": 0.37101006507873535,
"learning_rate": 2.835834395338317e-05,
"loss": 0.7968,
"step": 1055
},
{
"epoch": 0.7838916210448177,
"grad_norm": 0.5156154036521912,
"learning_rate": 2.833633250752708e-05,
"loss": 0.7642,
"step": 1056
},
{
"epoch": 0.7846339426556556,
"grad_norm": 0.42274123430252075,
"learning_rate": 2.8314308834252477e-05,
"loss": 0.9259,
"step": 1057
},
{
"epoch": 0.7853762642664934,
"grad_norm": 0.45442909002304077,
"learning_rate": 2.8292272965862834e-05,
"loss": 0.8184,
"step": 1058
},
{
"epoch": 0.7861185858773313,
"grad_norm": 0.38438424468040466,
"learning_rate": 2.8270224934679547e-05,
"loss": 0.8094,
"step": 1059
},
{
"epoch": 0.7868609074881693,
"grad_norm": 0.3720132112503052,
"learning_rate": 2.8248164773041838e-05,
"loss": 0.8544,
"step": 1060
},
{
"epoch": 0.7876032290990072,
"grad_norm": 0.3671931028366089,
"learning_rate": 2.822609251330671e-05,
"loss": 0.7667,
"step": 1061
},
{
"epoch": 0.788345550709845,
"grad_norm": 0.37383246421813965,
"learning_rate": 2.8204008187848938e-05,
"loss": 0.8313,
"step": 1062
},
{
"epoch": 0.7890878723206829,
"grad_norm": 0.37284618616104126,
"learning_rate": 2.8181911829060963e-05,
"loss": 0.7514,
"step": 1063
},
{
"epoch": 0.7898301939315209,
"grad_norm": 0.45464012026786804,
"learning_rate": 2.8159803469352902e-05,
"loss": 0.9212,
"step": 1064
},
{
"epoch": 0.7905725155423587,
"grad_norm": 0.3994920551776886,
"learning_rate": 2.813768314115246e-05,
"loss": 0.8621,
"step": 1065
},
{
"epoch": 0.7913148371531966,
"grad_norm": 0.4181472957134247,
"learning_rate": 2.8115550876904905e-05,
"loss": 0.8572,
"step": 1066
},
{
"epoch": 0.7920571587640345,
"grad_norm": 0.37849193811416626,
"learning_rate": 2.8093406709073002e-05,
"loss": 0.8802,
"step": 1067
},
{
"epoch": 0.7927994803748725,
"grad_norm": 0.3892970681190491,
"learning_rate": 2.8071250670136994e-05,
"loss": 0.9052,
"step": 1068
},
{
|
"epoch": 0.7935418019857103, |
|
"grad_norm": 0.39315545558929443, |
|
"learning_rate": 2.8049082792594515e-05, |
|
"loss": 0.8753, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.7942841235965482, |
|
"grad_norm": 0.35754016041755676, |
|
"learning_rate": 2.8026903108960573e-05, |
|
"loss": 0.8032, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.7950264452073861, |
|
"grad_norm": 0.3976321518421173, |
|
"learning_rate": 2.8004711651767492e-05, |
|
"loss": 0.8564, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.795768766818224, |
|
"grad_norm": 0.36166390776634216, |
|
"learning_rate": 2.7982508453564866e-05, |
|
"loss": 0.7671, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.7965110884290619, |
|
"grad_norm": 0.36037370562553406, |
|
"learning_rate": 2.796029354691952e-05, |
|
"loss": 0.915, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.7972534100398998, |
|
"grad_norm": 0.368314266204834, |
|
"learning_rate": 2.793806696441543e-05, |
|
"loss": 0.8892, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.7979957316507377, |
|
"grad_norm": 0.4094344973564148, |
|
"learning_rate": 2.791582873865371e-05, |
|
"loss": 0.8191, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.7987380532615755, |
|
"grad_norm": 0.38346514105796814, |
|
"learning_rate": 2.7893578902252563e-05, |
|
"loss": 0.8655, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.7994803748724135, |
|
"grad_norm": 0.3845061659812927, |
|
"learning_rate": 2.7871317487847195e-05, |
|
"loss": 0.7341, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.8002226964832514, |
|
"grad_norm": 0.37997809052467346, |
|
"learning_rate": 2.784904452808982e-05, |
|
"loss": 0.8391, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.8009650180940893, |
|
"grad_norm": 0.38898172974586487, |
|
"learning_rate": 2.782676005564958e-05, |
|
"loss": 0.9057, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.8017073397049271, |
|
"grad_norm": 0.3827812075614929, |
|
"learning_rate": 2.7804464103212492e-05, |
|
"loss": 0.8068, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.802449661315765, |
|
"grad_norm": 0.36027270555496216, |
|
"learning_rate": 2.7782156703481428e-05, |
|
"loss": 0.8588, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.803191982926603, |
|
"grad_norm": 0.4327481687068939, |
|
"learning_rate": 2.775983788917603e-05, |
|
"loss": 0.8632, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.8039343045374409, |
|
"grad_norm": 0.3570108413696289, |
|
"learning_rate": 2.7737507693032704e-05, |
|
"loss": 0.8142, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.8046766261482787, |
|
"grad_norm": 0.3824721872806549, |
|
"learning_rate": 2.7715166147804534e-05, |
|
"loss": 0.8073, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.8054189477591166, |
|
"grad_norm": 0.3799905478954315, |
|
"learning_rate": 2.769281328626126e-05, |
|
"loss": 0.8286, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.8061612693699546, |
|
"grad_norm": 0.39156484603881836, |
|
"learning_rate": 2.7670449141189224e-05, |
|
"loss": 0.8259, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.8069035909807925, |
|
"grad_norm": 0.37677666544914246, |
|
"learning_rate": 2.7648073745391294e-05, |
|
"loss": 0.7925, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.8076459125916303, |
|
"grad_norm": 0.3373050093650818, |
|
"learning_rate": 2.7625687131686874e-05, |
|
"loss": 0.855, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.8083882342024682, |
|
"grad_norm": 0.397009938955307, |
|
"learning_rate": 2.76032893329118e-05, |
|
"loss": 0.8977, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.8091305558133061, |
|
"grad_norm": 0.37769317626953125, |
|
"learning_rate": 2.7580880381918317e-05, |
|
"loss": 0.95, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.809872877424144, |
|
"grad_norm": 0.4025100767612457, |
|
"learning_rate": 2.755846031157504e-05, |
|
"loss": 0.7946, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.8106151990349819, |
|
"grad_norm": 0.4239715337753296, |
|
"learning_rate": 2.753602915476687e-05, |
|
"loss": 0.8794, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.8113575206458198, |
|
"grad_norm": 0.40224555134773254, |
|
"learning_rate": 2.7513586944394992e-05, |
|
"loss": 0.9042, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.8120998422566577, |
|
"grad_norm": 0.32581862807273865, |
|
"learning_rate": 2.7491133713376795e-05, |
|
"loss": 0.8939, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.8128421638674956, |
|
"grad_norm": 0.4022167921066284, |
|
"learning_rate": 2.7468669494645827e-05, |
|
"loss": 0.7777, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.8135844854783335, |
|
"grad_norm": 0.4196806252002716, |
|
"learning_rate": 2.7446194321151768e-05, |
|
"loss": 0.8122, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.8143268070891714, |
|
"grad_norm": 0.3758104145526886, |
|
"learning_rate": 2.7423708225860344e-05, |
|
"loss": 0.7338, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.8150691287000092, |
|
"grad_norm": 0.37303996086120605, |
|
"learning_rate": 2.7401211241753315e-05, |
|
"loss": 0.8801, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.8158114503108471, |
|
"grad_norm": 0.392053484916687, |
|
"learning_rate": 2.7378703401828416e-05, |
|
"loss": 0.6883, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.8165537719216851, |
|
"grad_norm": 0.37932631373405457, |
|
"learning_rate": 2.7356184739099293e-05, |
|
"loss": 0.7823, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.817296093532523, |
|
"grad_norm": 0.3743017315864563, |
|
"learning_rate": 2.7333655286595474e-05, |
|
"loss": 0.8226, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.8180384151433608, |
|
"grad_norm": 0.4144737720489502, |
|
"learning_rate": 2.731111507736232e-05, |
|
"loss": 0.8217, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.8187807367541987, |
|
"grad_norm": 0.3302975296974182, |
|
"learning_rate": 2.728856414446094e-05, |
|
"loss": 0.8851, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.8195230583650367, |
|
"grad_norm": 0.37614309787750244, |
|
"learning_rate": 2.7266002520968212e-05, |
|
"loss": 0.8134, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.8202653799758746, |
|
"grad_norm": 0.42986634373664856, |
|
"learning_rate": 2.7243430239976668e-05, |
|
"loss": 0.8935, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.8210077015867124, |
|
"grad_norm": 0.4134221076965332, |
|
"learning_rate": 2.722084733459448e-05, |
|
"loss": 0.8741, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.8217500231975503, |
|
"grad_norm": 0.4643120765686035, |
|
"learning_rate": 2.7198253837945414e-05, |
|
"loss": 0.8715, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.8224923448083883, |
|
"grad_norm": 0.4156094789505005, |
|
"learning_rate": 2.7175649783168742e-05, |
|
"loss": 0.9386, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.8232346664192262, |
|
"grad_norm": 0.39130133390426636, |
|
"learning_rate": 2.715303520341925e-05, |
|
"loss": 0.9361, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.823976988030064, |
|
"grad_norm": 0.38539817929267883, |
|
"learning_rate": 2.7130410131867147e-05, |
|
"loss": 0.8998, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8247193096409019, |
|
"grad_norm": 0.35698390007019043, |
|
"learning_rate": 2.710777460169804e-05, |
|
"loss": 0.761, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.8254616312517398, |
|
"grad_norm": 0.33499324321746826, |
|
"learning_rate": 2.708512864611287e-05, |
|
"loss": 0.8085, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.8262039528625778, |
|
"grad_norm": 0.44463443756103516, |
|
"learning_rate": 2.706247229832787e-05, |
|
"loss": 0.7751, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.8269462744734156, |
|
"grad_norm": 0.4300941526889801, |
|
"learning_rate": 2.703980559157452e-05, |
|
"loss": 0.7756, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.8276885960842535, |
|
"grad_norm": 0.3815639019012451, |
|
"learning_rate": 2.70171285590995e-05, |
|
"loss": 0.8001, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.8284309176950914, |
|
"grad_norm": 0.38274961709976196, |
|
"learning_rate": 2.6994441234164605e-05, |
|
"loss": 0.7243, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.8291732393059293, |
|
"grad_norm": 0.39203885197639465, |
|
"learning_rate": 2.6971743650046774e-05, |
|
"loss": 0.8777, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.8299155609167672, |
|
"grad_norm": 0.38965150713920593, |
|
"learning_rate": 2.694903584003795e-05, |
|
"loss": 0.8189, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.8306578825276051, |
|
"grad_norm": 0.39319199323654175, |
|
"learning_rate": 2.69263178374451e-05, |
|
"loss": 0.8219, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.831400204138443, |
|
"grad_norm": 0.3590227961540222, |
|
"learning_rate": 2.690358967559014e-05, |
|
"loss": 0.7929, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8321425257492808, |
|
"grad_norm": 0.4018113911151886, |
|
"learning_rate": 2.688085138780987e-05, |
|
"loss": 0.7804, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.8328848473601188, |
|
"grad_norm": 0.3763115406036377, |
|
"learning_rate": 2.6858103007455968e-05, |
|
"loss": 0.9118, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.8336271689709567, |
|
"grad_norm": 0.4066718816757202, |
|
"learning_rate": 2.6835344567894887e-05, |
|
"loss": 0.7809, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.8343694905817945, |
|
"grad_norm": 0.4564039707183838, |
|
"learning_rate": 2.6812576102507852e-05, |
|
"loss": 0.9001, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.8351118121926324, |
|
"grad_norm": 0.38714709877967834, |
|
"learning_rate": 2.6789797644690804e-05, |
|
"loss": 0.7852, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.8358541338034704, |
|
"grad_norm": 0.38495564460754395, |
|
"learning_rate": 2.6767009227854307e-05, |
|
"loss": 0.7465, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.8365964554143083, |
|
"grad_norm": 0.35638296604156494, |
|
"learning_rate": 2.6744210885423568e-05, |
|
"loss": 0.876, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.8373387770251461, |
|
"grad_norm": 0.40398475527763367, |
|
"learning_rate": 2.672140265083833e-05, |
|
"loss": 0.8088, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.838081098635984, |
|
"grad_norm": 0.3809717297554016, |
|
"learning_rate": 2.6698584557552853e-05, |
|
"loss": 0.7492, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.8388234202468219, |
|
"grad_norm": 0.34428274631500244, |
|
"learning_rate": 2.6675756639035865e-05, |
|
"loss": 0.7429, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8395657418576599, |
|
"grad_norm": 0.4311201572418213, |
|
"learning_rate": 2.6652918928770484e-05, |
|
"loss": 0.8018, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.8403080634684977, |
|
"grad_norm": 0.36646634340286255, |
|
"learning_rate": 2.663007146025421e-05, |
|
"loss": 0.8448, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.8410503850793356, |
|
"grad_norm": 0.3897891342639923, |
|
"learning_rate": 2.6607214266998846e-05, |
|
"loss": 0.7464, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.8417927066901735, |
|
"grad_norm": 0.38725921511650085, |
|
"learning_rate": 2.658434738253046e-05, |
|
"loss": 0.7544, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.8425350283010115, |
|
"grad_norm": 0.4934210479259491, |
|
"learning_rate": 2.6561470840389354e-05, |
|
"loss": 0.7775, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.8432773499118493, |
|
"grad_norm": 0.40992632508277893, |
|
"learning_rate": 2.6538584674129958e-05, |
|
"loss": 0.8458, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.8440196715226872, |
|
"grad_norm": 0.39897727966308594, |
|
"learning_rate": 2.651568891732084e-05, |
|
"loss": 0.8362, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.8447619931335251, |
|
"grad_norm": 0.38299280405044556, |
|
"learning_rate": 2.6492783603544648e-05, |
|
"loss": 0.8096, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.845504314744363, |
|
"grad_norm": 0.37341177463531494, |
|
"learning_rate": 2.6469868766398024e-05, |
|
"loss": 0.7951, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.8462466363552009, |
|
"grad_norm": 0.36628684401512146, |
|
"learning_rate": 2.644694443949159e-05, |
|
"loss": 0.9074, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8469889579660388, |
|
"grad_norm": 0.33878034353256226, |
|
"learning_rate": 2.6424010656449877e-05, |
|
"loss": 0.7807, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.8477312795768767, |
|
"grad_norm": 0.3553994297981262, |
|
"learning_rate": 2.6401067450911305e-05, |
|
"loss": 0.7631, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.8484736011877145, |
|
"grad_norm": 0.35956519842147827, |
|
"learning_rate": 2.6378114856528108e-05, |
|
"loss": 0.7291, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.8492159227985525, |
|
"grad_norm": 0.44231006503105164, |
|
"learning_rate": 2.6355152906966268e-05, |
|
"loss": 0.9642, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.8499582444093904, |
|
"grad_norm": 0.3414170444011688, |
|
"learning_rate": 2.6332181635905523e-05, |
|
"loss": 0.8088, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.8507005660202283, |
|
"grad_norm": 0.3992343246936798, |
|
"learning_rate": 2.630920107703927e-05, |
|
"loss": 0.8089, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.8514428876310661, |
|
"grad_norm": 0.44197651743888855, |
|
"learning_rate": 2.6286211264074518e-05, |
|
"loss": 0.7817, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.8521852092419041, |
|
"grad_norm": 0.3761456310749054, |
|
"learning_rate": 2.626321223073186e-05, |
|
"loss": 0.844, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.852927530852742, |
|
"grad_norm": 0.3781460225582123, |
|
"learning_rate": 2.6240204010745417e-05, |
|
"loss": 0.8404, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.8536698524635798, |
|
"grad_norm": 0.41302886605262756, |
|
"learning_rate": 2.6217186637862767e-05, |
|
"loss": 0.8341, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8544121740744177, |
|
"grad_norm": 0.3607892394065857, |
|
"learning_rate": 2.6194160145844943e-05, |
|
"loss": 0.781, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.8551544956852556, |
|
"grad_norm": 0.3791287839412689, |
|
"learning_rate": 2.6171124568466328e-05, |
|
"loss": 0.7664, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.8558968172960936, |
|
"grad_norm": 0.3444710969924927, |
|
"learning_rate": 2.6148079939514634e-05, |
|
"loss": 0.7775, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.8566391389069314, |
|
"grad_norm": 0.35780924558639526, |
|
"learning_rate": 2.612502629279086e-05, |
|
"loss": 0.9237, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.8573814605177693, |
|
"grad_norm": 0.4052909314632416, |
|
"learning_rate": 2.6101963662109233e-05, |
|
"loss": 0.9076, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.8581237821286072, |
|
"grad_norm": 0.3919762372970581, |
|
"learning_rate": 2.607889208129714e-05, |
|
"loss": 0.7643, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.8588661037394452, |
|
"grad_norm": 0.36909744143486023, |
|
"learning_rate": 2.6055811584195116e-05, |
|
"loss": 0.8057, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.859608425350283, |
|
"grad_norm": 0.37541574239730835, |
|
"learning_rate": 2.6032722204656752e-05, |
|
"loss": 0.7, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.8603507469611209, |
|
"grad_norm": 0.3770912289619446, |
|
"learning_rate": 2.600962397654869e-05, |
|
"loss": 0.7992, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.8610930685719588, |
|
"grad_norm": 0.38514405488967896, |
|
"learning_rate": 2.5986516933750547e-05, |
|
"loss": 0.8241, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8618353901827966, |
|
"grad_norm": 0.46653178334236145, |
|
"learning_rate": 2.596340111015485e-05, |
|
"loss": 0.9005, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.8625777117936346, |
|
"grad_norm": 0.39433014392852783, |
|
"learning_rate": 2.5940276539667023e-05, |
|
"loss": 0.7639, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.8633200334044725, |
|
"grad_norm": 0.39670971035957336, |
|
"learning_rate": 2.5917143256205315e-05, |
|
"loss": 0.8006, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.8640623550153104, |
|
"grad_norm": 0.3866695463657379, |
|
"learning_rate": 2.5894001293700762e-05, |
|
"loss": 0.9459, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.8648046766261482, |
|
"grad_norm": 0.3981380760669708, |
|
"learning_rate": 2.587085068609711e-05, |
|
"loss": 0.7851, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.8655469982369862, |
|
"grad_norm": 0.4058111608028412, |
|
"learning_rate": 2.5847691467350803e-05, |
|
"loss": 0.8604, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.8662893198478241, |
|
"grad_norm": 0.42619138956069946, |
|
"learning_rate": 2.582452367143091e-05, |
|
"loss": 0.9086, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.867031641458662, |
|
"grad_norm": 0.3281800448894501, |
|
"learning_rate": 2.5801347332319094e-05, |
|
"loss": 0.7461, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.8677739630694998, |
|
"grad_norm": 0.380833238363266, |
|
"learning_rate": 2.5778162484009523e-05, |
|
"loss": 0.8507, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.8685162846803377, |
|
"grad_norm": 0.404496431350708, |
|
"learning_rate": 2.575496916050886e-05, |
|
"loss": 0.7522, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8692586062911757, |
|
"grad_norm": 0.3586255609989166, |
|
"learning_rate": 2.5731767395836195e-05, |
|
"loss": 0.7842, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.8700009279020136, |
|
"grad_norm": 0.40008819103240967, |
|
"learning_rate": 2.5708557224023015e-05, |
|
"loss": 0.7666, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.8707432495128514, |
|
"grad_norm": 0.4241710901260376, |
|
"learning_rate": 2.5685338679113118e-05, |
|
"loss": 0.8025, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.8714855711236893, |
|
"grad_norm": 0.377708375453949, |
|
"learning_rate": 2.5662111795162597e-05, |
|
"loss": 0.8052, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.8722278927345273, |
|
"grad_norm": 0.5053303241729736, |
|
"learning_rate": 2.5638876606239756e-05, |
|
"loss": 0.8903, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.8729702143453651, |
|
"grad_norm": 0.3756983280181885, |
|
"learning_rate": 2.561563314642511e-05, |
|
"loss": 0.8037, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.873712535956203, |
|
"grad_norm": 0.3644946217536926, |
|
"learning_rate": 2.5592381449811284e-05, |
|
"loss": 0.7777, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.8744548575670409, |
|
"grad_norm": 0.3979732394218445, |
|
"learning_rate": 2.5569121550502992e-05, |
|
"loss": 0.7259, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.8751971791778789, |
|
"grad_norm": 0.365654319524765, |
|
"learning_rate": 2.5545853482616975e-05, |
|
"loss": 0.8369, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.8759395007887167, |
|
"grad_norm": 0.37514373660087585, |
|
"learning_rate": 2.5522577280281958e-05, |
|
"loss": 0.7921, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8766818223995546, |
|
"grad_norm": 0.31953245401382446, |
|
"learning_rate": 2.5499292977638607e-05, |
|
"loss": 0.7783, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.8774241440103925, |
|
"grad_norm": 0.35402050614356995, |
|
"learning_rate": 2.5476000608839454e-05, |
|
"loss": 0.7105, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.8781664656212304, |
|
"grad_norm": 0.3901765048503876, |
|
"learning_rate": 2.5452700208048864e-05, |
|
"loss": 0.8499, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.8789087872320683, |
|
"grad_norm": 0.39869165420532227, |
|
"learning_rate": 2.5429391809442993e-05, |
|
"loss": 0.8133, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.8796511088429062, |
|
"grad_norm": 0.42267906665802, |
|
"learning_rate": 2.5406075447209725e-05, |
|
"loss": 0.8178, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.8803934304537441, |
|
"grad_norm": 0.40030765533447266, |
|
"learning_rate": 2.5382751155548615e-05, |
|
"loss": 0.795, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.881135752064582, |
|
"grad_norm": 0.34904223680496216, |
|
"learning_rate": 2.535941896867086e-05, |
|
"loss": 0.826, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.8818780736754199, |
|
"grad_norm": 0.34118303656578064, |
|
"learning_rate": 2.533607892079923e-05, |
|
"loss": 0.7497, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.8826203952862578, |
|
"grad_norm": 0.37169280648231506, |
|
"learning_rate": 2.5312731046168026e-05, |
|
"loss": 0.7827, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.8833627168970957, |
|
"grad_norm": 0.4534485936164856, |
|
"learning_rate": 2.5289375379023043e-05, |
|
"loss": 0.842, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8841050385079335, |
|
"grad_norm": 0.38551488518714905, |
|
"learning_rate": 2.526601195362147e-05, |
|
"loss": 0.7868, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.8848473601187714, |
|
"grad_norm": 0.3991512656211853, |
|
"learning_rate": 2.5242640804231922e-05, |
|
"loss": 0.7889, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.8855896817296094, |
|
"grad_norm": 0.36497434973716736, |
|
"learning_rate": 2.5219261965134315e-05, |
|
"loss": 0.7602, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.8863320033404473, |
|
"grad_norm": 0.38198092579841614, |
|
"learning_rate": 2.519587547061985e-05, |
|
"loss": 0.7992, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.8870743249512851, |
|
"grad_norm": 0.3975292444229126, |
|
"learning_rate": 2.517248135499095e-05, |
|
"loss": 0.7652, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.887816646562123, |
|
"grad_norm": 0.3680102229118347, |
|
"learning_rate": 2.5149079652561225e-05, |
|
"loss": 0.8046, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.888558968172961, |
|
"grad_norm": 0.38051730394363403, |
|
"learning_rate": 2.512567039765542e-05, |
|
"loss": 0.8289, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.8893012897837989, |
|
"grad_norm": 0.3820357620716095, |
|
"learning_rate": 2.5102253624609332e-05, |
|
"loss": 0.7923, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.8900436113946367, |
|
"grad_norm": 0.40367165207862854, |
|
"learning_rate": 2.507882936776981e-05, |
|
"loss": 0.7872, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.8907859330054746, |
|
"grad_norm": 0.3560531735420227, |
|
"learning_rate": 2.505539766149468e-05, |
|
"loss": 0.8427, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8915282546163125, |
|
"grad_norm": 0.34239915013313293, |
|
"learning_rate": 2.503195854015267e-05, |
|
"loss": 0.799, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.8922705762271504, |
|
"grad_norm": 0.3868923485279083, |
|
"learning_rate": 2.500851203812341e-05, |
|
"loss": 0.8314, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.8930128978379883, |
|
"grad_norm": 0.4198054373264313, |
|
"learning_rate": 2.4985058189797347e-05, |
|
"loss": 0.8763, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.8937552194488262, |
|
"grad_norm": 0.4096454083919525, |
|
"learning_rate": 2.4961597029575694e-05, |
|
"loss": 0.7835, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.8944975410596641, |
|
"grad_norm": 0.407387912273407, |
|
"learning_rate": 2.493812859187041e-05, |
|
"loss": 0.8137, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.895239862670502, |
|
"grad_norm": 0.38549163937568665, |
|
"learning_rate": 2.4914652911104104e-05, |
|
"loss": 0.788, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.8959821842813399, |
|
"grad_norm": 0.37403562664985657, |
|
"learning_rate": 2.4891170021710025e-05, |
|
"loss": 0.819, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.8967245058921778, |
|
"grad_norm": 0.33642685413360596, |
|
"learning_rate": 2.4867679958131996e-05, |
|
"loss": 0.7957, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.8974668275030157, |
|
"grad_norm": 0.3692936897277832, |
|
"learning_rate": 2.4844182754824343e-05, |
|
"loss": 0.8125, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.8982091491138535, |
|
"grad_norm": 0.4189552664756775, |
|
"learning_rate": 2.4820678446251893e-05, |
|
"loss": 0.8238, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8989514707246915, |
|
"grad_norm": 0.411651074886322, |
|
"learning_rate": 2.4797167066889873e-05, |
|
"loss": 0.8231, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.8996937923355294, |
|
"grad_norm": 0.4330805540084839, |
|
"learning_rate": 2.477364865122389e-05, |
|
"loss": 0.8004, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.9004361139463672, |
|
"grad_norm": 0.4067060351371765, |
|
"learning_rate": 2.4750123233749874e-05, |
|
"loss": 0.8056, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.9011784355572051, |
|
"grad_norm": 0.4101119339466095, |
|
"learning_rate": 2.4726590848974015e-05, |
|
"loss": 0.842, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.9019207571680431, |
|
"grad_norm": 0.4217219352722168, |
|
"learning_rate": 2.470305153141273e-05, |
|
"loss": 0.869, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.902663078778881, |
|
"grad_norm": 0.43955573439598083, |
|
"learning_rate": 2.4679505315592608e-05, |
|
"loss": 0.8974, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.9034054003897188, |
|
"grad_norm": 0.3962491750717163, |
|
"learning_rate": 2.4655952236050347e-05, |
|
"loss": 0.8321, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.9041477220005567, |
|
"grad_norm": 0.39119595289230347, |
|
"learning_rate": 2.463239232733272e-05, |
|
"loss": 0.816, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.9048900436113947, |
|
"grad_norm": 0.37983494997024536, |
|
"learning_rate": 2.4608825623996513e-05, |
|
"loss": 0.9081, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.9056323652222326, |
|
"grad_norm": 0.36598238348960876, |
|
"learning_rate": 2.4585252160608474e-05, |
|
"loss": 0.885, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.9063746868330704, |
|
"grad_norm": 0.41146743297576904, |
|
"learning_rate": 2.4561671971745284e-05, |
|
"loss": 0.8678, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.9071170084439083, |
|
"grad_norm": 0.40173497796058655, |
|
"learning_rate": 2.453808509199346e-05, |
|
"loss": 0.7682, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.9078593300547462, |
|
"grad_norm": 0.3730056583881378, |
|
"learning_rate": 2.4514491555949356e-05, |
|
"loss": 0.7668, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.9086016516655842, |
|
"grad_norm": 0.38427025079727173, |
|
"learning_rate": 2.4490891398219084e-05, |
|
"loss": 0.7851, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.909343973276422, |
|
"grad_norm": 0.4028315842151642, |
|
"learning_rate": 2.446728465341846e-05, |
|
"loss": 0.8517, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.9100862948872599, |
|
"grad_norm": 0.3713853657245636, |
|
"learning_rate": 2.444367135617298e-05, |
|
"loss": 0.754, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.9108286164980978, |
|
"grad_norm": 0.3742387592792511, |
|
"learning_rate": 2.4420051541117725e-05, |
|
"loss": 0.916, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.9115709381089357, |
|
"grad_norm": 0.38887640833854675, |
|
"learning_rate": 2.4396425242897354e-05, |
|
"loss": 0.8266, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.9123132597197736, |
|
"grad_norm": 0.43293431401252747, |
|
"learning_rate": 2.4372792496166032e-05, |
|
"loss": 0.8804, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.9130555813306115, |
|
"grad_norm": 0.42462730407714844, |
|
"learning_rate": 2.4349153335587388e-05, |
|
"loss": 0.7723, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.9137979029414494, |
|
"grad_norm": 0.38612687587738037, |
|
"learning_rate": 2.432550779583445e-05, |
|
"loss": 0.8027, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.9145402245522872, |
|
"grad_norm": 0.42818307876586914, |
|
"learning_rate": 2.43018559115896e-05, |
|
"loss": 0.9014, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.9152825461631252, |
|
"grad_norm": 0.34975147247314453, |
|
"learning_rate": 2.4278197717544533e-05, |
|
"loss": 0.7912, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.9160248677739631, |
|
"grad_norm": 0.44405031204223633, |
|
"learning_rate": 2.4254533248400204e-05, |
|
"loss": 0.8931, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.916767189384801, |
|
"grad_norm": 0.4183354377746582, |
|
"learning_rate": 2.423086253886675e-05, |
|
"loss": 0.8008, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.9175095109956388, |
|
"grad_norm": 0.40280815958976746, |
|
"learning_rate": 2.420718562366349e-05, |
|
"loss": 0.817, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.9182518326064768, |
|
"grad_norm": 0.45507195591926575, |
|
"learning_rate": 2.4183502537518826e-05, |
|
"loss": 0.8518, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.9189941542173147, |
|
"grad_norm": 0.4256339371204376, |
|
"learning_rate": 2.4159813315170217e-05, |
|
"loss": 0.8463, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.9197364758281525, |
|
"grad_norm": 0.49776536226272583, |
|
"learning_rate": 2.413611799136412e-05, |
|
"loss": 0.8524, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.9204787974389904, |
|
"grad_norm": 0.36638113856315613, |
|
"learning_rate": 2.4112416600855944e-05, |
|
"loss": 0.9285, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9212211190498283, |
|
"grad_norm": 0.4256168007850647, |
|
"learning_rate": 2.4088709178409997e-05, |
|
"loss": 0.9358, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.9219634406606663, |
|
"grad_norm": 0.35663262009620667, |
|
"learning_rate": 2.4064995758799437e-05, |
|
"loss": 0.7417, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.9227057622715041, |
|
"grad_norm": 0.4273064136505127, |
|
"learning_rate": 2.404127637680621e-05, |
|
"loss": 0.8254, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.923448083882342, |
|
"grad_norm": 0.3672143220901489, |
|
"learning_rate": 2.4017551067221014e-05, |
|
"loss": 0.8111, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.9241904054931799, |
|
"grad_norm": 0.38435935974121094, |
|
"learning_rate": 2.3993819864843234e-05, |
|
"loss": 0.88, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.9249327271040179, |
|
"grad_norm": 0.37791430950164795, |
|
"learning_rate": 2.3970082804480912e-05, |
|
"loss": 0.8167, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.9256750487148557, |
|
"grad_norm": 0.4131454825401306, |
|
"learning_rate": 2.3946339920950675e-05, |
|
"loss": 0.8046, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.9264173703256936, |
|
"grad_norm": 0.42721277475357056, |
|
"learning_rate": 2.392259124907768e-05, |
|
"loss": 0.8612, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.9271596919365315, |
|
"grad_norm": 0.39011621475219727, |
|
"learning_rate": 2.3898836823695595e-05, |
|
"loss": 0.8757, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.9279020135473695, |
|
"grad_norm": 0.41248777508735657, |
|
"learning_rate": 2.3875076679646503e-05, |
|
"loss": 0.9099, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9286443351582073, |
|
"grad_norm": 0.4113259017467499, |
|
"learning_rate": 2.38513108517809e-05, |
|
"loss": 0.8793, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.9293866567690452, |
|
"grad_norm": 0.4344083368778229, |
|
"learning_rate": 2.3827539374957615e-05, |
|
"loss": 0.7458, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.9301289783798831, |
|
"grad_norm": 0.3516993522644043, |
|
"learning_rate": 2.380376228404373e-05, |
|
"loss": 0.796, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 0.9308712999907209, |
|
"grad_norm": 0.3689666986465454, |
|
"learning_rate": 2.3779979613914605e-05, |
|
"loss": 0.8158, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.9316136216015589, |
|
"grad_norm": 0.35044315457344055, |
|
"learning_rate": 2.375619139945376e-05, |
|
"loss": 0.8159, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.9323559432123968, |
|
"grad_norm": 0.39020657539367676, |
|
"learning_rate": 2.3732397675552847e-05, |
|
"loss": 0.7833, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.9330982648232347, |
|
"grad_norm": 0.3595007359981537, |
|
"learning_rate": 2.3708598477111616e-05, |
|
"loss": 0.8122, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 0.9338405864340725, |
|
"grad_norm": 0.37129589915275574, |
|
"learning_rate": 2.3684793839037825e-05, |
|
"loss": 0.76, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 0.9345829080449105, |
|
"grad_norm": 0.3745594918727875, |
|
"learning_rate": 2.3660983796247226e-05, |
|
"loss": 0.7385, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 0.9353252296557484, |
|
"grad_norm": 0.38395509123802185, |
|
"learning_rate": 2.3637168383663493e-05, |
|
"loss": 0.7137, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9360675512665863, |
|
"grad_norm": 0.37863481044769287, |
|
"learning_rate": 2.3613347636218164e-05, |
|
"loss": 0.8338, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 0.9368098728774241, |
|
"grad_norm": 0.41848132014274597, |
|
"learning_rate": 2.358952158885063e-05, |
|
"loss": 0.7827, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 0.937552194488262, |
|
"grad_norm": 0.37910816073417664, |
|
"learning_rate": 2.356569027650803e-05, |
|
"loss": 0.7945, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 0.9382945160991, |
|
"grad_norm": 0.37581756711006165, |
|
"learning_rate": 2.354185373414524e-05, |
|
"loss": 0.8229, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.9390368377099378, |
|
"grad_norm": 0.3768438696861267, |
|
"learning_rate": 2.3518011996724805e-05, |
|
"loss": 0.8108, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.9397791593207757, |
|
"grad_norm": 0.4513545334339142, |
|
"learning_rate": 2.349416509921688e-05, |
|
"loss": 0.8078, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 0.9405214809316136, |
|
"grad_norm": 0.37917861342430115, |
|
"learning_rate": 2.34703130765992e-05, |
|
"loss": 0.7238, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 0.9412638025424516, |
|
"grad_norm": 0.4270203709602356, |
|
"learning_rate": 2.344645596385701e-05, |
|
"loss": 0.9255, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.9420061241532894, |
|
"grad_norm": 0.38025596737861633, |
|
"learning_rate": 2.3422593795983026e-05, |
|
"loss": 0.7485, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 0.9427484457641273, |
|
"grad_norm": 0.35476383566856384, |
|
"learning_rate": 2.3398726607977388e-05, |
|
"loss": 0.7331, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9434907673749652, |
|
"grad_norm": 0.40570273995399475, |
|
"learning_rate": 2.3374854434847572e-05, |
|
"loss": 0.8989, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 0.944233088985803, |
|
"grad_norm": 0.39702484011650085, |
|
"learning_rate": 2.335097731160839e-05, |
|
"loss": 0.7746, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.944975410596641, |
|
"grad_norm": 0.4499233067035675, |
|
"learning_rate": 2.3327095273281904e-05, |
|
"loss": 0.9053, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 0.9457177322074789, |
|
"grad_norm": 0.40540260076522827, |
|
"learning_rate": 2.3303208354897392e-05, |
|
"loss": 0.9249, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 0.9464600538183168, |
|
"grad_norm": 0.37241825461387634, |
|
"learning_rate": 2.327931659149129e-05, |
|
"loss": 0.8205, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.9472023754291546, |
|
"grad_norm": 0.42290613055229187, |
|
"learning_rate": 2.325542001810712e-05, |
|
"loss": 0.8871, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.9479446970399926, |
|
"grad_norm": 0.40947234630584717, |
|
"learning_rate": 2.32315186697955e-05, |
|
"loss": 0.8654, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 0.9486870186508305, |
|
"grad_norm": 0.4005737900733948, |
|
"learning_rate": 2.3207612581614005e-05, |
|
"loss": 0.9048, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 0.9494293402616684, |
|
"grad_norm": 0.38072773814201355, |
|
"learning_rate": 2.3183701788627192e-05, |
|
"loss": 0.766, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 0.9501716618725062, |
|
"grad_norm": 0.3864803612232208, |
|
"learning_rate": 2.3159786325906524e-05, |
|
"loss": 0.8, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9509139834833441, |
|
"grad_norm": 0.3687272071838379, |
|
"learning_rate": 2.313586622853028e-05, |
|
"loss": 0.7775, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 0.9516563050941821, |
|
"grad_norm": 0.37793952226638794, |
|
"learning_rate": 2.3111941531583573e-05, |
|
"loss": 0.8045, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 0.95239862670502, |
|
"grad_norm": 0.3550565540790558, |
|
"learning_rate": 2.3088012270158244e-05, |
|
"loss": 0.8476, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 0.9531409483158578, |
|
"grad_norm": 0.3786463737487793, |
|
"learning_rate": 2.3064078479352833e-05, |
|
"loss": 0.7822, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.9538832699266957, |
|
"grad_norm": 0.45172375440597534, |
|
"learning_rate": 2.3040140194272515e-05, |
|
"loss": 0.8627, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.9546255915375337, |
|
"grad_norm": 0.44080403447151184, |
|
"learning_rate": 2.3016197450029076e-05, |
|
"loss": 0.9437, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 0.9553679131483716, |
|
"grad_norm": 0.40898117423057556, |
|
"learning_rate": 2.299225028174082e-05, |
|
"loss": 0.8085, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 0.9561102347592094, |
|
"grad_norm": 0.41421830654144287, |
|
"learning_rate": 2.2968298724532562e-05, |
|
"loss": 0.8255, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.9568525563700473, |
|
"grad_norm": 0.33999302983283997, |
|
"learning_rate": 2.294434281353554e-05, |
|
"loss": 0.9014, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 0.9575948779808853, |
|
"grad_norm": 0.47232887148857117, |
|
"learning_rate": 2.292038258388738e-05, |
|
"loss": 1.0212, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9583371995917231, |
|
"grad_norm": 0.37770771980285645, |
|
"learning_rate": 2.2896418070732048e-05, |
|
"loss": 0.7837, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 0.959079521202561, |
|
"grad_norm": 0.36593371629714966, |
|
"learning_rate": 2.2872449309219778e-05, |
|
"loss": 0.7225, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.9598218428133989, |
|
"grad_norm": 0.36546435952186584, |
|
"learning_rate": 2.2848476334507068e-05, |
|
"loss": 0.7716, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 0.9605641644242368, |
|
"grad_norm": 0.3623177111148834, |
|
"learning_rate": 2.2824499181756555e-05, |
|
"loss": 0.7491, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 0.9613064860350747, |
|
"grad_norm": 0.3944539427757263, |
|
"learning_rate": 2.280051788613703e-05, |
|
"loss": 0.7789, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.9620488076459126, |
|
"grad_norm": 0.42587587237358093, |
|
"learning_rate": 2.277653248282336e-05, |
|
"loss": 0.7938, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.9627911292567505, |
|
"grad_norm": 0.41713660955429077, |
|
"learning_rate": 2.275254300699642e-05, |
|
"loss": 0.8678, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 0.9635334508675883, |
|
"grad_norm": 0.402723491191864, |
|
"learning_rate": 2.272854949384308e-05, |
|
"loss": 0.8656, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 0.9642757724784263, |
|
"grad_norm": 0.3979452848434448, |
|
"learning_rate": 2.2704551978556112e-05, |
|
"loss": 0.8606, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 0.9650180940892642, |
|
"grad_norm": 0.3622892200946808, |
|
"learning_rate": 2.2680550496334176e-05, |
|
"loss": 0.7591, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9657604157001021, |
|
"grad_norm": 0.4129877984523773, |
|
"learning_rate": 2.265654508238174e-05, |
|
"loss": 0.8572, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 0.9665027373109399, |
|
"grad_norm": 0.38189956545829773, |
|
"learning_rate": 2.2632535771909036e-05, |
|
"loss": 0.8457, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.9672450589217778, |
|
"grad_norm": 0.3791535794734955, |
|
"learning_rate": 2.2608522600132017e-05, |
|
"loss": 0.7564, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 0.9679873805326158, |
|
"grad_norm": 0.3876500427722931, |
|
"learning_rate": 2.2584505602272305e-05, |
|
"loss": 0.8574, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.9687297021434537, |
|
"grad_norm": 0.36182138323783875, |
|
"learning_rate": 2.2560484813557122e-05, |
|
"loss": 0.7202, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.9694720237542915, |
|
"grad_norm": 0.3705620765686035, |
|
"learning_rate": 2.253646026921926e-05, |
|
"loss": 0.7622, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 0.9702143453651294, |
|
"grad_norm": 0.3661751449108124, |
|
"learning_rate": 2.2512432004497015e-05, |
|
"loss": 0.7586, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 0.9709566669759674, |
|
"grad_norm": 0.35583674907684326, |
|
"learning_rate": 2.248840005463414e-05, |
|
"loss": 0.8295, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.9716989885868053, |
|
"grad_norm": 0.34773266315460205, |
|
"learning_rate": 2.2464364454879792e-05, |
|
"loss": 0.8581, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.9724413101976431, |
|
"grad_norm": 0.35552486777305603, |
|
"learning_rate": 2.2440325240488484e-05, |
|
"loss": 0.8472, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.973183631808481, |
|
"grad_norm": 0.332487016916275, |
|
"learning_rate": 2.241628244672003e-05, |
|
"loss": 0.791, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 0.9739259534193189, |
|
"grad_norm": 0.41265782713890076, |
|
"learning_rate": 2.2392236108839503e-05, |
|
"loss": 0.8364, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.9746682750301568, |
|
"grad_norm": 0.4064471125602722, |
|
"learning_rate": 2.236818626211715e-05, |
|
"loss": 0.8414, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 0.9754105966409947, |
|
"grad_norm": 0.3656854033470154, |
|
"learning_rate": 2.2344132941828395e-05, |
|
"loss": 0.7787, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 0.9761529182518326, |
|
"grad_norm": 0.40122607350349426, |
|
"learning_rate": 2.2320076183253733e-05, |
|
"loss": 0.8778, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.9768952398626705, |
|
"grad_norm": 0.46013033390045166, |
|
"learning_rate": 2.229601602167871e-05, |
|
"loss": 0.8577, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.9776375614735084, |
|
"grad_norm": 0.41439104080200195, |
|
"learning_rate": 2.227195249239387e-05, |
|
"loss": 0.7765, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 0.9783798830843463, |
|
"grad_norm": 0.422951340675354, |
|
"learning_rate": 2.224788563069469e-05, |
|
"loss": 0.8019, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 0.9791222046951842, |
|
"grad_norm": 0.3669542372226715, |
|
"learning_rate": 2.2223815471881543e-05, |
|
"loss": 0.7908, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 0.979864526306022, |
|
"grad_norm": 0.39412543177604675, |
|
"learning_rate": 2.2199742051259617e-05, |
|
"loss": 0.8098, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9806068479168599, |
|
"grad_norm": 0.41053861379623413, |
|
"learning_rate": 2.2175665404138907e-05, |
|
"loss": 0.7639, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 0.9813491695276979, |
|
"grad_norm": 0.44688349962234497, |
|
"learning_rate": 2.215158556583414e-05, |
|
"loss": 0.707, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 0.9820914911385358, |
|
"grad_norm": 0.4231646955013275, |
|
"learning_rate": 2.2127502571664698e-05, |
|
"loss": 0.794, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 0.9828338127493736, |
|
"grad_norm": 0.3481779992580414, |
|
"learning_rate": 2.2103416456954623e-05, |
|
"loss": 0.7119, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.9835761343602115, |
|
"grad_norm": 0.4194432497024536, |
|
"learning_rate": 2.2079327257032515e-05, |
|
"loss": 0.8184, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.9843184559710495, |
|
"grad_norm": 0.48259228467941284, |
|
"learning_rate": 2.2055235007231507e-05, |
|
"loss": 0.8588, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 0.9850607775818874, |
|
"grad_norm": 0.33273354172706604, |
|
"learning_rate": 2.203113974288921e-05, |
|
"loss": 0.7598, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 0.9858030991927252, |
|
"grad_norm": 0.3975766599178314, |
|
"learning_rate": 2.2007041499347634e-05, |
|
"loss": 0.7695, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.9865454208035631, |
|
"grad_norm": 0.3873244524002075, |
|
"learning_rate": 2.1982940311953187e-05, |
|
"loss": 0.8279, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 0.9872877424144011, |
|
"grad_norm": 0.3644872307777405, |
|
"learning_rate": 2.195883621605658e-05, |
|
"loss": 0.7318, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.988030064025239, |
|
"grad_norm": 0.4143805205821991, |
|
"learning_rate": 2.1934729247012793e-05, |
|
"loss": 0.8224, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 0.9887723856360768, |
|
"grad_norm": 0.372543066740036, |
|
"learning_rate": 2.1910619440181025e-05, |
|
"loss": 0.8265, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.9895147072469147, |
|
"grad_norm": 0.34617361426353455, |
|
"learning_rate": 2.1886506830924625e-05, |
|
"loss": 0.7999, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.9902570288577526, |
|
"grad_norm": 0.4121123254299164, |
|
"learning_rate": 2.186239145461106e-05, |
|
"loss": 0.7929, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 0.9909993504685906, |
|
"grad_norm": 0.41998445987701416, |
|
"learning_rate": 2.1838273346611866e-05, |
|
"loss": 0.8936, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.9917416720794284, |
|
"grad_norm": 0.3765590488910675, |
|
"learning_rate": 2.181415254230256e-05, |
|
"loss": 0.8088, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.9924839936902663, |
|
"grad_norm": 0.426413893699646, |
|
"learning_rate": 2.1790029077062643e-05, |
|
"loss": 0.8305, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 0.9932263153011042, |
|
"grad_norm": 0.38797062635421753, |
|
"learning_rate": 2.17659029862755e-05, |
|
"loss": 0.6533, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 0.9939686369119421, |
|
"grad_norm": 0.4096301794052124, |
|
"learning_rate": 2.1741774305328365e-05, |
|
"loss": 0.8569, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 0.99471095852278, |
|
"grad_norm": 0.402988076210022, |
|
"learning_rate": 2.1717643069612296e-05, |
|
"loss": 0.799, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9954532801336179, |
|
"grad_norm": 0.5020378232002258, |
|
"learning_rate": 2.169350931452206e-05, |
|
"loss": 0.9032, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 0.9961956017444558, |
|
"grad_norm": 0.4305264353752136, |
|
"learning_rate": 2.1669373075456154e-05, |
|
"loss": 0.9215, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 0.9969379233552936, |
|
"grad_norm": 0.43394702672958374, |
|
"learning_rate": 2.1645234387816695e-05, |
|
"loss": 0.8829, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 0.9976802449661316, |
|
"grad_norm": 0.4227968752384186, |
|
"learning_rate": 2.1621093287009406e-05, |
|
"loss": 0.8431, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.9984225665769695, |
|
"grad_norm": 0.4018241763114929, |
|
"learning_rate": 2.1596949808443553e-05, |
|
"loss": 0.7647, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.9991648881878074, |
|
"grad_norm": 0.36082741618156433, |
|
"learning_rate": 2.1572803987531864e-05, |
|
"loss": 0.7796, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 0.9999072097986452, |
|
"grad_norm": 0.4092870354652405, |
|
"learning_rate": 2.154865585969053e-05, |
|
"loss": 0.8429, |
|
"step": 1347 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 2694, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 1.6697839224954552e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |