|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.999028560326404, |
|
"eval_steps": 500, |
|
"global_step": 7719, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0038857586943850785, |
|
"grad_norm": 6.868951908344346, |
|
"learning_rate": 5.181347150259067e-07, |
|
"loss": 1.0933, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.007771517388770157, |
|
"grad_norm": 5.837615214745706, |
|
"learning_rate": 1.0362694300518134e-06, |
|
"loss": 1.0482, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.011657276083155236, |
|
"grad_norm": 3.7584728670248264, |
|
"learning_rate": 1.5544041450777204e-06, |
|
"loss": 0.9528, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.015543034777540314, |
|
"grad_norm": 2.430849231904627, |
|
"learning_rate": 2.072538860103627e-06, |
|
"loss": 0.7595, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.019428793471925394, |
|
"grad_norm": 1.9877795062873533, |
|
"learning_rate": 2.5906735751295338e-06, |
|
"loss": 0.5683, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02331455216631047, |
|
"grad_norm": 1.1748343816269307, |
|
"learning_rate": 3.1088082901554407e-06, |
|
"loss": 0.393, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02720031086069555, |
|
"grad_norm": 1.2009973092317858, |
|
"learning_rate": 3.6269430051813476e-06, |
|
"loss": 0.2619, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.031086069555080628, |
|
"grad_norm": 1.0407742203046373, |
|
"learning_rate": 4.145077720207254e-06, |
|
"loss": 0.2057, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03497182824946571, |
|
"grad_norm": 1.1295981297490019, |
|
"learning_rate": 4.663212435233161e-06, |
|
"loss": 0.1671, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03885758694385079, |
|
"grad_norm": 1.1504503826508636, |
|
"learning_rate": 5.1813471502590676e-06, |
|
"loss": 0.155, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04274334563823587, |
|
"grad_norm": 0.7676430272864883, |
|
"learning_rate": 5.699481865284975e-06, |
|
"loss": 0.1451, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.04662910433262094, |
|
"grad_norm": 1.1281617722679143, |
|
"learning_rate": 6.217616580310881e-06, |
|
"loss": 0.1436, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05051486302700602, |
|
"grad_norm": 1.042258959715133, |
|
"learning_rate": 6.735751295336788e-06, |
|
"loss": 0.1295, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0544006217213911, |
|
"grad_norm": 1.0836324717622774, |
|
"learning_rate": 7.253886010362695e-06, |
|
"loss": 0.1149, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.05828638041577618, |
|
"grad_norm": 1.4204705637615596, |
|
"learning_rate": 7.772020725388602e-06, |
|
"loss": 0.1268, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.062172139110161256, |
|
"grad_norm": 1.0083666041980668, |
|
"learning_rate": 8.290155440414507e-06, |
|
"loss": 0.1182, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.06605789780454634, |
|
"grad_norm": 0.833955996150633, |
|
"learning_rate": 8.808290155440415e-06, |
|
"loss": 0.1037, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.06994365649893142, |
|
"grad_norm": 0.762829972908706, |
|
"learning_rate": 9.326424870466322e-06, |
|
"loss": 0.1047, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.0738294151933165, |
|
"grad_norm": 1.018781373894918, |
|
"learning_rate": 9.844559585492228e-06, |
|
"loss": 0.1059, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.07771517388770158, |
|
"grad_norm": 0.8149796744145043, |
|
"learning_rate": 1.0362694300518135e-05, |
|
"loss": 0.1094, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08160093258208666, |
|
"grad_norm": 1.0562790321924551, |
|
"learning_rate": 1.0880829015544042e-05, |
|
"loss": 0.0978, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.08548669127647174, |
|
"grad_norm": 0.8392051142898549, |
|
"learning_rate": 1.139896373056995e-05, |
|
"loss": 0.1074, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.0893724499708568, |
|
"grad_norm": 0.8606053025510271, |
|
"learning_rate": 1.1917098445595855e-05, |
|
"loss": 0.0936, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.09325820866524188, |
|
"grad_norm": 0.8010752114927957, |
|
"learning_rate": 1.2435233160621763e-05, |
|
"loss": 0.084, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.09714396735962696, |
|
"grad_norm": 0.8865363433827388, |
|
"learning_rate": 1.2953367875647668e-05, |
|
"loss": 0.0849, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.10102972605401205, |
|
"grad_norm": 1.0150630528923608, |
|
"learning_rate": 1.3471502590673576e-05, |
|
"loss": 0.0941, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.10491548474839713, |
|
"grad_norm": 1.1564503498169056, |
|
"learning_rate": 1.3989637305699483e-05, |
|
"loss": 0.0884, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.1088012434427822, |
|
"grad_norm": 0.7425602409996528, |
|
"learning_rate": 1.450777202072539e-05, |
|
"loss": 0.0904, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.11268700213716729, |
|
"grad_norm": 1.010495639141353, |
|
"learning_rate": 1.5025906735751296e-05, |
|
"loss": 0.0877, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.11657276083155237, |
|
"grad_norm": 0.7312574416672747, |
|
"learning_rate": 1.5544041450777204e-05, |
|
"loss": 0.0833, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.12045851952593743, |
|
"grad_norm": 0.8186188565897778, |
|
"learning_rate": 1.606217616580311e-05, |
|
"loss": 0.082, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.12434427822032251, |
|
"grad_norm": 0.6725849359490335, |
|
"learning_rate": 1.6580310880829015e-05, |
|
"loss": 0.0773, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1282300369147076, |
|
"grad_norm": 0.6044289285905357, |
|
"learning_rate": 1.7098445595854924e-05, |
|
"loss": 0.0793, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.13211579560909267, |
|
"grad_norm": 0.648628136760092, |
|
"learning_rate": 1.761658031088083e-05, |
|
"loss": 0.0748, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.13600155430347777, |
|
"grad_norm": 0.7654085124832696, |
|
"learning_rate": 1.813471502590674e-05, |
|
"loss": 0.0656, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.13988731299786283, |
|
"grad_norm": 0.6998519217873228, |
|
"learning_rate": 1.8652849740932644e-05, |
|
"loss": 0.0841, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1437730716922479, |
|
"grad_norm": 0.5847858909362429, |
|
"learning_rate": 1.917098445595855e-05, |
|
"loss": 0.0692, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.147658830386633, |
|
"grad_norm": 0.7494257595598578, |
|
"learning_rate": 1.9689119170984456e-05, |
|
"loss": 0.0783, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.15154458908101806, |
|
"grad_norm": 0.7368291090701112, |
|
"learning_rate": 2.0207253886010365e-05, |
|
"loss": 0.0672, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.15543034777540315, |
|
"grad_norm": 0.6162219355087554, |
|
"learning_rate": 2.072538860103627e-05, |
|
"loss": 0.0768, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.15931610646978822, |
|
"grad_norm": 1.0522389801039482, |
|
"learning_rate": 2.124352331606218e-05, |
|
"loss": 0.0753, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.16320186516417332, |
|
"grad_norm": 0.6631859133005514, |
|
"learning_rate": 2.1761658031088085e-05, |
|
"loss": 0.0853, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.16708762385855838, |
|
"grad_norm": 0.8284655987791505, |
|
"learning_rate": 2.227979274611399e-05, |
|
"loss": 0.0644, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.17097338255294348, |
|
"grad_norm": 0.6911832435770443, |
|
"learning_rate": 2.27979274611399e-05, |
|
"loss": 0.0623, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.17485914124732854, |
|
"grad_norm": 0.5902621273486757, |
|
"learning_rate": 2.3316062176165802e-05, |
|
"loss": 0.077, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.1787448999417136, |
|
"grad_norm": 0.7554872423202403, |
|
"learning_rate": 2.383419689119171e-05, |
|
"loss": 0.0798, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.1826306586360987, |
|
"grad_norm": 0.6639542635427919, |
|
"learning_rate": 2.435233160621762e-05, |
|
"loss": 0.0786, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.18651641733048377, |
|
"grad_norm": 0.7784648823322334, |
|
"learning_rate": 2.4870466321243526e-05, |
|
"loss": 0.0814, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.19040217602486886, |
|
"grad_norm": 0.6900832905628588, |
|
"learning_rate": 2.5388601036269435e-05, |
|
"loss": 0.0787, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.19428793471925393, |
|
"grad_norm": 0.6434291181769316, |
|
"learning_rate": 2.5906735751295337e-05, |
|
"loss": 0.0694, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.19817369341363902, |
|
"grad_norm": 0.6727244264583743, |
|
"learning_rate": 2.6424870466321246e-05, |
|
"loss": 0.0696, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2020594521080241, |
|
"grad_norm": 0.5145508471366054, |
|
"learning_rate": 2.694300518134715e-05, |
|
"loss": 0.0762, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.20594521080240916, |
|
"grad_norm": 0.5458188017984406, |
|
"learning_rate": 2.746113989637306e-05, |
|
"loss": 0.067, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.20983096949679425, |
|
"grad_norm": 0.615156585002907, |
|
"learning_rate": 2.7979274611398966e-05, |
|
"loss": 0.0674, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.21371672819117932, |
|
"grad_norm": 0.5503543665259718, |
|
"learning_rate": 2.8497409326424872e-05, |
|
"loss": 0.0764, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.2176024868855644, |
|
"grad_norm": 0.6637963707127615, |
|
"learning_rate": 2.901554404145078e-05, |
|
"loss": 0.0666, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.22148824557994948, |
|
"grad_norm": 0.5116291190440031, |
|
"learning_rate": 2.9533678756476683e-05, |
|
"loss": 0.0648, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.22537400427433457, |
|
"grad_norm": 0.6270451144717418, |
|
"learning_rate": 3.0051813471502592e-05, |
|
"loss": 0.0634, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.22925976296871964, |
|
"grad_norm": 0.5329413421123648, |
|
"learning_rate": 3.0569948186528505e-05, |
|
"loss": 0.0605, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.23314552166310473, |
|
"grad_norm": 0.7863078930627488, |
|
"learning_rate": 3.108808290155441e-05, |
|
"loss": 0.0611, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2370312803574898, |
|
"grad_norm": 0.5641860345765728, |
|
"learning_rate": 3.1606217616580316e-05, |
|
"loss": 0.0713, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.24091703905187487, |
|
"grad_norm": 0.6298950201322868, |
|
"learning_rate": 3.212435233160622e-05, |
|
"loss": 0.062, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.24480279774625996, |
|
"grad_norm": 0.5576657376337026, |
|
"learning_rate": 3.264248704663213e-05, |
|
"loss": 0.0729, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.24868855644064503, |
|
"grad_norm": 0.5849778617897893, |
|
"learning_rate": 3.316062176165803e-05, |
|
"loss": 0.0663, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.2525743151350301, |
|
"grad_norm": 0.6862718900926282, |
|
"learning_rate": 3.367875647668394e-05, |
|
"loss": 0.0702, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.2564600738294152, |
|
"grad_norm": 0.620792145685114, |
|
"learning_rate": 3.419689119170985e-05, |
|
"loss": 0.0647, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.2603458325238003, |
|
"grad_norm": 0.5377947147204513, |
|
"learning_rate": 3.471502590673575e-05, |
|
"loss": 0.0629, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.26423159121818535, |
|
"grad_norm": 0.5488958030608141, |
|
"learning_rate": 3.523316062176166e-05, |
|
"loss": 0.0671, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.2681173499125704, |
|
"grad_norm": 0.5954204106913672, |
|
"learning_rate": 3.575129533678757e-05, |
|
"loss": 0.064, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.27200310860695553, |
|
"grad_norm": 0.5519696206558653, |
|
"learning_rate": 3.626943005181348e-05, |
|
"loss": 0.055, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2758888673013406, |
|
"grad_norm": 0.6671381342735235, |
|
"learning_rate": 3.6787564766839386e-05, |
|
"loss": 0.0566, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.27977462599572567, |
|
"grad_norm": 0.46839311589453947, |
|
"learning_rate": 3.730569948186529e-05, |
|
"loss": 0.0655, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.28366038469011073, |
|
"grad_norm": 0.49157316560679837, |
|
"learning_rate": 3.78238341968912e-05, |
|
"loss": 0.0535, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.2875461433844958, |
|
"grad_norm": 0.49892347263304754, |
|
"learning_rate": 3.83419689119171e-05, |
|
"loss": 0.0674, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2914319020788809, |
|
"grad_norm": 0.5208448100813172, |
|
"learning_rate": 3.886010362694301e-05, |
|
"loss": 0.0511, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.295317660773266, |
|
"grad_norm": 0.2894917388721045, |
|
"learning_rate": 3.937823834196891e-05, |
|
"loss": 0.0568, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.29920341946765106, |
|
"grad_norm": 0.532926439876675, |
|
"learning_rate": 3.989637305699482e-05, |
|
"loss": 0.0565, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.3030891781620361, |
|
"grad_norm": 0.49821621909804464, |
|
"learning_rate": 3.999986911657599e-05, |
|
"loss": 0.0718, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.30697493685642124, |
|
"grad_norm": 0.4380644811650154, |
|
"learning_rate": 3.99993374056019e-05, |
|
"loss": 0.0578, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.3108606955508063, |
|
"grad_norm": 0.47585349907521746, |
|
"learning_rate": 3.999839669772912e-05, |
|
"loss": 0.0523, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3147464542451914, |
|
"grad_norm": 0.31641494499233286, |
|
"learning_rate": 3.999704701219562e-05, |
|
"loss": 0.0563, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.31863221293957644, |
|
"grad_norm": 0.48142926317508933, |
|
"learning_rate": 3.999528837660319e-05, |
|
"loss": 0.0587, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.3225179716339615, |
|
"grad_norm": 0.48754014218594893, |
|
"learning_rate": 3.999312082691682e-05, |
|
"loss": 0.0659, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.32640373032834663, |
|
"grad_norm": 0.6302273551444069, |
|
"learning_rate": 3.9990544407464044e-05, |
|
"loss": 0.0617, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.3302894890227317, |
|
"grad_norm": 0.4908822983385492, |
|
"learning_rate": 3.9987559170934e-05, |
|
"loss": 0.0573, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.33417524771711676, |
|
"grad_norm": 0.3297602359842017, |
|
"learning_rate": 3.9984165178376316e-05, |
|
"loss": 0.0491, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.33806100641150183, |
|
"grad_norm": 0.419433697037015, |
|
"learning_rate": 3.9980362499199915e-05, |
|
"loss": 0.0555, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.34194676510588695, |
|
"grad_norm": 0.5429807682582697, |
|
"learning_rate": 3.997615121117159e-05, |
|
"loss": 0.0501, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.345832523800272, |
|
"grad_norm": 0.5045801179188242, |
|
"learning_rate": 3.997153140041437e-05, |
|
"loss": 0.0527, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.3497182824946571, |
|
"grad_norm": 0.4119373168535617, |
|
"learning_rate": 3.9966503161405786e-05, |
|
"loss": 0.0579, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.35360404118904215, |
|
"grad_norm": 0.3652010084324409, |
|
"learning_rate": 3.996106659697597e-05, |
|
"loss": 0.0394, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.3574897998834272, |
|
"grad_norm": 0.49911111921282303, |
|
"learning_rate": 3.9955221818305504e-05, |
|
"loss": 0.0618, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.36137555857781234, |
|
"grad_norm": 0.4691524202220399, |
|
"learning_rate": 3.994896894492316e-05, |
|
"loss": 0.0449, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.3652613172721974, |
|
"grad_norm": 0.4522219450999137, |
|
"learning_rate": 3.9942308104703464e-05, |
|
"loss": 0.0637, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.36914707596658247, |
|
"grad_norm": 0.3759169728636205, |
|
"learning_rate": 3.993523943386408e-05, |
|
"loss": 0.0556, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.37303283466096754, |
|
"grad_norm": 0.4043985895545187, |
|
"learning_rate": 3.9927763076963026e-05, |
|
"loss": 0.0497, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.37691859335535266, |
|
"grad_norm": 0.36167729094148904, |
|
"learning_rate": 3.99198791868957e-05, |
|
"loss": 0.0443, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.3808043520497377, |
|
"grad_norm": 0.43188183331177943, |
|
"learning_rate": 3.991158792489178e-05, |
|
"loss": 0.0492, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.3846901107441228, |
|
"grad_norm": 0.591934470494083, |
|
"learning_rate": 3.9902889460511895e-05, |
|
"loss": 0.0433, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.38857586943850786, |
|
"grad_norm": 0.38540473325489366, |
|
"learning_rate": 3.989378397164419e-05, |
|
"loss": 0.062, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3924616281328929, |
|
"grad_norm": 0.4686847716122605, |
|
"learning_rate": 3.988427164450067e-05, |
|
"loss": 0.053, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.39634738682727805, |
|
"grad_norm": 0.44830904799588184, |
|
"learning_rate": 3.98743526736134e-05, |
|
"loss": 0.0546, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.4002331455216631, |
|
"grad_norm": 0.38427649669131925, |
|
"learning_rate": 3.986402726183051e-05, |
|
"loss": 0.044, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.4041189042160482, |
|
"grad_norm": 0.3797043059039449, |
|
"learning_rate": 3.985329562031207e-05, |
|
"loss": 0.0507, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.40800466291043325, |
|
"grad_norm": 0.4645705602868689, |
|
"learning_rate": 3.9842157968525755e-05, |
|
"loss": 0.0488, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.4118904216048183, |
|
"grad_norm": 0.45995234050567774, |
|
"learning_rate": 3.9830614534242365e-05, |
|
"loss": 0.0504, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.41577618029920344, |
|
"grad_norm": 0.6048279655888865, |
|
"learning_rate": 3.981866555353115e-05, |
|
"loss": 0.0554, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.4196619389935885, |
|
"grad_norm": 0.4778278100735776, |
|
"learning_rate": 3.9806311270755026e-05, |
|
"loss": 0.0504, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.42354769768797357, |
|
"grad_norm": 0.2819163749757861, |
|
"learning_rate": 3.9793551938565513e-05, |
|
"loss": 0.0473, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.42743345638235863, |
|
"grad_norm": 0.34078755188718823, |
|
"learning_rate": 3.978038781789764e-05, |
|
"loss": 0.0513, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.43131921507674376, |
|
"grad_norm": 0.42583054736380277, |
|
"learning_rate": 3.9766819177964535e-05, |
|
"loss": 0.0469, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.4352049737711288, |
|
"grad_norm": 0.5037049238340003, |
|
"learning_rate": 3.975284629625198e-05, |
|
"loss": 0.0552, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.4390907324655139, |
|
"grad_norm": 0.4127984535316269, |
|
"learning_rate": 3.973846945851271e-05, |
|
"loss": 0.0431, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.44297649115989896, |
|
"grad_norm": 0.4024463238112972, |
|
"learning_rate": 3.972368895876056e-05, |
|
"loss": 0.0442, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.446862249854284, |
|
"grad_norm": 0.34036792334896143, |
|
"learning_rate": 3.970850509926448e-05, |
|
"loss": 0.0429, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.45074800854866914, |
|
"grad_norm": 0.43486746219653427, |
|
"learning_rate": 3.969291819054232e-05, |
|
"loss": 0.0465, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.4546337672430542, |
|
"grad_norm": 0.3533700876931675, |
|
"learning_rate": 3.9676928551354524e-05, |
|
"loss": 0.0442, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.4585195259374393, |
|
"grad_norm": 0.4418640383769793, |
|
"learning_rate": 3.9660536508697545e-05, |
|
"loss": 0.0433, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.46240528463182434, |
|
"grad_norm": 0.45970453713501874, |
|
"learning_rate": 3.9643742397797236e-05, |
|
"loss": 0.0511, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.46629104332620946, |
|
"grad_norm": 0.35306399118899623, |
|
"learning_rate": 3.9626546562101936e-05, |
|
"loss": 0.0508, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.47017680202059453, |
|
"grad_norm": 0.3211095806787999, |
|
"learning_rate": 3.960894935327546e-05, |
|
"loss": 0.039, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.4740625607149796, |
|
"grad_norm": 0.36627785482679437, |
|
"learning_rate": 3.9590951131189934e-05, |
|
"loss": 0.0558, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.47794831940936466, |
|
"grad_norm": 0.421462123124338, |
|
"learning_rate": 3.957255226391839e-05, |
|
"loss": 0.0497, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.48183407810374973, |
|
"grad_norm": 0.3353485956514776, |
|
"learning_rate": 3.955375312772729e-05, |
|
"loss": 0.0384, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.48571983679813485, |
|
"grad_norm": 11.666925837495032, |
|
"learning_rate": 3.9534554107068786e-05, |
|
"loss": 0.0472, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.4896055954925199, |
|
"grad_norm": 0.4841474924556115, |
|
"learning_rate": 3.9514955594572874e-05, |
|
"loss": 0.0487, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.493491354186905, |
|
"grad_norm": 0.3248614577525704, |
|
"learning_rate": 3.9494957991039386e-05, |
|
"loss": 0.0548, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.49737711288129005, |
|
"grad_norm": 0.3970898472778102, |
|
"learning_rate": 3.947456170542976e-05, |
|
"loss": 0.0489, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.5012628715756752, |
|
"grad_norm": 0.4161749757032391, |
|
"learning_rate": 3.945376715485868e-05, |
|
"loss": 0.0526, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.5051486302700602, |
|
"grad_norm": 0.3452729447702609, |
|
"learning_rate": 3.9432574764585574e-05, |
|
"loss": 0.048, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5090343889644453, |
|
"grad_norm": 0.47170016327450176, |
|
"learning_rate": 3.9410984968005904e-05, |
|
"loss": 0.0405, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.5129201476588304, |
|
"grad_norm": 0.42610644718786006, |
|
"learning_rate": 3.938899820664229e-05, |
|
"loss": 0.0458, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.5168059063532154, |
|
"grad_norm": 0.3408319733233165, |
|
"learning_rate": 3.936661493013548e-05, |
|
"loss": 0.0391, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.5206916650476006, |
|
"grad_norm": 0.30711332622300047, |
|
"learning_rate": 3.934383559623518e-05, |
|
"loss": 0.042, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.5245774237419856, |
|
"grad_norm": 0.5571217272080251, |
|
"learning_rate": 3.932066067079066e-05, |
|
"loss": 0.0431, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.5284631824363707, |
|
"grad_norm": 0.4027909880638002, |
|
"learning_rate": 3.929709062774127e-05, |
|
"loss": 0.0374, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.5323489411307558, |
|
"grad_norm": 0.30456945378456746, |
|
"learning_rate": 3.9273125949106675e-05, |
|
"loss": 0.0424, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.5362346998251408, |
|
"grad_norm": 0.3964517011394958, |
|
"learning_rate": 3.924876712497711e-05, |
|
"loss": 0.0467, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.540120458519526, |
|
"grad_norm": 0.3649307036795635, |
|
"learning_rate": 3.9224014653503226e-05, |
|
"loss": 0.0497, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.5440062172139111, |
|
"grad_norm": 0.3198268167473866, |
|
"learning_rate": 3.919886904088601e-05, |
|
"loss": 0.0456, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5478919759082961, |
|
"grad_norm": 0.3463770316931819, |
|
"learning_rate": 3.917333080136638e-05, |
|
"loss": 0.0368, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.5517777346026812, |
|
"grad_norm": 0.35644328141911197, |
|
"learning_rate": 3.9147400457214674e-05, |
|
"loss": 0.0481, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.5556634932970662, |
|
"grad_norm": 0.40914490698280265, |
|
"learning_rate": 3.9121078538719975e-05, |
|
"loss": 0.0474, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.5595492519914513, |
|
"grad_norm": 0.3427548933748361, |
|
"learning_rate": 3.9094365584179264e-05, |
|
"loss": 0.0385, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.5634350106858365, |
|
"grad_norm": 0.4352394889252363, |
|
"learning_rate": 3.906726213988642e-05, |
|
"loss": 0.0383, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.5673207693802215, |
|
"grad_norm": 0.29442224609914946, |
|
"learning_rate": 3.903976876012105e-05, |
|
"loss": 0.0499, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.5712065280746066, |
|
"grad_norm": 0.2407859708001586, |
|
"learning_rate": 3.901188600713712e-05, |
|
"loss": 0.0459, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.5750922867689916, |
|
"grad_norm": 0.3994426590406398, |
|
"learning_rate": 3.89836144511515e-05, |
|
"loss": 0.0399, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.5789780454633767, |
|
"grad_norm": 0.3839601198021419, |
|
"learning_rate": 3.895495467033229e-05, |
|
"loss": 0.0455, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.5828638041577618, |
|
"grad_norm": 0.5414728833934257, |
|
"learning_rate": 3.8925907250786966e-05, |
|
"loss": 0.0428, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5867495628521469, |
|
"grad_norm": 0.4855025115080349, |
|
"learning_rate": 3.8896472786550444e-05, |
|
"loss": 0.0415, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.590635321546532, |
|
"grad_norm": 0.3542282158759638, |
|
"learning_rate": 3.886665187957289e-05, |
|
"loss": 0.05, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.594521080240917, |
|
"grad_norm": 0.32139501424521916, |
|
"learning_rate": 3.883644513970744e-05, |
|
"loss": 0.0386, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.5984068389353021, |
|
"grad_norm": 0.32361000000718626, |
|
"learning_rate": 3.8805853184697694e-05, |
|
"loss": 0.0407, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.6022925976296872, |
|
"grad_norm": 0.2699532107849867, |
|
"learning_rate": 3.877487664016513e-05, |
|
"loss": 0.035, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.6061783563240722, |
|
"grad_norm": 0.31749658242328194, |
|
"learning_rate": 3.8743516139596244e-05, |
|
"loss": 0.0449, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.6100641150184574, |
|
"grad_norm": 0.38042009032251006, |
|
"learning_rate": 3.871177232432969e-05, |
|
"loss": 0.0433, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.6139498737128425, |
|
"grad_norm": 0.27994580128185775, |
|
"learning_rate": 3.867964584354305e-05, |
|
"loss": 0.0375, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.6178356324072275, |
|
"grad_norm": 0.44326692252464484, |
|
"learning_rate": 3.864713735423964e-05, |
|
"loss": 0.0527, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.6217213911016126, |
|
"grad_norm": 0.338368734908726, |
|
"learning_rate": 3.861424752123506e-05, |
|
"loss": 0.0356, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.6256071497959976, |
|
"grad_norm": 0.31262748879439145, |
|
"learning_rate": 3.858097701714358e-05, |
|
"loss": 0.0391, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.6294929084903828, |
|
"grad_norm": 0.37350424104203495, |
|
"learning_rate": 3.8547326522364386e-05, |
|
"loss": 0.0437, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.6333786671847679, |
|
"grad_norm": 0.3610320718135361, |
|
"learning_rate": 3.851329672506768e-05, |
|
"loss": 0.0412, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.6372644258791529, |
|
"grad_norm": 0.46752288451915147, |
|
"learning_rate": 3.847888832118059e-05, |
|
"loss": 0.0426, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.641150184573538, |
|
"grad_norm": 0.42134006799700907, |
|
"learning_rate": 3.844410201437296e-05, |
|
"loss": 0.0598, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.645035943267923, |
|
"grad_norm": 0.31290423259787065, |
|
"learning_rate": 3.840893851604294e-05, |
|
"loss": 0.0346, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.6489217019623081, |
|
"grad_norm": 0.40257644426810313, |
|
"learning_rate": 3.837339854530243e-05, |
|
"loss": 0.0452, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.6528074606566933, |
|
"grad_norm": 0.35775285851697575, |
|
"learning_rate": 3.833748282896241e-05, |
|
"loss": 0.0375, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.6566932193510783, |
|
"grad_norm": 0.39620710106307544, |
|
"learning_rate": 3.8301192101518034e-05, |
|
"loss": 0.0389, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.6605789780454634, |
|
"grad_norm": 0.35116612697915695, |
|
"learning_rate": 3.8264527105133655e-05, |
|
"loss": 0.0416, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.6644647367398484, |
|
"grad_norm": 0.3593763085620626, |
|
"learning_rate": 3.822748858962759e-05, |
|
"loss": 0.039, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.6683504954342335, |
|
"grad_norm": 0.3636137986839074, |
|
"learning_rate": 3.8190077312456837e-05, |
|
"loss": 0.0437, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.6722362541286186, |
|
"grad_norm": 0.3070384884366638, |
|
"learning_rate": 3.815229403870156e-05, |
|
"loss": 0.0441, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.6761220128230037, |
|
"grad_norm": 0.452855427559548, |
|
"learning_rate": 3.811413954104944e-05, |
|
"loss": 0.0476, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.6800077715173888, |
|
"grad_norm": 0.3645618682663316, |
|
"learning_rate": 3.80756145997799e-05, |
|
"loss": 0.044, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.6838935302117739, |
|
"grad_norm": 0.3236245721921845, |
|
"learning_rate": 3.8036720002748116e-05, |
|
"loss": 0.0392, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.6877792889061589, |
|
"grad_norm": 0.3755492942938037, |
|
"learning_rate": 3.79974565453689e-05, |
|
"loss": 0.0391, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.691665047600544, |
|
"grad_norm": 0.32387171524447206, |
|
"learning_rate": 3.795782503060049e-05, |
|
"loss": 0.0387, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.695550806294929, |
|
"grad_norm": 0.34576851613064813, |
|
"learning_rate": 3.791782626892806e-05, |
|
"loss": 0.041, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.6994365649893142, |
|
"grad_norm": 0.3841280667838816, |
|
"learning_rate": 3.7877461078347184e-05, |
|
"loss": 0.0421, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.7033223236836993, |
|
"grad_norm": 0.3143570778817021, |
|
"learning_rate": 3.78367302843471e-05, |
|
"loss": 0.0471, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.7072080823780843, |
|
"grad_norm": 0.27227430217810716, |
|
"learning_rate": 3.7795634719893824e-05, |
|
"loss": 0.0368, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.7110938410724694, |
|
"grad_norm": 0.7857732487925049, |
|
"learning_rate": 3.7754175225413116e-05, |
|
"loss": 0.0459, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.7149795997668544, |
|
"grad_norm": 0.3672886777669717, |
|
"learning_rate": 3.771235264877331e-05, |
|
"loss": 0.0337, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.7188653584612396, |
|
"grad_norm": 0.35164352388978687, |
|
"learning_rate": 3.7670167845267934e-05, |
|
"loss": 0.0385, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.7227511171556247, |
|
"grad_norm": 0.40071179887944486, |
|
"learning_rate": 3.762762167759827e-05, |
|
"loss": 0.0467, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.7266368758500097, |
|
"grad_norm": 0.4969346423226924, |
|
"learning_rate": 3.758471501585567e-05, |
|
"loss": 0.0356, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.7305226345443948, |
|
"grad_norm": 0.3649736322224178, |
|
"learning_rate": 3.7541448737503785e-05, |
|
"loss": 0.0417, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.7344083932387798, |
|
"grad_norm": 0.3412635607398391, |
|
"learning_rate": 3.749782372736061e-05, |
|
"loss": 0.036, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.7382941519331649, |
|
"grad_norm": 0.2934505710318939, |
|
"learning_rate": 3.74538408775804e-05, |
|
"loss": 0.0442, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.7421799106275501, |
|
"grad_norm": 0.1935092511725485, |
|
"learning_rate": 3.740950108763541e-05, |
|
"loss": 0.0429, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.7460656693219351, |
|
"grad_norm": 0.2578986935595479, |
|
"learning_rate": 3.73648052642975e-05, |
|
"loss": 0.0392, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.7499514280163202, |
|
"grad_norm": 0.28980154790022605, |
|
"learning_rate": 3.7319754321619625e-05, |
|
"loss": 0.0395, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.7538371867107053, |
|
"grad_norm": 0.2637087037771915, |
|
"learning_rate": 3.7274349180917094e-05, |
|
"loss": 0.0415, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.7577229454050903, |
|
"grad_norm": 0.2854361833691479, |
|
"learning_rate": 3.722859077074875e-05, |
|
"loss": 0.0425, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.7616087040994755, |
|
"grad_norm": 0.2742967763695032, |
|
"learning_rate": 3.718248002689799e-05, |
|
"loss": 0.0352, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.7654944627938605, |
|
"grad_norm": 0.3604576092657196, |
|
"learning_rate": 3.7136017892353626e-05, |
|
"loss": 0.0413, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.7693802214882456, |
|
"grad_norm": 0.2855589921061211, |
|
"learning_rate": 3.7089205317290564e-05, |
|
"loss": 0.0458, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.7732659801826307, |
|
"grad_norm": 0.3073335348834244, |
|
"learning_rate": 3.7042043259050444e-05, |
|
"loss": 0.0333, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.7771517388770157, |
|
"grad_norm": 0.28916930774412825, |
|
"learning_rate": 3.699453268212199e-05, |
|
"loss": 0.0424, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7810374975714008, |
|
"grad_norm": 0.3314165818828528, |
|
"learning_rate": 3.694667455812131e-05, |
|
"loss": 0.0395, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.7849232562657859, |
|
"grad_norm": 0.39228463874894487, |
|
"learning_rate": 3.6898469865772055e-05, |
|
"loss": 0.0377, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.788809014960171, |
|
"grad_norm": 0.19523419184608223, |
|
"learning_rate": 3.684991959088537e-05, |
|
"loss": 0.0362, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.7926947736545561, |
|
"grad_norm": 0.29565378498547584, |
|
"learning_rate": 3.680102472633974e-05, |
|
"loss": 0.0354, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.7965805323489411, |
|
"grad_norm": 0.3948911652054535, |
|
"learning_rate": 3.675178627206068e-05, |
|
"loss": 0.0411, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.8004662910433262, |
|
"grad_norm": 0.3555760694304167, |
|
"learning_rate": 3.6702205235000315e-05, |
|
"loss": 0.0409, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.8043520497377112, |
|
"grad_norm": 0.33270407018218245, |
|
"learning_rate": 3.665228262911676e-05, |
|
"loss": 0.0306, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.8082378084320964, |
|
"grad_norm": 0.5008566240131939, |
|
"learning_rate": 3.660201947535338e-05, |
|
"loss": 0.0415, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.8121235671264815, |
|
"grad_norm": 0.3354219331000289, |
|
"learning_rate": 3.655141680161793e-05, |
|
"loss": 0.0362, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.8160093258208665, |
|
"grad_norm": 0.39746802052357905, |
|
"learning_rate": 3.650047564276152e-05, |
|
"loss": 0.0418, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.8198950845152516, |
|
"grad_norm": 0.2251580580435554, |
|
"learning_rate": 3.644919704055748e-05, |
|
"loss": 0.0394, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.8237808432096366, |
|
"grad_norm": 0.2699540407370164, |
|
"learning_rate": 3.639758204368001e-05, |
|
"loss": 0.0384, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.8276666019040217, |
|
"grad_norm": 0.2866910109291188, |
|
"learning_rate": 3.6345631707682744e-05, |
|
"loss": 0.0357, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.8315523605984069, |
|
"grad_norm": 0.2713788435071836, |
|
"learning_rate": 3.6293347094977224e-05, |
|
"loss": 0.0409, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.8354381192927919, |
|
"grad_norm": 0.3195763251154283, |
|
"learning_rate": 3.624072927481107e-05, |
|
"loss": 0.0403, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.839323877987177, |
|
"grad_norm": 0.2983473462358474, |
|
"learning_rate": 3.618777932324621e-05, |
|
"loss": 0.0341, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.8432096366815621, |
|
"grad_norm": 0.29251828904033916, |
|
"learning_rate": 3.613449832313683e-05, |
|
"loss": 0.036, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.8470953953759471, |
|
"grad_norm": 0.23448459997179785, |
|
"learning_rate": 3.608088736410718e-05, |
|
"loss": 0.0338, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.8509811540703323, |
|
"grad_norm": 0.2757212169813267, |
|
"learning_rate": 3.6026947542529415e-05, |
|
"loss": 0.0409, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.8548669127647173, |
|
"grad_norm": 0.33239782308647875, |
|
"learning_rate": 3.597267996150106e-05, |
|
"loss": 0.0374, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.8587526714591024, |
|
"grad_norm": 0.33403655068983207, |
|
"learning_rate": 3.591808573082249e-05, |
|
"loss": 0.0357, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.8626384301534875, |
|
"grad_norm": 0.2378398763830906, |
|
"learning_rate": 3.586316596697426e-05, |
|
"loss": 0.0344, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.8665241888478725, |
|
"grad_norm": 0.3216096369297081, |
|
"learning_rate": 3.580792179309422e-05, |
|
"loss": 0.0382, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.8704099475422576, |
|
"grad_norm": 0.37090625174379876, |
|
"learning_rate": 3.5752354338954594e-05, |
|
"loss": 0.0383, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.8742957062366427, |
|
"grad_norm": 0.26521085264464783, |
|
"learning_rate": 3.569646474093885e-05, |
|
"loss": 0.0346, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.8781814649310278, |
|
"grad_norm": 0.30913299216593815, |
|
"learning_rate": 3.564025414201846e-05, |
|
"loss": 0.0373, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.8820672236254129, |
|
"grad_norm": 0.3053308654147181, |
|
"learning_rate": 3.558372369172956e-05, |
|
"loss": 0.0339, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.8859529823197979, |
|
"grad_norm": 0.2351240040993444, |
|
"learning_rate": 3.552687454614938e-05, |
|
"loss": 0.0331, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.889838741014183, |
|
"grad_norm": 0.37288388901346237, |
|
"learning_rate": 3.546970786787264e-05, |
|
"loss": 0.0361, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.893724499708568, |
|
"grad_norm": 0.44083971894002005, |
|
"learning_rate": 3.541222482598779e-05, |
|
"loss": 0.0418, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.8976102584029532, |
|
"grad_norm": 0.321928529274947, |
|
"learning_rate": 3.5354426596053066e-05, |
|
"loss": 0.0296, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.9014960170973383, |
|
"grad_norm": 0.28686426327838627, |
|
"learning_rate": 3.529631436007246e-05, |
|
"loss": 0.0324, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.9053817757917233, |
|
"grad_norm": 0.22912536137270445, |
|
"learning_rate": 3.523788930647157e-05, |
|
"loss": 0.0429, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.9092675344861084, |
|
"grad_norm": 0.3085807580953211, |
|
"learning_rate": 3.5179152630073256e-05, |
|
"loss": 0.0313, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.9131532931804935, |
|
"grad_norm": 0.34303257055116365, |
|
"learning_rate": 3.512010553207325e-05, |
|
"loss": 0.0371, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.9170390518748786, |
|
"grad_norm": 0.2772929349898867, |
|
"learning_rate": 3.506074922001554e-05, |
|
"loss": 0.0328, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.9209248105692637, |
|
"grad_norm": 0.30788741712363193, |
|
"learning_rate": 3.500108490776774e-05, |
|
"loss": 0.0402, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.9248105692636487, |
|
"grad_norm": 0.3455599730329004, |
|
"learning_rate": 3.494111381549618e-05, |
|
"loss": 0.0321, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.9286963279580338, |
|
"grad_norm": 0.4074372387990901, |
|
"learning_rate": 3.4880837169641056e-05, |
|
"loss": 0.0384, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.9325820866524189, |
|
"grad_norm": 0.2793409167713601, |
|
"learning_rate": 3.482025620289125e-05, |
|
"loss": 0.0317, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.9364678453468039, |
|
"grad_norm": 0.3747211944134066, |
|
"learning_rate": 3.4759372154159185e-05, |
|
"loss": 0.0339, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.9403536040411891, |
|
"grad_norm": 0.2805669269123183, |
|
"learning_rate": 3.469818626855546e-05, |
|
"loss": 0.0339, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.9442393627355741, |
|
"grad_norm": 0.40602944782370093, |
|
"learning_rate": 3.463669979736343e-05, |
|
"loss": 0.042, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.9481251214299592, |
|
"grad_norm": 0.225237503944127, |
|
"learning_rate": 3.457491399801353e-05, |
|
"loss": 0.034, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.9520108801243443, |
|
"grad_norm": 0.37947299716439314, |
|
"learning_rate": 3.451283013405764e-05, |
|
"loss": 0.0342, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.9558966388187293, |
|
"grad_norm": 0.4204039088153023, |
|
"learning_rate": 3.445044947514322e-05, |
|
"loss": 0.0396, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.9597823975131144, |
|
"grad_norm": 0.2632256881229335, |
|
"learning_rate": 3.438777329698733e-05, |
|
"loss": 0.0391, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.9636681562074995, |
|
"grad_norm": 0.31582365898299897, |
|
"learning_rate": 3.432480288135057e-05, |
|
"loss": 0.0425, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.9675539149018846, |
|
"grad_norm": 0.28143075132786943, |
|
"learning_rate": 3.426153951601082e-05, |
|
"loss": 0.0343, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.9714396735962697, |
|
"grad_norm": 0.24769109092709082, |
|
"learning_rate": 3.419798449473698e-05, |
|
"loss": 0.034, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.9753254322906547, |
|
"grad_norm": 0.33732830476966047, |
|
"learning_rate": 3.413413911726241e-05, |
|
"loss": 0.0405, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.9792111909850398, |
|
"grad_norm": 0.32757343882785267, |
|
"learning_rate": 3.407000468925845e-05, |
|
"loss": 0.0339, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.983096949679425, |
|
"grad_norm": 0.30532242889882966, |
|
"learning_rate": 3.4005582522307664e-05, |
|
"loss": 0.0342, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.98698270837381, |
|
"grad_norm": 0.25099007166252546, |
|
"learning_rate": 3.394087393387702e-05, |
|
"loss": 0.0336, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.9908684670681951, |
|
"grad_norm": 0.32436067338283625, |
|
"learning_rate": 3.387588024729096e-05, |
|
"loss": 0.0399, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.9947542257625801, |
|
"grad_norm": 0.4181129951433982, |
|
"learning_rate": 3.3810602791704325e-05, |
|
"loss": 0.0361, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.9986399844569652, |
|
"grad_norm": 0.3470403621225102, |
|
"learning_rate": 3.374504290207519e-05, |
|
"loss": 0.0343, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.0023314552166311, |
|
"grad_norm": 0.26511898565361997, |
|
"learning_rate": 3.367920191913755e-05, |
|
"loss": 0.0321, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.0062172139110162, |
|
"grad_norm": 0.2702160755775835, |
|
"learning_rate": 3.3613081189373914e-05, |
|
"loss": 0.0314, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.0101029726054012, |
|
"grad_norm": 0.3118992978670784, |
|
"learning_rate": 3.3546682064987735e-05, |
|
"loss": 0.033, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.0139887312997864, |
|
"grad_norm": 0.23058487649261575, |
|
"learning_rate": 3.34800059038758e-05, |
|
"loss": 0.0239, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.0178744899941714, |
|
"grad_norm": 0.36097802187023126, |
|
"learning_rate": 3.341305406960045e-05, |
|
"loss": 0.033, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.0217602486885564, |
|
"grad_norm": 0.28013397849201116, |
|
"learning_rate": 3.3345827931361666e-05, |
|
"loss": 0.0255, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.0256460073829414, |
|
"grad_norm": 0.2029992608524442, |
|
"learning_rate": 3.32783288639691e-05, |
|
"loss": 0.0244, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.0295317660773267, |
|
"grad_norm": 0.3553166383478555, |
|
"learning_rate": 3.321055824781394e-05, |
|
"loss": 0.0273, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.0334175247717117, |
|
"grad_norm": 0.24984422004673013, |
|
"learning_rate": 3.31425174688407e-05, |
|
"loss": 0.0296, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.0373032834660967, |
|
"grad_norm": 0.18909677592052765, |
|
"learning_rate": 3.307420791851887e-05, |
|
"loss": 0.0273, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.041189042160482, |
|
"grad_norm": 0.436082933063616, |
|
"learning_rate": 3.3005630993814416e-05, |
|
"loss": 0.041, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.045074800854867, |
|
"grad_norm": 0.23112116433945518, |
|
"learning_rate": 3.293678809716129e-05, |
|
"loss": 0.0304, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.048960559549252, |
|
"grad_norm": 0.2941866407157775, |
|
"learning_rate": 3.28676806364327e-05, |
|
"loss": 0.0296, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.0528463182436372, |
|
"grad_norm": 0.2235983248751329, |
|
"learning_rate": 3.279831002491232e-05, |
|
"loss": 0.0276, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.0567320769380222, |
|
"grad_norm": 0.3904007075572595, |
|
"learning_rate": 3.27286776812654e-05, |
|
"loss": 0.0347, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.0606178356324072, |
|
"grad_norm": 0.2861997613717581, |
|
"learning_rate": 3.2658785029509746e-05, |
|
"loss": 0.0264, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.0645035943267924, |
|
"grad_norm": 0.3566819164029042, |
|
"learning_rate": 3.258863349898659e-05, |
|
"loss": 0.0277, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.0683893530211774, |
|
"grad_norm": 0.18390854912707696, |
|
"learning_rate": 3.251822452433141e-05, |
|
"loss": 0.0307, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.0722751117155624, |
|
"grad_norm": 0.3062331302207858, |
|
"learning_rate": 3.244755954544449e-05, |
|
"loss": 0.0312, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.0761608704099475, |
|
"grad_norm": 0.3047851916691974, |
|
"learning_rate": 3.2376640007461595e-05, |
|
"loss": 0.0339, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.0800466291043327, |
|
"grad_norm": 0.2300653857497523, |
|
"learning_rate": 3.230546736072432e-05, |
|
"loss": 0.0297, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.0839323877987177, |
|
"grad_norm": 0.20305416815265903, |
|
"learning_rate": 3.2234043060750464e-05, |
|
"loss": 0.0324, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.0878181464931027, |
|
"grad_norm": 0.24117134662351375, |
|
"learning_rate": 3.216236856820429e-05, |
|
"loss": 0.0225, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.091703905187488, |
|
"grad_norm": 0.23857567814841038, |
|
"learning_rate": 3.2090445348866616e-05, |
|
"loss": 0.037, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.095589663881873, |
|
"grad_norm": 0.20744357816037623, |
|
"learning_rate": 3.201827487360485e-05, |
|
"loss": 0.0343, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.099475422576258, |
|
"grad_norm": 0.3148313503304088, |
|
"learning_rate": 3.194585861834292e-05, |
|
"loss": 0.0329, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.103361181270643, |
|
"grad_norm": 0.2597458756041528, |
|
"learning_rate": 3.187319806403108e-05, |
|
"loss": 0.0324, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.1072469399650282, |
|
"grad_norm": 0.37462454725250943, |
|
"learning_rate": 3.180029469661563e-05, |
|
"loss": 0.0298, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.1111326986594132, |
|
"grad_norm": 0.26990355348646067, |
|
"learning_rate": 3.172715000700851e-05, |
|
"loss": 0.0256, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.1150184573537982, |
|
"grad_norm": 0.2193580972033405, |
|
"learning_rate": 3.165376549105686e-05, |
|
"loss": 0.035, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.1189042160481835, |
|
"grad_norm": 0.2966997098167283, |
|
"learning_rate": 3.158014264951234e-05, |
|
"loss": 0.0341, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.1227899747425685, |
|
"grad_norm": 0.3014871881210635, |
|
"learning_rate": 3.150628298800055e-05, |
|
"loss": 0.0328, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.1266757334369535, |
|
"grad_norm": 0.33425883934391964, |
|
"learning_rate": 3.1432188016990154e-05, |
|
"loss": 0.0262, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.1305614921313387, |
|
"grad_norm": 0.23135238874039699, |
|
"learning_rate": 3.1357859251762005e-05, |
|
"loss": 0.0349, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.1344472508257237, |
|
"grad_norm": 0.2957844330256544, |
|
"learning_rate": 3.1283298212378204e-05, |
|
"loss": 0.0308, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.1383330095201087, |
|
"grad_norm": 0.309553215175613, |
|
"learning_rate": 3.120850642365094e-05, |
|
"loss": 0.0378, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.142218768214494, |
|
"grad_norm": 0.27357475926464664, |
|
"learning_rate": 3.113348541511139e-05, |
|
"loss": 0.0315, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.146104526908879, |
|
"grad_norm": 0.2922282115073422, |
|
"learning_rate": 3.1058236720978357e-05, |
|
"loss": 0.0207, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.149990285603264, |
|
"grad_norm": 0.30551015741363674, |
|
"learning_rate": 3.0982761880126956e-05, |
|
"loss": 0.0309, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.153876044297649, |
|
"grad_norm": 0.19603484059399878, |
|
"learning_rate": 3.090706243605712e-05, |
|
"loss": 0.0275, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.1577618029920342, |
|
"grad_norm": 0.12126922576885169, |
|
"learning_rate": 3.083113993686202e-05, |
|
"loss": 0.0214, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.1616475616864192, |
|
"grad_norm": 0.24570529004069103, |
|
"learning_rate": 3.075499593519643e-05, |
|
"loss": 0.0313, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.1655333203808043, |
|
"grad_norm": 0.21996601041925384, |
|
"learning_rate": 3.067863198824499e-05, |
|
"loss": 0.0322, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.1694190790751895, |
|
"grad_norm": 0.26523791368864613, |
|
"learning_rate": 3.0602049657690275e-05, |
|
"loss": 0.033, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.1733048377695745, |
|
"grad_norm": 0.3743941590932805, |
|
"learning_rate": 3.0525250509680975e-05, |
|
"loss": 0.0315, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.1771905964639595, |
|
"grad_norm": 0.29800477638021083, |
|
"learning_rate": 3.0448236114799798e-05, |
|
"loss": 0.0314, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.1810763551583447, |
|
"grad_norm": 0.41913805025695794, |
|
"learning_rate": 3.0371008048031335e-05, |
|
"loss": 0.0252, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.1849621138527298, |
|
"grad_norm": 0.2972323169585253, |
|
"learning_rate": 3.029356788872992e-05, |
|
"loss": 0.0284, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.1888478725471148, |
|
"grad_norm": 0.2548640539234551, |
|
"learning_rate": 3.0215917220587264e-05, |
|
"loss": 0.0259, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.1927336312415, |
|
"grad_norm": 0.18370703051409343, |
|
"learning_rate": 3.013805763160009e-05, |
|
"loss": 0.023, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.196619389935885, |
|
"grad_norm": 0.17576524808213698, |
|
"learning_rate": 3.0059990714037678e-05, |
|
"loss": 0.0268, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.20050514863027, |
|
"grad_norm": 0.25633777261633506, |
|
"learning_rate": 2.9981718064409284e-05, |
|
"loss": 0.0307, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.204390907324655, |
|
"grad_norm": 0.27763678833721916, |
|
"learning_rate": 2.9903241283431472e-05, |
|
"loss": 0.0279, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.2082766660190403, |
|
"grad_norm": 0.3087663315027572, |
|
"learning_rate": 2.9824561975995427e-05, |
|
"loss": 0.0276, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.2121624247134253, |
|
"grad_norm": 0.28744858586674227, |
|
"learning_rate": 2.974568175113409e-05, |
|
"loss": 0.024, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.2160481834078103, |
|
"grad_norm": 0.21935445525902675, |
|
"learning_rate": 2.9666602221989267e-05, |
|
"loss": 0.0286, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.2199339421021955, |
|
"grad_norm": 0.23246090578146508, |
|
"learning_rate": 2.958732500577864e-05, |
|
"loss": 0.0212, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.2238197007965805, |
|
"grad_norm": 0.2698352542678301, |
|
"learning_rate": 2.9507851723762716e-05, |
|
"loss": 0.0308, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.2277054594909655, |
|
"grad_norm": 0.4180270105674614, |
|
"learning_rate": 2.9428184001211616e-05, |
|
"loss": 0.0212, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.2315912181853508, |
|
"grad_norm": 0.2841163727116711, |
|
"learning_rate": 2.9348323467371897e-05, |
|
"loss": 0.0237, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.2354769768797358, |
|
"grad_norm": 0.3283735446941676, |
|
"learning_rate": 2.9268271755433198e-05, |
|
"loss": 0.0268, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.2393627355741208, |
|
"grad_norm": 0.17964907995508142, |
|
"learning_rate": 2.9188030502494853e-05, |
|
"loss": 0.026, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.243248494268506, |
|
"grad_norm": 0.3739453975454048, |
|
"learning_rate": 2.9107601349532406e-05, |
|
"loss": 0.0351, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.247134252962891, |
|
"grad_norm": 0.22041723340395544, |
|
"learning_rate": 2.9026985941364053e-05, |
|
"loss": 0.0289, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.251020011657276, |
|
"grad_norm": 0.16977718972527425, |
|
"learning_rate": 2.8946185926617012e-05, |
|
"loss": 0.0227, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.254905770351661, |
|
"grad_norm": 0.3419426868688641, |
|
"learning_rate": 2.88652029576938e-05, |
|
"loss": 0.0265, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.2587915290460463, |
|
"grad_norm": 0.22438684261772954, |
|
"learning_rate": 2.878403869073843e-05, |
|
"loss": 0.0262, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.2626772877404313, |
|
"grad_norm": 0.30489902115039463, |
|
"learning_rate": 2.8702694785602587e-05, |
|
"loss": 0.0314, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.2665630464348163, |
|
"grad_norm": 0.2434714851331239, |
|
"learning_rate": 2.8621172905811613e-05, |
|
"loss": 0.0275, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.2704488051292016, |
|
"grad_norm": 0.27588341692139456, |
|
"learning_rate": 2.8539474718530543e-05, |
|
"loss": 0.0264, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.2743345638235866, |
|
"grad_norm": 0.29322871157127156, |
|
"learning_rate": 2.8457601894529997e-05, |
|
"loss": 0.0375, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.2782203225179716, |
|
"grad_norm": 0.19868867147652092, |
|
"learning_rate": 2.8375556108151995e-05, |
|
"loss": 0.029, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.2821060812123566, |
|
"grad_norm": 0.3711457646768087, |
|
"learning_rate": 2.829333903727574e-05, |
|
"loss": 0.0327, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.2859918399067418, |
|
"grad_norm": 0.28271933767183866, |
|
"learning_rate": 2.821095236328328e-05, |
|
"loss": 0.0261, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.2898775986011268, |
|
"grad_norm": 0.23298730366288653, |
|
"learning_rate": 2.812839777102514e-05, |
|
"loss": 0.029, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.293763357295512, |
|
"grad_norm": 0.23059435959988767, |
|
"learning_rate": 2.8045676948785873e-05, |
|
"loss": 0.0321, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.297649115989897, |
|
"grad_norm": 0.19452335917095895, |
|
"learning_rate": 2.7962791588249492e-05, |
|
"loss": 0.029, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.301534874684282, |
|
"grad_norm": 0.30937914926531507, |
|
"learning_rate": 2.7879743384464942e-05, |
|
"loss": 0.027, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.305420633378667, |
|
"grad_norm": 0.30675762635230686, |
|
"learning_rate": 2.7796534035811378e-05, |
|
"loss": 0.0248, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.3093063920730523, |
|
"grad_norm": 0.14564788288302782, |
|
"learning_rate": 2.7713165243963444e-05, |
|
"loss": 0.0242, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.3131921507674373, |
|
"grad_norm": 0.3611570999157121, |
|
"learning_rate": 2.7629638713856503e-05, |
|
"loss": 0.0313, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.3170779094618223, |
|
"grad_norm": 0.15900271035042685, |
|
"learning_rate": 2.7545956153651712e-05, |
|
"loss": 0.0246, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.3209636681562076, |
|
"grad_norm": 0.3433667317929698, |
|
"learning_rate": 2.746211927470117e-05, |
|
"loss": 0.0269, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.3248494268505926, |
|
"grad_norm": 0.28124492243082266, |
|
"learning_rate": 2.737812979151284e-05, |
|
"loss": 0.0245, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.3287351855449776, |
|
"grad_norm": 0.187688472206659, |
|
"learning_rate": 2.7293989421715542e-05, |
|
"loss": 0.0253, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.3326209442393626, |
|
"grad_norm": 0.2829084594578858, |
|
"learning_rate": 2.720969988602379e-05, |
|
"loss": 0.0207, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.3365067029337478, |
|
"grad_norm": 0.3016772133080438, |
|
"learning_rate": 2.7125262908202633e-05, |
|
"loss": 0.0281, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.3403924616281329, |
|
"grad_norm": 0.2338038212652911, |
|
"learning_rate": 2.7040680215032377e-05, |
|
"loss": 0.0322, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.344278220322518, |
|
"grad_norm": 0.23012741652116103, |
|
"learning_rate": 2.6955953536273285e-05, |
|
"loss": 0.0342, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.348163979016903, |
|
"grad_norm": 0.1789676480348028, |
|
"learning_rate": 2.6871084604630214e-05, |
|
"loss": 0.0302, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.3520497377112881, |
|
"grad_norm": 0.2847505802479265, |
|
"learning_rate": 2.6786075155717147e-05, |
|
"loss": 0.0341, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.3559354964056731, |
|
"grad_norm": 0.21765574449058714, |
|
"learning_rate": 2.6700926928021736e-05, |
|
"loss": 0.0287, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.3598212551000584, |
|
"grad_norm": 0.28617645696636324, |
|
"learning_rate": 2.6615641662869714e-05, |
|
"loss": 0.0307, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.3637070137944434, |
|
"grad_norm": 0.34355897351030124, |
|
"learning_rate": 2.6530221104389316e-05, |
|
"loss": 0.0232, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.3675927724888284, |
|
"grad_norm": 0.19082168638857241, |
|
"learning_rate": 2.6444666999475593e-05, |
|
"loss": 0.0301, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.3714785311832136, |
|
"grad_norm": 0.27997736990150557, |
|
"learning_rate": 2.635898109775468e-05, |
|
"loss": 0.0237, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.3753642898775986, |
|
"grad_norm": 0.2234204944915347, |
|
"learning_rate": 2.6273165151548047e-05, |
|
"loss": 0.0271, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.3792500485719836, |
|
"grad_norm": 0.2201998240178073, |
|
"learning_rate": 2.6187220915836627e-05, |
|
"loss": 0.0292, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.3831358072663686, |
|
"grad_norm": 0.14320713366865984, |
|
"learning_rate": 2.6101150148224928e-05, |
|
"loss": 0.0288, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.3870215659607539, |
|
"grad_norm": 0.2116339664682065, |
|
"learning_rate": 2.601495460890513e-05, |
|
"loss": 0.0269, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.3909073246551389, |
|
"grad_norm": 0.24959055444114717, |
|
"learning_rate": 2.5928636060621036e-05, |
|
"loss": 0.0337, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.3947930833495241, |
|
"grad_norm": 0.2954702102324182, |
|
"learning_rate": 2.5842196268632068e-05, |
|
"loss": 0.0228, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.3986788420439091, |
|
"grad_norm": 0.38085393262983136, |
|
"learning_rate": 2.5755637000677124e-05, |
|
"loss": 0.0247, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.4025646007382941, |
|
"grad_norm": 0.21224922937032303, |
|
"learning_rate": 2.566896002693845e-05, |
|
"loss": 0.0252, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.4064503594326792, |
|
"grad_norm": 0.3607238719844902, |
|
"learning_rate": 2.5582167120005467e-05, |
|
"loss": 0.0258, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.4103361181270644, |
|
"grad_norm": 0.19260362661029898, |
|
"learning_rate": 2.549526005483844e-05, |
|
"loss": 0.0235, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.4142218768214494, |
|
"grad_norm": 0.26994983197422656, |
|
"learning_rate": 2.5408240608732277e-05, |
|
"loss": 0.0253, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.4181076355158344, |
|
"grad_norm": 0.29929031715132187, |
|
"learning_rate": 2.5321110561280106e-05, |
|
"loss": 0.0275, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.4219933942102196, |
|
"grad_norm": 0.3196437552669507, |
|
"learning_rate": 2.523387169433692e-05, |
|
"loss": 0.0311, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.4258791529046047, |
|
"grad_norm": 0.2296744794340079, |
|
"learning_rate": 2.514652579198312e-05, |
|
"loss": 0.0278, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.4297649115989897, |
|
"grad_norm": 0.16892616731103247, |
|
"learning_rate": 2.5059074640488047e-05, |
|
"loss": 0.0263, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.4336506702933747, |
|
"grad_norm": 0.2394458582673369, |
|
"learning_rate": 2.497152002827345e-05, |
|
"loss": 0.0277, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.43753642898776, |
|
"grad_norm": 0.1974720107762794, |
|
"learning_rate": 2.488386374587688e-05, |
|
"loss": 0.0288, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.441422187682145, |
|
"grad_norm": 0.21920589738520604, |
|
"learning_rate": 2.479610758591511e-05, |
|
"loss": 0.0216, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.4453079463765302, |
|
"grad_norm": 0.21707363978296285, |
|
"learning_rate": 2.4708253343047456e-05, |
|
"loss": 0.0292, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.4491937050709152, |
|
"grad_norm": 0.18601726681673092, |
|
"learning_rate": 2.4620302813939093e-05, |
|
"loss": 0.0245, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.4530794637653002, |
|
"grad_norm": 0.2635531513187159, |
|
"learning_rate": 2.4532257797224287e-05, |
|
"loss": 0.0295, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.4569652224596852, |
|
"grad_norm": 0.3251174321742817, |
|
"learning_rate": 2.4444120093469632e-05, |
|
"loss": 0.0298, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.4608509811540702, |
|
"grad_norm": 0.4977849600583359, |
|
"learning_rate": 2.4355891505137216e-05, |
|
"loss": 0.0325, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.4647367398484554, |
|
"grad_norm": 0.1750593962332665, |
|
"learning_rate": 2.4267573836547768e-05, |
|
"loss": 0.022, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.4686224985428404, |
|
"grad_norm": 0.271292018124954, |
|
"learning_rate": 2.417916889384374e-05, |
|
"loss": 0.0281, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.4725082572372257, |
|
"grad_norm": 0.2221308512528936, |
|
"learning_rate": 2.4090678484952416e-05, |
|
"loss": 0.0209, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.4763940159316107, |
|
"grad_norm": 0.3356499834645714, |
|
"learning_rate": 2.400210441954888e-05, |
|
"loss": 0.0286, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.4802797746259957, |
|
"grad_norm": 0.19837629045900065, |
|
"learning_rate": 2.3913448509019047e-05, |
|
"loss": 0.0268, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.4841655333203807, |
|
"grad_norm": 0.22984338791117062, |
|
"learning_rate": 2.3824712566422613e-05, |
|
"loss": 0.0324, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.488051292014766, |
|
"grad_norm": 0.149938685223149, |
|
"learning_rate": 2.3735898406455945e-05, |
|
"loss": 0.0204, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.491937050709151, |
|
"grad_norm": 0.2297420680782252, |
|
"learning_rate": 2.364700784541504e-05, |
|
"loss": 0.0281, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.495822809403536, |
|
"grad_norm": 0.21161937179194085, |
|
"learning_rate": 2.3558042701158294e-05, |
|
"loss": 0.0296, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.4997085680979212, |
|
"grad_norm": 0.2681086390248988, |
|
"learning_rate": 2.34690047930694e-05, |
|
"loss": 0.024, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.5035943267923062, |
|
"grad_norm": 0.26044197357045645, |
|
"learning_rate": 2.337989594202009e-05, |
|
"loss": 0.0242, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.5074800854866912, |
|
"grad_norm": 0.2581684322985289, |
|
"learning_rate": 2.3290717970332918e-05, |
|
"loss": 0.0236, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.5113658441810762, |
|
"grad_norm": 0.23713590285989272, |
|
"learning_rate": 2.3201472701744013e-05, |
|
"loss": 0.0251, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.5152516028754615, |
|
"grad_norm": 0.2501508071933915, |
|
"learning_rate": 2.3112161961365724e-05, |
|
"loss": 0.0301, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.5191373615698465, |
|
"grad_norm": 0.24802013587982075, |
|
"learning_rate": 2.302278757564937e-05, |
|
"loss": 0.0261, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.5230231202642317, |
|
"grad_norm": 0.3445587520016882, |
|
"learning_rate": 2.2933351372347822e-05, |
|
"loss": 0.0279, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.5269088789586167, |
|
"grad_norm": 0.22828900210653533, |
|
"learning_rate": 2.2843855180478167e-05, |
|
"loss": 0.0224, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.5307946376530017, |
|
"grad_norm": 0.14993393496314023, |
|
"learning_rate": 2.2754300830284287e-05, |
|
"loss": 0.0228, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.5346803963473867, |
|
"grad_norm": 0.22112935196732578, |
|
"learning_rate": 2.266469015319943e-05, |
|
"loss": 0.0298, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.5385661550417717, |
|
"grad_norm": 0.19193655052829417, |
|
"learning_rate": 2.2575024981808763e-05, |
|
"loss": 0.0253, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.542451913736157, |
|
"grad_norm": 0.2670658908912582, |
|
"learning_rate": 2.2485307149811894e-05, |
|
"loss": 0.029, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.5463376724305422, |
|
"grad_norm": 0.2243861088423283, |
|
"learning_rate": 2.2395538491985338e-05, |
|
"loss": 0.0246, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.5502234311249272, |
|
"grad_norm": 0.22493381727938802, |
|
"learning_rate": 2.230572084414507e-05, |
|
"loss": 0.0253, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.5541091898193122, |
|
"grad_norm": 0.2866864450702556, |
|
"learning_rate": 2.2215856043108896e-05, |
|
"loss": 0.0291, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.5579949485136972, |
|
"grad_norm": 0.16815062034208167, |
|
"learning_rate": 2.212594592665896e-05, |
|
"loss": 0.025, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.5618807072080823, |
|
"grad_norm": 0.2612232980649845, |
|
"learning_rate": 2.2035992333504127e-05, |
|
"loss": 0.0326, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.5657664659024675, |
|
"grad_norm": 0.33854813622270197, |
|
"learning_rate": 2.1945997103242344e-05, |
|
"loss": 0.0292, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.5696522245968525, |
|
"grad_norm": 0.2617347504462213, |
|
"learning_rate": 2.1855962076323115e-05, |
|
"loss": 0.021, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.5735379832912377, |
|
"grad_norm": 0.1548116373638503, |
|
"learning_rate": 2.1765889094009762e-05, |
|
"loss": 0.0269, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.5774237419856227, |
|
"grad_norm": 0.19918562909410628, |
|
"learning_rate": 2.167577999834185e-05, |
|
"loss": 0.0237, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.5813095006800078, |
|
"grad_norm": 0.11447983969077741, |
|
"learning_rate": 2.1585636632097446e-05, |
|
"loss": 0.0235, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.5851952593743928, |
|
"grad_norm": 0.1598181388976202, |
|
"learning_rate": 2.1495460838755492e-05, |
|
"loss": 0.0249, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.5890810180687778, |
|
"grad_norm": 0.1998709332001755, |
|
"learning_rate": 2.140525446245808e-05, |
|
"loss": 0.0223, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.592966776763163, |
|
"grad_norm": 0.23042732127911256, |
|
"learning_rate": 2.1315019347972723e-05, |
|
"loss": 0.025, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.5968525354575482, |
|
"grad_norm": 0.28671695920142976, |
|
"learning_rate": 2.1224757340654672e-05, |
|
"loss": 0.0282, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.6007382941519332, |
|
"grad_norm": 0.2013168208153807, |
|
"learning_rate": 2.1134470286409118e-05, |
|
"loss": 0.0223, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.6046240528463183, |
|
"grad_norm": 0.22132045622532287, |
|
"learning_rate": 2.10441600316535e-05, |
|
"loss": 0.0209, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.6085098115407033, |
|
"grad_norm": 0.18246569785225308, |
|
"learning_rate": 2.095382842327971e-05, |
|
"loss": 0.0258, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.6123955702350883, |
|
"grad_norm": 0.15024515227995486, |
|
"learning_rate": 2.086347730861633e-05, |
|
"loss": 0.018, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.6162813289294735, |
|
"grad_norm": 0.21028730383896185, |
|
"learning_rate": 2.077310853539086e-05, |
|
"loss": 0.0246, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.6201670876238585, |
|
"grad_norm": 0.18683373176287732, |
|
"learning_rate": 2.068272395169193e-05, |
|
"loss": 0.0207, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.6240528463182438, |
|
"grad_norm": 0.2838911905964799, |
|
"learning_rate": 2.0592325405931498e-05, |
|
"loss": 0.0282, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.6279386050126288, |
|
"grad_norm": 0.28170440553062887, |
|
"learning_rate": 2.050191474680705e-05, |
|
"loss": 0.0226, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.6318243637070138, |
|
"grad_norm": 0.20578915632228978, |
|
"learning_rate": 2.04114938232638e-05, |
|
"loss": 0.0221, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.6357101224013988, |
|
"grad_norm": 0.3124892105648976, |
|
"learning_rate": 2.0321064484456875e-05, |
|
"loss": 0.0199, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.6395958810957838, |
|
"grad_norm": 0.1570324139378916, |
|
"learning_rate": 2.0230628579713505e-05, |
|
"loss": 0.0255, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.643481639790169, |
|
"grad_norm": 0.24972195531663, |
|
"learning_rate": 2.0140187958495187e-05, |
|
"loss": 0.0252, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.6473673984845543, |
|
"grad_norm": 0.2018721430297068, |
|
"learning_rate": 2.004974447035988e-05, |
|
"loss": 0.0225, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.6512531571789393, |
|
"grad_norm": 0.19112522378491964, |
|
"learning_rate": 1.9959299964924156e-05, |
|
"loss": 0.024, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.6551389158733243, |
|
"grad_norm": 0.14928748310133091, |
|
"learning_rate": 1.9868856291825417e-05, |
|
"loss": 0.0199, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.6590246745677093, |
|
"grad_norm": 0.22380334602146915, |
|
"learning_rate": 1.9778415300684033e-05, |
|
"loss": 0.0274, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.6629104332620943, |
|
"grad_norm": 0.24112965797496708, |
|
"learning_rate": 1.9687978841065514e-05, |
|
"loss": 0.0217, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.6667961919564795, |
|
"grad_norm": 0.16406141595970072, |
|
"learning_rate": 1.9597548762442712e-05, |
|
"loss": 0.0283, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.6706819506508646, |
|
"grad_norm": 0.1948101464396577, |
|
"learning_rate": 1.9507126914157973e-05, |
|
"loss": 0.0251, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.6745677093452498, |
|
"grad_norm": 0.2956340770250799, |
|
"learning_rate": 1.941671514538536e-05, |
|
"loss": 0.0185, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.6784534680396348, |
|
"grad_norm": 0.12177397943751574, |
|
"learning_rate": 1.9326315305092746e-05, |
|
"loss": 0.0263, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.6823392267340198, |
|
"grad_norm": 0.19695542020311577, |
|
"learning_rate": 1.923592924200412e-05, |
|
"loss": 0.021, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.6862249854284048, |
|
"grad_norm": 0.21181764645601958, |
|
"learning_rate": 1.9145558804561686e-05, |
|
"loss": 0.0254, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.6901107441227898, |
|
"grad_norm": 0.20176994374162602, |
|
"learning_rate": 1.90552058408881e-05, |
|
"loss": 0.0247, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.693996502817175, |
|
"grad_norm": 0.23272965832055748, |
|
"learning_rate": 1.8964872198748694e-05, |
|
"loss": 0.0207, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.6978822615115603, |
|
"grad_norm": 0.17179212412190722, |
|
"learning_rate": 1.8874559725513618e-05, |
|
"loss": 0.0236, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.7017680202059453, |
|
"grad_norm": 0.2088323094557007, |
|
"learning_rate": 1.8784270268120148e-05, |
|
"loss": 0.0215, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.7056537789003303, |
|
"grad_norm": 0.25618504375528284, |
|
"learning_rate": 1.869400567303486e-05, |
|
"loss": 0.0253, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.7095395375947153, |
|
"grad_norm": 0.29335176738438906, |
|
"learning_rate": 1.8603767786215886e-05, |
|
"loss": 0.0247, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.7134252962891003, |
|
"grad_norm": 0.19297739454353136, |
|
"learning_rate": 1.8513558453075145e-05, |
|
"loss": 0.0176, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.7173110549834856, |
|
"grad_norm": 0.21907186035510737, |
|
"learning_rate": 1.8423379518440637e-05, |
|
"loss": 0.0252, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.7211968136778706, |
|
"grad_norm": 0.1662707063311211, |
|
"learning_rate": 1.833323282651869e-05, |
|
"loss": 0.0264, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.7250825723722558, |
|
"grad_norm": 0.2782775098963104, |
|
"learning_rate": 1.824312022085625e-05, |
|
"loss": 0.0256, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.7289683310666408, |
|
"grad_norm": 0.2244270504850629, |
|
"learning_rate": 1.8153043544303187e-05, |
|
"loss": 0.022, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.7328540897610258, |
|
"grad_norm": 0.19984321027915625, |
|
"learning_rate": 1.806300463897459e-05, |
|
"loss": 0.0258, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.7367398484554109, |
|
"grad_norm": 0.34358544847861805, |
|
"learning_rate": 1.7973005346213112e-05, |
|
"loss": 0.0247, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.7406256071497959, |
|
"grad_norm": 0.258634512592744, |
|
"learning_rate": 1.7883047506551323e-05, |
|
"loss": 0.0241, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.744511365844181, |
|
"grad_norm": 0.2993875451174172, |
|
"learning_rate": 1.779313295967404e-05, |
|
"loss": 0.025, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.748397124538566, |
|
"grad_norm": 0.2524644984996203, |
|
"learning_rate": 1.7703263544380712e-05, |
|
"loss": 0.0256, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.7522828832329513, |
|
"grad_norm": 0.15203592796958135, |
|
"learning_rate": 1.7613441098547844e-05, |
|
"loss": 0.0277, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.7561686419273363, |
|
"grad_norm": 0.3339303036694701, |
|
"learning_rate": 1.7523667459091372e-05, |
|
"loss": 0.0253, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.7600544006217214, |
|
"grad_norm": 0.10575616905501665, |
|
"learning_rate": 1.743394446192915e-05, |
|
"loss": 0.0239, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.7639401593161064, |
|
"grad_norm": 0.23336439363322206, |
|
"learning_rate": 1.734427394194331e-05, |
|
"loss": 0.0177, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.7678259180104916, |
|
"grad_norm": 0.18372002116915823, |
|
"learning_rate": 1.725465773294286e-05, |
|
"loss": 0.0238, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.7717116767048766, |
|
"grad_norm": 0.4517812923928783, |
|
"learning_rate": 1.7165097667626085e-05, |
|
"loss": 0.0241, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.7755974353992618, |
|
"grad_norm": 0.3032985382795743, |
|
"learning_rate": 1.7075595577543112e-05, |
|
"loss": 0.0246, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 1.7794831940936469, |
|
"grad_norm": 0.2883198267970469, |
|
"learning_rate": 1.698615329305846e-05, |
|
"loss": 0.0363, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.7833689527880319, |
|
"grad_norm": 0.212183942746847, |
|
"learning_rate": 1.6896772643313545e-05, |
|
"loss": 0.0184, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 1.7872547114824169, |
|
"grad_norm": 0.20532524865736027, |
|
"learning_rate": 1.6807455456189375e-05, |
|
"loss": 0.0238, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.791140470176802, |
|
"grad_norm": 0.34333620427983147, |
|
"learning_rate": 1.671820355826909e-05, |
|
"loss": 0.0262, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 1.7950262288711871, |
|
"grad_norm": 0.20001883941281645, |
|
"learning_rate": 1.6629018774800626e-05, |
|
"loss": 0.0272, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.7989119875655721, |
|
"grad_norm": 0.34691765585073897, |
|
"learning_rate": 1.6539902929659398e-05, |
|
"loss": 0.025, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 1.8027977462599574, |
|
"grad_norm": 0.2742832126748164, |
|
"learning_rate": 1.6450857845310995e-05, |
|
"loss": 0.0235, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.8066835049543424, |
|
"grad_norm": 0.28750335882588557, |
|
"learning_rate": 1.6361885342773928e-05, |
|
"loss": 0.0217, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.8105692636487274, |
|
"grad_norm": 0.20616162983525588, |
|
"learning_rate": 1.6272987241582344e-05, |
|
"loss": 0.0234, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.8144550223431124, |
|
"grad_norm": 0.28032518340338625, |
|
"learning_rate": 1.6184165359748873e-05, |
|
"loss": 0.0241, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 1.8183407810374974, |
|
"grad_norm": 0.19596210439624756, |
|
"learning_rate": 1.6095421513727393e-05, |
|
"loss": 0.0195, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.8222265397318826, |
|
"grad_norm": 0.23644823136897267, |
|
"learning_rate": 1.600675751837591e-05, |
|
"loss": 0.0219, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 1.8261122984262679, |
|
"grad_norm": 0.2630361162326872, |
|
"learning_rate": 1.591817518691947e-05, |
|
"loss": 0.0228, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.8299980571206529, |
|
"grad_norm": 0.2595409653034801, |
|
"learning_rate": 1.582967633091303e-05, |
|
"loss": 0.0231, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.833883815815038, |
|
"grad_norm": 0.3326391117683167, |
|
"learning_rate": 1.5741262760204424e-05, |
|
"loss": 0.0291, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.837769574509423, |
|
"grad_norm": 0.24035275119514785, |
|
"learning_rate": 1.5652936282897365e-05, |
|
"loss": 0.0219, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 1.841655333203808, |
|
"grad_norm": 0.244068103767282, |
|
"learning_rate": 1.5564698705314457e-05, |
|
"loss": 0.0224, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.8455410918981932, |
|
"grad_norm": 0.24784872474552172, |
|
"learning_rate": 1.5476551831960283e-05, |
|
"loss": 0.0269, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.8494268505925782, |
|
"grad_norm": 0.2820854981793686, |
|
"learning_rate": 1.5388497465484427e-05, |
|
"loss": 0.0236, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.8533126092869634, |
|
"grad_norm": 0.18652076792233116, |
|
"learning_rate": 1.5300537406644707e-05, |
|
"loss": 0.0248, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 1.8571983679813484, |
|
"grad_norm": 0.21164601939248687, |
|
"learning_rate": 1.5212673454270275e-05, |
|
"loss": 0.0187, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.8610841266757334, |
|
"grad_norm": 0.19650678419372633, |
|
"learning_rate": 1.5124907405224857e-05, |
|
"loss": 0.0202, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 1.8649698853701184, |
|
"grad_norm": 0.19671419185547953, |
|
"learning_rate": 1.5037241054370031e-05, |
|
"loss": 0.0226, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.8688556440645034, |
|
"grad_norm": 0.28963451060296624, |
|
"learning_rate": 1.4949676194528443e-05, |
|
"loss": 0.0212, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 1.8727414027588887, |
|
"grad_norm": 0.2291236266085277, |
|
"learning_rate": 1.4862214616447246e-05, |
|
"loss": 0.0265, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.876627161453274, |
|
"grad_norm": 0.21720512675401074, |
|
"learning_rate": 1.4774858108761399e-05, |
|
"loss": 0.0247, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.880512920147659, |
|
"grad_norm": 0.24043869727722034, |
|
"learning_rate": 1.4687608457957131e-05, |
|
"loss": 0.0237, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.884398678842044, |
|
"grad_norm": 0.3039350538024942, |
|
"learning_rate": 1.4600467448335377e-05, |
|
"loss": 0.0209, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.888284437536429, |
|
"grad_norm": 0.27910808051006536, |
|
"learning_rate": 1.4513436861975309e-05, |
|
"loss": 0.0239, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.892170196230814, |
|
"grad_norm": 0.186631017294127, |
|
"learning_rate": 1.4426518478697898e-05, |
|
"loss": 0.0267, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.8960559549251992, |
|
"grad_norm": 0.1881734007091214, |
|
"learning_rate": 1.4339714076029485e-05, |
|
"loss": 0.0218, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.8999417136195842, |
|
"grad_norm": 0.13892970586490075, |
|
"learning_rate": 1.4253025429165464e-05, |
|
"loss": 0.0212, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.9038274723139694, |
|
"grad_norm": 0.288495543969294, |
|
"learning_rate": 1.4166454310933941e-05, |
|
"loss": 0.0209, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.9077132310083544, |
|
"grad_norm": 0.2555017196077912, |
|
"learning_rate": 1.4080002491759519e-05, |
|
"loss": 0.0244, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.9115989897027394, |
|
"grad_norm": 0.18634334770662844, |
|
"learning_rate": 1.3993671739627072e-05, |
|
"loss": 0.0217, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.9154847483971245, |
|
"grad_norm": 0.3164288797558731, |
|
"learning_rate": 1.3907463820045589e-05, |
|
"loss": 0.0222, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.9193705070915095, |
|
"grad_norm": 0.2818084680815898, |
|
"learning_rate": 1.382138049601205e-05, |
|
"loss": 0.0218, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.9232562657858947, |
|
"grad_norm": 0.36520600715684526, |
|
"learning_rate": 1.3735423527975416e-05, |
|
"loss": 0.0268, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.92714202448028, |
|
"grad_norm": 0.29834225940452946, |
|
"learning_rate": 1.3649594673800585e-05, |
|
"loss": 0.027, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.931027783174665, |
|
"grad_norm": 0.24025465966717943, |
|
"learning_rate": 1.3563895688732476e-05, |
|
"loss": 0.0201, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.93491354186905, |
|
"grad_norm": 0.36547751994851313, |
|
"learning_rate": 1.3478328325360108e-05, |
|
"loss": 0.0175, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.938799300563435, |
|
"grad_norm": 0.30354949309324647, |
|
"learning_rate": 1.3392894333580757e-05, |
|
"loss": 0.0222, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.94268505925782, |
|
"grad_norm": 0.23953269705131022, |
|
"learning_rate": 1.3307595460564196e-05, |
|
"loss": 0.0245, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.9465708179522052, |
|
"grad_norm": 0.25015753705105026, |
|
"learning_rate": 1.3222433450716939e-05, |
|
"loss": 0.0273, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 1.9504565766465902, |
|
"grad_norm": 0.1601753057955312, |
|
"learning_rate": 1.3137410045646593e-05, |
|
"loss": 0.0223, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.9543423353409755, |
|
"grad_norm": 0.2242880052150341, |
|
"learning_rate": 1.3052526984126192e-05, |
|
"loss": 0.0183, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 1.9582280940353605, |
|
"grad_norm": 0.2607444099422183, |
|
"learning_rate": 1.2967786002058712e-05, |
|
"loss": 0.0241, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.9621138527297455, |
|
"grad_norm": 0.24290540890508416, |
|
"learning_rate": 1.2883188832441496e-05, |
|
"loss": 0.0226, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.9659996114241305, |
|
"grad_norm": 0.3409873620160428, |
|
"learning_rate": 1.2798737205330869e-05, |
|
"loss": 0.041, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.9698853701185155, |
|
"grad_norm": 0.1874649872758006, |
|
"learning_rate": 1.2714432847806721e-05, |
|
"loss": 0.0256, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 1.9737711288129007, |
|
"grad_norm": 0.18360888920333324, |
|
"learning_rate": 1.263027748393721e-05, |
|
"loss": 0.0219, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.9776568875072857, |
|
"grad_norm": 0.23427360199598463, |
|
"learning_rate": 1.2546272834743496e-05, |
|
"loss": 0.0221, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 1.981542646201671, |
|
"grad_norm": 0.31230044573711585, |
|
"learning_rate": 1.2462420618164548e-05, |
|
"loss": 0.0204, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.985428404896056, |
|
"grad_norm": 0.250237465475584, |
|
"learning_rate": 1.2378722549022e-05, |
|
"loss": 0.02, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 1.989314163590441, |
|
"grad_norm": 0.24977255925651687, |
|
"learning_rate": 1.2295180338985092e-05, |
|
"loss": 0.0221, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.993199922284826, |
|
"grad_norm": 0.3918157450288067, |
|
"learning_rate": 1.2211795696535664e-05, |
|
"loss": 0.0264, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 1.9970856809792112, |
|
"grad_norm": 0.24230360473123272, |
|
"learning_rate": 1.2128570326933224e-05, |
|
"loss": 0.0167, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.000777151738877, |
|
"grad_norm": 0.19021696383018177, |
|
"learning_rate": 1.2045505932180069e-05, |
|
"loss": 0.0227, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.0046629104332623, |
|
"grad_norm": 0.1996009218747256, |
|
"learning_rate": 1.1962604210986455e-05, |
|
"loss": 0.0214, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.0085486691276473, |
|
"grad_norm": 0.25730395478860757, |
|
"learning_rate": 1.18798668587359e-05, |
|
"loss": 0.0209, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 2.0124344278220323, |
|
"grad_norm": 0.1771816589400498, |
|
"learning_rate": 1.179729556745048e-05, |
|
"loss": 0.0188, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.0163201865164173, |
|
"grad_norm": 0.1992628058679068, |
|
"learning_rate": 1.171489202575624e-05, |
|
"loss": 0.0159, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 2.0202059452108023, |
|
"grad_norm": 0.15396300645984096, |
|
"learning_rate": 1.163265791884868e-05, |
|
"loss": 0.0165, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.0240917039051873, |
|
"grad_norm": 0.2379685245286237, |
|
"learning_rate": 1.1550594928458224e-05, |
|
"loss": 0.011, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 2.027977462599573, |
|
"grad_norm": 0.1501164586957791, |
|
"learning_rate": 1.14687047328159e-05, |
|
"loss": 0.0124, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 2.031863221293958, |
|
"grad_norm": 0.22785045077674615, |
|
"learning_rate": 1.138698900661901e-05, |
|
"loss": 0.0173, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 2.035748979988343, |
|
"grad_norm": 0.19489104500055154, |
|
"learning_rate": 1.130544942099685e-05, |
|
"loss": 0.0156, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 2.039634738682728, |
|
"grad_norm": 0.2578847419234991, |
|
"learning_rate": 1.1224087643476525e-05, |
|
"loss": 0.0138, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.043520497377113, |
|
"grad_norm": 0.24662743252166966, |
|
"learning_rate": 1.1142905337948905e-05, |
|
"loss": 0.0145, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 2.047406256071498, |
|
"grad_norm": 0.2704090578235268, |
|
"learning_rate": 1.1061904164634547e-05, |
|
"loss": 0.0201, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 2.051292014765883, |
|
"grad_norm": 0.2277257711569673, |
|
"learning_rate": 1.0981085780049783e-05, |
|
"loss": 0.0156, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 2.0551777734602683, |
|
"grad_norm": 0.24393861223637625, |
|
"learning_rate": 1.0900451836972779e-05, |
|
"loss": 0.0157, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 2.0590635321546533, |
|
"grad_norm": 0.22148212227817382, |
|
"learning_rate": 1.0820003984409809e-05, |
|
"loss": 0.0182, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.0629492908490383, |
|
"grad_norm": 0.17873179551577242, |
|
"learning_rate": 1.0739743867561484e-05, |
|
"loss": 0.0223, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 2.0668350495434233, |
|
"grad_norm": 0.26424010200649295, |
|
"learning_rate": 1.0659673127789123e-05, |
|
"loss": 0.0149, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 2.0707208082378084, |
|
"grad_norm": 0.14038451344369343, |
|
"learning_rate": 1.0579793402581208e-05, |
|
"loss": 0.0125, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 2.0746065669321934, |
|
"grad_norm": 0.22811068872894222, |
|
"learning_rate": 1.050010632551983e-05, |
|
"loss": 0.0132, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 2.078492325626579, |
|
"grad_norm": 0.20031881335249865, |
|
"learning_rate": 1.0420613526247356e-05, |
|
"loss": 0.0166, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.082378084320964, |
|
"grad_norm": 0.2633505577038377, |
|
"learning_rate": 1.0341316630433062e-05, |
|
"loss": 0.0146, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 2.086263843015349, |
|
"grad_norm": 0.18868039147588853, |
|
"learning_rate": 1.0262217259739897e-05, |
|
"loss": 0.0117, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 2.090149601709734, |
|
"grad_norm": 0.1483051784876369, |
|
"learning_rate": 1.0183317031791318e-05, |
|
"loss": 0.0175, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 2.094035360404119, |
|
"grad_norm": 0.1594861493991722, |
|
"learning_rate": 1.0104617560138205e-05, |
|
"loss": 0.0117, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 2.097921119098504, |
|
"grad_norm": 0.26030303220977924, |
|
"learning_rate": 1.0026120454225877e-05, |
|
"loss": 0.018, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.101806877792889, |
|
"grad_norm": 0.2308638280548361, |
|
"learning_rate": 9.947827319361152e-06, |
|
"loss": 0.0178, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 2.1056926364872743, |
|
"grad_norm": 0.10900704882374063, |
|
"learning_rate": 9.869739756679551e-06, |
|
"loss": 0.0165, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 2.1095783951816593, |
|
"grad_norm": 0.21796588094597985, |
|
"learning_rate": 9.791859363112521e-06, |
|
"loss": 0.0191, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 2.1134641538760444, |
|
"grad_norm": 0.1672673980678181, |
|
"learning_rate": 9.714187731354805e-06, |
|
"loss": 0.0177, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 2.1173499125704294, |
|
"grad_norm": 0.2684028767592411, |
|
"learning_rate": 9.63672644983185e-06, |
|
"loss": 0.0162, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.1212356712648144, |
|
"grad_norm": 0.2649724905943757, |
|
"learning_rate": 9.559477102667331e-06, |
|
"loss": 0.0159, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.1251214299591994, |
|
"grad_norm": 0.1855609816934914, |
|
"learning_rate": 9.482441269650762e-06, |
|
"loss": 0.0168, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.129007188653585, |
|
"grad_norm": 0.20799545221446245, |
|
"learning_rate": 9.405620526205173e-06, |
|
"loss": 0.0229, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.13289294734797, |
|
"grad_norm": 0.2169295403294615, |
|
"learning_rate": 9.329016443354899e-06, |
|
"loss": 0.0166, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.136778706042355, |
|
"grad_norm": 0.20155345013696116, |
|
"learning_rate": 9.25263058769347e-06, |
|
"loss": 0.0164, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.14066446473674, |
|
"grad_norm": 0.24455184564544635, |
|
"learning_rate": 9.176464521351517e-06, |
|
"loss": 0.0137, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.144550223431125, |
|
"grad_norm": 0.14491602829427766, |
|
"learning_rate": 9.100519801964913e-06, |
|
"loss": 0.0124, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.14843598212551, |
|
"grad_norm": 0.31403014524681455, |
|
"learning_rate": 9.024797982642841e-06, |
|
"loss": 0.0199, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.152321740819895, |
|
"grad_norm": 0.16133633738818282, |
|
"learning_rate": 8.949300611936065e-06, |
|
"loss": 0.0177, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.1562074995142804, |
|
"grad_norm": 0.34872137324864605, |
|
"learning_rate": 8.874029233805269e-06, |
|
"loss": 0.0176, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.1600932582086654, |
|
"grad_norm": 0.09809795832817098, |
|
"learning_rate": 8.798985387589436e-06, |
|
"loss": 0.0108, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.1639790169030504, |
|
"grad_norm": 0.2159359078852452, |
|
"learning_rate": 8.724170607974454e-06, |
|
"loss": 0.0147, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.1678647755974354, |
|
"grad_norm": 0.21468205780280344, |
|
"learning_rate": 8.649586424961645e-06, |
|
"loss": 0.0141, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.1717505342918204, |
|
"grad_norm": 0.2249780220781056, |
|
"learning_rate": 8.575234363836526e-06, |
|
"loss": 0.0186, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.1756362929862054, |
|
"grad_norm": 0.24124720508302283, |
|
"learning_rate": 8.501115945137577e-06, |
|
"loss": 0.0144, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.1795220516805904, |
|
"grad_norm": 0.26759279337799535, |
|
"learning_rate": 8.427232684625186e-06, |
|
"loss": 0.0193, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.183407810374976, |
|
"grad_norm": 0.2297544777314867, |
|
"learning_rate": 8.353586093250642e-06, |
|
"loss": 0.0147, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.187293569069361, |
|
"grad_norm": 0.23619415311251218, |
|
"learning_rate": 8.280177677125214e-06, |
|
"loss": 0.0166, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.191179327763746, |
|
"grad_norm": 0.1943560804911627, |
|
"learning_rate": 8.207008937489364e-06, |
|
"loss": 0.0172, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.195065086458131, |
|
"grad_norm": 0.22431037235771287, |
|
"learning_rate": 8.134081370682038e-06, |
|
"loss": 0.0161, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.198950845152516, |
|
"grad_norm": 0.21729765338858503, |
|
"learning_rate": 8.061396468110074e-06, |
|
"loss": 0.0175, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 2.202836603846901, |
|
"grad_norm": 0.18338237613630443, |
|
"learning_rate": 7.988955716217719e-06, |
|
"loss": 0.0106, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 2.206722362541286, |
|
"grad_norm": 0.20665577127269266, |
|
"learning_rate": 7.916760596456197e-06, |
|
"loss": 0.0138, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 2.2106081212356714, |
|
"grad_norm": 0.2316552326389069, |
|
"learning_rate": 7.84481258525341e-06, |
|
"loss": 0.0141, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 2.2144938799300564, |
|
"grad_norm": 0.26108396067769657, |
|
"learning_rate": 7.773113153983787e-06, |
|
"loss": 0.0169, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.2183796386244414, |
|
"grad_norm": 0.2243677380846199, |
|
"learning_rate": 7.701663768938146e-06, |
|
"loss": 0.0145, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 2.2222653973188264, |
|
"grad_norm": 0.18542682485202877, |
|
"learning_rate": 7.630465891293766e-06, |
|
"loss": 0.0184, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 2.2261511560132115, |
|
"grad_norm": 0.16604306041103264, |
|
"learning_rate": 7.559520977084416e-06, |
|
"loss": 0.0134, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 2.2300369147075965, |
|
"grad_norm": 0.15271062795031004, |
|
"learning_rate": 7.4888304771706675e-06, |
|
"loss": 0.0174, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 2.233922673401982, |
|
"grad_norm": 0.22620464359511355, |
|
"learning_rate": 7.418395837210177e-06, |
|
"loss": 0.0183, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.237808432096367, |
|
"grad_norm": 0.1932286630859644, |
|
"learning_rate": 7.34821849762813e-06, |
|
"loss": 0.015, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 2.241694190790752, |
|
"grad_norm": 0.20920536899184783, |
|
"learning_rate": 7.278299893587784e-06, |
|
"loss": 0.0128, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 2.245579949485137, |
|
"grad_norm": 0.1681609340326629, |
|
"learning_rate": 7.20864145496112e-06, |
|
"loss": 0.0139, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 2.249465708179522, |
|
"grad_norm": 0.23030013864097168, |
|
"learning_rate": 7.139244606299603e-06, |
|
"loss": 0.0202, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 2.253351466873907, |
|
"grad_norm": 0.20837500136002646, |
|
"learning_rate": 7.070110766805045e-06, |
|
"loss": 0.0151, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.257237225568292, |
|
"grad_norm": 0.09892626670584977, |
|
"learning_rate": 7.001241350300585e-06, |
|
"loss": 0.0139, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 2.2611229842626774, |
|
"grad_norm": 0.2421628202226961, |
|
"learning_rate": 6.932637765201767e-06, |
|
"loss": 0.0167, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 2.2650087429570624, |
|
"grad_norm": 0.22005154896696727, |
|
"learning_rate": 6.86430141448775e-06, |
|
"loss": 0.0163, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 2.2688945016514475, |
|
"grad_norm": 0.18906159377923581, |
|
"learning_rate": 6.796233695672611e-06, |
|
"loss": 0.0141, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 2.2727802603458325, |
|
"grad_norm": 0.1882303122476742, |
|
"learning_rate": 6.728436000776759e-06, |
|
"loss": 0.0167, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.2766660190402175, |
|
"grad_norm": 0.20942395612766407, |
|
"learning_rate": 6.6609097162984785e-06, |
|
"loss": 0.0167, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 2.2805517777346025, |
|
"grad_norm": 0.17048648869426325, |
|
"learning_rate": 6.593656223185565e-06, |
|
"loss": 0.0158, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 2.284437536428988, |
|
"grad_norm": 0.18912326858465023, |
|
"learning_rate": 6.526676896807092e-06, |
|
"loss": 0.0172, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 2.288323295123373, |
|
"grad_norm": 0.22429991946158165, |
|
"learning_rate": 6.459973106925272e-06, |
|
"loss": 0.0138, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 2.292209053817758, |
|
"grad_norm": 0.2719200154391883, |
|
"learning_rate": 6.393546217667464e-06, |
|
"loss": 0.0137, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.296094812512143, |
|
"grad_norm": 0.23193225915033114, |
|
"learning_rate": 6.327397587498254e-06, |
|
"loss": 0.0202, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 2.299980571206528, |
|
"grad_norm": 0.1773945719864865, |
|
"learning_rate": 6.26152856919169e-06, |
|
"loss": 0.0269, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 2.303866329900913, |
|
"grad_norm": 0.18357413512611562, |
|
"learning_rate": 6.19594050980361e-06, |
|
"loss": 0.0133, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 2.307752088595298, |
|
"grad_norm": 0.10329466907615475, |
|
"learning_rate": 6.130634750644102e-06, |
|
"loss": 0.0243, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 2.3116378472896835, |
|
"grad_norm": 0.26127302357511983, |
|
"learning_rate": 6.0656126272500485e-06, |
|
"loss": 0.0155, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.3155236059840685, |
|
"grad_norm": 0.15622640887182293, |
|
"learning_rate": 6.000875469357841e-06, |
|
"loss": 0.0101, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 2.3194093646784535, |
|
"grad_norm": 0.253685898111279, |
|
"learning_rate": 5.936424600876194e-06, |
|
"loss": 0.0189, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 2.3232951233728385, |
|
"grad_norm": 0.29404993122697953, |
|
"learning_rate": 5.872261339859038e-06, |
|
"loss": 0.0184, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 2.3271808820672235, |
|
"grad_norm": 0.14796428743903128, |
|
"learning_rate": 5.8083869984785836e-06, |
|
"loss": 0.0147, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 2.3310666407616085, |
|
"grad_norm": 0.20683726050768395, |
|
"learning_rate": 5.7448028829984745e-06, |
|
"loss": 0.0188, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.334952399455994, |
|
"grad_norm": 0.21092382019033648, |
|
"learning_rate": 5.681510293747092e-06, |
|
"loss": 0.016, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 2.338838158150379, |
|
"grad_norm": 0.21550234513080346, |
|
"learning_rate": 5.618510525090966e-06, |
|
"loss": 0.0189, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 2.342723916844764, |
|
"grad_norm": 0.16535694850993649, |
|
"learning_rate": 5.555804865408279e-06, |
|
"loss": 0.0191, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 2.346609675539149, |
|
"grad_norm": 0.21745148122170188, |
|
"learning_rate": 5.4933945970625225e-06, |
|
"loss": 0.0137, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 2.350495434233534, |
|
"grad_norm": 0.19584922849210273, |
|
"learning_rate": 5.431280996376294e-06, |
|
"loss": 0.0184, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.354381192927919, |
|
"grad_norm": 0.2120146691664677, |
|
"learning_rate": 5.369465333605172e-06, |
|
"loss": 0.0136, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 2.358266951622304, |
|
"grad_norm": 0.23504940817120928, |
|
"learning_rate": 5.307948872911772e-06, |
|
"loss": 0.0108, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 2.3621527103166895, |
|
"grad_norm": 0.2544407938266695, |
|
"learning_rate": 5.246732872339852e-06, |
|
"loss": 0.0162, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 2.3660384690110745, |
|
"grad_norm": 0.1787782566980142, |
|
"learning_rate": 5.185818583788596e-06, |
|
"loss": 0.0167, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 2.3699242277054595, |
|
"grad_norm": 0.18779821624668364, |
|
"learning_rate": 5.125207252987034e-06, |
|
"loss": 0.0164, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.3738099863998445, |
|
"grad_norm": 0.23079303888097902, |
|
"learning_rate": 5.064900119468544e-06, |
|
"loss": 0.0172, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 2.3776957450942295, |
|
"grad_norm": 0.15381004609158025, |
|
"learning_rate": 5.004898416545529e-06, |
|
"loss": 0.0184, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 2.3815815037886146, |
|
"grad_norm": 0.23537158479264925, |
|
"learning_rate": 4.945203371284147e-06, |
|
"loss": 0.0123, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 2.385467262483, |
|
"grad_norm": 0.15335724927915312, |
|
"learning_rate": 4.8858162044792654e-06, |
|
"loss": 0.0102, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 2.389353021177385, |
|
"grad_norm": 0.2832041836288288, |
|
"learning_rate": 4.826738130629473e-06, |
|
"loss": 0.0149, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.39323877987177, |
|
"grad_norm": 0.27813345843062076, |
|
"learning_rate": 4.767970357912246e-06, |
|
"loss": 0.0165, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 2.397124538566155, |
|
"grad_norm": 0.15264755246893946, |
|
"learning_rate": 4.7095140881592395e-06, |
|
"loss": 0.0153, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 2.40101029726054, |
|
"grad_norm": 0.24434480556286678, |
|
"learning_rate": 4.65137051683171e-06, |
|
"loss": 0.0203, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 2.404896055954925, |
|
"grad_norm": 0.17160358108568669, |
|
"learning_rate": 4.593540832996071e-06, |
|
"loss": 0.0126, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 2.40878181464931, |
|
"grad_norm": 0.2220871800365551, |
|
"learning_rate": 4.53602621929957e-06, |
|
"loss": 0.0174, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.4126675733436955, |
|
"grad_norm": 0.19387829594662281, |
|
"learning_rate": 4.478827851946102e-06, |
|
"loss": 0.0131, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 2.4165533320380805, |
|
"grad_norm": 0.2720443716921079, |
|
"learning_rate": 4.421946900672165e-06, |
|
"loss": 0.0174, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 2.4204390907324655, |
|
"grad_norm": 0.3727208802934669, |
|
"learning_rate": 4.365384528722931e-06, |
|
"loss": 0.0152, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 2.4243248494268506, |
|
"grad_norm": 0.2178296686386825, |
|
"learning_rate": 4.309141892828459e-06, |
|
"loss": 0.0142, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 2.4282106081212356, |
|
"grad_norm": 0.22728601802630374, |
|
"learning_rate": 4.2532201431800344e-06, |
|
"loss": 0.0115, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.4320963668156206, |
|
"grad_norm": 0.23843429430202012, |
|
"learning_rate": 4.197620423406657e-06, |
|
"loss": 0.0157, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 2.435982125510006, |
|
"grad_norm": 0.19116227547407497, |
|
"learning_rate": 4.1423438705516415e-06, |
|
"loss": 0.0154, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 2.439867884204391, |
|
"grad_norm": 0.22767774713522007, |
|
"learning_rate": 4.087391615049374e-06, |
|
"loss": 0.0141, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 2.443753642898776, |
|
"grad_norm": 0.29789802335991833, |
|
"learning_rate": 4.03276478070219e-06, |
|
"loss": 0.0159, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 2.447639401593161, |
|
"grad_norm": 0.24719135376807377, |
|
"learning_rate": 3.978464484657392e-06, |
|
"loss": 0.0157, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.451525160287546, |
|
"grad_norm": 0.32391016652290383, |
|
"learning_rate": 3.924491837384406e-06, |
|
"loss": 0.0149, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 2.455410918981931, |
|
"grad_norm": 0.20232458142688142, |
|
"learning_rate": 3.87084794265206e-06, |
|
"loss": 0.015, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 2.459296677676316, |
|
"grad_norm": 0.2712629616510874, |
|
"learning_rate": 3.817533897506036e-06, |
|
"loss": 0.0133, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 2.4631824363707016, |
|
"grad_norm": 0.17142782056379016, |
|
"learning_rate": 3.764550792246411e-06, |
|
"loss": 0.0178, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 2.4670681950650866, |
|
"grad_norm": 0.2625497363608154, |
|
"learning_rate": 3.7118997104053557e-06, |
|
"loss": 0.0177, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 2.4709539537594716, |
|
"grad_norm": 0.23418764898240915, |
|
"learning_rate": 3.659581728725017e-06, |
|
"loss": 0.0216, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 2.4748397124538566, |
|
"grad_norm": 0.2421515106857572, |
|
"learning_rate": 3.607597917135448e-06, |
|
"loss": 0.0102, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 2.4787254711482416, |
|
"grad_norm": 0.18002030517689566, |
|
"learning_rate": 3.5559493387327603e-06, |
|
"loss": 0.0125, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 2.4826112298426266, |
|
"grad_norm": 0.22127119378874585, |
|
"learning_rate": 3.5046370497573558e-06, |
|
"loss": 0.0171, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 2.486496988537012, |
|
"grad_norm": 0.24067053535828525, |
|
"learning_rate": 3.4536620995723524e-06, |
|
"loss": 0.0131, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.490382747231397, |
|
"grad_norm": 0.3548945672530695, |
|
"learning_rate": 3.4030255306421254e-06, |
|
"loss": 0.0172, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 2.494268505925782, |
|
"grad_norm": 0.34342268520149605, |
|
"learning_rate": 3.3527283785109565e-06, |
|
"loss": 0.0134, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 2.498154264620167, |
|
"grad_norm": 0.26685487402837554, |
|
"learning_rate": 3.3027716717818925e-06, |
|
"loss": 0.0121, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 2.502040023314552, |
|
"grad_norm": 0.23865361157570286, |
|
"learning_rate": 3.2531564320956745e-06, |
|
"loss": 0.0134, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 2.505925782008937, |
|
"grad_norm": 0.2262219648398464, |
|
"learning_rate": 3.2038836741098756e-06, |
|
"loss": 0.0164, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 2.509811540703322, |
|
"grad_norm": 0.11415732152468076, |
|
"learning_rate": 3.15495440547815e-06, |
|
"loss": 0.0126, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 2.513697299397707, |
|
"grad_norm": 0.1637749047242322, |
|
"learning_rate": 3.1063696268296063e-06, |
|
"loss": 0.0152, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 2.5175830580920926, |
|
"grad_norm": 0.19217591389578745, |
|
"learning_rate": 3.0581303317483367e-06, |
|
"loss": 0.0171, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 2.5214688167864776, |
|
"grad_norm": 0.1744921556168132, |
|
"learning_rate": 3.0102375067531375e-06, |
|
"loss": 0.0133, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 2.5253545754808626, |
|
"grad_norm": 0.19272649371157077, |
|
"learning_rate": 2.962692131277296e-06, |
|
"loss": 0.0131, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.5292403341752476, |
|
"grad_norm": 0.23696107664845892, |
|
"learning_rate": 2.9154951776485905e-06, |
|
"loss": 0.0169, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 2.5331260928696326, |
|
"grad_norm": 0.16637778487180105, |
|
"learning_rate": 2.8686476110693796e-06, |
|
"loss": 0.0167, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 2.537011851564018, |
|
"grad_norm": 0.22456349132177617, |
|
"learning_rate": 2.822150389596867e-06, |
|
"loss": 0.0134, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 2.540897610258403, |
|
"grad_norm": 0.3349504222512524, |
|
"learning_rate": 2.7760044641235295e-06, |
|
"loss": 0.0144, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 2.544783368952788, |
|
"grad_norm": 0.23230866877649609, |
|
"learning_rate": 2.730210778357649e-06, |
|
"loss": 0.0124, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.548669127647173, |
|
"grad_norm": 0.2876662681394313, |
|
"learning_rate": 2.6847702688040357e-06, |
|
"loss": 0.0174, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 2.552554886341558, |
|
"grad_norm": 0.24429770597495445, |
|
"learning_rate": 2.6396838647448353e-06, |
|
"loss": 0.0153, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 2.556440645035943, |
|
"grad_norm": 0.18614206123634575, |
|
"learning_rate": 2.594952488220577e-06, |
|
"loss": 0.0138, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 2.560326403730328, |
|
"grad_norm": 0.1707554494222318, |
|
"learning_rate": 2.550577054011274e-06, |
|
"loss": 0.0097, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 2.564212162424713, |
|
"grad_norm": 0.14383952391367794, |
|
"learning_rate": 2.5065584696177414e-06, |
|
"loss": 0.0149, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.5680979211190986, |
|
"grad_norm": 0.18379892796393424, |
|
"learning_rate": 2.4628976352430376e-06, |
|
"loss": 0.011, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 2.5719836798134836, |
|
"grad_norm": 0.24670126413024498, |
|
"learning_rate": 2.419595443774023e-06, |
|
"loss": 0.0133, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 2.5758694385078686, |
|
"grad_norm": 0.20077779669194387, |
|
"learning_rate": 2.3766527807631422e-06, |
|
"loss": 0.0143, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 2.5797551972022537, |
|
"grad_norm": 0.22864135011568798, |
|
"learning_rate": 2.33407052441029e-06, |
|
"loss": 0.0195, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 2.5836409558966387, |
|
"grad_norm": 0.1525621735784211, |
|
"learning_rate": 2.291849545544853e-06, |
|
"loss": 0.0186, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.587526714591024, |
|
"grad_norm": 0.23345566519875988, |
|
"learning_rate": 2.249990707607912e-06, |
|
"loss": 0.0134, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 2.591412473285409, |
|
"grad_norm": 0.2280149763833634, |
|
"learning_rate": 2.2084948666345695e-06, |
|
"loss": 0.0139, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 2.595298231979794, |
|
"grad_norm": 0.26595705322222424, |
|
"learning_rate": 2.1673628712364538e-06, |
|
"loss": 0.0142, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 2.599183990674179, |
|
"grad_norm": 0.3089658507273733, |
|
"learning_rate": 2.126595562584357e-06, |
|
"loss": 0.0182, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 2.603069749368564, |
|
"grad_norm": 0.2145635655464109, |
|
"learning_rate": 2.0861937743910456e-06, |
|
"loss": 0.0143, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.606955508062949, |
|
"grad_norm": 0.22760812179389878, |
|
"learning_rate": 2.046158332894195e-06, |
|
"loss": 0.014, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 2.610841266757334, |
|
"grad_norm": 0.20004208111148616, |
|
"learning_rate": 2.006490056839496e-06, |
|
"loss": 0.0138, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 2.614727025451719, |
|
"grad_norm": 0.1855756971876611, |
|
"learning_rate": 1.9671897574639233e-06, |
|
"loss": 0.0157, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 2.6186127841461047, |
|
"grad_norm": 0.23769731157003207, |
|
"learning_rate": 1.928258238479133e-06, |
|
"loss": 0.0164, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 2.6224985428404897, |
|
"grad_norm": 0.17194198866392246, |
|
"learning_rate": 1.8896962960550214e-06, |
|
"loss": 0.0123, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.6263843015348747, |
|
"grad_norm": 0.25716223538323435, |
|
"learning_rate": 1.8515047188034651e-06, |
|
"loss": 0.0132, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 2.6302700602292597, |
|
"grad_norm": 0.23923230278799787, |
|
"learning_rate": 1.8136842877621697e-06, |
|
"loss": 0.0132, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 2.6341558189236447, |
|
"grad_norm": 0.18419856300682974, |
|
"learning_rate": 1.7762357763787097e-06, |
|
"loss": 0.011, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 2.63804157761803, |
|
"grad_norm": 0.1915730985861053, |
|
"learning_rate": 1.7391599504947043e-06, |
|
"loss": 0.013, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 2.641927336312415, |
|
"grad_norm": 0.1566125187690401, |
|
"learning_rate": 1.702457568330167e-06, |
|
"loss": 0.0116, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.6458130950068, |
|
"grad_norm": 0.16344705005127574, |
|
"learning_rate": 1.666129380467989e-06, |
|
"loss": 0.0124, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 2.649698853701185, |
|
"grad_norm": 0.19446375972044477, |
|
"learning_rate": 1.63017612983859e-06, |
|
"loss": 0.0154, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 2.65358461239557, |
|
"grad_norm": 0.2483349003041754, |
|
"learning_rate": 1.5945985517047336e-06, |
|
"loss": 0.0176, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 2.657470371089955, |
|
"grad_norm": 0.1788660816854284, |
|
"learning_rate": 1.5593973736464718e-06, |
|
"loss": 0.0158, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 2.66135612978434, |
|
"grad_norm": 0.31388430244796356, |
|
"learning_rate": 1.5245733155462937e-06, |
|
"loss": 0.0117, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 2.6652418884787252, |
|
"grad_norm": 0.09656141608090874, |
|
"learning_rate": 1.4901270895743803e-06, |
|
"loss": 0.0179, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 2.6691276471731107, |
|
"grad_norm": 0.2572799089734027, |
|
"learning_rate": 1.4560594001740503e-06, |
|
"loss": 0.0125, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 2.6730134058674957, |
|
"grad_norm": 0.19767701320137746, |
|
"learning_rate": 1.4223709440473466e-06, |
|
"loss": 0.0149, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 2.6768991645618807, |
|
"grad_norm": 0.2720993871756462, |
|
"learning_rate": 1.3890624101407957e-06, |
|
"loss": 0.0142, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 2.6807849232562657, |
|
"grad_norm": 0.26464941453049734, |
|
"learning_rate": 1.356134479631328e-06, |
|
"loss": 0.0146, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.6846706819506507, |
|
"grad_norm": 0.19524351475983762, |
|
"learning_rate": 1.3235878259123226e-06, |
|
"loss": 0.0182, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 2.688556440645036, |
|
"grad_norm": 0.26891449363176717, |
|
"learning_rate": 1.2914231145798462e-06, |
|
"loss": 0.0185, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 2.692442199339421, |
|
"grad_norm": 0.3011765177624915, |
|
"learning_rate": 1.2596410034190543e-06, |
|
"loss": 0.014, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 2.696327958033806, |
|
"grad_norm": 0.3020550635806977, |
|
"learning_rate": 1.228242142390721e-06, |
|
"loss": 0.0128, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 2.700213716728191, |
|
"grad_norm": 0.24847929057204657, |
|
"learning_rate": 1.1972271736179653e-06, |
|
"loss": 0.0126, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 2.7040994754225762, |
|
"grad_norm": 0.14385502966292424, |
|
"learning_rate": 1.166596731373102e-06, |
|
"loss": 0.0142, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 2.7079852341169612, |
|
"grad_norm": 0.27969175657328743, |
|
"learning_rate": 1.1363514420646738e-06, |
|
"loss": 0.0122, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 2.7118709928113462, |
|
"grad_norm": 0.23975714101635787, |
|
"learning_rate": 1.1064919242246486e-06, |
|
"loss": 0.018, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 2.7157567515057313, |
|
"grad_norm": 0.18202493395247812, |
|
"learning_rate": 1.0770187884957673e-06, |
|
"loss": 0.0161, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 2.7196425102001167, |
|
"grad_norm": 0.15583830698569281, |
|
"learning_rate": 1.0479326376190602e-06, |
|
"loss": 0.0121, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.7235282688945017, |
|
"grad_norm": 0.23832508481487272, |
|
"learning_rate": 1.0192340664214995e-06, |
|
"loss": 0.0154, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 2.7274140275888867, |
|
"grad_norm": 0.16847478281366923, |
|
"learning_rate": 9.909236618038665e-07, |
|
"loss": 0.0146, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 2.7312997862832717, |
|
"grad_norm": 0.30569535019277033, |
|
"learning_rate": 9.630020027287213e-07, |
|
"loss": 0.0181, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 2.7351855449776568, |
|
"grad_norm": 0.23680858512407482, |
|
"learning_rate": 9.354696602085833e-07, |
|
"loss": 0.0179, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 2.739071303672042, |
|
"grad_norm": 0.24801887023205305, |
|
"learning_rate": 9.083271972942431e-07, |
|
"loss": 0.0178, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 2.7429570623664272, |
|
"grad_norm": 0.18244403571549045, |
|
"learning_rate": 8.815751690632423e-07, |
|
"loss": 0.0134, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 2.7468428210608122, |
|
"grad_norm": 0.3177601765145437, |
|
"learning_rate": 8.552141226085408e-07, |
|
"loss": 0.0135, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 2.7507285797551972, |
|
"grad_norm": 0.2478869375091213, |
|
"learning_rate": 8.292445970273055e-07, |
|
"loss": 0.0131, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 2.7546143384495823, |
|
"grad_norm": 0.16351159527021386, |
|
"learning_rate": 8.03667123409908e-07, |
|
"loss": 0.0116, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 2.7585000971439673, |
|
"grad_norm": 0.16164434978464962, |
|
"learning_rate": 7.784822248290424e-07, |
|
"loss": 0.0151, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.7623858558383523, |
|
"grad_norm": 0.19368227723734358, |
|
"learning_rate": 7.53690416329047e-07, |
|
"loss": 0.0115, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 2.7662716145327373, |
|
"grad_norm": 0.19844203086570225, |
|
"learning_rate": 7.292922049153528e-07, |
|
"loss": 0.0158, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 2.7701573732271227, |
|
"grad_norm": 0.12080846948403764, |
|
"learning_rate": 7.052880895441339e-07, |
|
"loss": 0.0117, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 2.7740431319215078, |
|
"grad_norm": 0.15523717768836803, |
|
"learning_rate": 6.816785611120913e-07, |
|
"loss": 0.0133, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 2.7779288906158928, |
|
"grad_norm": 0.25279826141773354, |
|
"learning_rate": 6.584641024464122e-07, |
|
"loss": 0.0108, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 2.7818146493102778, |
|
"grad_norm": 0.1026232646491208, |
|
"learning_rate": 6.356451882949088e-07, |
|
"loss": 0.0137, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 2.785700408004663, |
|
"grad_norm": 0.1843621477660204, |
|
"learning_rate": 6.132222853162972e-07, |
|
"loss": 0.0171, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 2.7895861666990482, |
|
"grad_norm": 0.2974243267699617, |
|
"learning_rate": 5.911958520706562e-07, |
|
"loss": 0.0105, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 2.7934719253934333, |
|
"grad_norm": 0.1450131937905085, |
|
"learning_rate": 5.695663390100548e-07, |
|
"loss": 0.0181, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 2.7973576840878183, |
|
"grad_norm": 0.19449961006516722, |
|
"learning_rate": 5.483341884693327e-07, |
|
"loss": 0.0151, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.8012434427822033, |
|
"grad_norm": 0.231835400931022, |
|
"learning_rate": 5.274998346570659e-07, |
|
"loss": 0.0174, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 2.8051292014765883, |
|
"grad_norm": 0.18990330928555402, |
|
"learning_rate": 5.070637036466753e-07, |
|
"loss": 0.0163, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 2.8090149601709733, |
|
"grad_norm": 0.19725672735140468, |
|
"learning_rate": 4.870262133677072e-07, |
|
"loss": 0.0117, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 2.8129007188653583, |
|
"grad_norm": 0.3493633046330908, |
|
"learning_rate": 4.6738777359731866e-07, |
|
"loss": 0.017, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 2.8167864775597433, |
|
"grad_norm": 0.2268035823210108, |
|
"learning_rate": 4.481487859518563e-07, |
|
"loss": 0.0112, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 2.8206722362541288, |
|
"grad_norm": 0.26841266073002745, |
|
"learning_rate": 4.293096438786726e-07, |
|
"loss": 0.0176, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 2.824557994948514, |
|
"grad_norm": 0.20423668146731536, |
|
"learning_rate": 4.108707326480632e-07, |
|
"loss": 0.0128, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 2.828443753642899, |
|
"grad_norm": 0.1743068137356259, |
|
"learning_rate": 3.9283242934539555e-07, |
|
"loss": 0.0111, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 2.832329512337284, |
|
"grad_norm": 0.2340589582843017, |
|
"learning_rate": 3.751951028633971e-07, |
|
"loss": 0.0125, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 2.836215271031669, |
|
"grad_norm": 0.21054160485426174, |
|
"learning_rate": 3.5795911389461033e-07, |
|
"loss": 0.012, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.8401010297260543, |
|
"grad_norm": 0.17738056254213855, |
|
"learning_rate": 3.411248149240165e-07, |
|
"loss": 0.0131, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 2.8439867884204393, |
|
"grad_norm": 0.18120809069881422, |
|
"learning_rate": 3.24692550221819e-07, |
|
"loss": 0.0126, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 2.8478725471148243, |
|
"grad_norm": 0.27728578365868417, |
|
"learning_rate": 3.086626558364203e-07, |
|
"loss": 0.0133, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 2.8517583058092093, |
|
"grad_norm": 0.15083855508982252, |
|
"learning_rate": 2.93035459587534e-07, |
|
"loss": 0.0111, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 2.8556440645035943, |
|
"grad_norm": 0.20682581691820806, |
|
"learning_rate": 2.7781128105949015e-07, |
|
"loss": 0.0158, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 2.8595298231979793, |
|
"grad_norm": 0.19888880451656954, |
|
"learning_rate": 2.6299043159468963e-07, |
|
"loss": 0.0199, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 2.8634155818923643, |
|
"grad_norm": 0.24115048647296294, |
|
"learning_rate": 2.485732142872488e-07, |
|
"loss": 0.0119, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 2.8673013405867493, |
|
"grad_norm": 0.20653770018813603, |
|
"learning_rate": 2.3455992397679595e-07, |
|
"loss": 0.0158, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 2.871187099281135, |
|
"grad_norm": 0.18076322308667522, |
|
"learning_rate": 2.2095084724243598e-07, |
|
"loss": 0.0125, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 2.87507285797552, |
|
"grad_norm": 0.20736237506128907, |
|
"learning_rate": 2.0774626239690176e-07, |
|
"loss": 0.0117, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.878958616669905, |
|
"grad_norm": 0.195234932120182, |
|
"learning_rate": 1.9494643948084979e-07, |
|
"loss": 0.0141, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 2.88284437536429, |
|
"grad_norm": 0.2350430358349683, |
|
"learning_rate": 1.8255164025734684e-07, |
|
"loss": 0.0123, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 2.886730134058675, |
|
"grad_norm": 0.2192321643235776, |
|
"learning_rate": 1.7056211820651425e-07, |
|
"loss": 0.0128, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 2.8906158927530603, |
|
"grad_norm": 0.2649035408787872, |
|
"learning_rate": 1.5897811852033873e-07, |
|
"loss": 0.0174, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 2.8945016514474453, |
|
"grad_norm": 0.1967812235705591, |
|
"learning_rate": 1.4779987809766528e-07, |
|
"loss": 0.0143, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 2.8983874101418303, |
|
"grad_norm": 0.1249890086380848, |
|
"learning_rate": 1.3702762553935656e-07, |
|
"loss": 0.0119, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 2.9022731688362153, |
|
"grad_norm": 0.22978595448983524, |
|
"learning_rate": 1.2666158114359894e-07, |
|
"loss": 0.0146, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 2.9061589275306003, |
|
"grad_norm": 0.16965468919403492, |
|
"learning_rate": 1.1670195690141939e-07, |
|
"loss": 0.0138, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 2.9100446862249854, |
|
"grad_norm": 0.28228206459702576, |
|
"learning_rate": 1.0714895649233781e-07, |
|
"loss": 0.0129, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 2.9139304449193704, |
|
"grad_norm": 0.20835241859756748, |
|
"learning_rate": 9.800277528020153e-08, |
|
"loss": 0.0151, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.9178162036137554, |
|
"grad_norm": 0.16424147151499843, |
|
"learning_rate": 8.926360030919513e-08, |
|
"loss": 0.0149, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 2.9217019623081404, |
|
"grad_norm": 0.1759921900549206, |
|
"learning_rate": 8.093161030001462e-08, |
|
"loss": 0.0108, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 2.925587721002526, |
|
"grad_norm": 0.2025940367611509, |
|
"learning_rate": 7.300697564620596e-08, |
|
"loss": 0.0124, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 2.929473479696911, |
|
"grad_norm": 0.3091292487177528, |
|
"learning_rate": 6.548985841069e-08, |
|
"loss": 0.0171, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 2.933359238391296, |
|
"grad_norm": 0.18550760451795564, |
|
"learning_rate": 5.8380412322440736e-08, |
|
"loss": 0.0098, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 2.937244997085681, |
|
"grad_norm": 0.2399355018987987, |
|
"learning_rate": 5.167878277334559e-08, |
|
"loss": 0.0105, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 2.9411307557800663, |
|
"grad_norm": 0.2129680096079058, |
|
"learning_rate": 4.538510681523001e-08, |
|
"loss": 0.0151, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 2.9450165144744513, |
|
"grad_norm": 0.15339355552498427, |
|
"learning_rate": 3.949951315705303e-08, |
|
"loss": 0.0154, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 2.9489022731688364, |
|
"grad_norm": 0.20862935241082844, |
|
"learning_rate": 3.4022122162282736e-08, |
|
"loss": 0.0114, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 2.9527880318632214, |
|
"grad_norm": 0.23246847497172307, |
|
"learning_rate": 2.895304584642711e-08, |
|
"loss": 0.0177, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.9566737905576064, |
|
"grad_norm": 0.20519849079048713, |
|
"learning_rate": 2.429238787474475e-08, |
|
"loss": 0.0126, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 2.9605595492519914, |
|
"grad_norm": 0.19918834292961288, |
|
"learning_rate": 2.004024356012435e-08, |
|
"loss": 0.0161, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 2.9644453079463764, |
|
"grad_norm": 0.19800296749468346, |
|
"learning_rate": 1.6196699861139586e-08, |
|
"loss": 0.0151, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 2.9683310666407614, |
|
"grad_norm": 0.1967950909462482, |
|
"learning_rate": 1.2761835380268317e-08, |
|
"loss": 0.0159, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 2.9722168253351464, |
|
"grad_norm": 0.2624813305392548, |
|
"learning_rate": 9.735720362282763e-09, |
|
"loss": 0.016, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 2.976102584029532, |
|
"grad_norm": 0.24779472153476578, |
|
"learning_rate": 7.1184166928151e-09, |
|
"loss": 0.0145, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 2.979988342723917, |
|
"grad_norm": 0.25286229822920325, |
|
"learning_rate": 4.90997789709402e-09, |
|
"loss": 0.0131, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 2.983874101418302, |
|
"grad_norm": 0.1409302028113683, |
|
"learning_rate": 3.1104491388478375e-09, |
|
"loss": 0.0154, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 2.987759860112687, |
|
"grad_norm": 0.24024651305037117, |
|
"learning_rate": 1.719867219378557e-09, |
|
"loss": 0.0142, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 2.991645618807072, |
|
"grad_norm": 0.1761325503880173, |
|
"learning_rate": 7.382605768113671e-10, |
|
"loss": 0.0123, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.9955313775014574, |
|
"grad_norm": 0.17792976628823454, |
|
"learning_rate": 1.6564928551732195e-10, |
|
"loss": 0.0113, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 2.999028560326404, |
|
"step": 7719, |
|
"total_flos": 301241258213376.0, |
|
"train_loss": 0.03801638325974335, |
|
"train_runtime": 73801.1602, |
|
"train_samples_per_second": 1.674, |
|
"train_steps_per_second": 0.105 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 7719, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 301241258213376.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|