{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9990309034133737,
  "eval_steps": 500,
  "global_step": 6963,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004307095940562076,
      "grad_norm": 9.035698890686035,
      "learning_rate": 5.730659025787966e-07,
      "loss": 1.2789,
      "step": 10
    },
    {
      "epoch": 0.008614191881124151,
      "grad_norm": 6.42362642288208,
      "learning_rate": 1.1461318051575932e-06,
      "loss": 1.1797,
      "step": 20
    },
    {
      "epoch": 0.012921287821686228,
      "grad_norm": 1.6813161373138428,
      "learning_rate": 1.7191977077363897e-06,
      "loss": 0.9736,
      "step": 30
    },
    {
      "epoch": 0.017228383762248303,
      "grad_norm": 0.9371042251586914,
      "learning_rate": 2.2922636103151864e-06,
      "loss": 0.871,
      "step": 40
    },
    {
      "epoch": 0.02153547970281038,
      "grad_norm": 0.7695503234863281,
      "learning_rate": 2.865329512893983e-06,
      "loss": 0.8025,
      "step": 50
    },
    {
      "epoch": 0.025842575643372456,
      "grad_norm": 0.716374397277832,
      "learning_rate": 3.4383954154727795e-06,
      "loss": 0.7698,
      "step": 60
    },
    {
      "epoch": 0.030149671583934532,
      "grad_norm": 0.6864265203475952,
      "learning_rate": 4.011461318051576e-06,
      "loss": 0.7701,
      "step": 70
    },
    {
      "epoch": 0.034456767524496605,
      "grad_norm": 0.7581704258918762,
      "learning_rate": 4.584527220630373e-06,
      "loss": 0.7619,
      "step": 80
    },
    {
      "epoch": 0.03876386346505868,
      "grad_norm": 0.6616791486740112,
      "learning_rate": 5.157593123209169e-06,
      "loss": 0.7296,
      "step": 90
    },
    {
      "epoch": 0.04307095940562076,
      "grad_norm": 0.6397051811218262,
      "learning_rate": 5.730659025787966e-06,
      "loss": 0.7453,
      "step": 100
    },
    {
      "epoch": 0.047378055346182835,
      "grad_norm": 0.6572911143302917,
      "learning_rate": 6.303724928366762e-06,
      "loss": 0.767,
      "step": 110
    },
    {
      "epoch": 0.05168515128674491,
      "grad_norm": 0.669222354888916,
      "learning_rate": 6.876790830945559e-06,
      "loss": 0.7369,
      "step": 120
    },
    {
      "epoch": 0.05599224722730699,
      "grad_norm": 0.6517964601516724,
      "learning_rate": 7.449856733524356e-06,
      "loss": 0.7186,
      "step": 130
    },
    {
      "epoch": 0.060299343167869064,
      "grad_norm": 0.6209223866462708,
      "learning_rate": 8.022922636103152e-06,
      "loss": 0.7155,
      "step": 140
    },
    {
      "epoch": 0.06460643910843114,
      "grad_norm": 0.6591508388519287,
      "learning_rate": 8.595988538681949e-06,
      "loss": 0.7289,
      "step": 150
    },
    {
      "epoch": 0.06891353504899321,
      "grad_norm": 0.5842370390892029,
      "learning_rate": 9.169054441260746e-06,
      "loss": 0.7183,
      "step": 160
    },
    {
      "epoch": 0.0732206309895553,
      "grad_norm": 0.7117204070091248,
      "learning_rate": 9.742120343839543e-06,
      "loss": 0.7192,
      "step": 170
    },
    {
      "epoch": 0.07752772693011736,
      "grad_norm": 0.6163178086280823,
      "learning_rate": 1.0315186246418338e-05,
      "loss": 0.7193,
      "step": 180
    },
    {
      "epoch": 0.08183482287067945,
      "grad_norm": 0.5932906270027161,
      "learning_rate": 1.0888252148997137e-05,
      "loss": 0.714,
      "step": 190
    },
    {
      "epoch": 0.08614191881124152,
      "grad_norm": 0.5982919335365295,
      "learning_rate": 1.1461318051575932e-05,
      "loss": 0.7058,
      "step": 200
    },
    {
      "epoch": 0.0904490147518036,
      "grad_norm": 0.6208463907241821,
      "learning_rate": 1.2034383954154729e-05,
      "loss": 0.7189,
      "step": 210
    },
    {
      "epoch": 0.09475611069236567,
      "grad_norm": 0.5887411236763,
      "learning_rate": 1.2607449856733524e-05,
      "loss": 0.7249,
      "step": 220
    },
    {
      "epoch": 0.09906320663292775,
      "grad_norm": 0.5963988900184631,
      "learning_rate": 1.3180515759312323e-05,
      "loss": 0.7293,
      "step": 230
    },
    {
      "epoch": 0.10337030257348982,
      "grad_norm": 0.5715692043304443,
      "learning_rate": 1.3753581661891118e-05,
      "loss": 0.6845,
      "step": 240
    },
    {
      "epoch": 0.1076773985140519,
      "grad_norm": 0.639398455619812,
      "learning_rate": 1.4326647564469915e-05,
      "loss": 0.6994,
      "step": 250
    },
    {
      "epoch": 0.11198449445461398,
      "grad_norm": 0.6884477734565735,
      "learning_rate": 1.4899713467048712e-05,
      "loss": 0.7126,
      "step": 260
    },
    {
      "epoch": 0.11629159039517606,
      "grad_norm": 0.6021578907966614,
      "learning_rate": 1.5472779369627507e-05,
      "loss": 0.7215,
      "step": 270
    },
    {
      "epoch": 0.12059868633573813,
      "grad_norm": 0.6716468930244446,
      "learning_rate": 1.6045845272206304e-05,
      "loss": 0.6969,
      "step": 280
    },
    {
      "epoch": 0.1249057822763002,
      "grad_norm": 0.5783571600914001,
      "learning_rate": 1.66189111747851e-05,
      "loss": 0.7111,
      "step": 290
    },
    {
      "epoch": 0.12921287821686228,
      "grad_norm": 0.5546681880950928,
      "learning_rate": 1.7191977077363898e-05,
      "loss": 0.7,
      "step": 300
    },
    {
      "epoch": 0.13351997415742436,
      "grad_norm": 0.5409330129623413,
      "learning_rate": 1.7765042979942695e-05,
      "loss": 0.696,
      "step": 310
    },
    {
      "epoch": 0.13782707009798642,
      "grad_norm": 0.5752865672111511,
      "learning_rate": 1.833810888252149e-05,
      "loss": 0.6883,
      "step": 320
    },
    {
      "epoch": 0.1421341660385485,
      "grad_norm": 0.6340565085411072,
      "learning_rate": 1.891117478510029e-05,
      "loss": 0.6881,
      "step": 330
    },
    {
      "epoch": 0.1464412619791106,
      "grad_norm": 0.5298891067504883,
      "learning_rate": 1.9484240687679085e-05,
      "loss": 0.6935,
      "step": 340
    },
    {
      "epoch": 0.15074835791967267,
      "grad_norm": 0.5659753680229187,
      "learning_rate": 1.9999998871916207e-05,
      "loss": 0.7103,
      "step": 350
    },
    {
      "epoch": 0.15505545386023473,
      "grad_norm": 0.6017744541168213,
      "learning_rate": 1.999986350216883e-05,
      "loss": 0.6855,
      "step": 360
    },
    {
      "epoch": 0.1593625498007968,
      "grad_norm": 0.5426760911941528,
      "learning_rate": 1.999950251916212e-05,
      "loss": 0.6914,
      "step": 370
    },
    {
      "epoch": 0.1636696457413589,
      "grad_norm": 0.5532637238502502,
      "learning_rate": 1.999891593104044e-05,
      "loss": 0.6895,
      "step": 380
    },
    {
      "epoch": 0.16797674168192098,
      "grad_norm": 0.5581168532371521,
      "learning_rate": 1.9998103751038177e-05,
      "loss": 0.6897,
      "step": 390
    },
    {
      "epoch": 0.17228383762248303,
      "grad_norm": 0.5208210945129395,
      "learning_rate": 1.9997065997479442e-05,
      "loss": 0.6889,
      "step": 400
    },
    {
      "epoch": 0.17659093356304512,
      "grad_norm": 0.5863595604896545,
      "learning_rate": 1.9995802693777644e-05,
      "loss": 0.6905,
      "step": 410
    },
    {
      "epoch": 0.1808980295036072,
      "grad_norm": 0.5605342984199524,
      "learning_rate": 1.9994313868434988e-05,
      "loss": 0.6815,
      "step": 420
    },
    {
      "epoch": 0.18520512544416926,
      "grad_norm": 0.5580301880836487,
      "learning_rate": 1.9992599555041798e-05,
      "loss": 0.7067,
      "step": 430
    },
    {
      "epoch": 0.18951222138473134,
      "grad_norm": 0.558312177658081,
      "learning_rate": 1.999065979227579e-05,
      "loss": 0.7061,
      "step": 440
    },
    {
      "epoch": 0.19381931732529342,
      "grad_norm": 0.5273975133895874,
      "learning_rate": 1.998849462390118e-05,
      "loss": 0.6905,
      "step": 450
    },
    {
      "epoch": 0.1981264132658555,
      "grad_norm": 0.4772217571735382,
      "learning_rate": 1.9986104098767703e-05,
      "loss": 0.686,
      "step": 460
    },
    {
      "epoch": 0.20243350920641756,
      "grad_norm": 0.5336763858795166,
      "learning_rate": 1.9983488270809515e-05,
      "loss": 0.6861,
      "step": 470
    },
    {
      "epoch": 0.20674060514697964,
      "grad_norm": 0.4961983859539032,
      "learning_rate": 1.9980647199043966e-05,
      "loss": 0.6882,
      "step": 480
    },
    {
      "epoch": 0.21104770108754173,
      "grad_norm": 0.5408128499984741,
      "learning_rate": 1.9977580947570275e-05,
      "loss": 0.7001,
      "step": 490
    },
    {
      "epoch": 0.2153547970281038,
      "grad_norm": 0.5350680351257324,
      "learning_rate": 1.997428958556809e-05,
      "loss": 0.6931,
      "step": 500
    },
    {
      "epoch": 0.21966189296866587,
      "grad_norm": 0.5455281734466553,
      "learning_rate": 1.9970773187295917e-05,
      "loss": 0.6919,
      "step": 510
    },
    {
      "epoch": 0.22396898890922795,
      "grad_norm": 0.524664580821991,
      "learning_rate": 1.9967031832089438e-05,
      "loss": 0.6738,
      "step": 520
    },
    {
      "epoch": 0.22827608484979003,
      "grad_norm": 0.48598727583885193,
      "learning_rate": 1.9963065604359746e-05,
      "loss": 0.6678,
      "step": 530
    },
    {
      "epoch": 0.23258318079035212,
      "grad_norm": 0.5560494065284729,
      "learning_rate": 1.9958874593591418e-05,
      "loss": 0.694,
      "step": 540
    },
    {
      "epoch": 0.23689027673091417,
      "grad_norm": 0.5516777038574219,
      "learning_rate": 1.99544588943405e-05,
      "loss": 0.6715,
      "step": 550
    },
    {
      "epoch": 0.24119737267147626,
      "grad_norm": 0.5097941756248474,
      "learning_rate": 1.9949818606232393e-05,
      "loss": 0.6782,
      "step": 560
    },
    {
      "epoch": 0.24550446861203834,
      "grad_norm": 0.5353350639343262,
      "learning_rate": 1.9944953833959567e-05,
      "loss": 0.6904,
      "step": 570
    },
    {
      "epoch": 0.2498115645526004,
      "grad_norm": 0.5160298943519592,
      "learning_rate": 1.9939864687279237e-05,
      "loss": 0.6756,
      "step": 580
    },
    {
      "epoch": 0.2541186604931625,
      "grad_norm": 0.5377163887023926,
      "learning_rate": 1.993455128101087e-05,
      "loss": 0.712,
      "step": 590
    },
    {
      "epoch": 0.25842575643372456,
      "grad_norm": 0.47318100929260254,
      "learning_rate": 1.992901373503359e-05,
      "loss": 0.6648,
      "step": 600
    },
    {
      "epoch": 0.2627328523742866,
      "grad_norm": 0.4977729916572571,
      "learning_rate": 1.992325217428348e-05,
      "loss": 0.6893,
      "step": 610
    },
    {
      "epoch": 0.26703994831484873,
      "grad_norm": 0.5569038391113281,
      "learning_rate": 1.991726672875077e-05,
      "loss": 0.6876,
      "step": 620
    },
    {
      "epoch": 0.2713470442554108,
      "grad_norm": 0.544884443283081,
      "learning_rate": 1.9911057533476884e-05,
      "loss": 0.6736,
      "step": 630
    },
    {
      "epoch": 0.27565414019597284,
      "grad_norm": 0.5159808993339539,
      "learning_rate": 1.9904624728551417e-05,
      "loss": 0.674,
      "step": 640
    },
    {
      "epoch": 0.27996123613653495,
      "grad_norm": 0.48680537939071655,
      "learning_rate": 1.989796845910896e-05,
      "loss": 0.6903,
      "step": 650
    },
    {
      "epoch": 0.284268332077097,
      "grad_norm": 0.527867317199707,
      "learning_rate": 1.9891088875325827e-05,
      "loss": 0.6693,
      "step": 660
    },
    {
      "epoch": 0.2885754280176591,
      "grad_norm": 0.5441365838050842,
      "learning_rate": 1.988398613241666e-05,
      "loss": 0.6721,
      "step": 670
    },
    {
      "epoch": 0.2928825239582212,
      "grad_norm": 0.5693966150283813,
      "learning_rate": 1.9876660390630954e-05,
      "loss": 0.6684,
      "step": 680
    },
    {
      "epoch": 0.29718961989878323,
      "grad_norm": 0.5607503652572632,
      "learning_rate": 1.986911181524941e-05,
      "loss": 0.6783,
      "step": 690
    },
    {
      "epoch": 0.30149671583934534,
      "grad_norm": 0.5421719551086426,
      "learning_rate": 1.9861340576580225e-05,
      "loss": 0.6658,
      "step": 700
    },
    {
      "epoch": 0.3058038117799074,
      "grad_norm": 0.497612863779068,
      "learning_rate": 1.9853346849955236e-05,
      "loss": 0.6816,
      "step": 710
    },
    {
      "epoch": 0.31011090772046945,
      "grad_norm": 0.5503632426261902,
      "learning_rate": 1.984513081572598e-05,
      "loss": 0.6663,
      "step": 720
    },
    {
      "epoch": 0.31441800366103156,
      "grad_norm": 0.5319767594337463,
      "learning_rate": 1.983669265925961e-05,
      "loss": 0.6513,
      "step": 730
    },
    {
      "epoch": 0.3187250996015936,
      "grad_norm": 0.5350950956344604,
      "learning_rate": 1.9828032570934726e-05,
      "loss": 0.6699,
      "step": 740
    },
    {
      "epoch": 0.3230321955421557,
      "grad_norm": 0.5330127477645874,
      "learning_rate": 1.9819150746137067e-05,
      "loss": 0.6786,
      "step": 750
    },
    {
      "epoch": 0.3273392914827178,
      "grad_norm": 0.4740910232067108,
      "learning_rate": 1.981004738525512e-05,
      "loss": 0.6867,
      "step": 760
    },
    {
      "epoch": 0.33164638742327984,
      "grad_norm": 0.5131900906562805,
      "learning_rate": 1.980072269367557e-05,
      "loss": 0.6618,
      "step": 770
    },
    {
      "epoch": 0.33595348336384195,
      "grad_norm": 0.4712623059749603,
      "learning_rate": 1.97911768817787e-05,
      "loss": 0.6863,
      "step": 780
    },
    {
      "epoch": 0.340260579304404,
      "grad_norm": 0.5240254998207092,
      "learning_rate": 1.9781410164933626e-05,
      "loss": 0.6941,
      "step": 790
    },
    {
      "epoch": 0.34456767524496607,
      "grad_norm": 0.5192612409591675,
      "learning_rate": 1.9771422763493434e-05,
      "loss": 0.6726,
      "step": 800
    },
    {
      "epoch": 0.3488747711855282,
      "grad_norm": 0.4864448010921478,
      "learning_rate": 1.9761214902790217e-05,
      "loss": 0.6541,
      "step": 810
    },
    {
      "epoch": 0.35318186712609023,
      "grad_norm": 0.5248873829841614,
      "learning_rate": 1.9750786813129995e-05,
      "loss": 0.6713,
      "step": 820
    },
    {
      "epoch": 0.3574889630666523,
      "grad_norm": 0.5010212659835815,
      "learning_rate": 1.9740138729787505e-05,
      "loss": 0.6793,
      "step": 830
    },
    {
      "epoch": 0.3617960590072144,
      "grad_norm": 0.4966225326061249,
      "learning_rate": 1.9729270893000913e-05,
      "loss": 0.6692,
      "step": 840
    },
    {
      "epoch": 0.36610315494777645,
      "grad_norm": 0.48576685786247253,
      "learning_rate": 1.9718183547966366e-05,
      "loss": 0.6812,
      "step": 850
    },
    {
      "epoch": 0.3704102508883385,
      "grad_norm": 0.5232109427452087,
      "learning_rate": 1.9706876944832486e-05,
      "loss": 0.6567,
      "step": 860
    },
    {
      "epoch": 0.3747173468289006,
      "grad_norm": 0.4847777485847473,
      "learning_rate": 1.9695351338694713e-05,
      "loss": 0.6638,
      "step": 870
    },
    {
      "epoch": 0.3790244427694627,
      "grad_norm": 0.49412795901298523,
      "learning_rate": 1.9683606989589553e-05,
      "loss": 0.6731,
      "step": 880
    },
    {
      "epoch": 0.3833315387100248,
      "grad_norm": 0.5143546462059021,
      "learning_rate": 1.9671644162488716e-05,
      "loss": 0.6779,
      "step": 890
    },
    {
      "epoch": 0.38763863465058684,
      "grad_norm": 0.5516107082366943,
      "learning_rate": 1.965946312729312e-05,
      "loss": 0.6798,
      "step": 900
    },
    {
      "epoch": 0.3919457305911489,
      "grad_norm": 0.5140990018844604,
      "learning_rate": 1.9647064158826825e-05,
      "loss": 0.6473,
      "step": 910
    },
    {
      "epoch": 0.396252826531711,
      "grad_norm": 0.4911974370479584,
      "learning_rate": 1.9634447536830815e-05,
      "loss": 0.6565,
      "step": 920
    },
    {
      "epoch": 0.40055992247227307,
      "grad_norm": 0.4995877742767334,
      "learning_rate": 1.9621613545956703e-05,
      "loss": 0.6514,
      "step": 930
    },
    {
      "epoch": 0.4048670184128351,
      "grad_norm": 0.48752328753471375,
      "learning_rate": 1.9608562475760287e-05,
      "loss": 0.6751,
      "step": 940
    },
    {
      "epoch": 0.40917411435339723,
      "grad_norm": 0.4956004321575165,
      "learning_rate": 1.9595294620695036e-05,
      "loss": 0.6492,
      "step": 950
    },
    {
      "epoch": 0.4134812102939593,
      "grad_norm": 0.48215603828430176,
      "learning_rate": 1.958181028010544e-05,
      "loss": 0.6741,
      "step": 960
    },
    {
      "epoch": 0.4177883062345214,
      "grad_norm": 0.48835939168930054,
      "learning_rate": 1.9568109758220253e-05,
      "loss": 0.6638,
      "step": 970
    },
    {
      "epoch": 0.42209540217508346,
      "grad_norm": 0.47754788398742676,
      "learning_rate": 1.9554193364145635e-05,
      "loss": 0.6657,
      "step": 980
    },
    {
      "epoch": 0.4264024981156455,
      "grad_norm": 0.5080917477607727,
      "learning_rate": 1.9540061411858172e-05,
      "loss": 0.6675,
      "step": 990
    },
    {
      "epoch": 0.4307095940562076,
      "grad_norm": 0.4634297788143158,
      "learning_rate": 1.9525714220197802e-05,
      "loss": 0.6693,
      "step": 1000
    },
    {
      "epoch": 0.4350166899967697,
      "grad_norm": 0.4760366678237915,
      "learning_rate": 1.951115211286061e-05,
      "loss": 0.6721,
      "step": 1010
    },
    {
      "epoch": 0.43932378593733173,
      "grad_norm": 0.5227916836738586,
      "learning_rate": 1.9496375418391525e-05,
      "loss": 0.6691,
      "step": 1020
    },
    {
      "epoch": 0.44363088187789385,
      "grad_norm": 0.5157990455627441,
      "learning_rate": 1.948138447017692e-05,
      "loss": 0.6774,
      "step": 1030
    },
    {
      "epoch": 0.4479379778184559,
      "grad_norm": 0.49596408009529114,
      "learning_rate": 1.9466179606437087e-05,
      "loss": 0.6313,
      "step": 1040
    },
    {
      "epoch": 0.45224507375901796,
      "grad_norm": 0.47041237354278564,
      "learning_rate": 1.945076117021859e-05,
      "loss": 0.6724,
      "step": 1050
    },
    {
      "epoch": 0.45655216969958007,
      "grad_norm": 0.5206364989280701,
      "learning_rate": 1.9435129509386538e-05,
      "loss": 0.6843,
      "step": 1060
    },
    {
      "epoch": 0.4608592656401421,
      "grad_norm": 0.5067657828330994,
      "learning_rate": 1.9419284976616745e-05,
      "loss": 0.6649,
      "step": 1070
    },
    {
      "epoch": 0.46516636158070424,
      "grad_norm": 1.3445152044296265,
      "learning_rate": 1.9403227929387756e-05,
      "loss": 0.6548,
      "step": 1080
    },
    {
      "epoch": 0.4694734575212663,
      "grad_norm": 0.5465224385261536,
      "learning_rate": 1.93869587299728e-05,
      "loss": 0.6427,
      "step": 1090
    },
    {
      "epoch": 0.47378055346182835,
      "grad_norm": 0.49137911200523376,
      "learning_rate": 1.9370477745431587e-05,
      "loss": 0.6519,
      "step": 1100
    },
    {
      "epoch": 0.47808764940239046,
      "grad_norm": 0.48190736770629883,
      "learning_rate": 1.935378534760206e-05,
      "loss": 0.6615,
      "step": 1110
    },
    {
      "epoch": 0.4823947453429525,
      "grad_norm": 0.4869353771209717,
      "learning_rate": 1.9336881913091992e-05,
      "loss": 0.65,
      "step": 1120
    },
    {
      "epoch": 0.48670184128351457,
      "grad_norm": 0.4473590552806854,
      "learning_rate": 1.931976782327048e-05,
      "loss": 0.6821,
      "step": 1130
    },
    {
      "epoch": 0.4910089372240767,
      "grad_norm": 0.4703207314014435,
      "learning_rate": 1.9302443464259352e-05,
      "loss": 0.657,
      "step": 1140
    },
    {
      "epoch": 0.49531603316463874,
      "grad_norm": 0.48172295093536377,
      "learning_rate": 1.9284909226924457e-05,
      "loss": 0.6581,
      "step": 1150
    },
    {
      "epoch": 0.4996231291052008,
      "grad_norm": 0.4986841082572937,
      "learning_rate": 1.9267165506866835e-05,
      "loss": 0.664,
      "step": 1160
    },
    {
      "epoch": 0.5039302250457629,
      "grad_norm": 0.4936910569667816,
      "learning_rate": 1.9249212704413803e-05,
      "loss": 0.6409,
      "step": 1170
    },
    {
      "epoch": 0.508237320986325,
      "grad_norm": 0.48618724942207336,
      "learning_rate": 1.9231051224609918e-05,
      "loss": 0.6566,
      "step": 1180
    },
    {
      "epoch": 0.512544416926887,
      "grad_norm": 0.5300356149673462,
      "learning_rate": 1.921268147720784e-05,
      "loss": 0.6533,
      "step": 1190
    },
    {
      "epoch": 0.5168515128674491,
      "grad_norm": 0.4799743890762329,
      "learning_rate": 1.919410387665908e-05,
      "loss": 0.6677,
      "step": 1200
    },
    {
      "epoch": 0.5211586088080112,
      "grad_norm": 0.5317394137382507,
      "learning_rate": 1.9175318842104667e-05,
      "loss": 0.6464,
      "step": 1210
    },
    {
      "epoch": 0.5254657047485732,
      "grad_norm": 0.49199768900871277,
      "learning_rate": 1.9156326797365665e-05,
      "loss": 0.6655,
      "step": 1220
    },
    {
      "epoch": 0.5297728006891353,
      "grad_norm": 0.4916874170303345,
      "learning_rate": 1.913712817093364e-05,
      "loss": 0.6372,
      "step": 1230
    },
    {
      "epoch": 0.5340798966296975,
      "grad_norm": 0.48562970757484436,
      "learning_rate": 1.9117723395960972e-05,
      "loss": 0.6639,
      "step": 1240
    },
    {
      "epoch": 0.5383869925702595,
      "grad_norm": 0.5152992010116577,
      "learning_rate": 1.909811291025109e-05,
      "loss": 0.6609,
      "step": 1250
    },
    {
      "epoch": 0.5426940885108216,
      "grad_norm": 0.48352181911468506,
      "learning_rate": 1.907829715624859e-05,
      "loss": 0.6726,
      "step": 1260
    },
    {
      "epoch": 0.5470011844513837,
      "grad_norm": 0.5064017176628113,
      "learning_rate": 1.905827658102926e-05,
      "loss": 0.6698,
      "step": 1270
    },
    {
      "epoch": 0.5513082803919457,
      "grad_norm": 0.46494290232658386,
      "learning_rate": 1.9038051636289997e-05,
      "loss": 0.68,
      "step": 1280
    },
    {
      "epoch": 0.5556153763325078,
      "grad_norm": 0.4788792133331299,
      "learning_rate": 1.9017622778338585e-05,
      "loss": 0.6501,
      "step": 1290
    },
    {
      "epoch": 0.5599224722730699,
      "grad_norm": 0.4712987542152405,
      "learning_rate": 1.8996990468083448e-05,
      "loss": 0.6488,
      "step": 1300
    },
    {
      "epoch": 0.5642295682136319,
      "grad_norm": 0.4997137784957886,
      "learning_rate": 1.8976155171023216e-05,
      "loss": 0.6518,
      "step": 1310
    },
    {
      "epoch": 0.568536664154194,
      "grad_norm": 0.5003030896186829,
      "learning_rate": 1.895511735723623e-05,
      "loss": 0.6317,
      "step": 1320
    },
    {
      "epoch": 0.5728437600947561,
      "grad_norm": 0.4551664888858795,
      "learning_rate": 1.8933877501369944e-05,
      "loss": 0.6634,
      "step": 1330
    },
    {
      "epoch": 0.5771508560353182,
      "grad_norm": 0.532534122467041,
      "learning_rate": 1.891243608263021e-05,
      "loss": 0.6656,
      "step": 1340
    },
    {
      "epoch": 0.5814579519758802,
      "grad_norm": 0.47166600823402405,
      "learning_rate": 1.889079358477047e-05,
      "loss": 0.657,
      "step": 1350
    },
    {
      "epoch": 0.5857650479164423,
      "grad_norm": 0.45552805066108704,
      "learning_rate": 1.8868950496080832e-05,
      "loss": 0.6652,
      "step": 1360
    },
    {
      "epoch": 0.5900721438570045,
      "grad_norm": 0.5267536044120789,
      "learning_rate": 1.884690730937707e-05,
      "loss": 0.6463,
      "step": 1370
    },
    {
      "epoch": 0.5943792397975665,
      "grad_norm": 0.49093228578567505,
      "learning_rate": 1.882466452198949e-05,
      "loss": 0.6604,
      "step": 1380
    },
    {
      "epoch": 0.5986863357381286,
      "grad_norm": 0.5105960369110107,
      "learning_rate": 1.880222263575172e-05,
      "loss": 0.6457,
      "step": 1390
    },
    {
      "epoch": 0.6029934316786907,
      "grad_norm": 0.47326135635375977,
      "learning_rate": 1.8779582156989384e-05,
      "loss": 0.6464,
      "step": 1400
    },
    {
      "epoch": 0.6073005276192527,
      "grad_norm": 0.4910115599632263,
      "learning_rate": 1.875674359650867e-05,
      "loss": 0.6547,
      "step": 1410
    },
    {
      "epoch": 0.6116076235598148,
      "grad_norm": 0.48352956771850586,
      "learning_rate": 1.873370746958482e-05,
      "loss": 0.654,
      "step": 1420
    },
    {
      "epoch": 0.6159147195003769,
      "grad_norm": 0.4722056984901428,
      "learning_rate": 1.871047429595049e-05,
      "loss": 0.6372,
      "step": 1430
    },
    {
      "epoch": 0.6202218154409389,
      "grad_norm": 0.4340212345123291,
      "learning_rate": 1.868704459978405e-05,
      "loss": 0.6507,
      "step": 1440
    },
    {
      "epoch": 0.624528911381501,
      "grad_norm": 0.48497867584228516,
      "learning_rate": 1.8663418909697723e-05,
      "loss": 0.6349,
      "step": 1450
    },
    {
      "epoch": 0.6288360073220631,
      "grad_norm": 0.4707370102405548,
      "learning_rate": 1.863959775872567e-05,
      "loss": 0.6445,
      "step": 1460
    },
    {
      "epoch": 0.6331431032626251,
      "grad_norm": 0.5151925683021545,
      "learning_rate": 1.861558168431199e-05,
      "loss": 0.6493,
      "step": 1470
    },
    {
      "epoch": 0.6374501992031872,
      "grad_norm": 0.47226110100746155,
      "learning_rate": 1.8591371228298554e-05,
      "loss": 0.6211,
      "step": 1480
    },
    {
      "epoch": 0.6417572951437494,
      "grad_norm": 0.48166829347610474,
      "learning_rate": 1.856696693691281e-05,
      "loss": 0.6476,
      "step": 1490
    },
    {
      "epoch": 0.6460643910843114,
      "grad_norm": 0.5039719343185425,
      "learning_rate": 1.8542369360755448e-05,
      "loss": 0.636,
      "step": 1500
    },
    {
      "epoch": 0.6503714870248735,
      "grad_norm": 0.45818519592285156,
      "learning_rate": 1.8517579054787974e-05,
      "loss": 0.658,
      "step": 1510
    },
    {
      "epoch": 0.6546785829654356,
      "grad_norm": 0.4803057014942169,
      "learning_rate": 1.8492596578320194e-05,
      "loss": 0.6468,
      "step": 1520
    },
    {
      "epoch": 0.6589856789059977,
      "grad_norm": 0.480227530002594,
      "learning_rate": 1.8467422494997593e-05,
      "loss": 0.641,
      "step": 1530
    },
    {
      "epoch": 0.6632927748465597,
      "grad_norm": 0.49187588691711426,
      "learning_rate": 1.844205737278863e-05,
      "loss": 0.6572,
      "step": 1540
    },
    {
      "epoch": 0.6675998707871218,
      "grad_norm": 0.49701517820358276,
      "learning_rate": 1.84165017839719e-05,
      "loss": 0.6567,
      "step": 1550
    },
    {
      "epoch": 0.6719069667276839,
      "grad_norm": 0.48368483781814575,
      "learning_rate": 1.8390756305123246e-05,
      "loss": 0.669,
      "step": 1560
    },
    {
      "epoch": 0.6762140626682459,
      "grad_norm": 0.5007254481315613,
      "learning_rate": 1.836482151710273e-05,
      "loss": 0.6448,
      "step": 1570
    },
    {
      "epoch": 0.680521158608808,
      "grad_norm": 0.44526585936546326,
      "learning_rate": 1.8338698005041556e-05,
      "loss": 0.6386,
      "step": 1580
    },
    {
      "epoch": 0.6848282545493701,
      "grad_norm": 0.4812663197517395,
      "learning_rate": 1.8312386358328828e-05,
      "loss": 0.6447,
      "step": 1590
    },
    {
      "epoch": 0.6891353504899321,
      "grad_norm": 0.4910503029823303,
      "learning_rate": 1.828588717059829e-05,
      "loss": 0.6449,
      "step": 1600
    },
    {
      "epoch": 0.6934424464304942,
      "grad_norm": 0.47431930899620056,
      "learning_rate": 1.8259201039714914e-05,
      "loss": 0.6372,
      "step": 1610
    },
    {
      "epoch": 0.6977495423710564,
      "grad_norm": 0.5024338364601135,
      "learning_rate": 1.8232328567761416e-05,
      "loss": 0.6433,
      "step": 1620
    },
    {
      "epoch": 0.7020566383116184,
      "grad_norm": 0.47510799765586853,
      "learning_rate": 1.820527036102467e-05,
      "loss": 0.6601,
      "step": 1630
    },
    {
      "epoch": 0.7063637342521805,
      "grad_norm": 0.47990313172340393,
      "learning_rate": 1.8178027029982027e-05,
      "loss": 0.6463,
      "step": 1640
    },
    {
      "epoch": 0.7106708301927426,
      "grad_norm": 0.5117030739784241,
      "learning_rate": 1.8150599189287553e-05,
      "loss": 0.6455,
      "step": 1650
    },
    {
      "epoch": 0.7149779261333046,
      "grad_norm": 0.4917861819267273,
      "learning_rate": 1.8122987457758147e-05,
      "loss": 0.6688,
      "step": 1660
    },
    {
      "epoch": 0.7192850220738667,
      "grad_norm": 0.49872297048568726,
      "learning_rate": 1.8095192458359588e-05,
      "loss": 0.6513,
      "step": 1670
    },
    {
      "epoch": 0.7235921180144288,
      "grad_norm": 0.47510796785354614,
      "learning_rate": 1.806721481819247e-05,
      "loss": 0.649,
      "step": 1680
    },
    {
      "epoch": 0.7278992139549908,
      "grad_norm": 0.4924173057079315,
      "learning_rate": 1.8039055168478074e-05,
      "loss": 0.6177,
      "step": 1690
    },
    {
      "epoch": 0.7322063098955529,
      "grad_norm": 0.4918348789215088,
      "learning_rate": 1.8010714144544104e-05,
      "loss": 0.6543,
      "step": 1700
    },
    {
      "epoch": 0.736513405836115,
      "grad_norm": 0.45298415422439575,
      "learning_rate": 1.7982192385810372e-05,
      "loss": 0.6367,
      "step": 1710
    },
    {
      "epoch": 0.740820501776677,
      "grad_norm": 0.46879851818084717,
      "learning_rate": 1.795349053577435e-05,
      "loss": 0.6414,
      "step": 1720
    },
    {
      "epoch": 0.7451275977172391,
      "grad_norm": 0.4573706388473511,
      "learning_rate": 1.7924609241996672e-05,
      "loss": 0.628,
      "step": 1730
    },
    {
      "epoch": 0.7494346936578012,
      "grad_norm": 0.46929094195365906,
      "learning_rate": 1.7895549156086514e-05,
      "loss": 0.6478,
      "step": 1740
    },
    {
      "epoch": 0.7537417895983634,
      "grad_norm": 0.5428628325462341,
      "learning_rate": 1.78663109336869e-05,
      "loss": 0.6405,
      "step": 1750
    },
    {
      "epoch": 0.7580488855389254,
      "grad_norm": 0.47853079438209534,
      "learning_rate": 1.78368952344599e-05,
      "loss": 0.6442,
      "step": 1760
    },
    {
      "epoch": 0.7623559814794875,
      "grad_norm": 0.46747061610221863,
      "learning_rate": 1.7807302722071742e-05,
      "loss": 0.6369,
      "step": 1770
    },
    {
      "epoch": 0.7666630774200496,
      "grad_norm": 0.5107671022415161,
      "learning_rate": 1.7777534064177864e-05,
      "loss": 0.6322,
      "step": 1780
    },
    {
      "epoch": 0.7709701733606116,
      "grad_norm": 0.5013517141342163,
      "learning_rate": 1.7747589932407826e-05,
      "loss": 0.6384,
      "step": 1790
    },
    {
      "epoch": 0.7752772693011737,
      "grad_norm": 0.5039073824882507,
      "learning_rate": 1.7717471002350162e-05,
      "loss": 0.6504,
      "step": 1800
    },
    {
      "epoch": 0.7795843652417358,
      "grad_norm": 0.4767347276210785,
      "learning_rate": 1.7687177953537148e-05,
      "loss": 0.645,
      "step": 1810
    },
    {
      "epoch": 0.7838914611822978,
      "grad_norm": 0.4766087532043457,
      "learning_rate": 1.7656711469429464e-05,
      "loss": 0.6249,
      "step": 1820
    },
    {
      "epoch": 0.7881985571228599,
      "grad_norm": 0.5031486749649048,
      "learning_rate": 1.7626072237400764e-05,
      "loss": 0.6263,
      "step": 1830
    },
    {
      "epoch": 0.792505653063422,
      "grad_norm": 0.444658488035202,
      "learning_rate": 1.759526094872219e-05,
      "loss": 0.6561,
      "step": 1840
    },
    {
      "epoch": 0.796812749003984,
      "grad_norm": 0.5070600509643555,
      "learning_rate": 1.7564278298546758e-05,
      "loss": 0.6477,
      "step": 1850
    },
    {
      "epoch": 0.8011198449445461,
      "grad_norm": 0.45487794280052185,
      "learning_rate": 1.753312498589367e-05,
      "loss": 0.6257,
      "step": 1860
    },
    {
      "epoch": 0.8054269408851082,
      "grad_norm": 0.4745471477508545,
      "learning_rate": 1.7501801713632568e-05,
      "loss": 0.6586,
      "step": 1870
    },
    {
      "epoch": 0.8097340368256702,
      "grad_norm": 0.4743909537792206,
      "learning_rate": 1.7470309188467645e-05,
      "loss": 0.6255,
      "step": 1880
    },
    {
      "epoch": 0.8140411327662324,
      "grad_norm": 0.5165956020355225,
      "learning_rate": 1.7438648120921736e-05,
      "loss": 0.6592,
      "step": 1890
    },
    {
      "epoch": 0.8183482287067945,
      "grad_norm": 0.455861359834671,
      "learning_rate": 1.740681922532025e-05,
      "loss": 0.6467,
      "step": 1900
    },
    {
      "epoch": 0.8226553246473565,
      "grad_norm": 0.468013733625412,
      "learning_rate": 1.7374823219775073e-05,
      "loss": 0.6382,
      "step": 1910
    },
    {
      "epoch": 0.8269624205879186,
      "grad_norm": 0.46119919419288635,
      "learning_rate": 1.7342660826168374e-05,
      "loss": 0.6437,
      "step": 1920
    },
    {
      "epoch": 0.8312695165284807,
      "grad_norm": 0.4399983286857605,
      "learning_rate": 1.73103327701363e-05,
      "loss": 0.6379,
      "step": 1930
    },
    {
      "epoch": 0.8355766124690428,
      "grad_norm": 0.46829739212989807,
      "learning_rate": 1.7277839781052617e-05,
      "loss": 0.6402,
      "step": 1940
    },
    {
      "epoch": 0.8398837084096048,
      "grad_norm": 0.5193459987640381,
      "learning_rate": 1.7245182592012248e-05,
      "loss": 0.6348,
      "step": 1950
    },
    {
      "epoch": 0.8441908043501669,
      "grad_norm": 0.5310715436935425,
      "learning_rate": 1.7212361939814735e-05,
      "loss": 0.6351,
      "step": 1960
    },
    {
      "epoch": 0.848497900290729,
      "grad_norm": 0.4883059561252594,
      "learning_rate": 1.7179378564947615e-05,
      "loss": 0.6401,
      "step": 1970
    },
    {
      "epoch": 0.852804996231291,
      "grad_norm": 0.5028474926948547,
      "learning_rate": 1.7146233211569723e-05,
      "loss": 0.6559,
      "step": 1980
    },
    {
      "epoch": 0.8571120921718531,
      "grad_norm": 0.48668941855430603,
      "learning_rate": 1.7112926627494385e-05,
      "loss": 0.6572,
      "step": 1990
    },
    {
      "epoch": 0.8614191881124152,
      "grad_norm": 0.4668605327606201,
      "learning_rate": 1.7079459564172555e-05,
      "loss": 0.6321,
      "step": 2000
    },
    {
      "epoch": 0.8657262840529772,
      "grad_norm": 0.4556910991668701,
      "learning_rate": 1.7045832776675863e-05,
      "loss": 0.6268,
      "step": 2010
    },
    {
      "epoch": 0.8700333799935394,
      "grad_norm": 0.45260846614837646,
      "learning_rate": 1.701204702367958e-05,
      "loss": 0.6271,
      "step": 2020
    },
    {
      "epoch": 0.8743404759341015,
      "grad_norm": 0.4828309714794159,
      "learning_rate": 1.6978103067445494e-05,
      "loss": 0.6351,
      "step": 2030
    },
    {
      "epoch": 0.8786475718746635,
      "grad_norm": 0.4691152274608612,
      "learning_rate": 1.6944001673804723e-05,
      "loss": 0.6512,
      "step": 2040
    },
    {
      "epoch": 0.8829546678152256,
      "grad_norm": 0.4812765419483185,
      "learning_rate": 1.6909743612140417e-05,
      "loss": 0.6335,
      "step": 2050
    },
    {
      "epoch": 0.8872617637557877,
      "grad_norm": 0.4415755867958069,
      "learning_rate": 1.687532965537043e-05,
      "loss": 0.6541,
      "step": 2060
    },
    {
      "epoch": 0.8915688596963497,
      "grad_norm": 0.4993227422237396,
      "learning_rate": 1.6840760579929846e-05,
      "loss": 0.6318,
      "step": 2070
    },
    {
      "epoch": 0.8958759556369118,
      "grad_norm": 0.4628779888153076,
      "learning_rate": 1.6806037165753498e-05,
      "loss": 0.6369,
      "step": 2080
    },
    {
      "epoch": 0.9001830515774739,
      "grad_norm": 0.5235878229141235,
      "learning_rate": 1.677116019625834e-05,
      "loss": 0.6415,
      "step": 2090
    },
    {
      "epoch": 0.9044901475180359,
      "grad_norm": 0.4750138819217682,
      "learning_rate": 1.6736130458325793e-05,
      "loss": 0.6101,
      "step": 2100
    },
    {
      "epoch": 0.908797243458598,
      "grad_norm": 0.5292583107948303,
      "learning_rate": 1.6700948742283977e-05,
      "loss": 0.6248,
      "step": 2110
    },
    {
      "epoch": 0.9131043393991601,
      "grad_norm": 0.45959070324897766,
      "learning_rate": 1.6665615841889885e-05,
      "loss": 0.6339,
      "step": 2120
    },
    {
      "epoch": 0.9174114353397222,
      "grad_norm": 0.48287901282310486,
      "learning_rate": 1.6630132554311486e-05,
      "loss": 0.6161,
      "step": 2130
    },
    {
      "epoch": 0.9217185312802842,
      "grad_norm": 0.4725618064403534,
      "learning_rate": 1.6594499680109722e-05,
      "loss": 0.627,
      "step": 2140
    },
    {
      "epoch": 0.9260256272208464,
      "grad_norm": 0.4820912778377533,
      "learning_rate": 1.6558718023220457e-05,
      "loss": 0.6399,
      "step": 2150
    },
    {
      "epoch": 0.9303327231614085,
      "grad_norm": 0.48815685510635376,
      "learning_rate": 1.6522788390936328e-05,
      "loss": 0.6437,
      "step": 2160
    },
    {
      "epoch": 0.9346398191019705,
      "grad_norm": 0.4747340679168701,
      "learning_rate": 1.648671159388855e-05,
      "loss": 0.6455,
      "step": 2170
    },
    {
      "epoch": 0.9389469150425326,
      "grad_norm": 0.4894673526287079,
      "learning_rate": 1.6450488446028612e-05,
      "loss": 0.6545,
      "step": 2180
    },
    {
      "epoch": 0.9432540109830947,
      "grad_norm": 0.4756160080432892,
      "learning_rate": 1.641411976460991e-05,
      "loss": 0.6498,
      "step": 2190
    },
    {
      "epoch": 0.9475611069236567,
      "grad_norm": 0.45228078961372375,
      "learning_rate": 1.637760637016932e-05,
      "loss": 0.6438,
      "step": 2200
    },
    {
      "epoch": 0.9518682028642188,
      "grad_norm": 0.49898287653923035,
      "learning_rate": 1.6340949086508676e-05,
      "loss": 0.6518,
      "step": 2210
    },
    {
      "epoch": 0.9561752988047809,
      "grad_norm": 0.4354493021965027,
      "learning_rate": 1.6304148740676204e-05,
      "loss": 0.6125,
      "step": 2220
    },
    {
      "epoch": 0.9604823947453429,
      "grad_norm": 0.45118704438209534,
      "learning_rate": 1.6267206162947823e-05,
      "loss": 0.6146,
      "step": 2230
    },
    {
      "epoch": 0.964789490685905,
      "grad_norm": 0.4822487533092499,
      "learning_rate": 1.6230122186808443e-05,
      "loss": 0.6425,
      "step": 2240
    },
    {
      "epoch": 0.9690965866264671,
      "grad_norm": 0.490903377532959,
      "learning_rate": 1.619289764893317e-05,
      "loss": 0.6353,
      "step": 2250
    },
    {
      "epoch": 0.9734036825670291,
      "grad_norm": 0.4738866686820984,
      "learning_rate": 1.615553338916839e-05,
      "loss": 0.6315,
      "step": 2260
    },
    {
      "epoch": 0.9777107785075912,
      "grad_norm": 0.46285027265548706,
      "learning_rate": 1.6118030250512863e-05,
      "loss": 0.6501,
      "step": 2270
    },
    {
      "epoch": 0.9820178744481534,
      "grad_norm": 0.46414172649383545,
      "learning_rate": 1.6080389079098657e-05,
      "loss": 0.6501,
      "step": 2280
    },
    {
      "epoch": 0.9863249703887154,
      "grad_norm": 0.5042113661766052,
      "learning_rate": 1.604261072417211e-05,
      "loss": 0.6319,
      "step": 2290
    },
    {
      "epoch": 0.9906320663292775,
      "grad_norm": 0.43653419613838196,
      "learning_rate": 1.600469603807464e-05,
      "loss": 0.6461,
      "step": 2300
    },
    {
      "epoch": 0.9949391622698396,
      "grad_norm": 0.4572006165981293,
      "learning_rate": 1.5966645876223505e-05,
      "loss": 0.6477,
      "step": 2310
    },
    {
      "epoch": 0.9992462582104016,
      "grad_norm": 0.43867436051368713,
      "learning_rate": 1.5928461097092532e-05,
      "loss": 0.6288,
      "step": 2320
    },
    {
      "epoch": 1.0035533541509638,
      "grad_norm": 0.5620077848434448,
      "learning_rate": 1.589014256219273e-05,
      "loss": 0.5378,
      "step": 2330
    },
    {
      "epoch": 1.0078604500915258,
      "grad_norm": 0.4836018681526184,
      "learning_rate": 1.5851691136052842e-05,
      "loss": 0.5421,
      "step": 2340
    },
    {
      "epoch": 1.0121675460320878,
      "grad_norm": 0.49632197618484497,
      "learning_rate": 1.581310768619988e-05,
      "loss": 0.5237,
      "step": 2350
    },
    {
      "epoch": 1.01647464197265,
      "grad_norm": 0.49445948004722595,
      "learning_rate": 1.5774393083139513e-05,
      "loss": 0.5313,
      "step": 2360
    },
    {
      "epoch": 1.020781737913212,
      "grad_norm": 0.5299666523933411,
      "learning_rate": 1.5735548200336435e-05,
      "loss": 0.5326,
      "step": 2370
    },
    {
      "epoch": 1.025088833853774,
      "grad_norm": 0.5012844204902649,
      "learning_rate": 1.569657391419468e-05,
      "loss": 0.5401,
      "step": 2380
    },
    {
      "epoch": 1.0293959297943363,
      "grad_norm": 0.4741289019584656,
      "learning_rate": 1.565747110403781e-05,
      "loss": 0.5052,
      "step": 2390
    },
    {
      "epoch": 1.0337030257348983,
      "grad_norm": 0.4950823485851288,
      "learning_rate": 1.5618240652089123e-05,
      "loss": 0.5294,
      "step": 2400
    },
    {
      "epoch": 1.0380101216754603,
      "grad_norm": 0.4934958517551422,
      "learning_rate": 1.557888344345171e-05,
      "loss": 0.5278,
      "step": 2410
    },
    {
      "epoch": 1.0423172176160225,
      "grad_norm": 0.467101514339447,
      "learning_rate": 1.5539400366088503e-05,
      "loss": 0.504,
      "step": 2420
    },
    {
      "epoch": 1.0466243135565845,
      "grad_norm": 0.5479716062545776,
      "learning_rate": 1.5499792310802238e-05,
      "loss": 0.5256,
      "step": 2430
    },
    {
      "epoch": 1.0509314094971465,
      "grad_norm": 0.4706737697124481,
      "learning_rate": 1.5460060171215362e-05,
      "loss": 0.5251,
      "step": 2440
    },
    {
      "epoch": 1.0552385054377087,
      "grad_norm": 0.5142565965652466,
      "learning_rate": 1.5420204843749857e-05,
      "loss": 0.5333,
      "step": 2450
    },
    {
      "epoch": 1.0595456013782707,
      "grad_norm": 0.5430694222450256,
      "learning_rate": 1.5380227227607032e-05,
      "loss": 0.5391,
      "step": 2460
    },
    {
      "epoch": 1.0638526973188327,
      "grad_norm": 0.4780258536338806,
      "learning_rate": 1.5340128224747225e-05,
      "loss": 0.5338,
      "step": 2470
    },
    {
      "epoch": 1.068159793259395,
      "grad_norm": 0.47647717595100403,
      "learning_rate": 1.5299908739869464e-05,
      "loss": 0.5178,
      "step": 2480
    },
    {
      "epoch": 1.072466889199957,
      "grad_norm": 0.5330241918563843,
      "learning_rate": 1.525956968039103e-05,
      "loss": 0.5027,
      "step": 2490
    },
    {
      "epoch": 1.076773985140519,
      "grad_norm": 0.4681854546070099,
      "learning_rate": 1.5219111956427027e-05,
      "loss": 0.5315,
      "step": 2500
    },
    {
      "epoch": 1.0810810810810811,
      "grad_norm": 0.5060921311378479,
      "learning_rate": 1.5178536480769803e-05,
      "loss": 0.5103,
      "step": 2510
    },
    {
      "epoch": 1.0853881770216431,
      "grad_norm": 0.497199147939682,
      "learning_rate": 1.5137844168868391e-05,
      "loss": 0.5302,
      "step": 2520
    },
    {
      "epoch": 1.0896952729622051,
      "grad_norm": 0.4658927321434021,
      "learning_rate": 1.5097035938807834e-05,
      "loss": 0.5196,
      "step": 2530
    },
    {
      "epoch": 1.0940023689027674,
      "grad_norm": 0.5109249353408813,
      "learning_rate": 1.5056112711288475e-05,
      "loss": 0.5099,
      "step": 2540
    },
    {
      "epoch": 1.0983094648433294,
      "grad_norm": 0.5212246775627136,
      "learning_rate": 1.5015075409605189e-05,
      "loss": 0.4911,
      "step": 2550
    },
    {
      "epoch": 1.1026165607838914,
      "grad_norm": 0.47850698232650757,
      "learning_rate": 1.497392495962656e-05,
      "loss": 0.5225,
      "step": 2560
    },
    {
      "epoch": 1.1069236567244536,
      "grad_norm": 0.4982755184173584,
      "learning_rate": 1.4932662289773969e-05,
      "loss": 0.5278,
      "step": 2570
    },
    {
      "epoch": 1.1112307526650156,
      "grad_norm": 0.49975791573524475,
      "learning_rate": 1.4891288331000668e-05,
      "loss": 0.5261,
      "step": 2580
    },
    {
      "epoch": 1.1155378486055776,
      "grad_norm": 0.5002388954162598,
      "learning_rate": 1.484980401677077e-05,
      "loss": 0.5313,
      "step": 2590
    },
    {
      "epoch": 1.1198449445461398,
      "grad_norm": 0.4950617253780365,
      "learning_rate": 1.4808210283038183e-05,
      "loss": 0.5286,
      "step": 2600
    },
    {
      "epoch": 1.1241520404867018,
      "grad_norm": 0.49831753969192505,
      "learning_rate": 1.47665080682255e-05,
      "loss": 0.5133,
      "step": 2610
    },
    {
      "epoch": 1.128459136427264,
      "grad_norm": 0.6730148792266846,
      "learning_rate": 1.4724698313202825e-05,
      "loss": 0.5224,
      "step": 2620
    },
    {
      "epoch": 1.132766232367826,
      "grad_norm": 0.5355139374732971,
      "learning_rate": 1.4682781961266546e-05,
      "loss": 0.5188,
      "step": 2630
    },
    {
      "epoch": 1.137073328308388,
      "grad_norm": 0.5199829936027527,
      "learning_rate": 1.4640759958118045e-05,
      "loss": 0.5121,
      "step": 2640
    },
    {
      "epoch": 1.14138042424895,
      "grad_norm": 0.5292408466339111,
      "learning_rate": 1.4598633251842373e-05,
      "loss": 0.5267,
      "step": 2650
    },
    {
      "epoch": 1.1456875201895123,
      "grad_norm": 0.5363121032714844,
      "learning_rate": 1.4556402792886856e-05,
      "loss": 0.5147,
      "step": 2660
    },
    {
      "epoch": 1.1499946161300743,
      "grad_norm": 0.5359490513801575,
      "learning_rate": 1.4514069534039649e-05,
      "loss": 0.5155,
      "step": 2670
    },
    {
      "epoch": 1.1543017120706365,
      "grad_norm": 0.4707220792770386,
      "learning_rate": 1.4471634430408244e-05,
      "loss": 0.5419,
      "step": 2680
    },
    {
      "epoch": 1.1586088080111985,
      "grad_norm": 0.4798811376094818,
      "learning_rate": 1.4429098439397901e-05,
      "loss": 0.5152,
      "step": 2690
    },
    {
      "epoch": 1.1629159039517605,
      "grad_norm": 0.4730081260204315,
      "learning_rate": 1.4386462520690087e-05,
      "loss": 0.5283,
      "step": 2700
    },
    {
      "epoch": 1.1672229998923225,
      "grad_norm": 0.524276614189148,
      "learning_rate": 1.4343727636220785e-05,
      "loss": 0.5087,
      "step": 2710
    },
    {
      "epoch": 1.1715300958328847,
      "grad_norm": 0.5093454122543335,
      "learning_rate": 1.430089475015882e-05,
      "loss": 0.5371,
      "step": 2720
    },
    {
      "epoch": 1.1758371917734467,
      "grad_norm": 0.5228180289268494,
      "learning_rate": 1.4257964828884077e-05,
      "loss": 0.5121,
      "step": 2730
    },
    {
      "epoch": 1.180144287714009,
      "grad_norm": 0.5263434052467346,
      "learning_rate": 1.4214938840965729e-05,
      "loss": 0.5104,
      "step": 2740
    },
    {
      "epoch": 1.184451383654571,
      "grad_norm": 0.5519675612449646,
      "learning_rate": 1.417181775714036e-05,
      "loss": 0.5081,
      "step": 2750
    },
    {
      "epoch": 1.188758479595133,
      "grad_norm": 0.48901626467704773,
      "learning_rate": 1.4128602550290078e-05,
      "loss": 0.5332,
      "step": 2760
    },
    {
      "epoch": 1.1930655755356951,
      "grad_norm": 0.5022098422050476,
      "learning_rate": 1.4085294195420563e-05,
      "loss": 0.5267,
      "step": 2770
    },
    {
      "epoch": 1.1973726714762571,
      "grad_norm": 0.5244942307472229,
      "learning_rate": 1.4041893669639053e-05,
      "loss": 0.5309,
      "step": 2780
    },
    {
      "epoch": 1.2016797674168191,
      "grad_norm": 0.5060109496116638,
      "learning_rate": 1.399840195213233e-05,
      "loss": 0.509,
      "step": 2790
    },
    {
      "epoch": 1.2059868633573814,
      "grad_norm": 0.48709142208099365,
      "learning_rate": 1.3954820024144595e-05,
      "loss": 0.5249,
      "step": 2800
    },
    {
      "epoch": 1.2102939592979434,
      "grad_norm": 0.48755279183387756,
      "learning_rate": 1.3911148868955357e-05,
      "loss": 0.5216,
      "step": 2810
    },
    {
      "epoch": 1.2146010552385054,
      "grad_norm": 0.4871668219566345,
      "learning_rate": 1.3867389471857229e-05,
      "loss": 0.5199,
      "step": 2820
    },
    {
      "epoch": 1.2189081511790676,
      "grad_norm": 0.5313363671302795,
      "learning_rate": 1.3823542820133706e-05,
      "loss": 0.5146,
      "step": 2830
    },
    {
      "epoch": 1.2232152471196296,
      "grad_norm": 0.48473960161209106,
      "learning_rate": 1.3779609903036894e-05,
      "loss": 0.5126,
      "step": 2840
    },
    {
      "epoch": 1.2275223430601916,
      "grad_norm": 0.5411814451217651,
      "learning_rate": 1.3735591711765189e-05,
      "loss": 0.5186,
      "step": 2850
    },
    {
      "epoch": 1.2318294390007538,
      "grad_norm": 0.5286210775375366,
      "learning_rate": 1.3691489239440899e-05,
      "loss": 0.513,
      "step": 2860
    },
    {
      "epoch": 1.2361365349413158,
      "grad_norm": 0.47112423181533813,
      "learning_rate": 1.3647303481087858e-05,
      "loss": 0.5268,
      "step": 2870
    },
    {
      "epoch": 1.2404436308818778,
      "grad_norm": 0.5465208888053894,
      "learning_rate": 1.3603035433608977e-05,
      "loss": 0.5109,
      "step": 2880
    },
    {
      "epoch": 1.24475072682244,
      "grad_norm": 0.4758882522583008,
      "learning_rate": 1.3558686095763732e-05,
      "loss": 0.5307,
      "step": 2890
    },
    {
      "epoch": 1.249057822763002,
      "grad_norm": 0.5721794962882996,
      "learning_rate": 1.3514256468145645e-05,
      "loss": 0.5104,
      "step": 2900
    },
    {
      "epoch": 1.2533649187035643,
      "grad_norm": 0.5125982761383057,
      "learning_rate": 1.3469747553159714e-05,
      "loss": 0.5278,
      "step": 2910
    },
    {
      "epoch": 1.2576720146441263,
      "grad_norm": 0.5272653698921204,
      "learning_rate": 1.342516035499978e-05,
      "loss": 0.5276,
      "step": 2920
    },
    {
      "epoch": 1.2619791105846883,
      "grad_norm": 0.5423816442489624,
      "learning_rate": 1.3380495879625884e-05,
      "loss": 0.5408,
      "step": 2930
    },
    {
      "epoch": 1.2662862065252503,
      "grad_norm": 0.4817509055137634,
      "learning_rate": 1.333575513474157e-05,
      "loss": 0.5152,
      "step": 2940
    },
    {
      "epoch": 1.2705933024658125,
      "grad_norm": 0.5113592147827148,
      "learning_rate": 1.3290939129771143e-05,
      "loss": 0.5397,
      "step": 2950
    },
    {
      "epoch": 1.2749003984063745,
      "grad_norm": 0.5106224417686462,
      "learning_rate": 1.3246048875836898e-05,
      "loss": 0.5269,
      "step": 2960
    },
    {
      "epoch": 1.2792074943469367,
      "grad_norm": 0.5446826219558716,
      "learning_rate": 1.3201085385736313e-05,
      "loss": 0.5252,
      "step": 2970
    },
    {
      "epoch": 1.2835145902874987,
      "grad_norm": 0.484943151473999,
      "learning_rate": 1.3156049673919184e-05,
      "loss": 0.525,
      "step": 2980
    },
    {
      "epoch": 1.2878216862280607,
      "grad_norm": 0.5692194700241089,
      "learning_rate": 1.3110942756464764e-05,
      "loss": 0.5197,
      "step": 2990
    },
    {
      "epoch": 1.2921287821686227,
      "grad_norm": 0.5009827017784119,
      "learning_rate": 1.3065765651058802e-05,
      "loss": 0.5325,
      "step": 3000
    },
    {
      "epoch": 1.296435878109185,
      "grad_norm": 0.4953298568725586,
      "learning_rate": 1.3020519376970613e-05,
      "loss": 0.5095,
      "step": 3010
    },
    {
      "epoch": 1.300742974049747,
      "grad_norm": 0.5116891264915466,
      "learning_rate": 1.2975204955030068e-05,
      "loss": 0.5263,
      "step": 3020
    },
    {
      "epoch": 1.3050500699903091,
      "grad_norm": 0.4844088554382324,
      "learning_rate": 1.2929823407604567e-05,
      "loss": 0.5113,
      "step": 3030
    },
    {
      "epoch": 1.3093571659308711,
      "grad_norm": 0.4732029438018799,
      "learning_rate": 1.2884375758575967e-05,
      "loss": 0.532,
      "step": 3040
    },
    {
      "epoch": 1.3136642618714331,
      "grad_norm": 0.5469485521316528,
      "learning_rate": 1.2838863033317484e-05,
      "loss": 0.519,
      "step": 3050
    },
    {
      "epoch": 1.3179713578119951,
      "grad_norm": 0.4888254702091217,
      "learning_rate": 1.2793286258670565e-05,
      "loss": 0.5097,
      "step": 3060
    },
    {
      "epoch": 1.3222784537525574,
      "grad_norm": 0.5359517335891724,
      "learning_rate": 1.2747646462921717e-05,
      "loss": 0.5246,
      "step": 3070
    },
    {
      "epoch": 1.3265855496931194,
      "grad_norm": 0.5013801455497742,
      "learning_rate": 1.2701944675779299e-05,
      "loss": 0.524,
      "step": 3080
    },
    {
      "epoch": 1.3308926456336816,
      "grad_norm": 0.49307557940483093,
      "learning_rate": 1.2656181928350301e-05,
      "loss": 0.5403,
      "step": 3090
    },
    {
      "epoch": 1.3351997415742436,
      "grad_norm": 0.47625210881233215,
      "learning_rate": 1.2610359253117078e-05,
      "loss": 0.5275,
      "step": 3100
    },
    {
      "epoch": 1.3395068375148056,
      "grad_norm": 0.5096368789672852,
      "learning_rate": 1.2564477683914053e-05,
      "loss": 0.5231,
      "step": 3110
    },
    {
      "epoch": 1.3438139334553676,
      "grad_norm": 0.4992668926715851,
      "learning_rate": 1.2518538255904389e-05,
      "loss": 0.5235,
      "step": 3120
    },
    {
      "epoch": 1.3481210293959298,
      "grad_norm": 0.491062194108963,
      "learning_rate": 1.2472542005556647e-05,
      "loss": 0.5432,
      "step": 3130
    },
    {
      "epoch": 1.3524281253364918,
      "grad_norm": 0.48666131496429443,
      "learning_rate": 1.2426489970621385e-05,
      "loss": 0.531,
      "step": 3140
    },
    {
      "epoch": 1.356735221277054,
      "grad_norm": 0.4706876575946808,
      "learning_rate": 1.2380383190107757e-05,
      "loss": 0.5188,
      "step": 3150
    },
    {
      "epoch": 1.361042317217616,
      "grad_norm": 0.4910385310649872,
      "learning_rate": 1.2334222704260063e-05,
      "loss": 0.5106,
      "step": 3160
    },
    {
      "epoch": 1.365349413158178,
      "grad_norm": 0.506514847278595,
      "learning_rate": 1.2288009554534291e-05,
      "loss": 0.5292,
      "step": 3170
    },
    {
      "epoch": 1.36965650909874,
      "grad_norm": 0.49671700596809387,
      "learning_rate": 1.2241744783574596e-05,
      "loss": 0.5284,
      "step": 3180
    },
    {
      "epoch": 1.3739636050393023,
      "grad_norm": 0.4892718195915222,
      "learning_rate": 1.219542943518981e-05,
      "loss": 0.5215,
      "step": 3190
    },
    {
      "epoch": 1.3782707009798643,
      "grad_norm": 0.5412102937698364,
      "learning_rate": 1.2149064554329864e-05,
      "loss": 0.5256,
      "step": 3200
    },
    {
      "epoch": 1.3825777969204265,
      "grad_norm": 0.4869970679283142,
      "learning_rate": 1.2102651187062227e-05,
      "loss": 0.5218,
      "step": 3210
    },
    {
      "epoch": 1.3868848928609885,
      "grad_norm": 0.5195066332817078,
      "learning_rate": 1.2056190380548299e-05,
      "loss": 0.5269,
      "step": 3220
    },
    {
      "epoch": 1.3911919888015505,
      "grad_norm": 0.5343438982963562,
      "learning_rate": 1.2009683183019788e-05,
      "loss": 0.5301,
      "step": 3230
    },
    {
      "epoch": 1.3954990847421127,
      "grad_norm": 0.522270679473877,
      "learning_rate": 1.1963130643755055e-05,
      "loss": 0.545,
      "step": 3240
    },
    {
      "epoch": 1.3998061806826747,
      "grad_norm": 0.501485288143158,
      "learning_rate": 1.191653381305545e-05,
      "loss": 0.5253,
      "step": 3250
    },
    {
      "epoch": 1.4041132766232367,
      "grad_norm": 0.5288712382316589,
      "learning_rate": 1.186989374222161e-05,
      "loss": 0.5181,
      "step": 3260
    },
    {
      "epoch": 1.408420372563799,
      "grad_norm": 0.5131502151489258,
      "learning_rate": 1.1823211483529733e-05,
      "loss": 0.5138,
      "step": 3270
    },
    {
      "epoch": 1.412727468504361,
      "grad_norm": 0.4853404462337494,
      "learning_rate": 1.1776488090207852e-05,
      "loss": 0.5319,
      "step": 3280
    },
    {
      "epoch": 1.417034564444923,
      "grad_norm": 0.5093010663986206,
      "learning_rate": 1.1729724616412062e-05,
      "loss": 0.5155,
      "step": 3290
    },
    {
      "epoch": 1.4213416603854852,
      "grad_norm": 0.5078168511390686,
      "learning_rate": 1.1682922117202736e-05,
      "loss": 0.5206,
      "step": 3300
    },
    {
      "epoch": 1.4256487563260472,
      "grad_norm": 0.5315324664115906,
      "learning_rate": 1.163608164852073e-05,
      "loss": 0.5314,
      "step": 3310
    },
    {
      "epoch": 1.4299558522666094,
      "grad_norm": 0.4705192446708679,
      "learning_rate": 1.1589204267163545e-05,
      "loss": 0.4966,
      "step": 3320
    },
    {
      "epoch": 1.4342629482071714,
      "grad_norm": 0.48757535219192505,
      "learning_rate": 1.15422910307615e-05,
      "loss": 0.5299,
      "step": 3330
    },
    {
      "epoch": 1.4385700441477334,
      "grad_norm": 0.5582148432731628,
      "learning_rate": 1.1495342997753864e-05,
      "loss": 0.5201,
      "step": 3340
    },
    {
      "epoch": 1.4428771400882954,
      "grad_norm": 0.5134326219558716,
      "learning_rate": 1.1448361227364963e-05,
      "loss": 0.5061,
      "step": 3350
    },
    {
      "epoch": 1.4471842360288576,
      "grad_norm": 0.5316387414932251,
      "learning_rate": 1.1401346779580303e-05,
      "loss": 0.5145,
      "step": 3360
    },
    {
      "epoch": 1.4514913319694196,
      "grad_norm": 0.5328738689422607,
      "learning_rate": 1.1354300715122637e-05,
      "loss": 0.5288,
      "step": 3370
    },
    {
      "epoch": 1.4557984279099818,
      "grad_norm": 0.5279168486595154,
      "learning_rate": 1.1307224095428058e-05,
      "loss": 0.5031,
      "step": 3380
    },
    {
      "epoch": 1.4601055238505438,
      "grad_norm": 0.5049686431884766,
      "learning_rate": 1.1260117982622021e-05,
      "loss": 0.5004,
      "step": 3390
    },
    {
      "epoch": 1.4644126197911058,
      "grad_norm": 0.47000184655189514,
      "learning_rate": 1.1212983439495392e-05,
      "loss": 0.5267,
      "step": 3400
    },
    {
      "epoch": 1.4687197157316678,
      "grad_norm": 0.49505382776260376,
      "learning_rate": 1.1165821529480483e-05,
      "loss": 0.5278,
      "step": 3410
    },
    {
      "epoch": 1.47302681167223,
      "grad_norm": 0.568454384803772,
      "learning_rate": 1.1118633316627037e-05,
      "loss": 0.5116,
      "step": 3420
    },
    {
      "epoch": 1.477333907612792,
      "grad_norm": 0.5094279646873474,
      "learning_rate": 1.1071419865578241e-05,
      "loss": 0.5181,
      "step": 3430
    },
    {
      "epoch": 1.4816410035533543,
      "grad_norm": 0.5605435371398926,
      "learning_rate": 1.1024182241546686e-05,
      "loss": 0.5191,
      "step": 3440
    },
    {
      "epoch": 1.4859480994939163,
      "grad_norm": 0.49941274523735046,
      "learning_rate": 1.097692151029036e-05,
      "loss": 0.5036,
      "step": 3450
    },
    {
      "epoch": 1.4902551954344783,
      "grad_norm": 0.5064433813095093,
      "learning_rate": 1.0929638738088571e-05,
      "loss": 0.5195,
      "step": 3460
    },
    {
      "epoch": 1.4945622913750403,
      "grad_norm": 0.5021061301231384,
      "learning_rate": 1.088233499171792e-05,
      "loss": 0.522,
      "step": 3470
    },
    {
      "epoch": 1.4988693873156025,
      "grad_norm": 0.5188096761703491,
      "learning_rate": 1.0835011338428217e-05,
      "loss": 0.5156,
      "step": 3480
    },
    {
      "epoch": 1.5031764832561645,
      "grad_norm": 0.6124559640884399,
      "learning_rate": 1.0787668845918393e-05,
      "loss": 0.5145,
      "step": 3490
    },
    {
      "epoch": 1.5074835791967267,
      "grad_norm": 0.48937344551086426,
      "learning_rate": 1.074030858231244e-05,
      "loss": 0.515,
      "step": 3500
    },
{ |
|
"epoch": 1.5117906751372887, |
|
"grad_norm": 0.518526017665863, |
|
"learning_rate": 1.0692931616135283e-05, |
|
"loss": 0.505, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.5160977710778507, |
|
"grad_norm": 0.5395667552947998, |
|
"learning_rate": 1.0645539016288686e-05, |
|
"loss": 0.5076, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.5204048670184127, |
|
"grad_norm": 0.495190292596817, |
|
"learning_rate": 1.059813185202714e-05, |
|
"loss": 0.523, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.524711962958975, |
|
"grad_norm": 0.49644342064857483, |
|
"learning_rate": 1.055071119293373e-05, |
|
"loss": 0.5038, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.5290190588995372, |
|
"grad_norm": 0.483696848154068, |
|
"learning_rate": 1.0503278108896e-05, |
|
"loss": 0.5103, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.5333261548400992, |
|
"grad_norm": 0.5149986147880554, |
|
"learning_rate": 1.0455833670081831e-05, |
|
"loss": 0.5402, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.5376332507806612, |
|
"grad_norm": 0.4734952449798584, |
|
"learning_rate": 1.0408378946915282e-05, |
|
"loss": 0.5292, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.5419403467212232, |
|
"grad_norm": 0.5490080118179321, |
|
"learning_rate": 1.0360915010052443e-05, |
|
"loss": 0.5155, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.5462474426617852, |
|
"grad_norm": 0.5176838636398315, |
|
"learning_rate": 1.0313442930357278e-05, |
|
"loss": 0.5111, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.5505545386023474, |
|
"grad_norm": 0.5659157633781433, |
|
"learning_rate": 1.026596377887747e-05, |
|
"loss": 0.5152, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.5548616345429096, |
|
"grad_norm": 0.5195504426956177, |
|
"learning_rate": 1.0218478626820256e-05, |
|
"loss": 0.5178, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.5591687304834716, |
|
"grad_norm": 0.533338189125061, |
|
"learning_rate": 1.0170988545528248e-05, |
|
"loss": 0.5138, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.5634758264240336, |
|
"grad_norm": 0.5108840465545654, |
|
"learning_rate": 1.0123494606455278e-05, |
|
"loss": 0.5273, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.5677829223645956, |
|
"grad_norm": 0.4785379469394684, |
|
"learning_rate": 1.0075997881142208e-05, |
|
"loss": 0.5071, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.5720900183051576, |
|
"grad_norm": 0.49497827887535095, |
|
"learning_rate": 1.0028499441192765e-05, |
|
"loss": 0.5132, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.5763971142457198, |
|
"grad_norm": 0.5214102864265442, |
|
"learning_rate": 9.981000358249368e-06, |
|
"loss": 0.5133, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.580704210186282, |
|
"grad_norm": 0.47462400794029236, |
|
"learning_rate": 9.933501703968928e-06, |
|
"loss": 0.5226, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.585011306126844, |
|
"grad_norm": 0.4743979275226593, |
|
"learning_rate": 9.8860045499987e-06, |
|
"loss": 0.5219, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.589318402067406, |
|
"grad_norm": 0.5265910625457764, |
|
"learning_rate": 9.838509967952076e-06, |
|
"loss": 0.4945, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.593625498007968, |
|
"grad_norm": 0.5075172185897827, |
|
"learning_rate": 9.791019029384437e-06, |
|
"loss": 0.5175, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.59793259394853, |
|
"grad_norm": 0.5206677913665771, |
|
"learning_rate": 9.743532805768948e-06, |
|
"loss": 0.5188, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.6022396898890923, |
|
"grad_norm": 0.4802674651145935, |
|
"learning_rate": 9.696052368472406e-06, |
|
"loss": 0.5064, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.6065467858296545, |
|
"grad_norm": 0.5289535522460938, |
|
"learning_rate": 9.648578788731044e-06, |
|
"loss": 0.5281, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.6108538817702165, |
|
"grad_norm": 0.47722700238227844, |
|
"learning_rate": 9.601113137626394e-06, |
|
"loss": 0.5151, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.6151609777107785, |
|
"grad_norm": 0.4994152784347534, |
|
"learning_rate": 9.553656486061098e-06, |
|
"loss": 0.52, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.6194680736513405, |
|
"grad_norm": 0.48130089044570923, |
|
"learning_rate": 9.506209904734753e-06, |
|
"loss": 0.5336, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.6237751695919027, |
|
"grad_norm": 0.48449528217315674, |
|
"learning_rate": 9.45877446411976e-06, |
|
"loss": 0.5252, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.6280822655324647, |
|
"grad_norm": 0.5411643981933594, |
|
"learning_rate": 9.411351234437163e-06, |
|
"loss": 0.5187, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.632389361473027, |
|
"grad_norm": 0.5133873820304871, |
|
"learning_rate": 9.363941285632507e-06, |
|
"loss": 0.5217, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.636696457413589, |
|
"grad_norm": 0.5814666748046875, |
|
"learning_rate": 9.3165456873517e-06, |
|
"loss": 0.5, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.641003553354151, |
|
"grad_norm": 0.52715665102005, |
|
"learning_rate": 9.269165508916883e-06, |
|
"loss": 0.5184, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.645310649294713, |
|
"grad_norm": 0.48196879029273987, |
|
"learning_rate": 9.221801819302288e-06, |
|
"loss": 0.5191, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.6496177452352752, |
|
"grad_norm": 0.49397778511047363, |
|
"learning_rate": 9.174455687110142e-06, |
|
"loss": 0.5013, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.6539248411758372, |
|
"grad_norm": 0.5037091970443726, |
|
"learning_rate": 9.127128180546548e-06, |
|
"loss": 0.5298, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.6582319371163994, |
|
"grad_norm": 0.5031833052635193, |
|
"learning_rate": 9.079820367397384e-06, |
|
"loss": 0.4929, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.6625390330569614, |
|
"grad_norm": 0.5380353927612305, |
|
"learning_rate": 9.032533315004207e-06, |
|
"loss": 0.4968, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.6668461289975234, |
|
"grad_norm": 0.5191226005554199, |
|
"learning_rate": 8.98526809024018e-06, |
|
"loss": 0.5267, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.6711532249380854, |
|
"grad_norm": 0.5179468393325806, |
|
"learning_rate": 8.938025759486007e-06, |
|
"loss": 0.5159, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.6754603208786476, |
|
"grad_norm": 0.4779166579246521, |
|
"learning_rate": 8.89080738860585e-06, |
|
"loss": 0.5211, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.6797674168192096, |
|
"grad_norm": 0.5136571526527405, |
|
"learning_rate": 8.843614042923318e-06, |
|
"loss": 0.5003, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.6840745127597718, |
|
"grad_norm": 0.540773332118988, |
|
"learning_rate": 8.796446787197383e-06, |
|
"loss": 0.5131, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.6883816087003338, |
|
"grad_norm": 0.5126665234565735, |
|
"learning_rate": 8.749306685598409e-06, |
|
"loss": 0.5093, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.6926887046408958, |
|
"grad_norm": 0.47659188508987427, |
|
"learning_rate": 8.702194801684112e-06, |
|
"loss": 0.5158, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.6969958005814578, |
|
"grad_norm": 0.47945475578308105, |
|
"learning_rate": 8.655112198375564e-06, |
|
"loss": 0.5026, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.70130289652202, |
|
"grad_norm": 0.4939498007297516, |
|
"learning_rate": 8.60805993793323e-06, |
|
"loss": 0.5099, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.7056099924625823, |
|
"grad_norm": 0.5328351259231567, |
|
"learning_rate": 8.561039081932975e-06, |
|
"loss": 0.52, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.7099170884031443, |
|
"grad_norm": 0.49865198135375977, |
|
"learning_rate": 8.514050691242145e-06, |
|
"loss": 0.5077, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.7142241843437063, |
|
"grad_norm": 0.49807870388031006, |
|
"learning_rate": 8.467095825995605e-06, |
|
"loss": 0.4976, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.7185312802842683, |
|
"grad_norm": 0.5023031234741211, |
|
"learning_rate": 8.420175545571837e-06, |
|
"loss": 0.5233, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.7228383762248303, |
|
"grad_norm": 0.49054110050201416, |
|
"learning_rate": 8.373290908569026e-06, |
|
"loss": 0.5115, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.7271454721653925, |
|
"grad_norm": 0.47637811303138733, |
|
"learning_rate": 8.32644297278119e-06, |
|
"loss": 0.5103, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.7314525681059547, |
|
"grad_norm": 0.5239661931991577, |
|
"learning_rate": 8.279632795174304e-06, |
|
"loss": 0.5161, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.7357596640465167, |
|
"grad_norm": 0.5000544190406799, |
|
"learning_rate": 8.232861431862457e-06, |
|
"loss": 0.5113, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.7400667599870787, |
|
"grad_norm": 0.5361005067825317, |
|
"learning_rate": 8.186129938084028e-06, |
|
"loss": 0.5137, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.7443738559276407, |
|
"grad_norm": 0.48270535469055176, |
|
"learning_rate": 8.139439368177868e-06, |
|
"loss": 0.5116, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.7486809518682027, |
|
"grad_norm": 0.48645904660224915, |
|
"learning_rate": 8.092790775559522e-06, |
|
"loss": 0.517, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.752988047808765, |
|
"grad_norm": 0.4865799844264984, |
|
"learning_rate": 8.046185212697459e-06, |
|
"loss": 0.5202, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.7572951437493272, |
|
"grad_norm": 0.5095897912979126, |
|
"learning_rate": 7.999623731089327e-06, |
|
"loss": 0.5186, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.7616022396898892, |
|
"grad_norm": 0.49918055534362793, |
|
"learning_rate": 7.953107381238226e-06, |
|
"loss": 0.5091, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.7659093356304512, |
|
"grad_norm": 0.5209227204322815, |
|
"learning_rate": 7.906637212629011e-06, |
|
"loss": 0.5098, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.7702164315710132, |
|
"grad_norm": 0.5320930480957031, |
|
"learning_rate": 7.860214273704614e-06, |
|
"loss": 0.5172, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.7745235275115752, |
|
"grad_norm": 0.4841155707836151, |
|
"learning_rate": 7.813839611842387e-06, |
|
"loss": 0.4851, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.7788306234521374, |
|
"grad_norm": 0.5300472378730774, |
|
"learning_rate": 7.767514273330473e-06, |
|
"loss": 0.4953, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.7831377193926996, |
|
"grad_norm": 0.5021957159042358, |
|
"learning_rate": 7.721239303344201e-06, |
|
"loss": 0.5112, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.7874448153332616, |
|
"grad_norm": 0.498737096786499, |
|
"learning_rate": 7.675015745922499e-06, |
|
"loss": 0.5045, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.7917519112738236, |
|
"grad_norm": 0.4690532684326172, |
|
"learning_rate": 7.628844643944349e-06, |
|
"loss": 0.5102, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.7960590072143856, |
|
"grad_norm": 0.5077162384986877, |
|
"learning_rate": 7.582727039105255e-06, |
|
"loss": 0.5105, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.8003661031549478, |
|
"grad_norm": 0.47492554783821106, |
|
"learning_rate": 7.536663971893724e-06, |
|
"loss": 0.5008, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.8046731990955098, |
|
"grad_norm": 0.5036799907684326, |
|
"learning_rate": 7.4906564815678205e-06, |
|
"loss": 0.5179, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.808980295036072, |
|
"grad_norm": 0.5044455528259277, |
|
"learning_rate": 7.444705606131697e-06, |
|
"loss": 0.5171, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.813287390976634, |
|
"grad_norm": 0.5645790696144104, |
|
"learning_rate": 7.39881238231218e-06, |
|
"loss": 0.5111, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.817594486917196, |
|
"grad_norm": 0.4966265857219696, |
|
"learning_rate": 7.352977845535387e-06, |
|
"loss": 0.5144, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.821901582857758, |
|
"grad_norm": 0.5225628614425659, |
|
"learning_rate": 7.307203029903354e-06, |
|
"loss": 0.5115, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.8262086787983203, |
|
"grad_norm": 0.5282090902328491, |
|
"learning_rate": 7.261488968170713e-06, |
|
"loss": 0.5251, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.8305157747388823, |
|
"grad_norm": 0.5346629023551941, |
|
"learning_rate": 7.21583669172139e-06, |
|
"loss": 0.5042, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.8348228706794445, |
|
"grad_norm": 0.5141210556030273, |
|
"learning_rate": 7.170247230545335e-06, |
|
"loss": 0.5199, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.8391299666200065, |
|
"grad_norm": 0.5251668691635132, |
|
"learning_rate": 7.124721613215275e-06, |
|
"loss": 0.4936, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.8434370625605685, |
|
"grad_norm": 0.5125293731689453, |
|
"learning_rate": 7.079260866863523e-06, |
|
"loss": 0.5161, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.8477441585011305, |
|
"grad_norm": 0.4881208837032318, |
|
"learning_rate": 7.033866017158797e-06, |
|
"loss": 0.5142, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.8520512544416927, |
|
"grad_norm": 0.5215027928352356, |
|
"learning_rate": 6.9885380882830735e-06, |
|
"loss": 0.5097, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.8563583503822547, |
|
"grad_norm": 0.4931368827819824, |
|
"learning_rate": 6.943278102908491e-06, |
|
"loss": 0.5123, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.860665446322817, |
|
"grad_norm": 0.5080362558364868, |
|
"learning_rate": 6.898087082174267e-06, |
|
"loss": 0.5093, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.864972542263379, |
|
"grad_norm": 0.537807285785675, |
|
"learning_rate": 6.852966045663671e-06, |
|
"loss": 0.5245, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.869279638203941, |
|
"grad_norm": 0.5395597815513611, |
|
"learning_rate": 6.807916011381008e-06, |
|
"loss": 0.5016, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.873586734144503, |
|
"grad_norm": 0.48623430728912354, |
|
"learning_rate": 6.762937995728663e-06, |
|
"loss": 0.4962, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.8778938300850652, |
|
"grad_norm": 0.5058403611183167, |
|
"learning_rate": 6.718033013484147e-06, |
|
"loss": 0.5401, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.8822009260256274, |
|
"grad_norm": 0.5220633149147034, |
|
"learning_rate": 6.673202077777239e-06, |
|
"loss": 0.5112, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.8865080219661894, |
|
"grad_norm": 0.5163370966911316, |
|
"learning_rate": 6.6284462000670924e-06, |
|
"loss": 0.5231, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.8908151179067514, |
|
"grad_norm": 0.508660614490509, |
|
"learning_rate": 6.583766390119437e-06, |
|
"loss": 0.5304, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.8951222138473134, |
|
"grad_norm": 0.568144679069519, |
|
"learning_rate": 6.539163655983786e-06, |
|
"loss": 0.5086, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.8994293097878754, |
|
"grad_norm": 0.5001341700553894, |
|
"learning_rate": 6.494639003970701e-06, |
|
"loss": 0.5084, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.9037364057284376, |
|
"grad_norm": 0.5228297710418701, |
|
"learning_rate": 6.450193438629078e-06, |
|
"loss": 0.504, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.9080435016689998, |
|
"grad_norm": 0.4816001057624817, |
|
"learning_rate": 6.40582796272349e-06, |
|
"loss": 0.5102, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.9123505976095618, |
|
"grad_norm": 0.5058324933052063, |
|
"learning_rate": 6.361543577211566e-06, |
|
"loss": 0.524, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.9166576935501238, |
|
"grad_norm": 0.5428106188774109, |
|
"learning_rate": 6.317341281221392e-06, |
|
"loss": 0.5082, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.9209647894906858, |
|
"grad_norm": 0.5131290555000305, |
|
"learning_rate": 6.273222072028991e-06, |
|
"loss": 0.5316, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.9252718854312478, |
|
"grad_norm": 0.5238609910011292, |
|
"learning_rate": 6.2291869450358074e-06, |
|
"loss": 0.5021, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.92957898137181, |
|
"grad_norm": 0.4843258261680603, |
|
"learning_rate": 6.1852368937462585e-06, |
|
"loss": 0.5048, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.9338860773123723, |
|
"grad_norm": 0.5138316750526428, |
|
"learning_rate": 6.141372909745307e-06, |
|
"loss": 0.5352, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.9381931732529343, |
|
"grad_norm": 0.49319642782211304, |
|
"learning_rate": 6.097595982676103e-06, |
|
"loss": 0.5065, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.9425002691934963, |
|
"grad_norm": 0.5176106095314026, |
|
"learning_rate": 6.053907100217648e-06, |
|
"loss": 0.5155, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.9468073651340583, |
|
"grad_norm": 0.4772352874279022, |
|
"learning_rate": 6.010307248062514e-06, |
|
"loss": 0.5056, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.9511144610746203, |
|
"grad_norm": 0.5366437435150146, |
|
"learning_rate": 5.966797409894607e-06, |
|
"loss": 0.4888, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.9554215570151825, |
|
"grad_norm": 0.4917809069156647, |
|
"learning_rate": 5.923378567366956e-06, |
|
"loss": 0.5221, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.9597286529557447, |
|
"grad_norm": 0.5597509741783142, |
|
"learning_rate": 5.880051700079596e-06, |
|
"loss": 0.5225, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.9640357488963067, |
|
"grad_norm": 0.5258151888847351, |
|
"learning_rate": 5.836817785557448e-06, |
|
"loss": 0.5031, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.9683428448368687, |
|
"grad_norm": 0.5679864287376404, |
|
"learning_rate": 5.7936777992282565e-06, |
|
"loss": 0.5074, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 1.9726499407774307, |
|
"grad_norm": 0.5309889912605286, |
|
"learning_rate": 5.750632714400607e-06, |
|
"loss": 0.521, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.976957036717993, |
|
"grad_norm": 0.5293132662773132, |
|
"learning_rate": 5.707683502241936e-06, |
|
"loss": 0.5133, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 1.981264132658555, |
|
"grad_norm": 0.5223381519317627, |
|
"learning_rate": 5.664831131756652e-06, |
|
"loss": 0.5129, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.9855712285991172, |
|
"grad_norm": 0.5365522503852844, |
|
"learning_rate": 5.622076569764247e-06, |
|
"loss": 0.504, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 1.9898783245396792, |
|
"grad_norm": 0.5084212422370911, |
|
"learning_rate": 5.5794207808774904e-06, |
|
"loss": 0.488, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.9941854204802412, |
|
"grad_norm": 0.4913804531097412, |
|
"learning_rate": 5.536864727480683e-06, |
|
"loss": 0.5098, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 1.9984925164208032, |
|
"grad_norm": 0.5197212100028992, |
|
"learning_rate": 5.4944093697079136e-06, |
|
"loss": 0.5066, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.002799612361365, |
|
"grad_norm": 0.51143479347229, |
|
"learning_rate": 5.45205566542143e-06, |
|
"loss": 0.4521, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.0071067083019276, |
|
"grad_norm": 0.5107315182685852, |
|
"learning_rate": 5.4098045701899934e-06, |
|
"loss": 0.3968, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.0114138042424896, |
|
"grad_norm": 0.5407351851463318, |
|
"learning_rate": 5.367657037267354e-06, |
|
"loss": 0.3933, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.0157209001830516, |
|
"grad_norm": 0.5835046172142029, |
|
"learning_rate": 5.325614017570712e-06, |
|
"loss": 0.3897, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.0200279961236136, |
|
"grad_norm": 0.5047739744186401, |
|
"learning_rate": 5.283676459659288e-06, |
|
"loss": 0.3992, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 2.0243350920641756, |
|
"grad_norm": 0.5422953963279724, |
|
"learning_rate": 5.241845309712921e-06, |
|
"loss": 0.4131, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.0286421880047376, |
|
"grad_norm": 0.5471384525299072, |
|
"learning_rate": 5.2001215115106814e-06, |
|
"loss": 0.3955, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 2.0329492839453, |
|
"grad_norm": 0.5800908803939819, |
|
"learning_rate": 5.158506006409644e-06, |
|
"loss": 0.397, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.037256379885862, |
|
"grad_norm": 0.5329377055168152, |
|
"learning_rate": 5.116999733323591e-06, |
|
"loss": 0.4017, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 2.041563475826424, |
|
"grad_norm": 0.556845486164093, |
|
"learning_rate": 5.075603628701869e-06, |
|
"loss": 0.4009, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.045870571766986, |
|
"grad_norm": 0.5501790642738342, |
|
"learning_rate": 5.034318626508223e-06, |
|
"loss": 0.3969, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.050177667707548, |
|
"grad_norm": 0.5467825531959534, |
|
"learning_rate": 4.993145658199766e-06, |
|
"loss": 0.3996, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.05448476364811, |
|
"grad_norm": 0.5644121766090393, |
|
"learning_rate": 4.952085652705938e-06, |
|
"loss": 0.3926, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 2.0587918595886725, |
|
"grad_norm": 0.5279033780097961, |
|
"learning_rate": 4.911139536407542e-06, |
|
"loss": 0.3742, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.0630989555292345, |
|
"grad_norm": 0.5283676981925964, |
|
"learning_rate": 4.870308233115876e-06, |
|
"loss": 0.3893, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 2.0674060514697965, |
|
"grad_norm": 0.5302291512489319, |
|
"learning_rate": 4.82959266405184e-06, |
|
"loss": 0.3956, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.0717131474103585, |
|
"grad_norm": 0.5381713509559631, |
|
"learning_rate": 4.788993747825209e-06, |
|
"loss": 0.4124, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 2.0760202433509205, |
|
"grad_norm": 0.5772622227668762, |
|
"learning_rate": 4.748512400413861e-06, |
|
"loss": 0.405, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.0803273392914825, |
|
"grad_norm": 0.5383191704750061, |
|
"learning_rate": 4.708149535143138e-06, |
|
"loss": 0.3874, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 2.084634435232045, |
|
"grad_norm": 0.5546970963478088, |
|
"learning_rate": 4.667906062665234e-06, |
|
"loss": 0.3994, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.088941531172607, |
|
"grad_norm": 0.5541481375694275, |
|
"learning_rate": 4.627782890938632e-06, |
|
"loss": 0.4073, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.093248627113169, |
|
"grad_norm": 0.5656886100769043, |
|
"learning_rate": 4.587780925207654e-06, |
|
"loss": 0.3986, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.097555723053731, |
|
"grad_norm": 0.5167860984802246, |
|
"learning_rate": 4.5479010679819965e-06, |
|
"loss": 0.3994, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 2.101862818994293, |
|
"grad_norm": 0.585415780544281, |
|
"learning_rate": 4.50814421901641e-06, |
|
"loss": 0.3959, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.1061699149348554, |
|
"grad_norm": 0.5390037894248962, |
|
"learning_rate": 4.46851127529035e-06, |
|
"loss": 0.393, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 2.1104770108754174, |
|
"grad_norm": 0.5685362815856934, |
|
"learning_rate": 4.42900313098779e-06, |
|
"loss": 0.4031, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.1147841068159794, |
|
"grad_norm": 0.5294394493103027, |
|
"learning_rate": 4.389620677477023e-06, |
|
"loss": 0.3926, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 2.1190912027565414, |
|
"grad_norm": 0.5693227648735046, |
|
"learning_rate": 4.3503648032905384e-06, |
|
"loss": 0.3909, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.1233982986971034, |
|
"grad_norm": 0.6294069886207581, |
|
"learning_rate": 4.311236394105006e-06, |
|
"loss": 0.3908, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 2.1277053946376654, |
|
"grad_norm": 0.566862165927887, |
|
"learning_rate": 4.27223633272126e-06, |
|
"loss": 0.4019, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.132012490578228, |
|
"grad_norm": 0.5680539608001709, |
|
"learning_rate": 4.233365499044416e-06, |
|
"loss": 0.3957, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.13631958651879, |
|
"grad_norm": 0.5697780251502991, |
|
"learning_rate": 4.194624770063985e-06, |
|
"loss": 0.3876, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.140626682459352, |
|
"grad_norm": 0.5857852697372437, |
|
"learning_rate": 4.1560150198341174e-06, |
|
"loss": 0.3986, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 2.144933778399914, |
|
"grad_norm": 0.5707722306251526, |
|
"learning_rate": 4.11753711945386e-06, |
|
"loss": 0.4165, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.149240874340476, |
|
"grad_norm": 0.5498836040496826, |
|
"learning_rate": 4.079191937047511e-06, |
|
"loss": 0.4236, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 2.153547970281038, |
|
"grad_norm": 0.6008414626121521, |
|
"learning_rate": 4.040980337745044e-06, |
|
"loss": 0.3955, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.1578550662216003, |
|
"grad_norm": 0.5871570110321045, |
|
"learning_rate": 4.002903183662566e-06, |
|
"loss": 0.3939, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 2.1621621621621623, |
|
"grad_norm": 0.5556260347366333, |
|
"learning_rate": 3.964961333882893e-06, |
|
"loss": 0.4005, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.1664692581027243, |
|
"grad_norm": 0.5592585206031799, |
|
"learning_rate": 3.927155644436144e-06, |
|
"loss": 0.4035, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 2.1707763540432863, |
|
"grad_norm": 0.5638931393623352, |
|
"learning_rate": 3.889486968280448e-06, |
|
"loss": 0.3961, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.1750834499838483, |
|
"grad_norm": 0.5473156571388245, |
|
"learning_rate": 3.851956155282682e-06, |
|
"loss": 0.3999, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.1793905459244103, |
|
"grad_norm": 0.7088154554367065, |
|
"learning_rate": 3.814564052199313e-06, |
|
"loss": 0.3919, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.1836976418649727, |
|
"grad_norm": 0.569315493106842, |
|
"learning_rate": 3.777311502657279e-06, |
|
"loss": 0.3924, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.1880047378055347, |
|
"grad_norm": 0.6128218770027161, |
|
"learning_rate": 3.7401993471349616e-06, |
|
"loss": 0.4094, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.1923118337460967, |
|
"grad_norm": 0.5971004962921143, |
|
"learning_rate": 3.7032284229432325e-06, |
|
"loss": 0.3786, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 2.1966189296866587, |
|
"grad_norm": 0.5701526999473572, |
|
"learning_rate": 3.666399564206541e-06, |
|
"loss": 0.3912, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.2009260256272207, |
|
"grad_norm": 0.5547009706497192, |
|
"learning_rate": 3.6297136018441215e-06, |
|
"loss": 0.3866, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 2.2052331215677827, |
|
"grad_norm": 0.5613463521003723, |
|
"learning_rate": 3.59317136355122e-06, |
|
"loss": 0.3926, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.209540217508345, |
|
"grad_norm": 0.6126610040664673, |
|
"learning_rate": 3.556773673780446e-06, |
|
"loss": 0.389, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 2.213847313448907, |
|
"grad_norm": 0.5699272751808167, |
|
"learning_rate": 3.520521353723142e-06, |
|
"loss": 0.3982, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.218154409389469, |
|
"grad_norm": 0.593333899974823, |
|
"learning_rate": 3.484415221290889e-06, |
|
"loss": 0.3826, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.222461505330031, |
|
"grad_norm": 0.6188777685165405, |
|
"learning_rate": 3.448456091097023e-06, |
|
"loss": 0.4, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.226768601270593, |
|
"grad_norm": 0.5949888825416565, |
|
"learning_rate": 3.4126447744382753e-06, |
|
"loss": 0.4062, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 2.231075697211155, |
|
"grad_norm": 0.5788257718086243, |
|
"learning_rate": 3.376982079276464e-06, |
|
"loss": 0.3881, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.2353827931517176, |
|
"grad_norm": 0.5726456642150879, |
|
"learning_rate": 3.3414688102202564e-06, |
|
"loss": 0.3968, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 2.2396898890922796, |
|
"grad_norm": 0.5855600833892822, |
|
"learning_rate": 3.3061057685070354e-06, |
|
"loss": 0.3925, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.2439969850328416, |
|
"grad_norm": 0.5823237299919128, |
|
"learning_rate": 3.2708937519847916e-06, |
|
"loss": 0.3875, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 2.2483040809734036, |
|
"grad_norm": 0.5852989554405212, |
|
"learning_rate": 3.23583355509416e-06, |
|
"loss": 0.3985, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 2.2526111769139656, |
|
"grad_norm": 0.5461825728416443, |
|
"learning_rate": 3.200925968850459e-06, |
|
"loss": 0.3917, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 2.256918272854528, |
|
"grad_norm": 0.5536659359931946, |
|
"learning_rate": 3.166171780825876e-06, |
|
"loss": 0.3963, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 2.26122536879509, |
|
"grad_norm": 0.5736192464828491, |
|
"learning_rate": 3.1315717751316755e-06, |
|
"loss": 0.4114, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.265532464735652, |
|
"grad_norm": 0.5808764100074768, |
|
"learning_rate": 3.097126732400515e-06, |
|
"loss": 0.3795, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 2.269839560676214, |
|
"grad_norm": 0.5790621042251587, |
|
"learning_rate": 3.0628374297688436e-06, |
|
"loss": 0.3991, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 2.274146656616776, |
|
"grad_norm": 0.5211635231971741, |
|
"learning_rate": 3.0287046408593478e-06, |
|
"loss": 0.3796, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 2.278453752557338, |
|
"grad_norm": 0.6152241230010986, |
|
"learning_rate": 2.994729135763522e-06, |
|
"loss": 0.3976, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 2.2827608484979, |
|
"grad_norm": 0.6017261147499084, |
|
"learning_rate": 2.9609116810242677e-06, |
|
"loss": 0.4031, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.2870679444384625, |
|
"grad_norm": 0.5612776279449463, |
|
"learning_rate": 2.9272530396186194e-06, |
|
"loss": 0.3985, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 2.2913750403790245, |
|
"grad_norm": 0.6065710186958313, |
|
"learning_rate": 2.893753970940525e-06, |
|
"loss": 0.3975, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 2.2956821363195865, |
|
"grad_norm": 0.5793972611427307, |
|
"learning_rate": 2.8604152307837064e-06, |
|
"loss": 0.3889, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 2.2999892322601485, |
|
"grad_norm": 0.5591062307357788, |
|
"learning_rate": 2.8272375713246125e-06, |
|
"loss": 0.3903, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 2.3042963282007105, |
|
"grad_norm": 0.5505937337875366, |
|
"learning_rate": 2.794221741105446e-06, |
|
"loss": 0.397, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.308603424141273, |
|
"grad_norm": 0.6174246668815613, |
|
"learning_rate": 2.7613684850172882e-06, |
|
"loss": 0.3966, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 2.312910520081835, |
|
"grad_norm": 0.6093124747276306, |
|
"learning_rate": 2.7286785442832685e-06, |
|
"loss": 0.3902, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 2.317217616022397, |
|
"grad_norm": 0.5350244045257568, |
|
"learning_rate": 2.696152656441868e-06, |
|
"loss": 0.3935, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 2.321524711962959, |
|
"grad_norm": 0.5422816276550293, |
|
"learning_rate": 2.663791555330255e-06, |
|
"loss": 0.3924, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 2.325831807903521, |
|
"grad_norm": 0.5582048892974854, |
|
"learning_rate": 2.6315959710677464e-06, |
|
"loss": 0.397, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.330138903844083, |
|
"grad_norm": 0.5601301789283752, |
|
"learning_rate": 2.599566630039332e-06, |
|
"loss": 0.3813, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 2.334445999784645, |
|
"grad_norm": 0.5601345896720886, |
|
"learning_rate": 2.567704254879274e-06, |
|
"loss": 0.3974, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 2.3387530957252074, |
|
"grad_norm": 0.614778459072113, |
|
"learning_rate": 2.536009564454817e-06, |
|
"loss": 0.3836, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 2.3430601916657694, |
|
"grad_norm": 0.5759994983673096, |
|
"learning_rate": 2.504483273849958e-06, |
|
"loss": 0.3949, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 2.3473672876063314, |
|
"grad_norm": 0.586625874042511, |
|
"learning_rate": 2.473126094349331e-06, |
|
"loss": 0.3829, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.3516743835468934, |
|
"grad_norm": 0.5470960736274719, |
|
"learning_rate": 2.4419387334221333e-06, |
|
"loss": 0.3881, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.3559814794874554, |
|
"grad_norm": 0.5486071705818176, |
|
"learning_rate": 2.4109218947061884e-06, |
|
"loss": 0.399, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.360288575428018, |
|
"grad_norm": 0.5942230820655823, |
|
"learning_rate": 2.3800762779920574e-06, |
|
"loss": 0.3921, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.36459567136858, |
|
"grad_norm": 0.5786502957344055, |
|
"learning_rate": 2.3494025792072474e-06, |
|
"loss": 0.3901, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.368902767309142, |
|
"grad_norm": 0.6082814931869507, |
|
"learning_rate": 2.3189014904005247e-06, |
|
"loss": 0.391, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.373209863249704, |
|
"grad_norm": 0.612694501876831, |
|
"learning_rate": 2.2885736997262863e-06, |
|
"loss": 0.3981, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.377516959190266, |
|
"grad_norm": 0.5050374865531921, |
|
"learning_rate": 2.2584198914290435e-06, |
|
"loss": 0.3951, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.381824055130828, |
|
"grad_norm": 0.5465214848518372, |
|
"learning_rate": 2.2284407458279743e-06, |
|
"loss": 0.4, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.3861311510713903, |
|
"grad_norm": 0.5544529557228088, |
|
"learning_rate": 2.1986369393015914e-06, |
|
"loss": 0.3836, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.3904382470119523, |
|
"grad_norm": 0.586337149143219, |
|
"learning_rate": 2.169009144272467e-06, |
|
"loss": 0.4139, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.3947453429525143, |
|
"grad_norm": 0.6219981908798218, |
|
"learning_rate": 2.1395580291920625e-06, |
|
"loss": 0.4011, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.3990524388930763, |
|
"grad_norm": 0.6941688060760498, |
|
"learning_rate": 2.110284258525658e-06, |
|
"loss": 0.405, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.4033595348336383, |
|
"grad_norm": 0.5210332274436951, |
|
"learning_rate": 2.081188492737345e-06, |
|
"loss": 0.4017, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.4076666307742007, |
|
"grad_norm": 0.5930879712104797, |
|
"learning_rate": 2.0522713882751445e-06, |
|
"loss": 0.3918, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.4119737267147627, |
|
"grad_norm": 0.5910641551017761, |
|
"learning_rate": 2.0235335975561775e-06, |
|
"loss": 0.3996, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.4162808226553247, |
|
"grad_norm": 0.5827698111534119, |
|
"learning_rate": 1.9949757689519555e-06, |
|
"loss": 0.3854, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.4205879185958867, |
|
"grad_norm": 0.5518185496330261, |
|
"learning_rate": 1.966598546773757e-06, |
|
"loss": 0.4077, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.4248950145364487, |
|
"grad_norm": 0.6005439162254333, |
|
"learning_rate": 1.938402571258073e-06, |
|
"loss": 0.4095, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.4292021104770107, |
|
"grad_norm": 0.5761522054672241, |
|
"learning_rate": 1.9103884785521887e-06, |
|
"loss": 0.3966, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.4335092064175727, |
|
"grad_norm": 0.5546764135360718, |
|
"learning_rate": 1.8825569006998012e-06, |
|
"loss": 0.395, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.437816302358135, |
|
"grad_norm": 0.5639533996582031, |
|
"learning_rate": 1.8549084656267846e-06, |
|
"loss": 0.3938, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 2.442123398298697, |
|
"grad_norm": 0.5662581324577332, |
|
"learning_rate": 1.8274437971270044e-06, |
|
"loss": 0.4004, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 2.446430494239259, |
|
"grad_norm": 0.5856819748878479, |
|
"learning_rate": 1.8001635148482621e-06, |
|
"loss": 0.3946, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 2.450737590179821, |
|
"grad_norm": 0.5766512751579285, |
|
"learning_rate": 1.7730682342782967e-06, |
|
"loss": 0.3931, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 2.455044686120383, |
|
"grad_norm": 0.6373909711837769, |
|
"learning_rate": 1.7461585667309045e-06, |
|
"loss": 0.4006, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.4593517820609456, |
|
"grad_norm": 0.5694748759269714, |
|
"learning_rate": 1.719435119332159e-06, |
|
"loss": 0.3989, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 2.4636588780015076, |
|
"grad_norm": 0.5339934229850769, |
|
"learning_rate": 1.6928984950066918e-06, |
|
"loss": 0.3966, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 2.4679659739420696, |
|
"grad_norm": 0.5888383388519287, |
|
"learning_rate": 1.6665492924641113e-06, |
|
"loss": 0.3833, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 2.4722730698826316, |
|
"grad_norm": 0.5573282241821289, |
|
"learning_rate": 1.6403881061854732e-06, |
|
"loss": 0.4, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 2.4765801658231936, |
|
"grad_norm": 0.5756634473800659, |
|
"learning_rate": 1.6144155264098883e-06, |
|
"loss": 0.3964, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.4808872617637556, |
|
"grad_norm": 0.5784355401992798, |
|
"learning_rate": 1.58863213912119e-06, |
|
"loss": 0.3762, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 2.4851943577043176, |
|
"grad_norm": 0.6090006828308105, |
|
"learning_rate": 1.563038526034727e-06, |
|
"loss": 0.3986, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 2.48950145364488, |
|
"grad_norm": 0.5565779209136963, |
|
"learning_rate": 1.5376352645842242e-06, |
|
"loss": 0.3916, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 2.493808549585442, |
|
"grad_norm": 0.6107103228569031, |
|
"learning_rate": 1.5124229279087655e-06, |
|
"loss": 0.4093, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 2.498115645526004, |
|
"grad_norm": 0.5300205945968628, |
|
"learning_rate": 1.487402084839864e-06, |
|
"loss": 0.4047, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.502422741466566, |
|
"grad_norm": 0.6008495688438416, |
|
"learning_rate": 1.4625732998886178e-06, |
|
"loss": 0.4023, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 2.5067298374071285, |
|
"grad_norm": 0.5560673475265503, |
|
"learning_rate": 1.437937133232985e-06, |
|
"loss": 0.3968, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 2.5110369333476905, |
|
"grad_norm": 0.5503118634223938, |
|
"learning_rate": 1.413494140705136e-06, |
|
"loss": 0.3876, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 2.5153440292882525, |
|
"grad_norm": 0.5559957027435303, |
|
"learning_rate": 1.3892448737789243e-06, |
|
"loss": 0.392, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 2.5196511252288145, |
|
"grad_norm": 0.5354902148246765, |
|
"learning_rate": 1.365189879557426e-06, |
|
"loss": 0.3988, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.5239582211693765, |
|
"grad_norm": 0.577046275138855, |
|
"learning_rate": 1.3413297007606196e-06, |
|
"loss": 0.3948, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 2.5282653171099385, |
|
"grad_norm": 0.5745800733566284, |
|
"learning_rate": 1.3176648757131205e-06, |
|
"loss": 0.395, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 2.5325724130505005, |
|
"grad_norm": 0.5721185207366943, |
|
"learning_rate": 1.2941959383320478e-06, |
|
"loss": 0.3918, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 2.5368795089910625, |
|
"grad_norm": 0.5935482978820801, |
|
"learning_rate": 1.2709234181149765e-06, |
|
"loss": 0.376, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 2.541186604931625, |
|
"grad_norm": 0.5709375143051147, |
|
"learning_rate": 1.2478478401279848e-06, |
|
"loss": 0.3881, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.545493700872187, |
|
"grad_norm": 0.5233684182167053, |
|
"learning_rate": 1.2249697249938197e-06, |
|
"loss": 0.3945, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 2.549800796812749, |
|
"grad_norm": 0.5812388062477112, |
|
"learning_rate": 1.2022895888801333e-06, |
|
"loss": 0.3984, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 2.554107892753311, |
|
"grad_norm": 0.560550332069397, |
|
"learning_rate": 1.1798079434878584e-06, |
|
"loss": 0.3942, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 2.5584149886938734, |
|
"grad_norm": 0.6010858416557312, |
|
"learning_rate": 1.1575252960396422e-06, |
|
"loss": 0.3851, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 2.5627220846344354, |
|
"grad_norm": 0.5857875347137451, |
|
"learning_rate": 1.1354421492684252e-06, |
|
"loss": 0.3993, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.5670291805749974, |
|
"grad_norm": 0.604179859161377, |
|
"learning_rate": 1.1135590014060772e-06, |
|
"loss": 0.388, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 2.5713362765155594, |
|
"grad_norm": 0.569106936454773, |
|
"learning_rate": 1.0918763461721648e-06, |
|
"loss": 0.4014, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 2.5756433724561214, |
|
"grad_norm": 0.5742547512054443, |
|
"learning_rate": 1.0703946727628234e-06, |
|
"loss": 0.3839, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 2.5799504683966834, |
|
"grad_norm": 0.5561407208442688, |
|
"learning_rate": 1.0491144658397e-06, |
|
"loss": 0.3853, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 2.5842575643372454, |
|
"grad_norm": 0.5482295155525208, |
|
"learning_rate": 1.0280362055190341e-06, |
|
"loss": 0.3876, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.588564660277808, |
|
"grad_norm": 0.5737982392311096, |
|
"learning_rate": 1.0071603673608176e-06, |
|
"loss": 0.4059, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 2.59287175621837, |
|
"grad_norm": 0.547715961933136, |
|
"learning_rate": 9.864874223580668e-07, |
|
"loss": 0.3837, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 2.597178852158932, |
|
"grad_norm": 0.607851505279541, |
|
"learning_rate": 9.66017836926203e-07, |
|
"loss": 0.3779, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 2.601485948099494, |
|
"grad_norm": 0.5557613968849182, |
|
"learning_rate": 9.457520728925151e-07, |
|
"loss": 0.3995, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 2.605793044040056, |
|
"grad_norm": 0.5470052361488342, |
|
"learning_rate": 9.256905874857535e-07, |
|
"loss": 0.3916, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.6101001399806183, |
|
"grad_norm": 0.5718830227851868, |
|
"learning_rate": 9.058338333258032e-07, |
|
"loss": 0.3997, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 2.6144072359211803, |
|
"grad_norm": 0.5838637948036194, |
|
"learning_rate": 8.861822584134882e-07, |
|
"loss": 0.39, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 2.6187143318617423, |
|
"grad_norm": 0.5819488763809204, |
|
"learning_rate": 8.667363061204415e-07, |
|
"loss": 0.4028, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 2.6230214278023043, |
|
"grad_norm": 0.5477743744850159, |
|
"learning_rate": 8.474964151791232e-07, |
|
"loss": 0.3979, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 2.6273285237428663, |
|
"grad_norm": 0.6217262744903564, |
|
"learning_rate": 8.284630196729059e-07, |
|
"loss": 0.3993, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.6316356196834283, |
|
"grad_norm": 0.5514227747917175, |
|
"learning_rate": 8.096365490262925e-07, |
|
"loss": 0.4058, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 2.6359427156239903, |
|
"grad_norm": 0.645946204662323, |
|
"learning_rate": 7.910174279952232e-07, |
|
"loss": 0.3992, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 2.6402498115645527, |
|
"grad_norm": 0.5741420984268188, |
|
"learning_rate": 7.726060766574883e-07, |
|
"loss": 0.3938, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 2.6445569075051147, |
|
"grad_norm": 0.5910946726799011, |
|
"learning_rate": 7.544029104032558e-07, |
|
"loss": 0.3898, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 2.6488640034456767, |
|
"grad_norm": 0.5803595185279846, |
|
"learning_rate": 7.364083399256971e-07, |
|
"loss": 0.388, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.6531710993862387, |
|
"grad_norm": 0.596809446811676, |
|
"learning_rate": 7.186227712117266e-07, |
|
"loss": 0.388, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 2.6574781953268007, |
|
"grad_norm": 0.6213387250900269, |
|
"learning_rate": 7.010466055328313e-07, |
|
"loss": 0.3839, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 2.661785291267363, |
|
"grad_norm": 0.5913180112838745, |
|
"learning_rate": 6.836802394360276e-07, |
|
"loss": 0.3989, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 2.666092387207925, |
|
"grad_norm": 0.6089721322059631, |
|
"learning_rate": 6.665240647349125e-07, |
|
"loss": 0.4039, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 2.670399483148487, |
|
"grad_norm": 0.5730729103088379, |
|
"learning_rate": 6.495784685008133e-07, |
|
"loss": 0.3951, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.674706579089049, |
|
"grad_norm": 0.5562758445739746, |
|
"learning_rate": 6.32843833054072e-07, |
|
"loss": 0.3837, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 2.679013675029611, |
|
"grad_norm": 0.5627213716506958, |
|
"learning_rate": 6.16320535955407e-07, |
|
"loss": 0.3712, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 2.683320770970173, |
|
"grad_norm": 0.559660017490387, |
|
"learning_rate": 6.000089499973971e-07, |
|
"loss": 0.3901, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 2.687627866910735, |
|
"grad_norm": 0.6018761992454529, |
|
"learning_rate": 5.839094431960713e-07, |
|
"loss": 0.383, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 2.6919349628512976, |
|
"grad_norm": 0.5534284710884094, |
|
"learning_rate": 5.680223787826089e-07, |
|
"loss": 0.3925, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.6962420587918596, |
|
"grad_norm": 0.5682888031005859, |
|
"learning_rate": 5.523481151951427e-07, |
|
"loss": 0.3929, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 2.7005491547324216, |
|
"grad_norm": 0.6271238923072815, |
|
"learning_rate": 5.368870060706677e-07, |
|
"loss": 0.3942, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 2.7048562506729836, |
|
"grad_norm": 0.5881267786026001, |
|
"learning_rate": 5.216394002370695e-07, |
|
"loss": 0.3876, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 2.709163346613546, |
|
"grad_norm": 0.6085900068283081, |
|
"learning_rate": 5.066056417052445e-07, |
|
"loss": 0.3958, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 2.713470442554108, |
|
"grad_norm": 0.5912172198295593, |
|
"learning_rate": 4.917860696613541e-07, |
|
"loss": 0.3887, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.71777753849467, |
|
"grad_norm": 0.6698789596557617, |
|
"learning_rate": 4.771810184591541e-07, |
|
"loss": 0.3899, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 2.722084634435232, |
|
"grad_norm": 0.5682712197303772, |
|
"learning_rate": 4.627908176124618e-07, |
|
"loss": 0.3826, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 2.726391730375794, |
|
"grad_norm": 0.5702280402183533, |
|
"learning_rate": 4.486157917877232e-07, |
|
"loss": 0.3908, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 2.730698826316356, |
|
"grad_norm": 0.5540564060211182, |
|
"learning_rate": 4.346562607966787e-07, |
|
"loss": 0.3962, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 2.735005922256918, |
|
"grad_norm": 0.6031074523925781, |
|
"learning_rate": 4.209125395891589e-07, |
|
"loss": 0.3791, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 2.73931301819748, |
|
"grad_norm": 0.5727553963661194, |
|
"learning_rate": 4.0738493824596715e-07, |
|
"loss": 0.4023, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 2.7436201141380425, |
|
"grad_norm": 0.5374717116355896, |
|
"learning_rate": 3.940737619718937e-07, |
|
"loss": 0.38, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 2.7479272100786045, |
|
"grad_norm": 0.5720168352127075, |
|
"learning_rate": 3.809793110888249e-07, |
|
"loss": 0.4011, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 2.7522343060191665, |
|
"grad_norm": 0.5751203894615173, |
|
"learning_rate": 3.6810188102896605e-07, |
|
"loss": 0.3941, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 2.7565414019597285, |
|
"grad_norm": 0.5838513970375061, |
|
"learning_rate": 3.554417623281825e-07, |
|
"loss": 0.3834, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.760848497900291, |
|
"grad_norm": 0.6204310059547424, |
|
"learning_rate": 3.429992406194338e-07, |
|
"loss": 0.3933, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 2.765155593840853, |
|
"grad_norm": 0.6237754225730896, |
|
"learning_rate": 3.3077459662634205e-07, |
|
"loss": 0.3911, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 2.769462689781415, |
|
"grad_norm": 0.561553418636322, |
|
"learning_rate": 3.1876810615684705e-07, |
|
"loss": 0.3847, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 2.773769785721977, |
|
"grad_norm": 0.568580150604248, |
|
"learning_rate": 3.069800400969947e-07, |
|
"loss": 0.3967, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 2.778076881662539, |
|
"grad_norm": 0.6103531122207642, |
|
"learning_rate": 2.954106644048127e-07, |
|
"loss": 0.3731, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 2.782383977603101, |
|
"grad_norm": 0.560199499130249, |
|
"learning_rate": 2.840602401043213e-07, |
|
"loss": 0.3889, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 2.786691073543663, |
|
"grad_norm": 0.5612174868583679, |
|
"learning_rate": 2.7292902327963776e-07, |
|
"loss": 0.3915, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 2.7909981694842254, |
|
"grad_norm": 0.5860500335693359, |
|
"learning_rate": 2.620172650692021e-07, |
|
"loss": 0.4063, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 2.7953052654247874, |
|
"grad_norm": 0.6044652462005615, |
|
"learning_rate": 2.513252116601062e-07, |
|
"loss": 0.39, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 2.7996123613653494, |
|
"grad_norm": 0.5966377258300781, |
|
"learning_rate": 2.408531042825446e-07, |
|
"loss": 0.3965, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.8039194573059114, |
|
"grad_norm": 0.5729289650917053, |
|
"learning_rate": 2.3060117920437164e-07, |
|
"loss": 0.3798, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 2.8082265532464734, |
|
"grad_norm": 0.6403810977935791, |
|
"learning_rate": 2.2056966772576626e-07, |
|
"loss": 0.4096, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 2.812533649187036, |
|
"grad_norm": 0.5852852463722229, |
|
"learning_rate": 2.1075879617401984e-07, |
|
"loss": 0.383, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 2.816840745127598, |
|
"grad_norm": 0.6858223080635071, |
|
"learning_rate": 2.0116878589842236e-07, |
|
"loss": 0.3763, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 2.82114784106816, |
|
"grad_norm": 0.5583459138870239, |
|
"learning_rate": 1.917998532652765e-07, |
|
"loss": 0.4007, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.825454937008722, |
|
"grad_norm": 0.6212313175201416, |
|
"learning_rate": 1.8265220965300812e-07, |
|
"loss": 0.3946, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 2.829762032949284, |
|
"grad_norm": 0.5777102112770081, |
|
"learning_rate": 1.7372606144740567e-07, |
|
"loss": 0.3908, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 2.834069128889846, |
|
"grad_norm": 0.5885289311408997, |
|
"learning_rate": 1.6502161003695615e-07, |
|
"loss": 0.4051, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 2.838376224830408, |
|
"grad_norm": 0.6133362054824829, |
|
"learning_rate": 1.5653905180830432e-07, |
|
"loss": 0.3909, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 2.8426833207709703, |
|
"grad_norm": 0.5662548542022705, |
|
"learning_rate": 1.48278578141825e-07, |
|
"loss": 0.3689, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.8469904167115323, |
|
"grad_norm": 0.5703479647636414, |
|
"learning_rate": 1.4024037540730006e-07, |
|
"loss": 0.3812, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 2.8512975126520943, |
|
"grad_norm": 0.5604844689369202, |
|
"learning_rate": 1.324246249597183e-07, |
|
"loss": 0.3992, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 2.8556046085926563, |
|
"grad_norm": 0.6033147573471069, |
|
"learning_rate": 1.2483150313517766e-07, |
|
"loss": 0.3937, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 2.8599117045332187, |
|
"grad_norm": 0.5846080780029297, |
|
"learning_rate": 1.1746118124691508e-07, |
|
"loss": 0.4123, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 2.8642188004737807, |
|
"grad_norm": 0.63025963306427, |
|
"learning_rate": 1.103138255814329e-07, |
|
"loss": 0.3998, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.8685258964143427, |
|
"grad_norm": 0.5580465197563171, |
|
"learning_rate": 1.0338959739475296e-07, |
|
"loss": 0.4007, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 2.8728329923549047, |
|
"grad_norm": 0.5767059326171875, |
|
"learning_rate": 9.66886529087785e-08, |
|
"loss": 0.4008, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 2.8771400882954667, |
|
"grad_norm": 0.583044707775116, |
|
"learning_rate": 9.021114330776348e-08, |
|
"loss": 0.403, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 2.8814471842360287, |
|
"grad_norm": 0.5440847873687744, |
|
"learning_rate": 8.395721473490992e-08, |
|
"loss": 0.3839, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 2.8857542801765907, |
|
"grad_norm": 0.55162513256073, |
|
"learning_rate": 7.792700828906374e-08, |
|
"loss": 0.4017, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.8900613761171527, |
|
"grad_norm": 0.5817933082580566, |
|
"learning_rate": 7.212066002153518e-08, |
|
"loss": 0.4009, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 2.894368472057715, |
|
"grad_norm": 0.6080750226974487, |
|
"learning_rate": 6.653830093302782e-08, |
|
"loss": 0.3964, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 2.898675567998277, |
|
"grad_norm": 0.5681482553482056, |
|
"learning_rate": 6.11800569706833e-08, |
|
"loss": 0.4003, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 2.902982663938839, |
|
"grad_norm": 0.5769705176353455, |
|
"learning_rate": 5.604604902524235e-08, |
|
"loss": 0.4017, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 2.907289759879401, |
|
"grad_norm": 0.546116828918457, |
|
"learning_rate": 5.113639292831152e-08, |
|
"loss": 0.3828, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.9115968558199636, |
|
"grad_norm": 0.590798020362854, |
|
"learning_rate": 4.645119944975296e-08, |
|
"loss": 0.3853, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 2.9159039517605256, |
|
"grad_norm": 0.5748469233512878, |
|
"learning_rate": 4.1990574295187606e-08, |
|
"loss": 0.4107, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 2.9202110477010876, |
|
"grad_norm": 0.5733410716056824, |
|
"learning_rate": 3.7754618103608144e-08, |
|
"loss": 0.4052, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 2.9245181436416496, |
|
"grad_norm": 0.5576743483543396, |
|
"learning_rate": 3.374342644510531e-08, |
|
"loss": 0.3846, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 2.9288252395822116, |
|
"grad_norm": 0.596834123134613, |
|
"learning_rate": 2.9957089818718476e-08, |
|
"loss": 0.4029, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.9331323355227736, |
|
"grad_norm": 0.5680873990058899, |
|
"learning_rate": 2.639569365038841e-08, |
|
"loss": 0.381, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 2.9374394314633356, |
|
"grad_norm": 0.5597060918807983, |
|
"learning_rate": 2.305931829102992e-08, |
|
"loss": 0.3974, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 2.941746527403898, |
|
"grad_norm": 0.5827191472053528, |
|
"learning_rate": 1.9948039014724417e-08, |
|
"loss": 0.3973, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 2.94605362334446, |
|
"grad_norm": 0.6119829416275024, |
|
"learning_rate": 1.706192601701462e-08, |
|
"loss": 0.3984, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 2.950360719285022, |
|
"grad_norm": 0.602497935295105, |
|
"learning_rate": 1.4401044413324682e-08, |
|
"loss": 0.4086, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 2.954667815225584, |
|
"grad_norm": 0.5783790349960327, |
|
"learning_rate": 1.1965454237493623e-08, |
|
"loss": 0.3945, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 2.958974911166146, |
|
"grad_norm": 0.5653091073036194, |
|
"learning_rate": 9.755210440413055e-09, |
|
"loss": 0.3938, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 2.9632820071067085, |
|
"grad_norm": 0.5716846585273743, |
|
"learning_rate": 7.770362888795957e-09, |
|
"loss": 0.3935, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 2.9675891030472705, |
|
"grad_norm": 0.6015262603759766, |
|
"learning_rate": 6.0109563640442515e-09, |
|
"loss": 0.3955, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 2.9718961989878325, |
|
"grad_norm": 0.5763514041900635, |
|
"learning_rate": 4.477030561246265e-09, |
|
"loss": 0.4069, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.9762032949283945, |
|
"grad_norm": 0.5644577741622925, |
|
"learning_rate": 3.168620088271901e-09, |
|
"loss": 0.3921, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 2.9805103908689565, |
|
"grad_norm": 0.5302848219871521, |
|
"learning_rate": 2.0857544650010332e-09, |
|
"loss": 0.404, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 2.9848174868095185, |
|
"grad_norm": 0.6025976538658142, |
|
"learning_rate": 1.2284581226507108e-09, |
|
"loss": 0.4037, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 2.9891245827500805, |
|
"grad_norm": 0.5681896805763245, |
|
"learning_rate": 5.967504032267091e-10, |
|
"loss": 0.4031, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 2.993431678690643, |
|
"grad_norm": 0.5708478093147278, |
|
"learning_rate": 1.906455590883205e-10, |
|
"loss": 0.4206, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 2.997738774631205, |
|
"grad_norm": 0.5966918468475342, |
|
"learning_rate": 1.015275262306048e-11, |
|
"loss": 0.4014, |
|
"step": 6960 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 6963, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 300, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7523782707118080.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|