{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998871968415116, "eval_steps": 500, "global_step": 2216, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004512126339537507, "grad_norm": 11.364033095495042, "learning_rate": 2.9850746268656714e-08, "loss": 1.1259, "step": 1 }, { "epoch": 0.0009024252679075014, "grad_norm": 13.931051354363555, "learning_rate": 5.970149253731343e-08, "loss": 1.0695, "step": 2 }, { "epoch": 0.0013536379018612521, "grad_norm": 12.591108456147174, "learning_rate": 8.955223880597014e-08, "loss": 1.1413, "step": 3 }, { "epoch": 0.0018048505358150028, "grad_norm": 13.097679442569106, "learning_rate": 1.1940298507462686e-07, "loss": 1.1058, "step": 4 }, { "epoch": 0.0022560631697687537, "grad_norm": 10.498761541717155, "learning_rate": 1.4925373134328355e-07, "loss": 1.1735, "step": 5 }, { "epoch": 0.0027072758037225042, "grad_norm": 10.84063689789702, "learning_rate": 1.7910447761194027e-07, "loss": 1.1406, "step": 6 }, { "epoch": 0.003158488437676255, "grad_norm": 11.189978725198145, "learning_rate": 2.08955223880597e-07, "loss": 1.1035, "step": 7 }, { "epoch": 0.0036097010716300056, "grad_norm": 11.74878304446126, "learning_rate": 2.388059701492537e-07, "loss": 1.1278, "step": 8 }, { "epoch": 0.0040609137055837565, "grad_norm": 11.451667410942289, "learning_rate": 2.686567164179104e-07, "loss": 1.0887, "step": 9 }, { "epoch": 0.0045121263395375075, "grad_norm": 13.400719338755062, "learning_rate": 2.985074626865671e-07, "loss": 0.9145, "step": 10 }, { "epoch": 0.0049633389734912575, "grad_norm": 12.493648403772585, "learning_rate": 3.2835820895522385e-07, "loss": 0.9701, "step": 11 }, { "epoch": 0.0054145516074450084, "grad_norm": 11.383629551476218, "learning_rate": 3.5820895522388055e-07, "loss": 1.0357, "step": 12 }, { "epoch": 0.005865764241398759, "grad_norm": 10.68706994675752, "learning_rate": 3.880597014925373e-07, "loss": 1.0357, "step": 13 }, { "epoch": 0.00631697687535251, "grad_norm": 12.110795882375692, "learning_rate": 4.17910447761194e-07, "loss": 1.1691, "step": 14 }, { "epoch": 0.00676818950930626, "grad_norm": 11.01201459317177, "learning_rate": 4.4776119402985074e-07, "loss": 1.0552, "step": 15 }, { "epoch": 0.007219402143260011, "grad_norm": 11.621902494318055, "learning_rate": 4.776119402985074e-07, "loss": 1.1426, "step": 16 }, { "epoch": 0.007670614777213762, "grad_norm": 12.473147801613736, "learning_rate": 5.074626865671642e-07, "loss": 1.0951, "step": 17 }, { "epoch": 0.008121827411167513, "grad_norm": 9.182896427775415, "learning_rate": 5.373134328358208e-07, "loss": 1.101, "step": 18 }, { "epoch": 0.008573040045121263, "grad_norm": 9.270909685736212, "learning_rate": 5.671641791044775e-07, "loss": 1.1811, "step": 19 }, { "epoch": 0.009024252679075015, "grad_norm": 10.991256459745955, "learning_rate": 5.970149253731342e-07, "loss": 1.1503, "step": 20 }, { "epoch": 0.009475465313028765, "grad_norm": 9.255537906212872, "learning_rate": 6.26865671641791e-07, "loss": 0.9254, "step": 21 }, { "epoch": 0.009926677946982515, "grad_norm": 8.790099892645406, "learning_rate": 6.567164179104477e-07, "loss": 1.196, "step": 22 }, { "epoch": 0.010377890580936267, "grad_norm": 8.636133146821528, "learning_rate": 6.865671641791044e-07, "loss": 1.0438, "step": 23 }, { "epoch": 0.010829103214890017, "grad_norm": 7.949499233764216, "learning_rate": 7.164179104477611e-07, "loss": 1.0595, "step": 24 }, { "epoch": 0.011280315848843767, "grad_norm": 8.40491249369533, "learning_rate": 7.462686567164179e-07, "loss": 0.9882, "step": 25 }, { "epoch": 0.011731528482797519, "grad_norm": 7.041925202309207, "learning_rate": 7.761194029850746e-07, "loss": 0.8373, "step": 26 }, { "epoch": 0.012182741116751269, "grad_norm": 5.936172454200004, "learning_rate": 8.059701492537313e-07, "loss": 1.0756, "step": 27 }, { "epoch": 0.01263395375070502, "grad_norm": 5.908249319041824, "learning_rate": 8.35820895522388e-07, "loss": 1.0573, "step": 28 }, { "epoch": 0.01308516638465877, "grad_norm": 4.780207987320098, "learning_rate": 8.656716417910447e-07, "loss": 0.9898, "step": 29 }, { "epoch": 0.01353637901861252, "grad_norm": 4.772574247160276, "learning_rate": 8.955223880597015e-07, "loss": 0.7025, "step": 30 }, { "epoch": 0.013987591652566272, "grad_norm": 4.950826482077291, "learning_rate": 9.253731343283582e-07, "loss": 0.935, "step": 31 }, { "epoch": 0.014438804286520023, "grad_norm": 5.647161557749149, "learning_rate": 9.552238805970149e-07, "loss": 0.9128, "step": 32 }, { "epoch": 0.014890016920473773, "grad_norm": 4.248897174075301, "learning_rate": 9.850746268656714e-07, "loss": 0.92, "step": 33 }, { "epoch": 0.015341229554427524, "grad_norm": 4.260585778705935, "learning_rate": 1.0149253731343285e-06, "loss": 0.997, "step": 34 }, { "epoch": 0.015792442188381276, "grad_norm": 4.712117773863939, "learning_rate": 1.0447761194029848e-06, "loss": 0.9365, "step": 35 }, { "epoch": 0.016243654822335026, "grad_norm": 4.558718307740576, "learning_rate": 1.0746268656716416e-06, "loss": 0.9952, "step": 36 }, { "epoch": 0.016694867456288776, "grad_norm": 4.536125586817622, "learning_rate": 1.1044776119402984e-06, "loss": 0.9118, "step": 37 }, { "epoch": 0.017146080090242526, "grad_norm": 4.333053203593244, "learning_rate": 1.134328358208955e-06, "loss": 0.9406, "step": 38 }, { "epoch": 0.017597292724196276, "grad_norm": 3.913517979851077, "learning_rate": 1.1641791044776118e-06, "loss": 0.68, "step": 39 }, { "epoch": 0.01804850535815003, "grad_norm": 3.8887693821373746, "learning_rate": 1.1940298507462684e-06, "loss": 0.8987, "step": 40 }, { "epoch": 0.01849971799210378, "grad_norm": 3.7966150058507626, "learning_rate": 1.2238805970149252e-06, "loss": 0.8783, "step": 41 }, { "epoch": 0.01895093062605753, "grad_norm": 3.8413216293595465, "learning_rate": 1.253731343283582e-06, "loss": 1.0114, "step": 42 }, { "epoch": 0.01940214326001128, "grad_norm": 4.031064280953991, "learning_rate": 1.2835820895522386e-06, "loss": 0.9885, "step": 43 }, { "epoch": 0.01985335589396503, "grad_norm": 4.3421298053507025, "learning_rate": 1.3134328358208954e-06, "loss": 0.7812, "step": 44 }, { "epoch": 0.02030456852791878, "grad_norm": 3.6498986771766377, "learning_rate": 1.3432835820895522e-06, "loss": 0.864, "step": 45 }, { "epoch": 0.020755781161872534, "grad_norm": 3.959983938073483, "learning_rate": 1.3731343283582088e-06, "loss": 1.0879, "step": 46 }, { "epoch": 0.021206993795826284, "grad_norm": 3.897851022301326, "learning_rate": 1.4029850746268656e-06, "loss": 0.8353, "step": 47 }, { "epoch": 0.021658206429780034, "grad_norm": 3.9557985042022734, "learning_rate": 1.4328358208955222e-06, "loss": 1.0151, "step": 48 }, { "epoch": 0.022109419063733784, "grad_norm": 3.954727075094809, "learning_rate": 1.462686567164179e-06, "loss": 0.8335, "step": 49 }, { "epoch": 0.022560631697687534, "grad_norm": 4.085425811341359, "learning_rate": 1.4925373134328358e-06, "loss": 0.8118, "step": 50 }, { "epoch": 0.023011844331641287, "grad_norm": 3.8699428653568715, "learning_rate": 1.5223880597014924e-06, "loss": 0.8204, "step": 51 }, { "epoch": 0.023463056965595037, "grad_norm": 3.9398844157805035, "learning_rate": 1.5522388059701492e-06, "loss": 0.8355, "step": 52 }, { "epoch": 0.023914269599548788, "grad_norm": 3.9157701162909917, "learning_rate": 1.5820895522388058e-06, "loss": 0.7559, "step": 53 }, { "epoch": 0.024365482233502538, "grad_norm": 3.2664411221295166, "learning_rate": 1.6119402985074626e-06, "loss": 0.6912, "step": 54 }, { "epoch": 0.024816694867456288, "grad_norm": 4.102566095975955, "learning_rate": 1.6417910447761194e-06, "loss": 0.8001, "step": 55 }, { "epoch": 0.02526790750141004, "grad_norm": 3.984516243751416, "learning_rate": 1.671641791044776e-06, "loss": 0.6703, "step": 56 }, { "epoch": 0.02571912013536379, "grad_norm": 3.282165881291406, "learning_rate": 1.7014925373134328e-06, "loss": 0.6232, "step": 57 }, { "epoch": 0.02617033276931754, "grad_norm": 3.5552685115913896, "learning_rate": 1.7313432835820893e-06, "loss": 0.8499, "step": 58 }, { "epoch": 0.02662154540327129, "grad_norm": 3.963753035943293, "learning_rate": 1.7611940298507461e-06, "loss": 0.7578, "step": 59 }, { "epoch": 0.02707275803722504, "grad_norm": 3.9201161090299235, "learning_rate": 1.791044776119403e-06, "loss": 0.7639, "step": 60 }, { "epoch": 0.02752397067117879, "grad_norm": 4.525487683658723, "learning_rate": 1.8208955223880595e-06, "loss": 0.7993, "step": 61 }, { "epoch": 0.027975183305132545, "grad_norm": 3.577108451322024, "learning_rate": 1.8507462686567163e-06, "loss": 0.712, "step": 62 }, { "epoch": 0.028426395939086295, "grad_norm": 3.750651576099599, "learning_rate": 1.8805970149253731e-06, "loss": 0.8058, "step": 63 }, { "epoch": 0.028877608573040045, "grad_norm": 4.016735732488834, "learning_rate": 1.9104477611940297e-06, "loss": 0.8695, "step": 64 }, { "epoch": 0.029328821206993795, "grad_norm": 3.274144203992301, "learning_rate": 1.9402985074626867e-06, "loss": 0.7518, "step": 65 }, { "epoch": 0.029780033840947545, "grad_norm": 3.8331394793119284, "learning_rate": 1.970149253731343e-06, "loss": 0.7467, "step": 66 }, { "epoch": 0.0302312464749013, "grad_norm": 3.7944270550638914, "learning_rate": 2e-06, "loss": 0.6613, "step": 67 }, { "epoch": 0.03068245910885505, "grad_norm": 3.9078761683052377, "learning_rate": 1.9999989314450967e-06, "loss": 0.6253, "step": 68 }, { "epoch": 0.0311336717428088, "grad_norm": 3.7685631166339966, "learning_rate": 1.9999957257826715e-06, "loss": 0.6976, "step": 69 }, { "epoch": 0.03158488437676255, "grad_norm": 3.5388877908849423, "learning_rate": 1.9999903830195744e-06, "loss": 0.6982, "step": 70 }, { "epoch": 0.0320360970107163, "grad_norm": 3.9557829292992435, "learning_rate": 1.9999829031672236e-06, "loss": 0.9304, "step": 71 }, { "epoch": 0.03248730964467005, "grad_norm": 3.4688030496000697, "learning_rate": 1.9999732862416053e-06, "loss": 0.7131, "step": 72 }, { "epoch": 0.0329385222786238, "grad_norm": 3.4382929899009707, "learning_rate": 1.9999615322632707e-06, "loss": 0.67, "step": 73 }, { "epoch": 0.03338973491257755, "grad_norm": 3.68765625410286, "learning_rate": 1.9999476412573397e-06, "loss": 0.6871, "step": 74 }, { "epoch": 0.0338409475465313, "grad_norm": 3.9034136569075444, "learning_rate": 1.999931613253499e-06, "loss": 0.7368, "step": 75 }, { "epoch": 0.03429216018048505, "grad_norm": 3.4434580645266983, "learning_rate": 1.9999134482860026e-06, "loss": 0.7582, "step": 76 }, { "epoch": 0.0347433728144388, "grad_norm": 3.4427590958689827, "learning_rate": 1.9998931463936704e-06, "loss": 0.6917, "step": 77 }, { "epoch": 0.03519458544839255, "grad_norm": 3.5129243585420045, "learning_rate": 1.9998707076198903e-06, "loss": 0.7202, "step": 78 }, { "epoch": 0.0356457980823463, "grad_norm": 3.3097035106926334, "learning_rate": 1.999846132012616e-06, "loss": 0.6518, "step": 79 }, { "epoch": 0.03609701071630006, "grad_norm": 4.01365302973293, "learning_rate": 1.9998194196243685e-06, "loss": 0.8061, "step": 80 }, { "epoch": 0.03654822335025381, "grad_norm": 3.936175515147698, "learning_rate": 1.999790570512235e-06, "loss": 0.6833, "step": 81 }, { "epoch": 0.03699943598420756, "grad_norm": 4.047146080445624, "learning_rate": 1.9997595847378693e-06, "loss": 0.5754, "step": 82 }, { "epoch": 0.03745064861816131, "grad_norm": 4.520964419884895, "learning_rate": 1.9997264623674913e-06, "loss": 0.7149, "step": 83 }, { "epoch": 0.03790186125211506, "grad_norm": 4.192870395330733, "learning_rate": 1.999691203471887e-06, "loss": 0.8313, "step": 84 }, { "epoch": 0.03835307388606881, "grad_norm": 3.9624103451987347, "learning_rate": 1.9996538081264093e-06, "loss": 0.6514, "step": 85 }, { "epoch": 0.03880428652002256, "grad_norm": 3.520519118359727, "learning_rate": 1.9996142764109753e-06, "loss": 0.7045, "step": 86 }, { "epoch": 0.03925549915397631, "grad_norm": 3.5083592386843914, "learning_rate": 1.999572608410069e-06, "loss": 0.5047, "step": 87 }, { "epoch": 0.03970671178793006, "grad_norm": 3.620127744810978, "learning_rate": 1.999528804212739e-06, "loss": 0.6376, "step": 88 }, { "epoch": 0.04015792442188381, "grad_norm": 3.0483993598270906, "learning_rate": 1.9994828639126008e-06, "loss": 0.6099, "step": 89 }, { "epoch": 0.04060913705583756, "grad_norm": 3.7495513331273984, "learning_rate": 1.999434787607833e-06, "loss": 0.7896, "step": 90 }, { "epoch": 0.04106034968979132, "grad_norm": 3.4700395599360796, "learning_rate": 1.9993845754011797e-06, "loss": 0.6285, "step": 91 }, { "epoch": 0.04151156232374507, "grad_norm": 3.131175268232334, "learning_rate": 1.9993322273999505e-06, "loss": 0.6492, "step": 92 }, { "epoch": 0.04196277495769882, "grad_norm": 3.5622755072245127, "learning_rate": 1.9992777437160185e-06, "loss": 0.8218, "step": 93 }, { "epoch": 0.04241398759165257, "grad_norm": 3.3564760065634744, "learning_rate": 1.9992211244658214e-06, "loss": 0.6667, "step": 94 }, { "epoch": 0.04286520022560632, "grad_norm": 3.687670006527879, "learning_rate": 1.999162369770361e-06, "loss": 0.6048, "step": 95 }, { "epoch": 0.04331641285956007, "grad_norm": 3.7369728445839776, "learning_rate": 1.9991014797552027e-06, "loss": 0.8528, "step": 96 }, { "epoch": 0.04376762549351382, "grad_norm": 4.326301281314035, "learning_rate": 1.999038454550474e-06, "loss": 0.6348, "step": 97 }, { "epoch": 0.04421883812746757, "grad_norm": 3.330654698188114, "learning_rate": 1.998973294290868e-06, "loss": 0.6672, "step": 98 }, { "epoch": 0.04467005076142132, "grad_norm": 3.4395950335828465, "learning_rate": 1.998905999115639e-06, "loss": 0.6424, "step": 99 }, { "epoch": 0.04512126339537507, "grad_norm": 3.4714288997142835, "learning_rate": 1.998836569168603e-06, "loss": 0.6887, "step": 100 }, { "epoch": 0.04557247602932882, "grad_norm": 3.739675822036197, "learning_rate": 1.998765004598141e-06, "loss": 0.744, "step": 101 }, { "epoch": 0.046023688663282575, "grad_norm": 3.5376293890493167, "learning_rate": 1.998691305557194e-06, "loss": 0.75, "step": 102 }, { "epoch": 0.046474901297236325, "grad_norm": 3.9480104427554745, "learning_rate": 1.9986154722032646e-06, "loss": 0.594, "step": 103 }, { "epoch": 0.046926113931190075, "grad_norm": 3.9967536358361135, "learning_rate": 1.9985375046984165e-06, "loss": 0.7111, "step": 104 }, { "epoch": 0.047377326565143825, "grad_norm": 4.404083699085148, "learning_rate": 1.9984574032092758e-06, "loss": 0.6737, "step": 105 }, { "epoch": 0.047828539199097575, "grad_norm": 3.9446789384138943, "learning_rate": 1.998375167907028e-06, "loss": 0.5581, "step": 106 }, { "epoch": 0.048279751833051325, "grad_norm": 3.2749627513940354, "learning_rate": 1.9982907989674186e-06, "loss": 0.6848, "step": 107 }, { "epoch": 0.048730964467005075, "grad_norm": 3.1745752031824503, "learning_rate": 1.9982042965707534e-06, "loss": 0.6993, "step": 108 }, { "epoch": 0.049182177100958825, "grad_norm": 2.918514031480819, "learning_rate": 1.9981156609018974e-06, "loss": 0.5622, "step": 109 }, { "epoch": 0.049633389734912575, "grad_norm": 3.2967345302885773, "learning_rate": 1.998024892150275e-06, "loss": 0.7764, "step": 110 }, { "epoch": 0.050084602368866325, "grad_norm": 4.096076864953819, "learning_rate": 1.997931990509869e-06, "loss": 0.8217, "step": 111 }, { "epoch": 0.05053581500282008, "grad_norm": 3.5086960564534073, "learning_rate": 1.9978369561792203e-06, "loss": 0.7334, "step": 112 }, { "epoch": 0.05098702763677383, "grad_norm": 4.957647662303255, "learning_rate": 1.997739789361428e-06, "loss": 0.7043, "step": 113 }, { "epoch": 0.05143824027072758, "grad_norm": 3.108513749586831, "learning_rate": 1.9976404902641475e-06, "loss": 0.5838, "step": 114 }, { "epoch": 0.05188945290468133, "grad_norm": 4.374801020766516, "learning_rate": 1.9975390590995923e-06, "loss": 0.6207, "step": 115 }, { "epoch": 0.05234066553863508, "grad_norm": 3.5847626690371746, "learning_rate": 1.9974354960845323e-06, "loss": 0.7108, "step": 116 }, { "epoch": 0.05279187817258883, "grad_norm": 3.3972794164820566, "learning_rate": 1.9973298014402927e-06, "loss": 0.6389, "step": 117 }, { "epoch": 0.05324309080654258, "grad_norm": 3.4765618382257686, "learning_rate": 1.9972219753927547e-06, "loss": 0.7736, "step": 118 }, { "epoch": 0.05369430344049633, "grad_norm": 3.983880838580834, "learning_rate": 1.997112018172354e-06, "loss": 0.6782, "step": 119 }, { "epoch": 0.05414551607445008, "grad_norm": 3.475938176625616, "learning_rate": 1.9969999300140816e-06, "loss": 0.6435, "step": 120 }, { "epoch": 0.05459672870840383, "grad_norm": 3.2723738223571908, "learning_rate": 1.9968857111574823e-06, "loss": 0.725, "step": 121 }, { "epoch": 0.05504794134235758, "grad_norm": 2.7837662665336684, "learning_rate": 1.9967693618466537e-06, "loss": 0.5725, "step": 122 }, { "epoch": 0.05549915397631134, "grad_norm": 3.6895192194143287, "learning_rate": 1.996650882330248e-06, "loss": 0.6869, "step": 123 }, { "epoch": 0.05595036661026509, "grad_norm": 3.4109605481942475, "learning_rate": 1.9965302728614685e-06, "loss": 0.7059, "step": 124 }, { "epoch": 0.05640157924421884, "grad_norm": 3.1175030442363023, "learning_rate": 1.9964075336980705e-06, "loss": 0.5166, "step": 125 }, { "epoch": 0.05685279187817259, "grad_norm": 3.7149554868454207, "learning_rate": 1.9962826651023618e-06, "loss": 0.6477, "step": 126 }, { "epoch": 0.05730400451212634, "grad_norm": 2.7779966288953006, "learning_rate": 1.9961556673412e-06, "loss": 0.5851, "step": 127 }, { "epoch": 0.05775521714608009, "grad_norm": 3.6464661606212627, "learning_rate": 1.9960265406859927e-06, "loss": 0.7105, "step": 128 }, { "epoch": 0.05820642978003384, "grad_norm": 4.222181937491618, "learning_rate": 1.9958952854126986e-06, "loss": 0.8397, "step": 129 }, { "epoch": 0.05865764241398759, "grad_norm": 3.463205955817649, "learning_rate": 1.995761901801824e-06, "loss": 0.6813, "step": 130 }, { "epoch": 0.05910885504794134, "grad_norm": 3.5951321527152214, "learning_rate": 1.995626390138425e-06, "loss": 0.6965, "step": 131 }, { "epoch": 0.05956006768189509, "grad_norm": 3.321905615889881, "learning_rate": 1.995488750712104e-06, "loss": 0.7583, "step": 132 }, { "epoch": 0.06001128031584884, "grad_norm": 3.5601557448366763, "learning_rate": 1.995348983817012e-06, "loss": 0.595, "step": 133 }, { "epoch": 0.0604624929498026, "grad_norm": 3.734905545964483, "learning_rate": 1.9952070897518465e-06, "loss": 0.6582, "step": 134 }, { "epoch": 0.06091370558375635, "grad_norm": 3.3634272520299824, "learning_rate": 1.99506306881985e-06, "loss": 0.635, "step": 135 }, { "epoch": 0.0613649182177101, "grad_norm": 3.2571407250429774, "learning_rate": 1.9949169213288123e-06, "loss": 0.6879, "step": 136 }, { "epoch": 0.06181613085166385, "grad_norm": 3.402056992142215, "learning_rate": 1.9947686475910653e-06, "loss": 0.66, "step": 137 }, { "epoch": 0.0622673434856176, "grad_norm": 3.230669775565966, "learning_rate": 1.9946182479234867e-06, "loss": 0.6797, "step": 138 }, { "epoch": 0.06271855611957135, "grad_norm": 3.2932122097098606, "learning_rate": 1.9944657226474975e-06, "loss": 0.6038, "step": 139 }, { "epoch": 0.0631697687535251, "grad_norm": 3.453841593898078, "learning_rate": 1.9943110720890605e-06, "loss": 0.7284, "step": 140 }, { "epoch": 0.06362098138747885, "grad_norm": 3.4659777218616945, "learning_rate": 1.994154296578681e-06, "loss": 0.7008, "step": 141 }, { "epoch": 0.0640721940214326, "grad_norm": 3.2914543179139737, "learning_rate": 1.993995396451406e-06, "loss": 0.679, "step": 142 }, { "epoch": 0.06452340665538635, "grad_norm": 3.5718717851262873, "learning_rate": 1.9938343720468215e-06, "loss": 0.7916, "step": 143 }, { "epoch": 0.0649746192893401, "grad_norm": 3.822827292185195, "learning_rate": 1.993671223709055e-06, "loss": 0.7997, "step": 144 }, { "epoch": 0.06542583192329385, "grad_norm": 3.636935399595887, "learning_rate": 1.9935059517867726e-06, "loss": 0.8537, "step": 145 }, { "epoch": 0.0658770445572476, "grad_norm": 3.6710814044757787, "learning_rate": 1.993338556633178e-06, "loss": 0.7573, "step": 146 }, { "epoch": 0.06632825719120135, "grad_norm": 2.7858354684349282, "learning_rate": 1.993169038606014e-06, "loss": 0.5819, "step": 147 }, { "epoch": 0.0667794698251551, "grad_norm": 3.276036115967757, "learning_rate": 1.992997398067558e-06, "loss": 0.574, "step": 148 }, { "epoch": 0.06723068245910886, "grad_norm": 3.0839012680951052, "learning_rate": 1.992823635384625e-06, "loss": 0.524, "step": 149 }, { "epoch": 0.0676818950930626, "grad_norm": 3.506642162753119, "learning_rate": 1.9926477509285654e-06, "loss": 0.8239, "step": 150 }, { "epoch": 0.06813310772701636, "grad_norm": 3.148995922283508, "learning_rate": 1.9924697450752634e-06, "loss": 0.6518, "step": 151 }, { "epoch": 0.0685843203609701, "grad_norm": 3.42401846246941, "learning_rate": 1.9922896182051368e-06, "loss": 0.7514, "step": 152 }, { "epoch": 0.06903553299492386, "grad_norm": 3.418930096776447, "learning_rate": 1.9921073707031367e-06, "loss": 0.7616, "step": 153 }, { "epoch": 0.0694867456288776, "grad_norm": 3.4667255988772614, "learning_rate": 1.9919230029587457e-06, "loss": 0.7787, "step": 154 }, { "epoch": 0.06993795826283136, "grad_norm": 3.4038766672764527, "learning_rate": 1.991736515365979e-06, "loss": 0.5716, "step": 155 }, { "epoch": 0.0703891708967851, "grad_norm": 2.9809305533819743, "learning_rate": 1.99154790832338e-06, "loss": 0.6286, "step": 156 }, { "epoch": 0.07084038353073886, "grad_norm": 3.4845882856327264, "learning_rate": 1.9913571822340225e-06, "loss": 0.6862, "step": 157 }, { "epoch": 0.0712915961646926, "grad_norm": 3.5834795110879507, "learning_rate": 1.9911643375055103e-06, "loss": 0.8095, "step": 158 }, { "epoch": 0.07174280879864636, "grad_norm": 3.4230242670945867, "learning_rate": 1.9909693745499727e-06, "loss": 0.7338, "step": 159 }, { "epoch": 0.07219402143260012, "grad_norm": 3.423862511414622, "learning_rate": 1.9907722937840673e-06, "loss": 0.6397, "step": 160 }, { "epoch": 0.07264523406655386, "grad_norm": 3.2464417100716476, "learning_rate": 1.990573095628977e-06, "loss": 0.6437, "step": 161 }, { "epoch": 0.07309644670050762, "grad_norm": 3.2452080634188603, "learning_rate": 1.990371780510411e-06, "loss": 0.7933, "step": 162 }, { "epoch": 0.07354765933446136, "grad_norm": 3.8701961071397193, "learning_rate": 1.990168348858601e-06, "loss": 0.6959, "step": 163 }, { "epoch": 0.07399887196841512, "grad_norm": 3.346386820700462, "learning_rate": 1.9899628011083025e-06, "loss": 0.7028, "step": 164 }, { "epoch": 0.07445008460236886, "grad_norm": 3.3695620544061917, "learning_rate": 1.9897551376987948e-06, "loss": 0.7207, "step": 165 }, { "epoch": 0.07490129723632262, "grad_norm": 3.3459806929523928, "learning_rate": 1.9895453590738766e-06, "loss": 0.565, "step": 166 }, { "epoch": 0.07535250987027636, "grad_norm": 3.5190095763107907, "learning_rate": 1.9893334656818678e-06, "loss": 0.5682, "step": 167 }, { "epoch": 0.07580372250423012, "grad_norm": 3.4154315855452277, "learning_rate": 1.989119457975608e-06, "loss": 0.7248, "step": 168 }, { "epoch": 0.07625493513818386, "grad_norm": 3.619554148268442, "learning_rate": 1.988903336412455e-06, "loss": 0.7166, "step": 169 }, { "epoch": 0.07670614777213762, "grad_norm": 3.628782657060156, "learning_rate": 1.988685101454285e-06, "loss": 0.6744, "step": 170 }, { "epoch": 0.07715736040609138, "grad_norm": 3.222286302019397, "learning_rate": 1.9884647535674897e-06, "loss": 0.6345, "step": 171 }, { "epoch": 0.07760857304004512, "grad_norm": 3.3817029503603746, "learning_rate": 1.988242293222976e-06, "loss": 0.6987, "step": 172 }, { "epoch": 0.07805978567399888, "grad_norm": 3.192434440346879, "learning_rate": 1.9880177208961674e-06, "loss": 0.6651, "step": 173 }, { "epoch": 0.07851099830795262, "grad_norm": 3.459120242900092, "learning_rate": 1.9877910370669984e-06, "loss": 0.6704, "step": 174 }, { "epoch": 0.07896221094190638, "grad_norm": 3.5176326381584473, "learning_rate": 1.9875622422199184e-06, "loss": 0.7315, "step": 175 }, { "epoch": 0.07941342357586012, "grad_norm": 3.42358670836462, "learning_rate": 1.9873313368438856e-06, "loss": 0.709, "step": 176 }, { "epoch": 0.07986463620981388, "grad_norm": 3.514397087656355, "learning_rate": 1.987098321432372e-06, "loss": 0.6897, "step": 177 }, { "epoch": 0.08031584884376762, "grad_norm": 3.9791349562876364, "learning_rate": 1.9868631964833554e-06, "loss": 0.6663, "step": 178 }, { "epoch": 0.08076706147772138, "grad_norm": 3.511691191882589, "learning_rate": 1.9866259624993243e-06, "loss": 0.7678, "step": 179 }, { "epoch": 0.08121827411167512, "grad_norm": 3.6035462780242082, "learning_rate": 1.9863866199872745e-06, "loss": 0.6732, "step": 180 }, { "epoch": 0.08166948674562888, "grad_norm": 3.3265262825383384, "learning_rate": 1.986145169458706e-06, "loss": 0.6459, "step": 181 }, { "epoch": 0.08212069937958263, "grad_norm": 3.070737136256683, "learning_rate": 1.9859016114296256e-06, "loss": 0.6672, "step": 182 }, { "epoch": 0.08257191201353638, "grad_norm": 3.1293576967982006, "learning_rate": 1.985655946420544e-06, "loss": 0.6482, "step": 183 }, { "epoch": 0.08302312464749013, "grad_norm": 3.2300733698316, "learning_rate": 1.9854081749564737e-06, "loss": 0.5774, "step": 184 }, { "epoch": 0.08347433728144388, "grad_norm": 3.1911556869196525, "learning_rate": 1.98515829756693e-06, "loss": 0.5046, "step": 185 }, { "epoch": 0.08392554991539763, "grad_norm": 3.463324413729419, "learning_rate": 1.984906314785928e-06, "loss": 0.6925, "step": 186 }, { "epoch": 0.08437676254935138, "grad_norm": 3.4082698478159053, "learning_rate": 1.984652227151982e-06, "loss": 0.6869, "step": 187 }, { "epoch": 0.08482797518330513, "grad_norm": 3.0232384513032526, "learning_rate": 1.984396035208107e-06, "loss": 0.6854, "step": 188 }, { "epoch": 0.08527918781725888, "grad_norm": 3.405093474595666, "learning_rate": 1.984137739501811e-06, "loss": 0.8003, "step": 189 }, { "epoch": 0.08573040045121263, "grad_norm": 2.8650239504287827, "learning_rate": 1.983877340585102e-06, "loss": 0.6247, "step": 190 }, { "epoch": 0.08618161308516638, "grad_norm": 3.1843877890785053, "learning_rate": 1.98361483901448e-06, "loss": 0.6268, "step": 191 }, { "epoch": 0.08663282571912014, "grad_norm": 3.2905100987787574, "learning_rate": 1.983350235350941e-06, "loss": 0.6307, "step": 192 }, { "epoch": 0.08708403835307389, "grad_norm": 3.6238689522296896, "learning_rate": 1.9830835301599705e-06, "loss": 0.6972, "step": 193 }, { "epoch": 0.08753525098702764, "grad_norm": 3.6407089349858994, "learning_rate": 1.982814724011548e-06, "loss": 0.5048, "step": 194 }, { "epoch": 0.08798646362098139, "grad_norm": 3.414185881113126, "learning_rate": 1.982543817480141e-06, "loss": 0.6903, "step": 195 }, { "epoch": 0.08843767625493514, "grad_norm": 3.5617662518759423, "learning_rate": 1.9822708111447073e-06, "loss": 0.7267, "step": 196 }, { "epoch": 0.08888888888888889, "grad_norm": 3.260537631153434, "learning_rate": 1.9819957055886903e-06, "loss": 0.6019, "step": 197 }, { "epoch": 0.08934010152284264, "grad_norm": 3.9131056691786648, "learning_rate": 1.9817185014000216e-06, "loss": 0.7248, "step": 198 }, { "epoch": 0.08979131415679639, "grad_norm": 3.2158505559140087, "learning_rate": 1.981439199171117e-06, "loss": 0.4917, "step": 199 }, { "epoch": 0.09024252679075014, "grad_norm": 3.3742251974916306, "learning_rate": 1.9811577994988754e-06, "loss": 0.6419, "step": 200 }, { "epoch": 0.09069373942470389, "grad_norm": 3.960767715416477, "learning_rate": 1.9808743029846793e-06, "loss": 0.6485, "step": 201 }, { "epoch": 0.09114495205865764, "grad_norm": 3.2106160146208556, "learning_rate": 1.980588710234392e-06, "loss": 0.6513, "step": 202 }, { "epoch": 0.09159616469261139, "grad_norm": 3.3168520724652435, "learning_rate": 1.980301021858356e-06, "loss": 0.6639, "step": 203 }, { "epoch": 0.09204737732656515, "grad_norm": 3.3182631173626, "learning_rate": 1.9800112384713937e-06, "loss": 0.6178, "step": 204 }, { "epoch": 0.09249858996051889, "grad_norm": 3.162501767023835, "learning_rate": 1.9797193606928037e-06, "loss": 0.6681, "step": 205 }, { "epoch": 0.09294980259447265, "grad_norm": 3.2385347157247955, "learning_rate": 1.9794253891463602e-06, "loss": 0.5039, "step": 206 }, { "epoch": 0.09340101522842639, "grad_norm": 3.573031152116351, "learning_rate": 1.979129324460314e-06, "loss": 0.6903, "step": 207 }, { "epoch": 0.09385222786238015, "grad_norm": 3.601364277364461, "learning_rate": 1.978831167267387e-06, "loss": 0.5897, "step": 208 }, { "epoch": 0.09430344049633389, "grad_norm": 3.2791769809356537, "learning_rate": 1.9785309182047735e-06, "loss": 0.5525, "step": 209 }, { "epoch": 0.09475465313028765, "grad_norm": 4.31342038746554, "learning_rate": 1.9782285779141393e-06, "loss": 0.5973, "step": 210 }, { "epoch": 0.09520586576424139, "grad_norm": 3.196166578673568, "learning_rate": 1.977924147041619e-06, "loss": 0.662, "step": 211 }, { "epoch": 0.09565707839819515, "grad_norm": 3.5134072643179586, "learning_rate": 1.9776176262378144e-06, "loss": 0.6167, "step": 212 }, { "epoch": 0.09610829103214891, "grad_norm": 3.4425320121595977, "learning_rate": 1.977309016157794e-06, "loss": 0.595, "step": 213 }, { "epoch": 0.09655950366610265, "grad_norm": 3.374754780954467, "learning_rate": 1.9769983174610917e-06, "loss": 0.5101, "step": 214 }, { "epoch": 0.09701071630005641, "grad_norm": 3.107648208296178, "learning_rate": 1.9766855308117048e-06, "loss": 0.705, "step": 215 }, { "epoch": 0.09746192893401015, "grad_norm": 3.507381249990775, "learning_rate": 1.9763706568780925e-06, "loss": 0.6518, "step": 216 }, { "epoch": 0.09791314156796391, "grad_norm": 3.178375092239186, "learning_rate": 1.9760536963331747e-06, "loss": 0.8446, "step": 217 }, { "epoch": 0.09836435420191765, "grad_norm": 3.361779236537557, "learning_rate": 1.9757346498543316e-06, "loss": 0.5807, "step": 218 }, { "epoch": 0.09881556683587141, "grad_norm": 3.859132201884954, "learning_rate": 1.9754135181234003e-06, "loss": 0.6968, "step": 219 }, { "epoch": 0.09926677946982515, "grad_norm": 3.300316326276715, "learning_rate": 1.9750903018266743e-06, "loss": 0.7015, "step": 220 }, { "epoch": 0.09971799210377891, "grad_norm": 3.149000744047261, "learning_rate": 1.9747650016549027e-06, "loss": 0.6008, "step": 221 }, { "epoch": 0.10016920473773265, "grad_norm": 3.4892072583658695, "learning_rate": 1.9744376183032873e-06, "loss": 0.6077, "step": 222 }, { "epoch": 0.10062041737168641, "grad_norm": 3.6046339473790323, "learning_rate": 1.9741081524714825e-06, "loss": 0.6846, "step": 223 }, { "epoch": 0.10107163000564016, "grad_norm": 3.4520154967728085, "learning_rate": 1.9737766048635928e-06, "loss": 0.6671, "step": 224 }, { "epoch": 0.10152284263959391, "grad_norm": 3.2952487774101797, "learning_rate": 1.973442976188172e-06, "loss": 0.6594, "step": 225 }, { "epoch": 0.10197405527354766, "grad_norm": 3.2514684073236584, "learning_rate": 1.973107267158221e-06, "loss": 0.6518, "step": 226 }, { "epoch": 0.10242526790750141, "grad_norm": 3.266600653278959, "learning_rate": 1.9727694784911876e-06, "loss": 0.6731, "step": 227 }, { "epoch": 0.10287648054145516, "grad_norm": 3.1924596316986533, "learning_rate": 1.972429610908962e-06, "loss": 0.6636, "step": 228 }, { "epoch": 0.10332769317540891, "grad_norm": 3.2104025132138623, "learning_rate": 1.9720876651378794e-06, "loss": 0.6779, "step": 229 }, { "epoch": 0.10377890580936266, "grad_norm": 3.2963718496191294, "learning_rate": 1.9717436419087155e-06, "loss": 0.634, "step": 230 }, { "epoch": 0.10423011844331641, "grad_norm": 3.2073822291013254, "learning_rate": 1.9713975419566858e-06, "loss": 0.5812, "step": 231 }, { "epoch": 0.10468133107727016, "grad_norm": 3.4189730159980884, "learning_rate": 1.971049366021443e-06, "loss": 0.7316, "step": 232 }, { "epoch": 0.10513254371122391, "grad_norm": 3.23394924091578, "learning_rate": 1.9706991148470783e-06, "loss": 0.6624, "step": 233 }, { "epoch": 0.10558375634517767, "grad_norm": 3.1317986475999375, "learning_rate": 1.970346789182116e-06, "loss": 0.5124, "step": 234 }, { "epoch": 0.10603496897913142, "grad_norm": 3.385294259471175, "learning_rate": 1.969992389779516e-06, "loss": 0.5569, "step": 235 }, { "epoch": 0.10648618161308517, "grad_norm": 4.118491129332631, "learning_rate": 1.9696359173966676e-06, "loss": 0.6456, "step": 236 }, { "epoch": 0.10693739424703892, "grad_norm": 3.385938350325588, "learning_rate": 1.9692773727953923e-06, "loss": 0.6794, "step": 237 }, { "epoch": 0.10738860688099267, "grad_norm": 3.401423714020342, "learning_rate": 1.9689167567419383e-06, "loss": 0.6555, "step": 238 }, { "epoch": 0.10783981951494642, "grad_norm": 3.401519447295867, "learning_rate": 1.9685540700069827e-06, "loss": 0.578, "step": 239 }, { "epoch": 0.10829103214890017, "grad_norm": 4.10708011535945, "learning_rate": 1.9681893133656257e-06, "loss": 0.7618, "step": 240 }, { "epoch": 0.10874224478285392, "grad_norm": 3.5017274039945163, "learning_rate": 1.9678224875973932e-06, "loss": 0.592, "step": 241 }, { "epoch": 0.10919345741680767, "grad_norm": 3.1688095237487017, "learning_rate": 1.9674535934862324e-06, "loss": 0.6643, "step": 242 }, { "epoch": 0.10964467005076142, "grad_norm": 3.1685960960934336, "learning_rate": 1.9670826318205098e-06, "loss": 0.6199, "step": 243 }, { "epoch": 0.11009588268471517, "grad_norm": 3.4775317850948104, "learning_rate": 1.9667096033930114e-06, "loss": 0.6675, "step": 244 }, { "epoch": 0.11054709531866892, "grad_norm": 3.643632342788612, "learning_rate": 1.96633450900094e-06, "loss": 0.711, "step": 245 }, { "epoch": 0.11099830795262268, "grad_norm": 2.9819737147125003, "learning_rate": 1.965957349445914e-06, "loss": 0.6962, "step": 246 }, { "epoch": 0.11144952058657642, "grad_norm": 2.9768216888435477, "learning_rate": 1.9655781255339632e-06, "loss": 0.65, "step": 247 }, { "epoch": 0.11190073322053018, "grad_norm": 3.174224366971097, "learning_rate": 1.965196838075533e-06, "loss": 0.5309, "step": 248 }, { "epoch": 0.11235194585448392, "grad_norm": 3.0582374952193985, "learning_rate": 1.9648134878854744e-06, "loss": 0.5635, "step": 249 }, { "epoch": 0.11280315848843768, "grad_norm": 3.8277212668333114, "learning_rate": 1.9644280757830507e-06, "loss": 0.7526, "step": 250 }, { "epoch": 0.11325437112239142, "grad_norm": 3.8709397850662945, "learning_rate": 1.9640406025919285e-06, "loss": 0.6035, "step": 251 }, { "epoch": 0.11370558375634518, "grad_norm": 3.3137132906235203, "learning_rate": 1.963651069140181e-06, "loss": 0.6656, "step": 252 }, { "epoch": 0.11415679639029892, "grad_norm": 3.574058474566791, "learning_rate": 1.963259476260284e-06, "loss": 0.7095, "step": 253 }, { "epoch": 0.11460800902425268, "grad_norm": 3.6146347899303115, "learning_rate": 1.962865824789115e-06, "loss": 0.6267, "step": 254 }, { "epoch": 0.11505922165820642, "grad_norm": 4.082630583384243, "learning_rate": 1.96247011556795e-06, "loss": 0.7022, "step": 255 }, { "epoch": 0.11551043429216018, "grad_norm": 3.291509482076301, "learning_rate": 1.9620723494424623e-06, "loss": 0.5721, "step": 256 }, { "epoch": 0.11596164692611394, "grad_norm": 3.58368271064381, "learning_rate": 1.961672527262723e-06, "loss": 0.6236, "step": 257 }, { "epoch": 0.11641285956006768, "grad_norm": 3.1224675347563275, "learning_rate": 1.9612706498831957e-06, "loss": 0.5134, "step": 258 }, { "epoch": 0.11686407219402144, "grad_norm": 3.5167852362721654, "learning_rate": 1.9608667181627357e-06, "loss": 0.6986, "step": 259 }, { "epoch": 0.11731528482797518, "grad_norm": 3.5809347550436277, "learning_rate": 1.96046073296459e-06, "loss": 0.6251, "step": 260 }, { "epoch": 0.11776649746192894, "grad_norm": 2.8641754562099244, "learning_rate": 1.9600526951563937e-06, "loss": 0.6369, "step": 261 }, { "epoch": 0.11821771009588268, "grad_norm": 3.613739867174843, "learning_rate": 1.9596426056101684e-06, "loss": 0.786, "step": 262 }, { "epoch": 0.11866892272983644, "grad_norm": 3.580296089646273, "learning_rate": 1.9592304652023203e-06, "loss": 0.6038, "step": 263 }, { "epoch": 0.11912013536379018, "grad_norm": 3.15842598534144, "learning_rate": 1.958816274813639e-06, "loss": 0.5811, "step": 264 }, { "epoch": 0.11957134799774394, "grad_norm": 3.0081075803485247, "learning_rate": 1.958400035329294e-06, "loss": 0.528, "step": 265 }, { "epoch": 0.12002256063169768, "grad_norm": 2.8554740174867606, "learning_rate": 1.9579817476388357e-06, "loss": 0.625, "step": 266 }, { "epoch": 0.12047377326565144, "grad_norm": 3.3849924046760447, "learning_rate": 1.9575614126361907e-06, "loss": 0.5894, "step": 267 }, { "epoch": 0.1209249858996052, "grad_norm": 2.8658227520314994, "learning_rate": 1.9571390312196607e-06, "loss": 0.6311, "step": 268 }, { "epoch": 0.12137619853355894, "grad_norm": 3.22918688434795, "learning_rate": 1.9567146042919213e-06, "loss": 0.6711, "step": 269 }, { "epoch": 0.1218274111675127, "grad_norm": 3.102667861242874, "learning_rate": 1.9562881327600197e-06, "loss": 0.5661, "step": 270 }, { "epoch": 0.12227862380146644, "grad_norm": 2.9951718210029843, "learning_rate": 1.955859617535372e-06, "loss": 0.7687, "step": 271 }, { "epoch": 0.1227298364354202, "grad_norm": 3.1684228477861884, "learning_rate": 1.9554290595337625e-06, "loss": 0.6228, "step": 272 }, { "epoch": 0.12318104906937394, "grad_norm": 3.393968292035563, "learning_rate": 1.954996459675341e-06, "loss": 0.5848, "step": 273 }, { "epoch": 0.1236322617033277, "grad_norm": 2.852079323030113, "learning_rate": 1.9545618188846205e-06, "loss": 0.5769, "step": 274 }, { "epoch": 0.12408347433728144, "grad_norm": 2.9641384401198074, "learning_rate": 1.9541251380904762e-06, "loss": 0.5864, "step": 275 }, { "epoch": 0.1245346869712352, "grad_norm": 3.3554476588870568, "learning_rate": 1.9536864182261435e-06, "loss": 0.657, "step": 276 }, { "epoch": 0.12498589960518895, "grad_norm": 3.281618043698293, "learning_rate": 1.9532456602292146e-06, "loss": 0.6213, "step": 277 }, { "epoch": 0.1254371122391427, "grad_norm": 3.3143546652497644, "learning_rate": 1.9528028650416375e-06, "loss": 0.7062, "step": 278 }, { "epoch": 0.12588832487309645, "grad_norm": 3.3561372317938134, "learning_rate": 1.9523580336097145e-06, "loss": 0.4876, "step": 279 }, { "epoch": 0.1263395375070502, "grad_norm": 3.0109523300134264, "learning_rate": 1.9519111668840982e-06, "loss": 0.5256, "step": 280 }, { "epoch": 0.12679075014100394, "grad_norm": 3.578806428790207, "learning_rate": 1.9514622658197933e-06, "loss": 0.5089, "step": 281 }, { "epoch": 0.1272419627749577, "grad_norm": 3.074575948566442, "learning_rate": 1.95101133137615e-06, "loss": 0.695, "step": 282 }, { "epoch": 0.12769317540891145, "grad_norm": 3.1027160235638984, "learning_rate": 1.950558364516865e-06, "loss": 0.5853, "step": 283 }, { "epoch": 0.1281443880428652, "grad_norm": 3.3099951191328563, "learning_rate": 1.9501033662099777e-06, "loss": 0.5303, "step": 284 }, { "epoch": 0.12859560067681894, "grad_norm": 3.446711214585093, "learning_rate": 1.9496463374278696e-06, "loss": 0.5828, "step": 285 }, { "epoch": 0.1290468133107727, "grad_norm": 3.263902821177112, "learning_rate": 1.949187279147262e-06, "loss": 0.6326, "step": 286 }, { "epoch": 0.12949802594472645, "grad_norm": 3.571231287506934, "learning_rate": 1.9487261923492116e-06, "loss": 0.6723, "step": 287 }, { "epoch": 0.1299492385786802, "grad_norm": 3.001117794130769, "learning_rate": 1.9482630780191126e-06, "loss": 0.5732, "step": 288 }, { "epoch": 0.13040045121263397, "grad_norm": 3.474015517628523, "learning_rate": 1.947797937146691e-06, "loss": 0.6214, "step": 289 }, { "epoch": 0.1308516638465877, "grad_norm": 3.2034938171083955, "learning_rate": 1.947330770726004e-06, "loss": 0.7137, "step": 290 }, { "epoch": 0.13130287648054145, "grad_norm": 4.0987214667589935, "learning_rate": 1.946861579755437e-06, "loss": 0.6807, "step": 291 }, { "epoch": 0.1317540891144952, "grad_norm": 2.9958320731805355, "learning_rate": 1.9463903652377026e-06, "loss": 0.648, "step": 292 }, { "epoch": 0.13220530174844897, "grad_norm": 3.5326747287868274, "learning_rate": 1.945917128179839e-06, "loss": 0.5305, "step": 293 }, { "epoch": 0.1326565143824027, "grad_norm": 3.087656746813269, "learning_rate": 1.9454418695932045e-06, "loss": 0.6877, "step": 294 }, { "epoch": 0.13310772701635645, "grad_norm": 3.161562230483419, "learning_rate": 1.94496459049348e-06, "loss": 0.7082, "step": 295 }, { "epoch": 0.1335589396503102, "grad_norm": 3.1405430418827236, "learning_rate": 1.9444852919006623e-06, "loss": 0.6361, "step": 296 }, { "epoch": 0.13401015228426397, "grad_norm": 3.2609308388363165, "learning_rate": 1.944003974839066e-06, "loss": 0.4959, "step": 297 }, { "epoch": 0.13446136491821772, "grad_norm": 3.136461389328533, "learning_rate": 1.9435206403373178e-06, "loss": 0.7346, "step": 298 }, { "epoch": 0.13491257755217145, "grad_norm": 3.5274417741499198, "learning_rate": 1.9430352894283567e-06, "loss": 0.5936, "step": 299 }, { "epoch": 0.1353637901861252, "grad_norm": 3.7645608637306647, "learning_rate": 1.9425479231494318e-06, "loss": 0.6398, "step": 300 }, { "epoch": 0.13581500282007897, "grad_norm": 3.1396546903591718, "learning_rate": 1.942058542542097e-06, "loss": 0.5478, "step": 301 }, { "epoch": 0.13626621545403272, "grad_norm": 3.594716193105851, "learning_rate": 1.9415671486522137e-06, "loss": 0.7303, "step": 302 }, { "epoch": 0.13671742808798645, "grad_norm": 3.507738272824508, "learning_rate": 1.9410737425299434e-06, "loss": 0.6789, "step": 303 }, { "epoch": 0.1371686407219402, "grad_norm": 3.118749146822311, "learning_rate": 1.94057832522975e-06, "loss": 0.6171, "step": 304 }, { "epoch": 0.13761985335589397, "grad_norm": 3.210316055229788, "learning_rate": 1.9400808978103944e-06, "loss": 0.5672, "step": 305 }, { "epoch": 0.13807106598984772, "grad_norm": 3.0272573921584374, "learning_rate": 1.9395814613349338e-06, "loss": 0.6, "step": 306 }, { "epoch": 0.13852227862380145, "grad_norm": 2.8900194965740487, "learning_rate": 1.9390800168707182e-06, "loss": 0.6387, "step": 307 }, { "epoch": 0.1389734912577552, "grad_norm": 3.0767885306822738, "learning_rate": 1.93857656548939e-06, "loss": 0.6114, "step": 308 }, { "epoch": 0.13942470389170897, "grad_norm": 3.1091627287910777, "learning_rate": 1.93807110826688e-06, "loss": 0.5469, "step": 309 }, { "epoch": 0.13987591652566272, "grad_norm": 3.5058309066962976, "learning_rate": 1.937563646283406e-06, "loss": 0.5231, "step": 310 }, { "epoch": 0.14032712915961648, "grad_norm": 3.381960145855753, "learning_rate": 1.93705418062347e-06, "loss": 0.6071, "step": 311 }, { "epoch": 0.1407783417935702, "grad_norm": 3.3900203190115707, "learning_rate": 1.9365427123758547e-06, "loss": 0.6137, "step": 312 }, { "epoch": 0.14122955442752397, "grad_norm": 3.1063379022563007, "learning_rate": 1.936029242633626e-06, "loss": 0.5899, "step": 313 }, { "epoch": 0.14168076706147772, "grad_norm": 3.6211320331970343, "learning_rate": 1.9355137724941234e-06, "loss": 0.6263, "step": 314 }, { "epoch": 0.14213197969543148, "grad_norm": 3.1680939515915902, "learning_rate": 1.9349963030589644e-06, "loss": 0.6648, "step": 315 }, { "epoch": 0.1425831923293852, "grad_norm": 3.5195879110562562, "learning_rate": 1.9344768354340377e-06, "loss": 0.6575, "step": 316 }, { "epoch": 0.14303440496333897, "grad_norm": 3.1488331602409176, "learning_rate": 1.9339553707295018e-06, "loss": 0.5313, "step": 317 }, { "epoch": 0.14348561759729273, "grad_norm": 3.2151330395191557, "learning_rate": 1.933431910059785e-06, "loss": 0.528, "step": 318 }, { "epoch": 0.14393683023124648, "grad_norm": 3.132260724021693, "learning_rate": 1.93290645454358e-06, "loss": 0.5815, "step": 319 }, { "epoch": 0.14438804286520024, "grad_norm": 3.622222391906822, "learning_rate": 1.9323790053038433e-06, "loss": 0.7457, "step": 320 }, { "epoch": 0.14483925549915397, "grad_norm": 4.223750697040322, "learning_rate": 1.9318495634677907e-06, "loss": 0.5711, "step": 321 }, { "epoch": 0.14529046813310773, "grad_norm": 3.3650649933245376, "learning_rate": 1.9313181301668985e-06, "loss": 0.65, "step": 322 }, { "epoch": 0.14574168076706148, "grad_norm": 3.7805123756041934, "learning_rate": 1.9307847065368978e-06, "loss": 0.4404, "step": 323 }, { "epoch": 0.14619289340101524, "grad_norm": 3.6691078891308466, "learning_rate": 1.9302492937177733e-06, "loss": 0.5287, "step": 324 }, { "epoch": 0.14664410603496897, "grad_norm": 2.9203910805909925, "learning_rate": 1.9297118928537616e-06, "loss": 0.5946, "step": 325 }, { "epoch": 0.14709531866892273, "grad_norm": 3.4322784585242094, "learning_rate": 1.9291725050933466e-06, "loss": 0.5945, "step": 326 }, { "epoch": 0.14754653130287648, "grad_norm": 3.4949175306707487, "learning_rate": 1.9286311315892592e-06, "loss": 0.6565, "step": 327 }, { "epoch": 0.14799774393683024, "grad_norm": 3.3116625435966855, "learning_rate": 1.9280877734984745e-06, "loss": 0.5587, "step": 328 }, { "epoch": 0.14844895657078397, "grad_norm": 3.9017874847497596, "learning_rate": 1.9275424319822084e-06, "loss": 0.6092, "step": 329 }, { "epoch": 0.14890016920473773, "grad_norm": 3.483226347899875, "learning_rate": 1.926995108205915e-06, "loss": 0.661, "step": 330 }, { "epoch": 0.14935138183869148, "grad_norm": 3.358115279938035, "learning_rate": 1.926445803339286e-06, "loss": 0.588, "step": 331 }, { "epoch": 0.14980259447264524, "grad_norm": 3.6543876138753446, "learning_rate": 1.925894518556246e-06, "loss": 0.6363, "step": 332 }, { "epoch": 0.150253807106599, "grad_norm": 3.5803877393350514, "learning_rate": 1.9253412550349505e-06, "loss": 0.6382, "step": 333 }, { "epoch": 0.15070501974055273, "grad_norm": 3.514105036732028, "learning_rate": 1.9247860139577852e-06, "loss": 0.531, "step": 334 }, { "epoch": 0.15115623237450648, "grad_norm": 3.0800678193550617, "learning_rate": 1.924228796511361e-06, "loss": 0.5065, "step": 335 }, { "epoch": 0.15160744500846024, "grad_norm": 3.431976731385007, "learning_rate": 1.923669603886513e-06, "loss": 0.6681, "step": 336 }, { "epoch": 0.152058657642414, "grad_norm": 3.1591532382990493, "learning_rate": 1.9231084372782968e-06, "loss": 0.5994, "step": 337 }, { "epoch": 0.15250987027636773, "grad_norm": 2.8404865058044613, "learning_rate": 1.9225452978859873e-06, "loss": 0.5612, "step": 338 }, { "epoch": 0.15296108291032148, "grad_norm": 3.326070609318879, "learning_rate": 1.921980186913075e-06, "loss": 0.5316, "step": 339 }, { "epoch": 0.15341229554427524, "grad_norm": 3.5187885378329886, "learning_rate": 1.9214131055672642e-06, "loss": 0.7119, "step": 340 }, { "epoch": 0.153863508178229, "grad_norm": 3.544854067555435, "learning_rate": 1.9208440550604702e-06, "loss": 0.7238, "step": 341 }, { "epoch": 0.15431472081218275, "grad_norm": 3.2845637387346267, "learning_rate": 1.9202730366088164e-06, "loss": 0.6078, "step": 342 }, { "epoch": 0.15476593344613648, "grad_norm": 3.3136242560590294, "learning_rate": 1.9197000514326317e-06, "loss": 0.5787, "step": 343 }, { "epoch": 0.15521714608009024, "grad_norm": 3.284411489524786, "learning_rate": 1.9191251007564487e-06, "loss": 0.5574, "step": 344 }, { "epoch": 0.155668358714044, "grad_norm": 3.6355673133340747, "learning_rate": 1.9185481858089996e-06, "loss": 0.7101, "step": 345 }, { "epoch": 0.15611957134799775, "grad_norm": 4.128016030092157, "learning_rate": 1.9179693078232155e-06, "loss": 0.5079, "step": 346 }, { "epoch": 0.15657078398195148, "grad_norm": 3.0530977440321405, "learning_rate": 1.917388468036222e-06, "loss": 0.5866, "step": 347 }, { "epoch": 0.15702199661590524, "grad_norm": 3.165514172841862, "learning_rate": 1.9168056676893374e-06, "loss": 0.6284, "step": 348 }, { "epoch": 0.157473209249859, "grad_norm": 2.995277805594194, "learning_rate": 1.91622090802807e-06, "loss": 0.6062, "step": 349 }, { "epoch": 0.15792442188381275, "grad_norm": 3.005909595695992, "learning_rate": 1.9156341903021155e-06, "loss": 0.6028, "step": 350 }, { "epoch": 0.15837563451776648, "grad_norm": 3.721941326683315, "learning_rate": 1.9150455157653543e-06, "loss": 0.5475, "step": 351 }, { "epoch": 0.15882684715172024, "grad_norm": 3.5649796357457313, "learning_rate": 1.9144548856758486e-06, "loss": 0.6411, "step": 352 }, { "epoch": 0.159278059785674, "grad_norm": 3.2716596578005066, "learning_rate": 1.9138623012958393e-06, "loss": 0.5344, "step": 353 }, { "epoch": 0.15972927241962775, "grad_norm": 3.218727183746927, "learning_rate": 1.913267763891745e-06, "loss": 0.6509, "step": 354 }, { "epoch": 0.1601804850535815, "grad_norm": 3.7971566293616035, "learning_rate": 1.912671274734156e-06, "loss": 0.5323, "step": 355 }, { "epoch": 0.16063169768753524, "grad_norm": 3.201203458372683, "learning_rate": 1.9120728350978367e-06, "loss": 0.6743, "step": 356 }, { "epoch": 0.161082910321489, "grad_norm": 3.3662683266894255, "learning_rate": 1.9114724462617175e-06, "loss": 0.6395, "step": 357 }, { "epoch": 0.16153412295544275, "grad_norm": 3.4368545644579274, "learning_rate": 1.910870109508896e-06, "loss": 0.5264, "step": 358 }, { "epoch": 0.1619853355893965, "grad_norm": 3.0271768583204435, "learning_rate": 1.9102658261266306e-06, "loss": 0.6154, "step": 359 }, { "epoch": 0.16243654822335024, "grad_norm": 3.4935218886533757, "learning_rate": 1.9096595974063424e-06, "loss": 0.5963, "step": 360 }, { "epoch": 0.162887760857304, "grad_norm": 3.64027486586054, "learning_rate": 1.9090514246436083e-06, "loss": 0.6211, "step": 361 }, { "epoch": 0.16333897349125776, "grad_norm": 3.396850249691266, "learning_rate": 1.908441309138161e-06, "loss": 0.5142, "step": 362 }, { "epoch": 0.1637901861252115, "grad_norm": 3.3870576147192053, "learning_rate": 1.907829252193883e-06, "loss": 0.6988, "step": 363 }, { "epoch": 0.16424139875916527, "grad_norm": 3.788954000277857, "learning_rate": 1.9072152551188081e-06, "loss": 0.5813, "step": 364 }, { "epoch": 0.164692611393119, "grad_norm": 3.1429732682111933, "learning_rate": 1.9065993192251156e-06, "loss": 0.6034, "step": 365 }, { "epoch": 0.16514382402707276, "grad_norm": 3.678596714953962, "learning_rate": 1.9059814458291275e-06, "loss": 0.6919, "step": 366 }, { "epoch": 0.1655950366610265, "grad_norm": 3.062164866400341, "learning_rate": 1.9053616362513076e-06, "loss": 0.4805, "step": 367 }, { "epoch": 0.16604624929498027, "grad_norm": 3.3886077949868985, "learning_rate": 1.904739891816257e-06, "loss": 0.6668, "step": 368 }, { "epoch": 0.166497461928934, "grad_norm": 3.521006425014518, "learning_rate": 1.9041162138527112e-06, "loss": 0.7878, "step": 369 }, { "epoch": 0.16694867456288776, "grad_norm": 3.666265951900786, "learning_rate": 1.9034906036935391e-06, "loss": 0.6963, "step": 370 }, { "epoch": 0.1673998871968415, "grad_norm": 3.645045850919915, "learning_rate": 1.9028630626757386e-06, "loss": 0.5457, "step": 371 }, { "epoch": 0.16785109983079527, "grad_norm": 3.4414096877643097, "learning_rate": 1.902233592140433e-06, "loss": 0.6719, "step": 372 }, { "epoch": 0.16830231246474903, "grad_norm": 3.265461782165135, "learning_rate": 1.9016021934328706e-06, "loss": 0.7438, "step": 373 }, { "epoch": 0.16875352509870276, "grad_norm": 3.5380564324518415, "learning_rate": 1.9009688679024189e-06, "loss": 0.5506, "step": 374 }, { "epoch": 0.1692047377326565, "grad_norm": 3.8268327357230345, "learning_rate": 1.9003336169025653e-06, "loss": 0.6524, "step": 375 }, { "epoch": 0.16965595036661027, "grad_norm": 3.017258959670839, "learning_rate": 1.89969644179091e-06, "loss": 0.5096, "step": 376 }, { "epoch": 0.17010716300056403, "grad_norm": 3.624267899480704, "learning_rate": 1.8990573439291665e-06, "loss": 0.6393, "step": 377 }, { "epoch": 0.17055837563451776, "grad_norm": 3.3240315516106405, "learning_rate": 1.8984163246831569e-06, "loss": 0.5305, "step": 378 }, { "epoch": 0.1710095882684715, "grad_norm": 3.096119100843986, "learning_rate": 1.89777338542281e-06, "loss": 0.5599, "step": 379 }, { "epoch": 0.17146080090242527, "grad_norm": 3.4243899877650787, "learning_rate": 1.8971285275221577e-06, "loss": 0.5102, "step": 380 }, { "epoch": 0.17191201353637903, "grad_norm": 3.500631529144404, "learning_rate": 1.8964817523593318e-06, "loss": 0.5937, "step": 381 }, { "epoch": 0.17236322617033276, "grad_norm": 3.214480819264598, "learning_rate": 1.8958330613165621e-06, "loss": 0.7037, "step": 382 }, { "epoch": 0.1728144388042865, "grad_norm": 3.2032488316353307, "learning_rate": 1.8951824557801723e-06, "loss": 0.5986, "step": 383 }, { "epoch": 0.17326565143824027, "grad_norm": 2.773335792708175, "learning_rate": 1.8945299371405783e-06, "loss": 0.4929, "step": 384 }, { "epoch": 0.17371686407219403, "grad_norm": 3.5616413240552283, "learning_rate": 1.8938755067922836e-06, "loss": 0.6427, "step": 385 }, { "epoch": 0.17416807670614778, "grad_norm": 3.3452575187465423, "learning_rate": 1.893219166133878e-06, "loss": 0.6748, "step": 386 }, { "epoch": 0.1746192893401015, "grad_norm": 3.329395667620974, "learning_rate": 1.8925609165680336e-06, "loss": 0.5985, "step": 387 }, { "epoch": 0.17507050197405527, "grad_norm": 3.1397011497408323, "learning_rate": 1.8919007595015017e-06, "loss": 0.5812, "step": 388 }, { "epoch": 0.17552171460800903, "grad_norm": 3.1476385473200343, "learning_rate": 1.891238696345111e-06, "loss": 0.5604, "step": 389 }, { "epoch": 0.17597292724196278, "grad_norm": 3.1908486340362754, "learning_rate": 1.8905747285137625e-06, "loss": 0.6034, "step": 390 }, { "epoch": 0.1764241398759165, "grad_norm": 3.2455622054904376, "learning_rate": 1.889908857426429e-06, "loss": 0.6501, "step": 391 }, { "epoch": 0.17687535250987027, "grad_norm": 3.1460953645857925, "learning_rate": 1.8892410845061496e-06, "loss": 0.6749, "step": 392 }, { "epoch": 0.17732656514382403, "grad_norm": 3.5804988998358818, "learning_rate": 1.8885714111800286e-06, "loss": 0.6729, "step": 393 }, { "epoch": 0.17777777777777778, "grad_norm": 2.8850245833802806, "learning_rate": 1.8878998388792312e-06, "loss": 0.5124, "step": 394 }, { "epoch": 0.17822899041173154, "grad_norm": 3.3696439645865772, "learning_rate": 1.8872263690389817e-06, "loss": 0.6009, "step": 395 }, { "epoch": 0.17868020304568527, "grad_norm": 4.50248696358533, "learning_rate": 1.8865510030985585e-06, "loss": 0.8121, "step": 396 }, { "epoch": 0.17913141567963903, "grad_norm": 3.2758219521283856, "learning_rate": 1.8858737425012932e-06, "loss": 0.6326, "step": 397 }, { "epoch": 0.17958262831359278, "grad_norm": 3.0876038288374974, "learning_rate": 1.8851945886945658e-06, "loss": 0.5091, "step": 398 }, { "epoch": 0.18003384094754654, "grad_norm": 3.161733247624105, "learning_rate": 1.8845135431298025e-06, "loss": 0.5075, "step": 399 }, { "epoch": 0.18048505358150027, "grad_norm": 3.3744189725502096, "learning_rate": 1.8838306072624729e-06, "loss": 0.6991, "step": 400 }, { "epoch": 0.18093626621545403, "grad_norm": 3.297593259153372, "learning_rate": 1.8831457825520851e-06, "loss": 0.6062, "step": 401 }, { "epoch": 0.18138747884940778, "grad_norm": 3.366058459902841, "learning_rate": 1.8824590704621856e-06, "loss": 0.6666, "step": 402 }, { "epoch": 0.18183869148336154, "grad_norm": 3.7126042071152976, "learning_rate": 1.8817704724603533e-06, "loss": 0.6618, "step": 403 }, { "epoch": 0.18228990411731527, "grad_norm": 2.9848703959766825, "learning_rate": 1.8810799900181976e-06, "loss": 0.609, "step": 404 }, { "epoch": 0.18274111675126903, "grad_norm": 3.649965565236517, "learning_rate": 1.8803876246113552e-06, "loss": 0.5993, "step": 405 }, { "epoch": 0.18319232938522279, "grad_norm": 2.89156057373746, "learning_rate": 1.8796933777194871e-06, "loss": 0.6068, "step": 406 }, { "epoch": 0.18364354201917654, "grad_norm": 3.0603328302400197, "learning_rate": 1.8789972508262752e-06, "loss": 0.5925, "step": 407 }, { "epoch": 0.1840947546531303, "grad_norm": 3.2452938065021484, "learning_rate": 1.8782992454194192e-06, "loss": 0.4789, "step": 408 }, { "epoch": 0.18454596728708403, "grad_norm": 2.8228292796413963, "learning_rate": 1.877599362990633e-06, "loss": 0.5509, "step": 409 }, { "epoch": 0.18499717992103779, "grad_norm": 3.556457875267904, "learning_rate": 1.8768976050356424e-06, "loss": 0.5953, "step": 410 }, { "epoch": 0.18544839255499154, "grad_norm": 3.792739279541135, "learning_rate": 1.876193973054181e-06, "loss": 0.7014, "step": 411 }, { "epoch": 0.1858996051889453, "grad_norm": 3.2493298778303354, "learning_rate": 1.8754884685499884e-06, "loss": 0.4082, "step": 412 }, { "epoch": 0.18635081782289903, "grad_norm": 3.6609624714373457, "learning_rate": 1.874781093030804e-06, "loss": 0.6845, "step": 413 }, { "epoch": 0.18680203045685279, "grad_norm": 3.2137855218137594, "learning_rate": 1.8740718480083678e-06, "loss": 0.6125, "step": 414 }, { "epoch": 0.18725324309080654, "grad_norm": 3.499410379932756, "learning_rate": 1.8733607349984138e-06, "loss": 0.6784, "step": 415 }, { "epoch": 0.1877044557247603, "grad_norm": 3.2539894662199673, "learning_rate": 1.8726477555206688e-06, "loss": 0.6767, "step": 416 }, { "epoch": 0.18815566835871406, "grad_norm": 3.7584814555532913, "learning_rate": 1.8719329110988484e-06, "loss": 0.5895, "step": 417 }, { "epoch": 0.18860688099266779, "grad_norm": 2.7529038721428045, "learning_rate": 1.8712162032606536e-06, "loss": 0.5273, "step": 418 }, { "epoch": 0.18905809362662154, "grad_norm": 3.033230260922539, "learning_rate": 1.8704976335377676e-06, "loss": 0.4975, "step": 419 }, { "epoch": 0.1895093062605753, "grad_norm": 3.383380728150317, "learning_rate": 1.8697772034658525e-06, "loss": 0.7518, "step": 420 }, { "epoch": 0.18996051889452906, "grad_norm": 3.4073642749167408, "learning_rate": 1.8690549145845473e-06, "loss": 0.6152, "step": 421 }, { "epoch": 0.19041173152848279, "grad_norm": 2.9297215005432586, "learning_rate": 1.8683307684374618e-06, "loss": 0.5265, "step": 422 }, { "epoch": 0.19086294416243654, "grad_norm": 3.167963569614919, "learning_rate": 1.8676047665721763e-06, "loss": 0.5796, "step": 423 }, { "epoch": 0.1913141567963903, "grad_norm": 3.1693108021010654, "learning_rate": 1.8668769105402365e-06, "loss": 0.5744, "step": 424 }, { "epoch": 0.19176536943034406, "grad_norm": 3.0256228579439046, "learning_rate": 1.8661472018971502e-06, "loss": 0.6616, "step": 425 }, { "epoch": 0.19221658206429781, "grad_norm": 3.2078343663005486, "learning_rate": 1.8654156422023858e-06, "loss": 0.5892, "step": 426 }, { "epoch": 0.19266779469825154, "grad_norm": 3.227710950393321, "learning_rate": 1.8646822330193657e-06, "loss": 0.6133, "step": 427 }, { "epoch": 0.1931190073322053, "grad_norm": 3.941318300397022, "learning_rate": 1.8639469759154665e-06, "loss": 0.6455, "step": 428 }, { "epoch": 0.19357021996615906, "grad_norm": 3.698927481165211, "learning_rate": 1.863209872462013e-06, "loss": 0.7186, "step": 429 }, { "epoch": 0.19402143260011281, "grad_norm": 3.3302251096427833, "learning_rate": 1.8624709242342763e-06, "loss": 0.6038, "step": 430 }, { "epoch": 0.19447264523406654, "grad_norm": 3.290865194019538, "learning_rate": 1.8617301328114702e-06, "loss": 0.5836, "step": 431 }, { "epoch": 0.1949238578680203, "grad_norm": 3.310231926134767, "learning_rate": 1.8609874997767471e-06, "loss": 0.5143, "step": 432 }, { "epoch": 0.19537507050197406, "grad_norm": 3.458415221251539, "learning_rate": 1.8602430267171953e-06, "loss": 0.5223, "step": 433 }, { "epoch": 0.19582628313592781, "grad_norm": 3.5702337410450005, "learning_rate": 1.8594967152238356e-06, "loss": 0.6378, "step": 434 }, { "epoch": 0.19627749576988154, "grad_norm": 3.3767941834158477, "learning_rate": 1.8587485668916175e-06, "loss": 0.7065, "step": 435 }, { "epoch": 0.1967287084038353, "grad_norm": 3.376132837313131, "learning_rate": 1.857998583319416e-06, "loss": 0.6704, "step": 436 }, { "epoch": 0.19717992103778906, "grad_norm": 3.7269212236357614, "learning_rate": 1.8572467661100285e-06, "loss": 0.6655, "step": 437 }, { "epoch": 0.19763113367174281, "grad_norm": 2.885465129936691, "learning_rate": 1.856493116870171e-06, "loss": 0.5092, "step": 438 }, { "epoch": 0.19808234630569657, "grad_norm": 3.2219073896145374, "learning_rate": 1.855737637210475e-06, "loss": 0.6651, "step": 439 }, { "epoch": 0.1985335589396503, "grad_norm": 3.020821738843593, "learning_rate": 1.8549803287454828e-06, "loss": 0.5339, "step": 440 }, { "epoch": 0.19898477157360406, "grad_norm": 3.3074095664255814, "learning_rate": 1.8542211930936461e-06, "loss": 0.6784, "step": 441 }, { "epoch": 0.19943598420755781, "grad_norm": 3.0185027225876344, "learning_rate": 1.8534602318773211e-06, "loss": 0.4799, "step": 442 }, { "epoch": 0.19988719684151157, "grad_norm": 3.3658699683573587, "learning_rate": 1.8526974467227657e-06, "loss": 0.5748, "step": 443 }, { "epoch": 0.2003384094754653, "grad_norm": 3.3754909363924925, "learning_rate": 1.8519328392601348e-06, "loss": 0.6109, "step": 444 }, { "epoch": 0.20078962210941906, "grad_norm": 3.3689745519480665, "learning_rate": 1.8511664111234796e-06, "loss": 0.6079, "step": 445 }, { "epoch": 0.20124083474337282, "grad_norm": 3.843717566439344, "learning_rate": 1.8503981639507402e-06, "loss": 0.6312, "step": 446 }, { "epoch": 0.20169204737732657, "grad_norm": 3.2274835544812848, "learning_rate": 1.8496280993837457e-06, "loss": 0.5936, "step": 447 }, { "epoch": 0.20214326001128033, "grad_norm": 3.311187513569298, "learning_rate": 1.8488562190682087e-06, "loss": 0.5793, "step": 448 }, { "epoch": 0.20259447264523406, "grad_norm": 2.9215114080546214, "learning_rate": 1.8480825246537217e-06, "loss": 0.5959, "step": 449 }, { "epoch": 0.20304568527918782, "grad_norm": 3.404182604070715, "learning_rate": 1.847307017793755e-06, "loss": 0.5429, "step": 450 }, { "epoch": 0.20349689791314157, "grad_norm": 3.2363535948970856, "learning_rate": 1.846529700145652e-06, "loss": 0.6569, "step": 451 }, { "epoch": 0.20394811054709533, "grad_norm": 3.511507526063595, "learning_rate": 1.8457505733706258e-06, "loss": 0.4839, "step": 452 }, { "epoch": 0.20439932318104906, "grad_norm": 3.37043417245356, "learning_rate": 1.8449696391337554e-06, "loss": 0.669, "step": 453 }, { "epoch": 0.20485053581500282, "grad_norm": 3.9931457464636155, "learning_rate": 1.8441868991039837e-06, "loss": 0.5805, "step": 454 }, { "epoch": 0.20530174844895657, "grad_norm": 2.9942829712689605, "learning_rate": 1.8434023549541115e-06, "loss": 0.5925, "step": 455 }, { "epoch": 0.20575296108291033, "grad_norm": 3.1956405252290594, "learning_rate": 1.8426160083607964e-06, "loss": 0.5746, "step": 456 }, { "epoch": 0.20620417371686406, "grad_norm": 2.916883450705641, "learning_rate": 1.841827861004547e-06, "loss": 0.5662, "step": 457 }, { "epoch": 0.20665538635081782, "grad_norm": 3.431394584303988, "learning_rate": 1.8410379145697208e-06, "loss": 0.6516, "step": 458 }, { "epoch": 0.20710659898477157, "grad_norm": 3.165177547700952, "learning_rate": 1.8402461707445203e-06, "loss": 0.5172, "step": 459 }, { "epoch": 0.20755781161872533, "grad_norm": 3.7977061154161302, "learning_rate": 1.8394526312209885e-06, "loss": 0.6817, "step": 460 }, { "epoch": 0.2080090242526791, "grad_norm": 2.924039062504755, "learning_rate": 1.838657297695007e-06, "loss": 0.7493, "step": 461 }, { "epoch": 0.20846023688663282, "grad_norm": 3.120504427001902, "learning_rate": 1.8378601718662905e-06, "loss": 0.5429, "step": 462 }, { "epoch": 0.20891144952058657, "grad_norm": 3.45109119066591, "learning_rate": 1.8370612554383848e-06, "loss": 0.6886, "step": 463 }, { "epoch": 0.20936266215454033, "grad_norm": 3.9533453151193005, "learning_rate": 1.8362605501186618e-06, "loss": 0.6993, "step": 464 }, { "epoch": 0.2098138747884941, "grad_norm": 3.5868823288557463, "learning_rate": 1.8354580576183167e-06, "loss": 0.7202, "step": 465 }, { "epoch": 0.21026508742244782, "grad_norm": 3.360808407157414, "learning_rate": 1.8346537796523642e-06, "loss": 0.7506, "step": 466 }, { "epoch": 0.21071630005640157, "grad_norm": 3.5999786014669453, "learning_rate": 1.8338477179396347e-06, "loss": 0.6462, "step": 467 }, { "epoch": 0.21116751269035533, "grad_norm": 3.089511771354358, "learning_rate": 1.8330398742027702e-06, "loss": 0.5945, "step": 468 }, { "epoch": 0.2116187253243091, "grad_norm": 3.1839344612610376, "learning_rate": 1.8322302501682216e-06, "loss": 0.5521, "step": 469 }, { "epoch": 0.21206993795826284, "grad_norm": 3.2784855658730883, "learning_rate": 1.831418847566245e-06, "loss": 0.5371, "step": 470 }, { "epoch": 0.21252115059221657, "grad_norm": 3.346783346886957, "learning_rate": 1.8306056681308957e-06, "loss": 0.6287, "step": 471 }, { "epoch": 0.21297236322617033, "grad_norm": 3.2663627359220646, "learning_rate": 1.8297907136000283e-06, "loss": 0.6486, "step": 472 }, { "epoch": 0.2134235758601241, "grad_norm": 3.035708873461981, "learning_rate": 1.8289739857152903e-06, "loss": 0.5249, "step": 473 }, { "epoch": 0.21387478849407784, "grad_norm": 3.14810881896666, "learning_rate": 1.8281554862221179e-06, "loss": 0.6863, "step": 474 }, { "epoch": 0.21432600112803157, "grad_norm": 3.606635360142977, "learning_rate": 1.827335216869735e-06, "loss": 0.5768, "step": 475 }, { "epoch": 0.21477721376198533, "grad_norm": 3.420453255878494, "learning_rate": 1.8265131794111477e-06, "loss": 0.5868, "step": 476 }, { "epoch": 0.2152284263959391, "grad_norm": 3.5479674298971213, "learning_rate": 1.8256893756031396e-06, "loss": 0.5529, "step": 477 }, { "epoch": 0.21567963902989284, "grad_norm": 3.5672449049409143, "learning_rate": 1.82486380720627e-06, "loss": 0.5414, "step": 478 }, { "epoch": 0.2161308516638466, "grad_norm": 3.4755060623971157, "learning_rate": 1.8240364759848697e-06, "loss": 0.6827, "step": 479 }, { "epoch": 0.21658206429780033, "grad_norm": 3.5075155658583532, "learning_rate": 1.823207383707036e-06, "loss": 0.6522, "step": 480 }, { "epoch": 0.2170332769317541, "grad_norm": 3.615334770032639, "learning_rate": 1.82237653214463e-06, "loss": 0.6112, "step": 481 }, { "epoch": 0.21748448956570784, "grad_norm": 2.930096209443241, "learning_rate": 1.8215439230732728e-06, "loss": 0.4904, "step": 482 }, { "epoch": 0.2179357021996616, "grad_norm": 3.045321232724037, "learning_rate": 1.8207095582723416e-06, "loss": 0.4742, "step": 483 }, { "epoch": 0.21838691483361533, "grad_norm": 3.318218916061034, "learning_rate": 1.8198734395249654e-06, "loss": 0.5816, "step": 484 }, { "epoch": 0.2188381274675691, "grad_norm": 3.238638276761325, "learning_rate": 1.8190355686180218e-06, "loss": 0.7086, "step": 485 }, { "epoch": 0.21928934010152284, "grad_norm": 3.088879882296647, "learning_rate": 1.8181959473421334e-06, "loss": 0.5967, "step": 486 }, { "epoch": 0.2197405527354766, "grad_norm": 3.3716940597716585, "learning_rate": 1.8173545774916626e-06, "loss": 0.7092, "step": 487 }, { "epoch": 0.22019176536943033, "grad_norm": 3.7448981916189825, "learning_rate": 1.816511460864709e-06, "loss": 0.6427, "step": 488 }, { "epoch": 0.2206429780033841, "grad_norm": 2.9887789178064548, "learning_rate": 1.8156665992631057e-06, "loss": 0.5822, "step": 489 }, { "epoch": 0.22109419063733785, "grad_norm": 3.3414881969228714, "learning_rate": 1.8148199944924146e-06, "loss": 0.5946, "step": 490 }, { "epoch": 0.2215454032712916, "grad_norm": 3.4422877305450763, "learning_rate": 1.8139716483619232e-06, "loss": 0.6066, "step": 491 }, { "epoch": 0.22199661590524536, "grad_norm": 3.4139774261820093, "learning_rate": 1.8131215626846403e-06, "loss": 0.6513, "step": 492 }, { "epoch": 0.2224478285391991, "grad_norm": 2.9492703190535465, "learning_rate": 1.8122697392772923e-06, "loss": 0.512, "step": 493 }, { "epoch": 0.22289904117315285, "grad_norm": 3.392493989272701, "learning_rate": 1.8114161799603192e-06, "loss": 0.6111, "step": 494 }, { "epoch": 0.2233502538071066, "grad_norm": 3.4162235015689095, "learning_rate": 1.8105608865578712e-06, "loss": 0.7261, "step": 495 }, { "epoch": 0.22380146644106036, "grad_norm": 3.3485739796813645, "learning_rate": 1.809703860897804e-06, "loss": 0.5977, "step": 496 }, { "epoch": 0.2242526790750141, "grad_norm": 3.3359232989076917, "learning_rate": 1.808845104811676e-06, "loss": 0.4957, "step": 497 }, { "epoch": 0.22470389170896785, "grad_norm": 3.1119819516264706, "learning_rate": 1.8079846201347426e-06, "loss": 0.566, "step": 498 }, { "epoch": 0.2251551043429216, "grad_norm": 3.33013160940601, "learning_rate": 1.8071224087059545e-06, "loss": 0.6161, "step": 499 }, { "epoch": 0.22560631697687536, "grad_norm": 3.726562728905523, "learning_rate": 1.806258472367952e-06, "loss": 0.6273, "step": 500 }, { "epoch": 0.22605752961082912, "grad_norm": 3.112643238486848, "learning_rate": 1.805392812967062e-06, "loss": 0.524, "step": 501 }, { "epoch": 0.22650874224478285, "grad_norm": 3.078076912846401, "learning_rate": 1.8045254323532938e-06, "loss": 0.5933, "step": 502 }, { "epoch": 0.2269599548787366, "grad_norm": 3.3007163587543293, "learning_rate": 1.803656332380335e-06, "loss": 0.6058, "step": 503 }, { "epoch": 0.22741116751269036, "grad_norm": 3.350696137526816, "learning_rate": 1.8027855149055476e-06, "loss": 0.6761, "step": 504 }, { "epoch": 0.22786238014664412, "grad_norm": 3.056386670780574, "learning_rate": 1.8019129817899641e-06, "loss": 0.5403, "step": 505 }, { "epoch": 0.22831359278059785, "grad_norm": 3.3200840574141703, "learning_rate": 1.8010387348982834e-06, "loss": 0.4723, "step": 506 }, { "epoch": 0.2287648054145516, "grad_norm": 2.9031378599313964, "learning_rate": 1.8001627760988676e-06, "loss": 0.5124, "step": 507 }, { "epoch": 0.22921601804850536, "grad_norm": 3.0170169190296425, "learning_rate": 1.7992851072637364e-06, "loss": 0.5241, "step": 508 }, { "epoch": 0.22966723068245912, "grad_norm": 3.260274778241787, "learning_rate": 1.7984057302685645e-06, "loss": 0.612, "step": 509 }, { "epoch": 0.23011844331641285, "grad_norm": 2.957767077162712, "learning_rate": 1.7975246469926773e-06, "loss": 0.5856, "step": 510 }, { "epoch": 0.2305696559503666, "grad_norm": 3.077346118529033, "learning_rate": 1.7966418593190466e-06, "loss": 0.6476, "step": 511 }, { "epoch": 0.23102086858432036, "grad_norm": 3.40658654324856, "learning_rate": 1.7957573691342863e-06, "loss": 0.6496, "step": 512 }, { "epoch": 0.23147208121827412, "grad_norm": 3.1537347899768187, "learning_rate": 1.7948711783286494e-06, "loss": 0.4991, "step": 513 }, { "epoch": 0.23192329385222787, "grad_norm": 3.3406426536701064, "learning_rate": 1.7939832887960228e-06, "loss": 0.6539, "step": 514 }, { "epoch": 0.2323745064861816, "grad_norm": 3.6637558500642617, "learning_rate": 1.7930937024339236e-06, "loss": 0.5154, "step": 515 }, { "epoch": 0.23282571912013536, "grad_norm": 3.528943118739639, "learning_rate": 1.7922024211434958e-06, "loss": 0.4997, "step": 516 }, { "epoch": 0.23327693175408912, "grad_norm": 2.8648086426744386, "learning_rate": 1.7913094468295056e-06, "loss": 0.4715, "step": 517 }, { "epoch": 0.23372814438804287, "grad_norm": 3.39893956335253, "learning_rate": 1.790414781400337e-06, "loss": 0.6078, "step": 518 }, { "epoch": 0.2341793570219966, "grad_norm": 3.3165376220099976, "learning_rate": 1.7895184267679882e-06, "loss": 0.5808, "step": 519 }, { "epoch": 0.23463056965595036, "grad_norm": 3.220094259664805, "learning_rate": 1.7886203848480671e-06, "loss": 0.6231, "step": 520 }, { "epoch": 0.23508178228990412, "grad_norm": 3.1138985690928886, "learning_rate": 1.7877206575597887e-06, "loss": 0.6523, "step": 521 }, { "epoch": 0.23553299492385787, "grad_norm": 3.1979428387224935, "learning_rate": 1.7868192468259684e-06, "loss": 0.598, "step": 522 }, { "epoch": 0.23598420755781163, "grad_norm": 3.135032930567459, "learning_rate": 1.7859161545730204e-06, "loss": 0.7253, "step": 523 }, { "epoch": 0.23643542019176536, "grad_norm": 2.85623015158301, "learning_rate": 1.7850113827309516e-06, "loss": 0.485, "step": 524 }, { "epoch": 0.23688663282571912, "grad_norm": 3.126524923453851, "learning_rate": 1.7841049332333588e-06, "loss": 0.6822, "step": 525 }, { "epoch": 0.23733784545967287, "grad_norm": 3.2260751063220416, "learning_rate": 1.7831968080174245e-06, "loss": 0.5968, "step": 526 }, { "epoch": 0.23778905809362663, "grad_norm": 3.2163409363779656, "learning_rate": 1.7822870090239116e-06, "loss": 0.6282, "step": 527 }, { "epoch": 0.23824027072758036, "grad_norm": 3.223131291822765, "learning_rate": 1.7813755381971603e-06, "loss": 0.6361, "step": 528 }, { "epoch": 0.23869148336153412, "grad_norm": 3.5266582311851367, "learning_rate": 1.7804623974850843e-06, "loss": 0.5917, "step": 529 }, { "epoch": 0.23914269599548788, "grad_norm": 3.756212649445671, "learning_rate": 1.7795475888391654e-06, "loss": 0.5342, "step": 530 }, { "epoch": 0.23959390862944163, "grad_norm": 3.0598171380651658, "learning_rate": 1.7786311142144501e-06, "loss": 0.7084, "step": 531 }, { "epoch": 0.24004512126339536, "grad_norm": 3.3421256359633476, "learning_rate": 1.7777129755695453e-06, "loss": 0.5933, "step": 532 }, { "epoch": 0.24049633389734912, "grad_norm": 3.5281023446435564, "learning_rate": 1.7767931748666143e-06, "loss": 0.6578, "step": 533 }, { "epoch": 0.24094754653130288, "grad_norm": 3.0273532574157613, "learning_rate": 1.7758717140713717e-06, "loss": 0.5256, "step": 534 }, { "epoch": 0.24139875916525663, "grad_norm": 3.197844985849922, "learning_rate": 1.7749485951530812e-06, "loss": 0.6199, "step": 535 }, { "epoch": 0.2418499717992104, "grad_norm": 3.380058493493815, "learning_rate": 1.7740238200845484e-06, "loss": 0.5581, "step": 536 }, { "epoch": 0.24230118443316412, "grad_norm": 2.8794589169982405, "learning_rate": 1.7730973908421196e-06, "loss": 0.5515, "step": 537 }, { "epoch": 0.24275239706711788, "grad_norm": 3.303088479063469, "learning_rate": 1.772169309405676e-06, "loss": 0.6068, "step": 538 }, { "epoch": 0.24320360970107163, "grad_norm": 3.5894154844174775, "learning_rate": 1.7712395777586294e-06, "loss": 0.569, "step": 539 }, { "epoch": 0.2436548223350254, "grad_norm": 3.412578792730928, "learning_rate": 1.770308197887918e-06, "loss": 0.5934, "step": 540 }, { "epoch": 0.24410603496897912, "grad_norm": 3.4253075329490814, "learning_rate": 1.7693751717840035e-06, "loss": 0.6087, "step": 541 }, { "epoch": 0.24455724760293288, "grad_norm": 3.499633565706812, "learning_rate": 1.7684405014408647e-06, "loss": 0.588, "step": 542 }, { "epoch": 0.24500846023688663, "grad_norm": 2.814806236683677, "learning_rate": 1.767504188855995e-06, "loss": 0.482, "step": 543 }, { "epoch": 0.2454596728708404, "grad_norm": 3.098881310366283, "learning_rate": 1.766566236030397e-06, "loss": 0.5435, "step": 544 }, { "epoch": 0.24591088550479415, "grad_norm": 3.288381281791134, "learning_rate": 1.7656266449685792e-06, "loss": 0.5992, "step": 545 }, { "epoch": 0.24636209813874788, "grad_norm": 3.4041539654568425, "learning_rate": 1.7646854176785506e-06, "loss": 0.5966, "step": 546 }, { "epoch": 0.24681331077270163, "grad_norm": 3.4008922515143345, "learning_rate": 1.7637425561718174e-06, "loss": 0.7074, "step": 547 }, { "epoch": 0.2472645234066554, "grad_norm": 3.552760007080402, "learning_rate": 1.762798062463378e-06, "loss": 0.7019, "step": 548 }, { "epoch": 0.24771573604060915, "grad_norm": 3.6537377564708255, "learning_rate": 1.7618519385717192e-06, "loss": 0.6482, "step": 549 }, { "epoch": 0.24816694867456288, "grad_norm": 3.11958006185935, "learning_rate": 1.7609041865188118e-06, "loss": 0.5418, "step": 550 }, { "epoch": 0.24861816130851663, "grad_norm": 3.336252618186167, "learning_rate": 1.759954808330106e-06, "loss": 0.6389, "step": 551 }, { "epoch": 0.2490693739424704, "grad_norm": 2.921189003705698, "learning_rate": 1.7590038060345274e-06, "loss": 0.4537, "step": 552 }, { "epoch": 0.24952058657642415, "grad_norm": 3.33855785234176, "learning_rate": 1.7580511816644715e-06, "loss": 0.5591, "step": 553 }, { "epoch": 0.2499717992103779, "grad_norm": 4.125492421237276, "learning_rate": 1.7570969372558021e-06, "loss": 0.5379, "step": 554 }, { "epoch": 0.25042301184433163, "grad_norm": 3.0575497100367683, "learning_rate": 1.7561410748478441e-06, "loss": 0.5602, "step": 555 }, { "epoch": 0.2508742244782854, "grad_norm": 3.2758898433144075, "learning_rate": 1.75518359648338e-06, "loss": 0.4833, "step": 556 }, { "epoch": 0.25132543711223915, "grad_norm": 3.2778885737843777, "learning_rate": 1.7542245042086467e-06, "loss": 0.5926, "step": 557 }, { "epoch": 0.2517766497461929, "grad_norm": 3.2596395219328085, "learning_rate": 1.7532638000733293e-06, "loss": 0.555, "step": 558 }, { "epoch": 0.25222786238014666, "grad_norm": 2.9901086229008227, "learning_rate": 1.7523014861305585e-06, "loss": 0.4896, "step": 559 }, { "epoch": 0.2526790750141004, "grad_norm": 3.244315223694814, "learning_rate": 1.7513375644369046e-06, "loss": 0.585, "step": 560 }, { "epoch": 0.2531302876480541, "grad_norm": 3.198365136580784, "learning_rate": 1.750372037052374e-06, "loss": 0.528, "step": 561 }, { "epoch": 0.2535815002820079, "grad_norm": 3.2771916408813406, "learning_rate": 1.7494049060404047e-06, "loss": 0.7574, "step": 562 }, { "epoch": 0.25403271291596163, "grad_norm": 2.8807336033407487, "learning_rate": 1.7484361734678621e-06, "loss": 0.5565, "step": 563 }, { "epoch": 0.2544839255499154, "grad_norm": 3.6211865966847494, "learning_rate": 1.747465841405034e-06, "loss": 0.5855, "step": 564 }, { "epoch": 0.25493513818386915, "grad_norm": 3.1436849609005204, "learning_rate": 1.7464939119256266e-06, "loss": 0.6694, "step": 565 }, { "epoch": 0.2553863508178229, "grad_norm": 2.7285316400886113, "learning_rate": 1.7455203871067596e-06, "loss": 0.5036, "step": 566 }, { "epoch": 0.25583756345177666, "grad_norm": 3.186433555908316, "learning_rate": 1.744545269028963e-06, "loss": 0.553, "step": 567 }, { "epoch": 0.2562887760857304, "grad_norm": 2.8827236037421033, "learning_rate": 1.7435685597761707e-06, "loss": 0.5289, "step": 568 }, { "epoch": 0.2567399887196842, "grad_norm": 3.181781322960324, "learning_rate": 1.742590261435718e-06, "loss": 0.6055, "step": 569 }, { "epoch": 0.2571912013536379, "grad_norm": 3.4460098001320443, "learning_rate": 1.7416103760983356e-06, "loss": 0.5721, "step": 570 }, { "epoch": 0.25764241398759163, "grad_norm": 3.2888809340579033, "learning_rate": 1.7406289058581463e-06, "loss": 0.5706, "step": 571 }, { "epoch": 0.2580936266215454, "grad_norm": 3.039055552956028, "learning_rate": 1.7396458528126594e-06, "loss": 0.6311, "step": 572 }, { "epoch": 0.25854483925549915, "grad_norm": 3.1817781660973012, "learning_rate": 1.738661219062768e-06, "loss": 0.5731, "step": 573 }, { "epoch": 0.2589960518894529, "grad_norm": 3.2166470180702276, "learning_rate": 1.7376750067127412e-06, "loss": 0.6233, "step": 574 }, { "epoch": 0.25944726452340666, "grad_norm": 2.933567700518428, "learning_rate": 1.7366872178702246e-06, "loss": 0.4681, "step": 575 }, { "epoch": 0.2598984771573604, "grad_norm": 3.339854630057023, "learning_rate": 1.7356978546462305e-06, "loss": 0.5115, "step": 576 }, { "epoch": 0.2603496897913142, "grad_norm": 3.042537582951306, "learning_rate": 1.7347069191551367e-06, "loss": 0.4721, "step": 577 }, { "epoch": 0.26080090242526793, "grad_norm": 3.4742498601316583, "learning_rate": 1.7337144135146815e-06, "loss": 0.6191, "step": 578 }, { "epoch": 0.26125211505922163, "grad_norm": 3.338002903883107, "learning_rate": 1.7327203398459584e-06, "loss": 0.5757, "step": 579 }, { "epoch": 0.2617033276931754, "grad_norm": 3.582565654268005, "learning_rate": 1.731724700273412e-06, "loss": 0.5778, "step": 580 }, { "epoch": 0.26215454032712915, "grad_norm": 3.3503897003901773, "learning_rate": 1.7307274969248334e-06, "loss": 0.6079, "step": 581 }, { "epoch": 0.2626057529610829, "grad_norm": 3.6953227055441644, "learning_rate": 1.7297287319313552e-06, "loss": 0.6023, "step": 582 }, { "epoch": 0.26305696559503666, "grad_norm": 3.2964361664081143, "learning_rate": 1.7287284074274485e-06, "loss": 0.6274, "step": 583 }, { "epoch": 0.2635081782289904, "grad_norm": 3.347719926763811, "learning_rate": 1.7277265255509163e-06, "loss": 0.5585, "step": 584 }, { "epoch": 0.2639593908629442, "grad_norm": 2.9228072338236237, "learning_rate": 1.7267230884428903e-06, "loss": 0.5438, "step": 585 }, { "epoch": 0.26441060349689793, "grad_norm": 2.9981318621426545, "learning_rate": 1.7257180982478254e-06, "loss": 0.4731, "step": 586 }, { "epoch": 0.2648618161308517, "grad_norm": 2.7051775991961073, "learning_rate": 1.7247115571134968e-06, "loss": 0.5209, "step": 587 }, { "epoch": 0.2653130287648054, "grad_norm": 3.201881004333221, "learning_rate": 1.7237034671909927e-06, "loss": 0.5185, "step": 588 }, { "epoch": 0.26576424139875915, "grad_norm": 3.0674421520184803, "learning_rate": 1.7226938306347122e-06, "loss": 0.4585, "step": 589 }, { "epoch": 0.2662154540327129, "grad_norm": 2.9375079500042136, "learning_rate": 1.7216826496023592e-06, "loss": 0.5657, "step": 590 }, { "epoch": 0.26666666666666666, "grad_norm": 3.526546662395944, "learning_rate": 1.7206699262549394e-06, "loss": 0.6383, "step": 591 }, { "epoch": 0.2671178793006204, "grad_norm": 3.1723564070744406, "learning_rate": 1.719655662756753e-06, "loss": 0.4663, "step": 592 }, { "epoch": 0.2675690919345742, "grad_norm": 3.2019446199311017, "learning_rate": 1.7186398612753927e-06, "loss": 0.5142, "step": 593 }, { "epoch": 0.26802030456852793, "grad_norm": 3.041038824521175, "learning_rate": 1.7176225239817378e-06, "loss": 0.5848, "step": 594 }, { "epoch": 0.2684715172024817, "grad_norm": 2.9674872867935385, "learning_rate": 1.7166036530499502e-06, "loss": 0.5316, "step": 595 }, { "epoch": 0.26892272983643545, "grad_norm": 3.127580522901588, "learning_rate": 1.7155832506574686e-06, "loss": 0.6055, "step": 596 }, { "epoch": 0.26937394247038915, "grad_norm": 3.135126305745904, "learning_rate": 1.7145613189850048e-06, "loss": 0.6033, "step": 597 }, { "epoch": 0.2698251551043429, "grad_norm": 3.3477656942247425, "learning_rate": 1.713537860216539e-06, "loss": 0.5876, "step": 598 }, { "epoch": 0.27027636773829666, "grad_norm": 3.2848248111575455, "learning_rate": 1.7125128765393152e-06, "loss": 0.5931, "step": 599 }, { "epoch": 0.2707275803722504, "grad_norm": 3.4561387987225367, "learning_rate": 1.7114863701438363e-06, "loss": 0.6233, "step": 600 }, { "epoch": 0.2711787930062042, "grad_norm": 3.4262466490817234, "learning_rate": 1.7104583432238587e-06, "loss": 0.6797, "step": 601 }, { "epoch": 0.27163000564015793, "grad_norm": 3.333713435824011, "learning_rate": 1.7094287979763891e-06, "loss": 0.6204, "step": 602 }, { "epoch": 0.2720812182741117, "grad_norm": 3.351871668499492, "learning_rate": 1.7083977366016785e-06, "loss": 0.6415, "step": 603 }, { "epoch": 0.27253243090806545, "grad_norm": 3.2603491068445587, "learning_rate": 1.7073651613032184e-06, "loss": 0.5484, "step": 604 }, { "epoch": 0.27298364354201915, "grad_norm": 3.024159212795425, "learning_rate": 1.706331074287736e-06, "loss": 0.6172, "step": 605 }, { "epoch": 0.2734348561759729, "grad_norm": 2.8744869206183368, "learning_rate": 1.705295477765188e-06, "loss": 0.5581, "step": 606 }, { "epoch": 0.27388606880992666, "grad_norm": 3.3867448368720483, "learning_rate": 1.7042583739487584e-06, "loss": 0.7357, "step": 607 }, { "epoch": 0.2743372814438804, "grad_norm": 2.935426978961505, "learning_rate": 1.703219765054852e-06, "loss": 0.5314, "step": 608 }, { "epoch": 0.2747884940778342, "grad_norm": 3.0973120617238004, "learning_rate": 1.70217965330309e-06, "loss": 0.598, "step": 609 }, { "epoch": 0.27523970671178793, "grad_norm": 3.028317877474072, "learning_rate": 1.701138040916305e-06, "loss": 0.5377, "step": 610 }, { "epoch": 0.2756909193457417, "grad_norm": 3.1165117895424723, "learning_rate": 1.7000949301205373e-06, "loss": 0.6877, "step": 611 }, { "epoch": 0.27614213197969545, "grad_norm": 3.7422765877651516, "learning_rate": 1.6990503231450297e-06, "loss": 0.5404, "step": 612 }, { "epoch": 0.2765933446136492, "grad_norm": 3.229450690131071, "learning_rate": 1.6980042222222216e-06, "loss": 0.6637, "step": 613 }, { "epoch": 0.2770445572476029, "grad_norm": 2.9411077856787355, "learning_rate": 1.696956629587745e-06, "loss": 0.6576, "step": 614 }, { "epoch": 0.27749576988155666, "grad_norm": 3.1585991420412554, "learning_rate": 1.6959075474804215e-06, "loss": 0.5854, "step": 615 }, { "epoch": 0.2779469825155104, "grad_norm": 3.5738811250095197, "learning_rate": 1.6948569781422538e-06, "loss": 0.6342, "step": 616 }, { "epoch": 0.2783981951494642, "grad_norm": 3.2043997534775635, "learning_rate": 1.6938049238184244e-06, "loss": 0.6077, "step": 617 }, { "epoch": 0.27884940778341794, "grad_norm": 3.009802670893003, "learning_rate": 1.6927513867572887e-06, "loss": 0.5461, "step": 618 }, { "epoch": 0.2793006204173717, "grad_norm": 3.735900498450287, "learning_rate": 1.6916963692103713e-06, "loss": 0.61, "step": 619 }, { "epoch": 0.27975183305132545, "grad_norm": 3.3631424989752525, "learning_rate": 1.6906398734323606e-06, "loss": 0.7085, "step": 620 }, { "epoch": 0.2802030456852792, "grad_norm": 3.217240383492006, "learning_rate": 1.6895819016811038e-06, "loss": 0.6147, "step": 621 }, { "epoch": 0.28065425831923296, "grad_norm": 3.1433256959207885, "learning_rate": 1.6885224562176031e-06, "loss": 0.6808, "step": 622 }, { "epoch": 0.28110547095318666, "grad_norm": 3.6645785813982856, "learning_rate": 1.6874615393060091e-06, "loss": 0.493, "step": 623 }, { "epoch": 0.2815566835871404, "grad_norm": 2.9879222448827014, "learning_rate": 1.6863991532136184e-06, "loss": 0.4975, "step": 624 }, { "epoch": 0.2820078962210942, "grad_norm": 3.373734037289056, "learning_rate": 1.6853353002108667e-06, "loss": 0.5831, "step": 625 }, { "epoch": 0.28245910885504794, "grad_norm": 3.3186458821895872, "learning_rate": 1.6842699825713242e-06, "loss": 0.5788, "step": 626 }, { "epoch": 0.2829103214890017, "grad_norm": 3.240295183925887, "learning_rate": 1.683203202571692e-06, "loss": 0.5798, "step": 627 }, { "epoch": 0.28336153412295545, "grad_norm": 2.8461310264884188, "learning_rate": 1.682134962491796e-06, "loss": 0.624, "step": 628 }, { "epoch": 0.2838127467569092, "grad_norm": 3.182637425551016, "learning_rate": 1.6810652646145828e-06, "loss": 0.5762, "step": 629 }, { "epoch": 0.28426395939086296, "grad_norm": 3.21165606325256, "learning_rate": 1.679994111226114e-06, "loss": 0.5685, "step": 630 }, { "epoch": 0.2847151720248167, "grad_norm": 3.065992459300649, "learning_rate": 1.678921504615562e-06, "loss": 0.5511, "step": 631 }, { "epoch": 0.2851663846587704, "grad_norm": 4.0157081970954405, "learning_rate": 1.677847447075205e-06, "loss": 0.6992, "step": 632 }, { "epoch": 0.2856175972927242, "grad_norm": 3.656189168497442, "learning_rate": 1.676771940900422e-06, "loss": 0.6023, "step": 633 }, { "epoch": 0.28606880992667794, "grad_norm": 3.641835731718804, "learning_rate": 1.6756949883896874e-06, "loss": 0.657, "step": 634 }, { "epoch": 0.2865200225606317, "grad_norm": 3.631195546639338, "learning_rate": 1.6746165918445672e-06, "loss": 0.5844, "step": 635 }, { "epoch": 0.28697123519458545, "grad_norm": 3.305073818481503, "learning_rate": 1.6735367535697133e-06, "loss": 0.691, "step": 636 }, { "epoch": 0.2874224478285392, "grad_norm": 3.6875236525441935, "learning_rate": 1.6724554758728586e-06, "loss": 0.4427, "step": 637 }, { "epoch": 0.28787366046249296, "grad_norm": 3.270744869677528, "learning_rate": 1.6713727610648122e-06, "loss": 0.6261, "step": 638 }, { "epoch": 0.2883248730964467, "grad_norm": 3.294150755730515, "learning_rate": 1.670288611459455e-06, "loss": 0.6095, "step": 639 }, { "epoch": 0.2887760857304005, "grad_norm": 3.418142081910065, "learning_rate": 1.669203029373733e-06, "loss": 0.7189, "step": 640 }, { "epoch": 0.2892272983643542, "grad_norm": 3.114231217877707, "learning_rate": 1.6681160171276548e-06, "loss": 0.5874, "step": 641 }, { "epoch": 0.28967851099830794, "grad_norm": 3.3583157977609233, "learning_rate": 1.6670275770442849e-06, "loss": 0.6205, "step": 642 }, { "epoch": 0.2901297236322617, "grad_norm": 2.946320692741977, "learning_rate": 1.665937711449739e-06, "loss": 0.532, "step": 643 }, { "epoch": 0.29058093626621545, "grad_norm": 3.4190265435248155, "learning_rate": 1.66484642267318e-06, "loss": 0.5944, "step": 644 }, { "epoch": 0.2910321489001692, "grad_norm": 2.8472323370866612, "learning_rate": 1.6637537130468113e-06, "loss": 0.6206, "step": 645 }, { "epoch": 0.29148336153412296, "grad_norm": 3.2713722111872245, "learning_rate": 1.662659584905874e-06, "loss": 0.5323, "step": 646 }, { "epoch": 0.2919345741680767, "grad_norm": 2.922995795945147, "learning_rate": 1.6615640405886395e-06, "loss": 0.5084, "step": 647 }, { "epoch": 0.2923857868020305, "grad_norm": 3.5339931194135077, "learning_rate": 1.6604670824364067e-06, "loss": 0.6357, "step": 648 }, { "epoch": 0.2928369994359842, "grad_norm": 3.317497856029728, "learning_rate": 1.659368712793495e-06, "loss": 0.6874, "step": 649 }, { "epoch": 0.29328821206993794, "grad_norm": 3.7424311686840577, "learning_rate": 1.6582689340072417e-06, "loss": 0.7115, "step": 650 }, { "epoch": 0.2937394247038917, "grad_norm": 3.1345128131384383, "learning_rate": 1.6571677484279948e-06, "loss": 0.578, "step": 651 }, { "epoch": 0.29419063733784545, "grad_norm": 3.911109938851912, "learning_rate": 1.6560651584091082e-06, "loss": 0.7279, "step": 652 }, { "epoch": 0.2946418499717992, "grad_norm": 3.3288650410464253, "learning_rate": 1.6549611663069383e-06, "loss": 0.5636, "step": 653 }, { "epoch": 0.29509306260575296, "grad_norm": 2.863150590413283, "learning_rate": 1.6538557744808371e-06, "loss": 0.6478, "step": 654 }, { "epoch": 0.2955442752397067, "grad_norm": 3.006213042305221, "learning_rate": 1.6527489852931486e-06, "loss": 0.562, "step": 655 }, { "epoch": 0.2959954878736605, "grad_norm": 3.638111266983312, "learning_rate": 1.6516408011092027e-06, "loss": 0.5496, "step": 656 }, { "epoch": 0.29644670050761424, "grad_norm": 3.316824374679788, "learning_rate": 1.6505312242973108e-06, "loss": 0.665, "step": 657 }, { "epoch": 0.29689791314156794, "grad_norm": 3.0712540463962914, "learning_rate": 1.6494202572287605e-06, "loss": 0.5783, "step": 658 }, { "epoch": 0.2973491257755217, "grad_norm": 3.792785889031542, "learning_rate": 1.64830790227781e-06, "loss": 0.6901, "step": 659 }, { "epoch": 0.29780033840947545, "grad_norm": 3.420839278819223, "learning_rate": 1.6471941618216842e-06, "loss": 0.617, "step": 660 }, { "epoch": 0.2982515510434292, "grad_norm": 3.310228590338887, "learning_rate": 1.6460790382405688e-06, "loss": 0.693, "step": 661 }, { "epoch": 0.29870276367738297, "grad_norm": 3.4270595757910964, "learning_rate": 1.6449625339176053e-06, "loss": 0.5201, "step": 662 }, { "epoch": 0.2991539763113367, "grad_norm": 3.1683236124814274, "learning_rate": 1.643844651238886e-06, "loss": 0.5696, "step": 663 }, { "epoch": 0.2996051889452905, "grad_norm": 3.7401623649500473, "learning_rate": 1.6427253925934492e-06, "loss": 0.7044, "step": 664 }, { "epoch": 0.30005640157924424, "grad_norm": 3.4634585244712124, "learning_rate": 1.641604760373273e-06, "loss": 0.5573, "step": 665 }, { "epoch": 0.300507614213198, "grad_norm": 3.3656674389925443, "learning_rate": 1.640482756973272e-06, "loss": 0.5742, "step": 666 }, { "epoch": 0.3009588268471517, "grad_norm": 3.5089959297906654, "learning_rate": 1.6393593847912903e-06, "loss": 0.625, "step": 667 }, { "epoch": 0.30141003948110545, "grad_norm": 2.8305729096605194, "learning_rate": 1.6382346462280979e-06, "loss": 0.4971, "step": 668 }, { "epoch": 0.3018612521150592, "grad_norm": 3.2836377236231926, "learning_rate": 1.6371085436873843e-06, "loss": 0.573, "step": 669 }, { "epoch": 0.30231246474901297, "grad_norm": 2.839883087141276, "learning_rate": 1.635981079575755e-06, "loss": 0.4281, "step": 670 }, { "epoch": 0.3027636773829667, "grad_norm": 3.0551538272584913, "learning_rate": 1.6348522563027235e-06, "loss": 0.5763, "step": 671 }, { "epoch": 0.3032148900169205, "grad_norm": 3.1227628457864633, "learning_rate": 1.6337220762807098e-06, "loss": 0.5347, "step": 672 }, { "epoch": 0.30366610265087424, "grad_norm": 3.0644130436580297, "learning_rate": 1.6325905419250325e-06, "loss": 0.5723, "step": 673 }, { "epoch": 0.304117315284828, "grad_norm": 3.369765645299152, "learning_rate": 1.631457655653905e-06, "loss": 0.5128, "step": 674 }, { "epoch": 0.30456852791878175, "grad_norm": 3.130938520045816, "learning_rate": 1.6303234198884294e-06, "loss": 0.5637, "step": 675 }, { "epoch": 0.30501974055273545, "grad_norm": 2.8738261799739755, "learning_rate": 1.6291878370525925e-06, "loss": 0.5167, "step": 676 }, { "epoch": 0.3054709531866892, "grad_norm": 3.566044327017721, "learning_rate": 1.6280509095732588e-06, "loss": 0.5657, "step": 677 }, { "epoch": 0.30592216582064297, "grad_norm": 3.4988087210997554, "learning_rate": 1.6269126398801679e-06, "loss": 0.5211, "step": 678 }, { "epoch": 0.3063733784545967, "grad_norm": 3.159865034013114, "learning_rate": 1.6257730304059263e-06, "loss": 0.5702, "step": 679 }, { "epoch": 0.3068245910885505, "grad_norm": 2.765987657389912, "learning_rate": 1.6246320835860052e-06, "loss": 0.5558, "step": 680 }, { "epoch": 0.30727580372250424, "grad_norm": 3.2675556171262277, "learning_rate": 1.6234898018587336e-06, "loss": 0.5354, "step": 681 }, { "epoch": 0.307727016356458, "grad_norm": 3.0700923951988, "learning_rate": 1.622346187665292e-06, "loss": 0.4686, "step": 682 }, { "epoch": 0.30817822899041175, "grad_norm": 2.7395711139916425, "learning_rate": 1.6212012434497101e-06, "loss": 0.5564, "step": 683 }, { "epoch": 0.3086294416243655, "grad_norm": 2.9605613921627616, "learning_rate": 1.6200549716588595e-06, "loss": 0.4968, "step": 684 }, { "epoch": 0.3090806542583192, "grad_norm": 3.055443187363974, "learning_rate": 1.6189073747424482e-06, "loss": 0.5546, "step": 685 }, { "epoch": 0.30953186689227297, "grad_norm": 3.304091566042142, "learning_rate": 1.6177584551530177e-06, "loss": 0.5676, "step": 686 }, { "epoch": 0.3099830795262267, "grad_norm": 3.127344665872412, "learning_rate": 1.6166082153459346e-06, "loss": 0.5305, "step": 687 }, { "epoch": 0.3104342921601805, "grad_norm": 3.313157696795387, "learning_rate": 1.6154566577793885e-06, "loss": 0.5159, "step": 688 }, { "epoch": 0.31088550479413424, "grad_norm": 3.4602680074652548, "learning_rate": 1.6143037849143832e-06, "loss": 0.5878, "step": 689 }, { "epoch": 0.311336717428088, "grad_norm": 3.47196454330974, "learning_rate": 1.6131495992147359e-06, "loss": 0.6752, "step": 690 }, { "epoch": 0.31178793006204175, "grad_norm": 2.926369050074452, "learning_rate": 1.6119941031470675e-06, "loss": 0.4326, "step": 691 }, { "epoch": 0.3122391426959955, "grad_norm": 2.8753404832004272, "learning_rate": 1.6108372991807996e-06, "loss": 0.6389, "step": 692 }, { "epoch": 0.31269035532994927, "grad_norm": 3.4818672558945845, "learning_rate": 1.6096791897881498e-06, "loss": 0.5769, "step": 693 }, { "epoch": 0.31314156796390297, "grad_norm": 3.045331557479258, "learning_rate": 1.608519777444125e-06, "loss": 0.5813, "step": 694 }, { "epoch": 0.3135927805978567, "grad_norm": 3.1173014954702665, "learning_rate": 1.607359064626517e-06, "loss": 0.609, "step": 695 }, { "epoch": 0.3140439932318105, "grad_norm": 3.1211308380487486, "learning_rate": 1.6061970538158958e-06, "loss": 0.5857, "step": 696 }, { "epoch": 0.31449520586576424, "grad_norm": 3.6380292500889255, "learning_rate": 1.6050337474956066e-06, "loss": 0.7401, "step": 697 }, { "epoch": 0.314946418499718, "grad_norm": 3.7195496711700793, "learning_rate": 1.6038691481517628e-06, "loss": 0.6073, "step": 698 }, { "epoch": 0.31539763113367175, "grad_norm": 3.461343986471695, "learning_rate": 1.6027032582732406e-06, "loss": 0.6371, "step": 699 }, { "epoch": 0.3158488437676255, "grad_norm": 3.0912912625302384, "learning_rate": 1.6015360803516752e-06, "loss": 0.5852, "step": 700 }, { "epoch": 0.31630005640157927, "grad_norm": 2.735792616102687, "learning_rate": 1.6003676168814536e-06, "loss": 0.4726, "step": 701 }, { "epoch": 0.31675126903553297, "grad_norm": 3.2460293296612894, "learning_rate": 1.5991978703597112e-06, "loss": 0.6615, "step": 702 }, { "epoch": 0.3172024816694867, "grad_norm": 3.2723530181109934, "learning_rate": 1.5980268432863239e-06, "loss": 0.5633, "step": 703 }, { "epoch": 0.3176536943034405, "grad_norm": 3.1604928857576415, "learning_rate": 1.596854538163906e-06, "loss": 0.5592, "step": 704 }, { "epoch": 0.31810490693739424, "grad_norm": 3.275606024012947, "learning_rate": 1.5956809574978011e-06, "loss": 0.5976, "step": 705 }, { "epoch": 0.318556119571348, "grad_norm": 3.66434228008565, "learning_rate": 1.594506103796081e-06, "loss": 0.6421, "step": 706 }, { "epoch": 0.31900733220530175, "grad_norm": 3.1389856099207183, "learning_rate": 1.5933299795695368e-06, "loss": 0.5529, "step": 707 }, { "epoch": 0.3194585448392555, "grad_norm": 3.040579790837814, "learning_rate": 1.5921525873316753e-06, "loss": 0.4726, "step": 708 }, { "epoch": 0.31990975747320927, "grad_norm": 3.3660094819872124, "learning_rate": 1.5909739295987122e-06, "loss": 0.5541, "step": 709 }, { "epoch": 0.320360970107163, "grad_norm": 3.387903668787852, "learning_rate": 1.5897940088895691e-06, "loss": 0.5683, "step": 710 }, { "epoch": 0.3208121827411167, "grad_norm": 3.2996751014380554, "learning_rate": 1.5886128277258661e-06, "loss": 0.6318, "step": 711 }, { "epoch": 0.3212633953750705, "grad_norm": 3.4811079536606004, "learning_rate": 1.5874303886319174e-06, "loss": 0.5905, "step": 712 }, { "epoch": 0.32171460800902424, "grad_norm": 2.974942368471126, "learning_rate": 1.5862466941347243e-06, "loss": 0.5155, "step": 713 }, { "epoch": 0.322165820642978, "grad_norm": 3.0452805867406942, "learning_rate": 1.5850617467639727e-06, "loss": 0.5682, "step": 714 }, { "epoch": 0.32261703327693175, "grad_norm": 3.2224139519675403, "learning_rate": 1.5838755490520249e-06, "loss": 0.6019, "step": 715 }, { "epoch": 0.3230682459108855, "grad_norm": 2.8944069319837817, "learning_rate": 1.5826881035339154e-06, "loss": 0.515, "step": 716 }, { "epoch": 0.32351945854483927, "grad_norm": 2.441725821700579, "learning_rate": 1.5814994127473465e-06, "loss": 0.3773, "step": 717 }, { "epoch": 0.323970671178793, "grad_norm": 3.470342462769914, "learning_rate": 1.5803094792326799e-06, "loss": 0.5145, "step": 718 }, { "epoch": 0.3244218838127468, "grad_norm": 3.56701743592647, "learning_rate": 1.5791183055329352e-06, "loss": 0.5895, "step": 719 }, { "epoch": 0.3248730964467005, "grad_norm": 3.123117232719042, "learning_rate": 1.5779258941937803e-06, "loss": 0.6079, "step": 720 }, { "epoch": 0.32532430908065424, "grad_norm": 3.50214397424285, "learning_rate": 1.5767322477635304e-06, "loss": 0.7037, "step": 721 }, { "epoch": 0.325775521714608, "grad_norm": 3.7189664252368364, "learning_rate": 1.575537368793138e-06, "loss": 0.554, "step": 722 }, { "epoch": 0.32622673434856175, "grad_norm": 2.9787465398404858, "learning_rate": 1.5743412598361909e-06, "loss": 0.589, "step": 723 }, { "epoch": 0.3266779469825155, "grad_norm": 3.322004142520188, "learning_rate": 1.5731439234489052e-06, "loss": 0.5615, "step": 724 }, { "epoch": 0.32712915961646927, "grad_norm": 3.26274181293833, "learning_rate": 1.571945362190121e-06, "loss": 0.5014, "step": 725 }, { "epoch": 0.327580372250423, "grad_norm": 3.7220892721550234, "learning_rate": 1.5707455786212944e-06, "loss": 0.6807, "step": 726 }, { "epoch": 0.3280315848843768, "grad_norm": 2.9903955440183148, "learning_rate": 1.569544575306495e-06, "loss": 0.4905, "step": 727 }, { "epoch": 0.32848279751833054, "grad_norm": 3.4657777627392936, "learning_rate": 1.5683423548123988e-06, "loss": 0.6067, "step": 728 }, { "epoch": 0.32893401015228424, "grad_norm": 3.4819746921424004, "learning_rate": 1.5671389197082828e-06, "loss": 0.7603, "step": 729 }, { "epoch": 0.329385222786238, "grad_norm": 3.3128509975650777, "learning_rate": 1.5659342725660204e-06, "loss": 0.5962, "step": 730 }, { "epoch": 0.32983643542019175, "grad_norm": 3.111013336637488, "learning_rate": 1.5647284159600742e-06, "loss": 0.5375, "step": 731 }, { "epoch": 0.3302876480541455, "grad_norm": 3.043014983711972, "learning_rate": 1.5635213524674926e-06, "loss": 0.4457, "step": 732 }, { "epoch": 0.33073886068809927, "grad_norm": 3.176508138409611, "learning_rate": 1.562313084667903e-06, "loss": 0.6087, "step": 733 }, { "epoch": 0.331190073322053, "grad_norm": 3.020201486936763, "learning_rate": 1.5611036151435057e-06, "loss": 0.4561, "step": 734 }, { "epoch": 0.3316412859560068, "grad_norm": 3.0231053359967497, "learning_rate": 1.5598929464790703e-06, "loss": 0.5651, "step": 735 }, { "epoch": 0.33209249858996054, "grad_norm": 3.275662082022039, "learning_rate": 1.5586810812619291e-06, "loss": 0.5691, "step": 736 }, { "epoch": 0.3325437112239143, "grad_norm": 3.3993781041664337, "learning_rate": 1.55746802208197e-06, "loss": 0.6204, "step": 737 }, { "epoch": 0.332994923857868, "grad_norm": 2.995463194362283, "learning_rate": 1.5562537715316349e-06, "loss": 0.564, "step": 738 }, { "epoch": 0.33344613649182175, "grad_norm": 3.134146072367485, "learning_rate": 1.55503833220591e-06, "loss": 0.5233, "step": 739 }, { "epoch": 0.3338973491257755, "grad_norm": 3.201637769439807, "learning_rate": 1.553821706702322e-06, "loss": 0.6123, "step": 740 }, { "epoch": 0.33434856175972927, "grad_norm": 3.232574838103331, "learning_rate": 1.5526038976209343e-06, "loss": 0.5694, "step": 741 }, { "epoch": 0.334799774393683, "grad_norm": 3.3442531523646544, "learning_rate": 1.5513849075643381e-06, "loss": 0.5079, "step": 742 }, { "epoch": 0.3352509870276368, "grad_norm": 3.0542429928779216, "learning_rate": 1.550164739137649e-06, "loss": 0.5389, "step": 743 }, { "epoch": 0.33570219966159054, "grad_norm": 3.1568454912469903, "learning_rate": 1.548943394948501e-06, "loss": 0.5308, "step": 744 }, { "epoch": 0.3361534122955443, "grad_norm": 3.542836071959044, "learning_rate": 1.5477208776070408e-06, "loss": 0.483, "step": 745 }, { "epoch": 0.33660462492949805, "grad_norm": 3.1222777445652894, "learning_rate": 1.5464971897259219e-06, "loss": 0.5749, "step": 746 }, { "epoch": 0.33705583756345175, "grad_norm": 3.628624782183597, "learning_rate": 1.5452723339202998e-06, "loss": 0.5553, "step": 747 }, { "epoch": 0.3375070501974055, "grad_norm": 3.2488842241245988, "learning_rate": 1.5440463128078261e-06, "loss": 0.6232, "step": 748 }, { "epoch": 0.33795826283135927, "grad_norm": 3.343015867459291, "learning_rate": 1.5428191290086422e-06, "loss": 0.6445, "step": 749 }, { "epoch": 0.338409475465313, "grad_norm": 3.1055865259237234, "learning_rate": 1.5415907851453747e-06, "loss": 0.5384, "step": 750 }, { "epoch": 0.3388606880992668, "grad_norm": 3.2377347656439817, "learning_rate": 1.5403612838431298e-06, "loss": 0.5763, "step": 751 }, { "epoch": 0.33931190073322054, "grad_norm": 3.445908663047367, "learning_rate": 1.539130627729486e-06, "loss": 0.5514, "step": 752 }, { "epoch": 0.3397631133671743, "grad_norm": 2.911061775987079, "learning_rate": 1.537898819434491e-06, "loss": 0.7235, "step": 753 }, { "epoch": 0.34021432600112805, "grad_norm": 2.7941569220035625, "learning_rate": 1.5366658615906545e-06, "loss": 0.6081, "step": 754 }, { "epoch": 0.34066553863508175, "grad_norm": 3.5600515194912847, "learning_rate": 1.5354317568329425e-06, "loss": 0.5073, "step": 755 }, { "epoch": 0.3411167512690355, "grad_norm": 3.076725754060217, "learning_rate": 1.5341965077987724e-06, "loss": 0.683, "step": 756 }, { "epoch": 0.34156796390298927, "grad_norm": 3.3055138999806424, "learning_rate": 1.5329601171280073e-06, "loss": 0.4901, "step": 757 }, { "epoch": 0.342019176536943, "grad_norm": 3.688970910602998, "learning_rate": 1.5317225874629496e-06, "loss": 0.6957, "step": 758 }, { "epoch": 0.3424703891708968, "grad_norm": 3.01278353896168, "learning_rate": 1.530483921448336e-06, "loss": 0.5273, "step": 759 }, { "epoch": 0.34292160180485054, "grad_norm": 2.7371700116966697, "learning_rate": 1.5292441217313322e-06, "loss": 0.5151, "step": 760 }, { "epoch": 0.3433728144388043, "grad_norm": 2.6873243424543682, "learning_rate": 1.5280031909615261e-06, "loss": 0.5559, "step": 761 }, { "epoch": 0.34382402707275805, "grad_norm": 3.247322078009431, "learning_rate": 1.5267611317909228e-06, "loss": 0.5374, "step": 762 }, { "epoch": 0.3442752397067118, "grad_norm": 3.2026837973300974, "learning_rate": 1.5255179468739393e-06, "loss": 0.5152, "step": 763 }, { "epoch": 0.3447264523406655, "grad_norm": 3.0865094325229734, "learning_rate": 1.5242736388673982e-06, "loss": 0.588, "step": 764 }, { "epoch": 0.34517766497461927, "grad_norm": 3.0866090252108282, "learning_rate": 1.5230282104305226e-06, "loss": 0.5795, "step": 765 }, { "epoch": 0.345628877608573, "grad_norm": 3.2541619817207814, "learning_rate": 1.5217816642249296e-06, "loss": 0.6493, "step": 766 }, { "epoch": 0.3460800902425268, "grad_norm": 3.6862448489806967, "learning_rate": 1.5205340029146253e-06, "loss": 0.586, "step": 767 }, { "epoch": 0.34653130287648054, "grad_norm": 2.6861569549572737, "learning_rate": 1.519285229165999e-06, "loss": 0.4518, "step": 768 }, { "epoch": 0.3469825155104343, "grad_norm": 3.039215785994512, "learning_rate": 1.5180353456478173e-06, "loss": 0.4648, "step": 769 }, { "epoch": 0.34743372814438805, "grad_norm": 3.6176540281690914, "learning_rate": 1.5167843550312188e-06, "loss": 0.6346, "step": 770 }, { "epoch": 0.3478849407783418, "grad_norm": 3.070746867448483, "learning_rate": 1.5155322599897073e-06, "loss": 0.5604, "step": 771 }, { "epoch": 0.34833615341229557, "grad_norm": 2.73731135380931, "learning_rate": 1.5142790631991478e-06, "loss": 0.5152, "step": 772 }, { "epoch": 0.34878736604624927, "grad_norm": 3.529456577457423, "learning_rate": 1.5130247673377587e-06, "loss": 0.5609, "step": 773 }, { "epoch": 0.349238578680203, "grad_norm": 3.4439646240760826, "learning_rate": 1.5117693750861094e-06, "loss": 0.6688, "step": 774 }, { "epoch": 0.3496897913141568, "grad_norm": 3.3042570111312335, "learning_rate": 1.51051288912711e-06, "loss": 0.4303, "step": 775 }, { "epoch": 0.35014100394811054, "grad_norm": 2.9401197668624013, "learning_rate": 1.5092553121460088e-06, "loss": 0.4583, "step": 776 }, { "epoch": 0.3505922165820643, "grad_norm": 3.1073494998401503, "learning_rate": 1.5079966468303863e-06, "loss": 0.5139, "step": 777 }, { "epoch": 0.35104342921601805, "grad_norm": 3.118708334430417, "learning_rate": 1.5067368958701485e-06, "loss": 0.4641, "step": 778 }, { "epoch": 0.3514946418499718, "grad_norm": 3.143071377710741, "learning_rate": 1.5054760619575215e-06, "loss": 0.5753, "step": 779 }, { "epoch": 0.35194585448392557, "grad_norm": 3.1861569232470766, "learning_rate": 1.5042141477870458e-06, "loss": 0.535, "step": 780 }, { "epoch": 0.3523970671178793, "grad_norm": 2.8866857302075117, "learning_rate": 1.5029511560555706e-06, "loss": 0.46, "step": 781 }, { "epoch": 0.352848279751833, "grad_norm": 3.3778859647717443, "learning_rate": 1.5016870894622473e-06, "loss": 0.5898, "step": 782 }, { "epoch": 0.3532994923857868, "grad_norm": 4.172842058459368, "learning_rate": 1.5004219507085262e-06, "loss": 0.5366, "step": 783 }, { "epoch": 0.35375070501974054, "grad_norm": 3.59549440239923, "learning_rate": 1.499155742498147e-06, "loss": 0.4775, "step": 784 }, { "epoch": 0.3542019176536943, "grad_norm": 3.0027968674611825, "learning_rate": 1.4978884675371352e-06, "loss": 0.5956, "step": 785 }, { "epoch": 0.35465313028764806, "grad_norm": 3.385447063632404, "learning_rate": 1.4966201285337976e-06, "loss": 0.6469, "step": 786 }, { "epoch": 0.3551043429216018, "grad_norm": 2.8706740349542597, "learning_rate": 1.4953507281987134e-06, "loss": 0.4904, "step": 787 }, { "epoch": 0.35555555555555557, "grad_norm": 3.424910056173964, "learning_rate": 1.4940802692447306e-06, "loss": 0.5708, "step": 788 }, { "epoch": 0.3560067681895093, "grad_norm": 3.0668549250942174, "learning_rate": 1.4928087543869593e-06, "loss": 0.5102, "step": 789 }, { "epoch": 0.3564579808234631, "grad_norm": 2.9435846287096594, "learning_rate": 1.491536186342766e-06, "loss": 0.4531, "step": 790 }, { "epoch": 0.3569091934574168, "grad_norm": 3.3647412629748, "learning_rate": 1.4902625678317694e-06, "loss": 0.5754, "step": 791 }, { "epoch": 0.35736040609137054, "grad_norm": 3.4720397428714147, "learning_rate": 1.4889879015758317e-06, "loss": 0.5361, "step": 792 }, { "epoch": 0.3578116187253243, "grad_norm": 3.169387365054564, "learning_rate": 1.4877121902990542e-06, "loss": 0.6178, "step": 793 }, { "epoch": 0.35826283135927806, "grad_norm": 3.1834769003859416, "learning_rate": 1.4864354367277723e-06, "loss": 0.5135, "step": 794 }, { "epoch": 0.3587140439932318, "grad_norm": 2.72897080565907, "learning_rate": 1.4851576435905486e-06, "loss": 0.5075, "step": 795 }, { "epoch": 0.35916525662718557, "grad_norm": 3.472851661152734, "learning_rate": 1.4838788136181674e-06, "loss": 0.481, "step": 796 }, { "epoch": 0.3596164692611393, "grad_norm": 3.3098456095281343, "learning_rate": 1.4825989495436284e-06, "loss": 0.6186, "step": 797 }, { "epoch": 0.3600676818950931, "grad_norm": 3.5939015234225318, "learning_rate": 1.4813180541021424e-06, "loss": 0.566, "step": 798 }, { "epoch": 0.36051889452904684, "grad_norm": 3.19607736166632, "learning_rate": 1.4800361300311227e-06, "loss": 0.6465, "step": 799 }, { "epoch": 0.36097010716300054, "grad_norm": 3.236820425024368, "learning_rate": 1.4787531800701825e-06, "loss": 0.5826, "step": 800 }, { "epoch": 0.3614213197969543, "grad_norm": 3.1520319660757927, "learning_rate": 1.4774692069611265e-06, "loss": 0.6347, "step": 801 }, { "epoch": 0.36187253243090806, "grad_norm": 3.4007171711887203, "learning_rate": 1.4761842134479461e-06, "loss": 0.5826, "step": 802 }, { "epoch": 0.3623237450648618, "grad_norm": 3.322956321362452, "learning_rate": 1.4748982022768136e-06, "loss": 0.5269, "step": 803 }, { "epoch": 0.36277495769881557, "grad_norm": 3.5976572006549015, "learning_rate": 1.4736111761960763e-06, "loss": 0.6353, "step": 804 }, { "epoch": 0.3632261703327693, "grad_norm": 3.358228871970233, "learning_rate": 1.4723231379562503e-06, "loss": 0.639, "step": 805 }, { "epoch": 0.3636773829667231, "grad_norm": 3.066740811078913, "learning_rate": 1.4710340903100142e-06, "loss": 0.6528, "step": 806 }, { "epoch": 0.36412859560067684, "grad_norm": 3.143447875416717, "learning_rate": 1.4697440360122046e-06, "loss": 0.5159, "step": 807 }, { "epoch": 0.36457980823463054, "grad_norm": 3.4609033817493087, "learning_rate": 1.4684529778198095e-06, "loss": 0.6577, "step": 808 }, { "epoch": 0.3650310208685843, "grad_norm": 3.3073777275985243, "learning_rate": 1.467160918491962e-06, "loss": 0.5486, "step": 809 }, { "epoch": 0.36548223350253806, "grad_norm": 2.955049404596728, "learning_rate": 1.4658678607899346e-06, "loss": 0.4085, "step": 810 }, { "epoch": 0.3659334461364918, "grad_norm": 3.1794400164176015, "learning_rate": 1.4645738074771331e-06, "loss": 0.5911, "step": 811 }, { "epoch": 0.36638465877044557, "grad_norm": 3.282347844957748, "learning_rate": 1.4632787613190927e-06, "loss": 0.6132, "step": 812 }, { "epoch": 0.3668358714043993, "grad_norm": 3.054992196038826, "learning_rate": 1.461982725083468e-06, "loss": 0.5046, "step": 813 }, { "epoch": 0.3672870840383531, "grad_norm": 2.7569599731911083, "learning_rate": 1.4606857015400315e-06, "loss": 0.5099, "step": 814 }, { "epoch": 0.36773829667230684, "grad_norm": 3.2540309485705454, "learning_rate": 1.4593876934606649e-06, "loss": 0.5801, "step": 815 }, { "epoch": 0.3681895093062606, "grad_norm": 3.2943527058039073, "learning_rate": 1.4580887036193537e-06, "loss": 0.6682, "step": 816 }, { "epoch": 0.3686407219402143, "grad_norm": 3.133778967882415, "learning_rate": 1.4567887347921816e-06, "loss": 0.6227, "step": 817 }, { "epoch": 0.36909193457416806, "grad_norm": 3.256206658405506, "learning_rate": 1.4554877897573257e-06, "loss": 0.493, "step": 818 }, { "epoch": 0.3695431472081218, "grad_norm": 3.182329974272021, "learning_rate": 1.4541858712950475e-06, "loss": 0.5302, "step": 819 }, { "epoch": 0.36999435984207557, "grad_norm": 3.4363468847418077, "learning_rate": 1.4528829821876898e-06, "loss": 0.6303, "step": 820 }, { "epoch": 0.37044557247602933, "grad_norm": 2.851895955662126, "learning_rate": 1.45157912521967e-06, "loss": 0.5902, "step": 821 }, { "epoch": 0.3708967851099831, "grad_norm": 2.9192604758917735, "learning_rate": 1.4502743031774735e-06, "loss": 0.4448, "step": 822 }, { "epoch": 0.37134799774393684, "grad_norm": 3.1234419282519297, "learning_rate": 1.4489685188496485e-06, "loss": 0.6215, "step": 823 }, { "epoch": 0.3717992103778906, "grad_norm": 3.2175572399766983, "learning_rate": 1.447661775026799e-06, "loss": 0.5736, "step": 824 }, { "epoch": 0.37225042301184436, "grad_norm": 3.0464327587263593, "learning_rate": 1.4463540745015804e-06, "loss": 0.4518, "step": 825 }, { "epoch": 0.37270163564579806, "grad_norm": 2.7548027619798114, "learning_rate": 1.4450454200686922e-06, "loss": 0.5115, "step": 826 }, { "epoch": 0.3731528482797518, "grad_norm": 3.508978871190512, "learning_rate": 1.4437358145248726e-06, "loss": 0.5587, "step": 827 }, { "epoch": 0.37360406091370557, "grad_norm": 2.8776633436137082, "learning_rate": 1.4424252606688923e-06, "loss": 0.3882, "step": 828 }, { "epoch": 0.37405527354765933, "grad_norm": 3.0186645553939444, "learning_rate": 1.4411137613015493e-06, "loss": 0.7998, "step": 829 }, { "epoch": 0.3745064861816131, "grad_norm": 3.1591798420581148, "learning_rate": 1.4398013192256612e-06, "loss": 0.5591, "step": 830 }, { "epoch": 0.37495769881556684, "grad_norm": 3.465443632908638, "learning_rate": 1.4384879372460614e-06, "loss": 0.5952, "step": 831 }, { "epoch": 0.3754089114495206, "grad_norm": 3.983463883157992, "learning_rate": 1.4371736181695906e-06, "loss": 0.6167, "step": 832 }, { "epoch": 0.37586012408347436, "grad_norm": 3.202176645358091, "learning_rate": 1.4358583648050938e-06, "loss": 0.6324, "step": 833 }, { "epoch": 0.3763113367174281, "grad_norm": 3.729738183408635, "learning_rate": 1.4345421799634117e-06, "loss": 0.5976, "step": 834 }, { "epoch": 0.3767625493513818, "grad_norm": 3.2963718228707286, "learning_rate": 1.4332250664573753e-06, "loss": 0.575, "step": 835 }, { "epoch": 0.37721376198533557, "grad_norm": 3.3781633349933236, "learning_rate": 1.4319070271018015e-06, "loss": 0.6036, "step": 836 }, { "epoch": 0.37766497461928933, "grad_norm": 3.4896071051826, "learning_rate": 1.4305880647134845e-06, "loss": 0.6199, "step": 837 }, { "epoch": 0.3781161872532431, "grad_norm": 3.4302159345440826, "learning_rate": 1.4292681821111917e-06, "loss": 0.6029, "step": 838 }, { "epoch": 0.37856739988719684, "grad_norm": 3.2071682961494195, "learning_rate": 1.4279473821156577e-06, "loss": 0.6575, "step": 839 }, { "epoch": 0.3790186125211506, "grad_norm": 3.417176487620392, "learning_rate": 1.4266256675495775e-06, "loss": 0.5037, "step": 840 }, { "epoch": 0.37946982515510436, "grad_norm": 3.1535347582785755, "learning_rate": 1.4253030412375992e-06, "loss": 0.6263, "step": 841 }, { "epoch": 0.3799210377890581, "grad_norm": 3.2890481570320693, "learning_rate": 1.4239795060063208e-06, "loss": 0.6117, "step": 842 }, { "epoch": 0.38037225042301187, "grad_norm": 3.48926192769284, "learning_rate": 1.422655064684283e-06, "loss": 0.6099, "step": 843 }, { "epoch": 0.38082346305696557, "grad_norm": 3.9372420818037006, "learning_rate": 1.4213297201019617e-06, "loss": 0.5924, "step": 844 }, { "epoch": 0.38127467569091933, "grad_norm": 2.7349996691946203, "learning_rate": 1.4200034750917637e-06, "loss": 0.4804, "step": 845 }, { "epoch": 0.3817258883248731, "grad_norm": 3.3642312266416075, "learning_rate": 1.4186763324880206e-06, "loss": 0.4911, "step": 846 }, { "epoch": 0.38217710095882684, "grad_norm": 3.2368382839246657, "learning_rate": 1.4173482951269822e-06, "loss": 0.5709, "step": 847 }, { "epoch": 0.3826283135927806, "grad_norm": 3.5193539688661972, "learning_rate": 1.4160193658468092e-06, "loss": 0.6303, "step": 848 }, { "epoch": 0.38307952622673436, "grad_norm": 2.9360735003460574, "learning_rate": 1.4146895474875705e-06, "loss": 0.5032, "step": 849 }, { "epoch": 0.3835307388606881, "grad_norm": 3.4786066803717497, "learning_rate": 1.4133588428912331e-06, "loss": 0.7037, "step": 850 }, { "epoch": 0.38398195149464187, "grad_norm": 3.1528126266207415, "learning_rate": 1.412027254901659e-06, "loss": 0.5899, "step": 851 }, { "epoch": 0.38443316412859563, "grad_norm": 3.174247820940736, "learning_rate": 1.4106947863645982e-06, "loss": 0.4902, "step": 852 }, { "epoch": 0.38488437676254933, "grad_norm": 3.957992235254075, "learning_rate": 1.4093614401276823e-06, "loss": 0.5707, "step": 853 }, { "epoch": 0.3853355893965031, "grad_norm": 3.218519092249444, "learning_rate": 1.4080272190404184e-06, "loss": 0.6134, "step": 854 }, { "epoch": 0.38578680203045684, "grad_norm": 3.1459639474006447, "learning_rate": 1.4066921259541836e-06, "loss": 0.6095, "step": 855 }, { "epoch": 0.3862380146644106, "grad_norm": 3.5759941707522565, "learning_rate": 1.405356163722218e-06, "loss": 0.6358, "step": 856 }, { "epoch": 0.38668922729836436, "grad_norm": 3.185073838148989, "learning_rate": 1.4040193351996204e-06, "loss": 0.4632, "step": 857 }, { "epoch": 0.3871404399323181, "grad_norm": 3.109686641994834, "learning_rate": 1.4026816432433398e-06, "loss": 0.5566, "step": 858 }, { "epoch": 0.38759165256627187, "grad_norm": 3.2959191557613665, "learning_rate": 1.4013430907121703e-06, "loss": 0.538, "step": 859 }, { "epoch": 0.38804286520022563, "grad_norm": 3.3692958569410263, "learning_rate": 1.4000036804667462e-06, "loss": 0.5392, "step": 860 }, { "epoch": 0.38849407783417933, "grad_norm": 2.6745649742861133, "learning_rate": 1.3986634153695342e-06, "loss": 0.3742, "step": 861 }, { "epoch": 0.3889452904681331, "grad_norm": 3.3623036565879945, "learning_rate": 1.3973222982848281e-06, "loss": 0.7448, "step": 862 }, { "epoch": 0.38939650310208684, "grad_norm": 3.277299965781224, "learning_rate": 1.3959803320787417e-06, "loss": 0.6002, "step": 863 }, { "epoch": 0.3898477157360406, "grad_norm": 3.0879829136475814, "learning_rate": 1.394637519619205e-06, "loss": 0.4925, "step": 864 }, { "epoch": 0.39029892836999436, "grad_norm": 3.365807183133468, "learning_rate": 1.3932938637759552e-06, "loss": 0.5599, "step": 865 }, { "epoch": 0.3907501410039481, "grad_norm": 3.107049993469059, "learning_rate": 1.3919493674205326e-06, "loss": 0.6655, "step": 866 }, { "epoch": 0.39120135363790187, "grad_norm": 3.053760677478005, "learning_rate": 1.3906040334262731e-06, "loss": 0.5843, "step": 867 }, { "epoch": 0.39165256627185563, "grad_norm": 3.093046175711346, "learning_rate": 1.3892578646683037e-06, "loss": 0.6043, "step": 868 }, { "epoch": 0.3921037789058094, "grad_norm": 2.86893503666343, "learning_rate": 1.3879108640235345e-06, "loss": 0.4397, "step": 869 }, { "epoch": 0.3925549915397631, "grad_norm": 3.118837171623341, "learning_rate": 1.386563034370654e-06, "loss": 0.5386, "step": 870 }, { "epoch": 0.39300620417371684, "grad_norm": 3.363263594306241, "learning_rate": 1.3852143785901223e-06, "loss": 0.5624, "step": 871 }, { "epoch": 0.3934574168076706, "grad_norm": 3.186495282439576, "learning_rate": 1.3838648995641644e-06, "loss": 0.4957, "step": 872 }, { "epoch": 0.39390862944162436, "grad_norm": 3.3353513396229335, "learning_rate": 1.3825146001767653e-06, "loss": 0.5313, "step": 873 }, { "epoch": 0.3943598420755781, "grad_norm": 3.0521731392139424, "learning_rate": 1.3811634833136637e-06, "loss": 0.4819, "step": 874 }, { "epoch": 0.3948110547095319, "grad_norm": 3.1624882499122724, "learning_rate": 1.379811551862344e-06, "loss": 0.5611, "step": 875 }, { "epoch": 0.39526226734348563, "grad_norm": 2.885520973170412, "learning_rate": 1.378458808712032e-06, "loss": 0.5237, "step": 876 }, { "epoch": 0.3957134799774394, "grad_norm": 3.1923720267502995, "learning_rate": 1.377105256753689e-06, "loss": 0.6018, "step": 877 }, { "epoch": 0.39616469261139314, "grad_norm": 3.3170122186250968, "learning_rate": 1.375750898880004e-06, "loss": 0.5146, "step": 878 }, { "epoch": 0.39661590524534684, "grad_norm": 2.9602277801561336, "learning_rate": 1.3743957379853884e-06, "loss": 0.5429, "step": 879 }, { "epoch": 0.3970671178793006, "grad_norm": 3.1080024064190432, "learning_rate": 1.3730397769659694e-06, "loss": 0.5487, "step": 880 }, { "epoch": 0.39751833051325436, "grad_norm": 3.439572751157547, "learning_rate": 1.3716830187195854e-06, "loss": 0.5924, "step": 881 }, { "epoch": 0.3979695431472081, "grad_norm": 3.005894061607398, "learning_rate": 1.3703254661457772e-06, "loss": 0.5435, "step": 882 }, { "epoch": 0.3984207557811619, "grad_norm": 2.9201664205070172, "learning_rate": 1.3689671221457838e-06, "loss": 0.6007, "step": 883 }, { "epoch": 0.39887196841511563, "grad_norm": 3.076358828456211, "learning_rate": 1.3676079896225357e-06, "loss": 0.5791, "step": 884 }, { "epoch": 0.3993231810490694, "grad_norm": 3.0203537087915278, "learning_rate": 1.3662480714806481e-06, "loss": 0.5749, "step": 885 }, { "epoch": 0.39977439368302314, "grad_norm": 3.0613670662226538, "learning_rate": 1.3648873706264158e-06, "loss": 0.4396, "step": 886 }, { "epoch": 0.4002256063169769, "grad_norm": 2.6309532474498574, "learning_rate": 1.363525889967805e-06, "loss": 0.4493, "step": 887 }, { "epoch": 0.4006768189509306, "grad_norm": 3.208084607808158, "learning_rate": 1.3621636324144507e-06, "loss": 0.4828, "step": 888 }, { "epoch": 0.40112803158488436, "grad_norm": 3.332508518536951, "learning_rate": 1.3608006008776458e-06, "loss": 0.7003, "step": 889 }, { "epoch": 0.4015792442188381, "grad_norm": 3.3403690099503907, "learning_rate": 1.3594367982703388e-06, "loss": 0.6317, "step": 890 }, { "epoch": 0.4020304568527919, "grad_norm": 3.4197414818392273, "learning_rate": 1.3580722275071253e-06, "loss": 0.5383, "step": 891 }, { "epoch": 0.40248166948674563, "grad_norm": 2.913591421357927, "learning_rate": 1.3567068915042433e-06, "loss": 0.4599, "step": 892 }, { "epoch": 0.4029328821206994, "grad_norm": 3.2044119627020495, "learning_rate": 1.355340793179566e-06, "loss": 0.5566, "step": 893 }, { "epoch": 0.40338409475465314, "grad_norm": 3.56074826789299, "learning_rate": 1.3539739354525946e-06, "loss": 0.6223, "step": 894 }, { "epoch": 0.4038353073886069, "grad_norm": 3.0506608223157787, "learning_rate": 1.352606321244455e-06, "loss": 0.7291, "step": 895 }, { "epoch": 0.40428652002256066, "grad_norm": 3.06038972236859, "learning_rate": 1.3512379534778882e-06, "loss": 0.667, "step": 896 }, { "epoch": 0.40473773265651436, "grad_norm": 3.094425625713067, "learning_rate": 1.3498688350772472e-06, "loss": 0.5412, "step": 897 }, { "epoch": 0.4051889452904681, "grad_norm": 3.6102713876984054, "learning_rate": 1.3484989689684879e-06, "loss": 0.6519, "step": 898 }, { "epoch": 0.4056401579244219, "grad_norm": 3.054024840623251, "learning_rate": 1.347128358079164e-06, "loss": 0.4936, "step": 899 }, { "epoch": 0.40609137055837563, "grad_norm": 3.4039347396157145, "learning_rate": 1.3457570053384224e-06, "loss": 0.592, "step": 900 }, { "epoch": 0.4065425831923294, "grad_norm": 3.3228410861857367, "learning_rate": 1.3443849136769945e-06, "loss": 0.4142, "step": 901 }, { "epoch": 0.40699379582628314, "grad_norm": 3.5304381752960654, "learning_rate": 1.3430120860271905e-06, "loss": 0.4718, "step": 902 }, { "epoch": 0.4074450084602369, "grad_norm": 3.303441875394312, "learning_rate": 1.3416385253228937e-06, "loss": 0.5454, "step": 903 }, { "epoch": 0.40789622109419066, "grad_norm": 3.1629313821139706, "learning_rate": 1.3402642344995542e-06, "loss": 0.5869, "step": 904 }, { "epoch": 0.4083474337281444, "grad_norm": 3.422560274484405, "learning_rate": 1.3388892164941826e-06, "loss": 0.5956, "step": 905 }, { "epoch": 0.4087986463620981, "grad_norm": 3.09873469521746, "learning_rate": 1.3375134742453434e-06, "loss": 0.545, "step": 906 }, { "epoch": 0.4092498589960519, "grad_norm": 3.0994234736000164, "learning_rate": 1.3361370106931485e-06, "loss": 0.6644, "step": 907 }, { "epoch": 0.40970107163000563, "grad_norm": 3.3133585021955976, "learning_rate": 1.3347598287792518e-06, "loss": 0.5737, "step": 908 }, { "epoch": 0.4101522842639594, "grad_norm": 3.126477001362301, "learning_rate": 1.3333819314468427e-06, "loss": 0.5358, "step": 909 }, { "epoch": 0.41060349689791314, "grad_norm": 3.843129597851106, "learning_rate": 1.3320033216406385e-06, "loss": 0.5734, "step": 910 }, { "epoch": 0.4110547095318669, "grad_norm": 3.0621269496803762, "learning_rate": 1.33062400230688e-06, "loss": 0.4941, "step": 911 }, { "epoch": 0.41150592216582066, "grad_norm": 3.456611325927705, "learning_rate": 1.3292439763933244e-06, "loss": 0.4588, "step": 912 }, { "epoch": 0.4119571347997744, "grad_norm": 2.847828708168504, "learning_rate": 1.3278632468492377e-06, "loss": 0.5647, "step": 913 }, { "epoch": 0.4124083474337281, "grad_norm": 3.108813010015726, "learning_rate": 1.3264818166253916e-06, "loss": 0.4924, "step": 914 }, { "epoch": 0.4128595600676819, "grad_norm": 3.8277286638201558, "learning_rate": 1.325099688674053e-06, "loss": 0.5561, "step": 915 }, { "epoch": 0.41331077270163563, "grad_norm": 3.147511650054291, "learning_rate": 1.3237168659489825e-06, "loss": 0.5601, "step": 916 }, { "epoch": 0.4137619853355894, "grad_norm": 3.0966116731178657, "learning_rate": 1.3223333514054232e-06, "loss": 0.5234, "step": 917 }, { "epoch": 0.41421319796954315, "grad_norm": 2.5545639358135874, "learning_rate": 1.3209491480000977e-06, "loss": 0.4909, "step": 918 }, { "epoch": 0.4146644106034969, "grad_norm": 3.556454372294347, "learning_rate": 1.319564258691201e-06, "loss": 0.5788, "step": 919 }, { "epoch": 0.41511562323745066, "grad_norm": 3.1471280862116564, "learning_rate": 1.3181786864383932e-06, "loss": 0.582, "step": 920 }, { "epoch": 0.4155668358714044, "grad_norm": 3.343836136967912, "learning_rate": 1.3167924342027944e-06, "loss": 0.7171, "step": 921 }, { "epoch": 0.4160180485053582, "grad_norm": 2.917522532695626, "learning_rate": 1.3154055049469782e-06, "loss": 0.5267, "step": 922 }, { "epoch": 0.4164692611393119, "grad_norm": 2.758113115417477, "learning_rate": 1.3140179016349646e-06, "loss": 0.5173, "step": 923 }, { "epoch": 0.41692047377326563, "grad_norm": 3.3089395229486223, "learning_rate": 1.312629627232214e-06, "loss": 0.5977, "step": 924 }, { "epoch": 0.4173716864072194, "grad_norm": 3.4613943944882237, "learning_rate": 1.3112406847056212e-06, "loss": 0.5058, "step": 925 }, { "epoch": 0.41782289904117315, "grad_norm": 3.185867032326023, "learning_rate": 1.3098510770235092e-06, "loss": 0.5305, "step": 926 }, { "epoch": 0.4182741116751269, "grad_norm": 2.9584569775451963, "learning_rate": 1.308460807155622e-06, "loss": 0.4347, "step": 927 }, { "epoch": 0.41872532430908066, "grad_norm": 3.0943591120917735, "learning_rate": 1.3070698780731192e-06, "loss": 0.6806, "step": 928 }, { "epoch": 0.4191765369430344, "grad_norm": 3.257203595590827, "learning_rate": 1.3056782927485688e-06, "loss": 0.5482, "step": 929 }, { "epoch": 0.4196277495769882, "grad_norm": 3.378797664140071, "learning_rate": 1.3042860541559415e-06, "loss": 0.6118, "step": 930 }, { "epoch": 0.42007896221094193, "grad_norm": 3.782330800744356, "learning_rate": 1.3028931652706039e-06, "loss": 0.5512, "step": 931 }, { "epoch": 0.42053017484489563, "grad_norm": 3.2959465895996813, "learning_rate": 1.3014996290693127e-06, "loss": 0.557, "step": 932 }, { "epoch": 0.4209813874788494, "grad_norm": 3.36676862860587, "learning_rate": 1.3001054485302078e-06, "loss": 0.5585, "step": 933 }, { "epoch": 0.42143260011280315, "grad_norm": 2.9162822229329355, "learning_rate": 1.2987106266328058e-06, "loss": 0.4515, "step": 934 }, { "epoch": 0.4218838127467569, "grad_norm": 3.489259111346056, "learning_rate": 1.2973151663579947e-06, "loss": 0.5667, "step": 935 }, { "epoch": 0.42233502538071066, "grad_norm": 3.146522111454242, "learning_rate": 1.295919070688026e-06, "loss": 0.6089, "step": 936 }, { "epoch": 0.4227862380146644, "grad_norm": 3.061791688694132, "learning_rate": 1.2945223426065095e-06, "loss": 0.5346, "step": 937 }, { "epoch": 0.4232374506486182, "grad_norm": 3.0568178693062533, "learning_rate": 1.2931249850984064e-06, "loss": 0.5716, "step": 938 }, { "epoch": 0.42368866328257193, "grad_norm": 3.375298437835067, "learning_rate": 1.2917270011500232e-06, "loss": 0.5784, "step": 939 }, { "epoch": 0.4241398759165257, "grad_norm": 3.031854170941789, "learning_rate": 1.2903283937490055e-06, "loss": 0.4996, "step": 940 }, { "epoch": 0.4245910885504794, "grad_norm": 3.3874162086068726, "learning_rate": 1.2889291658843304e-06, "loss": 0.6241, "step": 941 }, { "epoch": 0.42504230118443315, "grad_norm": 3.1176739539444913, "learning_rate": 1.2875293205463015e-06, "loss": 0.5855, "step": 942 }, { "epoch": 0.4254935138183869, "grad_norm": 3.302403976138221, "learning_rate": 1.2861288607265424e-06, "loss": 0.5478, "step": 943 }, { "epoch": 0.42594472645234066, "grad_norm": 3.3731503961513747, "learning_rate": 1.2847277894179888e-06, "loss": 0.6005, "step": 944 }, { "epoch": 0.4263959390862944, "grad_norm": 2.9075875109978737, "learning_rate": 1.283326109614885e-06, "loss": 0.5674, "step": 945 }, { "epoch": 0.4268471517202482, "grad_norm": 3.4745646922579554, "learning_rate": 1.2819238243127735e-06, "loss": 0.5554, "step": 946 }, { "epoch": 0.42729836435420193, "grad_norm": 3.4374933327923203, "learning_rate": 1.2805209365084925e-06, "loss": 0.5387, "step": 947 }, { "epoch": 0.4277495769881557, "grad_norm": 3.5485274017082404, "learning_rate": 1.2791174492001675e-06, "loss": 0.5332, "step": 948 }, { "epoch": 0.42820078962210945, "grad_norm": 2.9948929783352463, "learning_rate": 1.2777133653872048e-06, "loss": 0.4852, "step": 949 }, { "epoch": 0.42865200225606315, "grad_norm": 3.183845074289996, "learning_rate": 1.2763086880702859e-06, "loss": 0.5381, "step": 950 }, { "epoch": 0.4291032148900169, "grad_norm": 3.3312664393519524, "learning_rate": 1.2749034202513598e-06, "loss": 0.5461, "step": 951 }, { "epoch": 0.42955442752397066, "grad_norm": 3.248294129712524, "learning_rate": 1.2734975649336383e-06, "loss": 0.6579, "step": 952 }, { "epoch": 0.4300056401579244, "grad_norm": 3.650970462515364, "learning_rate": 1.2720911251215896e-06, "loss": 0.5921, "step": 953 }, { "epoch": 0.4304568527918782, "grad_norm": 3.1484500818228627, "learning_rate": 1.270684103820929e-06, "loss": 0.5272, "step": 954 }, { "epoch": 0.43090806542583193, "grad_norm": 3.0790541876092665, "learning_rate": 1.2692765040386156e-06, "loss": 0.503, "step": 955 }, { "epoch": 0.4313592780597857, "grad_norm": 3.2768871039849254, "learning_rate": 1.2678683287828449e-06, "loss": 0.5212, "step": 956 }, { "epoch": 0.43181049069373945, "grad_norm": 3.4878570977168097, "learning_rate": 1.2664595810630422e-06, "loss": 0.6475, "step": 957 }, { "epoch": 0.4322617033276932, "grad_norm": 3.259311956538079, "learning_rate": 1.2650502638898558e-06, "loss": 0.5158, "step": 958 }, { "epoch": 0.4327129159616469, "grad_norm": 2.489346150252523, "learning_rate": 1.2636403802751515e-06, "loss": 0.527, "step": 959 }, { "epoch": 0.43316412859560066, "grad_norm": 3.227790541090488, "learning_rate": 1.2622299332320047e-06, "loss": 0.5149, "step": 960 }, { "epoch": 0.4336153412295544, "grad_norm": 3.360641383529355, "learning_rate": 1.2608189257746968e-06, "loss": 0.5772, "step": 961 }, { "epoch": 0.4340665538635082, "grad_norm": 3.3579225913579283, "learning_rate": 1.2594073609187046e-06, "loss": 0.5674, "step": 962 }, { "epoch": 0.43451776649746193, "grad_norm": 3.6833581489779696, "learning_rate": 1.2579952416806978e-06, "loss": 0.6361, "step": 963 }, { "epoch": 0.4349689791314157, "grad_norm": 3.68096763034361, "learning_rate": 1.2565825710785303e-06, "loss": 0.5787, "step": 964 }, { "epoch": 0.43542019176536945, "grad_norm": 3.200182035241666, "learning_rate": 1.2551693521312338e-06, "loss": 0.4692, "step": 965 }, { "epoch": 0.4358714043993232, "grad_norm": 3.161219776591041, "learning_rate": 1.2537555878590124e-06, "loss": 0.5817, "step": 966 }, { "epoch": 0.4363226170332769, "grad_norm": 3.3483981216994434, "learning_rate": 1.2523412812832366e-06, "loss": 0.6038, "step": 967 }, { "epoch": 0.43677382966723066, "grad_norm": 3.664757783306792, "learning_rate": 1.2509264354264337e-06, "loss": 0.5481, "step": 968 }, { "epoch": 0.4372250423011844, "grad_norm": 3.0682250506770923, "learning_rate": 1.2495110533122848e-06, "loss": 0.5729, "step": 969 }, { "epoch": 0.4376762549351382, "grad_norm": 3.276987387331576, "learning_rate": 1.2480951379656173e-06, "loss": 0.5415, "step": 970 }, { "epoch": 0.43812746756909193, "grad_norm": 2.7598150218875084, "learning_rate": 1.2466786924123977e-06, "loss": 0.546, "step": 971 }, { "epoch": 0.4385786802030457, "grad_norm": 3.232659097993065, "learning_rate": 1.2452617196797258e-06, "loss": 0.4954, "step": 972 }, { "epoch": 0.43902989283699945, "grad_norm": 3.389259817026544, "learning_rate": 1.2438442227958274e-06, "loss": 0.5887, "step": 973 }, { "epoch": 0.4394811054709532, "grad_norm": 3.287684474069178, "learning_rate": 1.2424262047900498e-06, "loss": 0.5423, "step": 974 }, { "epoch": 0.43993231810490696, "grad_norm": 2.9570593801357603, "learning_rate": 1.2410076686928521e-06, "loss": 0.5879, "step": 975 }, { "epoch": 0.44038353073886066, "grad_norm": 3.0562220060970104, "learning_rate": 1.2395886175358026e-06, "loss": 0.6012, "step": 976 }, { "epoch": 0.4408347433728144, "grad_norm": 2.742466631605964, "learning_rate": 1.2381690543515691e-06, "loss": 0.5138, "step": 977 }, { "epoch": 0.4412859560067682, "grad_norm": 3.508015423427429, "learning_rate": 1.236748982173914e-06, "loss": 0.5654, "step": 978 }, { "epoch": 0.44173716864072193, "grad_norm": 3.1519521341701604, "learning_rate": 1.2353284040376876e-06, "loss": 0.696, "step": 979 }, { "epoch": 0.4421883812746757, "grad_norm": 3.199203044906545, "learning_rate": 1.2339073229788214e-06, "loss": 0.5913, "step": 980 }, { "epoch": 0.44263959390862945, "grad_norm": 2.914392046354155, "learning_rate": 1.2324857420343216e-06, "loss": 0.5423, "step": 981 }, { "epoch": 0.4430908065425832, "grad_norm": 3.208161429147259, "learning_rate": 1.2310636642422623e-06, "loss": 0.6325, "step": 982 }, { "epoch": 0.44354201917653696, "grad_norm": 3.222015781659111, "learning_rate": 1.2296410926417804e-06, "loss": 0.4924, "step": 983 }, { "epoch": 0.4439932318104907, "grad_norm": 3.613081662865641, "learning_rate": 1.228218030273068e-06, "loss": 0.7603, "step": 984 }, { "epoch": 0.4444444444444444, "grad_norm": 2.8395745169932316, "learning_rate": 1.2267944801773648e-06, "loss": 0.4732, "step": 985 }, { "epoch": 0.4448956570783982, "grad_norm": 3.1488834617622157, "learning_rate": 1.2253704453969541e-06, "loss": 0.5935, "step": 986 }, { "epoch": 0.44534686971235193, "grad_norm": 2.890352804759586, "learning_rate": 1.2239459289751545e-06, "loss": 0.5563, "step": 987 }, { "epoch": 0.4457980823463057, "grad_norm": 3.344479914937335, "learning_rate": 1.2225209339563143e-06, "loss": 0.6045, "step": 988 }, { "epoch": 0.44624929498025945, "grad_norm": 2.9945956060125245, "learning_rate": 1.2210954633858042e-06, "loss": 0.6325, "step": 989 }, { "epoch": 0.4467005076142132, "grad_norm": 3.0243098524774985, "learning_rate": 1.219669520310011e-06, "loss": 0.5677, "step": 990 }, { "epoch": 0.44715172024816696, "grad_norm": 2.892447973941234, "learning_rate": 1.2182431077763316e-06, "loss": 0.7021, "step": 991 }, { "epoch": 0.4476029328821207, "grad_norm": 3.700082520722297, "learning_rate": 1.216816228833167e-06, "loss": 0.6754, "step": 992 }, { "epoch": 0.4480541455160745, "grad_norm": 3.0843377850883766, "learning_rate": 1.2153888865299133e-06, "loss": 0.5611, "step": 993 }, { "epoch": 0.4485053581500282, "grad_norm": 3.0386431018180806, "learning_rate": 1.213961083916958e-06, "loss": 0.5822, "step": 994 }, { "epoch": 0.44895657078398193, "grad_norm": 2.8399741907687392, "learning_rate": 1.2125328240456725e-06, "loss": 0.4906, "step": 995 }, { "epoch": 0.4494077834179357, "grad_norm": 3.027087230934386, "learning_rate": 1.2111041099684042e-06, "loss": 0.6892, "step": 996 }, { "epoch": 0.44985899605188945, "grad_norm": 2.9568284939288616, "learning_rate": 1.209674944738473e-06, "loss": 0.6299, "step": 997 }, { "epoch": 0.4503102086858432, "grad_norm": 3.259765282572653, "learning_rate": 1.2082453314101606e-06, "loss": 0.4755, "step": 998 }, { "epoch": 0.45076142131979696, "grad_norm": 3.447951905086282, "learning_rate": 1.2068152730387088e-06, "loss": 0.6369, "step": 999 }, { "epoch": 0.4512126339537507, "grad_norm": 3.234955570056143, "learning_rate": 1.2053847726803089e-06, "loss": 0.5156, "step": 1000 }, { "epoch": 0.4516638465877045, "grad_norm": 3.5463128389089333, "learning_rate": 1.203953833392097e-06, "loss": 0.475, "step": 1001 }, { "epoch": 0.45211505922165823, "grad_norm": 3.222070894972768, "learning_rate": 1.2025224582321485e-06, "loss": 0.609, "step": 1002 }, { "epoch": 0.45256627185561193, "grad_norm": 3.0758689339986, "learning_rate": 1.201090650259468e-06, "loss": 0.5973, "step": 1003 }, { "epoch": 0.4530174844895657, "grad_norm": 3.159127568590662, "learning_rate": 1.1996584125339868e-06, "loss": 0.5238, "step": 1004 }, { "epoch": 0.45346869712351945, "grad_norm": 3.6135720048902105, "learning_rate": 1.1982257481165545e-06, "loss": 0.675, "step": 1005 }, { "epoch": 0.4539199097574732, "grad_norm": 3.336465519438658, "learning_rate": 1.1967926600689321e-06, "loss": 0.542, "step": 1006 }, { "epoch": 0.45437112239142696, "grad_norm": 2.7490928983502854, "learning_rate": 1.1953591514537864e-06, "loss": 0.4707, "step": 1007 }, { "epoch": 0.4548223350253807, "grad_norm": 3.3908649442402714, "learning_rate": 1.193925225334682e-06, "loss": 0.602, "step": 1008 }, { "epoch": 0.4552735476593345, "grad_norm": 2.894962512757854, "learning_rate": 1.1924908847760772e-06, "loss": 0.6097, "step": 1009 }, { "epoch": 0.45572476029328823, "grad_norm": 3.4842483249570155, "learning_rate": 1.191056132843315e-06, "loss": 0.5953, "step": 1010 }, { "epoch": 0.45617597292724194, "grad_norm": 2.950398511688371, "learning_rate": 1.1896209726026176e-06, "loss": 0.4035, "step": 1011 }, { "epoch": 0.4566271855611957, "grad_norm": 2.9513437535271185, "learning_rate": 1.1881854071210804e-06, "loss": 0.5398, "step": 1012 }, { "epoch": 0.45707839819514945, "grad_norm": 2.8613177638799945, "learning_rate": 1.186749439466664e-06, "loss": 0.5525, "step": 1013 }, { "epoch": 0.4575296108291032, "grad_norm": 3.188791154585419, "learning_rate": 1.1853130727081893e-06, "loss": 0.5192, "step": 1014 }, { "epoch": 0.45798082346305696, "grad_norm": 3.3146247782552756, "learning_rate": 1.18387630991533e-06, "loss": 0.5131, "step": 1015 }, { "epoch": 0.4584320360970107, "grad_norm": 3.9250211014882517, "learning_rate": 1.1824391541586055e-06, "loss": 0.5914, "step": 1016 }, { "epoch": 0.4588832487309645, "grad_norm": 2.9161098414160733, "learning_rate": 1.1810016085093754e-06, "loss": 0.5391, "step": 1017 }, { "epoch": 0.45933446136491823, "grad_norm": 2.9799917015998987, "learning_rate": 1.179563676039833e-06, "loss": 0.5123, "step": 1018 }, { "epoch": 0.459785673998872, "grad_norm": 3.530754717880453, "learning_rate": 1.1781253598229982e-06, "loss": 0.5318, "step": 1019 }, { "epoch": 0.4602368866328257, "grad_norm": 3.375071447655234, "learning_rate": 1.1766866629327097e-06, "loss": 0.5517, "step": 1020 }, { "epoch": 0.46068809926677945, "grad_norm": 3.388282264796258, "learning_rate": 1.1752475884436213e-06, "loss": 0.5301, "step": 1021 }, { "epoch": 0.4611393119007332, "grad_norm": 3.4203055686756456, "learning_rate": 1.1738081394311932e-06, "loss": 0.6649, "step": 1022 }, { "epoch": 0.46159052453468696, "grad_norm": 3.3102903920545232, "learning_rate": 1.172368318971686e-06, "loss": 0.5006, "step": 1023 }, { "epoch": 0.4620417371686407, "grad_norm": 3.083071988078867, "learning_rate": 1.170928130142154e-06, "loss": 0.4744, "step": 1024 }, { "epoch": 0.4624929498025945, "grad_norm": 3.348114186262164, "learning_rate": 1.169487576020439e-06, "loss": 0.5852, "step": 1025 }, { "epoch": 0.46294416243654823, "grad_norm": 3.5395494913322696, "learning_rate": 1.1680466596851635e-06, "loss": 0.5319, "step": 1026 }, { "epoch": 0.463395375070502, "grad_norm": 3.5359128839904366, "learning_rate": 1.1666053842157232e-06, "loss": 0.5107, "step": 1027 }, { "epoch": 0.46384658770445575, "grad_norm": 2.6657070438945185, "learning_rate": 1.165163752692283e-06, "loss": 0.5478, "step": 1028 }, { "epoch": 0.46429780033840945, "grad_norm": 3.0149910558712243, "learning_rate": 1.1637217681957673e-06, "loss": 0.5872, "step": 1029 }, { "epoch": 0.4647490129723632, "grad_norm": 3.159458756633235, "learning_rate": 1.1622794338078552e-06, "loss": 0.706, "step": 1030 }, { "epoch": 0.46520022560631696, "grad_norm": 3.0885082043510654, "learning_rate": 1.1608367526109736e-06, "loss": 0.593, "step": 1031 }, { "epoch": 0.4656514382402707, "grad_norm": 2.9895356676399625, "learning_rate": 1.159393727688291e-06, "loss": 0.5294, "step": 1032 }, { "epoch": 0.4661026508742245, "grad_norm": 3.2775855911044594, "learning_rate": 1.1579503621237101e-06, "loss": 0.4458, "step": 1033 }, { "epoch": 0.46655386350817823, "grad_norm": 3.1259902784446894, "learning_rate": 1.1565066590018613e-06, "loss": 0.4544, "step": 1034 }, { "epoch": 0.467005076142132, "grad_norm": 3.099407676118007, "learning_rate": 1.1550626214080965e-06, "loss": 0.5772, "step": 1035 }, { "epoch": 0.46745628877608575, "grad_norm": 2.9661310625580333, "learning_rate": 1.1536182524284833e-06, "loss": 0.4604, "step": 1036 }, { "epoch": 0.4679075014100395, "grad_norm": 3.3426717260171976, "learning_rate": 1.1521735551497966e-06, "loss": 0.6421, "step": 1037 }, { "epoch": 0.4683587140439932, "grad_norm": 2.976762713490125, "learning_rate": 1.1507285326595126e-06, "loss": 0.597, "step": 1038 }, { "epoch": 0.46880992667794696, "grad_norm": 3.30725401073655, "learning_rate": 1.1492831880458037e-06, "loss": 0.6313, "step": 1039 }, { "epoch": 0.4692611393119007, "grad_norm": 2.9639877946890327, "learning_rate": 1.1478375243975295e-06, "loss": 0.4938, "step": 1040 }, { "epoch": 0.4697123519458545, "grad_norm": 2.786610484320687, "learning_rate": 1.1463915448042326e-06, "loss": 0.4515, "step": 1041 }, { "epoch": 0.47016356457980824, "grad_norm": 3.135745692122038, "learning_rate": 1.1449452523561294e-06, "loss": 0.5554, "step": 1042 }, { "epoch": 0.470614777213762, "grad_norm": 2.788435273509232, "learning_rate": 1.143498650144106e-06, "loss": 0.4914, "step": 1043 }, { "epoch": 0.47106598984771575, "grad_norm": 2.876792135265133, "learning_rate": 1.1420517412597105e-06, "loss": 0.4697, "step": 1044 }, { "epoch": 0.4715172024816695, "grad_norm": 3.5477556615090093, "learning_rate": 1.1406045287951457e-06, "loss": 0.5816, "step": 1045 }, { "epoch": 0.47196841511562326, "grad_norm": 2.92573936243044, "learning_rate": 1.1391570158432635e-06, "loss": 0.4241, "step": 1046 }, { "epoch": 0.47241962774957696, "grad_norm": 3.2379205245375347, "learning_rate": 1.1377092054975583e-06, "loss": 0.5848, "step": 1047 }, { "epoch": 0.4728708403835307, "grad_norm": 3.2602568198273736, "learning_rate": 1.1362611008521596e-06, "loss": 0.5032, "step": 1048 }, { "epoch": 0.4733220530174845, "grad_norm": 3.563180746954425, "learning_rate": 1.134812705001826e-06, "loss": 0.6199, "step": 1049 }, { "epoch": 0.47377326565143824, "grad_norm": 3.258258686215829, "learning_rate": 1.1333640210419386e-06, "loss": 0.6113, "step": 1050 }, { "epoch": 0.474224478285392, "grad_norm": 3.2628377254999297, "learning_rate": 1.1319150520684944e-06, "loss": 0.5243, "step": 1051 }, { "epoch": 0.47467569091934575, "grad_norm": 3.027101046392854, "learning_rate": 1.1304658011780984e-06, "loss": 0.5171, "step": 1052 }, { "epoch": 0.4751269035532995, "grad_norm": 3.649012114499818, "learning_rate": 1.1290162714679594e-06, "loss": 0.6611, "step": 1053 }, { "epoch": 0.47557811618725326, "grad_norm": 3.1689567292584266, "learning_rate": 1.1275664660358817e-06, "loss": 0.3962, "step": 1054 }, { "epoch": 0.476029328821207, "grad_norm": 2.92166316898993, "learning_rate": 1.1261163879802587e-06, "loss": 0.4777, "step": 1055 }, { "epoch": 0.4764805414551607, "grad_norm": 3.137056987986139, "learning_rate": 1.1246660404000658e-06, "loss": 0.5466, "step": 1056 }, { "epoch": 0.4769317540891145, "grad_norm": 3.382936110474769, "learning_rate": 1.1232154263948556e-06, "loss": 0.5077, "step": 1057 }, { "epoch": 0.47738296672306824, "grad_norm": 3.517217173416557, "learning_rate": 1.1217645490647494e-06, "loss": 0.5492, "step": 1058 }, { "epoch": 0.477834179357022, "grad_norm": 3.26075070325087, "learning_rate": 1.1203134115104315e-06, "loss": 0.5016, "step": 1059 }, { "epoch": 0.47828539199097575, "grad_norm": 3.2324066506117, "learning_rate": 1.1188620168331419e-06, "loss": 0.5466, "step": 1060 }, { "epoch": 0.4787366046249295, "grad_norm": 2.9767488512886975, "learning_rate": 1.1174103681346708e-06, "loss": 0.5206, "step": 1061 }, { "epoch": 0.47918781725888326, "grad_norm": 3.1883816288556166, "learning_rate": 1.1159584685173505e-06, "loss": 0.5209, "step": 1062 }, { "epoch": 0.479639029892837, "grad_norm": 3.2725897850315304, "learning_rate": 1.11450632108405e-06, "loss": 0.6575, "step": 1063 }, { "epoch": 0.4800902425267907, "grad_norm": 2.90226955653477, "learning_rate": 1.113053928938168e-06, "loss": 0.526, "step": 1064 }, { "epoch": 0.4805414551607445, "grad_norm": 3.2118769622065724, "learning_rate": 1.1116012951836255e-06, "loss": 0.5494, "step": 1065 }, { "epoch": 0.48099266779469824, "grad_norm": 2.9794708120012996, "learning_rate": 1.110148422924861e-06, "loss": 0.6493, "step": 1066 }, { "epoch": 0.481443880428652, "grad_norm": 3.36225003487691, "learning_rate": 1.1086953152668216e-06, "loss": 0.5022, "step": 1067 }, { "epoch": 0.48189509306260575, "grad_norm": 3.2803450847662754, "learning_rate": 1.1072419753149585e-06, "loss": 0.6551, "step": 1068 }, { "epoch": 0.4823463056965595, "grad_norm": 3.210329635697201, "learning_rate": 1.1057884061752176e-06, "loss": 0.5731, "step": 1069 }, { "epoch": 0.48279751833051326, "grad_norm": 3.5448326400292514, "learning_rate": 1.1043346109540369e-06, "loss": 0.5602, "step": 1070 }, { "epoch": 0.483248730964467, "grad_norm": 3.2599148247025354, "learning_rate": 1.102880592758336e-06, "loss": 0.6967, "step": 1071 }, { "epoch": 0.4836999435984208, "grad_norm": 3.921908291356212, "learning_rate": 1.1014263546955115e-06, "loss": 0.6903, "step": 1072 }, { "epoch": 0.4841511562323745, "grad_norm": 3.6723472952642098, "learning_rate": 1.0999718998734298e-06, "loss": 0.7113, "step": 1073 }, { "epoch": 0.48460236886632824, "grad_norm": 3.1199912636971767, "learning_rate": 1.0985172314004203e-06, "loss": 0.5092, "step": 1074 }, { "epoch": 0.485053581500282, "grad_norm": 3.084748288084798, "learning_rate": 1.0970623523852698e-06, "loss": 0.5553, "step": 1075 }, { "epoch": 0.48550479413423575, "grad_norm": 2.8776564795067556, "learning_rate": 1.0956072659372141e-06, "loss": 0.5115, "step": 1076 }, { "epoch": 0.4859560067681895, "grad_norm": 2.953441401470534, "learning_rate": 1.094151975165933e-06, "loss": 0.6507, "step": 1077 }, { "epoch": 0.48640721940214326, "grad_norm": 3.690371591944593, "learning_rate": 1.0926964831815424e-06, "loss": 0.6129, "step": 1078 }, { "epoch": 0.486858432036097, "grad_norm": 2.9774725768337387, "learning_rate": 1.0912407930945887e-06, "loss": 0.6598, "step": 1079 }, { "epoch": 0.4873096446700508, "grad_norm": 3.304695556663853, "learning_rate": 1.0897849080160411e-06, "loss": 0.596, "step": 1080 }, { "epoch": 0.48776085730400454, "grad_norm": 3.5963573435316367, "learning_rate": 1.0883288310572862e-06, "loss": 0.6246, "step": 1081 }, { "epoch": 0.48821206993795824, "grad_norm": 3.155896819738853, "learning_rate": 1.0868725653301205e-06, "loss": 0.5807, "step": 1082 }, { "epoch": 0.488663282571912, "grad_norm": 3.081080659400599, "learning_rate": 1.0854161139467435e-06, "loss": 0.434, "step": 1083 }, { "epoch": 0.48911449520586575, "grad_norm": 3.106854377639036, "learning_rate": 1.0839594800197516e-06, "loss": 0.48, "step": 1084 }, { "epoch": 0.4895657078398195, "grad_norm": 3.7755265948586825, "learning_rate": 1.082502666662132e-06, "loss": 0.6046, "step": 1085 }, { "epoch": 0.49001692047377327, "grad_norm": 3.1538567782545153, "learning_rate": 1.0810456769872542e-06, "loss": 0.4631, "step": 1086 }, { "epoch": 0.490468133107727, "grad_norm": 3.493792209571887, "learning_rate": 1.0795885141088652e-06, "loss": 0.4979, "step": 1087 }, { "epoch": 0.4909193457416808, "grad_norm": 3.2677544727924386, "learning_rate": 1.0781311811410825e-06, "loss": 0.4784, "step": 1088 }, { "epoch": 0.49137055837563454, "grad_norm": 3.2385254672960335, "learning_rate": 1.0766736811983863e-06, "loss": 0.6579, "step": 1089 }, { "epoch": 0.4918217710095883, "grad_norm": 3.0410160191967424, "learning_rate": 1.0752160173956144e-06, "loss": 0.4921, "step": 1090 }, { "epoch": 0.492272983643542, "grad_norm": 3.0814337139592958, "learning_rate": 1.0737581928479538e-06, "loss": 0.4993, "step": 1091 }, { "epoch": 0.49272419627749575, "grad_norm": 3.1302147016084336, "learning_rate": 1.0723002106709363e-06, "loss": 0.4858, "step": 1092 }, { "epoch": 0.4931754089114495, "grad_norm": 3.2032365657032846, "learning_rate": 1.0708420739804294e-06, "loss": 0.4662, "step": 1093 }, { "epoch": 0.49362662154540327, "grad_norm": 2.963453928384545, "learning_rate": 1.0693837858926315e-06, "loss": 0.4786, "step": 1094 }, { "epoch": 0.494077834179357, "grad_norm": 2.8479265985444835, "learning_rate": 1.0679253495240645e-06, "loss": 0.6417, "step": 1095 }, { "epoch": 0.4945290468133108, "grad_norm": 3.8316009838064153, "learning_rate": 1.066466767991567e-06, "loss": 0.6002, "step": 1096 }, { "epoch": 0.49498025944726454, "grad_norm": 3.13289524958788, "learning_rate": 1.0650080444122875e-06, "loss": 0.6372, "step": 1097 }, { "epoch": 0.4954314720812183, "grad_norm": 3.2639667702613253, "learning_rate": 1.0635491819036792e-06, "loss": 0.6167, "step": 1098 }, { "epoch": 0.49588268471517205, "grad_norm": 3.049433076188395, "learning_rate": 1.0620901835834912e-06, "loss": 0.5683, "step": 1099 }, { "epoch": 0.49633389734912575, "grad_norm": 3.387030164756313, "learning_rate": 1.0606310525697627e-06, "loss": 0.6261, "step": 1100 }, { "epoch": 0.4967851099830795, "grad_norm": 3.402149165888981, "learning_rate": 1.059171791980817e-06, "loss": 0.4562, "step": 1101 }, { "epoch": 0.49723632261703327, "grad_norm": 3.2602762733367996, "learning_rate": 1.0577124049352548e-06, "loss": 0.626, "step": 1102 }, { "epoch": 0.497687535250987, "grad_norm": 3.2610142828869777, "learning_rate": 1.0562528945519461e-06, "loss": 0.5353, "step": 1103 }, { "epoch": 0.4981387478849408, "grad_norm": 2.97682798873228, "learning_rate": 1.0547932639500246e-06, "loss": 0.4905, "step": 1104 }, { "epoch": 0.49858996051889454, "grad_norm": 2.938963109880475, "learning_rate": 1.0533335162488815e-06, "loss": 0.4562, "step": 1105 }, { "epoch": 0.4990411731528483, "grad_norm": 3.5017181297984012, "learning_rate": 1.051873654568158e-06, "loss": 0.5793, "step": 1106 }, { "epoch": 0.49949238578680205, "grad_norm": 2.7407269861856736, "learning_rate": 1.0504136820277384e-06, "loss": 0.5171, "step": 1107 }, { "epoch": 0.4999435984207558, "grad_norm": 3.2128618819706114, "learning_rate": 1.0489536017477448e-06, "loss": 0.4742, "step": 1108 }, { "epoch": 0.5003948110547095, "grad_norm": 3.2860218544233737, "learning_rate": 1.0474934168485288e-06, "loss": 0.5809, "step": 1109 }, { "epoch": 0.5008460236886633, "grad_norm": 3.1537779519919162, "learning_rate": 1.0460331304506655e-06, "loss": 0.5523, "step": 1110 }, { "epoch": 0.501297236322617, "grad_norm": 2.9629163520820225, "learning_rate": 1.0445727456749483e-06, "loss": 0.5112, "step": 1111 }, { "epoch": 0.5017484489565708, "grad_norm": 3.197935737383708, "learning_rate": 1.043112265642379e-06, "loss": 0.4732, "step": 1112 }, { "epoch": 0.5021996615905245, "grad_norm": 2.9033918431675922, "learning_rate": 1.041651693474164e-06, "loss": 0.4971, "step": 1113 }, { "epoch": 0.5026508742244783, "grad_norm": 3.1684252831494213, "learning_rate": 1.0401910322917064e-06, "loss": 0.5758, "step": 1114 }, { "epoch": 0.503102086858432, "grad_norm": 2.980345483564053, "learning_rate": 1.0387302852165998e-06, "loss": 0.4851, "step": 1115 }, { "epoch": 0.5035532994923858, "grad_norm": 3.36987317517813, "learning_rate": 1.037269455370621e-06, "loss": 0.5245, "step": 1116 }, { "epoch": 0.5040045121263396, "grad_norm": 3.2674734863325945, "learning_rate": 1.035808545875723e-06, "loss": 0.587, "step": 1117 }, { "epoch": 0.5044557247602933, "grad_norm": 3.089508629837757, "learning_rate": 1.0343475598540307e-06, "loss": 0.4585, "step": 1118 }, { "epoch": 0.5049069373942471, "grad_norm": 3.058502135032177, "learning_rate": 1.0328865004278315e-06, "loss": 0.62, "step": 1119 }, { "epoch": 0.5053581500282008, "grad_norm": 3.2132492413810305, "learning_rate": 1.0314253707195703e-06, "loss": 0.5984, "step": 1120 }, { "epoch": 0.5058093626621546, "grad_norm": 2.9972219397833637, "learning_rate": 1.0299641738518405e-06, "loss": 0.6045, "step": 1121 }, { "epoch": 0.5062605752961082, "grad_norm": 2.809038897639686, "learning_rate": 1.0285029129473813e-06, "loss": 0.5555, "step": 1122 }, { "epoch": 0.506711787930062, "grad_norm": 2.659291698186336, "learning_rate": 1.0270415911290671e-06, "loss": 0.5977, "step": 1123 }, { "epoch": 0.5071630005640158, "grad_norm": 3.0309037627722706, "learning_rate": 1.0255802115199032e-06, "loss": 0.5541, "step": 1124 }, { "epoch": 0.5076142131979695, "grad_norm": 3.0510921385148646, "learning_rate": 1.024118777243019e-06, "loss": 0.6471, "step": 1125 }, { "epoch": 0.5080654258319233, "grad_norm": 3.3253019574272176, "learning_rate": 1.022657291421659e-06, "loss": 0.5083, "step": 1126 }, { "epoch": 0.508516638465877, "grad_norm": 3.1624347051422115, "learning_rate": 1.0211957571791795e-06, "loss": 0.5804, "step": 1127 }, { "epoch": 0.5089678510998308, "grad_norm": 3.082831702328242, "learning_rate": 1.019734177639039e-06, "loss": 0.5742, "step": 1128 }, { "epoch": 0.5094190637337845, "grad_norm": 2.855577550248334, "learning_rate": 1.0182725559247945e-06, "loss": 0.5888, "step": 1129 }, { "epoch": 0.5098702763677383, "grad_norm": 3.5077212889613967, "learning_rate": 1.0168108951600915e-06, "loss": 0.5265, "step": 1130 }, { "epoch": 0.510321489001692, "grad_norm": 3.196038174031813, "learning_rate": 1.0153491984686593e-06, "loss": 0.5447, "step": 1131 }, { "epoch": 0.5107727016356458, "grad_norm": 3.0228322304609083, "learning_rate": 1.0138874689743047e-06, "loss": 0.5442, "step": 1132 }, { "epoch": 0.5112239142695996, "grad_norm": 3.2815180852833095, "learning_rate": 1.0124257098009042e-06, "loss": 0.5194, "step": 1133 }, { "epoch": 0.5116751269035533, "grad_norm": 2.9460400536573315, "learning_rate": 1.0109639240723973e-06, "loss": 0.5793, "step": 1134 }, { "epoch": 0.5121263395375071, "grad_norm": 3.2088130319211015, "learning_rate": 1.0095021149127806e-06, "loss": 0.554, "step": 1135 }, { "epoch": 0.5125775521714608, "grad_norm": 3.3804661649102465, "learning_rate": 1.0080402854461007e-06, "loss": 0.5165, "step": 1136 }, { "epoch": 0.5130287648054146, "grad_norm": 3.321371296451192, "learning_rate": 1.0065784387964485e-06, "loss": 0.4321, "step": 1137 }, { "epoch": 0.5134799774393684, "grad_norm": 3.037664620181153, "learning_rate": 1.0051165780879503e-06, "loss": 0.5399, "step": 1138 }, { "epoch": 0.5139311900733221, "grad_norm": 3.556502110355346, "learning_rate": 1.0036547064447622e-06, "loss": 0.5263, "step": 1139 }, { "epoch": 0.5143824027072758, "grad_norm": 3.281961668919357, "learning_rate": 1.0021928269910657e-06, "loss": 0.5017, "step": 1140 }, { "epoch": 0.5148336153412295, "grad_norm": 4.422821086167055, "learning_rate": 1.0007309428510568e-06, "loss": 0.6125, "step": 1141 }, { "epoch": 0.5152848279751833, "grad_norm": 3.042185816714922, "learning_rate": 9.992690571489431e-07, "loss": 0.4608, "step": 1142 }, { "epoch": 0.515736040609137, "grad_norm": 3.139974486773792, "learning_rate": 9.978071730089344e-07, "loss": 0.4999, "step": 1143 }, { "epoch": 0.5161872532430908, "grad_norm": 3.050392508261623, "learning_rate": 9.963452935552377e-07, "loss": 0.5472, "step": 1144 }, { "epoch": 0.5166384658770445, "grad_norm": 3.1524033877265336, "learning_rate": 9.948834219120498e-07, "loss": 0.5617, "step": 1145 }, { "epoch": 0.5170896785109983, "grad_norm": 3.045308188934204, "learning_rate": 9.934215612035514e-07, "loss": 0.4082, "step": 1146 }, { "epoch": 0.517540891144952, "grad_norm": 3.3228413802611225, "learning_rate": 9.91959714553899e-07, "loss": 0.5028, "step": 1147 }, { "epoch": 0.5179921037789058, "grad_norm": 2.9738895566428285, "learning_rate": 9.904978850872191e-07, "loss": 0.6102, "step": 1148 }, { "epoch": 0.5184433164128596, "grad_norm": 3.164313244153204, "learning_rate": 9.89036075927603e-07, "loss": 0.541, "step": 1149 }, { "epoch": 0.5188945290468133, "grad_norm": 3.0993846216386998, "learning_rate": 9.87574290199096e-07, "loss": 0.5887, "step": 1150 }, { "epoch": 0.5193457416807671, "grad_norm": 3.0457956258147503, "learning_rate": 9.861125310256954e-07, "loss": 0.518, "step": 1151 }, { "epoch": 0.5197969543147208, "grad_norm": 3.487761052187324, "learning_rate": 9.846508015313406e-07, "loss": 0.5634, "step": 1152 }, { "epoch": 0.5202481669486746, "grad_norm": 3.0958153646337654, "learning_rate": 9.831891048399084e-07, "loss": 0.486, "step": 1153 }, { "epoch": 0.5206993795826284, "grad_norm": 2.849378458901456, "learning_rate": 9.817274440752052e-07, "loss": 0.4714, "step": 1154 }, { "epoch": 0.5211505922165821, "grad_norm": 3.0204334797341867, "learning_rate": 9.802658223609608e-07, "loss": 0.5409, "step": 1155 }, { "epoch": 0.5216018048505359, "grad_norm": 3.4389615956259414, "learning_rate": 9.78804242820821e-07, "loss": 0.5677, "step": 1156 }, { "epoch": 0.5220530174844895, "grad_norm": 3.079047759993495, "learning_rate": 9.773427085783413e-07, "loss": 0.5837, "step": 1157 }, { "epoch": 0.5225042301184433, "grad_norm": 3.2180585801181407, "learning_rate": 9.758812227569812e-07, "loss": 0.6249, "step": 1158 }, { "epoch": 0.522955442752397, "grad_norm": 3.106738311935934, "learning_rate": 9.744197884800967e-07, "loss": 0.5928, "step": 1159 }, { "epoch": 0.5234066553863508, "grad_norm": 3.0296372900818676, "learning_rate": 9.72958408870933e-07, "loss": 0.6287, "step": 1160 }, { "epoch": 0.5238578680203045, "grad_norm": 3.2657856406977164, "learning_rate": 9.714970870526186e-07, "loss": 0.4556, "step": 1161 }, { "epoch": 0.5243090806542583, "grad_norm": 2.8714718085250497, "learning_rate": 9.700358261481592e-07, "loss": 0.5133, "step": 1162 }, { "epoch": 0.524760293288212, "grad_norm": 3.1929538694444353, "learning_rate": 9.6857462928043e-07, "loss": 0.5365, "step": 1163 }, { "epoch": 0.5252115059221658, "grad_norm": 3.1374361855603725, "learning_rate": 9.671134995721684e-07, "loss": 0.5009, "step": 1164 }, { "epoch": 0.5256627185561196, "grad_norm": 3.0612765295251614, "learning_rate": 9.656524401459692e-07, "loss": 0.4686, "step": 1165 }, { "epoch": 0.5261139311900733, "grad_norm": 3.0360262730106653, "learning_rate": 9.64191454124277e-07, "loss": 0.5078, "step": 1166 }, { "epoch": 0.5265651438240271, "grad_norm": 2.7855335523075895, "learning_rate": 9.62730544629379e-07, "loss": 0.4643, "step": 1167 }, { "epoch": 0.5270163564579808, "grad_norm": 3.2784285165726326, "learning_rate": 9.612697147834003e-07, "loss": 0.5212, "step": 1168 }, { "epoch": 0.5274675690919346, "grad_norm": 2.9731466081932125, "learning_rate": 9.598089677082933e-07, "loss": 0.5892, "step": 1169 }, { "epoch": 0.5279187817258884, "grad_norm": 3.496014297140297, "learning_rate": 9.583483065258363e-07, "loss": 0.5361, "step": 1170 }, { "epoch": 0.5283699943598421, "grad_norm": 3.2554831703742027, "learning_rate": 9.568877343576212e-07, "loss": 0.5437, "step": 1171 }, { "epoch": 0.5288212069937959, "grad_norm": 3.0774362338181307, "learning_rate": 9.554272543250516e-07, "loss": 0.5781, "step": 1172 }, { "epoch": 0.5292724196277496, "grad_norm": 3.189104546167665, "learning_rate": 9.539668695493344e-07, "loss": 0.4621, "step": 1173 }, { "epoch": 0.5297236322617034, "grad_norm": 3.5804034846331607, "learning_rate": 9.525065831514713e-07, "loss": 0.5667, "step": 1174 }, { "epoch": 0.530174844895657, "grad_norm": 3.0139310674345934, "learning_rate": 9.510463982522551e-07, "loss": 0.6764, "step": 1175 }, { "epoch": 0.5306260575296108, "grad_norm": 3.6437122981783507, "learning_rate": 9.495863179722615e-07, "loss": 0.5191, "step": 1176 }, { "epoch": 0.5310772701635645, "grad_norm": 3.780802968968682, "learning_rate": 9.481263454318421e-07, "loss": 0.5795, "step": 1177 }, { "epoch": 0.5315284827975183, "grad_norm": 3.371236920516789, "learning_rate": 9.466664837511186e-07, "loss": 0.4982, "step": 1178 }, { "epoch": 0.531979695431472, "grad_norm": 3.174724613560376, "learning_rate": 9.452067360499753e-07, "loss": 0.5753, "step": 1179 }, { "epoch": 0.5324309080654258, "grad_norm": 2.9536643600933283, "learning_rate": 9.437471054480539e-07, "loss": 0.6468, "step": 1180 }, { "epoch": 0.5328821206993796, "grad_norm": 3.358069442011158, "learning_rate": 9.422875950647451e-07, "loss": 0.5811, "step": 1181 }, { "epoch": 0.5333333333333333, "grad_norm": 3.1595218350137357, "learning_rate": 9.408282080191828e-07, "loss": 0.5539, "step": 1182 }, { "epoch": 0.5337845459672871, "grad_norm": 3.1013329069241844, "learning_rate": 9.393689474302376e-07, "loss": 0.4659, "step": 1183 }, { "epoch": 0.5342357586012408, "grad_norm": 3.743900056060665, "learning_rate": 9.379098164165092e-07, "loss": 0.6178, "step": 1184 }, { "epoch": 0.5346869712351946, "grad_norm": 3.10516339656473, "learning_rate": 9.364508180963207e-07, "loss": 0.5486, "step": 1185 }, { "epoch": 0.5351381838691484, "grad_norm": 3.7247912121292965, "learning_rate": 9.349919555877124e-07, "loss": 0.6168, "step": 1186 }, { "epoch": 0.5355893965031021, "grad_norm": 3.523392877840991, "learning_rate": 9.335332320084331e-07, "loss": 0.6578, "step": 1187 }, { "epoch": 0.5360406091370559, "grad_norm": 3.029040493416117, "learning_rate": 9.320746504759354e-07, "loss": 0.6227, "step": 1188 }, { "epoch": 0.5364918217710096, "grad_norm": 3.494834853466634, "learning_rate": 9.306162141073685e-07, "loss": 0.6807, "step": 1189 }, { "epoch": 0.5369430344049634, "grad_norm": 3.149740148496557, "learning_rate": 9.291579260195708e-07, "loss": 0.5903, "step": 1190 }, { "epoch": 0.5373942470389171, "grad_norm": 3.267822705572205, "learning_rate": 9.27699789329064e-07, "loss": 0.4847, "step": 1191 }, { "epoch": 0.5378454596728709, "grad_norm": 3.714908029228556, "learning_rate": 9.262418071520463e-07, "loss": 0.6019, "step": 1192 }, { "epoch": 0.5382966723068245, "grad_norm": 2.824698681778885, "learning_rate": 9.247839826043857e-07, "loss": 0.4482, "step": 1193 }, { "epoch": 0.5387478849407783, "grad_norm": 3.151682454847607, "learning_rate": 9.233263188016138e-07, "loss": 0.4968, "step": 1194 }, { "epoch": 0.5391990975747321, "grad_norm": 2.9790615764183355, "learning_rate": 9.218688188589174e-07, "loss": 0.5278, "step": 1195 }, { "epoch": 0.5396503102086858, "grad_norm": 2.9627521296692474, "learning_rate": 9.204114858911346e-07, "loss": 0.5431, "step": 1196 }, { "epoch": 0.5401015228426396, "grad_norm": 2.992708202976521, "learning_rate": 9.189543230127461e-07, "loss": 0.5062, "step": 1197 }, { "epoch": 0.5405527354765933, "grad_norm": 2.8085674068135016, "learning_rate": 9.174973333378683e-07, "loss": 0.5778, "step": 1198 }, { "epoch": 0.5410039481105471, "grad_norm": 3.324978073123737, "learning_rate": 9.160405199802486e-07, "loss": 0.5476, "step": 1199 }, { "epoch": 0.5414551607445008, "grad_norm": 2.6559214068968937, "learning_rate": 9.145838860532567e-07, "loss": 0.5372, "step": 1200 }, { "epoch": 0.5419063733784546, "grad_norm": 2.9815500519398763, "learning_rate": 9.131274346698795e-07, "loss": 0.5114, "step": 1201 }, { "epoch": 0.5423575860124084, "grad_norm": 3.4127009993684196, "learning_rate": 9.116711689427136e-07, "loss": 0.6166, "step": 1202 }, { "epoch": 0.5428087986463621, "grad_norm": 3.226502001346105, "learning_rate": 9.102150919839589e-07, "loss": 0.5777, "step": 1203 }, { "epoch": 0.5432600112803159, "grad_norm": 3.2761488720411585, "learning_rate": 9.087592069054118e-07, "loss": 0.4045, "step": 1204 }, { "epoch": 0.5437112239142696, "grad_norm": 3.2522550379255803, "learning_rate": 9.073035168184579e-07, "loss": 0.5614, "step": 1205 }, { "epoch": 0.5441624365482234, "grad_norm": 3.158338840928273, "learning_rate": 9.058480248340671e-07, "loss": 0.5347, "step": 1206 }, { "epoch": 0.5446136491821771, "grad_norm": 3.128846961573522, "learning_rate": 9.043927340627857e-07, "loss": 0.4551, "step": 1207 }, { "epoch": 0.5450648618161309, "grad_norm": 3.2010639237879333, "learning_rate": 9.029376476147301e-07, "loss": 0.6002, "step": 1208 }, { "epoch": 0.5455160744500847, "grad_norm": 3.0281082746251857, "learning_rate": 9.014827685995794e-07, "loss": 0.7246, "step": 1209 }, { "epoch": 0.5459672870840383, "grad_norm": 3.011888432511697, "learning_rate": 9.0002810012657e-07, "loss": 0.5268, "step": 1210 }, { "epoch": 0.5464184997179921, "grad_norm": 3.2243121571383386, "learning_rate": 8.985736453044886e-07, "loss": 0.5456, "step": 1211 }, { "epoch": 0.5468697123519458, "grad_norm": 3.0825653650623126, "learning_rate": 8.97119407241664e-07, "loss": 0.5749, "step": 1212 }, { "epoch": 0.5473209249858996, "grad_norm": 2.661130517981536, "learning_rate": 8.956653890459632e-07, "loss": 0.5596, "step": 1213 }, { "epoch": 0.5477721376198533, "grad_norm": 3.724775101530356, "learning_rate": 8.942115938247823e-07, "loss": 0.5477, "step": 1214 }, { "epoch": 0.5482233502538071, "grad_norm": 3.6650788579826585, "learning_rate": 8.927580246850416e-07, "loss": 0.4938, "step": 1215 }, { "epoch": 0.5486745628877608, "grad_norm": 3.139755611427215, "learning_rate": 8.913046847331784e-07, "loss": 0.6102, "step": 1216 }, { "epoch": 0.5491257755217146, "grad_norm": 2.9266705632636856, "learning_rate": 8.89851577075139e-07, "loss": 0.5387, "step": 1217 }, { "epoch": 0.5495769881556684, "grad_norm": 3.033908297559131, "learning_rate": 8.883987048163746e-07, "loss": 0.4825, "step": 1218 }, { "epoch": 0.5500282007896221, "grad_norm": 2.961499235356465, "learning_rate": 8.869460710618323e-07, "loss": 0.5537, "step": 1219 }, { "epoch": 0.5504794134235759, "grad_norm": 2.58750526796041, "learning_rate": 8.8549367891595e-07, "loss": 0.414, "step": 1220 }, { "epoch": 0.5509306260575296, "grad_norm": 3.2580895273085684, "learning_rate": 8.840415314826496e-07, "loss": 0.5591, "step": 1221 }, { "epoch": 0.5513818386914834, "grad_norm": 3.416770692667306, "learning_rate": 8.825896318653292e-07, "loss": 0.5914, "step": 1222 }, { "epoch": 0.5518330513254371, "grad_norm": 3.3868054346511385, "learning_rate": 8.811379831668578e-07, "loss": 0.5391, "step": 1223 }, { "epoch": 0.5522842639593909, "grad_norm": 3.513167625744527, "learning_rate": 8.796865884895685e-07, "loss": 0.6352, "step": 1224 }, { "epoch": 0.5527354765933447, "grad_norm": 3.2759348364567495, "learning_rate": 8.782354509352505e-07, "loss": 0.4788, "step": 1225 }, { "epoch": 0.5531866892272984, "grad_norm": 3.16036405291083, "learning_rate": 8.767845736051445e-07, "loss": 0.5454, "step": 1226 }, { "epoch": 0.5536379018612522, "grad_norm": 3.0291347992929563, "learning_rate": 8.753339595999343e-07, "loss": 0.4814, "step": 1227 }, { "epoch": 0.5540891144952058, "grad_norm": 3.419410089774215, "learning_rate": 8.738836120197414e-07, "loss": 0.6572, "step": 1228 }, { "epoch": 0.5545403271291596, "grad_norm": 3.4196948382062895, "learning_rate": 8.724335339641183e-07, "loss": 0.5833, "step": 1229 }, { "epoch": 0.5549915397631133, "grad_norm": 2.95345812029554, "learning_rate": 8.709837285320405e-07, "loss": 0.5173, "step": 1230 }, { "epoch": 0.5554427523970671, "grad_norm": 3.277178241020724, "learning_rate": 8.695341988219013e-07, "loss": 0.4623, "step": 1231 }, { "epoch": 0.5558939650310208, "grad_norm": 3.4090915194634466, "learning_rate": 8.68084947931506e-07, "loss": 0.54, "step": 1232 }, { "epoch": 0.5563451776649746, "grad_norm": 2.9424477792511072, "learning_rate": 8.666359789580612e-07, "loss": 0.4951, "step": 1233 }, { "epoch": 0.5567963902989284, "grad_norm": 2.8093499614133166, "learning_rate": 8.651872949981741e-07, "loss": 0.5294, "step": 1234 }, { "epoch": 0.5572476029328821, "grad_norm": 3.403220558325019, "learning_rate": 8.637388991478404e-07, "loss": 0.5359, "step": 1235 }, { "epoch": 0.5576988155668359, "grad_norm": 2.7915279885504294, "learning_rate": 8.622907945024417e-07, "loss": 0.4772, "step": 1236 }, { "epoch": 0.5581500282007896, "grad_norm": 3.402216398468153, "learning_rate": 8.608429841567364e-07, "loss": 0.6865, "step": 1237 }, { "epoch": 0.5586012408347434, "grad_norm": 3.4495430024602993, "learning_rate": 8.593954712048544e-07, "loss": 0.5511, "step": 1238 }, { "epoch": 0.5590524534686971, "grad_norm": 3.141295268567425, "learning_rate": 8.579482587402899e-07, "loss": 0.6404, "step": 1239 }, { "epoch": 0.5595036661026509, "grad_norm": 3.5009079206917257, "learning_rate": 8.565013498558941e-07, "loss": 0.5031, "step": 1240 }, { "epoch": 0.5599548787366047, "grad_norm": 3.4205180711589063, "learning_rate": 8.550547476438708e-07, "loss": 0.7429, "step": 1241 }, { "epoch": 0.5604060913705584, "grad_norm": 2.773602345160653, "learning_rate": 8.536084551957676e-07, "loss": 0.5418, "step": 1242 }, { "epoch": 0.5608573040045122, "grad_norm": 3.1366076676001704, "learning_rate": 8.521624756024704e-07, "loss": 0.554, "step": 1243 }, { "epoch": 0.5613085166384659, "grad_norm": 3.1430067736085676, "learning_rate": 8.507168119541963e-07, "loss": 0.5359, "step": 1244 }, { "epoch": 0.5617597292724196, "grad_norm": 3.3366605489268717, "learning_rate": 8.492714673404871e-07, "loss": 0.6201, "step": 1245 }, { "epoch": 0.5622109419063733, "grad_norm": 2.9317529934404396, "learning_rate": 8.478264448502036e-07, "loss": 0.5387, "step": 1246 }, { "epoch": 0.5626621545403271, "grad_norm": 2.8606734322020233, "learning_rate": 8.463817475715168e-07, "loss": 0.5679, "step": 1247 }, { "epoch": 0.5631133671742808, "grad_norm": 2.921942814808396, "learning_rate": 8.449373785919034e-07, "loss": 0.6017, "step": 1248 }, { "epoch": 0.5635645798082346, "grad_norm": 3.108017504971404, "learning_rate": 8.434933409981389e-07, "loss": 0.5762, "step": 1249 }, { "epoch": 0.5640157924421884, "grad_norm": 3.2256170994042828, "learning_rate": 8.420496378762899e-07, "loss": 0.5252, "step": 1250 }, { "epoch": 0.5644670050761421, "grad_norm": 2.733940948644607, "learning_rate": 8.406062723117089e-07, "loss": 0.4403, "step": 1251 }, { "epoch": 0.5649182177100959, "grad_norm": 3.130827807611649, "learning_rate": 8.391632473890262e-07, "loss": 0.5186, "step": 1252 }, { "epoch": 0.5653694303440496, "grad_norm": 2.8551274180587187, "learning_rate": 8.377205661921452e-07, "loss": 0.5046, "step": 1253 }, { "epoch": 0.5658206429780034, "grad_norm": 3.0734293806965263, "learning_rate": 8.36278231804233e-07, "loss": 0.5113, "step": 1254 }, { "epoch": 0.5662718556119571, "grad_norm": 3.0293954304204593, "learning_rate": 8.348362473077169e-07, "loss": 0.4135, "step": 1255 }, { "epoch": 0.5667230682459109, "grad_norm": 3.2711429622012087, "learning_rate": 8.333946157842767e-07, "loss": 0.5911, "step": 1256 }, { "epoch": 0.5671742808798647, "grad_norm": 3.21348983820472, "learning_rate": 8.319533403148366e-07, "loss": 0.5845, "step": 1257 }, { "epoch": 0.5676254935138184, "grad_norm": 2.977221190242596, "learning_rate": 8.305124239795608e-07, "loss": 0.4975, "step": 1258 }, { "epoch": 0.5680767061477722, "grad_norm": 3.317575862791347, "learning_rate": 8.29071869857846e-07, "loss": 0.5145, "step": 1259 }, { "epoch": 0.5685279187817259, "grad_norm": 3.0078612380810217, "learning_rate": 8.27631681028314e-07, "loss": 0.4761, "step": 1260 }, { "epoch": 0.5689791314156797, "grad_norm": 2.9469364940545684, "learning_rate": 8.26191860568807e-07, "loss": 0.4903, "step": 1261 }, { "epoch": 0.5694303440496334, "grad_norm": 2.8641257568656804, "learning_rate": 8.247524115563788e-07, "loss": 0.4808, "step": 1262 }, { "epoch": 0.5698815566835871, "grad_norm": 3.3655191743332313, "learning_rate": 8.233133370672903e-07, "loss": 0.684, "step": 1263 }, { "epoch": 0.5703327693175408, "grad_norm": 3.2105020379563602, "learning_rate": 8.218746401770021e-07, "loss": 0.5674, "step": 1264 }, { "epoch": 0.5707839819514946, "grad_norm": 3.5642628952355975, "learning_rate": 8.204363239601668e-07, "loss": 0.501, "step": 1265 }, { "epoch": 0.5712351945854484, "grad_norm": 2.838113061969968, "learning_rate": 8.189983914906248e-07, "loss": 0.616, "step": 1266 }, { "epoch": 0.5716864072194021, "grad_norm": 2.9737366718198626, "learning_rate": 8.175608458413947e-07, "loss": 0.576, "step": 1267 }, { "epoch": 0.5721376198533559, "grad_norm": 2.7885730115959597, "learning_rate": 8.161236900846702e-07, "loss": 0.4278, "step": 1268 }, { "epoch": 0.5725888324873096, "grad_norm": 3.4834364046330792, "learning_rate": 8.146869272918108e-07, "loss": 0.5024, "step": 1269 }, { "epoch": 0.5730400451212634, "grad_norm": 3.0305631394109054, "learning_rate": 8.132505605333361e-07, "loss": 0.542, "step": 1270 }, { "epoch": 0.5734912577552171, "grad_norm": 2.7776093782076705, "learning_rate": 8.118145928789197e-07, "loss": 0.382, "step": 1271 }, { "epoch": 0.5739424703891709, "grad_norm": 2.982019078285388, "learning_rate": 8.103790273973823e-07, "loss": 0.5661, "step": 1272 }, { "epoch": 0.5743936830231247, "grad_norm": 2.9581883979360155, "learning_rate": 8.089438671566852e-07, "loss": 0.5193, "step": 1273 }, { "epoch": 0.5748448956570784, "grad_norm": 2.973062841681966, "learning_rate": 8.075091152239229e-07, "loss": 0.5088, "step": 1274 }, { "epoch": 0.5752961082910322, "grad_norm": 3.4183675101644595, "learning_rate": 8.060747746653179e-07, "loss": 0.6613, "step": 1275 }, { "epoch": 0.5757473209249859, "grad_norm": 3.5528615876910012, "learning_rate": 8.046408485462136e-07, "loss": 0.5776, "step": 1276 }, { "epoch": 0.5761985335589397, "grad_norm": 3.332215928387168, "learning_rate": 8.032073399310677e-07, "loss": 0.4554, "step": 1277 }, { "epoch": 0.5766497461928934, "grad_norm": 3.1567473951867178, "learning_rate": 8.017742518834453e-07, "loss": 0.5779, "step": 1278 }, { "epoch": 0.5771009588268472, "grad_norm": 3.1547565169857723, "learning_rate": 8.003415874660129e-07, "loss": 0.6327, "step": 1279 }, { "epoch": 0.577552171460801, "grad_norm": 2.9713115658035547, "learning_rate": 7.989093497405322e-07, "loss": 0.5887, "step": 1280 }, { "epoch": 0.5780033840947546, "grad_norm": 3.2508596246355834, "learning_rate": 7.974775417678517e-07, "loss": 0.5491, "step": 1281 }, { "epoch": 0.5784545967287084, "grad_norm": 3.2239593887990288, "learning_rate": 7.960461666079029e-07, "loss": 0.5808, "step": 1282 }, { "epoch": 0.5789058093626621, "grad_norm": 3.3138402872343966, "learning_rate": 7.946152273196911e-07, "loss": 0.5569, "step": 1283 }, { "epoch": 0.5793570219966159, "grad_norm": 3.434837325852875, "learning_rate": 7.931847269612911e-07, "loss": 0.5351, "step": 1284 }, { "epoch": 0.5798082346305696, "grad_norm": 3.234624298819192, "learning_rate": 7.917546685898391e-07, "loss": 0.5941, "step": 1285 }, { "epoch": 0.5802594472645234, "grad_norm": 3.441772930399985, "learning_rate": 7.903250552615272e-07, "loss": 0.6435, "step": 1286 }, { "epoch": 0.5807106598984771, "grad_norm": 3.2690887674264895, "learning_rate": 7.888958900315959e-07, "loss": 0.5054, "step": 1287 }, { "epoch": 0.5811618725324309, "grad_norm": 3.9833861316481256, "learning_rate": 7.874671759543278e-07, "loss": 0.599, "step": 1288 }, { "epoch": 0.5816130851663847, "grad_norm": 2.868845567268956, "learning_rate": 7.860389160830419e-07, "loss": 0.4934, "step": 1289 }, { "epoch": 0.5820642978003384, "grad_norm": 3.047832823905869, "learning_rate": 7.846111134700867e-07, "loss": 0.5081, "step": 1290 }, { "epoch": 0.5825155104342922, "grad_norm": 3.275881282842554, "learning_rate": 7.831837711668332e-07, "loss": 0.5563, "step": 1291 }, { "epoch": 0.5829667230682459, "grad_norm": 3.14320311170746, "learning_rate": 7.817568922236681e-07, "loss": 0.5681, "step": 1292 }, { "epoch": 0.5834179357021997, "grad_norm": 3.524498053095352, "learning_rate": 7.80330479689989e-07, "loss": 0.6049, "step": 1293 }, { "epoch": 0.5838691483361534, "grad_norm": 3.118060779938019, "learning_rate": 7.789045366141961e-07, "loss": 0.6507, "step": 1294 }, { "epoch": 0.5843203609701072, "grad_norm": 3.3177816993811065, "learning_rate": 7.774790660436857e-07, "loss": 0.4724, "step": 1295 }, { "epoch": 0.584771573604061, "grad_norm": 2.837955287125353, "learning_rate": 7.760540710248454e-07, "loss": 0.51, "step": 1296 }, { "epoch": 0.5852227862380147, "grad_norm": 3.3668111960232117, "learning_rate": 7.746295546030458e-07, "loss": 0.637, "step": 1297 }, { "epoch": 0.5856739988719684, "grad_norm": 3.103539045578858, "learning_rate": 7.732055198226351e-07, "loss": 0.4814, "step": 1298 }, { "epoch": 0.5861252115059221, "grad_norm": 3.0421653752914413, "learning_rate": 7.717819697269321e-07, "loss": 0.525, "step": 1299 }, { "epoch": 0.5865764241398759, "grad_norm": 3.644103387205874, "learning_rate": 7.703589073582193e-07, "loss": 0.6064, "step": 1300 }, { "epoch": 0.5870276367738296, "grad_norm": 2.8055962606604092, "learning_rate": 7.689363357577378e-07, "loss": 0.5143, "step": 1301 }, { "epoch": 0.5874788494077834, "grad_norm": 3.277739442754899, "learning_rate": 7.675142579656788e-07, "loss": 0.5523, "step": 1302 }, { "epoch": 0.5879300620417371, "grad_norm": 3.460236702752055, "learning_rate": 7.660926770211787e-07, "loss": 0.5705, "step": 1303 }, { "epoch": 0.5883812746756909, "grad_norm": 3.260180098933021, "learning_rate": 7.646715959623125e-07, "loss": 0.6103, "step": 1304 }, { "epoch": 0.5888324873096447, "grad_norm": 3.4686183563952824, "learning_rate": 7.632510178260859e-07, "loss": 0.6584, "step": 1305 }, { "epoch": 0.5892836999435984, "grad_norm": 3.2006762499940353, "learning_rate": 7.618309456484308e-07, "loss": 0.4899, "step": 1306 }, { "epoch": 0.5897349125775522, "grad_norm": 2.9971855862973267, "learning_rate": 7.604113824641973e-07, "loss": 0.6366, "step": 1307 }, { "epoch": 0.5901861252115059, "grad_norm": 3.1873994214172807, "learning_rate": 7.589923313071479e-07, "loss": 0.6843, "step": 1308 }, { "epoch": 0.5906373378454597, "grad_norm": 3.2537367572253824, "learning_rate": 7.575737952099505e-07, "loss": 0.5228, "step": 1309 }, { "epoch": 0.5910885504794134, "grad_norm": 3.079550130831521, "learning_rate": 7.561557772041725e-07, "loss": 0.4874, "step": 1310 }, { "epoch": 0.5915397631133672, "grad_norm": 3.2716458004854543, "learning_rate": 7.547382803202742e-07, "loss": 0.5857, "step": 1311 }, { "epoch": 0.591990975747321, "grad_norm": 3.1743175723654735, "learning_rate": 7.533213075876022e-07, "loss": 0.529, "step": 1312 }, { "epoch": 0.5924421883812747, "grad_norm": 3.8188908521422458, "learning_rate": 7.519048620343825e-07, "loss": 0.6333, "step": 1313 }, { "epoch": 0.5928934010152285, "grad_norm": 2.8402686230355156, "learning_rate": 7.504889466877149e-07, "loss": 0.5294, "step": 1314 }, { "epoch": 0.5933446136491822, "grad_norm": 3.207555735354162, "learning_rate": 7.490735645735666e-07, "loss": 0.4968, "step": 1315 }, { "epoch": 0.5937958262831359, "grad_norm": 3.0695972720324622, "learning_rate": 7.476587187167635e-07, "loss": 0.5465, "step": 1316 }, { "epoch": 0.5942470389170896, "grad_norm": 2.9505661618967283, "learning_rate": 7.462444121409875e-07, "loss": 0.6166, "step": 1317 }, { "epoch": 0.5946982515510434, "grad_norm": 3.2187802075234737, "learning_rate": 7.448306478687663e-07, "loss": 0.4585, "step": 1318 }, { "epoch": 0.5951494641849971, "grad_norm": 3.1253329687327818, "learning_rate": 7.434174289214696e-07, "loss": 0.5872, "step": 1319 }, { "epoch": 0.5956006768189509, "grad_norm": 2.945507566127143, "learning_rate": 7.420047583193018e-07, "loss": 0.5749, "step": 1320 }, { "epoch": 0.5960518894529047, "grad_norm": 2.8046185112075506, "learning_rate": 7.405926390812952e-07, "loss": 0.6378, "step": 1321 }, { "epoch": 0.5965031020868584, "grad_norm": 3.3367837174035246, "learning_rate": 7.391810742253035e-07, "loss": 0.5555, "step": 1322 }, { "epoch": 0.5969543147208122, "grad_norm": 3.361270385049288, "learning_rate": 7.377700667679952e-07, "loss": 0.4942, "step": 1323 }, { "epoch": 0.5974055273547659, "grad_norm": 3.198915242112322, "learning_rate": 7.363596197248488e-07, "loss": 0.4949, "step": 1324 }, { "epoch": 0.5978567399887197, "grad_norm": 3.329606223960123, "learning_rate": 7.349497361101442e-07, "loss": 0.5877, "step": 1325 }, { "epoch": 0.5983079526226734, "grad_norm": 3.519739136879892, "learning_rate": 7.335404189369578e-07, "loss": 0.645, "step": 1326 }, { "epoch": 0.5987591652566272, "grad_norm": 3.0247206849836976, "learning_rate": 7.321316712171551e-07, "loss": 0.4796, "step": 1327 }, { "epoch": 0.599210377890581, "grad_norm": 3.186177089807284, "learning_rate": 7.307234959613842e-07, "loss": 0.5669, "step": 1328 }, { "epoch": 0.5996615905245347, "grad_norm": 3.130095831838903, "learning_rate": 7.293158961790714e-07, "loss": 0.5828, "step": 1329 }, { "epoch": 0.6001128031584885, "grad_norm": 3.384495690869306, "learning_rate": 7.279088748784105e-07, "loss": 0.6257, "step": 1330 }, { "epoch": 0.6005640157924422, "grad_norm": 3.3442845371887135, "learning_rate": 7.265024350663615e-07, "loss": 0.5929, "step": 1331 }, { "epoch": 0.601015228426396, "grad_norm": 3.329300257806507, "learning_rate": 7.250965797486404e-07, "loss": 0.5622, "step": 1332 }, { "epoch": 0.6014664410603497, "grad_norm": 3.137210816893963, "learning_rate": 7.236913119297144e-07, "loss": 0.4791, "step": 1333 }, { "epoch": 0.6019176536943034, "grad_norm": 3.2475644991308297, "learning_rate": 7.222866346127952e-07, "loss": 0.5562, "step": 1334 }, { "epoch": 0.6023688663282571, "grad_norm": 2.964087669113336, "learning_rate": 7.208825507998325e-07, "loss": 0.5684, "step": 1335 }, { "epoch": 0.6028200789622109, "grad_norm": 3.41721759180177, "learning_rate": 7.194790634915075e-07, "loss": 0.5463, "step": 1336 }, { "epoch": 0.6032712915961647, "grad_norm": 2.8453268851912923, "learning_rate": 7.180761756872267e-07, "loss": 0.5151, "step": 1337 }, { "epoch": 0.6037225042301184, "grad_norm": 3.2646935195604483, "learning_rate": 7.166738903851153e-07, "loss": 0.4982, "step": 1338 }, { "epoch": 0.6041737168640722, "grad_norm": 2.970220890446487, "learning_rate": 7.152722105820112e-07, "loss": 0.5154, "step": 1339 }, { "epoch": 0.6046249294980259, "grad_norm": 3.4187797390981114, "learning_rate": 7.138711392734578e-07, "loss": 0.4689, "step": 1340 }, { "epoch": 0.6050761421319797, "grad_norm": 2.985186967050454, "learning_rate": 7.124706794536983e-07, "loss": 0.5244, "step": 1341 }, { "epoch": 0.6055273547659334, "grad_norm": 3.455866470082041, "learning_rate": 7.110708341156698e-07, "loss": 0.5728, "step": 1342 }, { "epoch": 0.6059785673998872, "grad_norm": 3.6106718461697977, "learning_rate": 7.096716062509947e-07, "loss": 0.5279, "step": 1343 }, { "epoch": 0.606429780033841, "grad_norm": 4.105211530606187, "learning_rate": 7.082729988499768e-07, "loss": 0.6094, "step": 1344 }, { "epoch": 0.6068809926677947, "grad_norm": 2.693788412848332, "learning_rate": 7.068750149015936e-07, "loss": 0.4555, "step": 1345 }, { "epoch": 0.6073322053017485, "grad_norm": 3.0065637977545676, "learning_rate": 7.054776573934905e-07, "loss": 0.565, "step": 1346 }, { "epoch": 0.6077834179357022, "grad_norm": 3.289187175236741, "learning_rate": 7.04080929311974e-07, "loss": 0.5429, "step": 1347 }, { "epoch": 0.608234630569656, "grad_norm": 3.7046080997067903, "learning_rate": 7.026848336420052e-07, "loss": 0.5946, "step": 1348 }, { "epoch": 0.6086858432036097, "grad_norm": 2.9252591310820586, "learning_rate": 7.012893733671943e-07, "loss": 0.4993, "step": 1349 }, { "epoch": 0.6091370558375635, "grad_norm": 2.8882168795713894, "learning_rate": 6.998945514697923e-07, "loss": 0.4905, "step": 1350 }, { "epoch": 0.6095882684715171, "grad_norm": 3.583128007061669, "learning_rate": 6.985003709306871e-07, "loss": 0.4726, "step": 1351 }, { "epoch": 0.6100394811054709, "grad_norm": 3.130137883692283, "learning_rate": 6.97106834729396e-07, "loss": 0.4106, "step": 1352 }, { "epoch": 0.6104906937394247, "grad_norm": 3.0703439274804682, "learning_rate": 6.957139458440584e-07, "loss": 0.5424, "step": 1353 }, { "epoch": 0.6109419063733784, "grad_norm": 3.0314143588988496, "learning_rate": 6.943217072514311e-07, "loss": 0.6235, "step": 1354 }, { "epoch": 0.6113931190073322, "grad_norm": 3.495368048533246, "learning_rate": 6.929301219268805e-07, "loss": 0.664, "step": 1355 }, { "epoch": 0.6118443316412859, "grad_norm": 3.2783156409995318, "learning_rate": 6.915391928443779e-07, "loss": 0.5994, "step": 1356 }, { "epoch": 0.6122955442752397, "grad_norm": 3.1900987577022075, "learning_rate": 6.90148922976491e-07, "loss": 0.473, "step": 1357 }, { "epoch": 0.6127467569091934, "grad_norm": 3.542481502196449, "learning_rate": 6.887593152943789e-07, "loss": 0.6318, "step": 1358 }, { "epoch": 0.6131979695431472, "grad_norm": 3.0512134064626077, "learning_rate": 6.873703727677862e-07, "loss": 0.4577, "step": 1359 }, { "epoch": 0.613649182177101, "grad_norm": 3.0267553714028814, "learning_rate": 6.859820983650355e-07, "loss": 0.5358, "step": 1360 }, { "epoch": 0.6141003948110547, "grad_norm": 3.320872889591731, "learning_rate": 6.845944950530218e-07, "loss": 0.5442, "step": 1361 }, { "epoch": 0.6145516074450085, "grad_norm": 3.2701182397565347, "learning_rate": 6.832075657972054e-07, "loss": 0.5837, "step": 1362 }, { "epoch": 0.6150028200789622, "grad_norm": 3.3678828828961533, "learning_rate": 6.818213135616071e-07, "loss": 0.663, "step": 1363 }, { "epoch": 0.615454032712916, "grad_norm": 3.1223684191824854, "learning_rate": 6.804357413087992e-07, "loss": 0.529, "step": 1364 }, { "epoch": 0.6159052453468697, "grad_norm": 3.686360531224555, "learning_rate": 6.790508519999023e-07, "loss": 0.6493, "step": 1365 }, { "epoch": 0.6163564579808235, "grad_norm": 2.9271029540244005, "learning_rate": 6.776666485945769e-07, "loss": 0.5776, "step": 1366 }, { "epoch": 0.6168076706147773, "grad_norm": 3.4729445853273737, "learning_rate": 6.762831340510174e-07, "loss": 0.512, "step": 1367 }, { "epoch": 0.617258883248731, "grad_norm": 3.14390720715517, "learning_rate": 6.749003113259466e-07, "loss": 0.5518, "step": 1368 }, { "epoch": 0.6177100958826847, "grad_norm": 3.138843726117474, "learning_rate": 6.735181833746087e-07, "loss": 0.6031, "step": 1369 }, { "epoch": 0.6181613085166384, "grad_norm": 3.272624979852151, "learning_rate": 6.721367531507626e-07, "loss": 0.5369, "step": 1370 }, { "epoch": 0.6186125211505922, "grad_norm": 3.562938902127868, "learning_rate": 6.70756023606676e-07, "loss": 0.6634, "step": 1371 }, { "epoch": 0.6190637337845459, "grad_norm": 2.937710815940084, "learning_rate": 6.6937599769312e-07, "loss": 0.6368, "step": 1372 }, { "epoch": 0.6195149464184997, "grad_norm": 3.2281736301884596, "learning_rate": 6.679966783593615e-07, "loss": 0.5482, "step": 1373 }, { "epoch": 0.6199661590524534, "grad_norm": 3.46989691765386, "learning_rate": 6.666180685531575e-07, "loss": 0.5121, "step": 1374 }, { "epoch": 0.6204173716864072, "grad_norm": 2.9512285549656525, "learning_rate": 6.65240171220748e-07, "loss": 0.6428, "step": 1375 }, { "epoch": 0.620868584320361, "grad_norm": 3.5246959007009044, "learning_rate": 6.638629893068515e-07, "loss": 0.5731, "step": 1376 }, { "epoch": 0.6213197969543147, "grad_norm": 3.0891012405500304, "learning_rate": 6.62486525754657e-07, "loss": 0.4931, "step": 1377 }, { "epoch": 0.6217710095882685, "grad_norm": 2.9732550708765233, "learning_rate": 6.611107835058174e-07, "loss": 0.4172, "step": 1378 }, { "epoch": 0.6222222222222222, "grad_norm": 2.933874068194838, "learning_rate": 6.59735765500446e-07, "loss": 0.5248, "step": 1379 }, { "epoch": 0.622673434856176, "grad_norm": 3.1023005159339165, "learning_rate": 6.583614746771064e-07, "loss": 0.5489, "step": 1380 }, { "epoch": 0.6231246474901297, "grad_norm": 2.9712725239339477, "learning_rate": 6.569879139728096e-07, "loss": 0.5301, "step": 1381 }, { "epoch": 0.6235758601240835, "grad_norm": 3.1648033323683045, "learning_rate": 6.556150863230054e-07, "loss": 0.5682, "step": 1382 }, { "epoch": 0.6240270727580373, "grad_norm": 2.7399149382162564, "learning_rate": 6.542429946615773e-07, "loss": 0.4796, "step": 1383 }, { "epoch": 0.624478285391991, "grad_norm": 2.8260892433984073, "learning_rate": 6.528716419208361e-07, "loss": 0.5453, "step": 1384 }, { "epoch": 0.6249294980259448, "grad_norm": 3.145384660363434, "learning_rate": 6.515010310315125e-07, "loss": 0.638, "step": 1385 }, { "epoch": 0.6253807106598985, "grad_norm": 3.0447519086357846, "learning_rate": 6.50131164922753e-07, "loss": 0.5322, "step": 1386 }, { "epoch": 0.6258319232938522, "grad_norm": 3.4448461436407594, "learning_rate": 6.487620465221117e-07, "loss": 0.5624, "step": 1387 }, { "epoch": 0.6262831359278059, "grad_norm": 3.0855788626135023, "learning_rate": 6.473936787555452e-07, "loss": 0.4002, "step": 1388 }, { "epoch": 0.6267343485617597, "grad_norm": 2.943659166164125, "learning_rate": 6.460260645474052e-07, "loss": 0.4525, "step": 1389 }, { "epoch": 0.6271855611957134, "grad_norm": 2.9945684467434135, "learning_rate": 6.44659206820434e-07, "loss": 0.4683, "step": 1390 }, { "epoch": 0.6276367738296672, "grad_norm": 2.929306663688666, "learning_rate": 6.432931084957566e-07, "loss": 0.5541, "step": 1391 }, { "epoch": 0.628087986463621, "grad_norm": 3.0908644677993817, "learning_rate": 6.419277724928747e-07, "loss": 0.5163, "step": 1392 }, { "epoch": 0.6285391990975747, "grad_norm": 3.001491521797685, "learning_rate": 6.405632017296614e-07, "loss": 0.6203, "step": 1393 }, { "epoch": 0.6289904117315285, "grad_norm": 3.1474986627885895, "learning_rate": 6.391993991223543e-07, "loss": 0.544, "step": 1394 }, { "epoch": 0.6294416243654822, "grad_norm": 3.2290177386909966, "learning_rate": 6.378363675855494e-07, "loss": 0.5752, "step": 1395 }, { "epoch": 0.629892836999436, "grad_norm": 3.222879746589294, "learning_rate": 6.364741100321947e-07, "loss": 0.4536, "step": 1396 }, { "epoch": 0.6303440496333897, "grad_norm": 3.0349129642474755, "learning_rate": 6.351126293735842e-07, "loss": 0.4456, "step": 1397 }, { "epoch": 0.6307952622673435, "grad_norm": 3.218838258944328, "learning_rate": 6.33751928519352e-07, "loss": 0.5432, "step": 1398 }, { "epoch": 0.6312464749012973, "grad_norm": 3.2926190596476284, "learning_rate": 6.323920103774644e-07, "loss": 0.5973, "step": 1399 }, { "epoch": 0.631697687535251, "grad_norm": 3.3559310121476775, "learning_rate": 6.310328778542162e-07, "loss": 0.5248, "step": 1400 }, { "epoch": 0.6321489001692048, "grad_norm": 3.6153857414340793, "learning_rate": 6.296745338542229e-07, "loss": 0.6443, "step": 1401 }, { "epoch": 0.6326001128031585, "grad_norm": 3.1304394163996623, "learning_rate": 6.283169812804146e-07, "loss": 0.5722, "step": 1402 }, { "epoch": 0.6330513254371123, "grad_norm": 3.0269630181138396, "learning_rate": 6.269602230340304e-07, "loss": 0.5002, "step": 1403 }, { "epoch": 0.6335025380710659, "grad_norm": 2.901434555273011, "learning_rate": 6.256042620146118e-07, "loss": 0.4313, "step": 1404 }, { "epoch": 0.6339537507050197, "grad_norm": 2.904472954933817, "learning_rate": 6.242491011199963e-07, "loss": 0.5316, "step": 1405 }, { "epoch": 0.6344049633389734, "grad_norm": 3.569318854020443, "learning_rate": 6.228947432463111e-07, "loss": 0.6989, "step": 1406 }, { "epoch": 0.6348561759729272, "grad_norm": 3.70336661181829, "learning_rate": 6.21541191287968e-07, "loss": 0.4956, "step": 1407 }, { "epoch": 0.635307388606881, "grad_norm": 2.835895432940531, "learning_rate": 6.201884481376562e-07, "loss": 0.5343, "step": 1408 }, { "epoch": 0.6357586012408347, "grad_norm": 3.154332164144894, "learning_rate": 6.188365166863365e-07, "loss": 0.4946, "step": 1409 }, { "epoch": 0.6362098138747885, "grad_norm": 2.999966045677623, "learning_rate": 6.174853998232346e-07, "loss": 0.4792, "step": 1410 }, { "epoch": 0.6366610265087422, "grad_norm": 2.823054257582786, "learning_rate": 6.161351004358359e-07, "loss": 0.4774, "step": 1411 }, { "epoch": 0.637112239142696, "grad_norm": 3.1087901532322473, "learning_rate": 6.14785621409878e-07, "loss": 0.5926, "step": 1412 }, { "epoch": 0.6375634517766497, "grad_norm": 3.19373907556398, "learning_rate": 6.13436965629346e-07, "loss": 0.4821, "step": 1413 }, { "epoch": 0.6380146644106035, "grad_norm": 3.7849347226642136, "learning_rate": 6.120891359764655e-07, "loss": 0.5625, "step": 1414 }, { "epoch": 0.6384658770445573, "grad_norm": 3.214785840415643, "learning_rate": 6.107421353316964e-07, "loss": 0.5056, "step": 1415 }, { "epoch": 0.638917089678511, "grad_norm": 3.2716930771356387, "learning_rate": 6.093959665737267e-07, "loss": 0.5974, "step": 1416 }, { "epoch": 0.6393683023124648, "grad_norm": 2.657202103063792, "learning_rate": 6.080506325794674e-07, "loss": 0.458, "step": 1417 }, { "epoch": 0.6398195149464185, "grad_norm": 3.4613963613287284, "learning_rate": 6.067061362240448e-07, "loss": 0.5536, "step": 1418 }, { "epoch": 0.6402707275803723, "grad_norm": 3.1871447082433213, "learning_rate": 6.053624803807951e-07, "loss": 0.5803, "step": 1419 }, { "epoch": 0.640721940214326, "grad_norm": 3.1180639042969274, "learning_rate": 6.040196679212582e-07, "loss": 0.4619, "step": 1420 }, { "epoch": 0.6411731528482798, "grad_norm": 3.179821687875411, "learning_rate": 6.026777017151718e-07, "loss": 0.584, "step": 1421 }, { "epoch": 0.6416243654822334, "grad_norm": 3.917598055933022, "learning_rate": 6.013365846304657e-07, "loss": 0.4427, "step": 1422 }, { "epoch": 0.6420755781161872, "grad_norm": 3.259154452067742, "learning_rate": 5.999963195332536e-07, "loss": 0.5559, "step": 1423 }, { "epoch": 0.642526790750141, "grad_norm": 3.4742010090894806, "learning_rate": 5.986569092878296e-07, "loss": 0.5994, "step": 1424 }, { "epoch": 0.6429780033840947, "grad_norm": 3.753798551895116, "learning_rate": 5.973183567566604e-07, "loss": 0.6971, "step": 1425 }, { "epoch": 0.6434292160180485, "grad_norm": 2.941155690931197, "learning_rate": 5.959806648003796e-07, "loss": 0.516, "step": 1426 }, { "epoch": 0.6438804286520022, "grad_norm": 2.9533012729371326, "learning_rate": 5.946438362777819e-07, "loss": 0.6212, "step": 1427 }, { "epoch": 0.644331641285956, "grad_norm": 3.3329411483235964, "learning_rate": 5.933078740458166e-07, "loss": 0.6503, "step": 1428 }, { "epoch": 0.6447828539199097, "grad_norm": 3.498152521685046, "learning_rate": 5.919727809595815e-07, "loss": 0.5699, "step": 1429 }, { "epoch": 0.6452340665538635, "grad_norm": 3.638822037194711, "learning_rate": 5.906385598723178e-07, "loss": 0.5854, "step": 1430 }, { "epoch": 0.6456852791878173, "grad_norm": 3.381793813968072, "learning_rate": 5.893052136354017e-07, "loss": 0.4964, "step": 1431 }, { "epoch": 0.646136491821771, "grad_norm": 3.3009296858703268, "learning_rate": 5.879727450983412e-07, "loss": 0.593, "step": 1432 }, { "epoch": 0.6465877044557248, "grad_norm": 3.550794325305599, "learning_rate": 5.866411571087671e-07, "loss": 0.5447, "step": 1433 }, { "epoch": 0.6470389170896785, "grad_norm": 3.3261654579811992, "learning_rate": 5.853104525124297e-07, "loss": 0.5494, "step": 1434 }, { "epoch": 0.6474901297236323, "grad_norm": 3.428245584517914, "learning_rate": 5.839806341531908e-07, "loss": 0.6307, "step": 1435 }, { "epoch": 0.647941342357586, "grad_norm": 3.0629393848960365, "learning_rate": 5.82651704873018e-07, "loss": 0.5884, "step": 1436 }, { "epoch": 0.6483925549915398, "grad_norm": 2.948994135334962, "learning_rate": 5.813236675119793e-07, "loss": 0.4318, "step": 1437 }, { "epoch": 0.6488437676254936, "grad_norm": 3.760438297720544, "learning_rate": 5.79996524908236e-07, "loss": 0.5324, "step": 1438 }, { "epoch": 0.6492949802594473, "grad_norm": 3.49087174910647, "learning_rate": 5.786702798980388e-07, "loss": 0.5048, "step": 1439 }, { "epoch": 0.649746192893401, "grad_norm": 2.9974874928675614, "learning_rate": 5.773449353157171e-07, "loss": 0.5413, "step": 1440 }, { "epoch": 0.6501974055273547, "grad_norm": 3.777448022885416, "learning_rate": 5.76020493993679e-07, "loss": 0.5136, "step": 1441 }, { "epoch": 0.6506486181613085, "grad_norm": 3.210445403802698, "learning_rate": 5.74696958762401e-07, "loss": 0.5167, "step": 1442 }, { "epoch": 0.6510998307952622, "grad_norm": 3.336796933709652, "learning_rate": 5.733743324504224e-07, "loss": 0.5119, "step": 1443 }, { "epoch": 0.651551043429216, "grad_norm": 3.3282911026676176, "learning_rate": 5.720526178843418e-07, "loss": 0.5884, "step": 1444 }, { "epoch": 0.6520022560631697, "grad_norm": 3.21378303870658, "learning_rate": 5.707318178888082e-07, "loss": 0.538, "step": 1445 }, { "epoch": 0.6524534686971235, "grad_norm": 2.969064914210708, "learning_rate": 5.694119352865159e-07, "loss": 0.5845, "step": 1446 }, { "epoch": 0.6529046813310773, "grad_norm": 3.30695390232686, "learning_rate": 5.680929728981991e-07, "loss": 0.5142, "step": 1447 }, { "epoch": 0.653355893965031, "grad_norm": 3.0732966058972604, "learning_rate": 5.667749335426246e-07, "loss": 0.4201, "step": 1448 }, { "epoch": 0.6538071065989848, "grad_norm": 3.254645914821211, "learning_rate": 5.654578200365885e-07, "loss": 0.6627, "step": 1449 }, { "epoch": 0.6542583192329385, "grad_norm": 2.9760909272929683, "learning_rate": 5.641416351949062e-07, "loss": 0.4217, "step": 1450 }, { "epoch": 0.6547095318668923, "grad_norm": 2.997343851542973, "learning_rate": 5.628263818304091e-07, "loss": 0.5031, "step": 1451 }, { "epoch": 0.655160744500846, "grad_norm": 3.0077707371721862, "learning_rate": 5.615120627539387e-07, "loss": 0.4886, "step": 1452 }, { "epoch": 0.6556119571347998, "grad_norm": 3.5304208905303915, "learning_rate": 5.601986807743387e-07, "loss": 0.5286, "step": 1453 }, { "epoch": 0.6560631697687536, "grad_norm": 3.6248543822772463, "learning_rate": 5.588862386984509e-07, "loss": 0.6958, "step": 1454 }, { "epoch": 0.6565143824027073, "grad_norm": 3.153359939961065, "learning_rate": 5.575747393311078e-07, "loss": 0.5553, "step": 1455 }, { "epoch": 0.6569655950366611, "grad_norm": 3.0726315593606093, "learning_rate": 5.562641854751274e-07, "loss": 0.4999, "step": 1456 }, { "epoch": 0.6574168076706147, "grad_norm": 2.6907674515493873, "learning_rate": 5.54954579931308e-07, "loss": 0.4409, "step": 1457 }, { "epoch": 0.6578680203045685, "grad_norm": 2.976942150158183, "learning_rate": 5.536459254984194e-07, "loss": 0.5027, "step": 1458 }, { "epoch": 0.6583192329385222, "grad_norm": 4.099210213112422, "learning_rate": 5.523382249732009e-07, "loss": 0.5853, "step": 1459 }, { "epoch": 0.658770445572476, "grad_norm": 3.5743013453464427, "learning_rate": 5.510314811503519e-07, "loss": 0.6072, "step": 1460 }, { "epoch": 0.6592216582064298, "grad_norm": 3.0791361228178205, "learning_rate": 5.497256968225263e-07, "loss": 0.4773, "step": 1461 }, { "epoch": 0.6596728708403835, "grad_norm": 3.127066793240839, "learning_rate": 5.484208747803301e-07, "loss": 0.6281, "step": 1462 }, { "epoch": 0.6601240834743373, "grad_norm": 3.1864970517695275, "learning_rate": 5.4711701781231e-07, "loss": 0.5335, "step": 1463 }, { "epoch": 0.660575296108291, "grad_norm": 2.7910338972873676, "learning_rate": 5.458141287049525e-07, "loss": 0.4367, "step": 1464 }, { "epoch": 0.6610265087422448, "grad_norm": 3.112978170764596, "learning_rate": 5.445122102426745e-07, "loss": 0.4787, "step": 1465 }, { "epoch": 0.6614777213761985, "grad_norm": 3.2428968725230436, "learning_rate": 5.432112652078179e-07, "loss": 0.4979, "step": 1466 }, { "epoch": 0.6619289340101523, "grad_norm": 3.64761923117337, "learning_rate": 5.419112963806467e-07, "loss": 0.6766, "step": 1467 }, { "epoch": 0.662380146644106, "grad_norm": 3.1374440807407966, "learning_rate": 5.406123065393351e-07, "loss": 0.585, "step": 1468 }, { "epoch": 0.6628313592780598, "grad_norm": 3.0743306845398872, "learning_rate": 5.393142984599684e-07, "loss": 0.5432, "step": 1469 }, { "epoch": 0.6632825719120136, "grad_norm": 3.0220167684458, "learning_rate": 5.380172749165321e-07, "loss": 0.6321, "step": 1470 }, { "epoch": 0.6637337845459673, "grad_norm": 3.2506207588342115, "learning_rate": 5.367212386809073e-07, "loss": 0.4709, "step": 1471 }, { "epoch": 0.6641849971799211, "grad_norm": 2.7879147797631463, "learning_rate": 5.354261925228666e-07, "loss": 0.4185, "step": 1472 }, { "epoch": 0.6646362098138748, "grad_norm": 3.372496137176609, "learning_rate": 5.341321392100655e-07, "loss": 0.5043, "step": 1473 }, { "epoch": 0.6650874224478286, "grad_norm": 3.3089075981861913, "learning_rate": 5.32839081508038e-07, "loss": 0.5101, "step": 1474 }, { "epoch": 0.6655386350817822, "grad_norm": 3.141227268377863, "learning_rate": 5.315470221801905e-07, "loss": 0.5693, "step": 1475 }, { "epoch": 0.665989847715736, "grad_norm": 3.200943952541811, "learning_rate": 5.302559639877952e-07, "loss": 0.5679, "step": 1476 }, { "epoch": 0.6664410603496898, "grad_norm": 3.264422897311328, "learning_rate": 5.289659096899859e-07, "loss": 0.5888, "step": 1477 }, { "epoch": 0.6668922729836435, "grad_norm": 3.2617925134864456, "learning_rate": 5.2767686204375e-07, "loss": 0.618, "step": 1478 }, { "epoch": 0.6673434856175973, "grad_norm": 3.103687607569448, "learning_rate": 5.263888238039234e-07, "loss": 0.4523, "step": 1479 }, { "epoch": 0.667794698251551, "grad_norm": 3.288349444384094, "learning_rate": 5.251017977231862e-07, "loss": 0.5756, "step": 1480 }, { "epoch": 0.6682459108855048, "grad_norm": 3.3422954628890325, "learning_rate": 5.238157865520538e-07, "loss": 0.5607, "step": 1481 }, { "epoch": 0.6686971235194585, "grad_norm": 2.9514424726562414, "learning_rate": 5.225307930388736e-07, "loss": 0.5046, "step": 1482 }, { "epoch": 0.6691483361534123, "grad_norm": 3.381792256243614, "learning_rate": 5.212468199298177e-07, "loss": 0.5893, "step": 1483 }, { "epoch": 0.669599548787366, "grad_norm": 2.974690993406971, "learning_rate": 5.199638699688771e-07, "loss": 0.6419, "step": 1484 }, { "epoch": 0.6700507614213198, "grad_norm": 3.323658905812843, "learning_rate": 5.186819458978577e-07, "loss": 0.575, "step": 1485 }, { "epoch": 0.6705019740552736, "grad_norm": 3.1513857698424426, "learning_rate": 5.174010504563715e-07, "loss": 0.4734, "step": 1486 }, { "epoch": 0.6709531866892273, "grad_norm": 2.8289877159247427, "learning_rate": 5.161211863818328e-07, "loss": 0.5453, "step": 1487 }, { "epoch": 0.6714043993231811, "grad_norm": 3.288966427725167, "learning_rate": 5.148423564094516e-07, "loss": 0.5392, "step": 1488 }, { "epoch": 0.6718556119571348, "grad_norm": 3.224946055142921, "learning_rate": 5.135645632722276e-07, "loss": 0.5281, "step": 1489 }, { "epoch": 0.6723068245910886, "grad_norm": 3.5118397978024625, "learning_rate": 5.122878097009459e-07, "loss": 0.4893, "step": 1490 }, { "epoch": 0.6727580372250423, "grad_norm": 3.288392437565718, "learning_rate": 5.110120984241686e-07, "loss": 0.528, "step": 1491 }, { "epoch": 0.6732092498589961, "grad_norm": 3.3121054039299254, "learning_rate": 5.097374321682303e-07, "loss": 0.5505, "step": 1492 }, { "epoch": 0.6736604624929498, "grad_norm": 3.1922673429233317, "learning_rate": 5.084638136572337e-07, "loss": 0.5267, "step": 1493 }, { "epoch": 0.6741116751269035, "grad_norm": 3.1998010986229546, "learning_rate": 5.071912456130409e-07, "loss": 0.6137, "step": 1494 }, { "epoch": 0.6745628877608573, "grad_norm": 2.9001454970418448, "learning_rate": 5.059197307552697e-07, "loss": 0.5806, "step": 1495 }, { "epoch": 0.675014100394811, "grad_norm": 3.381015585223304, "learning_rate": 5.046492718012869e-07, "loss": 0.5509, "step": 1496 }, { "epoch": 0.6754653130287648, "grad_norm": 3.518395239900884, "learning_rate": 5.033798714662023e-07, "loss": 0.5119, "step": 1497 }, { "epoch": 0.6759165256627185, "grad_norm": 2.9919505064871705, "learning_rate": 5.021115324628647e-07, "loss": 0.5477, "step": 1498 }, { "epoch": 0.6763677382966723, "grad_norm": 3.1660202084920095, "learning_rate": 5.008442575018533e-07, "loss": 0.4414, "step": 1499 }, { "epoch": 0.676818950930626, "grad_norm": 3.3320708896626527, "learning_rate": 4.995780492914736e-07, "loss": 0.6299, "step": 1500 }, { "epoch": 0.6772701635645798, "grad_norm": 2.872291861282731, "learning_rate": 4.983129105377527e-07, "loss": 0.5124, "step": 1501 }, { "epoch": 0.6777213761985336, "grad_norm": 3.1930837083504997, "learning_rate": 4.970488439444296e-07, "loss": 0.576, "step": 1502 }, { "epoch": 0.6781725888324873, "grad_norm": 3.172575535409837, "learning_rate": 4.957858522129544e-07, "loss": 0.5872, "step": 1503 }, { "epoch": 0.6786238014664411, "grad_norm": 3.0896523132819573, "learning_rate": 4.945239380424786e-07, "loss": 0.5427, "step": 1504 }, { "epoch": 0.6790750141003948, "grad_norm": 2.8752434836894243, "learning_rate": 4.932631041298513e-07, "loss": 0.505, "step": 1505 }, { "epoch": 0.6795262267343486, "grad_norm": 2.973393299716313, "learning_rate": 4.920033531696136e-07, "loss": 0.5431, "step": 1506 }, { "epoch": 0.6799774393683023, "grad_norm": 2.9302261733817225, "learning_rate": 4.907446878539912e-07, "loss": 0.5481, "step": 1507 }, { "epoch": 0.6804286520022561, "grad_norm": 2.6511702770482266, "learning_rate": 4.894871108728903e-07, "loss": 0.4476, "step": 1508 }, { "epoch": 0.6808798646362099, "grad_norm": 3.741059117355058, "learning_rate": 4.882306249138909e-07, "loss": 0.5295, "step": 1509 }, { "epoch": 0.6813310772701635, "grad_norm": 3.1716709800903184, "learning_rate": 4.86975232662241e-07, "loss": 0.6054, "step": 1510 }, { "epoch": 0.6817822899041173, "grad_norm": 2.6341030249717026, "learning_rate": 4.857209368008524e-07, "loss": 0.5144, "step": 1511 }, { "epoch": 0.682233502538071, "grad_norm": 3.422325855185031, "learning_rate": 4.844677400102929e-07, "loss": 0.538, "step": 1512 }, { "epoch": 0.6826847151720248, "grad_norm": 3.1391602045967444, "learning_rate": 4.832156449687811e-07, "loss": 0.5469, "step": 1513 }, { "epoch": 0.6831359278059785, "grad_norm": 3.7792088207002283, "learning_rate": 4.819646543521824e-07, "loss": 0.579, "step": 1514 }, { "epoch": 0.6835871404399323, "grad_norm": 3.639662253926238, "learning_rate": 4.807147708340009e-07, "loss": 0.5355, "step": 1515 }, { "epoch": 0.684038353073886, "grad_norm": 3.0964113537901916, "learning_rate": 4.794659970853748e-07, "loss": 0.5282, "step": 1516 }, { "epoch": 0.6844895657078398, "grad_norm": 3.3232471100038894, "learning_rate": 4.782183357750707e-07, "loss": 0.6801, "step": 1517 }, { "epoch": 0.6849407783417936, "grad_norm": 3.3328995149094305, "learning_rate": 4.769717895694774e-07, "loss": 0.5651, "step": 1518 }, { "epoch": 0.6853919909757473, "grad_norm": 2.9076854833073575, "learning_rate": 4.7572636113260176e-07, "loss": 0.6316, "step": 1519 }, { "epoch": 0.6858432036097011, "grad_norm": 3.5121904313302874, "learning_rate": 4.744820531260608e-07, "loss": 0.4922, "step": 1520 }, { "epoch": 0.6862944162436548, "grad_norm": 2.946462024735822, "learning_rate": 4.732388682090771e-07, "loss": 0.4305, "step": 1521 }, { "epoch": 0.6867456288776086, "grad_norm": 3.3183567408446493, "learning_rate": 4.7199680903847426e-07, "loss": 0.6194, "step": 1522 }, { "epoch": 0.6871968415115624, "grad_norm": 2.970061784609735, "learning_rate": 4.7075587826866767e-07, "loss": 0.5048, "step": 1523 }, { "epoch": 0.6876480541455161, "grad_norm": 3.3231922435824757, "learning_rate": 4.695160785516639e-07, "loss": 0.6005, "step": 1524 }, { "epoch": 0.6880992667794699, "grad_norm": 3.248780126563559, "learning_rate": 4.6827741253705054e-07, "loss": 0.5268, "step": 1525 }, { "epoch": 0.6885504794134236, "grad_norm": 2.9816778422414076, "learning_rate": 4.670398828719926e-07, "loss": 0.5717, "step": 1526 }, { "epoch": 0.6890016920473774, "grad_norm": 3.2874556360576883, "learning_rate": 4.658034922012275e-07, "loss": 0.5634, "step": 1527 }, { "epoch": 0.689452904681331, "grad_norm": 3.2116212084250373, "learning_rate": 4.6456824316705725e-07, "loss": 0.5602, "step": 1528 }, { "epoch": 0.6899041173152848, "grad_norm": 3.4186841183082364, "learning_rate": 4.6333413840934575e-07, "loss": 0.6244, "step": 1529 }, { "epoch": 0.6903553299492385, "grad_norm": 3.1280202580241614, "learning_rate": 4.621011805655093e-07, "loss": 0.4692, "step": 1530 }, { "epoch": 0.6908065425831923, "grad_norm": 2.973990284802939, "learning_rate": 4.60869372270514e-07, "loss": 0.5565, "step": 1531 }, { "epoch": 0.691257755217146, "grad_norm": 3.7131828396505706, "learning_rate": 4.5963871615687045e-07, "loss": 0.6157, "step": 1532 }, { "epoch": 0.6917089678510998, "grad_norm": 2.8869266707436823, "learning_rate": 4.584092148546254e-07, "loss": 0.6354, "step": 1533 }, { "epoch": 0.6921601804850536, "grad_norm": 3.3918689462338265, "learning_rate": 4.5718087099135773e-07, "loss": 0.5702, "step": 1534 }, { "epoch": 0.6926113931190073, "grad_norm": 3.2001428288161344, "learning_rate": 4.5595368719217397e-07, "loss": 0.5083, "step": 1535 }, { "epoch": 0.6930626057529611, "grad_norm": 3.084523211589884, "learning_rate": 4.5472766607970024e-07, "loss": 0.5727, "step": 1536 }, { "epoch": 0.6935138183869148, "grad_norm": 3.157807512575682, "learning_rate": 4.5350281027407843e-07, "loss": 0.5678, "step": 1537 }, { "epoch": 0.6939650310208686, "grad_norm": 3.2071372806652905, "learning_rate": 4.522791223929596e-07, "loss": 0.5597, "step": 1538 }, { "epoch": 0.6944162436548224, "grad_norm": 3.225339649209973, "learning_rate": 4.51056605051499e-07, "loss": 0.5693, "step": 1539 }, { "epoch": 0.6948674562887761, "grad_norm": 3.0766839452947856, "learning_rate": 4.4983526086235103e-07, "loss": 0.5093, "step": 1540 }, { "epoch": 0.6953186689227299, "grad_norm": 3.3661435682809535, "learning_rate": 4.4861509243566164e-07, "loss": 0.6246, "step": 1541 }, { "epoch": 0.6957698815566836, "grad_norm": 2.782237105741097, "learning_rate": 4.4739610237906545e-07, "loss": 0.5195, "step": 1542 }, { "epoch": 0.6962210941906374, "grad_norm": 3.1390391034941563, "learning_rate": 4.461782932976782e-07, "loss": 0.6254, "step": 1543 }, { "epoch": 0.6966723068245911, "grad_norm": 2.837001264646572, "learning_rate": 4.4496166779409026e-07, "loss": 0.5682, "step": 1544 }, { "epoch": 0.6971235194585449, "grad_norm": 3.151679334520682, "learning_rate": 4.437462284683653e-07, "loss": 0.5967, "step": 1545 }, { "epoch": 0.6975747320924985, "grad_norm": 3.2614811697248096, "learning_rate": 4.4253197791802967e-07, "loss": 0.4696, "step": 1546 }, { "epoch": 0.6980259447264523, "grad_norm": 3.1064249116523706, "learning_rate": 4.4131891873807103e-07, "loss": 0.5832, "step": 1547 }, { "epoch": 0.698477157360406, "grad_norm": 3.7129814910314884, "learning_rate": 4.401070535209296e-07, "loss": 0.6575, "step": 1548 }, { "epoch": 0.6989283699943598, "grad_norm": 3.5598656251240293, "learning_rate": 4.3889638485649405e-07, "loss": 0.5255, "step": 1549 }, { "epoch": 0.6993795826283136, "grad_norm": 3.0419102505278355, "learning_rate": 4.3768691533209735e-07, "loss": 0.5589, "step": 1550 }, { "epoch": 0.6998307952622673, "grad_norm": 3.250677316394861, "learning_rate": 4.364786475325072e-07, "loss": 0.4775, "step": 1551 }, { "epoch": 0.7002820078962211, "grad_norm": 3.012263276867671, "learning_rate": 4.3527158403992567e-07, "loss": 0.503, "step": 1552 }, { "epoch": 0.7007332205301748, "grad_norm": 3.120176925995655, "learning_rate": 4.3406572743397975e-07, "loss": 0.6046, "step": 1553 }, { "epoch": 0.7011844331641286, "grad_norm": 3.134496372403675, "learning_rate": 4.3286108029171685e-07, "loss": 0.5061, "step": 1554 }, { "epoch": 0.7016356457980824, "grad_norm": 3.5153313421474373, "learning_rate": 4.3165764518760104e-07, "loss": 0.556, "step": 1555 }, { "epoch": 0.7020868584320361, "grad_norm": 3.361928587880027, "learning_rate": 4.304554246935049e-07, "loss": 0.5555, "step": 1556 }, { "epoch": 0.7025380710659899, "grad_norm": 3.113389444703251, "learning_rate": 4.292544213787056e-07, "loss": 0.508, "step": 1557 }, { "epoch": 0.7029892836999436, "grad_norm": 2.951119900702608, "learning_rate": 4.280546378098792e-07, "loss": 0.4627, "step": 1558 }, { "epoch": 0.7034404963338974, "grad_norm": 3.3068113437135747, "learning_rate": 4.2685607655109446e-07, "loss": 0.4641, "step": 1559 }, { "epoch": 0.7038917089678511, "grad_norm": 3.443473545652759, "learning_rate": 4.256587401638091e-07, "loss": 0.5881, "step": 1560 }, { "epoch": 0.7043429216018049, "grad_norm": 3.672614498272048, "learning_rate": 4.244626312068622e-07, "loss": 0.5562, "step": 1561 }, { "epoch": 0.7047941342357587, "grad_norm": 3.1807964103718316, "learning_rate": 4.232677522364696e-07, "loss": 0.514, "step": 1562 }, { "epoch": 0.7052453468697123, "grad_norm": 2.8379876257690118, "learning_rate": 4.220741058062194e-07, "loss": 0.6209, "step": 1563 }, { "epoch": 0.705696559503666, "grad_norm": 3.115298174903329, "learning_rate": 4.2088169446706487e-07, "loss": 0.5598, "step": 1564 }, { "epoch": 0.7061477721376198, "grad_norm": 3.616655218693416, "learning_rate": 4.1969052076732005e-07, "loss": 0.5609, "step": 1565 }, { "epoch": 0.7065989847715736, "grad_norm": 3.209766337154335, "learning_rate": 4.185005872526538e-07, "loss": 0.5969, "step": 1566 }, { "epoch": 0.7070501974055273, "grad_norm": 3.469400141091681, "learning_rate": 4.1731189646608434e-07, "loss": 0.6814, "step": 1567 }, { "epoch": 0.7075014100394811, "grad_norm": 3.1397943853367374, "learning_rate": 4.161244509479751e-07, "loss": 0.5017, "step": 1568 }, { "epoch": 0.7079526226734348, "grad_norm": 3.5877431815311436, "learning_rate": 4.1493825323602737e-07, "loss": 0.6132, "step": 1569 }, { "epoch": 0.7084038353073886, "grad_norm": 2.681923696233627, "learning_rate": 4.137533058652758e-07, "loss": 0.516, "step": 1570 }, { "epoch": 0.7088550479413424, "grad_norm": 3.389585300189951, "learning_rate": 4.12569611368083e-07, "loss": 0.5234, "step": 1571 }, { "epoch": 0.7093062605752961, "grad_norm": 3.180876954986152, "learning_rate": 4.113871722741337e-07, "loss": 0.4276, "step": 1572 }, { "epoch": 0.7097574732092499, "grad_norm": 3.932081857493054, "learning_rate": 4.1020599111043084e-07, "loss": 0.5718, "step": 1573 }, { "epoch": 0.7102086858432036, "grad_norm": 3.0660045119592416, "learning_rate": 4.09026070401288e-07, "loss": 0.4444, "step": 1574 }, { "epoch": 0.7106598984771574, "grad_norm": 2.9150397257470377, "learning_rate": 4.078474126683249e-07, "loss": 0.5647, "step": 1575 }, { "epoch": 0.7111111111111111, "grad_norm": 3.260434948896054, "learning_rate": 4.0667002043046304e-07, "loss": 0.5476, "step": 1576 }, { "epoch": 0.7115623237450649, "grad_norm": 3.2788448633519867, "learning_rate": 4.0549389620391896e-07, "loss": 0.5775, "step": 1577 }, { "epoch": 0.7120135363790187, "grad_norm": 3.0034190041080064, "learning_rate": 4.0431904250219893e-07, "loss": 0.5515, "step": 1578 }, { "epoch": 0.7124647490129724, "grad_norm": 3.2382651735959223, "learning_rate": 4.031454618360944e-07, "loss": 0.4369, "step": 1579 }, { "epoch": 0.7129159616469262, "grad_norm": 3.0531108303993135, "learning_rate": 4.0197315671367593e-07, "loss": 0.5064, "step": 1580 }, { "epoch": 0.7133671742808798, "grad_norm": 2.9453398873817025, "learning_rate": 4.008021296402888e-07, "loss": 0.427, "step": 1581 }, { "epoch": 0.7138183869148336, "grad_norm": 3.265675895615181, "learning_rate": 3.996323831185462e-07, "loss": 0.6767, "step": 1582 }, { "epoch": 0.7142695995487873, "grad_norm": 2.9358827987112535, "learning_rate": 3.984639196483245e-07, "loss": 0.5785, "step": 1583 }, { "epoch": 0.7147208121827411, "grad_norm": 3.3867250177331782, "learning_rate": 3.9729674172675954e-07, "loss": 0.5236, "step": 1584 }, { "epoch": 0.7151720248166948, "grad_norm": 3.31448568517522, "learning_rate": 3.961308518482372e-07, "loss": 0.609, "step": 1585 }, { "epoch": 0.7156232374506486, "grad_norm": 3.71248092590625, "learning_rate": 3.949662525043934e-07, "loss": 0.6091, "step": 1586 }, { "epoch": 0.7160744500846024, "grad_norm": 3.8185840047248125, "learning_rate": 3.9380294618410434e-07, "loss": 0.6641, "step": 1587 }, { "epoch": 0.7165256627185561, "grad_norm": 3.3840710545866632, "learning_rate": 3.92640935373483e-07, "loss": 0.5043, "step": 1588 }, { "epoch": 0.7169768753525099, "grad_norm": 3.109852062826725, "learning_rate": 3.9148022255587487e-07, "loss": 0.5068, "step": 1589 }, { "epoch": 0.7174280879864636, "grad_norm": 3.0466909947937184, "learning_rate": 3.9032081021185024e-07, "loss": 0.8023, "step": 1590 }, { "epoch": 0.7178793006204174, "grad_norm": 3.494445337066531, "learning_rate": 3.891627008192007e-07, "loss": 0.6285, "step": 1591 }, { "epoch": 0.7183305132543711, "grad_norm": 3.0639765960841387, "learning_rate": 3.8800589685293294e-07, "loss": 0.4803, "step": 1592 }, { "epoch": 0.7187817258883249, "grad_norm": 3.370717647642269, "learning_rate": 3.868504007852641e-07, "loss": 0.5468, "step": 1593 }, { "epoch": 0.7192329385222787, "grad_norm": 3.252865929125753, "learning_rate": 3.8569621508561666e-07, "loss": 0.5753, "step": 1594 }, { "epoch": 0.7196841511562324, "grad_norm": 3.1394814071549484, "learning_rate": 3.845433422206118e-07, "loss": 0.4729, "step": 1595 }, { "epoch": 0.7201353637901862, "grad_norm": 3.1631245452667973, "learning_rate": 3.833917846540651e-07, "loss": 0.6474, "step": 1596 }, { "epoch": 0.7205865764241399, "grad_norm": 3.162870418400208, "learning_rate": 3.8224154484698234e-07, "loss": 0.4195, "step": 1597 }, { "epoch": 0.7210377890580937, "grad_norm": 3.161335436131402, "learning_rate": 3.8109262525755183e-07, "loss": 0.5058, "step": 1598 }, { "epoch": 0.7214890016920473, "grad_norm": 3.3185670575255433, "learning_rate": 3.7994502834114085e-07, "loss": 0.4979, "step": 1599 }, { "epoch": 0.7219402143260011, "grad_norm": 3.176929378675548, "learning_rate": 3.7879875655029017e-07, "loss": 0.5728, "step": 1600 }, { "epoch": 0.7223914269599548, "grad_norm": 3.1684081635191066, "learning_rate": 3.7765381233470794e-07, "loss": 0.4601, "step": 1601 }, { "epoch": 0.7228426395939086, "grad_norm": 3.127695957202654, "learning_rate": 3.765101981412665e-07, "loss": 0.5002, "step": 1602 }, { "epoch": 0.7232938522278624, "grad_norm": 3.219550164344783, "learning_rate": 3.753679164139947e-07, "loss": 0.5385, "step": 1603 }, { "epoch": 0.7237450648618161, "grad_norm": 3.601540512399754, "learning_rate": 3.742269695940734e-07, "loss": 0.664, "step": 1604 }, { "epoch": 0.7241962774957699, "grad_norm": 3.375042559176746, "learning_rate": 3.730873601198326e-07, "loss": 0.6187, "step": 1605 }, { "epoch": 0.7246474901297236, "grad_norm": 3.0597581559151794, "learning_rate": 3.7194909042674115e-07, "loss": 0.4807, "step": 1606 }, { "epoch": 0.7250987027636774, "grad_norm": 2.9753785627396105, "learning_rate": 3.708121629474077e-07, "loss": 0.5375, "step": 1607 }, { "epoch": 0.7255499153976311, "grad_norm": 2.9762038394272765, "learning_rate": 3.6967658011157054e-07, "loss": 0.4504, "step": 1608 }, { "epoch": 0.7260011280315849, "grad_norm": 3.1965956621894063, "learning_rate": 3.6854234434609477e-07, "loss": 0.5014, "step": 1609 }, { "epoch": 0.7264523406655387, "grad_norm": 3.499485873703523, "learning_rate": 3.6740945807496736e-07, "loss": 0.4979, "step": 1610 }, { "epoch": 0.7269035532994924, "grad_norm": 3.2588338513217274, "learning_rate": 3.662779237192899e-07, "loss": 0.5223, "step": 1611 }, { "epoch": 0.7273547659334462, "grad_norm": 3.0490054858602584, "learning_rate": 3.6514774369727676e-07, "loss": 0.5, "step": 1612 }, { "epoch": 0.7278059785673999, "grad_norm": 3.12273878068711, "learning_rate": 3.6401892042424556e-07, "loss": 0.5755, "step": 1613 }, { "epoch": 0.7282571912013537, "grad_norm": 3.073986060170658, "learning_rate": 3.6289145631261554e-07, "loss": 0.5491, "step": 1614 }, { "epoch": 0.7287084038353074, "grad_norm": 3.290781958913942, "learning_rate": 3.617653537719022e-07, "loss": 0.5943, "step": 1615 }, { "epoch": 0.7291596164692611, "grad_norm": 3.400579286261728, "learning_rate": 3.606406152087095e-07, "loss": 0.5581, "step": 1616 }, { "epoch": 0.7296108291032148, "grad_norm": 3.450947773384265, "learning_rate": 3.595172430267279e-07, "loss": 0.5873, "step": 1617 }, { "epoch": 0.7300620417371686, "grad_norm": 2.8791493307158014, "learning_rate": 3.583952396267269e-07, "loss": 0.5159, "step": 1618 }, { "epoch": 0.7305132543711224, "grad_norm": 3.3597310964990994, "learning_rate": 3.572746074065509e-07, "loss": 0.5292, "step": 1619 }, { "epoch": 0.7309644670050761, "grad_norm": 3.2911307641618457, "learning_rate": 3.56155348761114e-07, "loss": 0.6023, "step": 1620 }, { "epoch": 0.7314156796390299, "grad_norm": 3.208895269155749, "learning_rate": 3.550374660823948e-07, "loss": 0.6399, "step": 1621 }, { "epoch": 0.7318668922729836, "grad_norm": 3.411723131804797, "learning_rate": 3.539209617594311e-07, "loss": 0.5386, "step": 1622 }, { "epoch": 0.7323181049069374, "grad_norm": 3.017726904850449, "learning_rate": 3.5280583817831577e-07, "loss": 0.5199, "step": 1623 }, { "epoch": 0.7327693175408911, "grad_norm": 3.448141993818582, "learning_rate": 3.516920977221898e-07, "loss": 0.6594, "step": 1624 }, { "epoch": 0.7332205301748449, "grad_norm": 3.1018446805384268, "learning_rate": 3.5057974277123935e-07, "loss": 0.432, "step": 1625 }, { "epoch": 0.7336717428087987, "grad_norm": 3.292832340665522, "learning_rate": 3.494687757026894e-07, "loss": 0.6043, "step": 1626 }, { "epoch": 0.7341229554427524, "grad_norm": 3.37654871788207, "learning_rate": 3.483591988907972e-07, "loss": 0.6076, "step": 1627 }, { "epoch": 0.7345741680767062, "grad_norm": 2.9139739574958794, "learning_rate": 3.472510147068515e-07, "loss": 0.5498, "step": 1628 }, { "epoch": 0.7350253807106599, "grad_norm": 3.0177269585869415, "learning_rate": 3.461442255191628e-07, "loss": 0.4855, "step": 1629 }, { "epoch": 0.7354765933446137, "grad_norm": 3.5169507921427083, "learning_rate": 3.4503883369306176e-07, "loss": 0.5087, "step": 1630 }, { "epoch": 0.7359278059785674, "grad_norm": 3.028359718330194, "learning_rate": 3.439348415908918e-07, "loss": 0.5116, "step": 1631 }, { "epoch": 0.7363790186125212, "grad_norm": 3.3434023450295416, "learning_rate": 3.42832251572005e-07, "loss": 0.623, "step": 1632 }, { "epoch": 0.736830231246475, "grad_norm": 3.0861385669753822, "learning_rate": 3.4173106599275827e-07, "loss": 0.5863, "step": 1633 }, { "epoch": 0.7372814438804286, "grad_norm": 3.4871736340602344, "learning_rate": 3.406312872065047e-07, "loss": 0.6979, "step": 1634 }, { "epoch": 0.7377326565143824, "grad_norm": 3.3007203094285322, "learning_rate": 3.395329175635935e-07, "loss": 0.5642, "step": 1635 }, { "epoch": 0.7381838691483361, "grad_norm": 3.042983266092788, "learning_rate": 3.384359594113606e-07, "loss": 0.5794, "step": 1636 }, { "epoch": 0.7386350817822899, "grad_norm": 3.1322148623384174, "learning_rate": 3.373404150941258e-07, "loss": 0.5112, "step": 1637 }, { "epoch": 0.7390862944162436, "grad_norm": 3.3451076966064037, "learning_rate": 3.3624628695318845e-07, "loss": 0.5563, "step": 1638 }, { "epoch": 0.7395375070501974, "grad_norm": 2.898388910225105, "learning_rate": 3.3515357732682e-07, "loss": 0.5745, "step": 1639 }, { "epoch": 0.7399887196841511, "grad_norm": 3.3344993868159536, "learning_rate": 3.34062288550261e-07, "loss": 0.5693, "step": 1640 }, { "epoch": 0.7404399323181049, "grad_norm": 3.399041172987303, "learning_rate": 3.3297242295571526e-07, "loss": 0.4952, "step": 1641 }, { "epoch": 0.7408911449520587, "grad_norm": 3.552119321092643, "learning_rate": 3.3188398287234496e-07, "loss": 0.5461, "step": 1642 }, { "epoch": 0.7413423575860124, "grad_norm": 3.1787290156115406, "learning_rate": 3.307969706262669e-07, "loss": 0.6037, "step": 1643 }, { "epoch": 0.7417935702199662, "grad_norm": 3.5000027663014492, "learning_rate": 3.2971138854054505e-07, "loss": 0.5315, "step": 1644 }, { "epoch": 0.7422447828539199, "grad_norm": 3.1720666447080808, "learning_rate": 3.286272389351874e-07, "loss": 0.5329, "step": 1645 }, { "epoch": 0.7426959954878737, "grad_norm": 3.09152659928442, "learning_rate": 3.2754452412714153e-07, "loss": 0.4382, "step": 1646 }, { "epoch": 0.7431472081218274, "grad_norm": 2.798294812436822, "learning_rate": 3.2646324643028664e-07, "loss": 0.4975, "step": 1647 }, { "epoch": 0.7435984207557812, "grad_norm": 3.1293398353599393, "learning_rate": 3.2538340815543287e-07, "loss": 0.463, "step": 1648 }, { "epoch": 0.744049633389735, "grad_norm": 3.291575473368299, "learning_rate": 3.243050116103128e-07, "loss": 0.4967, "step": 1649 }, { "epoch": 0.7445008460236887, "grad_norm": 3.1465605176299825, "learning_rate": 3.2322805909957795e-07, "loss": 0.5417, "step": 1650 }, { "epoch": 0.7449520586576425, "grad_norm": 2.9534255260466624, "learning_rate": 3.221525529247949e-07, "loss": 0.484, "step": 1651 }, { "epoch": 0.7454032712915961, "grad_norm": 3.2799558091998215, "learning_rate": 3.21078495384438e-07, "loss": 0.5042, "step": 1652 }, { "epoch": 0.7458544839255499, "grad_norm": 3.3413912958342102, "learning_rate": 3.20005888773886e-07, "loss": 0.5555, "step": 1653 }, { "epoch": 0.7463056965595036, "grad_norm": 3.5148486290813548, "learning_rate": 3.189347353854173e-07, "loss": 0.5452, "step": 1654 }, { "epoch": 0.7467569091934574, "grad_norm": 3.235645631726398, "learning_rate": 3.178650375082038e-07, "loss": 0.5124, "step": 1655 }, { "epoch": 0.7472081218274111, "grad_norm": 3.335777914025934, "learning_rate": 3.16796797428308e-07, "loss": 0.551, "step": 1656 }, { "epoch": 0.7476593344613649, "grad_norm": 3.3336251599770117, "learning_rate": 3.157300174286759e-07, "loss": 0.4606, "step": 1657 }, { "epoch": 0.7481105470953187, "grad_norm": 3.3954076174897683, "learning_rate": 3.1466469978913325e-07, "loss": 0.5403, "step": 1658 }, { "epoch": 0.7485617597292724, "grad_norm": 3.1227703274844614, "learning_rate": 3.136008467863814e-07, "loss": 0.4239, "step": 1659 }, { "epoch": 0.7490129723632262, "grad_norm": 3.144214684776084, "learning_rate": 3.125384606939908e-07, "loss": 0.4472, "step": 1660 }, { "epoch": 0.7494641849971799, "grad_norm": 3.075384566057599, "learning_rate": 3.114775437823971e-07, "loss": 0.4441, "step": 1661 }, { "epoch": 0.7499153976311337, "grad_norm": 3.407644282112315, "learning_rate": 3.104180983188963e-07, "loss": 0.6542, "step": 1662 }, { "epoch": 0.7503666102650874, "grad_norm": 3.0829123476918703, "learning_rate": 3.0936012656763933e-07, "loss": 0.5066, "step": 1663 }, { "epoch": 0.7508178228990412, "grad_norm": 3.197121809224084, "learning_rate": 3.0830363078962853e-07, "loss": 0.5604, "step": 1664 }, { "epoch": 0.751269035532995, "grad_norm": 3.4708630812887162, "learning_rate": 3.0724861324271136e-07, "loss": 0.4919, "step": 1665 }, { "epoch": 0.7517202481669487, "grad_norm": 3.2521891509707173, "learning_rate": 3.061950761815755e-07, "loss": 0.6188, "step": 1666 }, { "epoch": 0.7521714608009025, "grad_norm": 3.047231852620941, "learning_rate": 3.0514302185774653e-07, "loss": 0.5921, "step": 1667 }, { "epoch": 0.7526226734348562, "grad_norm": 3.2765361212999204, "learning_rate": 3.040924525195786e-07, "loss": 0.5818, "step": 1668 }, { "epoch": 0.7530738860688099, "grad_norm": 3.299009096476747, "learning_rate": 3.030433704122549e-07, "loss": 0.5344, "step": 1669 }, { "epoch": 0.7535250987027636, "grad_norm": 2.905729407389529, "learning_rate": 3.0199577777777875e-07, "loss": 0.5905, "step": 1670 }, { "epoch": 0.7539763113367174, "grad_norm": 2.886347624748166, "learning_rate": 3.0094967685497017e-07, "loss": 0.5058, "step": 1671 }, { "epoch": 0.7544275239706711, "grad_norm": 3.3966367759928184, "learning_rate": 2.999050698794624e-07, "loss": 0.6179, "step": 1672 }, { "epoch": 0.7548787366046249, "grad_norm": 3.6135542251703874, "learning_rate": 2.9886195908369504e-07, "loss": 0.5298, "step": 1673 }, { "epoch": 0.7553299492385787, "grad_norm": 3.3688596527662864, "learning_rate": 2.9782034669691027e-07, "loss": 0.5498, "step": 1674 }, { "epoch": 0.7557811618725324, "grad_norm": 3.2566793348820537, "learning_rate": 2.9678023494514815e-07, "loss": 0.5749, "step": 1675 }, { "epoch": 0.7562323745064862, "grad_norm": 3.0116943004039576, "learning_rate": 2.957416260512414e-07, "loss": 0.4711, "step": 1676 }, { "epoch": 0.7566835871404399, "grad_norm": 3.52833333229487, "learning_rate": 2.94704522234812e-07, "loss": 0.5615, "step": 1677 }, { "epoch": 0.7571347997743937, "grad_norm": 3.075366987645075, "learning_rate": 2.936689257122642e-07, "loss": 0.485, "step": 1678 }, { "epoch": 0.7575860124083474, "grad_norm": 3.3966268101373216, "learning_rate": 2.926348386967813e-07, "loss": 0.6323, "step": 1679 }, { "epoch": 0.7580372250423012, "grad_norm": 2.8446858198235483, "learning_rate": 2.9160226339832137e-07, "loss": 0.4601, "step": 1680 }, { "epoch": 0.758488437676255, "grad_norm": 3.3602462761752934, "learning_rate": 2.9057120202361094e-07, "loss": 0.4662, "step": 1681 }, { "epoch": 0.7589396503102087, "grad_norm": 3.3791987672981794, "learning_rate": 2.895416567761414e-07, "loss": 0.6081, "step": 1682 }, { "epoch": 0.7593908629441625, "grad_norm": 3.4250371237897217, "learning_rate": 2.8851362985616387e-07, "loss": 0.5844, "step": 1683 }, { "epoch": 0.7598420755781162, "grad_norm": 3.46770568979179, "learning_rate": 2.874871234606846e-07, "loss": 0.5083, "step": 1684 }, { "epoch": 0.76029328821207, "grad_norm": 3.4131489026616357, "learning_rate": 2.86462139783461e-07, "loss": 0.5537, "step": 1685 }, { "epoch": 0.7607445008460237, "grad_norm": 3.270548918149311, "learning_rate": 2.854386810149955e-07, "loss": 0.4998, "step": 1686 }, { "epoch": 0.7611957134799774, "grad_norm": 3.530660029135758, "learning_rate": 2.8441674934253135e-07, "loss": 0.5188, "step": 1687 }, { "epoch": 0.7616469261139311, "grad_norm": 3.386488218146081, "learning_rate": 2.8339634695005e-07, "loss": 0.529, "step": 1688 }, { "epoch": 0.7620981387478849, "grad_norm": 3.1388677063133716, "learning_rate": 2.823774760182619e-07, "loss": 0.6524, "step": 1689 }, { "epoch": 0.7625493513818387, "grad_norm": 3.4194129454744906, "learning_rate": 2.813601387246073e-07, "loss": 0.6083, "step": 1690 }, { "epoch": 0.7630005640157924, "grad_norm": 3.253968578183408, "learning_rate": 2.8034433724324715e-07, "loss": 0.5288, "step": 1691 }, { "epoch": 0.7634517766497462, "grad_norm": 2.8786430697629752, "learning_rate": 2.7933007374506045e-07, "loss": 0.4924, "step": 1692 }, { "epoch": 0.7639029892836999, "grad_norm": 3.7085567077881403, "learning_rate": 2.783173503976405e-07, "loss": 0.6246, "step": 1693 }, { "epoch": 0.7643542019176537, "grad_norm": 2.977804758799455, "learning_rate": 2.7730616936528763e-07, "loss": 0.5176, "step": 1694 }, { "epoch": 0.7648054145516074, "grad_norm": 3.327404515557504, "learning_rate": 2.7629653280900744e-07, "loss": 0.5808, "step": 1695 }, { "epoch": 0.7652566271855612, "grad_norm": 3.069935757487436, "learning_rate": 2.7528844288650345e-07, "loss": 0.4892, "step": 1696 }, { "epoch": 0.765707839819515, "grad_norm": 3.2131875481565553, "learning_rate": 2.7428190175217437e-07, "loss": 0.5039, "step": 1697 }, { "epoch": 0.7661590524534687, "grad_norm": 2.9817468613273737, "learning_rate": 2.7327691155710976e-07, "loss": 0.5042, "step": 1698 }, { "epoch": 0.7666102650874225, "grad_norm": 3.3064977092399097, "learning_rate": 2.7227347444908347e-07, "loss": 0.4653, "step": 1699 }, { "epoch": 0.7670614777213762, "grad_norm": 3.0238137206901183, "learning_rate": 2.7127159257255136e-07, "loss": 0.464, "step": 1700 }, { "epoch": 0.76751269035533, "grad_norm": 3.169051965683476, "learning_rate": 2.7027126806864465e-07, "loss": 0.708, "step": 1701 }, { "epoch": 0.7679639029892837, "grad_norm": 3.133987039383184, "learning_rate": 2.692725030751668e-07, "loss": 0.5191, "step": 1702 }, { "epoch": 0.7684151156232375, "grad_norm": 3.1774428494885063, "learning_rate": 2.6827529972658816e-07, "loss": 0.5386, "step": 1703 }, { "epoch": 0.7688663282571913, "grad_norm": 3.1659341840204474, "learning_rate": 2.6727966015404144e-07, "loss": 0.5514, "step": 1704 }, { "epoch": 0.7693175408911449, "grad_norm": 3.567018735324809, "learning_rate": 2.662855864853184e-07, "loss": 0.5817, "step": 1705 }, { "epoch": 0.7697687535250987, "grad_norm": 3.1177868647304514, "learning_rate": 2.6529308084486334e-07, "loss": 0.5047, "step": 1706 }, { "epoch": 0.7702199661590524, "grad_norm": 3.6947986746375534, "learning_rate": 2.643021453537695e-07, "loss": 0.5996, "step": 1707 }, { "epoch": 0.7706711787930062, "grad_norm": 3.026216223845999, "learning_rate": 2.633127821297754e-07, "loss": 0.5287, "step": 1708 }, { "epoch": 0.7711223914269599, "grad_norm": 3.4446775186050633, "learning_rate": 2.623249932872589e-07, "loss": 0.5906, "step": 1709 }, { "epoch": 0.7715736040609137, "grad_norm": 3.5855936274951534, "learning_rate": 2.6133878093723227e-07, "loss": 0.5337, "step": 1710 }, { "epoch": 0.7720248166948674, "grad_norm": 3.28611792990953, "learning_rate": 2.603541471873405e-07, "loss": 0.552, "step": 1711 }, { "epoch": 0.7724760293288212, "grad_norm": 3.4992707402338823, "learning_rate": 2.5937109414185364e-07, "loss": 0.4644, "step": 1712 }, { "epoch": 0.772927241962775, "grad_norm": 2.9170590421526104, "learning_rate": 2.583896239016643e-07, "loss": 0.4364, "step": 1713 }, { "epoch": 0.7733784545967287, "grad_norm": 2.7757056581823782, "learning_rate": 2.5740973856428205e-07, "loss": 0.55, "step": 1714 }, { "epoch": 0.7738296672306825, "grad_norm": 3.4368687127239195, "learning_rate": 2.56431440223829e-07, "loss": 0.6541, "step": 1715 }, { "epoch": 0.7742808798646362, "grad_norm": 3.655602500119933, "learning_rate": 2.5545473097103723e-07, "loss": 0.5804, "step": 1716 }, { "epoch": 0.77473209249859, "grad_norm": 2.9071464976070995, "learning_rate": 2.5447961289324024e-07, "loss": 0.4734, "step": 1717 }, { "epoch": 0.7751833051325437, "grad_norm": 3.0403001990908383, "learning_rate": 2.5350608807437356e-07, "loss": 0.4796, "step": 1718 }, { "epoch": 0.7756345177664975, "grad_norm": 3.401670150665487, "learning_rate": 2.525341585949662e-07, "loss": 0.6389, "step": 1719 }, { "epoch": 0.7760857304004513, "grad_norm": 3.2058804560472125, "learning_rate": 2.5156382653213783e-07, "loss": 0.5616, "step": 1720 }, { "epoch": 0.776536943034405, "grad_norm": 2.9830405398098097, "learning_rate": 2.5059509395959523e-07, "loss": 0.4926, "step": 1721 }, { "epoch": 0.7769881556683587, "grad_norm": 3.120574964296058, "learning_rate": 2.496279629476261e-07, "loss": 0.5861, "step": 1722 }, { "epoch": 0.7774393683023124, "grad_norm": 3.390755640754869, "learning_rate": 2.4866243556309554e-07, "loss": 0.5654, "step": 1723 }, { "epoch": 0.7778905809362662, "grad_norm": 2.892098028372284, "learning_rate": 2.476985138694415e-07, "loss": 0.4677, "step": 1724 }, { "epoch": 0.7783417935702199, "grad_norm": 3.250949839615332, "learning_rate": 2.467361999266704e-07, "loss": 0.4674, "step": 1725 }, { "epoch": 0.7787930062041737, "grad_norm": 3.149351653501838, "learning_rate": 2.4577549579135317e-07, "loss": 0.5966, "step": 1726 }, { "epoch": 0.7792442188381274, "grad_norm": 2.983139524124556, "learning_rate": 2.448164035166199e-07, "loss": 0.4793, "step": 1727 }, { "epoch": 0.7796954314720812, "grad_norm": 3.5014980256358172, "learning_rate": 2.438589251521558e-07, "loss": 0.5634, "step": 1728 }, { "epoch": 0.780146644106035, "grad_norm": 2.9881299554568757, "learning_rate": 2.4290306274419793e-07, "loss": 0.5164, "step": 1729 }, { "epoch": 0.7805978567399887, "grad_norm": 3.116274993285628, "learning_rate": 2.419488183355284e-07, "loss": 0.5957, "step": 1730 }, { "epoch": 0.7810490693739425, "grad_norm": 3.148408583672423, "learning_rate": 2.409961939654729e-07, "loss": 0.4581, "step": 1731 }, { "epoch": 0.7815002820078962, "grad_norm": 2.9310980700141784, "learning_rate": 2.40045191669894e-07, "loss": 0.5326, "step": 1732 }, { "epoch": 0.78195149464185, "grad_norm": 2.841995600846533, "learning_rate": 2.39095813481188e-07, "loss": 0.3975, "step": 1733 }, { "epoch": 0.7824027072758037, "grad_norm": 3.360507399857603, "learning_rate": 2.381480614282807e-07, "loss": 0.5217, "step": 1734 }, { "epoch": 0.7828539199097575, "grad_norm": 3.3345269121171786, "learning_rate": 2.37201937536622e-07, "loss": 0.6034, "step": 1735 }, { "epoch": 0.7833051325437113, "grad_norm": 3.2457173184176327, "learning_rate": 2.362574438281827e-07, "loss": 0.5572, "step": 1736 }, { "epoch": 0.783756345177665, "grad_norm": 2.5998859002425654, "learning_rate": 2.353145823214495e-07, "loss": 0.4872, "step": 1737 }, { "epoch": 0.7842075578116188, "grad_norm": 3.3464133340861433, "learning_rate": 2.3437335503142063e-07, "loss": 0.5293, "step": 1738 }, { "epoch": 0.7846587704455725, "grad_norm": 3.4826017860389027, "learning_rate": 2.3343376396960278e-07, "loss": 0.487, "step": 1739 }, { "epoch": 0.7851099830795262, "grad_norm": 3.359966006460181, "learning_rate": 2.3249581114400507e-07, "loss": 0.6411, "step": 1740 }, { "epoch": 0.7855611957134799, "grad_norm": 3.108189759258103, "learning_rate": 2.3155949855913515e-07, "loss": 0.5771, "step": 1741 }, { "epoch": 0.7860124083474337, "grad_norm": 3.206925034569629, "learning_rate": 2.306248282159965e-07, "loss": 0.5679, "step": 1742 }, { "epoch": 0.7864636209813874, "grad_norm": 3.174330922672863, "learning_rate": 2.2969180211208195e-07, "loss": 0.5643, "step": 1743 }, { "epoch": 0.7869148336153412, "grad_norm": 3.1224925255844, "learning_rate": 2.2876042224137081e-07, "loss": 0.5932, "step": 1744 }, { "epoch": 0.787366046249295, "grad_norm": 3.1025659930391107, "learning_rate": 2.2783069059432415e-07, "loss": 0.5617, "step": 1745 }, { "epoch": 0.7878172588832487, "grad_norm": 3.4166793932429416, "learning_rate": 2.2690260915788029e-07, "loss": 0.6249, "step": 1746 }, { "epoch": 0.7882684715172025, "grad_norm": 3.62085438351349, "learning_rate": 2.2597617991545158e-07, "loss": 0.5649, "step": 1747 }, { "epoch": 0.7887196841511562, "grad_norm": 3.5339972671901885, "learning_rate": 2.2505140484691897e-07, "loss": 0.6335, "step": 1748 }, { "epoch": 0.78917089678511, "grad_norm": 3.4985614392697415, "learning_rate": 2.2412828592862798e-07, "loss": 0.6082, "step": 1749 }, { "epoch": 0.7896221094190637, "grad_norm": 3.267096741544989, "learning_rate": 2.2320682513338595e-07, "loss": 0.5948, "step": 1750 }, { "epoch": 0.7900733220530175, "grad_norm": 3.0173344659741486, "learning_rate": 2.2228702443045454e-07, "loss": 0.5234, "step": 1751 }, { "epoch": 0.7905245346869713, "grad_norm": 2.7526133536838358, "learning_rate": 2.213688857855499e-07, "loss": 0.4142, "step": 1752 }, { "epoch": 0.790975747320925, "grad_norm": 3.509571340813883, "learning_rate": 2.2045241116083467e-07, "loss": 0.602, "step": 1753 }, { "epoch": 0.7914269599548788, "grad_norm": 3.3780395439717323, "learning_rate": 2.195376025149156e-07, "loss": 0.5572, "step": 1754 }, { "epoch": 0.7918781725888325, "grad_norm": 3.3496281451911325, "learning_rate": 2.1862446180283966e-07, "loss": 0.5613, "step": 1755 }, { "epoch": 0.7923293852227863, "grad_norm": 3.0508967113065713, "learning_rate": 2.1771299097608864e-07, "loss": 0.47, "step": 1756 }, { "epoch": 0.79278059785674, "grad_norm": 3.1585837200192697, "learning_rate": 2.1680319198257568e-07, "loss": 0.4856, "step": 1757 }, { "epoch": 0.7932318104906937, "grad_norm": 2.8592335827975366, "learning_rate": 2.1589506676664125e-07, "loss": 0.5273, "step": 1758 }, { "epoch": 0.7936830231246474, "grad_norm": 3.5664002528716128, "learning_rate": 2.1498861726904838e-07, "loss": 0.4738, "step": 1759 }, { "epoch": 0.7941342357586012, "grad_norm": 3.3257110040445705, "learning_rate": 2.1408384542697953e-07, "loss": 0.5896, "step": 1760 }, { "epoch": 0.794585448392555, "grad_norm": 3.2893705188785627, "learning_rate": 2.131807531740315e-07, "loss": 0.5908, "step": 1761 }, { "epoch": 0.7950366610265087, "grad_norm": 3.2439198495340724, "learning_rate": 2.1227934244021106e-07, "loss": 0.5418, "step": 1762 }, { "epoch": 0.7954878736604625, "grad_norm": 2.890014525204039, "learning_rate": 2.113796151519327e-07, "loss": 0.5701, "step": 1763 }, { "epoch": 0.7959390862944162, "grad_norm": 2.9555433016653243, "learning_rate": 2.10481573232012e-07, "loss": 0.5724, "step": 1764 }, { "epoch": 0.79639029892837, "grad_norm": 3.1595499191577985, "learning_rate": 2.0958521859966317e-07, "loss": 0.58, "step": 1765 }, { "epoch": 0.7968415115623237, "grad_norm": 3.2406481995005683, "learning_rate": 2.0869055317049454e-07, "loss": 0.5815, "step": 1766 }, { "epoch": 0.7972927241962775, "grad_norm": 3.013567529689706, "learning_rate": 2.0779757885650407e-07, "loss": 0.4969, "step": 1767 }, { "epoch": 0.7977439368302313, "grad_norm": 3.285613814118948, "learning_rate": 2.0690629756607647e-07, "loss": 0.5407, "step": 1768 }, { "epoch": 0.798195149464185, "grad_norm": 3.05321531009751, "learning_rate": 2.0601671120397747e-07, "loss": 0.5402, "step": 1769 }, { "epoch": 0.7986463620981388, "grad_norm": 3.3714424558205, "learning_rate": 2.0512882167135047e-07, "loss": 0.5022, "step": 1770 }, { "epoch": 0.7990975747320925, "grad_norm": 3.3008668639596284, "learning_rate": 2.042426308657138e-07, "loss": 0.5349, "step": 1771 }, { "epoch": 0.7995487873660463, "grad_norm": 3.5138662487604444, "learning_rate": 2.0335814068095336e-07, "loss": 0.5402, "step": 1772 }, { "epoch": 0.8, "grad_norm": 2.9913939825555014, "learning_rate": 2.0247535300732265e-07, "loss": 0.5572, "step": 1773 }, { "epoch": 0.8004512126339538, "grad_norm": 3.6435559524273406, "learning_rate": 2.0159426973143568e-07, "loss": 0.4992, "step": 1774 }, { "epoch": 0.8009024252679074, "grad_norm": 3.2412230999711817, "learning_rate": 2.0071489273626374e-07, "loss": 0.6071, "step": 1775 }, { "epoch": 0.8013536379018612, "grad_norm": 2.969248009278257, "learning_rate": 1.9983722390113255e-07, "loss": 0.5, "step": 1776 }, { "epoch": 0.801804850535815, "grad_norm": 2.768580600024177, "learning_rate": 1.9896126510171641e-07, "loss": 0.4454, "step": 1777 }, { "epoch": 0.8022560631697687, "grad_norm": 3.2476101536079787, "learning_rate": 1.9808701821003614e-07, "loss": 0.5911, "step": 1778 }, { "epoch": 0.8027072758037225, "grad_norm": 2.9616657048741244, "learning_rate": 1.972144850944526e-07, "loss": 0.5681, "step": 1779 }, { "epoch": 0.8031584884376762, "grad_norm": 3.105204828849402, "learning_rate": 1.963436676196649e-07, "loss": 0.5379, "step": 1780 }, { "epoch": 0.80360970107163, "grad_norm": 3.292817844044641, "learning_rate": 1.95474567646706e-07, "loss": 0.5259, "step": 1781 }, { "epoch": 0.8040609137055837, "grad_norm": 3.264350635402717, "learning_rate": 1.9460718703293765e-07, "loss": 0.4252, "step": 1782 }, { "epoch": 0.8045121263395375, "grad_norm": 2.829404764906008, "learning_rate": 1.9374152763204777e-07, "loss": 0.5153, "step": 1783 }, { "epoch": 0.8049633389734913, "grad_norm": 3.2480177263767063, "learning_rate": 1.9287759129404536e-07, "loss": 0.4709, "step": 1784 }, { "epoch": 0.805414551607445, "grad_norm": 2.899713242999487, "learning_rate": 1.920153798652574e-07, "loss": 0.4611, "step": 1785 }, { "epoch": 0.8058657642413988, "grad_norm": 3.1635502073692345, "learning_rate": 1.9115489518832418e-07, "loss": 0.4709, "step": 1786 }, { "epoch": 0.8063169768753525, "grad_norm": 3.1508557769862438, "learning_rate": 1.9029613910219577e-07, "loss": 0.5433, "step": 1787 }, { "epoch": 0.8067681895093063, "grad_norm": 3.5210820961966425, "learning_rate": 1.8943911344212872e-07, "loss": 0.6146, "step": 1788 }, { "epoch": 0.80721940214326, "grad_norm": 3.3366881648206403, "learning_rate": 1.8858382003968077e-07, "loss": 0.5368, "step": 1789 }, { "epoch": 0.8076706147772138, "grad_norm": 3.3247727610257005, "learning_rate": 1.8773026072270759e-07, "loss": 0.8037, "step": 1790 }, { "epoch": 0.8081218274111676, "grad_norm": 3.4817252725861763, "learning_rate": 1.8687843731535956e-07, "loss": 0.5695, "step": 1791 }, { "epoch": 0.8085730400451213, "grad_norm": 3.336918219285363, "learning_rate": 1.8602835163807662e-07, "loss": 0.6866, "step": 1792 }, { "epoch": 0.809024252679075, "grad_norm": 3.614272310931608, "learning_rate": 1.8518000550758527e-07, "loss": 0.4701, "step": 1793 }, { "epoch": 0.8094754653130287, "grad_norm": 3.219275535851942, "learning_rate": 1.843334007368943e-07, "loss": 0.6077, "step": 1794 }, { "epoch": 0.8099266779469825, "grad_norm": 2.89680741186188, "learning_rate": 1.8348853913529083e-07, "loss": 0.6083, "step": 1795 }, { "epoch": 0.8103778905809362, "grad_norm": 3.6574243799798385, "learning_rate": 1.8264542250833748e-07, "loss": 0.6222, "step": 1796 }, { "epoch": 0.81082910321489, "grad_norm": 3.0840696058845793, "learning_rate": 1.8180405265786657e-07, "loss": 0.5185, "step": 1797 }, { "epoch": 0.8112803158488437, "grad_norm": 3.027696852321835, "learning_rate": 1.8096443138197804e-07, "loss": 0.5319, "step": 1798 }, { "epoch": 0.8117315284827975, "grad_norm": 3.159795651179558, "learning_rate": 1.801265604750347e-07, "loss": 0.5356, "step": 1799 }, { "epoch": 0.8121827411167513, "grad_norm": 3.3263427018468628, "learning_rate": 1.792904417276584e-07, "loss": 0.6881, "step": 1800 }, { "epoch": 0.812633953750705, "grad_norm": 3.491430348543849, "learning_rate": 1.7845607692672726e-07, "loss": 0.6757, "step": 1801 }, { "epoch": 0.8130851663846588, "grad_norm": 3.452496098068734, "learning_rate": 1.776234678553702e-07, "loss": 0.634, "step": 1802 }, { "epoch": 0.8135363790186125, "grad_norm": 3.046194568097652, "learning_rate": 1.7679261629296405e-07, "loss": 0.5394, "step": 1803 }, { "epoch": 0.8139875916525663, "grad_norm": 3.41189405392855, "learning_rate": 1.7596352401513025e-07, "loss": 0.6579, "step": 1804 }, { "epoch": 0.81443880428652, "grad_norm": 3.3370355365005433, "learning_rate": 1.7513619279372982e-07, "loss": 0.584, "step": 1805 }, { "epoch": 0.8148900169204738, "grad_norm": 3.1564896201684296, "learning_rate": 1.743106243968605e-07, "loss": 0.454, "step": 1806 }, { "epoch": 0.8153412295544276, "grad_norm": 3.4096665409080305, "learning_rate": 1.7348682058885244e-07, "loss": 0.5079, "step": 1807 }, { "epoch": 0.8157924421883813, "grad_norm": 3.422237532293, "learning_rate": 1.7266478313026467e-07, "loss": 0.49, "step": 1808 }, { "epoch": 0.8162436548223351, "grad_norm": 2.9182124049243043, "learning_rate": 1.71844513777882e-07, "loss": 0.5393, "step": 1809 }, { "epoch": 0.8166948674562888, "grad_norm": 3.298575760582701, "learning_rate": 1.7102601428470986e-07, "loss": 0.5132, "step": 1810 }, { "epoch": 0.8171460800902425, "grad_norm": 3.1076000971626323, "learning_rate": 1.7020928639997133e-07, "loss": 0.5096, "step": 1811 }, { "epoch": 0.8175972927241962, "grad_norm": 2.9576537154131914, "learning_rate": 1.6939433186910435e-07, "loss": 0.4751, "step": 1812 }, { "epoch": 0.81804850535815, "grad_norm": 3.1438542828461884, "learning_rate": 1.6858115243375516e-07, "loss": 0.635, "step": 1813 }, { "epoch": 0.8184997179921037, "grad_norm": 3.3925384992387566, "learning_rate": 1.6776974983177827e-07, "loss": 0.5006, "step": 1814 }, { "epoch": 0.8189509306260575, "grad_norm": 3.155615848697049, "learning_rate": 1.6696012579722986e-07, "loss": 0.5519, "step": 1815 }, { "epoch": 0.8194021432600113, "grad_norm": 3.2055385260502516, "learning_rate": 1.6615228206036524e-07, "loss": 0.5571, "step": 1816 }, { "epoch": 0.819853355893965, "grad_norm": 3.3036737895825485, "learning_rate": 1.6534622034763556e-07, "loss": 0.5955, "step": 1817 }, { "epoch": 0.8203045685279188, "grad_norm": 3.246191316789231, "learning_rate": 1.6454194238168318e-07, "loss": 0.4955, "step": 1818 }, { "epoch": 0.8207557811618725, "grad_norm": 3.7297330377785847, "learning_rate": 1.6373944988133815e-07, "loss": 0.6329, "step": 1819 }, { "epoch": 0.8212069937958263, "grad_norm": 3.3710927359486176, "learning_rate": 1.6293874456161516e-07, "loss": 0.6353, "step": 1820 }, { "epoch": 0.82165820642978, "grad_norm": 3.075825293080748, "learning_rate": 1.621398281337093e-07, "loss": 0.5383, "step": 1821 }, { "epoch": 0.8221094190637338, "grad_norm": 3.0510999526952673, "learning_rate": 1.6134270230499292e-07, "loss": 0.4723, "step": 1822 }, { "epoch": 0.8225606316976876, "grad_norm": 2.990800015781615, "learning_rate": 1.6054736877901154e-07, "loss": 0.5556, "step": 1823 }, { "epoch": 0.8230118443316413, "grad_norm": 3.2691319322666086, "learning_rate": 1.5975382925547965e-07, "loss": 0.6269, "step": 1824 }, { "epoch": 0.8234630569655951, "grad_norm": 3.36974210150047, "learning_rate": 1.5896208543027911e-07, "loss": 0.6239, "step": 1825 }, { "epoch": 0.8239142695995488, "grad_norm": 3.2846656450026863, "learning_rate": 1.5817213899545289e-07, "loss": 0.5369, "step": 1826 }, { "epoch": 0.8243654822335026, "grad_norm": 3.6310256851443357, "learning_rate": 1.5738399163920356e-07, "loss": 0.5704, "step": 1827 }, { "epoch": 0.8248166948674562, "grad_norm": 2.8015522594684414, "learning_rate": 1.5659764504588845e-07, "loss": 0.5314, "step": 1828 }, { "epoch": 0.82526790750141, "grad_norm": 2.9898520678955145, "learning_rate": 1.558131008960163e-07, "loss": 0.4779, "step": 1829 }, { "epoch": 0.8257191201353637, "grad_norm": 3.1623967382624345, "learning_rate": 1.5503036086624454e-07, "loss": 0.5098, "step": 1830 }, { "epoch": 0.8261703327693175, "grad_norm": 3.4072485811873543, "learning_rate": 1.5424942662937434e-07, "loss": 0.563, "step": 1831 }, { "epoch": 0.8266215454032713, "grad_norm": 2.855993326323611, "learning_rate": 1.5347029985434777e-07, "loss": 0.4537, "step": 1832 }, { "epoch": 0.827072758037225, "grad_norm": 3.4851239971474195, "learning_rate": 1.5269298220624505e-07, "loss": 0.6242, "step": 1833 }, { "epoch": 0.8275239706711788, "grad_norm": 3.388860727365143, "learning_rate": 1.5191747534627819e-07, "loss": 0.5445, "step": 1834 }, { "epoch": 0.8279751833051325, "grad_norm": 3.554115255182286, "learning_rate": 1.5114378093179147e-07, "loss": 0.5375, "step": 1835 }, { "epoch": 0.8284263959390863, "grad_norm": 2.655545323566283, "learning_rate": 1.5037190061625427e-07, "loss": 0.3131, "step": 1836 }, { "epoch": 0.82887760857304, "grad_norm": 3.3338895146881002, "learning_rate": 1.4960183604925968e-07, "loss": 0.5707, "step": 1837 }, { "epoch": 0.8293288212069938, "grad_norm": 3.218344870415116, "learning_rate": 1.4883358887652042e-07, "loss": 0.5006, "step": 1838 }, { "epoch": 0.8297800338409476, "grad_norm": 3.1468119802066203, "learning_rate": 1.4806716073986504e-07, "loss": 0.6662, "step": 1839 }, { "epoch": 0.8302312464749013, "grad_norm": 3.5160680404028493, "learning_rate": 1.4730255327723452e-07, "loss": 0.5002, "step": 1840 }, { "epoch": 0.8306824591088551, "grad_norm": 3.260575279604774, "learning_rate": 1.4653976812267898e-07, "loss": 0.5138, "step": 1841 }, { "epoch": 0.8311336717428088, "grad_norm": 2.9649200815505456, "learning_rate": 1.457788069063538e-07, "loss": 0.5762, "step": 1842 }, { "epoch": 0.8315848843767626, "grad_norm": 2.942782774050298, "learning_rate": 1.4501967125451718e-07, "loss": 0.5477, "step": 1843 }, { "epoch": 0.8320360970107163, "grad_norm": 3.0094337078574127, "learning_rate": 1.442623627895251e-07, "loss": 0.4498, "step": 1844 }, { "epoch": 0.8324873096446701, "grad_norm": 3.225704846247271, "learning_rate": 1.4350688312982862e-07, "loss": 0.6199, "step": 1845 }, { "epoch": 0.8329385222786237, "grad_norm": 3.1240951847264076, "learning_rate": 1.4275323388997117e-07, "loss": 0.4944, "step": 1846 }, { "epoch": 0.8333897349125775, "grad_norm": 3.5826594571402963, "learning_rate": 1.4200141668058396e-07, "loss": 0.6276, "step": 1847 }, { "epoch": 0.8338409475465313, "grad_norm": 3.3094708533679102, "learning_rate": 1.412514331083826e-07, "loss": 0.6394, "step": 1848 }, { "epoch": 0.834292160180485, "grad_norm": 3.5135736501243446, "learning_rate": 1.4050328477616458e-07, "loss": 0.5399, "step": 1849 }, { "epoch": 0.8347433728144388, "grad_norm": 3.482173704832845, "learning_rate": 1.3975697328280456e-07, "loss": 0.4879, "step": 1850 }, { "epoch": 0.8351945854483925, "grad_norm": 3.0662306539692543, "learning_rate": 1.3901250022325283e-07, "loss": 0.4534, "step": 1851 }, { "epoch": 0.8356457980823463, "grad_norm": 3.0598921330176214, "learning_rate": 1.382698671885295e-07, "loss": 0.4628, "step": 1852 }, { "epoch": 0.8360970107163, "grad_norm": 3.1520019214759456, "learning_rate": 1.3752907576572347e-07, "loss": 0.6246, "step": 1853 }, { "epoch": 0.8365482233502538, "grad_norm": 2.6571093677090363, "learning_rate": 1.3679012753798724e-07, "loss": 0.4899, "step": 1854 }, { "epoch": 0.8369994359842076, "grad_norm": 2.9836867986668523, "learning_rate": 1.3605302408453356e-07, "loss": 0.4217, "step": 1855 }, { "epoch": 0.8374506486181613, "grad_norm": 2.898222008405317, "learning_rate": 1.3531776698063436e-07, "loss": 0.5292, "step": 1856 }, { "epoch": 0.8379018612521151, "grad_norm": 2.950876249789325, "learning_rate": 1.3458435779761425e-07, "loss": 0.4231, "step": 1857 }, { "epoch": 0.8383530738860688, "grad_norm": 3.174963854424885, "learning_rate": 1.3385279810284956e-07, "loss": 0.4779, "step": 1858 }, { "epoch": 0.8388042865200226, "grad_norm": 3.135298443318726, "learning_rate": 1.3312308945976347e-07, "loss": 0.5428, "step": 1859 }, { "epoch": 0.8392554991539763, "grad_norm": 3.5971103192719505, "learning_rate": 1.3239523342782344e-07, "loss": 0.5815, "step": 1860 }, { "epoch": 0.8397067117879301, "grad_norm": 3.413779126654346, "learning_rate": 1.3166923156253817e-07, "loss": 0.5965, "step": 1861 }, { "epoch": 0.8401579244218839, "grad_norm": 3.397541524879773, "learning_rate": 1.309450854154528e-07, "loss": 0.6834, "step": 1862 }, { "epoch": 0.8406091370558376, "grad_norm": 3.328000762597347, "learning_rate": 1.3022279653414725e-07, "loss": 0.64, "step": 1863 }, { "epoch": 0.8410603496897913, "grad_norm": 3.0084605090395353, "learning_rate": 1.2950236646223244e-07, "loss": 0.5704, "step": 1864 }, { "epoch": 0.841511562323745, "grad_norm": 3.2459425568437053, "learning_rate": 1.2878379673934615e-07, "loss": 0.5634, "step": 1865 }, { "epoch": 0.8419627749576988, "grad_norm": 2.974415189859473, "learning_rate": 1.2806708890115137e-07, "loss": 0.4767, "step": 1866 }, { "epoch": 0.8424139875916525, "grad_norm": 3.3391852048885813, "learning_rate": 1.2735224447933102e-07, "loss": 0.528, "step": 1867 }, { "epoch": 0.8428652002256063, "grad_norm": 3.263226862766161, "learning_rate": 1.2663926500158618e-07, "loss": 0.6521, "step": 1868 }, { "epoch": 0.84331641285956, "grad_norm": 3.208837226211113, "learning_rate": 1.2592815199163244e-07, "loss": 0.4281, "step": 1869 }, { "epoch": 0.8437676254935138, "grad_norm": 3.704953009121333, "learning_rate": 1.2521890696919602e-07, "loss": 0.6876, "step": 1870 }, { "epoch": 0.8442188381274676, "grad_norm": 3.1149574368441075, "learning_rate": 1.245115314500118e-07, "loss": 0.5546, "step": 1871 }, { "epoch": 0.8446700507614213, "grad_norm": 3.2673584564454083, "learning_rate": 1.2380602694581888e-07, "loss": 0.5626, "step": 1872 }, { "epoch": 0.8451212633953751, "grad_norm": 3.6047315203131025, "learning_rate": 1.2310239496435748e-07, "loss": 0.6006, "step": 1873 }, { "epoch": 0.8455724760293288, "grad_norm": 3.3754199818311004, "learning_rate": 1.224006370093672e-07, "loss": 0.5484, "step": 1874 }, { "epoch": 0.8460236886632826, "grad_norm": 3.299566444490645, "learning_rate": 1.2170075458058083e-07, "loss": 0.5683, "step": 1875 }, { "epoch": 0.8464749012972363, "grad_norm": 3.1617484748413154, "learning_rate": 1.2100274917372477e-07, "loss": 0.5741, "step": 1876 }, { "epoch": 0.8469261139311901, "grad_norm": 3.2176898145338533, "learning_rate": 1.203066222805129e-07, "loss": 0.5392, "step": 1877 }, { "epoch": 0.8473773265651439, "grad_norm": 2.6695851978983662, "learning_rate": 1.1961237538864467e-07, "loss": 0.586, "step": 1878 }, { "epoch": 0.8478285391990976, "grad_norm": 3.2205550742995057, "learning_rate": 1.189200099818024e-07, "loss": 0.5225, "step": 1879 }, { "epoch": 0.8482797518330514, "grad_norm": 3.1361790423595144, "learning_rate": 1.1822952753964666e-07, "loss": 0.4771, "step": 1880 }, { "epoch": 0.848730964467005, "grad_norm": 2.9693045075564726, "learning_rate": 1.1754092953781425e-07, "loss": 0.582, "step": 1881 }, { "epoch": 0.8491821771009588, "grad_norm": 3.010501184967778, "learning_rate": 1.168542174479148e-07, "loss": 0.4967, "step": 1882 }, { "epoch": 0.8496333897349125, "grad_norm": 2.9444396444036416, "learning_rate": 1.1616939273752713e-07, "loss": 0.597, "step": 1883 }, { "epoch": 0.8500846023688663, "grad_norm": 3.1244893204773887, "learning_rate": 1.1548645687019742e-07, "loss": 0.476, "step": 1884 }, { "epoch": 0.85053581500282, "grad_norm": 2.895489653963696, "learning_rate": 1.1480541130543431e-07, "loss": 0.4374, "step": 1885 }, { "epoch": 0.8509870276367738, "grad_norm": 3.6679522394726902, "learning_rate": 1.1412625749870675e-07, "loss": 0.6452, "step": 1886 }, { "epoch": 0.8514382402707276, "grad_norm": 3.4012095936653255, "learning_rate": 1.1344899690144138e-07, "loss": 0.6069, "step": 1887 }, { "epoch": 0.8518894529046813, "grad_norm": 3.4427078769517427, "learning_rate": 1.1277363096101833e-07, "loss": 0.4773, "step": 1888 }, { "epoch": 0.8523406655386351, "grad_norm": 2.8891427428108485, "learning_rate": 1.1210016112076869e-07, "loss": 0.5172, "step": 1889 }, { "epoch": 0.8527918781725888, "grad_norm": 3.1882213169736326, "learning_rate": 1.1142858881997153e-07, "loss": 0.6214, "step": 1890 }, { "epoch": 0.8532430908065426, "grad_norm": 3.2631974785916578, "learning_rate": 1.107589154938503e-07, "loss": 0.3816, "step": 1891 }, { "epoch": 0.8536943034404963, "grad_norm": 3.646300547484839, "learning_rate": 1.10091142573571e-07, "loss": 0.5078, "step": 1892 }, { "epoch": 0.8541455160744501, "grad_norm": 3.0800229323313446, "learning_rate": 1.0942527148623736e-07, "loss": 0.5741, "step": 1893 }, { "epoch": 0.8545967287084039, "grad_norm": 3.4437111992470695, "learning_rate": 1.0876130365488878e-07, "loss": 0.4977, "step": 1894 }, { "epoch": 0.8550479413423576, "grad_norm": 3.415687476852208, "learning_rate": 1.0809924049849816e-07, "loss": 0.5176, "step": 1895 }, { "epoch": 0.8554991539763114, "grad_norm": 3.423266820929565, "learning_rate": 1.0743908343196629e-07, "loss": 0.5477, "step": 1896 }, { "epoch": 0.8559503666102651, "grad_norm": 3.319978860721829, "learning_rate": 1.067808338661219e-07, "loss": 0.6, "step": 1897 }, { "epoch": 0.8564015792442189, "grad_norm": 3.2937834521614766, "learning_rate": 1.0612449320771644e-07, "loss": 0.6547, "step": 1898 }, { "epoch": 0.8568527918781725, "grad_norm": 2.7650015075242136, "learning_rate": 1.0547006285942162e-07, "loss": 0.4555, "step": 1899 }, { "epoch": 0.8573040045121263, "grad_norm": 3.2864394879320495, "learning_rate": 1.0481754421982758e-07, "loss": 0.5315, "step": 1900 }, { "epoch": 0.85775521714608, "grad_norm": 3.359413365564091, "learning_rate": 1.0416693868343795e-07, "loss": 0.4423, "step": 1901 }, { "epoch": 0.8582064297800338, "grad_norm": 2.9927056449757714, "learning_rate": 1.0351824764066819e-07, "loss": 0.5731, "step": 1902 }, { "epoch": 0.8586576424139876, "grad_norm": 3.61532226087724, "learning_rate": 1.0287147247784244e-07, "loss": 0.6, "step": 1903 }, { "epoch": 0.8591088550479413, "grad_norm": 2.9636046590677902, "learning_rate": 1.0222661457718985e-07, "loss": 0.683, "step": 1904 }, { "epoch": 0.8595600676818951, "grad_norm": 3.2418007105981337, "learning_rate": 1.015836753168431e-07, "loss": 0.5704, "step": 1905 }, { "epoch": 0.8600112803158488, "grad_norm": 3.270787762505259, "learning_rate": 1.0094265607083374e-07, "loss": 0.5531, "step": 1906 }, { "epoch": 0.8604624929498026, "grad_norm": 3.4241238059817753, "learning_rate": 1.0030355820908997e-07, "loss": 0.4724, "step": 1907 }, { "epoch": 0.8609137055837564, "grad_norm": 3.120442346322047, "learning_rate": 9.966638309743481e-08, "loss": 0.5969, "step": 1908 }, { "epoch": 0.8613649182177101, "grad_norm": 2.8700401112362983, "learning_rate": 9.903113209758096e-08, "loss": 0.639, "step": 1909 }, { "epoch": 0.8618161308516639, "grad_norm": 2.90952553707339, "learning_rate": 9.839780656712959e-08, "loss": 0.4335, "step": 1910 }, { "epoch": 0.8622673434856176, "grad_norm": 3.15352399159264, "learning_rate": 9.776640785956702e-08, "loss": 0.602, "step": 1911 }, { "epoch": 0.8627185561195714, "grad_norm": 3.558953242347675, "learning_rate": 9.713693732426131e-08, "loss": 0.5247, "step": 1912 }, { "epoch": 0.8631697687535251, "grad_norm": 3.058394803478645, "learning_rate": 9.65093963064606e-08, "loss": 0.5442, "step": 1913 }, { "epoch": 0.8636209813874789, "grad_norm": 3.0343736318962646, "learning_rate": 9.588378614728864e-08, "loss": 0.4304, "step": 1914 }, { "epoch": 0.8640721940214326, "grad_norm": 3.103162455870594, "learning_rate": 9.526010818374309e-08, "loss": 0.6602, "step": 1915 }, { "epoch": 0.8645234066553864, "grad_norm": 3.345412268134743, "learning_rate": 9.46383637486925e-08, "loss": 0.5215, "step": 1916 }, { "epoch": 0.86497461928934, "grad_norm": 2.950588770881482, "learning_rate": 9.401855417087234e-08, "loss": 0.432, "step": 1917 }, { "epoch": 0.8654258319232938, "grad_norm": 2.9028721420815438, "learning_rate": 9.34006807748845e-08, "loss": 0.5536, "step": 1918 }, { "epoch": 0.8658770445572476, "grad_norm": 3.422382660689821, "learning_rate": 9.278474488119182e-08, "loss": 0.5321, "step": 1919 }, { "epoch": 0.8663282571912013, "grad_norm": 3.052528705427497, "learning_rate": 9.217074780611688e-08, "loss": 0.5474, "step": 1920 }, { "epoch": 0.8667794698251551, "grad_norm": 3.1194719596998484, "learning_rate": 9.155869086183921e-08, "loss": 0.5148, "step": 1921 }, { "epoch": 0.8672306824591088, "grad_norm": 2.9070890387145494, "learning_rate": 9.094857535639156e-08, "loss": 0.4189, "step": 1922 }, { "epoch": 0.8676818950930626, "grad_norm": 3.177762649776062, "learning_rate": 9.03404025936576e-08, "loss": 0.4111, "step": 1923 }, { "epoch": 0.8681331077270164, "grad_norm": 3.4014028718324885, "learning_rate": 8.973417387336946e-08, "loss": 0.5615, "step": 1924 }, { "epoch": 0.8685843203609701, "grad_norm": 3.314370098733984, "learning_rate": 8.91298904911043e-08, "loss": 0.5898, "step": 1925 }, { "epoch": 0.8690355329949239, "grad_norm": 3.2243090136724772, "learning_rate": 8.852755373828235e-08, "loss": 0.5159, "step": 1926 }, { "epoch": 0.8694867456288776, "grad_norm": 3.1990156923608506, "learning_rate": 8.792716490216335e-08, "loss": 0.5222, "step": 1927 }, { "epoch": 0.8699379582628314, "grad_norm": 3.0312256180294996, "learning_rate": 8.732872526584379e-08, "loss": 0.7078, "step": 1928 }, { "epoch": 0.8703891708967851, "grad_norm": 3.0987202755918983, "learning_rate": 8.67322361082553e-08, "loss": 0.4882, "step": 1929 }, { "epoch": 0.8708403835307389, "grad_norm": 2.7196240122489446, "learning_rate": 8.613769870416066e-08, "loss": 0.5035, "step": 1930 }, { "epoch": 0.8712915961646926, "grad_norm": 2.9325956476707224, "learning_rate": 8.554511432415145e-08, "loss": 0.4407, "step": 1931 }, { "epoch": 0.8717428087986464, "grad_norm": 3.563913043226757, "learning_rate": 8.495448423464568e-08, "loss": 0.6164, "step": 1932 }, { "epoch": 0.8721940214326002, "grad_norm": 3.0026341334651323, "learning_rate": 8.436580969788431e-08, "loss": 0.4445, "step": 1933 }, { "epoch": 0.8726452340665538, "grad_norm": 3.2398204365530656, "learning_rate": 8.377909197193011e-08, "loss": 0.5261, "step": 1934 }, { "epoch": 0.8730964467005076, "grad_norm": 3.552891767506158, "learning_rate": 8.319433231066264e-08, "loss": 0.683, "step": 1935 }, { "epoch": 0.8735476593344613, "grad_norm": 2.857576906676008, "learning_rate": 8.261153196377813e-08, "loss": 0.4816, "step": 1936 }, { "epoch": 0.8739988719684151, "grad_norm": 3.172569622874956, "learning_rate": 8.20306921767847e-08, "loss": 0.4672, "step": 1937 }, { "epoch": 0.8744500846023688, "grad_norm": 3.515851107453122, "learning_rate": 8.145181419100034e-08, "loss": 0.6234, "step": 1938 }, { "epoch": 0.8749012972363226, "grad_norm": 3.439159617356599, "learning_rate": 8.08748992435514e-08, "loss": 0.5984, "step": 1939 }, { "epoch": 0.8753525098702764, "grad_norm": 3.3171348098631857, "learning_rate": 8.02999485673681e-08, "loss": 0.5059, "step": 1940 }, { "epoch": 0.8758037225042301, "grad_norm": 3.008859919440221, "learning_rate": 7.972696339118346e-08, "loss": 0.5124, "step": 1941 }, { "epoch": 0.8762549351381839, "grad_norm": 3.3407440096371626, "learning_rate": 7.91559449395296e-08, "loss": 0.5261, "step": 1942 }, { "epoch": 0.8767061477721376, "grad_norm": 3.2680192388068985, "learning_rate": 7.858689443273547e-08, "loss": 0.4824, "step": 1943 }, { "epoch": 0.8771573604060914, "grad_norm": 3.2302885233475247, "learning_rate": 7.801981308692507e-08, "loss": 0.5301, "step": 1944 }, { "epoch": 0.8776085730400451, "grad_norm": 3.3144955548693846, "learning_rate": 7.745470211401273e-08, "loss": 0.5292, "step": 1945 }, { "epoch": 0.8780597856739989, "grad_norm": 3.6338069876593972, "learning_rate": 7.689156272170316e-08, "loss": 0.6645, "step": 1946 }, { "epoch": 0.8785109983079527, "grad_norm": 3.320705078381082, "learning_rate": 7.633039611348701e-08, "loss": 0.5731, "step": 1947 }, { "epoch": 0.8789622109419064, "grad_norm": 3.400427088861012, "learning_rate": 7.577120348863864e-08, "loss": 0.5424, "step": 1948 }, { "epoch": 0.8794134235758602, "grad_norm": 2.952134565078521, "learning_rate": 7.521398604221451e-08, "loss": 0.4211, "step": 1949 }, { "epoch": 0.8798646362098139, "grad_norm": 3.1223077524770804, "learning_rate": 7.465874496504943e-08, "loss": 0.4525, "step": 1950 }, { "epoch": 0.8803158488437677, "grad_norm": 3.3651190746871213, "learning_rate": 7.410548144375417e-08, "loss": 0.5718, "step": 1951 }, { "epoch": 0.8807670614777213, "grad_norm": 2.8278868757405995, "learning_rate": 7.355419666071406e-08, "loss": 0.5105, "step": 1952 }, { "epoch": 0.8812182741116751, "grad_norm": 3.3691970534959346, "learning_rate": 7.300489179408476e-08, "loss": 0.4373, "step": 1953 }, { "epoch": 0.8816694867456288, "grad_norm": 3.1396491136987374, "learning_rate": 7.245756801779158e-08, "loss": 0.5241, "step": 1954 }, { "epoch": 0.8821206993795826, "grad_norm": 2.9990018272726338, "learning_rate": 7.191222650152528e-08, "loss": 0.5162, "step": 1955 }, { "epoch": 0.8825719120135364, "grad_norm": 2.872850179093246, "learning_rate": 7.136886841074052e-08, "loss": 0.4716, "step": 1956 }, { "epoch": 0.8830231246474901, "grad_norm": 3.7713414016675255, "learning_rate": 7.082749490665351e-08, "loss": 0.6061, "step": 1957 }, { "epoch": 0.8834743372814439, "grad_norm": 3.476662997099093, "learning_rate": 7.028810714623846e-08, "loss": 0.4683, "step": 1958 }, { "epoch": 0.8839255499153976, "grad_norm": 3.2389720481182764, "learning_rate": 6.975070628222646e-08, "loss": 0.5448, "step": 1959 }, { "epoch": 0.8843767625493514, "grad_norm": 3.1694385457983127, "learning_rate": 6.921529346310218e-08, "loss": 0.5136, "step": 1960 }, { "epoch": 0.8848279751833051, "grad_norm": 3.6314284620989787, "learning_rate": 6.868186983310131e-08, "loss": 0.4929, "step": 1961 }, { "epoch": 0.8852791878172589, "grad_norm": 3.123575388660666, "learning_rate": 6.81504365322092e-08, "loss": 0.5635, "step": 1962 }, { "epoch": 0.8857304004512127, "grad_norm": 2.6321261997764807, "learning_rate": 6.76209946961569e-08, "loss": 0.4772, "step": 1963 }, { "epoch": 0.8861816130851664, "grad_norm": 3.210961705882933, "learning_rate": 6.709354545641987e-08, "loss": 0.5765, "step": 1964 }, { "epoch": 0.8866328257191202, "grad_norm": 3.102505340660919, "learning_rate": 6.65680899402149e-08, "loss": 0.6185, "step": 1965 }, { "epoch": 0.8870840383530739, "grad_norm": 3.318813163094756, "learning_rate": 6.604462927049804e-08, "loss": 0.5496, "step": 1966 }, { "epoch": 0.8875352509870277, "grad_norm": 3.693495485179525, "learning_rate": 6.552316456596252e-08, "loss": 0.6225, "step": 1967 }, { "epoch": 0.8879864636209814, "grad_norm": 2.7909920602545957, "learning_rate": 6.500369694103558e-08, "loss": 0.4657, "step": 1968 }, { "epoch": 0.8884376762549352, "grad_norm": 3.8268919463461315, "learning_rate": 6.44862275058763e-08, "loss": 0.5475, "step": 1969 }, { "epoch": 0.8888888888888888, "grad_norm": 2.8955466658153273, "learning_rate": 6.397075736637403e-08, "loss": 0.4761, "step": 1970 }, { "epoch": 0.8893401015228426, "grad_norm": 3.0393342411424054, "learning_rate": 6.345728762414503e-08, "loss": 0.5003, "step": 1971 }, { "epoch": 0.8897913141567964, "grad_norm": 3.0480382524304677, "learning_rate": 6.294581937653042e-08, "loss": 0.5091, "step": 1972 }, { "epoch": 0.8902425267907501, "grad_norm": 3.2472044245989027, "learning_rate": 6.243635371659395e-08, "loss": 0.5822, "step": 1973 }, { "epoch": 0.8906937394247039, "grad_norm": 2.9373896773076926, "learning_rate": 6.192889173311966e-08, "loss": 0.5317, "step": 1974 }, { "epoch": 0.8911449520586576, "grad_norm": 3.4088834880295034, "learning_rate": 6.142343451060972e-08, "loss": 0.5696, "step": 1975 }, { "epoch": 0.8915961646926114, "grad_norm": 3.2061069267520645, "learning_rate": 6.091998312928171e-08, "loss": 0.5439, "step": 1976 }, { "epoch": 0.8920473773265651, "grad_norm": 2.9538556019221733, "learning_rate": 6.04185386650662e-08, "loss": 0.4199, "step": 1977 }, { "epoch": 0.8924985899605189, "grad_norm": 3.3347490781585125, "learning_rate": 5.99191021896055e-08, "loss": 0.585, "step": 1978 }, { "epoch": 0.8929498025944727, "grad_norm": 3.252069083231302, "learning_rate": 5.9421674770249844e-08, "loss": 0.5721, "step": 1979 }, { "epoch": 0.8934010152284264, "grad_norm": 3.373090912932044, "learning_rate": 5.8926257470056415e-08, "loss": 0.6677, "step": 1980 }, { "epoch": 0.8938522278623802, "grad_norm": 3.470419272509584, "learning_rate": 5.8432851347786414e-08, "loss": 0.6336, "step": 1981 }, { "epoch": 0.8943034404963339, "grad_norm": 2.954923563225483, "learning_rate": 5.794145745790269e-08, "loss": 0.5161, "step": 1982 }, { "epoch": 0.8947546531302877, "grad_norm": 2.9240499120629373, "learning_rate": 5.7452076850568186e-08, "loss": 0.5764, "step": 1983 }, { "epoch": 0.8952058657642414, "grad_norm": 3.163381313365714, "learning_rate": 5.696471057164298e-08, "loss": 0.4723, "step": 1984 }, { "epoch": 0.8956570783981952, "grad_norm": 3.001631089880879, "learning_rate": 5.6479359662682246e-08, "loss": 0.4736, "step": 1985 }, { "epoch": 0.896108291032149, "grad_norm": 3.1077853978436414, "learning_rate": 5.599602516093427e-08, "loss": 0.4647, "step": 1986 }, { "epoch": 0.8965595036661026, "grad_norm": 2.9246040215255658, "learning_rate": 5.551470809933756e-08, "loss": 0.4681, "step": 1987 }, { "epoch": 0.8970107163000564, "grad_norm": 3.4387470409961223, "learning_rate": 5.503540950652008e-08, "loss": 0.507, "step": 1988 }, { "epoch": 0.8974619289340101, "grad_norm": 2.6230689040197364, "learning_rate": 5.4558130406795355e-08, "loss": 0.4589, "step": 1989 }, { "epoch": 0.8979131415679639, "grad_norm": 3.388098608880633, "learning_rate": 5.408287182016091e-08, "loss": 0.4525, "step": 1990 }, { "epoch": 0.8983643542019176, "grad_norm": 3.119545140896645, "learning_rate": 5.360963476229707e-08, "loss": 0.5486, "step": 1991 }, { "epoch": 0.8988155668358714, "grad_norm": 3.1519502919215694, "learning_rate": 5.313842024456305e-08, "loss": 0.4839, "step": 1992 }, { "epoch": 0.8992667794698251, "grad_norm": 3.7651904706666395, "learning_rate": 5.2669229273996084e-08, "loss": 0.6181, "step": 1993 }, { "epoch": 0.8997179921037789, "grad_norm": 3.058263724338039, "learning_rate": 5.220206285330886e-08, "loss": 0.5597, "step": 1994 }, { "epoch": 0.9001692047377327, "grad_norm": 3.0934748276186337, "learning_rate": 5.173692198088708e-08, "loss": 0.4815, "step": 1995 }, { "epoch": 0.9006204173716864, "grad_norm": 3.968781722790635, "learning_rate": 5.1273807650788146e-08, "loss": 0.5632, "step": 1996 }, { "epoch": 0.9010716300056402, "grad_norm": 3.131751903358897, "learning_rate": 5.081272085273825e-08, "loss": 0.4128, "step": 1997 }, { "epoch": 0.9015228426395939, "grad_norm": 2.9193006683184275, "learning_rate": 5.035366257213014e-08, "loss": 0.4801, "step": 1998 }, { "epoch": 0.9019740552735477, "grad_norm": 3.2693871968854875, "learning_rate": 4.98966337900224e-08, "loss": 0.5536, "step": 1999 }, { "epoch": 0.9024252679075014, "grad_norm": 3.2813024613985227, "learning_rate": 4.944163548313496e-08, "loss": 0.6593, "step": 2000 }, { "epoch": 0.9028764805414552, "grad_norm": 3.369192254537855, "learning_rate": 4.898866862384976e-08, "loss": 0.4618, "step": 2001 }, { "epoch": 0.903327693175409, "grad_norm": 3.043012997543813, "learning_rate": 4.853773418020646e-08, "loss": 0.4676, "step": 2002 }, { "epoch": 0.9037789058093627, "grad_norm": 3.434166016020396, "learning_rate": 4.8088833115901395e-08, "loss": 0.3948, "step": 2003 }, { "epoch": 0.9042301184433165, "grad_norm": 2.7646887236809854, "learning_rate": 4.764196639028572e-08, "loss": 0.4774, "step": 2004 }, { "epoch": 0.9046813310772701, "grad_norm": 3.3361977687273927, "learning_rate": 4.719713495836242e-08, "loss": 0.5685, "step": 2005 }, { "epoch": 0.9051325437112239, "grad_norm": 3.4831449664643257, "learning_rate": 4.6754339770785465e-08, "loss": 0.5089, "step": 2006 }, { "epoch": 0.9055837563451776, "grad_norm": 3.4671397319854522, "learning_rate": 4.631358177385647e-08, "loss": 0.5439, "step": 2007 }, { "epoch": 0.9060349689791314, "grad_norm": 3.2933483172414335, "learning_rate": 4.58748619095235e-08, "loss": 0.5683, "step": 2008 }, { "epoch": 0.9064861816130851, "grad_norm": 2.985093722715634, "learning_rate": 4.543818111537956e-08, "loss": 0.5344, "step": 2009 }, { "epoch": 0.9069373942470389, "grad_norm": 3.4328919623699674, "learning_rate": 4.500354032465925e-08, "loss": 0.4476, "step": 2010 }, { "epoch": 0.9073886068809927, "grad_norm": 3.304919883947093, "learning_rate": 4.457094046623755e-08, "loss": 0.6034, "step": 2011 }, { "epoch": 0.9078398195149464, "grad_norm": 2.8099205651955206, "learning_rate": 4.414038246462803e-08, "loss": 0.4899, "step": 2012 }, { "epoch": 0.9082910321489002, "grad_norm": 3.3739143855436535, "learning_rate": 4.3711867239980324e-08, "loss": 0.6359, "step": 2013 }, { "epoch": 0.9087422447828539, "grad_norm": 3.189618086168555, "learning_rate": 4.3285395708078546e-08, "loss": 0.5732, "step": 2014 }, { "epoch": 0.9091934574168077, "grad_norm": 3.367461634350275, "learning_rate": 4.286096878033929e-08, "loss": 0.4531, "step": 2015 }, { "epoch": 0.9096446700507614, "grad_norm": 3.6483005516482767, "learning_rate": 4.243858736380912e-08, "loss": 0.6179, "step": 2016 }, { "epoch": 0.9100958826847152, "grad_norm": 3.415485122808768, "learning_rate": 4.2018252361164076e-08, "loss": 0.5615, "step": 2017 }, { "epoch": 0.910547095318669, "grad_norm": 2.741772493137757, "learning_rate": 4.15999646707057e-08, "loss": 0.4732, "step": 2018 }, { "epoch": 0.9109983079526227, "grad_norm": 3.0538104199950307, "learning_rate": 4.118372518636104e-08, "loss": 0.5523, "step": 2019 }, { "epoch": 0.9114495205865765, "grad_norm": 3.0764370937470704, "learning_rate": 4.076953479767964e-08, "loss": 0.5979, "step": 2020 }, { "epoch": 0.9119007332205302, "grad_norm": 3.066765697208492, "learning_rate": 4.035739438983143e-08, "loss": 0.4439, "step": 2021 }, { "epoch": 0.9123519458544839, "grad_norm": 3.668533201777122, "learning_rate": 3.994730484360609e-08, "loss": 0.572, "step": 2022 }, { "epoch": 0.9128031584884376, "grad_norm": 3.401968524293534, "learning_rate": 3.953926703540977e-08, "loss": 0.5584, "step": 2023 }, { "epoch": 0.9132543711223914, "grad_norm": 3.4337981912223174, "learning_rate": 3.9133281837264385e-08, "loss": 0.513, "step": 2024 }, { "epoch": 0.9137055837563451, "grad_norm": 4.049276576839161, "learning_rate": 3.872935011680456e-08, "loss": 0.5475, "step": 2025 }, { "epoch": 0.9141567963902989, "grad_norm": 3.227324543340619, "learning_rate": 3.832747273727699e-08, "loss": 0.4974, "step": 2026 }, { "epoch": 0.9146080090242527, "grad_norm": 3.700152680818643, "learning_rate": 3.792765055753755e-08, "loss": 0.6376, "step": 2027 }, { "epoch": 0.9150592216582064, "grad_norm": 3.134825348605703, "learning_rate": 3.7529884432050074e-08, "loss": 0.5864, "step": 2028 }, { "epoch": 0.9155104342921602, "grad_norm": 3.287365982894785, "learning_rate": 3.71341752108848e-08, "loss": 0.5592, "step": 2029 }, { "epoch": 0.9159616469261139, "grad_norm": 3.3656815978558408, "learning_rate": 3.674052373971559e-08, "loss": 0.5296, "step": 2030 }, { "epoch": 0.9164128595600677, "grad_norm": 3.08635532381097, "learning_rate": 3.634893085981872e-08, "loss": 0.4835, "step": 2031 }, { "epoch": 0.9168640721940214, "grad_norm": 3.120263533445725, "learning_rate": 3.595939740807141e-08, "loss": 0.5167, "step": 2032 }, { "epoch": 0.9173152848279752, "grad_norm": 2.9603539149063374, "learning_rate": 3.557192421694932e-08, "loss": 0.5019, "step": 2033 }, { "epoch": 0.917766497461929, "grad_norm": 3.300594188794194, "learning_rate": 3.518651211452528e-08, "loss": 0.6458, "step": 2034 }, { "epoch": 0.9182177100958827, "grad_norm": 3.122902491830948, "learning_rate": 3.4803161924467196e-08, "loss": 0.5104, "step": 2035 }, { "epoch": 0.9186689227298365, "grad_norm": 3.4528349816512103, "learning_rate": 3.4421874466036285e-08, "loss": 0.5535, "step": 2036 }, { "epoch": 0.9191201353637902, "grad_norm": 3.3890580466360913, "learning_rate": 3.404265055408617e-08, "loss": 0.4116, "step": 2037 }, { "epoch": 0.919571347997744, "grad_norm": 3.2546448072566645, "learning_rate": 3.36654909990598e-08, "loss": 0.5527, "step": 2038 }, { "epoch": 0.9200225606316977, "grad_norm": 2.6288215794522314, "learning_rate": 3.3290396606988405e-08, "loss": 0.4729, "step": 2039 }, { "epoch": 0.9204737732656514, "grad_norm": 3.0588000528716344, "learning_rate": 3.29173681794902e-08, "loss": 0.5862, "step": 2040 }, { "epoch": 0.9209249858996051, "grad_norm": 3.3085745174099244, "learning_rate": 3.25464065137675e-08, "loss": 0.4663, "step": 2041 }, { "epoch": 0.9213761985335589, "grad_norm": 3.3049910550091286, "learning_rate": 3.217751240260647e-08, "loss": 0.5277, "step": 2042 }, { "epoch": 0.9218274111675127, "grad_norm": 3.1470391275241405, "learning_rate": 3.1810686634374253e-08, "loss": 0.5739, "step": 2043 }, { "epoch": 0.9222786238014664, "grad_norm": 2.8926904316169897, "learning_rate": 3.144592999301754e-08, "loss": 0.5079, "step": 2044 }, { "epoch": 0.9227298364354202, "grad_norm": 3.3359025166747753, "learning_rate": 3.1083243258061666e-08, "loss": 0.6565, "step": 2045 }, { "epoch": 0.9231810490693739, "grad_norm": 3.2267530813326193, "learning_rate": 3.072262720460783e-08, "loss": 0.511, "step": 2046 }, { "epoch": 0.9236322617033277, "grad_norm": 3.172137007655229, "learning_rate": 3.036408260333223e-08, "loss": 0.5653, "step": 2047 }, { "epoch": 0.9240834743372814, "grad_norm": 2.9652257724423183, "learning_rate": 3.000761022048393e-08, "loss": 0.5494, "step": 2048 }, { "epoch": 0.9245346869712352, "grad_norm": 3.162831602182436, "learning_rate": 2.9653210817883634e-08, "loss": 0.5811, "step": 2049 }, { "epoch": 0.924985899605189, "grad_norm": 3.3548734459357195, "learning_rate": 2.930088515292173e-08, "loss": 0.4703, "step": 2050 }, { "epoch": 0.9254371122391427, "grad_norm": 2.971253690703245, "learning_rate": 2.8950633978556906e-08, "loss": 0.6375, "step": 2051 }, { "epoch": 0.9258883248730965, "grad_norm": 3.282908277488212, "learning_rate": 2.860245804331429e-08, "loss": 0.5976, "step": 2052 }, { "epoch": 0.9263395375070502, "grad_norm": 3.4398673398293833, "learning_rate": 2.8256358091284238e-08, "loss": 0.5767, "step": 2053 }, { "epoch": 0.926790750141004, "grad_norm": 3.0750885972506654, "learning_rate": 2.79123348621203e-08, "loss": 0.492, "step": 2054 }, { "epoch": 0.9272419627749577, "grad_norm": 3.295821068137443, "learning_rate": 2.7570389091037926e-08, "loss": 0.5278, "step": 2055 }, { "epoch": 0.9276931754089115, "grad_norm": 3.54320248311843, "learning_rate": 2.7230521508812553e-08, "loss": 0.5603, "step": 2056 }, { "epoch": 0.9281443880428653, "grad_norm": 3.6284081250605063, "learning_rate": 2.689273284177873e-08, "loss": 0.6389, "step": 2057 }, { "epoch": 0.9285956006768189, "grad_norm": 3.136805769994715, "learning_rate": 2.6557023811827894e-08, "loss": 0.5818, "step": 2058 }, { "epoch": 0.9290468133107727, "grad_norm": 2.943590397644658, "learning_rate": 2.6223395136407145e-08, "loss": 0.5318, "step": 2059 }, { "epoch": 0.9294980259447264, "grad_norm": 3.126539259551608, "learning_rate": 2.5891847528517476e-08, "loss": 0.4908, "step": 2060 }, { "epoch": 0.9299492385786802, "grad_norm": 3.2669493617048198, "learning_rate": 2.5562381696712654e-08, "loss": 0.5458, "step": 2061 }, { "epoch": 0.9304004512126339, "grad_norm": 3.0754859463953803, "learning_rate": 2.5234998345097237e-08, "loss": 0.4965, "step": 2062 }, { "epoch": 0.9308516638465877, "grad_norm": 3.6839611131375336, "learning_rate": 2.4909698173325443e-08, "loss": 0.5531, "step": 2063 }, { "epoch": 0.9313028764805414, "grad_norm": 3.22462453822676, "learning_rate": 2.458648187659962e-08, "loss": 0.4566, "step": 2064 }, { "epoch": 0.9317540891144952, "grad_norm": 3.4060647980598, "learning_rate": 2.4265350145668106e-08, "loss": 0.7031, "step": 2065 }, { "epoch": 0.932205301748449, "grad_norm": 3.089929652023956, "learning_rate": 2.394630366682493e-08, "loss": 0.4991, "step": 2066 }, { "epoch": 0.9326565143824027, "grad_norm": 3.0972124530500427, "learning_rate": 2.3629343121907562e-08, "loss": 0.5814, "step": 2067 }, { "epoch": 0.9331077270163565, "grad_norm": 2.956122127539399, "learning_rate": 2.3314469188295272e-08, "loss": 0.447, "step": 2068 }, { "epoch": 0.9335589396503102, "grad_norm": 3.4655230691165775, "learning_rate": 2.300168253890833e-08, "loss": 0.5864, "step": 2069 }, { "epoch": 0.934010152284264, "grad_norm": 3.2759006855749764, "learning_rate": 2.2690983842205914e-08, "loss": 0.5745, "step": 2070 }, { "epoch": 0.9344613649182177, "grad_norm": 3.0615174312777014, "learning_rate": 2.2382373762185658e-08, "loss": 0.4796, "step": 2071 }, { "epoch": 0.9349125775521715, "grad_norm": 3.432283728728365, "learning_rate": 2.207585295838099e-08, "loss": 0.6578, "step": 2072 }, { "epoch": 0.9353637901861253, "grad_norm": 3.3123138166785275, "learning_rate": 2.177142208586047e-08, "loss": 0.5887, "step": 2073 }, { "epoch": 0.935815002820079, "grad_norm": 3.3385012625964143, "learning_rate": 2.146908179522644e-08, "loss": 0.5244, "step": 2074 }, { "epoch": 0.9362662154540327, "grad_norm": 3.412059349668194, "learning_rate": 2.116883273261316e-08, "loss": 0.4841, "step": 2075 }, { "epoch": 0.9367174280879864, "grad_norm": 3.0584437698376314, "learning_rate": 2.087067553968602e-08, "loss": 0.3759, "step": 2076 }, { "epoch": 0.9371686407219402, "grad_norm": 3.2188511717745123, "learning_rate": 2.057461085363954e-08, "loss": 0.5688, "step": 2077 }, { "epoch": 0.9376198533558939, "grad_norm": 2.9976242270120594, "learning_rate": 2.028063930719637e-08, "loss": 0.4009, "step": 2078 }, { "epoch": 0.9380710659898477, "grad_norm": 2.892808153960682, "learning_rate": 1.9988761528606178e-08, "loss": 0.4392, "step": 2079 }, { "epoch": 0.9385222786238014, "grad_norm": 3.2358320547434443, "learning_rate": 1.9698978141643784e-08, "loss": 0.503, "step": 2080 }, { "epoch": 0.9389734912577552, "grad_norm": 3.3962163022150653, "learning_rate": 1.941128976560791e-08, "loss": 0.5694, "step": 2081 }, { "epoch": 0.939424703891709, "grad_norm": 2.998665449003642, "learning_rate": 1.912569701532063e-08, "loss": 0.5902, "step": 2082 }, { "epoch": 0.9398759165256627, "grad_norm": 3.3801760197505835, "learning_rate": 1.8842200501124615e-08, "loss": 0.5383, "step": 2083 }, { "epoch": 0.9403271291596165, "grad_norm": 2.9348713239408593, "learning_rate": 1.8560800828883227e-08, "loss": 0.4877, "step": 2084 }, { "epoch": 0.9407783417935702, "grad_norm": 3.097739950065729, "learning_rate": 1.8281498599978407e-08, "loss": 0.6081, "step": 2085 }, { "epoch": 0.941229554427524, "grad_norm": 3.2982172408128814, "learning_rate": 1.8004294411309685e-08, "loss": 0.5686, "step": 2086 }, { "epoch": 0.9416807670614777, "grad_norm": 2.9622880055723084, "learning_rate": 1.7729188855292954e-08, "loss": 0.578, "step": 2087 }, { "epoch": 0.9421319796954315, "grad_norm": 2.947330771863818, "learning_rate": 1.7456182519858808e-08, "loss": 0.5588, "step": 2088 }, { "epoch": 0.9425831923293853, "grad_norm": 2.937817283751574, "learning_rate": 1.7185275988451986e-08, "loss": 0.4514, "step": 2089 }, { "epoch": 0.943034404963339, "grad_norm": 3.3308032462704644, "learning_rate": 1.691646984002937e-08, "loss": 0.5546, "step": 2090 }, { "epoch": 0.9434856175972928, "grad_norm": 2.8609200678838875, "learning_rate": 1.6649764649059094e-08, "loss": 0.4772, "step": 2091 }, { "epoch": 0.9439368302312465, "grad_norm": 3.3257921371498256, "learning_rate": 1.6385160985519564e-08, "loss": 0.4277, "step": 2092 }, { "epoch": 0.9443880428652002, "grad_norm": 3.280225796167774, "learning_rate": 1.6122659414897876e-08, "loss": 0.6392, "step": 2093 }, { "epoch": 0.9448392554991539, "grad_norm": 3.8085560156358054, "learning_rate": 1.5862260498188728e-08, "loss": 0.7179, "step": 2094 }, { "epoch": 0.9452904681331077, "grad_norm": 3.3335448343938268, "learning_rate": 1.56039647918933e-08, "loss": 0.6197, "step": 2095 }, { "epoch": 0.9457416807670614, "grad_norm": 3.031288983603637, "learning_rate": 1.5347772848017583e-08, "loss": 0.4715, "step": 2096 }, { "epoch": 0.9461928934010152, "grad_norm": 3.164918639328314, "learning_rate": 1.509368521407217e-08, "loss": 0.5569, "step": 2097 }, { "epoch": 0.946644106034969, "grad_norm": 3.210287170541672, "learning_rate": 1.4841702433070037e-08, "loss": 0.6519, "step": 2098 }, { "epoch": 0.9470953186689227, "grad_norm": 2.6369570547287977, "learning_rate": 1.4591825043526073e-08, "loss": 0.4529, "step": 2099 }, { "epoch": 0.9475465313028765, "grad_norm": 3.391418189703669, "learning_rate": 1.4344053579455894e-08, "loss": 0.4644, "step": 2100 }, { "epoch": 0.9479977439368302, "grad_norm": 2.9940931472415793, "learning_rate": 1.4098388570374154e-08, "loss": 0.4532, "step": 2101 }, { "epoch": 0.948448956570784, "grad_norm": 3.1943317280291077, "learning_rate": 1.3854830541294105e-08, "loss": 0.5737, "step": 2102 }, { "epoch": 0.9489001692047377, "grad_norm": 3.463904492669196, "learning_rate": 1.3613380012725717e-08, "loss": 0.5669, "step": 2103 }, { "epoch": 0.9493513818386915, "grad_norm": 2.950631481291773, "learning_rate": 1.337403750067545e-08, "loss": 0.5732, "step": 2104 }, { "epoch": 0.9498025944726453, "grad_norm": 3.2538975233288303, "learning_rate": 1.3136803516644701e-08, "loss": 0.5587, "step": 2105 }, { "epoch": 0.950253807106599, "grad_norm": 3.3928844420275692, "learning_rate": 1.2901678567628249e-08, "loss": 0.5758, "step": 2106 }, { "epoch": 0.9507050197405528, "grad_norm": 3.402553144487364, "learning_rate": 1.2668663156114035e-08, "loss": 0.5521, "step": 2107 }, { "epoch": 0.9511562323745065, "grad_norm": 3.1090460601388688, "learning_rate": 1.2437757780081715e-08, "loss": 0.4712, "step": 2108 }, { "epoch": 0.9516074450084603, "grad_norm": 3.157111834878996, "learning_rate": 1.2208962933001332e-08, "loss": 0.7341, "step": 2109 }, { "epoch": 0.952058657642414, "grad_norm": 3.6264603472952692, "learning_rate": 1.1982279103832539e-08, "loss": 0.6544, "step": 2110 }, { "epoch": 0.9525098702763677, "grad_norm": 2.997104592016241, "learning_rate": 1.175770677702359e-08, "loss": 0.6325, "step": 2111 }, { "epoch": 0.9529610829103214, "grad_norm": 3.428867574227346, "learning_rate": 1.1535246432510249e-08, "loss": 0.6784, "step": 2112 }, { "epoch": 0.9534122955442752, "grad_norm": 3.215528072634851, "learning_rate": 1.1314898545714768e-08, "loss": 0.5196, "step": 2113 }, { "epoch": 0.953863508178229, "grad_norm": 3.0135546799565507, "learning_rate": 1.1096663587544574e-08, "loss": 0.4638, "step": 2114 }, { "epoch": 0.9543147208121827, "grad_norm": 3.4816204927235277, "learning_rate": 1.0880542024391926e-08, "loss": 0.5687, "step": 2115 }, { "epoch": 0.9547659334461365, "grad_norm": 3.2568560809861333, "learning_rate": 1.0666534318132248e-08, "loss": 0.521, "step": 2116 }, { "epoch": 0.9552171460800902, "grad_norm": 3.1510176627650037, "learning_rate": 1.0454640926123581e-08, "loss": 0.6516, "step": 2117 }, { "epoch": 0.955668358714044, "grad_norm": 3.3030358895502543, "learning_rate": 1.0244862301205248e-08, "loss": 0.4968, "step": 2118 }, { "epoch": 0.9561195713479977, "grad_norm": 3.204100014366423, "learning_rate": 1.0037198891697297e-08, "loss": 0.5478, "step": 2119 }, { "epoch": 0.9565707839819515, "grad_norm": 3.180566457304421, "learning_rate": 9.831651141399167e-09, "loss": 0.6166, "step": 2120 }, { "epoch": 0.9570219966159053, "grad_norm": 2.9835868653318114, "learning_rate": 9.62821948958914e-09, "loss": 0.5578, "step": 2121 }, { "epoch": 0.957473209249859, "grad_norm": 3.335223111473605, "learning_rate": 9.42690437102267e-09, "loss": 0.6616, "step": 2122 }, { "epoch": 0.9579244218838128, "grad_norm": 3.0269577095198965, "learning_rate": 9.227706215932718e-09, "loss": 0.4504, "step": 2123 }, { "epoch": 0.9583756345177665, "grad_norm": 3.034533006063733, "learning_rate": 9.030625450027197e-09, "loss": 0.535, "step": 2124 }, { "epoch": 0.9588268471517203, "grad_norm": 3.4956858458681195, "learning_rate": 8.835662494489638e-09, "loss": 0.5865, "step": 2125 }, { "epoch": 0.959278059785674, "grad_norm": 3.2430278438524804, "learning_rate": 8.642817765977084e-09, "loss": 0.568, "step": 2126 }, { "epoch": 0.9597292724196278, "grad_norm": 3.259168460054936, "learning_rate": 8.452091676619976e-09, "loss": 0.4907, "step": 2127 }, { "epoch": 0.9601804850535814, "grad_norm": 2.9506525517388447, "learning_rate": 8.263484634020934e-09, "loss": 0.5421, "step": 2128 }, { "epoch": 0.9606316976875352, "grad_norm": 3.3734708681832846, "learning_rate": 8.076997041253864e-09, "loss": 0.6559, "step": 2129 }, { "epoch": 0.961082910321489, "grad_norm": 3.239791595659205, "learning_rate": 7.892629296863296e-09, "loss": 0.6207, "step": 2130 }, { "epoch": 0.9615341229554427, "grad_norm": 2.9754850080940325, "learning_rate": 7.710381794863275e-09, "loss": 0.4749, "step": 2131 }, { "epoch": 0.9619853355893965, "grad_norm": 3.1775519947710387, "learning_rate": 7.53025492473669e-09, "loss": 0.6391, "step": 2132 }, { "epoch": 0.9624365482233502, "grad_norm": 3.066624979505507, "learning_rate": 7.352249071434613e-09, "loss": 0.6327, "step": 2133 }, { "epoch": 0.962887760857304, "grad_norm": 3.1723120674863563, "learning_rate": 7.176364615374964e-09, "loss": 0.4504, "step": 2134 }, { "epoch": 0.9633389734912577, "grad_norm": 3.026276568695946, "learning_rate": 7.002601932442176e-09, "loss": 0.5376, "step": 2135 }, { "epoch": 0.9637901861252115, "grad_norm": 3.0281976135848914, "learning_rate": 6.830961393986201e-09, "loss": 0.5515, "step": 2136 }, { "epoch": 0.9642413987591653, "grad_norm": 3.5847532083995337, "learning_rate": 6.661443366821618e-09, "loss": 0.5691, "step": 2137 }, { "epoch": 0.964692611393119, "grad_norm": 3.1294010668010026, "learning_rate": 6.4940482132272985e-09, "loss": 0.5833, "step": 2138 }, { "epoch": 0.9651438240270728, "grad_norm": 3.387530884932056, "learning_rate": 6.3287762909447486e-09, "loss": 0.5154, "step": 2139 }, { "epoch": 0.9655950366610265, "grad_norm": 3.646146989054173, "learning_rate": 6.165627953178432e-09, "loss": 0.6854, "step": 2140 }, { "epoch": 0.9660462492949803, "grad_norm": 3.083896198719111, "learning_rate": 6.0046035485941114e-09, "loss": 0.64, "step": 2141 }, { "epoch": 0.966497461928934, "grad_norm": 3.1945218965338165, "learning_rate": 5.845703421318849e-09, "loss": 0.5138, "step": 2142 }, { "epoch": 0.9669486745628878, "grad_norm": 2.9725665961761156, "learning_rate": 5.688927910939445e-09, "loss": 0.5811, "step": 2143 }, { "epoch": 0.9673998871968416, "grad_norm": 3.065001768071281, "learning_rate": 5.534277352502448e-09, "loss": 0.4917, "step": 2144 }, { "epoch": 0.9678510998307953, "grad_norm": 2.9575837796796147, "learning_rate": 5.381752076513146e-09, "loss": 0.4729, "step": 2145 }, { "epoch": 0.968302312464749, "grad_norm": 3.490044797232381, "learning_rate": 5.231352408934686e-09, "loss": 0.5855, "step": 2146 }, { "epoch": 0.9687535250987027, "grad_norm": 3.2834883714280823, "learning_rate": 5.083078671187846e-09, "loss": 0.603, "step": 2147 }, { "epoch": 0.9692047377326565, "grad_norm": 3.6227714783334855, "learning_rate": 4.936931180149706e-09, "loss": 0.5146, "step": 2148 }, { "epoch": 0.9696559503666102, "grad_norm": 3.360342852449879, "learning_rate": 4.792910248153537e-09, "loss": 0.6926, "step": 2149 }, { "epoch": 0.970107163000564, "grad_norm": 2.9745190180859997, "learning_rate": 4.6510161829880215e-09, "loss": 0.4959, "step": 2150 }, { "epoch": 0.9705583756345177, "grad_norm": 2.8409802136356115, "learning_rate": 4.511249287896257e-09, "loss": 0.4792, "step": 2151 }, { "epoch": 0.9710095882684715, "grad_norm": 2.9527499447768024, "learning_rate": 4.373609861575422e-09, "loss": 0.5262, "step": 2152 }, { "epoch": 0.9714608009024253, "grad_norm": 2.8234329667683995, "learning_rate": 4.238098198175999e-09, "loss": 0.453, "step": 2153 }, { "epoch": 0.971912013536379, "grad_norm": 3.1955934495623177, "learning_rate": 4.1047145873015494e-09, "loss": 0.5705, "step": 2154 }, { "epoch": 0.9723632261703328, "grad_norm": 3.2186315351530177, "learning_rate": 3.9734593140072766e-09, "loss": 0.5774, "step": 2155 }, { "epoch": 0.9728144388042865, "grad_norm": 3.2781816568797115, "learning_rate": 3.844332658800131e-09, "loss": 0.534, "step": 2156 }, { "epoch": 0.9732656514382403, "grad_norm": 3.1271563804768516, "learning_rate": 3.717334897638147e-09, "loss": 0.4134, "step": 2157 }, { "epoch": 0.973716864072194, "grad_norm": 3.1072908879945045, "learning_rate": 3.59246630192922e-09, "loss": 0.6476, "step": 2158 }, { "epoch": 0.9741680767061478, "grad_norm": 3.0501654737467048, "learning_rate": 3.469727138531442e-09, "loss": 0.517, "step": 2159 }, { "epoch": 0.9746192893401016, "grad_norm": 3.1296507770053585, "learning_rate": 3.3491176697517663e-09, "loss": 0.5104, "step": 2160 }, { "epoch": 0.9750705019740553, "grad_norm": 3.452718374388686, "learning_rate": 3.2306381533460103e-09, "loss": 0.572, "step": 2161 }, { "epoch": 0.9755217146080091, "grad_norm": 3.002928921865537, "learning_rate": 3.1142888425177428e-09, "loss": 0.5037, "step": 2162 }, { "epoch": 0.9759729272419628, "grad_norm": 3.1485029119178862, "learning_rate": 3.0000699859183965e-09, "loss": 0.5625, "step": 2163 }, { "epoch": 0.9764241398759165, "grad_norm": 3.0075732113850138, "learning_rate": 2.8879818276459357e-09, "loss": 0.6007, "step": 2164 }, { "epoch": 0.9768753525098702, "grad_norm": 3.5339981344018234, "learning_rate": 2.7780246072454103e-09, "loss": 0.641, "step": 2165 }, { "epoch": 0.977326565143824, "grad_norm": 2.9938714355039138, "learning_rate": 2.6701985597071817e-09, "loss": 0.6232, "step": 2166 }, { "epoch": 0.9777777777777777, "grad_norm": 3.5748734649397735, "learning_rate": 2.5645039154675863e-09, "loss": 0.5054, "step": 2167 }, { "epoch": 0.9782289904117315, "grad_norm": 2.821490730111447, "learning_rate": 2.4609409004074934e-09, "loss": 0.3994, "step": 2168 }, { "epoch": 0.9786802030456853, "grad_norm": 3.2813541645652347, "learning_rate": 2.3595097358525275e-09, "loss": 0.5495, "step": 2169 }, { "epoch": 0.979131415679639, "grad_norm": 3.1682355394847694, "learning_rate": 2.26021063857218e-09, "loss": 0.4805, "step": 2170 }, { "epoch": 0.9795826283135928, "grad_norm": 3.940594922294758, "learning_rate": 2.1630438207795864e-09, "loss": 0.5344, "step": 2171 }, { "epoch": 0.9800338409475465, "grad_norm": 2.8957814567912292, "learning_rate": 2.068009490130862e-09, "loss": 0.4688, "step": 2172 }, { "epoch": 0.9804850535815003, "grad_norm": 3.2705425141515643, "learning_rate": 1.9751078497248776e-09, "loss": 0.5038, "step": 2173 }, { "epoch": 0.980936266215454, "grad_norm": 3.2831548812730134, "learning_rate": 1.884339098102483e-09, "loss": 0.4829, "step": 2174 }, { "epoch": 0.9813874788494078, "grad_norm": 3.0060524899921393, "learning_rate": 1.79570342924662e-09, "loss": 0.5734, "step": 2175 }, { "epoch": 0.9818386914833616, "grad_norm": 3.2596135761529332, "learning_rate": 1.709201032581431e-09, "loss": 0.4523, "step": 2176 }, { "epoch": 0.9822899041173153, "grad_norm": 2.987843850321189, "learning_rate": 1.6248320929719283e-09, "loss": 0.5607, "step": 2177 }, { "epoch": 0.9827411167512691, "grad_norm": 2.9922698251969893, "learning_rate": 1.5425967907239933e-09, "loss": 0.5095, "step": 2178 }, { "epoch": 0.9831923293852228, "grad_norm": 3.1200632797078933, "learning_rate": 1.4624953015832663e-09, "loss": 0.5743, "step": 2179 }, { "epoch": 0.9836435420191766, "grad_norm": 3.0528829193084914, "learning_rate": 1.3845277967355905e-09, "loss": 0.4337, "step": 2180 }, { "epoch": 0.9840947546531302, "grad_norm": 2.9733715654870614, "learning_rate": 1.3086944428060132e-09, "loss": 0.3556, "step": 2181 }, { "epoch": 0.984545967287084, "grad_norm": 2.9735610218703026, "learning_rate": 1.234995401858785e-09, "loss": 0.5374, "step": 2182 }, { "epoch": 0.9849971799210377, "grad_norm": 3.3255247596982183, "learning_rate": 1.1634308313966944e-09, "loss": 0.5705, "step": 2183 }, { "epoch": 0.9854483925549915, "grad_norm": 3.379807226305083, "learning_rate": 1.0940008843612903e-09, "loss": 0.6434, "step": 2184 }, { "epoch": 0.9858996051889453, "grad_norm": 3.2662985750115894, "learning_rate": 1.026705709131992e-09, "loss": 0.5889, "step": 2185 }, { "epoch": 0.986350817822899, "grad_norm": 3.4188254818839927, "learning_rate": 9.61545449525758e-10, "loss": 0.4827, "step": 2186 }, { "epoch": 0.9868020304568528, "grad_norm": 3.3483494345972713, "learning_rate": 8.985202447974183e-10, "loss": 0.5504, "step": 2187 }, { "epoch": 0.9872532430908065, "grad_norm": 3.2171372630578676, "learning_rate": 8.376302296387861e-10, "loss": 0.5418, "step": 2188 }, { "epoch": 0.9877044557247603, "grad_norm": 3.6688593817328794, "learning_rate": 7.788755341783249e-10, "loss": 0.5123, "step": 2189 }, { "epoch": 0.988155668358714, "grad_norm": 3.6499281007897153, "learning_rate": 7.222562839813706e-10, "loss": 0.6186, "step": 2190 }, { "epoch": 0.9886068809926678, "grad_norm": 3.39857073628445, "learning_rate": 6.677726000494655e-10, "loss": 0.5808, "step": 2191 }, { "epoch": 0.9890580936266216, "grad_norm": 3.5449673321180204, "learning_rate": 6.154245988202466e-10, "loss": 0.7051, "step": 2192 }, { "epoch": 0.9895093062605753, "grad_norm": 3.248475022142295, "learning_rate": 5.652123921672247e-10, "loss": 0.5403, "step": 2193 }, { "epoch": 0.9899605188945291, "grad_norm": 3.3216368867145736, "learning_rate": 5.171360873991171e-10, "loss": 0.5873, "step": 2194 }, { "epoch": 0.9904117315284828, "grad_norm": 3.0891117032715267, "learning_rate": 4.711957872606254e-10, "loss": 0.5611, "step": 2195 }, { "epoch": 0.9908629441624366, "grad_norm": 3.340990779373879, "learning_rate": 4.273915899309921e-10, "loss": 0.5511, "step": 2196 }, { "epoch": 0.9913141567963903, "grad_norm": 3.062648006172101, "learning_rate": 3.857235890245558e-10, "loss": 0.4507, "step": 2197 }, { "epoch": 0.9917653694303441, "grad_norm": 3.168752716935158, "learning_rate": 3.461918735905289e-10, "loss": 0.5381, "step": 2198 }, { "epoch": 0.9922165820642977, "grad_norm": 3.215884817371043, "learning_rate": 3.0879652811255376e-10, "loss": 0.5669, "step": 2199 }, { "epoch": 0.9926677946982515, "grad_norm": 3.3933100109929524, "learning_rate": 2.735376325084804e-10, "loss": 0.5129, "step": 2200 }, { "epoch": 0.9931190073322053, "grad_norm": 3.2902465372899896, "learning_rate": 2.404152621305888e-10, "loss": 0.5563, "step": 2201 }, { "epoch": 0.993570219966159, "grad_norm": 3.2517872002551265, "learning_rate": 2.0942948776481173e-10, "loss": 0.4794, "step": 2202 }, { "epoch": 0.9940214326001128, "grad_norm": 3.0936052923048227, "learning_rate": 1.805803756314006e-10, "loss": 0.53, "step": 2203 }, { "epoch": 0.9944726452340665, "grad_norm": 3.3848557451197743, "learning_rate": 1.5386798738381557e-10, "loss": 0.5361, "step": 2204 }, { "epoch": 0.9949238578680203, "grad_norm": 2.5261957634383094, "learning_rate": 1.292923801096135e-10, "loss": 0.3873, "step": 2205 }, { "epoch": 0.995375070501974, "grad_norm": 3.2421625947937587, "learning_rate": 1.0685360632933793e-10, "loss": 0.5038, "step": 2206 }, { "epoch": 0.9958262831359278, "grad_norm": 3.283331275472657, "learning_rate": 8.655171399718497e-11, "loss": 0.568, "step": 2207 }, { "epoch": 0.9962774957698816, "grad_norm": 2.9967877566234473, "learning_rate": 6.838674650067044e-11, "loss": 0.4354, "step": 2208 }, { "epoch": 0.9967287084038353, "grad_norm": 3.2686080869856142, "learning_rate": 5.235874266018569e-11, "loss": 0.4931, "step": 2209 }, { "epoch": 0.9971799210377891, "grad_norm": 2.926365790766811, "learning_rate": 3.846773672933068e-11, "loss": 0.6223, "step": 2210 }, { "epoch": 0.9976311336717428, "grad_norm": 3.0651076068440113, "learning_rate": 2.6713758394802943e-11, "loss": 0.6735, "step": 2211 }, { "epoch": 0.9980823463056966, "grad_norm": 3.317938299846236, "learning_rate": 1.709683277606455e-11, "loss": 0.5608, "step": 2212 }, { "epoch": 0.9985335589396503, "grad_norm": 3.170578430333791, "learning_rate": 9.616980425453113e-12, "loss": 0.569, "step": 2213 }, { "epoch": 0.9989847715736041, "grad_norm": 2.805405937442547, "learning_rate": 4.274217328514851e-12, "loss": 0.5771, "step": 2214 }, { "epoch": 0.9994359842075579, "grad_norm": 3.2152320900605185, "learning_rate": 1.068554903005392e-12, "loss": 0.4857, "step": 2215 }, { "epoch": 0.9998871968415116, "grad_norm": 3.2758141104619267, "learning_rate": 0.0, "loss": 0.479, "step": 2216 }, { "epoch": 0.9998871968415116, "step": 2216, "total_flos": 1474645333573632.0, "train_loss": 0.580713667661382, "train_runtime": 104901.7766, "train_samples_per_second": 1.352, "train_steps_per_second": 0.021 } ], "logging_steps": 1.0, "max_steps": 2216, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1474645333573632.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }