{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 3219, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "cosine_similarity": 0.049056025317209324, "epoch": 0.0009319664492078285, "grad_norm": 5.174343686637319, "learning_rate": 6.211180124223603e-08, "loss": 4.6296, "reason_loss": 2.0136165618896484, "step": 1, "utility_loss": 2.615985870361328 }, { "cosine_similarity": 0.07429963516934664, "epoch": 0.001863932898415657, "grad_norm": 4.902334847387688, "learning_rate": 1.2422360248447206e-07, "loss": 4.1935, "reason_loss": 2.020270347595215, "step": 2, "utility_loss": 2.173187017440796 }, { "cosine_similarity": 0.05482567636364661, "epoch": 0.0027958993476234857, "grad_norm": 5.607812624545479, "learning_rate": 1.863354037267081e-07, "loss": 4.7484, "reason_loss": 1.9590719938278198, "step": 3, "utility_loss": 2.789313793182373 }, { "cosine_similarity": 0.09834119831586129, "epoch": 0.003727865796831314, "grad_norm": 6.42290247393403, "learning_rate": 2.484472049689441e-07, "loss": 5.0664, "reason_loss": 1.8798270225524902, "step": 4, "utility_loss": 3.1865382194519043 }, { "cosine_similarity": 0.05367924307483001, "epoch": 0.004659832246039142, "grad_norm": 6.033854251448991, "learning_rate": 3.1055900621118013e-07, "loss": 4.9615, "reason_loss": 1.951655387878418, "step": 5, "utility_loss": 3.0098140239715576 }, { "cosine_similarity": 0.0685233085963026, "epoch": 0.005591798695246971, "grad_norm": 5.532309384305241, "learning_rate": 3.726708074534162e-07, "loss": 4.9487, "reason_loss": 1.9239387512207031, "step": 6, "utility_loss": 3.0247278213500977 }, { "cosine_similarity": 0.10936769350061215, "epoch": 0.0065237651444548, "grad_norm": 6.694297276041314, "learning_rate": 4.347826086956522e-07, "loss": 5.1475, "reason_loss": 1.9293410778045654, "step": 7, "utility_loss": 3.2181124687194824 }, { "cosine_similarity": 0.1141063065283557, "epoch": 0.007455731593662628, "grad_norm": 6.449837492767292, "learning_rate": 4.968944099378882e-07, "loss": 5.0134, "reason_loss": 1.9363446235656738, "step": 8, "utility_loss": 3.077056884765625 }, { "cosine_similarity": 0.0774364801811145, "epoch": 0.008387698042870456, "grad_norm": 6.06517450201447, "learning_rate": 5.590062111801243e-07, "loss": 4.9884, "reason_loss": 1.9428915977478027, "step": 9, "utility_loss": 3.0454821586608887 }, { "cosine_similarity": 0.029033091265860483, "epoch": 0.009319664492078284, "grad_norm": 5.677600198767976, "learning_rate": 6.211180124223603e-07, "loss": 4.6423, "reason_loss": 1.9249502420425415, "step": 10, "utility_loss": 2.7173752784729004 }, { "cosine_similarity": 0.05353856820226346, "epoch": 0.010251630941286114, "grad_norm": 6.487853650757208, "learning_rate": 6.832298136645964e-07, "loss": 4.8956, "reason_loss": 1.855250597000122, "step": 11, "utility_loss": 3.0403716564178467 }, { "cosine_similarity": 0.05450282365651352, "epoch": 0.011183597390493943, "grad_norm": 5.642740362966532, "learning_rate": 7.453416149068324e-07, "loss": 4.9876, "reason_loss": 1.94852614402771, "step": 12, "utility_loss": 3.0391011238098145 }, { "cosine_similarity": 0.10674647039782441, "epoch": 0.012115563839701771, "grad_norm": 7.3556591959392765, "learning_rate": 8.074534161490684e-07, "loss": 5.0345, "reason_loss": 1.8989834785461426, "step": 13, "utility_loss": 3.1355504989624023 }, { "cosine_similarity": 0.04973060499473793, "epoch": 0.0130475302889096, "grad_norm": 5.954131565288857, "learning_rate": 8.695652173913044e-07, "loss": 4.6172, "reason_loss": 1.90389084815979, "step": 14, "utility_loss": 2.7133564949035645 }, { "cosine_similarity": 0.07184452562688735, "epoch": 0.013979496738117428, "grad_norm": 5.207757970211502, "learning_rate": 9.316770186335404e-07, "loss": 4.1442, "reason_loss": 1.8417866230010986, "step": 15, "utility_loss": 2.302407741546631 }, { "cosine_similarity": 0.06680550778886589, "epoch": 0.014911463187325256, "grad_norm": 6.617625902131517, "learning_rate": 9.937888198757765e-07, "loss": 4.9838, "reason_loss": 1.8954801559448242, "step": 16, "utility_loss": 3.088344097137451 }, { "cosine_similarity": 0.06964114778000721, "epoch": 0.015843429636533086, "grad_norm": 5.533353400317149, "learning_rate": 1.0559006211180126e-06, "loss": 4.4981, "reason_loss": 1.8258848190307617, "step": 17, "utility_loss": 2.6722278594970703 }, { "cosine_similarity": 0.06506933710499058, "epoch": 0.016775396085740912, "grad_norm": 5.410365900332844, "learning_rate": 1.1180124223602485e-06, "loss": 4.3299, "reason_loss": 1.820643424987793, "step": 18, "utility_loss": 2.5092315673828125 }, { "cosine_similarity": 0.09736273356609727, "epoch": 0.017707362534948742, "grad_norm": 5.643212420857473, "learning_rate": 1.1801242236024846e-06, "loss": 4.9796, "reason_loss": 1.7692761421203613, "step": 19, "utility_loss": 3.2102856636047363 }, { "cosine_similarity": 0.06460945056392373, "epoch": 0.01863932898415657, "grad_norm": 5.191736383883793, "learning_rate": 1.2422360248447205e-06, "loss": 4.306, "reason_loss": 1.6591992378234863, "step": 20, "utility_loss": 2.64679217338562 }, { "cosine_similarity": 0.07006602424469806, "epoch": 0.0195712954333644, "grad_norm": 6.192145163094002, "learning_rate": 1.3043478260869566e-06, "loss": 4.7378, "reason_loss": 1.6416113376617432, "step": 21, "utility_loss": 3.096193790435791 }, { "cosine_similarity": 0.08044725049728431, "epoch": 0.02050326188257223, "grad_norm": 4.725937051982072, "learning_rate": 1.3664596273291927e-06, "loss": 4.1238, "reason_loss": 1.566626787185669, "step": 22, "utility_loss": 2.5571563243865967 }, { "cosine_similarity": 0.06725165726498548, "epoch": 0.021435228331780055, "grad_norm": 4.650889996940655, "learning_rate": 1.4285714285714286e-06, "loss": 4.0849, "reason_loss": 1.5605885982513428, "step": 23, "utility_loss": 2.524265766143799 }, { "cosine_similarity": 0.10494904410835182, "epoch": 0.022367194780987885, "grad_norm": 4.9494884410927895, "learning_rate": 1.4906832298136647e-06, "loss": 4.3505, "reason_loss": 1.6158297061920166, "step": 24, "utility_loss": 2.734640598297119 }, { "cosine_similarity": 0.07757362951843072, "epoch": 0.023299161230195712, "grad_norm": 5.140189689097866, "learning_rate": 1.5527950310559006e-06, "loss": 4.1859, "reason_loss": 1.47239089012146, "step": 25, "utility_loss": 2.7135424613952637 }, { "cosine_similarity": 0.04463444181106974, "epoch": 0.024231127679403542, "grad_norm": 5.510551997178625, "learning_rate": 1.6149068322981367e-06, "loss": 3.6511, "reason_loss": 1.417100191116333, "step": 26, "utility_loss": 2.234046459197998 }, { "cosine_similarity": 0.10592171288315215, "epoch": 0.02516309412861137, "grad_norm": 4.903039117493704, "learning_rate": 1.6770186335403729e-06, "loss": 4.33, "reason_loss": 1.3346327543258667, "step": 27, "utility_loss": 2.9953997135162354 }, { "cosine_similarity": 0.08783236998984452, "epoch": 0.0260950605778192, "grad_norm": 4.82258572740209, "learning_rate": 1.7391304347826088e-06, "loss": 3.8481, "reason_loss": 1.2780516147613525, "step": 28, "utility_loss": 2.570054531097412 }, { "cosine_similarity": 0.1388599697152716, "epoch": 0.02702702702702703, "grad_norm": 4.223837079195455, "learning_rate": 1.8012422360248449e-06, "loss": 3.9624, "reason_loss": 1.2309091091156006, "step": 29, "utility_loss": 2.7315025329589844 }, { "cosine_similarity": 0.14003315737572541, "epoch": 0.027958993476234855, "grad_norm": 3.367305323878972, "learning_rate": 1.8633540372670808e-06, "loss": 3.5769, "reason_loss": 1.2085893154144287, "step": 30, "utility_loss": 2.3683395385742188 }, { "cosine_similarity": 0.14389709633009312, "epoch": 0.028890959925442685, "grad_norm": 4.236342184821913, "learning_rate": 1.925465838509317e-06, "loss": 3.8751, "reason_loss": 1.2003422975540161, "step": 31, "utility_loss": 2.6747360229492188 }, { "cosine_similarity": 0.1304645213254926, "epoch": 0.02982292637465051, "grad_norm": 4.474489653734865, "learning_rate": 1.987577639751553e-06, "loss": 3.7521, "reason_loss": 1.1791752576828003, "step": 32, "utility_loss": 2.572936773300171 }, { "cosine_similarity": 0.1484346391583281, "epoch": 0.03075489282385834, "grad_norm": 4.619551561052741, "learning_rate": 2.049689440993789e-06, "loss": 3.9299, "reason_loss": 1.1354162693023682, "step": 33, "utility_loss": 2.7944912910461426 }, { "cosine_similarity": 0.19531446513326656, "epoch": 0.03168685927306617, "grad_norm": 4.195470741058847, "learning_rate": 2.111801242236025e-06, "loss": 3.6061, "reason_loss": 1.119168758392334, "step": 34, "utility_loss": 2.4869070053100586 }, { "cosine_similarity": 0.2474230617262728, "epoch": 0.032618825722273995, "grad_norm": 4.258744120195248, "learning_rate": 2.173913043478261e-06, "loss": 3.9603, "reason_loss": 1.140371561050415, "step": 35, "utility_loss": 2.819902181625366 }, { "cosine_similarity": 0.26610669071118537, "epoch": 0.033550792171481825, "grad_norm": 4.023453094061879, "learning_rate": 2.236024844720497e-06, "loss": 3.7028, "reason_loss": 1.064042091369629, "step": 36, "utility_loss": 2.638801097869873 }, { "cosine_similarity": 0.23974244321747265, "epoch": 0.034482758620689655, "grad_norm": 3.973805605383643, "learning_rate": 2.298136645962733e-06, "loss": 3.804, "reason_loss": 1.0596609115600586, "step": 37, "utility_loss": 2.7443809509277344 }, { "cosine_similarity": 0.16564017102564194, "epoch": 0.035414725069897485, "grad_norm": 4.394669114418239, "learning_rate": 2.3602484472049692e-06, "loss": 3.7713, "reason_loss": 1.0557624101638794, "step": 38, "utility_loss": 2.715508460998535 }, { "cosine_similarity": 0.2381714482984004, "epoch": 0.036346691519105315, "grad_norm": 3.8536451860895466, "learning_rate": 2.422360248447205e-06, "loss": 3.8127, "reason_loss": 1.0105211734771729, "step": 39, "utility_loss": 2.802224636077881 }, { "cosine_similarity": 0.24807901456951315, "epoch": 0.03727865796831314, "grad_norm": 3.8425189353790725, "learning_rate": 2.484472049689441e-06, "loss": 3.7409, "reason_loss": 1.0404850244522095, "step": 40, "utility_loss": 2.700380802154541 }, { "cosine_similarity": 0.17429865218438076, "epoch": 0.03821062441752097, "grad_norm": 3.26848289244622, "learning_rate": 2.546583850931677e-06, "loss": 3.3723, "reason_loss": 0.9485936164855957, "step": 41, "utility_loss": 2.4237558841705322 }, { "cosine_similarity": 0.21889843334729941, "epoch": 0.0391425908667288, "grad_norm": 4.601247014893779, "learning_rate": 2.6086956521739132e-06, "loss": 3.6151, "reason_loss": 1.0204225778579712, "step": 42, "utility_loss": 2.594630718231201 }, { "cosine_similarity": 0.24333432663526972, "epoch": 0.04007455731593663, "grad_norm": 3.467973333629835, "learning_rate": 2.670807453416149e-06, "loss": 3.554, "reason_loss": 0.9685724973678589, "step": 43, "utility_loss": 2.5854578018188477 }, { "cosine_similarity": 0.24640252446365685, "epoch": 0.04100652376514446, "grad_norm": 3.4922582832836517, "learning_rate": 2.7329192546583855e-06, "loss": 3.5559, "reason_loss": 0.9138481616973877, "step": 44, "utility_loss": 2.642045021057129 }, { "cosine_similarity": 0.24508733809656805, "epoch": 0.04193849021435228, "grad_norm": 3.4034582364616783, "learning_rate": 2.795031055900621e-06, "loss": 3.6471, "reason_loss": 0.9794975519180298, "step": 45, "utility_loss": 2.667604923248291 }, { "cosine_similarity": 0.20437721377720572, "epoch": 0.04287045666356011, "grad_norm": 3.45133713187676, "learning_rate": 2.8571428571428573e-06, "loss": 2.9428, "reason_loss": 0.9233300685882568, "step": 46, "utility_loss": 2.019505739212036 }, { "cosine_similarity": 0.12352954134744215, "epoch": 0.04380242311276794, "grad_norm": 3.6026236989151235, "learning_rate": 2.919254658385093e-06, "loss": 3.3391, "reason_loss": 0.968403697013855, "step": 47, "utility_loss": 2.3707244396209717 }, { "cosine_similarity": 0.1872459397947163, "epoch": 0.04473438956197577, "grad_norm": 3.2457825619538085, "learning_rate": 2.9813664596273295e-06, "loss": 3.7477, "reason_loss": 0.9284577965736389, "step": 48, "utility_loss": 2.8192238807678223 }, { "cosine_similarity": 0.15048913755602847, "epoch": 0.045666356011183594, "grad_norm": 3.825670246422583, "learning_rate": 3.043478260869566e-06, "loss": 3.4745, "reason_loss": 0.9025908708572388, "step": 49, "utility_loss": 2.5719375610351562 }, { "cosine_similarity": 0.185798037091775, "epoch": 0.046598322460391424, "grad_norm": 3.78036918185859, "learning_rate": 3.1055900621118013e-06, "loss": 3.3242, "reason_loss": 0.8346738219261169, "step": 50, "utility_loss": 2.489501476287842 }, { "cosine_similarity": 0.3224885359438948, "epoch": 0.047530288909599254, "grad_norm": 5.499641467404342, "learning_rate": 3.1677018633540376e-06, "loss": 3.3223, "reason_loss": 0.8404145240783691, "step": 51, "utility_loss": 2.4818789958953857 }, { "cosine_similarity": 0.14780404411565612, "epoch": 0.048462255358807084, "grad_norm": 4.732509842732253, "learning_rate": 3.2298136645962735e-06, "loss": 3.2311, "reason_loss": 0.855589747428894, "step": 52, "utility_loss": 2.375507354736328 }, { "cosine_similarity": 0.0044226536110686925, "epoch": 0.049394221808014914, "grad_norm": 3.2314953562977187, "learning_rate": 3.29192546583851e-06, "loss": 3.57, "reason_loss": 0.8681210279464722, "step": 53, "utility_loss": 2.7018754482269287 }, { "cosine_similarity": 0.2529356597445025, "epoch": 0.05032618825722274, "grad_norm": 4.461625736033676, "learning_rate": 3.3540372670807457e-06, "loss": 3.4875, "reason_loss": 0.832642674446106, "step": 54, "utility_loss": 2.6548681259155273 }, { "cosine_similarity": 0.21885243695805434, "epoch": 0.05125815470643057, "grad_norm": 3.7656900654823056, "learning_rate": 3.4161490683229816e-06, "loss": 3.602, "reason_loss": 0.9071377515792847, "step": 55, "utility_loss": 2.69490385055542 }, { "cosine_similarity": 0.1194550715869148, "epoch": 0.0521901211556384, "grad_norm": 2.9786838465254, "learning_rate": 3.4782608695652175e-06, "loss": 3.1666, "reason_loss": 0.8078814744949341, "step": 56, "utility_loss": 2.3587253093719482 }, { "cosine_similarity": 0.10006853303973975, "epoch": 0.05312208760484623, "grad_norm": 7.275635047651692, "learning_rate": 3.540372670807454e-06, "loss": 3.6159, "reason_loss": 0.8140820264816284, "step": 57, "utility_loss": 2.801833152770996 }, { "cosine_similarity": 0.041214938889486886, "epoch": 0.05405405405405406, "grad_norm": 3.061163520861295, "learning_rate": 3.6024844720496897e-06, "loss": 3.5312, "reason_loss": 0.8305523991584778, "step": 58, "utility_loss": 2.70064377784729 }, { "cosine_similarity": 0.02712116529230839, "epoch": 0.05498602050326188, "grad_norm": 3.2668765571382563, "learning_rate": 3.664596273291926e-06, "loss": 3.6319, "reason_loss": 0.7755326628684998, "step": 59, "utility_loss": 2.8564107418060303 }, { "cosine_similarity": 0.0562514463746731, "epoch": 0.05591798695246971, "grad_norm": 3.2887121425320616, "learning_rate": 3.7267080745341615e-06, "loss": 3.5556, "reason_loss": 0.7950482368469238, "step": 60, "utility_loss": 2.76057767868042 }, { "cosine_similarity": -0.11245413132274609, "epoch": 0.05684995340167754, "grad_norm": 3.688304088030226, "learning_rate": 3.788819875776398e-06, "loss": 3.0603, "reason_loss": 0.8178303241729736, "step": 61, "utility_loss": 2.2424447536468506 }, { "cosine_similarity": 0.16939693061872133, "epoch": 0.05778191985088537, "grad_norm": 3.700990324222764, "learning_rate": 3.850931677018634e-06, "loss": 3.3144, "reason_loss": 0.8036269545555115, "step": 62, "utility_loss": 2.510773181915283 }, { "cosine_similarity": 0.017581896845718873, "epoch": 0.05871388630009319, "grad_norm": 2.762275247813995, "learning_rate": 3.91304347826087e-06, "loss": 2.7966, "reason_loss": 0.7610481381416321, "step": 63, "utility_loss": 2.0355191230773926 }, { "cosine_similarity": 0.030699922904587784, "epoch": 0.05964585274930102, "grad_norm": 3.2860760231328237, "learning_rate": 3.975155279503106e-06, "loss": 3.2634, "reason_loss": 0.7985856533050537, "step": 64, "utility_loss": 2.4648101329803467 }, { "cosine_similarity": 0.14356443285474182, "epoch": 0.06057781919850885, "grad_norm": 2.8508454578557214, "learning_rate": 4.037267080745342e-06, "loss": 3.403, "reason_loss": 0.7266683578491211, "step": 65, "utility_loss": 2.676318645477295 }, { "cosine_similarity": 0.16758724819683005, "epoch": 0.06150978564771668, "grad_norm": 2.5071386753044806, "learning_rate": 4.099378881987578e-06, "loss": 3.0922, "reason_loss": 0.7905038595199585, "step": 66, "utility_loss": 2.3016695976257324 }, { "cosine_similarity": 0.09543026612081214, "epoch": 0.06244175209692451, "grad_norm": 3.0643974996214944, "learning_rate": 4.1614906832298145e-06, "loss": 3.117, "reason_loss": 0.7904856204986572, "step": 67, "utility_loss": 2.326504945755005 }, { "cosine_similarity": 0.07877493861071704, "epoch": 0.06337371854613234, "grad_norm": 3.1682389318824677, "learning_rate": 4.22360248447205e-06, "loss": 3.0174, "reason_loss": 0.7557171583175659, "step": 68, "utility_loss": 2.261709690093994 }, { "cosine_similarity": 0.18205926042306633, "epoch": 0.06430568499534017, "grad_norm": 3.356529814090341, "learning_rate": 4.2857142857142855e-06, "loss": 3.5199, "reason_loss": 0.754740834236145, "step": 69, "utility_loss": 2.765148639678955 }, { "cosine_similarity": 0.1205513536834549, "epoch": 0.06523765144454799, "grad_norm": 2.8947352919103158, "learning_rate": 4.347826086956522e-06, "loss": 3.3626, "reason_loss": 0.7604979276657104, "step": 70, "utility_loss": 2.6020922660827637 }, { "cosine_similarity": 0.07075034459361558, "epoch": 0.06616961789375582, "grad_norm": 2.819864881566127, "learning_rate": 4.409937888198758e-06, "loss": 3.405, "reason_loss": 0.7593247890472412, "step": 71, "utility_loss": 2.645714282989502 }, { "cosine_similarity": -0.006548006969427792, "epoch": 0.06710158434296365, "grad_norm": 2.829974113610305, "learning_rate": 4.472049689440994e-06, "loss": 3.2777, "reason_loss": 0.768007755279541, "step": 72, "utility_loss": 2.509664297103882 }, { "cosine_similarity": -0.028956474686406705, "epoch": 0.06803355079217148, "grad_norm": 3.512253076310519, "learning_rate": 4.534161490683231e-06, "loss": 3.4262, "reason_loss": 0.7438129186630249, "step": 73, "utility_loss": 2.6823530197143555 }, { "cosine_similarity": 0.0963098172081488, "epoch": 0.06896551724137931, "grad_norm": 2.656242741170821, "learning_rate": 4.596273291925466e-06, "loss": 3.0311, "reason_loss": 0.7310754060745239, "step": 74, "utility_loss": 2.299987554550171 }, { "cosine_similarity": 0.013194941455931832, "epoch": 0.06989748369058714, "grad_norm": 4.226949965329749, "learning_rate": 4.6583850931677025e-06, "loss": 3.2186, "reason_loss": 0.7448288798332214, "step": 75, "utility_loss": 2.4737656116485596 }, { "cosine_similarity": -0.05405329455683192, "epoch": 0.07082945013979497, "grad_norm": 2.846939699599349, "learning_rate": 4.7204968944099384e-06, "loss": 3.083, "reason_loss": 0.7614796161651611, "step": 76, "utility_loss": 2.321510076522827 }, { "cosine_similarity": -0.07443595053188193, "epoch": 0.0717614165890028, "grad_norm": 2.6938975739772957, "learning_rate": 4.782608695652174e-06, "loss": 2.6426, "reason_loss": 0.7614542245864868, "step": 77, "utility_loss": 1.8811142444610596 }, { "cosine_similarity": 0.043384121832338085, "epoch": 0.07269338303821063, "grad_norm": 3.483589702216811, "learning_rate": 4.84472049689441e-06, "loss": 3.1369, "reason_loss": 0.7134395241737366, "step": 78, "utility_loss": 2.4234814643859863 }, { "cosine_similarity": 0.06842553244453246, "epoch": 0.07362534948741846, "grad_norm": 2.9599728689681557, "learning_rate": 4.906832298136646e-06, "loss": 2.9852, "reason_loss": 0.7026999592781067, "step": 79, "utility_loss": 2.282458543777466 }, { "cosine_similarity": 0.019303188842541154, "epoch": 0.07455731593662628, "grad_norm": 2.564412414328797, "learning_rate": 4.968944099378882e-06, "loss": 2.9422, "reason_loss": 0.75994873046875, "step": 80, "utility_loss": 2.1823012828826904 }, { "cosine_similarity": -0.05215372107963201, "epoch": 0.0754892823858341, "grad_norm": 3.3486049052271523, "learning_rate": 5.031055900621118e-06, "loss": 3.1527, "reason_loss": 0.7401418089866638, "step": 81, "utility_loss": 2.41259765625 }, { "cosine_similarity": -0.10150467395589263, "epoch": 0.07642124883504194, "grad_norm": 2.9468157820336685, "learning_rate": 5.093167701863354e-06, "loss": 3.226, "reason_loss": 0.7135269641876221, "step": 82, "utility_loss": 2.5124478340148926 }, { "cosine_similarity": 0.047799062243440535, "epoch": 0.07735321528424977, "grad_norm": 2.9898029678560007, "learning_rate": 5.155279503105591e-06, "loss": 2.8395, "reason_loss": 0.7159587740898132, "step": 83, "utility_loss": 2.1235368251800537 }, { "cosine_similarity": -0.03305201922700177, "epoch": 0.0782851817334576, "grad_norm": 1.9395692057307954, "learning_rate": 5.2173913043478265e-06, "loss": 2.8218, "reason_loss": 0.7270340919494629, "step": 84, "utility_loss": 2.094757080078125 }, { "cosine_similarity": -0.0030684384044343898, "epoch": 0.07921714818266543, "grad_norm": 3.5155894661982074, "learning_rate": 5.279503105590062e-06, "loss": 2.899, "reason_loss": 0.7196202278137207, "step": 85, "utility_loss": 2.1793909072875977 }, { "cosine_similarity": 0.04921451689088847, "epoch": 0.08014911463187326, "grad_norm": 2.3312450508293248, "learning_rate": 5.341614906832298e-06, "loss": 3.3917, "reason_loss": 0.7399660348892212, "step": 86, "utility_loss": 2.651764392852783 }, { "cosine_similarity": 0.16552881922116724, "epoch": 0.08108108108108109, "grad_norm": 2.6517764526702012, "learning_rate": 5.403726708074535e-06, "loss": 3.6956, "reason_loss": 0.7384814023971558, "step": 87, "utility_loss": 2.957136869430542 }, { "cosine_similarity": 0.12732885374599653, "epoch": 0.08201304753028892, "grad_norm": 2.9846938115338264, "learning_rate": 5.465838509316771e-06, "loss": 3.2694, "reason_loss": 0.7426446676254272, "step": 88, "utility_loss": 2.5267229080200195 }, { "cosine_similarity": -0.04179481969922582, "epoch": 0.08294501397949673, "grad_norm": 2.471815340752135, "learning_rate": 5.527950310559007e-06, "loss": 3.1507, "reason_loss": 0.7642133235931396, "step": 89, "utility_loss": 2.386517286300659 }, { "cosine_similarity": 0.08526989294850554, "epoch": 0.08387698042870456, "grad_norm": 2.5193164455798103, "learning_rate": 5.590062111801242e-06, "loss": 2.728, "reason_loss": 0.6697672605514526, "step": 90, "utility_loss": 2.0582261085510254 }, { "cosine_similarity": 0.002013850122778158, "epoch": 0.08480894687791239, "grad_norm": 2.2397136906708885, "learning_rate": 5.652173913043479e-06, "loss": 2.8304, "reason_loss": 0.7239522337913513, "step": 91, "utility_loss": 2.106428384780884 }, { "cosine_similarity": 0.10519723359853063, "epoch": 0.08574091332712022, "grad_norm": 2.3790344685553015, "learning_rate": 5.7142857142857145e-06, "loss": 2.7538, "reason_loss": 0.7070230841636658, "step": 92, "utility_loss": 2.0468101501464844 }, { "cosine_similarity": 0.0562885317077605, "epoch": 0.08667287977632805, "grad_norm": 3.2695157382056608, "learning_rate": 5.77639751552795e-06, "loss": 3.0324, "reason_loss": 0.7103948593139648, "step": 93, "utility_loss": 2.322000503540039 }, { "cosine_similarity": -0.007217656732115926, "epoch": 0.08760484622553588, "grad_norm": 2.6123012836424113, "learning_rate": 5.838509316770186e-06, "loss": 2.9534, "reason_loss": 0.7068930864334106, "step": 94, "utility_loss": 2.246479034423828 }, { "cosine_similarity": 0.16300146415554492, "epoch": 0.08853681267474371, "grad_norm": 2.7096736862991464, "learning_rate": 5.900621118012423e-06, "loss": 3.0484, "reason_loss": 0.6902628540992737, "step": 95, "utility_loss": 2.3581576347351074 }, { "cosine_similarity": 0.0540291379404309, "epoch": 0.08946877912395154, "grad_norm": 2.854324602049061, "learning_rate": 5.962732919254659e-06, "loss": 3.1751, "reason_loss": 0.7267820239067078, "step": 96, "utility_loss": 2.448303699493408 }, { "cosine_similarity": 0.2163273904390716, "epoch": 0.09040074557315937, "grad_norm": 2.3274790505077694, "learning_rate": 6.024844720496895e-06, "loss": 2.7986, "reason_loss": 0.6456356048583984, "step": 97, "utility_loss": 2.1529154777526855 }, { "cosine_similarity": -0.0006596083235065174, "epoch": 0.09133271202236719, "grad_norm": 2.6293862265422154, "learning_rate": 6.086956521739132e-06, "loss": 2.5237, "reason_loss": 0.6657384634017944, "step": 98, "utility_loss": 1.8579347133636475 }, { "cosine_similarity": -0.04426557275990573, "epoch": 0.09226467847157502, "grad_norm": 2.931048539517576, "learning_rate": 6.1490683229813675e-06, "loss": 3.0961, "reason_loss": 0.7101889848709106, "step": 99, "utility_loss": 2.3859152793884277 }, { "cosine_similarity": 0.09419948389067001, "epoch": 0.09319664492078285, "grad_norm": 3.0554367371422915, "learning_rate": 6.2111801242236025e-06, "loss": 2.8285, "reason_loss": 0.7033330798149109, "step": 100, "utility_loss": 2.125210762023926 }, { "cosine_similarity": 0.07095481783942281, "epoch": 0.09412861136999068, "grad_norm": 2.5620380365048003, "learning_rate": 6.2732919254658384e-06, "loss": 2.8044, "reason_loss": 0.6892639398574829, "step": 101, "utility_loss": 2.1150920391082764 }, { "cosine_similarity": -0.1618652058722441, "epoch": 0.09506057781919851, "grad_norm": 2.7732441802973096, "learning_rate": 6.335403726708075e-06, "loss": 2.8884, "reason_loss": 0.6715938448905945, "step": 102, "utility_loss": 2.2167606353759766 }, { "cosine_similarity": 0.12139959550377882, "epoch": 0.09599254426840634, "grad_norm": 2.9508120854946354, "learning_rate": 6.397515527950311e-06, "loss": 2.933, "reason_loss": 0.6963173151016235, "step": 103, "utility_loss": 2.2366366386413574 }, { "cosine_similarity": 0.06180402425968609, "epoch": 0.09692451071761417, "grad_norm": 2.2096121501115022, "learning_rate": 6.459627329192547e-06, "loss": 2.7325, "reason_loss": 0.6886464953422546, "step": 104, "utility_loss": 2.0438437461853027 }, { "cosine_similarity": 0.07260004317396444, "epoch": 0.097856477166822, "grad_norm": 3.071493659009383, "learning_rate": 6.521739130434783e-06, "loss": 3.0998, "reason_loss": 0.6556248068809509, "step": 105, "utility_loss": 2.4441847801208496 }, { "cosine_similarity": 0.332035224461013, "epoch": 0.09878844361602983, "grad_norm": 2.5344751756777564, "learning_rate": 6.58385093167702e-06, "loss": 3.4763, "reason_loss": 0.6812257766723633, "step": 106, "utility_loss": 2.795081615447998 }, { "cosine_similarity": 0.27147019245811616, "epoch": 0.09972041006523766, "grad_norm": 2.60685103159345, "learning_rate": 6.6459627329192555e-06, "loss": 2.7227, "reason_loss": 0.6622864007949829, "step": 107, "utility_loss": 2.06044864654541 }, { "cosine_similarity": 0.016911357844245415, "epoch": 0.10065237651444547, "grad_norm": 3.442244240643887, "learning_rate": 6.7080745341614914e-06, "loss": 2.9947, "reason_loss": 0.7183108329772949, "step": 108, "utility_loss": 2.276437759399414 }, { "cosine_similarity": -0.06687936795857106, "epoch": 0.1015843429636533, "grad_norm": 2.097999316588357, "learning_rate": 6.7701863354037265e-06, "loss": 2.5288, "reason_loss": 0.6564284563064575, "step": 109, "utility_loss": 1.8723464012145996 }, { "cosine_similarity": 0.011618797028355794, "epoch": 0.10251630941286113, "grad_norm": 2.58058914579159, "learning_rate": 6.832298136645963e-06, "loss": 2.6883, "reason_loss": 0.6889911890029907, "step": 110, "utility_loss": 1.9993551969528198 }, { "cosine_similarity": -0.2805104525744817, "epoch": 0.10344827586206896, "grad_norm": 2.6551040667026418, "learning_rate": 6.894409937888199e-06, "loss": 2.9116, "reason_loss": 0.6605509519577026, "step": 111, "utility_loss": 2.251047134399414 }, { "cosine_similarity": 0.007583784520316713, "epoch": 0.1043802423112768, "grad_norm": 2.6780122226426895, "learning_rate": 6.956521739130435e-06, "loss": 2.8957, "reason_loss": 0.7209734916687012, "step": 112, "utility_loss": 2.1747312545776367 }, { "cosine_similarity": 0.04043673561041118, "epoch": 0.10531220876048462, "grad_norm": 3.3403102397285958, "learning_rate": 7.018633540372671e-06, "loss": 3.3669, "reason_loss": 0.6931015253067017, "step": 113, "utility_loss": 2.6737546920776367 }, { "cosine_similarity": 0.03068035677108777, "epoch": 0.10624417520969245, "grad_norm": 2.714331835203537, "learning_rate": 7.080745341614908e-06, "loss": 2.8032, "reason_loss": 0.6481518745422363, "step": 114, "utility_loss": 2.1550190448760986 }, { "cosine_similarity": -0.17043424264151497, "epoch": 0.10717614165890028, "grad_norm": 2.4930482372825025, "learning_rate": 7.1428571428571436e-06, "loss": 3.7675, "reason_loss": 0.6548296213150024, "step": 115, "utility_loss": 3.1126391887664795 }, { "cosine_similarity": -0.009815804415197213, "epoch": 0.10810810810810811, "grad_norm": 2.739023632902644, "learning_rate": 7.2049689440993795e-06, "loss": 2.7292, "reason_loss": 0.6553475260734558, "step": 116, "utility_loss": 2.073878288269043 }, { "cosine_similarity": -0.10509552882192223, "epoch": 0.10904007455731593, "grad_norm": 2.341697800804607, "learning_rate": 7.267080745341616e-06, "loss": 3.127, "reason_loss": 0.6613598465919495, "step": 117, "utility_loss": 2.465689182281494 }, { "cosine_similarity": 0.001598511569650602, "epoch": 0.10997204100652376, "grad_norm": 2.4113769027972496, "learning_rate": 7.329192546583852e-06, "loss": 2.9317, "reason_loss": 0.6725475192070007, "step": 118, "utility_loss": 2.259195327758789 }, { "cosine_similarity": 0.07740946050696305, "epoch": 0.11090400745573159, "grad_norm": 3.5594058110623954, "learning_rate": 7.391304347826087e-06, "loss": 3.0227, "reason_loss": 0.6612075567245483, "step": 119, "utility_loss": 2.361542224884033 }, { "cosine_similarity": 0.06686549088684986, "epoch": 0.11183597390493942, "grad_norm": 2.063196147080955, "learning_rate": 7.453416149068323e-06, "loss": 2.6942, "reason_loss": 0.6535689830780029, "step": 120, "utility_loss": 2.040656089782715 }, { "cosine_similarity": -0.11469600249819203, "epoch": 0.11276794035414725, "grad_norm": 2.368913459158062, "learning_rate": 7.515527950310559e-06, "loss": 3.0985, "reason_loss": 0.6825748682022095, "step": 121, "utility_loss": 2.4158945083618164 }, { "cosine_similarity": 0.10269154670837714, "epoch": 0.11369990680335508, "grad_norm": 4.3233421582695595, "learning_rate": 7.577639751552796e-06, "loss": 2.6215, "reason_loss": 0.6785076856613159, "step": 122, "utility_loss": 1.943007469177246 }, { "cosine_similarity": 0.13506991925824796, "epoch": 0.11463187325256291, "grad_norm": 2.3920551233388148, "learning_rate": 7.639751552795032e-06, "loss": 2.9895, "reason_loss": 0.6360074281692505, "step": 123, "utility_loss": 2.3534975051879883 }, { "cosine_similarity": -0.22732044405638185, "epoch": 0.11556383970177074, "grad_norm": 2.810843878216605, "learning_rate": 7.701863354037268e-06, "loss": 2.7278, "reason_loss": 0.649034321308136, "step": 124, "utility_loss": 2.0787291526794434 }, { "cosine_similarity": 0.17036579336979488, "epoch": 0.11649580615097857, "grad_norm": 2.9782580319166736, "learning_rate": 7.763975155279503e-06, "loss": 3.0401, "reason_loss": 0.6209731698036194, "step": 125, "utility_loss": 2.419099807739258 }, { "cosine_similarity": 0.10072099339590752, "epoch": 0.11742777260018639, "grad_norm": 3.1437566867616202, "learning_rate": 7.82608695652174e-06, "loss": 3.1906, "reason_loss": 0.6655007004737854, "step": 126, "utility_loss": 2.5250792503356934 }, { "cosine_similarity": 0.014534032838714087, "epoch": 0.11835973904939422, "grad_norm": 2.7395710492687546, "learning_rate": 7.888198757763977e-06, "loss": 2.9882, "reason_loss": 0.6694810390472412, "step": 127, "utility_loss": 2.31870436668396 }, { "cosine_similarity": -0.12981894299386804, "epoch": 0.11929170549860205, "grad_norm": 2.704149682753343, "learning_rate": 7.950310559006212e-06, "loss": 2.8771, "reason_loss": 0.6587416529655457, "step": 128, "utility_loss": 2.2183475494384766 }, { "cosine_similarity": -0.12035748976688074, "epoch": 0.12022367194780988, "grad_norm": 2.68434205421851, "learning_rate": 8.012422360248447e-06, "loss": 3.0889, "reason_loss": 0.6470733284950256, "step": 129, "utility_loss": 2.4417953491210938 }, { "cosine_similarity": 0.1735840698587087, "epoch": 0.1211556383970177, "grad_norm": 3.171702718922044, "learning_rate": 8.074534161490684e-06, "loss": 3.2672, "reason_loss": 0.6580490469932556, "step": 130, "utility_loss": 2.6091761589050293 }, { "cosine_similarity": -0.06845128235458545, "epoch": 0.12208760484622554, "grad_norm": 2.8279304396426563, "learning_rate": 8.13664596273292e-06, "loss": 2.9495, "reason_loss": 0.6393916606903076, "step": 131, "utility_loss": 2.310124397277832 }, { "cosine_similarity": -0.1777061313995281, "epoch": 0.12301957129543337, "grad_norm": 2.3897239261341596, "learning_rate": 8.198757763975156e-06, "loss": 3.0079, "reason_loss": 0.6749758124351501, "step": 132, "utility_loss": 2.3328793048858643 }, { "cosine_similarity": 0.30211537130029653, "epoch": 0.1239515377446412, "grad_norm": 2.0974107487513076, "learning_rate": 8.260869565217392e-06, "loss": 2.6389, "reason_loss": 0.665618360042572, "step": 133, "utility_loss": 1.9732400178909302 }, { "cosine_similarity": -0.05109706662485632, "epoch": 0.12488350419384903, "grad_norm": 3.209112564964811, "learning_rate": 8.322981366459629e-06, "loss": 3.2083, "reason_loss": 0.6934763193130493, "step": 134, "utility_loss": 2.514864921569824 }, { "cosine_similarity": 0.29665831077683424, "epoch": 0.12581547064305684, "grad_norm": 3.678890392583701, "learning_rate": 8.385093167701864e-06, "loss": 3.0552, "reason_loss": 0.6256195902824402, "step": 135, "utility_loss": 2.4296205043792725 }, { "cosine_similarity": 0.016779860155807066, "epoch": 0.1267474370922647, "grad_norm": 2.1523090106906313, "learning_rate": 8.4472049689441e-06, "loss": 2.5415, "reason_loss": 0.6290873289108276, "step": 136, "utility_loss": 1.9123625755310059 }, { "cosine_similarity": 0.1650422647586629, "epoch": 0.1276794035414725, "grad_norm": 2.6249178258370565, "learning_rate": 8.509316770186336e-06, "loss": 2.6705, "reason_loss": 0.6370376348495483, "step": 137, "utility_loss": 2.0334126949310303 }, { "cosine_similarity": 0.08464519628992316, "epoch": 0.12861136999068035, "grad_norm": 2.827422272165949, "learning_rate": 8.571428571428571e-06, "loss": 3.1236, "reason_loss": 0.653530478477478, "step": 138, "utility_loss": 2.4701106548309326 }, { "cosine_similarity": 0.009958942490955205, "epoch": 0.12954333643988816, "grad_norm": 2.3306032901465707, "learning_rate": 8.633540372670808e-06, "loss": 2.835, "reason_loss": 0.6234203577041626, "step": 139, "utility_loss": 2.211580514907837 }, { "cosine_similarity": -0.14537062389677927, "epoch": 0.13047530288909598, "grad_norm": 2.3642066964048625, "learning_rate": 8.695652173913044e-06, "loss": 2.8236, "reason_loss": 0.6501860022544861, "step": 140, "utility_loss": 2.1733808517456055 }, { "cosine_similarity": 0.22325838715240806, "epoch": 0.13140726933830382, "grad_norm": 2.1399468330127314, "learning_rate": 8.75776397515528e-06, "loss": 3.077, "reason_loss": 0.7013881802558899, "step": 141, "utility_loss": 2.375623941421509 }, { "cosine_similarity": 0.268571322252794, "epoch": 0.13233923578751164, "grad_norm": 2.5249441069190497, "learning_rate": 8.819875776397516e-06, "loss": 2.8643, "reason_loss": 0.6530239582061768, "step": 142, "utility_loss": 2.2112479209899902 }, { "cosine_similarity": 0.16180442079921153, "epoch": 0.13327120223671948, "grad_norm": 2.9241569223349, "learning_rate": 8.881987577639753e-06, "loss": 2.7486, "reason_loss": 0.6297993659973145, "step": 143, "utility_loss": 2.1187920570373535 }, { "cosine_similarity": -0.040759356767264644, "epoch": 0.1342031686859273, "grad_norm": 2.898371349535002, "learning_rate": 8.944099378881988e-06, "loss": 2.2689, "reason_loss": 0.6938570141792297, "step": 144, "utility_loss": 1.5750703811645508 }, { "cosine_similarity": 0.16152342621842217, "epoch": 0.13513513513513514, "grad_norm": 2.1829428382899017, "learning_rate": 9.006211180124225e-06, "loss": 2.7078, "reason_loss": 0.6644190549850464, "step": 145, "utility_loss": 2.043424606323242 }, { "cosine_similarity": 0.06777033099121224, "epoch": 0.13606710158434296, "grad_norm": 1.9432745199050903, "learning_rate": 9.068322981366461e-06, "loss": 3.0443, "reason_loss": 0.6468141078948975, "step": 146, "utility_loss": 2.39749813079834 }, { "cosine_similarity": 0.30785905186465345, "epoch": 0.1369990680335508, "grad_norm": 2.132466549487132, "learning_rate": 9.130434782608697e-06, "loss": 2.9256, "reason_loss": 0.6555608510971069, "step": 147, "utility_loss": 2.270068883895874 }, { "cosine_similarity": 0.34460134864625785, "epoch": 0.13793103448275862, "grad_norm": 2.7031026703631356, "learning_rate": 9.192546583850932e-06, "loss": 3.2599, "reason_loss": 0.596091091632843, "step": 148, "utility_loss": 2.663831949234009 }, { "cosine_similarity": 0.15122583852007268, "epoch": 0.13886300093196646, "grad_norm": 2.405709449027638, "learning_rate": 9.254658385093168e-06, "loss": 3.1286, "reason_loss": 0.6146531105041504, "step": 149, "utility_loss": 2.5139851570129395 }, { "cosine_similarity": 0.128129428108807, "epoch": 0.13979496738117428, "grad_norm": 1.808086580631732, "learning_rate": 9.316770186335405e-06, "loss": 2.392, "reason_loss": 0.680859386920929, "step": 150, "utility_loss": 1.7111769914627075 }, { "cosine_similarity": 0.16075229563976765, "epoch": 0.1407269338303821, "grad_norm": 3.0995154033672314, "learning_rate": 9.37888198757764e-06, "loss": 2.7018, "reason_loss": 0.6713130474090576, "step": 151, "utility_loss": 2.0304481983184814 }, { "cosine_similarity": 0.02340455055934282, "epoch": 0.14165890027958994, "grad_norm": 2.174560720472506, "learning_rate": 9.440993788819877e-06, "loss": 2.7757, "reason_loss": 0.6539462804794312, "step": 152, "utility_loss": 2.1217644214630127 }, { "cosine_similarity": 0.18186300102508704, "epoch": 0.14259086672879775, "grad_norm": 2.768662987572918, "learning_rate": 9.503105590062112e-06, "loss": 2.7123, "reason_loss": 0.6369265913963318, "step": 153, "utility_loss": 2.0754168033599854 }, { "cosine_similarity": 0.2296696381178422, "epoch": 0.1435228331780056, "grad_norm": 3.040193987679461, "learning_rate": 9.565217391304349e-06, "loss": 2.8781, "reason_loss": 0.6544292569160461, "step": 154, "utility_loss": 2.2236669063568115 }, { "cosine_similarity": -0.15047627545635164, "epoch": 0.14445479962721341, "grad_norm": 4.623233177453476, "learning_rate": 9.627329192546585e-06, "loss": 2.9355, "reason_loss": 0.6247748136520386, "step": 155, "utility_loss": 2.310743808746338 }, { "cosine_similarity": 0.019006996606827274, "epoch": 0.14538676607642126, "grad_norm": 2.6979125029232978, "learning_rate": 9.68944099378882e-06, "loss": 2.8931, "reason_loss": 0.612507700920105, "step": 156, "utility_loss": 2.280552864074707 }, { "cosine_similarity": 0.06040130970269782, "epoch": 0.14631873252562907, "grad_norm": 2.5512947888319677, "learning_rate": 9.751552795031056e-06, "loss": 2.6529, "reason_loss": 0.611884593963623, "step": 157, "utility_loss": 2.0410282611846924 }, { "cosine_similarity": -0.1772641213835999, "epoch": 0.14725069897483692, "grad_norm": 2.1472570221257494, "learning_rate": 9.813664596273292e-06, "loss": 2.5838, "reason_loss": 0.6410301923751831, "step": 158, "utility_loss": 1.9428175687789917 }, { "cosine_similarity": 0.1905580813193025, "epoch": 0.14818266542404473, "grad_norm": 2.6919643599606733, "learning_rate": 9.875776397515529e-06, "loss": 2.8445, "reason_loss": 0.6313097476959229, "step": 159, "utility_loss": 2.2131824493408203 }, { "cosine_similarity": 0.006781729609525959, "epoch": 0.14911463187325255, "grad_norm": 2.740445940329026, "learning_rate": 9.937888198757764e-06, "loss": 3.1571, "reason_loss": 0.6169568300247192, "step": 160, "utility_loss": 2.540102005004883 }, { "cosine_similarity": 0.17625732860042526, "epoch": 0.1500465983224604, "grad_norm": 2.513531102165717, "learning_rate": 1e-05, "loss": 2.8049, "reason_loss": 0.6164910197257996, "step": 161, "utility_loss": 2.188422203063965 }, { "cosine_similarity": 0.1790210118470091, "epoch": 0.1509785647716682, "grad_norm": 2.27001247199482, "learning_rate": 1.0062111801242236e-05, "loss": 2.8147, "reason_loss": 0.6256549954414368, "step": 162, "utility_loss": 2.189072847366333 }, { "cosine_similarity": -0.192750664971838, "epoch": 0.15191053122087605, "grad_norm": 3.831960531495703, "learning_rate": 1.0124223602484473e-05, "loss": 3.0494, "reason_loss": 0.6381500363349915, "step": 163, "utility_loss": 2.4112722873687744 }, { "cosine_similarity": 0.10404832209677604, "epoch": 0.15284249767008387, "grad_norm": 2.4877362297996792, "learning_rate": 1.0186335403726708e-05, "loss": 3.0115, "reason_loss": 0.6582925915718079, "step": 164, "utility_loss": 2.3532543182373047 }, { "cosine_similarity": 0.11850354140883144, "epoch": 0.15377446411929171, "grad_norm": 2.134297035262681, "learning_rate": 1.0248447204968946e-05, "loss": 2.596, "reason_loss": 0.6453489065170288, "step": 165, "utility_loss": 1.9506908655166626 }, { "cosine_similarity": 0.12311629698267548, "epoch": 0.15470643056849953, "grad_norm": 3.0965862272530904, "learning_rate": 1.0310559006211181e-05, "loss": 2.845, "reason_loss": 0.6276886463165283, "step": 166, "utility_loss": 2.2173120975494385 }, { "cosine_similarity": 0.04346705650605149, "epoch": 0.15563839701770738, "grad_norm": 2.3528320140383636, "learning_rate": 1.0372670807453418e-05, "loss": 2.8703, "reason_loss": 0.605004072189331, "step": 167, "utility_loss": 2.265305519104004 }, { "cosine_similarity": -0.047522970366213245, "epoch": 0.1565703634669152, "grad_norm": 2.7821665257894503, "learning_rate": 1.0434782608695653e-05, "loss": 2.7844, "reason_loss": 0.6255670785903931, "step": 168, "utility_loss": 2.1587870121002197 }, { "cosine_similarity": 0.0003466175550968621, "epoch": 0.157502329916123, "grad_norm": 2.488182343617843, "learning_rate": 1.049689440993789e-05, "loss": 2.8754, "reason_loss": 0.5784562826156616, "step": 169, "utility_loss": 2.296942710876465 }, { "cosine_similarity": 0.12388145786797744, "epoch": 0.15843429636533085, "grad_norm": 1.8388284139210593, "learning_rate": 1.0559006211180125e-05, "loss": 2.1845, "reason_loss": 0.5971364974975586, "step": 170, "utility_loss": 1.5873584747314453 }, { "cosine_similarity": 0.15262773856511053, "epoch": 0.15936626281453867, "grad_norm": 2.0974932728588795, "learning_rate": 1.062111801242236e-05, "loss": 2.6159, "reason_loss": 0.6151060461997986, "step": 171, "utility_loss": 2.000779390335083 }, { "cosine_similarity": 0.02223595326083054, "epoch": 0.1602982292637465, "grad_norm": 2.465784993941749, "learning_rate": 1.0683229813664597e-05, "loss": 3.0428, "reason_loss": 0.6096600294113159, "step": 172, "utility_loss": 2.4331812858581543 }, { "cosine_similarity": 0.14611496256051712, "epoch": 0.16123019571295433, "grad_norm": 2.354441460221936, "learning_rate": 1.0745341614906832e-05, "loss": 2.6498, "reason_loss": 0.6638634204864502, "step": 173, "utility_loss": 1.9859106540679932 }, { "cosine_similarity": -0.01409501118699334, "epoch": 0.16216216216216217, "grad_norm": 2.1563337399879283, "learning_rate": 1.080745341614907e-05, "loss": 2.352, "reason_loss": 0.5945676565170288, "step": 174, "utility_loss": 1.7573952674865723 }, { "cosine_similarity": 0.014890190735460829, "epoch": 0.16309412861137, "grad_norm": 2.0359072835278185, "learning_rate": 1.0869565217391305e-05, "loss": 2.6839, "reason_loss": 0.6187235116958618, "step": 175, "utility_loss": 2.065220355987549 }, { "cosine_similarity": 0.22514523909431824, "epoch": 0.16402609506057783, "grad_norm": 2.807630583344466, "learning_rate": 1.0931677018633542e-05, "loss": 2.9427, "reason_loss": 0.5954674482345581, "step": 176, "utility_loss": 2.3472166061401367 }, { "cosine_similarity": 0.04498241947884207, "epoch": 0.16495806150978565, "grad_norm": 2.055387193841589, "learning_rate": 1.0993788819875777e-05, "loss": 3.0615, "reason_loss": 0.5982325077056885, "step": 177, "utility_loss": 2.4632229804992676 }, { "cosine_similarity": 0.043440128975653, "epoch": 0.16589002795899346, "grad_norm": 2.730270768677886, "learning_rate": 1.1055900621118014e-05, "loss": 2.6447, "reason_loss": 0.6085994839668274, "step": 178, "utility_loss": 2.036073684692383 }, { "cosine_similarity": -0.0026743824868465443, "epoch": 0.1668219944082013, "grad_norm": 1.8570744588721, "learning_rate": 1.1118012422360249e-05, "loss": 2.7459, "reason_loss": 0.6415849924087524, "step": 179, "utility_loss": 2.1043248176574707 }, { "cosine_similarity": -0.09321666989054558, "epoch": 0.16775396085740912, "grad_norm": 1.9947852869859652, "learning_rate": 1.1180124223602484e-05, "loss": 2.4113, "reason_loss": 0.6160120964050293, "step": 180, "utility_loss": 1.7953083515167236 }, { "cosine_similarity": 0.02172760981874856, "epoch": 0.16868592730661697, "grad_norm": 2.3265254673047306, "learning_rate": 1.1242236024844722e-05, "loss": 2.9271, "reason_loss": 0.5961334705352783, "step": 181, "utility_loss": 2.330997943878174 }, { "cosine_similarity": 0.12909893429372826, "epoch": 0.16961789375582478, "grad_norm": 2.342115251354744, "learning_rate": 1.1304347826086957e-05, "loss": 2.8186, "reason_loss": 0.6595121622085571, "step": 182, "utility_loss": 2.1591315269470215 }, { "cosine_similarity": 0.4559570934181297, "epoch": 0.17054986020503263, "grad_norm": 2.7630611077382765, "learning_rate": 1.1366459627329194e-05, "loss": 2.8382, "reason_loss": 0.5918244123458862, "step": 183, "utility_loss": 2.2463510036468506 }, { "cosine_similarity": 0.0906619666508192, "epoch": 0.17148182665424044, "grad_norm": 2.8032252887952778, "learning_rate": 1.1428571428571429e-05, "loss": 2.6893, "reason_loss": 0.6115083694458008, "step": 184, "utility_loss": 2.0778355598449707 }, { "cosine_similarity": -0.06389473253586654, "epoch": 0.1724137931034483, "grad_norm": 2.24587173338492, "learning_rate": 1.1490683229813666e-05, "loss": 3.0133, "reason_loss": 0.6221015453338623, "step": 185, "utility_loss": 2.391221284866333 }, { "cosine_similarity": -0.149031465133817, "epoch": 0.1733457595526561, "grad_norm": 2.71731920628843, "learning_rate": 1.15527950310559e-05, "loss": 2.1816, "reason_loss": 0.6164002418518066, "step": 186, "utility_loss": 1.5652439594268799 }, { "cosine_similarity": 0.283536773458922, "epoch": 0.17427772600186392, "grad_norm": 2.2909031458309523, "learning_rate": 1.161490683229814e-05, "loss": 2.7577, "reason_loss": 0.6161497831344604, "step": 187, "utility_loss": 2.141598701477051 }, { "cosine_similarity": 0.1569726444753414, "epoch": 0.17520969245107176, "grad_norm": 1.9732909819537152, "learning_rate": 1.1677018633540373e-05, "loss": 2.725, "reason_loss": 0.6015134453773499, "step": 188, "utility_loss": 2.1234970092773438 }, { "cosine_similarity": 0.2117546639958834, "epoch": 0.17614165890027958, "grad_norm": 1.8318876949159113, "learning_rate": 1.1739130434782611e-05, "loss": 2.5089, "reason_loss": 0.6653010845184326, "step": 189, "utility_loss": 1.8435759544372559 }, { "cosine_similarity": 0.10308169289272395, "epoch": 0.17707362534948742, "grad_norm": 1.7486133228094254, "learning_rate": 1.1801242236024846e-05, "loss": 2.4883, "reason_loss": 0.6329506039619446, "step": 190, "utility_loss": 1.8553293943405151 }, { "cosine_similarity": 0.20165878563133316, "epoch": 0.17800559179869524, "grad_norm": 1.9995636034468416, "learning_rate": 1.1863354037267081e-05, "loss": 2.9032, "reason_loss": 0.6238183975219727, "step": 191, "utility_loss": 2.279367685317993 }, { "cosine_similarity": 0.1260319198323893, "epoch": 0.17893755824790308, "grad_norm": 1.7758011062898102, "learning_rate": 1.1925465838509318e-05, "loss": 2.464, "reason_loss": 0.5819162130355835, "step": 192, "utility_loss": 1.882056474685669 }, { "cosine_similarity": 0.21495161637240398, "epoch": 0.1798695246971109, "grad_norm": 2.3379339603769536, "learning_rate": 1.1987577639751553e-05, "loss": 2.5621, "reason_loss": 0.6099612712860107, "step": 193, "utility_loss": 1.9521584510803223 }, { "cosine_similarity": 0.0414114724020159, "epoch": 0.18080149114631874, "grad_norm": 2.270801652341194, "learning_rate": 1.204968944099379e-05, "loss": 2.5978, "reason_loss": 0.6325784921646118, "step": 194, "utility_loss": 1.965238094329834 }, { "cosine_similarity": 0.04529418703090625, "epoch": 0.18173345759552656, "grad_norm": 2.4716930520748512, "learning_rate": 1.2111801242236025e-05, "loss": 2.9632, "reason_loss": 0.6217238903045654, "step": 195, "utility_loss": 2.341482162475586 }, { "cosine_similarity": -0.17027647954295883, "epoch": 0.18266542404473438, "grad_norm": 2.069271872614914, "learning_rate": 1.2173913043478263e-05, "loss": 2.512, "reason_loss": 0.6159597635269165, "step": 196, "utility_loss": 1.8960204124450684 }, { "cosine_similarity": 0.3823120584553909, "epoch": 0.18359739049394222, "grad_norm": 2.088717519177477, "learning_rate": 1.2236024844720498e-05, "loss": 2.5136, "reason_loss": 0.6379976272583008, "step": 197, "utility_loss": 1.8756088018417358 }, { "cosine_similarity": 0.6334619152558866, "epoch": 0.18452935694315004, "grad_norm": 2.2327571180142196, "learning_rate": 1.2298136645962735e-05, "loss": 2.8903, "reason_loss": 0.6027855277061462, "step": 198, "utility_loss": 2.2874903678894043 }, { "cosine_similarity": 0.09235304614595882, "epoch": 0.18546132339235788, "grad_norm": 2.463320615624538, "learning_rate": 1.236024844720497e-05, "loss": 2.7998, "reason_loss": 0.6295397281646729, "step": 199, "utility_loss": 2.170280694961548 }, { "cosine_similarity": 0.3140964185803338, "epoch": 0.1863932898415657, "grad_norm": 2.6124345708506533, "learning_rate": 1.2422360248447205e-05, "loss": 3.0775, "reason_loss": 0.5882161855697632, "step": 200, "utility_loss": 2.4893057346343994 }, { "cosine_similarity": 0.011606815462923392, "epoch": 0.18732525629077354, "grad_norm": 2.0891519862600036, "learning_rate": 1.2484472049689442e-05, "loss": 2.747, "reason_loss": 0.6023392677307129, "step": 201, "utility_loss": 2.144625186920166 }, { "cosine_similarity": 0.03483247150280671, "epoch": 0.18825722273998136, "grad_norm": 1.8703877596179457, "learning_rate": 1.2546583850931677e-05, "loss": 2.5869, "reason_loss": 0.5883069038391113, "step": 202, "utility_loss": 1.998572587966919 }, { "cosine_similarity": 0.1422869677918677, "epoch": 0.1891891891891892, "grad_norm": 2.492874569014603, "learning_rate": 1.2608695652173915e-05, "loss": 2.7672, "reason_loss": 0.5825269222259521, "step": 203, "utility_loss": 2.1846842765808105 }, { "cosine_similarity": 0.1460729919170473, "epoch": 0.19012115563839702, "grad_norm": 2.578341062893817, "learning_rate": 1.267080745341615e-05, "loss": 2.962, "reason_loss": 0.6159621477127075, "step": 204, "utility_loss": 2.3460421562194824 }, { "cosine_similarity": 0.18189870924139162, "epoch": 0.19105312208760486, "grad_norm": 2.5557292572374646, "learning_rate": 1.2732919254658387e-05, "loss": 2.6223, "reason_loss": 0.6005083322525024, "step": 205, "utility_loss": 2.021800994873047 }, { "cosine_similarity": 0.11880875212430299, "epoch": 0.19198508853681268, "grad_norm": 2.205915421583997, "learning_rate": 1.2795031055900622e-05, "loss": 2.8791, "reason_loss": 0.6514885425567627, "step": 206, "utility_loss": 2.2276358604431152 }, { "cosine_similarity": 0.2413109033218662, "epoch": 0.1929170549860205, "grad_norm": 2.6154735575613945, "learning_rate": 1.2857142857142859e-05, "loss": 2.7578, "reason_loss": 0.6124842166900635, "step": 207, "utility_loss": 2.1452715396881104 }, { "cosine_similarity": 0.02946957126625468, "epoch": 0.19384902143522834, "grad_norm": 2.0632611228455215, "learning_rate": 1.2919254658385094e-05, "loss": 2.8889, "reason_loss": 0.606238842010498, "step": 208, "utility_loss": 2.2826685905456543 }, { "cosine_similarity": -0.1864057365901209, "epoch": 0.19478098788443615, "grad_norm": 2.0357216828255082, "learning_rate": 1.2981366459627329e-05, "loss": 1.9558, "reason_loss": 0.5821788311004639, "step": 209, "utility_loss": 1.3735841512680054 }, { "cosine_similarity": 0.012968269924655086, "epoch": 0.195712954333644, "grad_norm": 2.362890581511633, "learning_rate": 1.3043478260869566e-05, "loss": 2.6841, "reason_loss": 0.6246688961982727, "step": 210, "utility_loss": 2.0594170093536377 }, { "cosine_similarity": 0.1996762185898451, "epoch": 0.1966449207828518, "grad_norm": 1.5928865895254243, "learning_rate": 1.31055900621118e-05, "loss": 2.2243, "reason_loss": 0.6343227624893188, "step": 211, "utility_loss": 1.5899572372436523 }, { "cosine_similarity": 0.04486855517964981, "epoch": 0.19757688723205966, "grad_norm": 1.9372117682403969, "learning_rate": 1.316770186335404e-05, "loss": 2.7445, "reason_loss": 0.6200393438339233, "step": 212, "utility_loss": 2.1244192123413086 }, { "cosine_similarity": 0.027190438509114293, "epoch": 0.19850885368126747, "grad_norm": 2.4972297846187463, "learning_rate": 1.3229813664596274e-05, "loss": 2.5937, "reason_loss": 0.5822629332542419, "step": 213, "utility_loss": 2.0114517211914062 }, { "cosine_similarity": -0.04142429646710334, "epoch": 0.19944082013047532, "grad_norm": 2.9894999385484846, "learning_rate": 1.3291925465838511e-05, "loss": 3.1017, "reason_loss": 0.5973764657974243, "step": 214, "utility_loss": 2.5043272972106934 }, { "cosine_similarity": 0.04319841989474148, "epoch": 0.20037278657968313, "grad_norm": 2.1167440127028416, "learning_rate": 1.3354037267080746e-05, "loss": 2.3769, "reason_loss": 0.5523155927658081, "step": 215, "utility_loss": 1.8245534896850586 }, { "cosine_similarity": 0.141241219178578, "epoch": 0.20130475302889095, "grad_norm": 2.2548739894753678, "learning_rate": 1.3416149068322983e-05, "loss": 2.4131, "reason_loss": 0.576714038848877, "step": 216, "utility_loss": 1.8364036083221436 }, { "cosine_similarity": 0.11140048155682666, "epoch": 0.2022367194780988, "grad_norm": 2.333911188895893, "learning_rate": 1.3478260869565218e-05, "loss": 2.6253, "reason_loss": 0.5750606060028076, "step": 217, "utility_loss": 2.050246000289917 }, { "cosine_similarity": 0.21497182074110205, "epoch": 0.2031686859273066, "grad_norm": 1.5403438935917604, "learning_rate": 1.3540372670807453e-05, "loss": 2.5973, "reason_loss": 0.6409677267074585, "step": 218, "utility_loss": 1.9563283920288086 }, { "cosine_similarity": 0.535739537021371, "epoch": 0.20410065237651445, "grad_norm": 2.00838118467868, "learning_rate": 1.3602484472049691e-05, "loss": 2.7939, "reason_loss": 0.5771864652633667, "step": 219, "utility_loss": 2.216731309890747 }, { "cosine_similarity": 0.1786493039863653, "epoch": 0.20503261882572227, "grad_norm": 2.8349703617241335, "learning_rate": 1.3664596273291926e-05, "loss": 2.7268, "reason_loss": 0.61793053150177, "step": 220, "utility_loss": 2.1089038848876953 }, { "cosine_similarity": 0.27988334782041185, "epoch": 0.2059645852749301, "grad_norm": 2.030941927492416, "learning_rate": 1.3726708074534163e-05, "loss": 2.7465, "reason_loss": 0.577458381652832, "step": 221, "utility_loss": 2.1690821647644043 }, { "cosine_similarity": 0.5744442751175465, "epoch": 0.20689655172413793, "grad_norm": 2.2027582699237973, "learning_rate": 1.3788819875776398e-05, "loss": 2.6499, "reason_loss": 0.5788962841033936, "step": 222, "utility_loss": 2.071014165878296 }, { "cosine_similarity": 0.26535933497772457, "epoch": 0.20782851817334577, "grad_norm": 2.2662765065460415, "learning_rate": 1.3850931677018635e-05, "loss": 2.7531, "reason_loss": 0.5733307600021362, "step": 223, "utility_loss": 2.179792881011963 }, { "cosine_similarity": 0.22012856839908415, "epoch": 0.2087604846225536, "grad_norm": 2.379608970518808, "learning_rate": 1.391304347826087e-05, "loss": 2.8115, "reason_loss": 0.560699462890625, "step": 224, "utility_loss": 2.2508318424224854 }, { "cosine_similarity": 0.21389816283793092, "epoch": 0.2096924510717614, "grad_norm": 3.3688215112878916, "learning_rate": 1.3975155279503107e-05, "loss": 2.8715, "reason_loss": 0.5955812931060791, "step": 225, "utility_loss": 2.275954484939575 }, { "cosine_similarity": 0.25226281534824385, "epoch": 0.21062441752096925, "grad_norm": 2.717109279885258, "learning_rate": 1.4037267080745342e-05, "loss": 2.7211, "reason_loss": 0.5709105730056763, "step": 226, "utility_loss": 2.150181770324707 }, { "cosine_similarity": 0.26067789766279603, "epoch": 0.21155638397017706, "grad_norm": 2.6204819642040182, "learning_rate": 1.409937888198758e-05, "loss": 2.6975, "reason_loss": 0.6138880252838135, "step": 227, "utility_loss": 2.0836527347564697 }, { "cosine_similarity": -0.08609566559127081, "epoch": 0.2124883504193849, "grad_norm": 2.220517858604992, "learning_rate": 1.4161490683229815e-05, "loss": 2.8071, "reason_loss": 0.5684266090393066, "step": 228, "utility_loss": 2.2387213706970215 }, { "cosine_similarity": 0.409941589122331, "epoch": 0.21342031686859272, "grad_norm": 2.3645884953691145, "learning_rate": 1.422360248447205e-05, "loss": 2.9569, "reason_loss": 0.6153180003166199, "step": 229, "utility_loss": 2.341533899307251 }, { "cosine_similarity": 0.41097474471139533, "epoch": 0.21435228331780057, "grad_norm": 1.8775077327020542, "learning_rate": 1.4285714285714287e-05, "loss": 2.7761, "reason_loss": 0.5926763415336609, "step": 230, "utility_loss": 2.1833834648132324 }, { "cosine_similarity": -0.12921309924648683, "epoch": 0.21528424976700838, "grad_norm": 2.051298575451935, "learning_rate": 1.4347826086956522e-05, "loss": 2.2259, "reason_loss": 0.5837148427963257, "step": 231, "utility_loss": 1.6421821117401123 }, { "cosine_similarity": 0.07466599707905264, "epoch": 0.21621621621621623, "grad_norm": 2.699353549831784, "learning_rate": 1.4409937888198759e-05, "loss": 2.5552, "reason_loss": 0.6079723238945007, "step": 232, "utility_loss": 1.9472568035125732 }, { "cosine_similarity": 0.09188378611407325, "epoch": 0.21714818266542404, "grad_norm": 2.0384458786845725, "learning_rate": 1.4472049689440994e-05, "loss": 3.0255, "reason_loss": 0.6153154969215393, "step": 233, "utility_loss": 2.410179615020752 }, { "cosine_similarity": -0.033160156914628, "epoch": 0.21808014911463186, "grad_norm": 2.0795484763037733, "learning_rate": 1.4534161490683232e-05, "loss": 2.4543, "reason_loss": 0.5834105014801025, "step": 234, "utility_loss": 1.870854377746582 }, { "cosine_similarity": 0.21868353050670222, "epoch": 0.2190121155638397, "grad_norm": 2.0230927121817266, "learning_rate": 1.4596273291925467e-05, "loss": 2.2929, "reason_loss": 0.560434877872467, "step": 235, "utility_loss": 1.7324559688568115 }, { "cosine_similarity": 0.09921270795365167, "epoch": 0.21994408201304752, "grad_norm": 1.7607587777540932, "learning_rate": 1.4658385093167704e-05, "loss": 2.1985, "reason_loss": 0.5961803197860718, "step": 236, "utility_loss": 1.6023273468017578 }, { "cosine_similarity": 0.05760453333041245, "epoch": 0.22087604846225536, "grad_norm": 2.2099940955405124, "learning_rate": 1.472049689440994e-05, "loss": 2.3209, "reason_loss": 0.5907562375068665, "step": 237, "utility_loss": 1.7300945520401 }, { "cosine_similarity": 0.17185963422476486, "epoch": 0.22180801491146318, "grad_norm": 2.151520971582208, "learning_rate": 1.4782608695652174e-05, "loss": 2.3985, "reason_loss": 0.6052426099777222, "step": 238, "utility_loss": 1.7932164669036865 }, { "cosine_similarity": 0.4304927881474242, "epoch": 0.22273998136067102, "grad_norm": 2.2788809045634797, "learning_rate": 1.4844720496894411e-05, "loss": 2.4491, "reason_loss": 0.5538003444671631, "step": 239, "utility_loss": 1.8953049182891846 }, { "cosine_similarity": 0.34916415836907283, "epoch": 0.22367194780987884, "grad_norm": 2.348435949363157, "learning_rate": 1.4906832298136646e-05, "loss": 2.7955, "reason_loss": 0.5807802677154541, "step": 240, "utility_loss": 2.2147512435913086 }, { "cosine_similarity": -0.22754038456517486, "epoch": 0.22460391425908668, "grad_norm": 2.45360577107398, "learning_rate": 1.4968944099378885e-05, "loss": 2.8512, "reason_loss": 0.5968025326728821, "step": 241, "utility_loss": 2.2544054985046387 }, { "cosine_similarity": 0.017711765961638568, "epoch": 0.2255358807082945, "grad_norm": 1.9020393869714383, "learning_rate": 1.5031055900621118e-05, "loss": 2.7241, "reason_loss": 0.579351007938385, "step": 242, "utility_loss": 2.144746780395508 }, { "cosine_similarity": -0.08673284726008915, "epoch": 0.22646784715750232, "grad_norm": 2.3626009218171613, "learning_rate": 1.5093167701863356e-05, "loss": 2.5867, "reason_loss": 0.6040968894958496, "step": 243, "utility_loss": 1.9826432466506958 }, { "cosine_similarity": 0.3745672355220843, "epoch": 0.22739981360671016, "grad_norm": 1.9914593109878902, "learning_rate": 1.5155279503105591e-05, "loss": 2.6993, "reason_loss": 0.5749762058258057, "step": 244, "utility_loss": 2.124361038208008 }, { "cosine_similarity": 0.08440106254373543, "epoch": 0.22833178005591798, "grad_norm": 2.3895119321405027, "learning_rate": 1.5217391304347828e-05, "loss": 2.7706, "reason_loss": 0.5975583791732788, "step": 245, "utility_loss": 2.1730899810791016 }, { "cosine_similarity": -0.20786504406403758, "epoch": 0.22926374650512582, "grad_norm": 1.6294528606706813, "learning_rate": 1.5279503105590063e-05, "loss": 2.8425, "reason_loss": 0.5632543563842773, "step": 246, "utility_loss": 2.2792086601257324 }, { "cosine_similarity": 0.6352498101486798, "epoch": 0.23019571295433364, "grad_norm": 3.218039403719852, "learning_rate": 1.5341614906832298e-05, "loss": 2.9677, "reason_loss": 0.5636681914329529, "step": 247, "utility_loss": 2.4040565490722656 }, { "cosine_similarity": -0.026784616516993404, "epoch": 0.23112767940354148, "grad_norm": 2.1388010625501397, "learning_rate": 1.5403726708074537e-05, "loss": 2.5716, "reason_loss": 0.6131218671798706, "step": 248, "utility_loss": 1.9584428071975708 }, { "cosine_similarity": 0.08989694809431736, "epoch": 0.2320596458527493, "grad_norm": 1.7272993036507611, "learning_rate": 1.5465838509316772e-05, "loss": 2.5355, "reason_loss": 0.5926008224487305, "step": 249, "utility_loss": 1.942899465560913 }, { "cosine_similarity": 0.2499219087746305, "epoch": 0.23299161230195714, "grad_norm": 1.9252753495080481, "learning_rate": 1.5527950310559007e-05, "loss": 2.4172, "reason_loss": 0.5759150981903076, "step": 250, "utility_loss": 1.8412349224090576 }, { "cosine_similarity": 0.15057518096248435, "epoch": 0.23392357875116496, "grad_norm": 1.9160928081554494, "learning_rate": 1.5590062111801242e-05, "loss": 2.742, "reason_loss": 0.6151578426361084, "step": 251, "utility_loss": 2.126845359802246 }, { "cosine_similarity": 0.17367453733546379, "epoch": 0.23485554520037277, "grad_norm": 1.8618089543110288, "learning_rate": 1.565217391304348e-05, "loss": 2.6447, "reason_loss": 0.5688133239746094, "step": 252, "utility_loss": 2.0758743286132812 }, { "cosine_similarity": 0.2172996043948191, "epoch": 0.23578751164958062, "grad_norm": 2.355013029644733, "learning_rate": 1.5714285714285715e-05, "loss": 2.6217, "reason_loss": 0.5644755363464355, "step": 253, "utility_loss": 2.0572426319122314 }, { "cosine_similarity": 0.060241758154781035, "epoch": 0.23671947809878843, "grad_norm": 2.0350479385183524, "learning_rate": 1.5776397515527954e-05, "loss": 2.7204, "reason_loss": 0.6153931617736816, "step": 254, "utility_loss": 2.1049609184265137 }, { "cosine_similarity": 0.02961731059961555, "epoch": 0.23765144454799628, "grad_norm": 1.7251265584226603, "learning_rate": 1.583850931677019e-05, "loss": 2.3539, "reason_loss": 0.6005889177322388, "step": 255, "utility_loss": 1.7532784938812256 }, { "cosine_similarity": 0.026110417025963943, "epoch": 0.2385834109972041, "grad_norm": 2.146529055445227, "learning_rate": 1.5900621118012424e-05, "loss": 2.1578, "reason_loss": 0.5696282386779785, "step": 256, "utility_loss": 1.588130235671997 }, { "cosine_similarity": 0.06807063439001267, "epoch": 0.23951537744641194, "grad_norm": 2.4537683726128816, "learning_rate": 1.596273291925466e-05, "loss": 2.8562, "reason_loss": 0.5816941857337952, "step": 257, "utility_loss": 2.274548053741455 }, { "cosine_similarity": 0.21872396595755147, "epoch": 0.24044734389561975, "grad_norm": 1.8899058292462838, "learning_rate": 1.6024844720496894e-05, "loss": 2.6173, "reason_loss": 0.5799087882041931, "step": 258, "utility_loss": 2.0373921394348145 }, { "cosine_similarity": 0.033416452327077406, "epoch": 0.2413793103448276, "grad_norm": 1.691319625266638, "learning_rate": 1.6086956521739132e-05, "loss": 2.525, "reason_loss": 0.5750694870948792, "step": 259, "utility_loss": 1.9499375820159912 }, { "cosine_similarity": 0.010858485218557236, "epoch": 0.2423112767940354, "grad_norm": 1.9165577429670007, "learning_rate": 1.6149068322981367e-05, "loss": 2.5969, "reason_loss": 0.6249494552612305, "step": 260, "utility_loss": 1.9719918966293335 }, { "cosine_similarity": -0.008122096480999652, "epoch": 0.24324324324324326, "grad_norm": 2.488203481155486, "learning_rate": 1.6211180124223606e-05, "loss": 2.5248, "reason_loss": 0.5894231796264648, "step": 261, "utility_loss": 1.9353749752044678 }, { "cosine_similarity": 0.12480750043220501, "epoch": 0.24417520969245107, "grad_norm": 2.018657346623235, "learning_rate": 1.627329192546584e-05, "loss": 2.3885, "reason_loss": 0.6323911547660828, "step": 262, "utility_loss": 1.7561018466949463 }, { "cosine_similarity": 0.11031495304620463, "epoch": 0.2451071761416589, "grad_norm": 1.9834736428281055, "learning_rate": 1.6335403726708076e-05, "loss": 2.5442, "reason_loss": 0.5822319984436035, "step": 263, "utility_loss": 1.9619382619857788 }, { "cosine_similarity": 0.008367764967067863, "epoch": 0.24603914259086673, "grad_norm": 2.089115822230976, "learning_rate": 1.639751552795031e-05, "loss": 2.5777, "reason_loss": 0.5350804328918457, "step": 264, "utility_loss": 2.0426127910614014 }, { "cosine_similarity": 0.3176486228835508, "epoch": 0.24697110904007455, "grad_norm": 2.4148333858297644, "learning_rate": 1.645962732919255e-05, "loss": 2.5418, "reason_loss": 0.564887523651123, "step": 265, "utility_loss": 1.9769036769866943 }, { "cosine_similarity": 0.21479922766441822, "epoch": 0.2479030754892824, "grad_norm": 1.9174414736339955, "learning_rate": 1.6521739130434785e-05, "loss": 2.4824, "reason_loss": 0.5889142751693726, "step": 266, "utility_loss": 1.8934359550476074 }, { "cosine_similarity": 0.3160358318315541, "epoch": 0.2488350419384902, "grad_norm": 1.7064972680596127, "learning_rate": 1.658385093167702e-05, "loss": 2.8555, "reason_loss": 0.594512403011322, "step": 267, "utility_loss": 2.2609996795654297 }, { "cosine_similarity": 0.054471726300411666, "epoch": 0.24976700838769805, "grad_norm": 1.3848768170958938, "learning_rate": 1.6645962732919258e-05, "loss": 2.0067, "reason_loss": 0.5926221013069153, "step": 268, "utility_loss": 1.4141207933425903 }, { "cosine_similarity": 0.15420458354512803, "epoch": 0.2506989748369059, "grad_norm": 2.190582912249374, "learning_rate": 1.670807453416149e-05, "loss": 2.7056, "reason_loss": 0.5746637582778931, "step": 269, "utility_loss": 2.1309757232666016 }, { "cosine_similarity": 0.3852774971514806, "epoch": 0.2516309412861137, "grad_norm": 2.891413398594777, "learning_rate": 1.6770186335403728e-05, "loss": 2.611, "reason_loss": 0.5594158172607422, "step": 270, "utility_loss": 2.0515389442443848 }, { "cosine_similarity": 0.08456270274518368, "epoch": 0.25256290773532153, "grad_norm": 2.0245580709698214, "learning_rate": 1.6832298136645963e-05, "loss": 2.4053, "reason_loss": 0.5393308401107788, "step": 271, "utility_loss": 1.8659820556640625 }, { "cosine_similarity": 0.44713973201330975, "epoch": 0.2534948741845294, "grad_norm": 2.132121321674466, "learning_rate": 1.68944099378882e-05, "loss": 2.6602, "reason_loss": 0.6067647933959961, "step": 272, "utility_loss": 2.053415536880493 }, { "cosine_similarity": -0.006410561485350541, "epoch": 0.25442684063373716, "grad_norm": 1.9836860432409094, "learning_rate": 1.6956521739130437e-05, "loss": 2.1408, "reason_loss": 0.6006215810775757, "step": 273, "utility_loss": 1.5402164459228516 }, { "cosine_similarity": 0.04395929316135022, "epoch": 0.255358807082945, "grad_norm": 1.8316377112424844, "learning_rate": 1.7018633540372672e-05, "loss": 2.6812, "reason_loss": 0.5871187448501587, "step": 274, "utility_loss": 2.0940475463867188 }, { "cosine_similarity": 0.32997721140382896, "epoch": 0.25629077353215285, "grad_norm": 1.7775893446478803, "learning_rate": 1.7080745341614907e-05, "loss": 2.8624, "reason_loss": 0.5993084907531738, "step": 275, "utility_loss": 2.2631144523620605 }, { "cosine_similarity": 0.19280438172020475, "epoch": 0.2572227399813607, "grad_norm": 2.1727746713725464, "learning_rate": 1.7142857142857142e-05, "loss": 2.6659, "reason_loss": 0.6225122809410095, "step": 276, "utility_loss": 2.043358325958252 }, { "cosine_similarity": 0.2744750419530021, "epoch": 0.2581547064305685, "grad_norm": 2.7370048674458825, "learning_rate": 1.720496894409938e-05, "loss": 2.2746, "reason_loss": 0.5572184324264526, "step": 277, "utility_loss": 1.7174105644226074 }, { "cosine_similarity": 0.385025069111902, "epoch": 0.2590866728797763, "grad_norm": 1.9002576192075187, "learning_rate": 1.7267080745341615e-05, "loss": 2.5788, "reason_loss": 0.5611259937286377, "step": 278, "utility_loss": 2.0176448822021484 }, { "cosine_similarity": 0.16440749150728431, "epoch": 0.26001863932898417, "grad_norm": 2.0403515506047687, "learning_rate": 1.7329192546583854e-05, "loss": 2.2143, "reason_loss": 0.5834388732910156, "step": 279, "utility_loss": 1.6308445930480957 }, { "cosine_similarity": 0.14258232740305019, "epoch": 0.26095060577819196, "grad_norm": 2.1012395443833287, "learning_rate": 1.739130434782609e-05, "loss": 2.8267, "reason_loss": 0.5565686821937561, "step": 280, "utility_loss": 2.2701756954193115 }, { "cosine_similarity": 0.8099048463963032, "epoch": 0.2618825722273998, "grad_norm": 1.9889175099422773, "learning_rate": 1.7453416149068324e-05, "loss": 2.6716, "reason_loss": 0.5890052914619446, "step": 281, "utility_loss": 2.0826127529144287 }, { "cosine_similarity": 0.24469043993705838, "epoch": 0.26281453867660765, "grad_norm": 1.6680191125445356, "learning_rate": 1.751552795031056e-05, "loss": 2.0102, "reason_loss": 0.6160133481025696, "step": 282, "utility_loss": 1.3941802978515625 }, { "cosine_similarity": 0.02018261824362258, "epoch": 0.2637465051258155, "grad_norm": 2.089791797122933, "learning_rate": 1.7577639751552797e-05, "loss": 2.3446, "reason_loss": 0.5653672814369202, "step": 283, "utility_loss": 1.7792624235153198 }, { "cosine_similarity": -0.03592264095635327, "epoch": 0.2646784715750233, "grad_norm": 1.7704182182120287, "learning_rate": 1.7639751552795032e-05, "loss": 2.1354, "reason_loss": 0.5631332993507385, "step": 284, "utility_loss": 1.5722649097442627 }, { "cosine_similarity": 0.3988429896536234, "epoch": 0.2656104380242311, "grad_norm": 1.791702309174828, "learning_rate": 1.7701863354037267e-05, "loss": 2.7537, "reason_loss": 0.5701446533203125, "step": 285, "utility_loss": 2.1835622787475586 }, { "cosine_similarity": 0.25922178031872917, "epoch": 0.26654240447343897, "grad_norm": 1.926597626105624, "learning_rate": 1.7763975155279506e-05, "loss": 2.6336, "reason_loss": 0.5959417819976807, "step": 286, "utility_loss": 2.0376813411712646 }, { "cosine_similarity": -0.26428017682827243, "epoch": 0.2674743709226468, "grad_norm": 2.0797388490222586, "learning_rate": 1.782608695652174e-05, "loss": 2.8279, "reason_loss": 0.5694003105163574, "step": 287, "utility_loss": 2.258477210998535 }, { "cosine_similarity": 0.395148595842898, "epoch": 0.2684063373718546, "grad_norm": 1.9876148258109803, "learning_rate": 1.7888198757763976e-05, "loss": 2.7007, "reason_loss": 0.5823975801467896, "step": 288, "utility_loss": 2.1182680130004883 }, { "cosine_similarity": 0.2782595400101195, "epoch": 0.26933830382106244, "grad_norm": 2.0149829616621315, "learning_rate": 1.795031055900621e-05, "loss": 2.9804, "reason_loss": 0.5977380275726318, "step": 289, "utility_loss": 2.3826215267181396 }, { "cosine_similarity": 0.35108732765193457, "epoch": 0.2702702702702703, "grad_norm": 1.7909462720764024, "learning_rate": 1.801242236024845e-05, "loss": 2.5372, "reason_loss": 0.5690537691116333, "step": 290, "utility_loss": 1.9681107997894287 }, { "cosine_similarity": 0.03953679445127776, "epoch": 0.2712022367194781, "grad_norm": 2.276421638114842, "learning_rate": 1.8074534161490685e-05, "loss": 1.9706, "reason_loss": 0.5858811140060425, "step": 291, "utility_loss": 1.3847659826278687 }, { "cosine_similarity": 0.12676564082782835, "epoch": 0.2721342031686859, "grad_norm": 1.890492007481417, "learning_rate": 1.8136645962732923e-05, "loss": 2.3509, "reason_loss": 0.5801662802696228, "step": 292, "utility_loss": 1.7707407474517822 }, { "cosine_similarity": 0.5985730342892648, "epoch": 0.27306616961789376, "grad_norm": 1.7628557495573196, "learning_rate": 1.8198757763975158e-05, "loss": 2.7962, "reason_loss": 0.5700681805610657, "step": 293, "utility_loss": 2.2261404991149902 }, { "cosine_similarity": 0.17367281474338928, "epoch": 0.2739981360671016, "grad_norm": 1.8331162766580558, "learning_rate": 1.8260869565217393e-05, "loss": 2.6507, "reason_loss": 0.588459312915802, "step": 294, "utility_loss": 2.0622079372406006 }, { "cosine_similarity": 0.20312175340246566, "epoch": 0.2749301025163094, "grad_norm": 1.9963548929516755, "learning_rate": 1.8322981366459628e-05, "loss": 2.7573, "reason_loss": 0.6072345972061157, "step": 295, "utility_loss": 2.150024890899658 }, { "cosine_similarity": 0.11186904901059126, "epoch": 0.27586206896551724, "grad_norm": 1.8213540861664739, "learning_rate": 1.8385093167701863e-05, "loss": 2.3384, "reason_loss": 0.5572004318237305, "step": 296, "utility_loss": 1.7811849117279053 }, { "cosine_similarity": 0.3723956423089934, "epoch": 0.2767940354147251, "grad_norm": 1.6739251191589468, "learning_rate": 1.84472049689441e-05, "loss": 2.2464, "reason_loss": 0.5791753530502319, "step": 297, "utility_loss": 1.6672017574310303 }, { "cosine_similarity": 0.06622345490837382, "epoch": 0.2777260018639329, "grad_norm": 1.9692001037170344, "learning_rate": 1.8509316770186337e-05, "loss": 2.6925, "reason_loss": 0.6022244691848755, "step": 298, "utility_loss": 2.090250253677368 }, { "cosine_similarity": -0.04606736821592595, "epoch": 0.2786579683131407, "grad_norm": 2.130866006877246, "learning_rate": 1.8571428571428575e-05, "loss": 2.5334, "reason_loss": 0.5720115900039673, "step": 299, "utility_loss": 1.961383581161499 }, { "cosine_similarity": 0.04591327143304702, "epoch": 0.27958993476234856, "grad_norm": 1.9987999386323576, "learning_rate": 1.863354037267081e-05, "loss": 2.7968, "reason_loss": 0.6273935437202454, "step": 300, "utility_loss": 2.1694469451904297 }, { "cosine_similarity": 0.04966164799153693, "epoch": 0.2805219012115564, "grad_norm": 1.6871715775283573, "learning_rate": 1.8695652173913045e-05, "loss": 1.8264, "reason_loss": 0.5766208171844482, "step": 301, "utility_loss": 1.2497928142547607 }, { "cosine_similarity": 0.10567305358925376, "epoch": 0.2814538676607642, "grad_norm": 1.6199317086065361, "learning_rate": 1.875776397515528e-05, "loss": 2.1148, "reason_loss": 0.5705569386482239, "step": 302, "utility_loss": 1.54425048828125 }, { "cosine_similarity": 0.5606751923816445, "epoch": 0.28238583410997203, "grad_norm": 3.2496487567502803, "learning_rate": 1.881987577639752e-05, "loss": 2.6595, "reason_loss": 0.5539048910140991, "step": 303, "utility_loss": 2.1055588722229004 }, { "cosine_similarity": 0.7001013179430368, "epoch": 0.2833178005591799, "grad_norm": 2.3191293926563086, "learning_rate": 1.8881987577639754e-05, "loss": 2.5949, "reason_loss": 0.595379114151001, "step": 304, "utility_loss": 1.9995156526565552 }, { "cosine_similarity": 0.10111540969343276, "epoch": 0.2842497670083877, "grad_norm": 2.12829987932354, "learning_rate": 1.894409937888199e-05, "loss": 2.5154, "reason_loss": 0.5756771564483643, "step": 305, "utility_loss": 1.9396816492080688 }, { "cosine_similarity": 0.24979415281501557, "epoch": 0.2851817334575955, "grad_norm": 2.5164199884958864, "learning_rate": 1.9006211180124224e-05, "loss": 2.7758, "reason_loss": 0.547782301902771, "step": 306, "utility_loss": 2.2280261516571045 }, { "cosine_similarity": 0.032334953304559204, "epoch": 0.28611369990680335, "grad_norm": 1.8941808554084623, "learning_rate": 1.906832298136646e-05, "loss": 2.5305, "reason_loss": 0.5579342842102051, "step": 307, "utility_loss": 1.9725652933120728 }, { "cosine_similarity": 0.521412029301031, "epoch": 0.2870456663560112, "grad_norm": 1.9690467649622905, "learning_rate": 1.9130434782608697e-05, "loss": 2.6657, "reason_loss": 0.5584021210670471, "step": 308, "utility_loss": 2.1073191165924072 }, { "cosine_similarity": 0.05669685646578087, "epoch": 0.287977632805219, "grad_norm": 1.9433065128417872, "learning_rate": 1.9192546583850932e-05, "loss": 2.4957, "reason_loss": 0.5604404211044312, "step": 309, "utility_loss": 1.9352777004241943 }, { "cosine_similarity": 0.11418343607306733, "epoch": 0.28890959925442683, "grad_norm": 1.5333481129494915, "learning_rate": 1.925465838509317e-05, "loss": 2.0425, "reason_loss": 0.5934227705001831, "step": 310, "utility_loss": 1.4490344524383545 }, { "cosine_similarity": 0.3182057558750447, "epoch": 0.2898415657036347, "grad_norm": 2.8154402822309077, "learning_rate": 1.9316770186335406e-05, "loss": 2.7849, "reason_loss": 0.598469614982605, "step": 311, "utility_loss": 2.1863856315612793 }, { "cosine_similarity": 0.025891190438568155, "epoch": 0.2907735321528425, "grad_norm": 2.5155546751281475, "learning_rate": 1.937888198757764e-05, "loss": 2.474, "reason_loss": 0.5644267797470093, "step": 312, "utility_loss": 1.909583330154419 }, { "cosine_similarity": 0.07473178656378938, "epoch": 0.2917054986020503, "grad_norm": 1.4067927804368534, "learning_rate": 1.9440993788819876e-05, "loss": 2.0735, "reason_loss": 0.5846192240715027, "step": 313, "utility_loss": 1.488861322402954 }, { "cosine_similarity": 0.08977480900626415, "epoch": 0.29263746505125815, "grad_norm": 2.384753337161699, "learning_rate": 1.950310559006211e-05, "loss": 2.6324, "reason_loss": 0.5458442568778992, "step": 314, "utility_loss": 2.086587905883789 }, { "cosine_similarity": 0.09673304950656729, "epoch": 0.293569431500466, "grad_norm": 1.9036296441708087, "learning_rate": 1.956521739130435e-05, "loss": 2.7134, "reason_loss": 0.5945056676864624, "step": 315, "utility_loss": 2.1188764572143555 }, { "cosine_similarity": 0.5789498072618969, "epoch": 0.29450139794967384, "grad_norm": 1.6139001033657423, "learning_rate": 1.9627329192546585e-05, "loss": 2.4818, "reason_loss": 0.5381836891174316, "step": 316, "utility_loss": 1.943648338317871 }, { "cosine_similarity": 0.13263718985430295, "epoch": 0.2954333643988816, "grad_norm": 1.8566327435549512, "learning_rate": 1.9689440993788823e-05, "loss": 2.4933, "reason_loss": 0.5454671382904053, "step": 317, "utility_loss": 1.9478130340576172 }, { "cosine_similarity": 0.022428454239565243, "epoch": 0.29636533084808947, "grad_norm": 1.8680331970013349, "learning_rate": 1.9751552795031058e-05, "loss": 2.4404, "reason_loss": 0.5672692060470581, "step": 318, "utility_loss": 1.87310791015625 }, { "cosine_similarity": 0.23300460845429796, "epoch": 0.2972972972972973, "grad_norm": 1.7038042225385508, "learning_rate": 1.9813664596273293e-05, "loss": 2.6106, "reason_loss": 0.550702691078186, "step": 319, "utility_loss": 2.0598957538604736 }, { "cosine_similarity": -0.0066495044940868995, "epoch": 0.2982292637465051, "grad_norm": 2.2276957958681516, "learning_rate": 1.9875776397515528e-05, "loss": 2.5477, "reason_loss": 0.5788257718086243, "step": 320, "utility_loss": 1.9689195156097412 }, { "cosine_similarity": 0.10818586516163466, "epoch": 0.29916123019571295, "grad_norm": 1.8265419788572435, "learning_rate": 1.9937888198757767e-05, "loss": 2.5563, "reason_loss": 0.5416114330291748, "step": 321, "utility_loss": 2.014686107635498 }, { "cosine_similarity": 0.26749963520194414, "epoch": 0.3000931966449208, "grad_norm": 1.4395760941316476, "learning_rate": 2e-05, "loss": 2.2056, "reason_loss": 0.5582042932510376, "step": 322, "utility_loss": 1.647411823272705 }, { "cosine_similarity": 0.2453240006301716, "epoch": 0.30102516309412863, "grad_norm": 2.251601502056019, "learning_rate": 1.999309630652399e-05, "loss": 2.7948, "reason_loss": 0.5555191040039062, "step": 323, "utility_loss": 2.2392969131469727 }, { "cosine_similarity": 0.697111745589227, "epoch": 0.3019571295433364, "grad_norm": 1.750392893980982, "learning_rate": 1.9986192613047983e-05, "loss": 1.7901, "reason_loss": 0.5554274320602417, "step": 324, "utility_loss": 1.2346735000610352 }, { "cosine_similarity": 0.6245592187787963, "epoch": 0.30288909599254427, "grad_norm": 1.8518495692244115, "learning_rate": 1.9979288919571972e-05, "loss": 2.6509, "reason_loss": 0.5532726049423218, "step": 325, "utility_loss": 2.09761118888855 }, { "cosine_similarity": 0.006354101485827444, "epoch": 0.3038210624417521, "grad_norm": 1.7908834534369407, "learning_rate": 1.997238522609596e-05, "loss": 2.2817, "reason_loss": 0.5572770833969116, "step": 326, "utility_loss": 1.7243759632110596 }, { "cosine_similarity": 0.653379584951248, "epoch": 0.3047530288909599, "grad_norm": 1.7916479240796843, "learning_rate": 1.9965481532619954e-05, "loss": 2.3035, "reason_loss": 0.5682424306869507, "step": 327, "utility_loss": 1.7352771759033203 }, { "cosine_similarity": 0.4929304966322481, "epoch": 0.30568499534016774, "grad_norm": 1.7090122968672015, "learning_rate": 1.9958577839143946e-05, "loss": 2.544, "reason_loss": 0.5582953691482544, "step": 328, "utility_loss": 1.9856899976730347 }, { "cosine_similarity": -0.005067836941568766, "epoch": 0.3066169617893756, "grad_norm": 2.708859368605259, "learning_rate": 1.9951674145667935e-05, "loss": 2.551, "reason_loss": 0.5895805358886719, "step": 329, "utility_loss": 1.9614205360412598 }, { "cosine_similarity": 0.18812953144186964, "epoch": 0.30754892823858343, "grad_norm": 1.6387841260873315, "learning_rate": 1.9944770452191924e-05, "loss": 2.4184, "reason_loss": 0.5590689182281494, "step": 330, "utility_loss": 1.8593668937683105 }, { "cosine_similarity": 0.18622281658721695, "epoch": 0.3084808946877912, "grad_norm": 1.7327551448222112, "learning_rate": 1.9937866758715913e-05, "loss": 2.152, "reason_loss": 0.5574976205825806, "step": 331, "utility_loss": 1.5945103168487549 }, { "cosine_similarity": 0.26425834456931785, "epoch": 0.30941286113699906, "grad_norm": 1.5899731375543575, "learning_rate": 1.9930963065239906e-05, "loss": 1.9201, "reason_loss": 0.5599879026412964, "step": 332, "utility_loss": 1.3601245880126953 }, { "cosine_similarity": 0.2219602983006825, "epoch": 0.3103448275862069, "grad_norm": 1.858472424265726, "learning_rate": 1.9924059371763895e-05, "loss": 2.22, "reason_loss": 0.5619907379150391, "step": 333, "utility_loss": 1.657960057258606 }, { "cosine_similarity": 0.3269961617684722, "epoch": 0.31127679403541475, "grad_norm": 2.2930686303906276, "learning_rate": 1.9917155678287884e-05, "loss": 2.5413, "reason_loss": 0.5712285041809082, "step": 334, "utility_loss": 1.9700216054916382 }, { "cosine_similarity": 0.26773248664143834, "epoch": 0.31220876048462254, "grad_norm": 1.7810601721781285, "learning_rate": 1.9910251984811876e-05, "loss": 2.1419, "reason_loss": 0.5619472861289978, "step": 335, "utility_loss": 1.5799957513809204 }, { "cosine_similarity": 0.26211432127485645, "epoch": 0.3131407269338304, "grad_norm": 1.889687299330557, "learning_rate": 1.9903348291335865e-05, "loss": 2.5655, "reason_loss": 0.5957565307617188, "step": 336, "utility_loss": 1.9697272777557373 }, { "cosine_similarity": 0.24633867189862785, "epoch": 0.3140726933830382, "grad_norm": 1.8947339493199253, "learning_rate": 1.9896444597859858e-05, "loss": 2.331, "reason_loss": 0.5586587190628052, "step": 337, "utility_loss": 1.7723522186279297 }, { "cosine_similarity": 0.17720035997614267, "epoch": 0.315004659832246, "grad_norm": 1.7986635689749255, "learning_rate": 1.9889540904383847e-05, "loss": 2.1866, "reason_loss": 0.5682592988014221, "step": 338, "utility_loss": 1.6183531284332275 }, { "cosine_similarity": 0.15567443197007075, "epoch": 0.31593662628145386, "grad_norm": 2.4352514959718388, "learning_rate": 1.988263721090784e-05, "loss": 2.4493, "reason_loss": 0.54109126329422, "step": 339, "utility_loss": 1.908158779144287 }, { "cosine_similarity": 0.42650251405386697, "epoch": 0.3168685927306617, "grad_norm": 1.770758987741226, "learning_rate": 1.9875733517431828e-05, "loss": 2.6876, "reason_loss": 0.5711397528648376, "step": 340, "utility_loss": 2.1164612770080566 }, { "cosine_similarity": 0.5264138538010752, "epoch": 0.31780055917986955, "grad_norm": 1.8249288843611016, "learning_rate": 1.9868829823955817e-05, "loss": 2.565, "reason_loss": 0.5584731101989746, "step": 341, "utility_loss": 2.0065722465515137 }, { "cosine_similarity": 0.25329915964798877, "epoch": 0.31873252562907733, "grad_norm": 2.193385307815508, "learning_rate": 1.9861926130479806e-05, "loss": 2.3688, "reason_loss": 0.5813273787498474, "step": 342, "utility_loss": 1.7875092029571533 }, { "cosine_similarity": 0.9976738764583418, "epoch": 0.3196644920782852, "grad_norm": 1.788919431684608, "learning_rate": 1.98550224370038e-05, "loss": 2.392, "reason_loss": 0.5473181009292603, "step": 343, "utility_loss": 1.8447260856628418 }, { "cosine_similarity": 0.5290590085989954, "epoch": 0.320596458527493, "grad_norm": 2.0295013455976503, "learning_rate": 1.9848118743527788e-05, "loss": 2.546, "reason_loss": 0.5834101438522339, "step": 344, "utility_loss": 1.9625862836837769 }, { "cosine_similarity": 0.5315355912700558, "epoch": 0.32152842497670087, "grad_norm": 2.2939840741511484, "learning_rate": 1.984121505005178e-05, "loss": 2.7301, "reason_loss": 0.5294288992881775, "step": 345, "utility_loss": 2.2006237506866455 }, { "cosine_similarity": 0.5259648185865355, "epoch": 0.32246039142590865, "grad_norm": 1.641098339137848, "learning_rate": 1.983431135657577e-05, "loss": 2.0976, "reason_loss": 0.5654036998748779, "step": 346, "utility_loss": 1.5321476459503174 }, { "cosine_similarity": 0.36809588399926424, "epoch": 0.3233923578751165, "grad_norm": 1.750874086218276, "learning_rate": 1.982740766309976e-05, "loss": 2.1904, "reason_loss": 0.6090787649154663, "step": 347, "utility_loss": 1.581282615661621 }, { "cosine_similarity": 0.6454623942203858, "epoch": 0.32432432432432434, "grad_norm": 1.5239108911496537, "learning_rate": 1.982050396962375e-05, "loss": 2.1591, "reason_loss": 0.5774381160736084, "step": 348, "utility_loss": 1.581621766090393 }, { "cosine_similarity": 0.7001179564184374, "epoch": 0.32525629077353213, "grad_norm": 2.1981955750034405, "learning_rate": 1.981360027614774e-05, "loss": 2.4354, "reason_loss": 0.568331241607666, "step": 349, "utility_loss": 1.8670737743377686 }, { "cosine_similarity": 0.023227905786855945, "epoch": 0.32618825722274, "grad_norm": 1.6876600184618475, "learning_rate": 1.9806696582671732e-05, "loss": 2.66, "reason_loss": 0.5597571730613708, "step": 350, "utility_loss": 2.1002421379089355 }, { "cosine_similarity": 0.007477474436229489, "epoch": 0.3271202236719478, "grad_norm": 1.8250288719234848, "learning_rate": 1.979979288919572e-05, "loss": 2.3728, "reason_loss": 0.5660345554351807, "step": 351, "utility_loss": 1.806748390197754 }, { "cosine_similarity": 0.6310006369138922, "epoch": 0.32805219012115566, "grad_norm": 1.752976791365402, "learning_rate": 1.979288919571971e-05, "loss": 2.5305, "reason_loss": 0.5583915710449219, "step": 352, "utility_loss": 1.9720706939697266 }, { "cosine_similarity": 0.30943535836701985, "epoch": 0.32898415657036345, "grad_norm": 2.1517880952084227, "learning_rate": 1.9785985502243702e-05, "loss": 2.8426, "reason_loss": 0.5464153289794922, "step": 353, "utility_loss": 2.296135187149048 }, { "cosine_similarity": 0.20811507902301984, "epoch": 0.3299161230195713, "grad_norm": 1.9197058453526847, "learning_rate": 1.977908180876769e-05, "loss": 2.4427, "reason_loss": 0.5505221486091614, "step": 354, "utility_loss": 1.8921279907226562 }, { "cosine_similarity": 0.3206222642730329, "epoch": 0.33084808946877914, "grad_norm": 1.7170098681484474, "learning_rate": 1.9772178115291684e-05, "loss": 2.4444, "reason_loss": 0.5783491134643555, "step": 355, "utility_loss": 1.8660945892333984 }, { "cosine_similarity": -0.026022092153818663, "epoch": 0.3317800559179869, "grad_norm": 1.9808639859573915, "learning_rate": 1.9765274421815673e-05, "loss": 2.7248, "reason_loss": 0.5279041528701782, "step": 356, "utility_loss": 2.1969194412231445 }, { "cosine_similarity": 0.03535205859759838, "epoch": 0.33271202236719477, "grad_norm": 1.7595246038216528, "learning_rate": 1.9758370728339665e-05, "loss": 1.9672, "reason_loss": 0.56026691198349, "step": 357, "utility_loss": 1.4069124460220337 }, { "cosine_similarity": 0.46562759547283084, "epoch": 0.3336439888164026, "grad_norm": 1.992140071136226, "learning_rate": 1.9751467034863654e-05, "loss": 2.4122, "reason_loss": 0.566329300403595, "step": 358, "utility_loss": 1.8458936214447021 }, { "cosine_similarity": 0.012620568614241611, "epoch": 0.33457595526561046, "grad_norm": 1.7531925657641532, "learning_rate": 1.9744563341387643e-05, "loss": 2.4316, "reason_loss": 0.5894204378128052, "step": 359, "utility_loss": 1.8422085046768188 }, { "cosine_similarity": -0.028571801206150306, "epoch": 0.33550792171481825, "grad_norm": 1.6060522668869022, "learning_rate": 1.9737659647911633e-05, "loss": 2.7509, "reason_loss": 0.5362840890884399, "step": 360, "utility_loss": 2.2146220207214355 }, { "cosine_similarity": 0.27512499498283954, "epoch": 0.3364398881640261, "grad_norm": 2.039704521511974, "learning_rate": 1.9730755954435625e-05, "loss": 2.5125, "reason_loss": 0.5396788120269775, "step": 361, "utility_loss": 1.9728517532348633 }, { "cosine_similarity": 0.2370563697995289, "epoch": 0.33737185461323393, "grad_norm": 1.795918154422233, "learning_rate": 1.9723852260959614e-05, "loss": 1.8509, "reason_loss": 0.5378060340881348, "step": 362, "utility_loss": 1.3130526542663574 }, { "cosine_similarity": 0.5719028214389426, "epoch": 0.3383038210624418, "grad_norm": 1.816007971649014, "learning_rate": 1.9716948567483606e-05, "loss": 2.4092, "reason_loss": 0.5536906719207764, "step": 363, "utility_loss": 1.8555045127868652 }, { "cosine_similarity": 0.06192744195468483, "epoch": 0.33923578751164957, "grad_norm": 2.0185038285122454, "learning_rate": 1.9710044874007595e-05, "loss": 2.7608, "reason_loss": 0.5853039026260376, "step": 364, "utility_loss": 2.175530433654785 }, { "cosine_similarity": 0.21360135377931055, "epoch": 0.3401677539608574, "grad_norm": 1.7097511447650198, "learning_rate": 1.9703141180531588e-05, "loss": 2.2694, "reason_loss": 0.5440101623535156, "step": 365, "utility_loss": 1.7254074811935425 }, { "cosine_similarity": 0.6669127523867482, "epoch": 0.34109972041006525, "grad_norm": 1.9984450242246663, "learning_rate": 1.9696237487055577e-05, "loss": 2.7328, "reason_loss": 0.5558720827102661, "step": 366, "utility_loss": 2.1769423484802246 }, { "cosine_similarity": 0.17376120316466398, "epoch": 0.34203168685927304, "grad_norm": 1.6726578109846415, "learning_rate": 1.9689333793579566e-05, "loss": 2.7178, "reason_loss": 0.6131361126899719, "step": 367, "utility_loss": 2.104614734649658 }, { "cosine_similarity": 0.007872152831948346, "epoch": 0.3429636533084809, "grad_norm": 1.5774128048431542, "learning_rate": 1.968243010010356e-05, "loss": 2.0313, "reason_loss": 0.5600897073745728, "step": 368, "utility_loss": 1.471246361732483 }, { "cosine_similarity": 0.43951157529631873, "epoch": 0.34389561975768873, "grad_norm": 2.145292215967103, "learning_rate": 1.9675526406627547e-05, "loss": 2.3599, "reason_loss": 0.5345184803009033, "step": 369, "utility_loss": 1.8254289627075195 }, { "cosine_similarity": 0.5444044170422336, "epoch": 0.3448275862068966, "grad_norm": 2.6683080872325036, "learning_rate": 1.9668622713151536e-05, "loss": 2.6584, "reason_loss": 0.572178065776825, "step": 370, "utility_loss": 2.086265802383423 }, { "cosine_similarity": 0.49002907100477683, "epoch": 0.34575955265610436, "grad_norm": 2.0407338650031077, "learning_rate": 1.9661719019675526e-05, "loss": 2.5181, "reason_loss": 0.54180508852005, "step": 371, "utility_loss": 1.9763176441192627 }, { "cosine_similarity": -0.16671195031728991, "epoch": 0.3466915191053122, "grad_norm": 1.8037660037891807, "learning_rate": 1.9654815326199518e-05, "loss": 2.522, "reason_loss": 0.5456120371818542, "step": 372, "utility_loss": 1.976366400718689 }, { "cosine_similarity": 0.2578219728232431, "epoch": 0.34762348555452005, "grad_norm": 1.5985441778184135, "learning_rate": 1.9647911632723507e-05, "loss": 2.1213, "reason_loss": 0.5587912201881409, "step": 373, "utility_loss": 1.562558650970459 }, { "cosine_similarity": 0.35522829499426484, "epoch": 0.34855545200372784, "grad_norm": 2.051332281008552, "learning_rate": 1.96410079392475e-05, "loss": 2.3728, "reason_loss": 0.5264158248901367, "step": 374, "utility_loss": 1.8463643789291382 }, { "cosine_similarity": 0.3249023105739368, "epoch": 0.3494874184529357, "grad_norm": 1.7809955986395039, "learning_rate": 1.9634104245771492e-05, "loss": 2.5736, "reason_loss": 0.5731656551361084, "step": 375, "utility_loss": 2.0004334449768066 }, { "cosine_similarity": 0.2811125449707936, "epoch": 0.3504193849021435, "grad_norm": 2.068280026940133, "learning_rate": 1.962720055229548e-05, "loss": 2.5493, "reason_loss": 0.5581322908401489, "step": 376, "utility_loss": 1.9911935329437256 }, { "cosine_similarity": 0.17113814870532543, "epoch": 0.35135135135135137, "grad_norm": 1.481183524433133, "learning_rate": 1.962029685881947e-05, "loss": 2.0322, "reason_loss": 0.5935071706771851, "step": 377, "utility_loss": 1.438727617263794 }, { "cosine_similarity": 0.174172524538149, "epoch": 0.35228331780055916, "grad_norm": 1.9903774836202157, "learning_rate": 1.961339316534346e-05, "loss": 2.4397, "reason_loss": 0.5518238544464111, "step": 378, "utility_loss": 1.8879168033599854 }, { "cosine_similarity": 0.1587760032833603, "epoch": 0.353215284249767, "grad_norm": 1.8053320283375207, "learning_rate": 1.960648947186745e-05, "loss": 2.2637, "reason_loss": 0.5793827772140503, "step": 379, "utility_loss": 1.6843256950378418 }, { "cosine_similarity": 0.0485228809402899, "epoch": 0.35414725069897485, "grad_norm": 1.5993379658106885, "learning_rate": 1.959958577839144e-05, "loss": 2.5906, "reason_loss": 0.5601963400840759, "step": 380, "utility_loss": 2.030423879623413 }, { "cosine_similarity": 0.13322371711323894, "epoch": 0.3550792171481827, "grad_norm": 1.9579582755867726, "learning_rate": 1.959268208491543e-05, "loss": 2.2587, "reason_loss": 0.5641905069351196, "step": 381, "utility_loss": 1.6945027112960815 }, { "cosine_similarity": 0.4400590127109887, "epoch": 0.3560111835973905, "grad_norm": 1.6951886769711857, "learning_rate": 1.9585778391439422e-05, "loss": 2.2468, "reason_loss": 0.5501596331596375, "step": 382, "utility_loss": 1.6965954303741455 }, { "cosine_similarity": 0.3978003421745917, "epoch": 0.3569431500465983, "grad_norm": 1.8300867237301952, "learning_rate": 1.957887469796341e-05, "loss": 2.6005, "reason_loss": 0.5958380699157715, "step": 383, "utility_loss": 2.004662036895752 }, { "cosine_similarity": -0.0483355348686297, "epoch": 0.35787511649580617, "grad_norm": 1.258673295167674, "learning_rate": 1.9571971004487403e-05, "loss": 2.0254, "reason_loss": 0.578758955001831, "step": 384, "utility_loss": 1.446659803390503 }, { "cosine_similarity": 0.297592904528295, "epoch": 0.35880708294501396, "grad_norm": 1.9091463112210125, "learning_rate": 1.9565067311011392e-05, "loss": 2.5909, "reason_loss": 0.563230037689209, "step": 385, "utility_loss": 2.0276598930358887 }, { "cosine_similarity": 0.5384295845593438, "epoch": 0.3597390493942218, "grad_norm": 1.7318155200433905, "learning_rate": 1.9558163617535385e-05, "loss": 2.4697, "reason_loss": 0.544969379901886, "step": 386, "utility_loss": 1.9247227907180786 }, { "cosine_similarity": 0.410554697163683, "epoch": 0.36067101584342964, "grad_norm": 1.6785767580079893, "learning_rate": 1.9551259924059374e-05, "loss": 2.6106, "reason_loss": 0.5348280668258667, "step": 387, "utility_loss": 2.0757412910461426 }, { "cosine_similarity": 0.35360891201611766, "epoch": 0.3616029822926375, "grad_norm": 1.8353262419700151, "learning_rate": 1.9544356230583363e-05, "loss": 2.6871, "reason_loss": 0.5591099262237549, "step": 388, "utility_loss": 2.1279451847076416 }, { "cosine_similarity": 0.15107783104135641, "epoch": 0.3625349487418453, "grad_norm": 1.3194473793679686, "learning_rate": 1.9537452537107352e-05, "loss": 2.398, "reason_loss": 0.5645229816436768, "step": 389, "utility_loss": 1.83346426486969 }, { "cosine_similarity": 0.014221138297020236, "epoch": 0.3634669151910531, "grad_norm": 1.4729677819743745, "learning_rate": 1.9530548843631344e-05, "loss": 2.0326, "reason_loss": 0.5594570636749268, "step": 390, "utility_loss": 1.4731745719909668 }, { "cosine_similarity": 0.40970336454875445, "epoch": 0.36439888164026096, "grad_norm": 1.9431673026036547, "learning_rate": 1.9523645150155333e-05, "loss": 2.447, "reason_loss": 0.5920974016189575, "step": 391, "utility_loss": 1.854882001876831 }, { "cosine_similarity": -0.4228762014315249, "epoch": 0.36533084808946875, "grad_norm": 3.7300884016816243, "learning_rate": 1.9516741456679326e-05, "loss": 3.0671, "reason_loss": 0.5112125873565674, "step": 392, "utility_loss": 2.555873394012451 }, { "cosine_similarity": -0.033923052738625524, "epoch": 0.3662628145386766, "grad_norm": 1.9738441739285808, "learning_rate": 1.9509837763203315e-05, "loss": 2.4609, "reason_loss": 0.5449890494346619, "step": 393, "utility_loss": 1.915877103805542 }, { "cosine_similarity": 0.114013440401849, "epoch": 0.36719478098788444, "grad_norm": 2.1214994668329474, "learning_rate": 1.9502934069727307e-05, "loss": 2.3423, "reason_loss": 0.5679395198822021, "step": 394, "utility_loss": 1.7744014263153076 }, { "cosine_similarity": 0.28384632778469837, "epoch": 0.3681267474370923, "grad_norm": 2.076684735707991, "learning_rate": 1.9496030376251296e-05, "loss": 2.1208, "reason_loss": 0.583136796951294, "step": 395, "utility_loss": 1.5376302003860474 }, { "cosine_similarity": -0.045967057465413415, "epoch": 0.36905871388630007, "grad_norm": 2.418052935341563, "learning_rate": 1.9489126682775285e-05, "loss": 2.7502, "reason_loss": 0.5426028370857239, "step": 396, "utility_loss": 2.2075939178466797 }, { "cosine_similarity": 0.09134026242394504, "epoch": 0.3699906803355079, "grad_norm": 1.9184483434475943, "learning_rate": 1.9482222989299278e-05, "loss": 2.0686, "reason_loss": 0.5213552117347717, "step": 397, "utility_loss": 1.5472674369812012 }, { "cosine_similarity": 0.04091272756964578, "epoch": 0.37092264678471576, "grad_norm": 1.7612425411430193, "learning_rate": 1.9475319295823267e-05, "loss": 2.3513, "reason_loss": 0.5691250562667847, "step": 398, "utility_loss": 1.7821418046951294 }, { "cosine_similarity": 0.3610318776381119, "epoch": 0.3718546132339236, "grad_norm": 1.414337077063083, "learning_rate": 1.9468415602347256e-05, "loss": 2.5041, "reason_loss": 0.5437325239181519, "step": 399, "utility_loss": 1.9603188037872314 }, { "cosine_similarity": 0.06515063805721799, "epoch": 0.3727865796831314, "grad_norm": 2.533245153367055, "learning_rate": 1.9461511908871248e-05, "loss": 2.3395, "reason_loss": 0.5319470167160034, "step": 400, "utility_loss": 1.8075363636016846 }, { "cosine_similarity": 0.2622386419148355, "epoch": 0.37371854613233924, "grad_norm": 2.2882099737753157, "learning_rate": 1.9454608215395237e-05, "loss": 2.5808, "reason_loss": 0.5558074712753296, "step": 401, "utility_loss": 2.024963855743408 }, { "cosine_similarity": 0.1612590575101077, "epoch": 0.3746505125815471, "grad_norm": 2.20999507427846, "learning_rate": 1.944770452191923e-05, "loss": 2.5641, "reason_loss": 0.5895177721977234, "step": 402, "utility_loss": 1.9746243953704834 }, { "cosine_similarity": 0.059575954704649776, "epoch": 0.37558247903075487, "grad_norm": 1.581191775901428, "learning_rate": 1.944080082844322e-05, "loss": 2.5562, "reason_loss": 0.5518913269042969, "step": 403, "utility_loss": 2.0042614936828613 }, { "cosine_similarity": 0.21212411556558639, "epoch": 0.3765144454799627, "grad_norm": 1.3490408417133692, "learning_rate": 1.943389713496721e-05, "loss": 2.3399, "reason_loss": 0.565578818321228, "step": 404, "utility_loss": 1.774322748184204 }, { "cosine_similarity": 0.16681471973596343, "epoch": 0.37744641192917056, "grad_norm": 1.7953468980468907, "learning_rate": 1.94269934414912e-05, "loss": 2.1089, "reason_loss": 0.5522218942642212, "step": 405, "utility_loss": 1.556711196899414 }, { "cosine_similarity": 0.4353396134342954, "epoch": 0.3783783783783784, "grad_norm": 1.8496287342915148, "learning_rate": 1.942008974801519e-05, "loss": 2.2436, "reason_loss": 0.5535745024681091, "step": 406, "utility_loss": 1.6900138854980469 }, { "cosine_similarity": 0.15774103021266403, "epoch": 0.3793103448275862, "grad_norm": 1.762780859240963, "learning_rate": 1.941318605453918e-05, "loss": 2.3111, "reason_loss": 0.5572067499160767, "step": 407, "utility_loss": 1.7538992166519165 }, { "cosine_similarity": 0.2070506361912799, "epoch": 0.38024231127679403, "grad_norm": 1.7252287331053286, "learning_rate": 1.940628236106317e-05, "loss": 2.1293, "reason_loss": 0.554503858089447, "step": 408, "utility_loss": 1.57474684715271 }, { "cosine_similarity": 0.08821002992921263, "epoch": 0.3811742777260019, "grad_norm": 2.264544538866061, "learning_rate": 1.939937866758716e-05, "loss": 2.0979, "reason_loss": 0.5628149509429932, "step": 409, "utility_loss": 1.5350432395935059 }, { "cosine_similarity": 0.16246019982993806, "epoch": 0.3821062441752097, "grad_norm": 2.1799743525297766, "learning_rate": 1.9392474974111152e-05, "loss": 2.491, "reason_loss": 0.5600732564926147, "step": 410, "utility_loss": 1.9308884143829346 }, { "cosine_similarity": 0.15438228044606095, "epoch": 0.3830382106244175, "grad_norm": 1.7825463659259038, "learning_rate": 1.938557128063514e-05, "loss": 2.2114, "reason_loss": 0.558227002620697, "step": 411, "utility_loss": 1.653206467628479 }, { "cosine_similarity": 0.3941640272489361, "epoch": 0.38397017707362535, "grad_norm": 1.868408392356326, "learning_rate": 1.9378667587159134e-05, "loss": 2.8021, "reason_loss": 0.5806093215942383, "step": 412, "utility_loss": 2.221534252166748 }, { "cosine_similarity": 0.21565869004106433, "epoch": 0.3849021435228332, "grad_norm": 2.374800840158741, "learning_rate": 1.9371763893683123e-05, "loss": 2.6456, "reason_loss": 0.5177429914474487, "step": 413, "utility_loss": 2.1278090476989746 }, { "cosine_similarity": 1.0122209394144754, "epoch": 0.385834109972041, "grad_norm": 1.3881379443598572, "learning_rate": 1.9364860200207112e-05, "loss": 2.0022, "reason_loss": 0.4987002909183502, "step": 414, "utility_loss": 1.5035319328308105 }, { "cosine_similarity": 0.22933589378124694, "epoch": 0.38676607642124883, "grad_norm": 2.0372488122509016, "learning_rate": 1.9357956506731104e-05, "loss": 2.1838, "reason_loss": 0.5554205775260925, "step": 415, "utility_loss": 1.628358006477356 }, { "cosine_similarity": 0.21389186120579315, "epoch": 0.38769804287045667, "grad_norm": 1.6477770854497207, "learning_rate": 1.9351052813255093e-05, "loss": 2.231, "reason_loss": 0.6026586294174194, "step": 416, "utility_loss": 1.6283845901489258 }, { "cosine_similarity": 0.40390769472822863, "epoch": 0.3886300093196645, "grad_norm": 1.6184795043361409, "learning_rate": 1.9344149119779082e-05, "loss": 2.3048, "reason_loss": 0.566117525100708, "step": 417, "utility_loss": 1.7386378049850464 }, { "cosine_similarity": 0.17532386039919398, "epoch": 0.3895619757688723, "grad_norm": 1.6686326631314854, "learning_rate": 1.933724542630307e-05, "loss": 2.3985, "reason_loss": 0.5499923825263977, "step": 418, "utility_loss": 1.848490595817566 }, { "cosine_similarity": 0.2869466885946051, "epoch": 0.39049394221808015, "grad_norm": 1.7755166717008446, "learning_rate": 1.9330341732827064e-05, "loss": 2.4892, "reason_loss": 0.5756398439407349, "step": 419, "utility_loss": 1.913572072982788 }, { "cosine_similarity": 0.26023546640122125, "epoch": 0.391425908667288, "grad_norm": 1.4036246821131384, "learning_rate": 1.9323438039351056e-05, "loss": 2.5382, "reason_loss": 0.5310065150260925, "step": 420, "utility_loss": 2.007148027420044 }, { "cosine_similarity": 0.14742364802165503, "epoch": 0.3923578751164958, "grad_norm": 1.6231821709620151, "learning_rate": 1.9316534345875045e-05, "loss": 2.383, "reason_loss": 0.5592836737632751, "step": 421, "utility_loss": 1.8237414360046387 }, { "cosine_similarity": -0.0016585799799802574, "epoch": 0.3932898415657036, "grad_norm": 1.6319252442391303, "learning_rate": 1.9309630652399034e-05, "loss": 2.3271, "reason_loss": 0.5606275796890259, "step": 422, "utility_loss": 1.7664884328842163 }, { "cosine_similarity": 0.11847120026987723, "epoch": 0.39422180801491147, "grad_norm": 2.023262328263729, "learning_rate": 1.9302726958923027e-05, "loss": 2.1946, "reason_loss": 0.5942981243133545, "step": 423, "utility_loss": 1.600314974784851 }, { "cosine_similarity": 0.3214172151573417, "epoch": 0.3951537744641193, "grad_norm": 2.2448294472691215, "learning_rate": 1.9295823265447016e-05, "loss": 2.6741, "reason_loss": 0.5462765097618103, "step": 424, "utility_loss": 2.1277847290039062 }, { "cosine_similarity": 0.31362663013378844, "epoch": 0.3960857409133271, "grad_norm": 1.3743025129186022, "learning_rate": 1.9288919571971005e-05, "loss": 1.9916, "reason_loss": 0.5498258471488953, "step": 425, "utility_loss": 1.4418163299560547 }, { "cosine_similarity": 0.06358226091976873, "epoch": 0.39701770736253494, "grad_norm": 1.4180934924087323, "learning_rate": 1.9282015878494997e-05, "loss": 1.8454, "reason_loss": 0.5759633779525757, "step": 426, "utility_loss": 1.2694017887115479 }, { "cosine_similarity": 0.3637884267214644, "epoch": 0.3979496738117428, "grad_norm": 1.490186519956919, "learning_rate": 1.9275112185018986e-05, "loss": 2.2366, "reason_loss": 0.5516984462738037, "step": 427, "utility_loss": 1.684889316558838 }, { "cosine_similarity": 0.30904870486669006, "epoch": 0.39888164026095063, "grad_norm": 1.824380906295871, "learning_rate": 1.9268208491542975e-05, "loss": 2.3386, "reason_loss": 0.5268958806991577, "step": 428, "utility_loss": 1.8117187023162842 }, { "cosine_similarity": 0.11307297501588727, "epoch": 0.3998136067101584, "grad_norm": 1.5793337475153502, "learning_rate": 1.9261304798066968e-05, "loss": 2.4462, "reason_loss": 0.521943211555481, "step": 429, "utility_loss": 1.9242205619812012 }, { "cosine_similarity": 0.2797059647798325, "epoch": 0.40074557315936626, "grad_norm": 1.5181179473438822, "learning_rate": 1.925440110459096e-05, "loss": 2.3286, "reason_loss": 0.5573638677597046, "step": 430, "utility_loss": 1.7712149620056152 }, { "cosine_similarity": 0.19617583074723277, "epoch": 0.4016775396085741, "grad_norm": 1.6989842155541952, "learning_rate": 1.924749741111495e-05, "loss": 2.6741, "reason_loss": 0.560116171836853, "step": 431, "utility_loss": 2.1140060424804688 }, { "cosine_similarity": 0.25041981942790387, "epoch": 0.4026095060577819, "grad_norm": 1.4428861421612795, "learning_rate": 1.9240593717638938e-05, "loss": 1.9767, "reason_loss": 0.5695986151695251, "step": 432, "utility_loss": 1.4071403741836548 }, { "cosine_similarity": 0.19171062087978405, "epoch": 0.40354147250698974, "grad_norm": 1.9570988210683573, "learning_rate": 1.9233690024162927e-05, "loss": 2.8007, "reason_loss": 0.549986720085144, "step": 433, "utility_loss": 2.250762462615967 }, { "cosine_similarity": 0.42148338356853515, "epoch": 0.4044734389561976, "grad_norm": 1.7157728329724078, "learning_rate": 1.922678633068692e-05, "loss": 2.1913, "reason_loss": 0.5657232403755188, "step": 434, "utility_loss": 1.6256091594696045 }, { "cosine_similarity": 0.35271257547242696, "epoch": 0.40540540540540543, "grad_norm": 1.6609853649508464, "learning_rate": 1.921988263721091e-05, "loss": 2.1348, "reason_loss": 0.5582706928253174, "step": 435, "utility_loss": 1.576574444770813 }, { "cosine_similarity": -0.08590868649877156, "epoch": 0.4063373718546132, "grad_norm": 2.012486275144893, "learning_rate": 1.9212978943734898e-05, "loss": 2.6396, "reason_loss": 0.5693385601043701, "step": 436, "utility_loss": 2.0703020095825195 }, { "cosine_similarity": 0.15664524829139012, "epoch": 0.40726933830382106, "grad_norm": 2.1014407357497724, "learning_rate": 1.920607525025889e-05, "loss": 2.4512, "reason_loss": 0.5675274133682251, "step": 437, "utility_loss": 1.883713722229004 }, { "cosine_similarity": 0.402975899148738, "epoch": 0.4082013047530289, "grad_norm": 2.059632621531184, "learning_rate": 1.919917155678288e-05, "loss": 2.3937, "reason_loss": 0.5487034916877747, "step": 438, "utility_loss": 1.8449842929840088 }, { "cosine_similarity": 0.23821975937074127, "epoch": 0.4091332712022367, "grad_norm": 1.5397223740108066, "learning_rate": 1.919226786330687e-05, "loss": 2.193, "reason_loss": 0.5407286882400513, "step": 439, "utility_loss": 1.6522443294525146 }, { "cosine_similarity": 0.041649026485488, "epoch": 0.41006523765144454, "grad_norm": 2.9658699931194112, "learning_rate": 1.918536416983086e-05, "loss": 2.381, "reason_loss": 0.5511441230773926, "step": 440, "utility_loss": 1.8298859596252441 }, { "cosine_similarity": 0.2172206311563764, "epoch": 0.4109972041006524, "grad_norm": 1.5776888805156526, "learning_rate": 1.9178460476354853e-05, "loss": 2.4032, "reason_loss": 0.5498334169387817, "step": 441, "utility_loss": 1.853394865989685 }, { "cosine_similarity": 0.17013017570116581, "epoch": 0.4119291705498602, "grad_norm": 1.9687078363342398, "learning_rate": 1.9171556782878842e-05, "loss": 2.3673, "reason_loss": 0.5575866103172302, "step": 442, "utility_loss": 1.8097485303878784 }, { "cosine_similarity": 0.09624139636273238, "epoch": 0.412861136999068, "grad_norm": 1.7277548203822624, "learning_rate": 1.916465308940283e-05, "loss": 2.247, "reason_loss": 0.5405848026275635, "step": 443, "utility_loss": 1.7064225673675537 }, { "cosine_similarity": 0.07530188058626126, "epoch": 0.41379310344827586, "grad_norm": 1.8763211606812982, "learning_rate": 1.9157749395926824e-05, "loss": 2.5039, "reason_loss": 0.5278857946395874, "step": 444, "utility_loss": 1.9759689569473267 }, { "cosine_similarity": 0.5872851261991445, "epoch": 0.4147250698974837, "grad_norm": 2.0377168420796896, "learning_rate": 1.9150845702450813e-05, "loss": 2.4397, "reason_loss": 0.5692563652992249, "step": 445, "utility_loss": 1.87046217918396 }, { "cosine_similarity": 0.17698844669386332, "epoch": 0.41565703634669154, "grad_norm": 1.624943923491357, "learning_rate": 1.91439420089748e-05, "loss": 2.537, "reason_loss": 0.553325891494751, "step": 446, "utility_loss": 1.983707308769226 }, { "cosine_similarity": 0.08145519573076238, "epoch": 0.41658900279589933, "grad_norm": 2.2131282752935526, "learning_rate": 1.9137038315498794e-05, "loss": 2.3709, "reason_loss": 0.531345009803772, "step": 447, "utility_loss": 1.8395507335662842 }, { "cosine_similarity": 0.044142283801580584, "epoch": 0.4175209692451072, "grad_norm": 1.36395622760598, "learning_rate": 1.9130134622022783e-05, "loss": 2.0521, "reason_loss": 0.5347639322280884, "step": 448, "utility_loss": 1.5173038244247437 }, { "cosine_similarity": 0.19009178081331712, "epoch": 0.418452935694315, "grad_norm": 1.6850957845695973, "learning_rate": 1.9123230928546776e-05, "loss": 2.6938, "reason_loss": 0.5494028329849243, "step": 449, "utility_loss": 2.1443867683410645 }, { "cosine_similarity": 0.2467847436310088, "epoch": 0.4193849021435228, "grad_norm": 2.209915069216686, "learning_rate": 1.9116327235070765e-05, "loss": 2.4405, "reason_loss": 0.525394082069397, "step": 450, "utility_loss": 1.9150786399841309 }, { "cosine_similarity": 0.16104778563384398, "epoch": 0.42031686859273065, "grad_norm": 1.6333120334231068, "learning_rate": 1.9109423541594754e-05, "loss": 2.3932, "reason_loss": 0.5409491062164307, "step": 451, "utility_loss": 1.8522659540176392 }, { "cosine_similarity": -0.00836299151341191, "epoch": 0.4212488350419385, "grad_norm": 1.7565356111090942, "learning_rate": 1.9102519848118746e-05, "loss": 2.4709, "reason_loss": 0.5505331158638, "step": 452, "utility_loss": 1.9204119443893433 }, { "cosine_similarity": 0.0537229529477172, "epoch": 0.42218080149114634, "grad_norm": 1.6808396801120111, "learning_rate": 1.9095616154642735e-05, "loss": 2.6613, "reason_loss": 0.5533384084701538, "step": 453, "utility_loss": 2.1079301834106445 }, { "cosine_similarity": -0.07955603969979827, "epoch": 0.42311276794035413, "grad_norm": 1.3583854344809108, "learning_rate": 1.9088712461166724e-05, "loss": 2.175, "reason_loss": 0.5491795539855957, "step": 454, "utility_loss": 1.6258647441864014 }, { "cosine_similarity": 0.2531223633836009, "epoch": 0.424044734389562, "grad_norm": 1.486535023512776, "learning_rate": 1.9081808767690717e-05, "loss": 2.3541, "reason_loss": 0.5320166349411011, "step": 455, "utility_loss": 1.8220651149749756 }, { "cosine_similarity": 0.27544913107108615, "epoch": 0.4249767008387698, "grad_norm": 1.5169316811007356, "learning_rate": 1.9074905074214706e-05, "loss": 2.3843, "reason_loss": 0.513575553894043, "step": 456, "utility_loss": 1.8707740306854248 }, { "cosine_similarity": 0.035035065512839754, "epoch": 0.42590866728797766, "grad_norm": 1.6741730056898865, "learning_rate": 1.9068001380738698e-05, "loss": 2.4122, "reason_loss": 0.5519176721572876, "step": 457, "utility_loss": 1.8602778911590576 }, { "cosine_similarity": 0.22558186025704238, "epoch": 0.42684063373718545, "grad_norm": 1.8438659627210616, "learning_rate": 1.9061097687262687e-05, "loss": 2.2126, "reason_loss": 0.5541698336601257, "step": 458, "utility_loss": 1.6583905220031738 }, { "cosine_similarity": 0.3348774691567391, "epoch": 0.4277726001863933, "grad_norm": 2.4154981990001643, "learning_rate": 1.905419399378668e-05, "loss": 2.4601, "reason_loss": 0.5333120822906494, "step": 459, "utility_loss": 1.9267890453338623 }, { "cosine_similarity": 0.2088337718959656, "epoch": 0.42870456663560114, "grad_norm": 2.0707849770474875, "learning_rate": 1.904729030031067e-05, "loss": 2.4, "reason_loss": 0.5555226802825928, "step": 460, "utility_loss": 1.8444480895996094 }, { "cosine_similarity": 0.10827066148833037, "epoch": 0.4296365330848089, "grad_norm": 1.7445608938042714, "learning_rate": 1.9040386606834658e-05, "loss": 2.2547, "reason_loss": 0.5432486534118652, "step": 461, "utility_loss": 1.7114652395248413 }, { "cosine_similarity": -0.014299726966284652, "epoch": 0.43056849953401677, "grad_norm": 1.7943532071800827, "learning_rate": 1.9033482913358647e-05, "loss": 2.3769, "reason_loss": 0.5172697305679321, "step": 462, "utility_loss": 1.8596256971359253 }, { "cosine_similarity": 0.026209827985899006, "epoch": 0.4315004659832246, "grad_norm": 1.3011461276406298, "learning_rate": 1.902657921988264e-05, "loss": 1.9581, "reason_loss": 0.5303409099578857, "step": 463, "utility_loss": 1.427788496017456 }, { "cosine_similarity": 0.5177053493557667, "epoch": 0.43243243243243246, "grad_norm": 1.7409825364767981, "learning_rate": 1.9019675526406628e-05, "loss": 2.4386, "reason_loss": 0.5143840312957764, "step": 464, "utility_loss": 1.9242368936538696 }, { "cosine_similarity": 0.7336472571091296, "epoch": 0.43336439888164024, "grad_norm": 1.5477818089777244, "learning_rate": 1.901277183293062e-05, "loss": 2.6128, "reason_loss": 0.5877071022987366, "step": 465, "utility_loss": 2.0251104831695557 }, { "cosine_similarity": 0.22818958743468912, "epoch": 0.4342963653308481, "grad_norm": 1.6376367352002796, "learning_rate": 1.900586813945461e-05, "loss": 2.1544, "reason_loss": 0.5312755107879639, "step": 466, "utility_loss": 1.623120903968811 }, { "cosine_similarity": -0.12595429156801854, "epoch": 0.43522833178005593, "grad_norm": 2.1939140291156924, "learning_rate": 1.8998964445978602e-05, "loss": 2.2142, "reason_loss": 0.5837035179138184, "step": 467, "utility_loss": 1.6304945945739746 }, { "cosine_similarity": 0.3721134280745117, "epoch": 0.4361602982292637, "grad_norm": 3.0978518666550037, "learning_rate": 1.899206075250259e-05, "loss": 2.5608, "reason_loss": 0.5621349215507507, "step": 468, "utility_loss": 1.9987030029296875 }, { "cosine_similarity": 0.4738985278542068, "epoch": 0.43709226467847156, "grad_norm": 1.452916426223559, "learning_rate": 1.898515705902658e-05, "loss": 2.269, "reason_loss": 0.5552882552146912, "step": 469, "utility_loss": 1.7137318849563599 }, { "cosine_similarity": 0.12970474113287253, "epoch": 0.4380242311276794, "grad_norm": 1.6095341796854146, "learning_rate": 1.8978253365550572e-05, "loss": 2.4529, "reason_loss": 0.5405250191688538, "step": 470, "utility_loss": 1.9124048948287964 }, { "cosine_similarity": 0.4401041320389106, "epoch": 0.43895619757688725, "grad_norm": 1.9647756721551688, "learning_rate": 1.897134967207456e-05, "loss": 2.6161, "reason_loss": 0.5277194976806641, "step": 471, "utility_loss": 2.0883841514587402 }, { "cosine_similarity": 0.42558826391907967, "epoch": 0.43988816402609504, "grad_norm": 1.5352532035729531, "learning_rate": 1.896444597859855e-05, "loss": 2.4536, "reason_loss": 0.5749415159225464, "step": 472, "utility_loss": 1.8786253929138184 }, { "cosine_similarity": 0.12935674775654293, "epoch": 0.4408201304753029, "grad_norm": 1.5402701576986824, "learning_rate": 1.895754228512254e-05, "loss": 2.659, "reason_loss": 0.5523815155029297, "step": 473, "utility_loss": 2.1065993309020996 }, { "cosine_similarity": -0.12040196800538497, "epoch": 0.44175209692451073, "grad_norm": 1.3622191480290047, "learning_rate": 1.8950638591646532e-05, "loss": 1.8797, "reason_loss": 0.5195310711860657, "step": 474, "utility_loss": 1.3601417541503906 }, { "cosine_similarity": 0.22123188817169248, "epoch": 0.4426840633737186, "grad_norm": 1.7889139763784156, "learning_rate": 1.894373489817052e-05, "loss": 2.6489, "reason_loss": 0.5342031121253967, "step": 475, "utility_loss": 2.1147050857543945 }, { "cosine_similarity": 0.2136237965173102, "epoch": 0.44361602982292636, "grad_norm": 1.424798730460176, "learning_rate": 1.8936831204694513e-05, "loss": 1.9678, "reason_loss": 0.5800274610519409, "step": 476, "utility_loss": 1.3877534866333008 }, { "cosine_similarity": 0.14211987400193885, "epoch": 0.4445479962721342, "grad_norm": 1.3802291824500563, "learning_rate": 1.8929927511218506e-05, "loss": 2.2632, "reason_loss": 0.5422728061676025, "step": 477, "utility_loss": 1.7208993434906006 }, { "cosine_similarity": 0.07143055814610591, "epoch": 0.44547996272134205, "grad_norm": 2.3704535968384213, "learning_rate": 1.8923023817742495e-05, "loss": 2.3209, "reason_loss": 0.5693183541297913, "step": 478, "utility_loss": 1.7516032457351685 }, { "cosine_similarity": 0.3343234080682683, "epoch": 0.44641192917054984, "grad_norm": 1.5301540808827805, "learning_rate": 1.8916120124266484e-05, "loss": 2.2093, "reason_loss": 0.5363061428070068, "step": 479, "utility_loss": 1.6729788780212402 }, { "cosine_similarity": 0.22075808795977364, "epoch": 0.4473438956197577, "grad_norm": 1.8307181312905838, "learning_rate": 1.8909216430790473e-05, "loss": 2.0775, "reason_loss": 0.5539212226867676, "step": 480, "utility_loss": 1.5235520601272583 }, { "cosine_similarity": 0.059092080770271185, "epoch": 0.4482758620689655, "grad_norm": 1.3647453171104413, "learning_rate": 1.8902312737314465e-05, "loss": 2.039, "reason_loss": 0.5647591352462769, "step": 481, "utility_loss": 1.474273920059204 }, { "cosine_similarity": 0.04059193907904117, "epoch": 0.44920782851817337, "grad_norm": 1.6732835158520003, "learning_rate": 1.8895409043838454e-05, "loss": 2.0462, "reason_loss": 0.5627942085266113, "step": 482, "utility_loss": 1.4833934307098389 }, { "cosine_similarity": 0.027781133527226068, "epoch": 0.45013979496738116, "grad_norm": 1.7283988870001994, "learning_rate": 1.8888505350362443e-05, "loss": 2.71, "reason_loss": 0.5458176136016846, "step": 483, "utility_loss": 2.1641674041748047 }, { "cosine_similarity": -0.11458444579425545, "epoch": 0.451071761416589, "grad_norm": 1.7799848947818817, "learning_rate": 1.8881601656886436e-05, "loss": 2.1863, "reason_loss": 0.5136273503303528, "step": 484, "utility_loss": 1.6726524829864502 }, { "cosine_similarity": 0.100264558973189, "epoch": 0.45200372786579684, "grad_norm": 1.626628889476727, "learning_rate": 1.8874697963410425e-05, "loss": 2.2522, "reason_loss": 0.5461362600326538, "step": 485, "utility_loss": 1.7060893774032593 }, { "cosine_similarity": 0.12074626993352533, "epoch": 0.45293569431500463, "grad_norm": 1.5082002286899951, "learning_rate": 1.8867794269934417e-05, "loss": 2.3228, "reason_loss": 0.5736117362976074, "step": 486, "utility_loss": 1.7491943836212158 }, { "cosine_similarity": 0.44715682857386485, "epoch": 0.4538676607642125, "grad_norm": 1.6199352419950392, "learning_rate": 1.8860890576458406e-05, "loss": 2.3894, "reason_loss": 0.5375967025756836, "step": 487, "utility_loss": 1.8518192768096924 }, { "cosine_similarity": -0.0042164526202053375, "epoch": 0.4547996272134203, "grad_norm": 1.4635999850801604, "learning_rate": 1.88539868829824e-05, "loss": 2.3178, "reason_loss": 0.5462682247161865, "step": 488, "utility_loss": 1.771540880203247 }, { "cosine_similarity": 0.24405801228728968, "epoch": 0.45573159366262816, "grad_norm": 1.4670455679055918, "learning_rate": 1.8847083189506388e-05, "loss": 1.7879, "reason_loss": 0.5370165705680847, "step": 489, "utility_loss": 1.2508795261383057 }, { "cosine_similarity": 0.09587756818634204, "epoch": 0.45666356011183595, "grad_norm": 1.9445452300825952, "learning_rate": 1.8840179496030377e-05, "loss": 2.3376, "reason_loss": 0.508034348487854, "step": 490, "utility_loss": 1.8295294046401978 }, { "cosine_similarity": -0.09976848051801858, "epoch": 0.4575955265610438, "grad_norm": 1.6120039224865517, "learning_rate": 1.8833275802554366e-05, "loss": 2.2045, "reason_loss": 0.5192242860794067, "step": 491, "utility_loss": 1.6853232383728027 }, { "cosine_similarity": 0.3107556138847428, "epoch": 0.45852749301025164, "grad_norm": 1.816496550259819, "learning_rate": 1.882637210907836e-05, "loss": 2.5313, "reason_loss": 0.5731927156448364, "step": 492, "utility_loss": 1.9581091403961182 }, { "cosine_similarity": 0.4415044370934013, "epoch": 0.4594594594594595, "grad_norm": 1.6733230461547666, "learning_rate": 1.8819468415602347e-05, "loss": 2.5681, "reason_loss": 0.5170867443084717, "step": 493, "utility_loss": 2.0510566234588623 }, { "cosine_similarity": 0.12600518991307377, "epoch": 0.4603914259086673, "grad_norm": 1.5588550608040375, "learning_rate": 1.881256472212634e-05, "loss": 2.5274, "reason_loss": 0.542578935623169, "step": 494, "utility_loss": 1.9848175048828125 }, { "cosine_similarity": 0.1863774421479281, "epoch": 0.4613233923578751, "grad_norm": 1.617962264497615, "learning_rate": 1.880566102865033e-05, "loss": 2.4129, "reason_loss": 0.5811722278594971, "step": 495, "utility_loss": 1.8317456245422363 }, { "cosine_similarity": -0.1460392487819582, "epoch": 0.46225535880708296, "grad_norm": 1.665610001406967, "learning_rate": 1.879875733517432e-05, "loss": 2.351, "reason_loss": 0.551222026348114, "step": 496, "utility_loss": 1.7998206615447998 }, { "cosine_similarity": 0.4014981208721921, "epoch": 0.46318732525629075, "grad_norm": 1.7230472996965978, "learning_rate": 1.879185364169831e-05, "loss": 2.4827, "reason_loss": 0.5628318786621094, "step": 497, "utility_loss": 1.9198654890060425 }, { "cosine_similarity": 0.39710438784900126, "epoch": 0.4641192917054986, "grad_norm": 1.5230340105980025, "learning_rate": 1.87849499482223e-05, "loss": 1.6728, "reason_loss": 0.5343964695930481, "step": 498, "utility_loss": 1.1383702754974365 }, { "cosine_similarity": 0.4459175119993239, "epoch": 0.46505125815470644, "grad_norm": 1.4181268457044829, "learning_rate": 1.8778046254746292e-05, "loss": 2.3275, "reason_loss": 0.5492043495178223, "step": 499, "utility_loss": 1.778249740600586 }, { "cosine_similarity": 0.03713109015375203, "epoch": 0.4659832246039143, "grad_norm": 1.8138507819188436, "learning_rate": 1.877114256127028e-05, "loss": 2.3564, "reason_loss": 0.5550894737243652, "step": 500, "utility_loss": 1.8013274669647217 }, { "cosine_similarity": 0.39209128737484805, "epoch": 0.46691519105312207, "grad_norm": 1.9048973281662853, "learning_rate": 1.876423886779427e-05, "loss": 2.4768, "reason_loss": 0.5577561855316162, "step": 501, "utility_loss": 1.9190750122070312 }, { "cosine_similarity": 0.391019768233556, "epoch": 0.4678471575023299, "grad_norm": 1.569222745158808, "learning_rate": 1.8757335174318262e-05, "loss": 2.5609, "reason_loss": 0.5421720147132874, "step": 502, "utility_loss": 2.0186831951141357 }, { "cosine_similarity": 0.5092553252128407, "epoch": 0.46877912395153776, "grad_norm": 2.3758514338358205, "learning_rate": 1.875043148084225e-05, "loss": 2.2205, "reason_loss": 0.5176048278808594, "step": 503, "utility_loss": 1.7029190063476562 }, { "cosine_similarity": 0.25376347694638685, "epoch": 0.46971109040074555, "grad_norm": 1.502516895084699, "learning_rate": 1.8743527787366244e-05, "loss": 2.2068, "reason_loss": 0.5265472531318665, "step": 504, "utility_loss": 1.6802780628204346 }, { "cosine_similarity": 0.23377066057433799, "epoch": 0.4706430568499534, "grad_norm": 1.6063486434199683, "learning_rate": 1.8736624093890233e-05, "loss": 2.0819, "reason_loss": 0.529309868812561, "step": 505, "utility_loss": 1.5525703430175781 }, { "cosine_similarity": 0.2459481129603883, "epoch": 0.47157502329916123, "grad_norm": 1.7205427472882713, "learning_rate": 1.8729720400414225e-05, "loss": 2.0256, "reason_loss": 0.5482283234596252, "step": 506, "utility_loss": 1.4773814678192139 }, { "cosine_similarity": 0.33237257810956633, "epoch": 0.4725069897483691, "grad_norm": 1.964699785113383, "learning_rate": 1.8722816706938214e-05, "loss": 2.5119, "reason_loss": 0.5529122352600098, "step": 507, "utility_loss": 1.9589805603027344 }, { "cosine_similarity": 0.048262025366365605, "epoch": 0.47343895619757687, "grad_norm": 1.3802753607416212, "learning_rate": 1.8715913013462203e-05, "loss": 2.3655, "reason_loss": 0.5487594604492188, "step": 508, "utility_loss": 1.816760540008545 }, { "cosine_similarity": 0.12281333176571904, "epoch": 0.4743709226467847, "grad_norm": 1.5690582207818875, "learning_rate": 1.8709009319986192e-05, "loss": 2.254, "reason_loss": 0.56925368309021, "step": 509, "utility_loss": 1.6847355365753174 }, { "cosine_similarity": 0.29339040875836003, "epoch": 0.47530288909599255, "grad_norm": 1.4369582474493565, "learning_rate": 1.8702105626510185e-05, "loss": 2.0595, "reason_loss": 0.5094286203384399, "step": 510, "utility_loss": 1.5500411987304688 }, { "cosine_similarity": 0.29966120711499755, "epoch": 0.4762348555452004, "grad_norm": 1.6586543232476183, "learning_rate": 1.8695201933034174e-05, "loss": 2.0745, "reason_loss": 0.5353127717971802, "step": 511, "utility_loss": 1.5392043590545654 }, { "cosine_similarity": 0.031027837093137466, "epoch": 0.4771668219944082, "grad_norm": 1.619139508820187, "learning_rate": 1.8688298239558166e-05, "loss": 2.1063, "reason_loss": 0.5314828753471375, "step": 512, "utility_loss": 1.574866771697998 }, { "cosine_similarity": 0.3163717536060597, "epoch": 0.47809878844361603, "grad_norm": 1.542603633692394, "learning_rate": 1.8681394546082155e-05, "loss": 2.0737, "reason_loss": 0.5720305442810059, "step": 513, "utility_loss": 1.5016329288482666 }, { "cosine_similarity": 0.20184608635831305, "epoch": 0.4790307548928239, "grad_norm": 1.3783890428637988, "learning_rate": 1.8674490852606148e-05, "loss": 2.2274, "reason_loss": 0.5360398292541504, "step": 514, "utility_loss": 1.6913596391677856 }, { "cosine_similarity": -0.08111589747098376, "epoch": 0.47996272134203166, "grad_norm": 1.575016518017598, "learning_rate": 1.8667587159130137e-05, "loss": 2.2545, "reason_loss": 0.5371385812759399, "step": 515, "utility_loss": 1.7173802852630615 }, { "cosine_similarity": 0.03981876289932805, "epoch": 0.4808946877912395, "grad_norm": 1.5022763632804605, "learning_rate": 1.8660683465654126e-05, "loss": 2.1906, "reason_loss": 0.5419087409973145, "step": 516, "utility_loss": 1.648695468902588 }, { "cosine_similarity": 0.08089480617525278, "epoch": 0.48182665424044735, "grad_norm": 1.7160888428334697, "learning_rate": 1.8653779772178118e-05, "loss": 2.2743, "reason_loss": 0.587475597858429, "step": 517, "utility_loss": 1.686823844909668 }, { "cosine_similarity": 0.01227683924923799, "epoch": 0.4827586206896552, "grad_norm": 1.705405203378889, "learning_rate": 1.8646876078702107e-05, "loss": 2.1584, "reason_loss": 0.5255818367004395, "step": 518, "utility_loss": 1.6328165531158447 }, { "cosine_similarity": 0.0856102899361198, "epoch": 0.483690587138863, "grad_norm": 1.552981135706972, "learning_rate": 1.8639972385226096e-05, "loss": 2.3577, "reason_loss": 0.5387923121452332, "step": 519, "utility_loss": 1.8189263343811035 }, { "cosine_similarity": -0.16917245625360025, "epoch": 0.4846225535880708, "grad_norm": 2.951050743721409, "learning_rate": 1.8633068691750085e-05, "loss": 2.3922, "reason_loss": 0.5450548529624939, "step": 520, "utility_loss": 1.84715735912323 }, { "cosine_similarity": 0.525923878748049, "epoch": 0.48555452003727867, "grad_norm": 1.5633238835327548, "learning_rate": 1.8626164998274078e-05, "loss": 2.1089, "reason_loss": 0.5201057195663452, "step": 521, "utility_loss": 1.5888186693191528 }, { "cosine_similarity": 0.3366898814662232, "epoch": 0.4864864864864865, "grad_norm": 1.3495252239810533, "learning_rate": 1.861926130479807e-05, "loss": 2.4604, "reason_loss": 0.5478874444961548, "step": 522, "utility_loss": 1.912534236907959 }, { "cosine_similarity": 0.06997749541317033, "epoch": 0.4874184529356943, "grad_norm": 1.562815832540475, "learning_rate": 1.861235761132206e-05, "loss": 2.2879, "reason_loss": 0.5405502319335938, "step": 523, "utility_loss": 1.7473492622375488 }, { "cosine_similarity": 0.2950165907311946, "epoch": 0.48835041938490215, "grad_norm": 1.558932578618509, "learning_rate": 1.860545391784605e-05, "loss": 2.2375, "reason_loss": 0.5446411371231079, "step": 524, "utility_loss": 1.6928430795669556 }, { "cosine_similarity": 0.43522670800268376, "epoch": 0.48928238583411, "grad_norm": 1.6478876460836678, "learning_rate": 1.859855022437004e-05, "loss": 2.3646, "reason_loss": 0.5798410177230835, "step": 525, "utility_loss": 1.784776210784912 }, { "cosine_similarity": 0.057698513411207505, "epoch": 0.4902143522833178, "grad_norm": 2.3585679562761994, "learning_rate": 1.859164653089403e-05, "loss": 2.2162, "reason_loss": 0.5625600814819336, "step": 526, "utility_loss": 1.653627634048462 }, { "cosine_similarity": 0.05969287200245109, "epoch": 0.4911463187325256, "grad_norm": 1.5690392389366357, "learning_rate": 1.858474283741802e-05, "loss": 2.5645, "reason_loss": 0.5341619253158569, "step": 527, "utility_loss": 2.030333995819092 }, { "cosine_similarity": 0.13052260115853836, "epoch": 0.49207828518173347, "grad_norm": 1.7809765608060284, "learning_rate": 1.857783914394201e-05, "loss": 2.0742, "reason_loss": 0.5512499809265137, "step": 528, "utility_loss": 1.5229125022888184 }, { "cosine_similarity": 0.17044856470963357, "epoch": 0.4930102516309413, "grad_norm": 1.7545889420224166, "learning_rate": 1.8570935450466e-05, "loss": 2.5382, "reason_loss": 0.5127447843551636, "step": 529, "utility_loss": 2.0254859924316406 }, { "cosine_similarity": -0.02402066113251231, "epoch": 0.4939422180801491, "grad_norm": 1.7894158084580503, "learning_rate": 1.856403175698999e-05, "loss": 2.5551, "reason_loss": 0.5507650375366211, "step": 530, "utility_loss": 2.004301071166992 }, { "cosine_similarity": -0.0186038464211936, "epoch": 0.49487418452935694, "grad_norm": 2.1360300380697863, "learning_rate": 1.8557128063513982e-05, "loss": 2.4259, "reason_loss": 0.5493189692497253, "step": 531, "utility_loss": 1.8765755891799927 }, { "cosine_similarity": 0.08911802185906674, "epoch": 0.4958061509785648, "grad_norm": 1.4572152411251884, "learning_rate": 1.8550224370037974e-05, "loss": 2.0635, "reason_loss": 0.5254961252212524, "step": 532, "utility_loss": 1.5379812717437744 }, { "cosine_similarity": 0.3716704370551973, "epoch": 0.4967381174277726, "grad_norm": 1.8336190395903245, "learning_rate": 1.8543320676561963e-05, "loss": 2.1298, "reason_loss": 0.56238853931427, "step": 533, "utility_loss": 1.5673725605010986 }, { "cosine_similarity": 0.05518587971072245, "epoch": 0.4976700838769804, "grad_norm": 1.3412874129011902, "learning_rate": 1.8536416983085952e-05, "loss": 2.5488, "reason_loss": 0.5612865686416626, "step": 534, "utility_loss": 1.9875051975250244 }, { "cosine_similarity": 0.2625156297215224, "epoch": 0.49860205032618826, "grad_norm": 2.446361034553642, "learning_rate": 1.8529513289609945e-05, "loss": 1.9087, "reason_loss": 0.5199224948883057, "step": 535, "utility_loss": 1.3888005018234253 }, { "cosine_similarity": 0.030253766164092918, "epoch": 0.4995340167753961, "grad_norm": 1.7723215363896165, "learning_rate": 1.8522609596133934e-05, "loss": 2.1541, "reason_loss": 0.5363695621490479, "step": 536, "utility_loss": 1.6177211999893188 }, { "cosine_similarity": 0.040973612698163624, "epoch": 0.5004659832246039, "grad_norm": 1.4525319313510896, "learning_rate": 1.8515705902657923e-05, "loss": 2.0908, "reason_loss": 0.501968502998352, "step": 537, "utility_loss": 1.5888712406158447 }, { "cosine_similarity": 0.38448118926144953, "epoch": 0.5013979496738118, "grad_norm": 1.5833428753311607, "learning_rate": 1.8508802209181912e-05, "loss": 1.9919, "reason_loss": 0.545943558216095, "step": 538, "utility_loss": 1.4459846019744873 }, { "cosine_similarity": 0.19626832528916788, "epoch": 0.5023299161230196, "grad_norm": 1.6338597994979664, "learning_rate": 1.8501898515705904e-05, "loss": 2.276, "reason_loss": 0.5505058765411377, "step": 539, "utility_loss": 1.7254765033721924 }, { "cosine_similarity": 0.47440631218073087, "epoch": 0.5032618825722274, "grad_norm": 1.5627780704171832, "learning_rate": 1.8494994822229893e-05, "loss": 2.4807, "reason_loss": 0.5208377838134766, "step": 540, "utility_loss": 1.959871530532837 }, { "cosine_similarity": -0.4300480394321923, "epoch": 0.5041938490214353, "grad_norm": 1.7763744024971495, "learning_rate": 1.8488091128753886e-05, "loss": 2.4004, "reason_loss": 0.5322619676589966, "step": 541, "utility_loss": 1.8681375980377197 }, { "cosine_similarity": 0.3999605277825621, "epoch": 0.5051258154706431, "grad_norm": 1.9136353479236459, "learning_rate": 1.8481187435277875e-05, "loss": 2.3945, "reason_loss": 0.5419068336486816, "step": 542, "utility_loss": 1.8526356220245361 }, { "cosine_similarity": 0.20181046986238688, "epoch": 0.5060577819198508, "grad_norm": 1.9797999401120128, "learning_rate": 1.8474283741801867e-05, "loss": 2.3384, "reason_loss": 0.5234487056732178, "step": 543, "utility_loss": 1.8149323463439941 }, { "cosine_similarity": 0.23942115451728455, "epoch": 0.5069897483690587, "grad_norm": 1.4225703626439747, "learning_rate": 1.8467380048325856e-05, "loss": 2.0954, "reason_loss": 0.5550522804260254, "step": 544, "utility_loss": 1.5403790473937988 }, { "cosine_similarity": 0.08426607091074548, "epoch": 0.5079217148182665, "grad_norm": 1.7744713220337156, "learning_rate": 1.8460476354849845e-05, "loss": 2.3092, "reason_loss": 0.5390750765800476, "step": 545, "utility_loss": 1.7700870037078857 }, { "cosine_similarity": 0.1798693792015476, "epoch": 0.5088536812674743, "grad_norm": 1.9263184877115285, "learning_rate": 1.8453572661373838e-05, "loss": 2.7982, "reason_loss": 0.5288469195365906, "step": 546, "utility_loss": 2.2693400382995605 }, { "cosine_similarity": 0.38597293788107334, "epoch": 0.5097856477166822, "grad_norm": 1.6584309332057519, "learning_rate": 1.8446668967897827e-05, "loss": 2.3863, "reason_loss": 0.5437051057815552, "step": 547, "utility_loss": 1.8426344394683838 }, { "cosine_similarity": 0.011975169025058938, "epoch": 0.51071761416589, "grad_norm": 1.3761510443587632, "learning_rate": 1.8439765274421816e-05, "loss": 2.1763, "reason_loss": 0.5125733613967896, "step": 548, "utility_loss": 1.6636841297149658 }, { "cosine_similarity": 0.07281724076693333, "epoch": 0.5116495806150979, "grad_norm": 1.5896024398901565, "learning_rate": 1.8432861580945808e-05, "loss": 2.776, "reason_loss": 0.5498342514038086, "step": 549, "utility_loss": 2.226144790649414 }, { "cosine_similarity": -0.10926617141052858, "epoch": 0.5125815470643057, "grad_norm": 1.7958371343864317, "learning_rate": 1.8425957887469797e-05, "loss": 2.4168, "reason_loss": 0.5487566590309143, "step": 550, "utility_loss": 1.8680037260055542 }, { "cosine_similarity": 0.17559570973137337, "epoch": 0.5135135135135135, "grad_norm": 1.5340597289215248, "learning_rate": 1.841905419399379e-05, "loss": 2.3215, "reason_loss": 0.560693085193634, "step": 551, "utility_loss": 1.7607841491699219 }, { "cosine_similarity": 0.3362603333336731, "epoch": 0.5144454799627214, "grad_norm": 2.1412069142457564, "learning_rate": 1.841215050051778e-05, "loss": 2.3851, "reason_loss": 0.5190249681472778, "step": 552, "utility_loss": 1.8660855293273926 }, { "cosine_similarity": 0.29433365616125706, "epoch": 0.5153774464119292, "grad_norm": 1.8541150147875831, "learning_rate": 1.8405246807041768e-05, "loss": 2.2378, "reason_loss": 0.48427814245224, "step": 553, "utility_loss": 1.753478765487671 }, { "cosine_similarity": 0.03605010443463735, "epoch": 0.516309412861137, "grad_norm": 1.8847049110224012, "learning_rate": 1.839834311356576e-05, "loss": 2.267, "reason_loss": 0.5366066694259644, "step": 554, "utility_loss": 1.7303798198699951 }, { "cosine_similarity": 0.022080135348587732, "epoch": 0.5172413793103449, "grad_norm": 1.892665775579302, "learning_rate": 1.839143942008975e-05, "loss": 2.4808, "reason_loss": 0.5601500868797302, "step": 555, "utility_loss": 1.9206078052520752 }, { "cosine_similarity": -0.1482214268226348, "epoch": 0.5181733457595527, "grad_norm": 1.5958336849682115, "learning_rate": 1.8384535726613738e-05, "loss": 2.502, "reason_loss": 0.6040878295898438, "step": 556, "utility_loss": 1.897905945777893 }, { "cosine_similarity": 0.07425204981408613, "epoch": 0.5191053122087604, "grad_norm": 1.4569613157359018, "learning_rate": 1.837763203313773e-05, "loss": 2.3478, "reason_loss": 0.5250273942947388, "step": 557, "utility_loss": 1.822805643081665 }, { "cosine_similarity": -0.0667045621512453, "epoch": 0.5200372786579683, "grad_norm": 1.683046198771896, "learning_rate": 1.837072833966172e-05, "loss": 2.389, "reason_loss": 0.5865042209625244, "step": 558, "utility_loss": 1.8025100231170654 }, { "cosine_similarity": -0.17999407655175173, "epoch": 0.5209692451071761, "grad_norm": 1.579268204335265, "learning_rate": 1.8363824646185712e-05, "loss": 2.012, "reason_loss": 0.5411480665206909, "step": 559, "utility_loss": 1.4708298444747925 }, { "cosine_similarity": 0.0422318118872757, "epoch": 0.5219012115563839, "grad_norm": 1.418923630174561, "learning_rate": 1.83569209527097e-05, "loss": 2.0458, "reason_loss": 0.5281542539596558, "step": 560, "utility_loss": 1.5176806449890137 }, { "cosine_similarity": 0.4666719167678294, "epoch": 0.5228331780055918, "grad_norm": 1.4859106609470054, "learning_rate": 1.8350017259233694e-05, "loss": 2.1843, "reason_loss": 0.5475990176200867, "step": 561, "utility_loss": 1.636672854423523 }, { "cosine_similarity": 0.06820429704635954, "epoch": 0.5237651444547996, "grad_norm": 1.5287699566860526, "learning_rate": 1.8343113565757683e-05, "loss": 1.9916, "reason_loss": 0.5423727631568909, "step": 562, "utility_loss": 1.4492168426513672 }, { "cosine_similarity": 0.4276443616454645, "epoch": 0.5246971109040075, "grad_norm": 1.392557332014762, "learning_rate": 1.833620987228167e-05, "loss": 2.256, "reason_loss": 0.5193544030189514, "step": 563, "utility_loss": 1.7366068363189697 }, { "cosine_similarity": 0.0939786542603645, "epoch": 0.5256290773532153, "grad_norm": 1.3361646977897819, "learning_rate": 1.832930617880566e-05, "loss": 1.9133, "reason_loss": 0.529839277267456, "step": 564, "utility_loss": 1.3834656476974487 }, { "cosine_similarity": 0.275214300294376, "epoch": 0.5265610438024231, "grad_norm": 1.3654865519550707, "learning_rate": 1.8322402485329653e-05, "loss": 2.0646, "reason_loss": 0.5059675574302673, "step": 565, "utility_loss": 1.5586013793945312 }, { "cosine_similarity": 0.2246695964545132, "epoch": 0.527493010251631, "grad_norm": 1.965558420080734, "learning_rate": 1.8315498791853642e-05, "loss": 2.324, "reason_loss": 0.5408630967140198, "step": 566, "utility_loss": 1.7831403017044067 }, { "cosine_similarity": 0.10284377996321674, "epoch": 0.5284249767008388, "grad_norm": 1.6564773068755312, "learning_rate": 1.830859509837763e-05, "loss": 2.202, "reason_loss": 0.5626770257949829, "step": 567, "utility_loss": 1.6392731666564941 }, { "cosine_similarity": 0.3671974075156294, "epoch": 0.5293569431500466, "grad_norm": 1.462718541022984, "learning_rate": 1.8301691404901624e-05, "loss": 2.1466, "reason_loss": 0.4982058107852936, "step": 568, "utility_loss": 1.6483678817749023 }, { "cosine_similarity": 0.6510194413270715, "epoch": 0.5302889095992545, "grad_norm": 1.8274585411934838, "learning_rate": 1.8294787711425616e-05, "loss": 2.5001, "reason_loss": 0.5236825346946716, "step": 569, "utility_loss": 1.9763822555541992 }, { "cosine_similarity": 0.14110073822010924, "epoch": 0.5312208760484622, "grad_norm": 1.897513504276882, "learning_rate": 1.8287884017949605e-05, "loss": 2.4221, "reason_loss": 0.524181604385376, "step": 570, "utility_loss": 1.897870421409607 }, { "cosine_similarity": 0.3568660767306019, "epoch": 0.53215284249767, "grad_norm": 1.4237632249829255, "learning_rate": 1.8280980324473594e-05, "loss": 1.9332, "reason_loss": 0.5068228840827942, "step": 571, "utility_loss": 1.4263286590576172 }, { "cosine_similarity": 0.6267257057557167, "epoch": 0.5330848089468779, "grad_norm": 1.5499031338761875, "learning_rate": 1.8274076630997587e-05, "loss": 2.3328, "reason_loss": 0.550186038017273, "step": 572, "utility_loss": 1.7825647592544556 }, { "cosine_similarity": 0.07223532638959689, "epoch": 0.5340167753960857, "grad_norm": 1.4927323332651155, "learning_rate": 1.8267172937521576e-05, "loss": 2.5263, "reason_loss": 0.5550125241279602, "step": 573, "utility_loss": 1.971334457397461 }, { "cosine_similarity": 0.07819480042332512, "epoch": 0.5349487418452936, "grad_norm": 1.4291705768073895, "learning_rate": 1.8260269244045565e-05, "loss": 2.1345, "reason_loss": 0.5400010347366333, "step": 574, "utility_loss": 1.5945024490356445 }, { "cosine_similarity": 0.03919021655899268, "epoch": 0.5358807082945014, "grad_norm": 1.5524022840768452, "learning_rate": 1.8253365550569557e-05, "loss": 2.347, "reason_loss": 0.5300875306129456, "step": 575, "utility_loss": 1.8169299364089966 }, { "cosine_similarity": 0.24586181915731867, "epoch": 0.5368126747437092, "grad_norm": 1.5083735444952502, "learning_rate": 1.8246461857093546e-05, "loss": 2.1297, "reason_loss": 0.5170225501060486, "step": 576, "utility_loss": 1.6127068996429443 }, { "cosine_similarity": 0.4810899855537627, "epoch": 0.5377446411929171, "grad_norm": 1.4456204429398873, "learning_rate": 1.8239558163617535e-05, "loss": 2.2752, "reason_loss": 0.5298543572425842, "step": 577, "utility_loss": 1.7453185319900513 }, { "cosine_similarity": 0.15300872786100653, "epoch": 0.5386766076421249, "grad_norm": 1.6741606115544767, "learning_rate": 1.8232654470141528e-05, "loss": 2.4021, "reason_loss": 0.5292667150497437, "step": 578, "utility_loss": 1.872856616973877 }, { "cosine_similarity": 0.20502186029961575, "epoch": 0.5396085740913327, "grad_norm": 1.4527379287166728, "learning_rate": 1.822575077666552e-05, "loss": 2.3018, "reason_loss": 0.5463473796844482, "step": 579, "utility_loss": 1.7554864883422852 }, { "cosine_similarity": 0.6596304205467837, "epoch": 0.5405405405405406, "grad_norm": 1.802495933585187, "learning_rate": 1.821884708318951e-05, "loss": 2.4672, "reason_loss": 0.5592218041419983, "step": 580, "utility_loss": 1.9080193042755127 }, { "cosine_similarity": 0.38281168465680016, "epoch": 0.5414725069897484, "grad_norm": 1.308464403767013, "learning_rate": 1.8211943389713498e-05, "loss": 2.4734, "reason_loss": 0.5457773208618164, "step": 581, "utility_loss": 1.9275858402252197 }, { "cosine_similarity": -0.0032410268698770824, "epoch": 0.5424044734389561, "grad_norm": 1.2861615916350582, "learning_rate": 1.8205039696237487e-05, "loss": 2.173, "reason_loss": 0.5498815774917603, "step": 582, "utility_loss": 1.623071312904358 }, { "cosine_similarity": 0.13407351284067506, "epoch": 0.543336439888164, "grad_norm": 1.3820121515865749, "learning_rate": 1.819813600276148e-05, "loss": 2.0015, "reason_loss": 0.49967581033706665, "step": 583, "utility_loss": 1.5017818212509155 }, { "cosine_similarity": 0.09689645992646757, "epoch": 0.5442684063373718, "grad_norm": 1.3418024589217226, "learning_rate": 1.819123230928547e-05, "loss": 2.1974, "reason_loss": 0.5452707409858704, "step": 584, "utility_loss": 1.652101993560791 }, { "cosine_similarity": 0.21935245979759724, "epoch": 0.5452003727865797, "grad_norm": 1.5333120162325036, "learning_rate": 1.8184328615809458e-05, "loss": 2.1833, "reason_loss": 0.5246049761772156, "step": 585, "utility_loss": 1.6586673259735107 }, { "cosine_similarity": 0.20719770384427, "epoch": 0.5461323392357875, "grad_norm": 1.5686650326940863, "learning_rate": 1.817742492233345e-05, "loss": 1.9002, "reason_loss": 0.5221524238586426, "step": 586, "utility_loss": 1.378077507019043 }, { "cosine_similarity": 0.05658214788297757, "epoch": 0.5470643056849953, "grad_norm": 1.714153046451524, "learning_rate": 1.817052122885744e-05, "loss": 2.2192, "reason_loss": 0.486890971660614, "step": 587, "utility_loss": 1.7323133945465088 }, { "cosine_similarity": 0.09336460028610565, "epoch": 0.5479962721342032, "grad_norm": 9.481226475906176, "learning_rate": 1.816361753538143e-05, "loss": 2.5447, "reason_loss": 0.5195767879486084, "step": 588, "utility_loss": 2.025150775909424 }, { "cosine_similarity": 0.530519543660687, "epoch": 0.548928238583411, "grad_norm": 1.380326143602053, "learning_rate": 1.815671384190542e-05, "loss": 2.3532, "reason_loss": 0.5927684307098389, "step": 589, "utility_loss": 1.7604713439941406 }, { "cosine_similarity": 0.16603987544344412, "epoch": 0.5498602050326188, "grad_norm": 1.4009973011760553, "learning_rate": 1.8149810148429413e-05, "loss": 2.2294, "reason_loss": 0.5317491888999939, "step": 590, "utility_loss": 1.6976618766784668 }, { "cosine_similarity": 0.44964600340432864, "epoch": 0.5507921714818267, "grad_norm": 1.4456894397975355, "learning_rate": 1.8142906454953402e-05, "loss": 2.529, "reason_loss": 0.5368420481681824, "step": 591, "utility_loss": 1.9921141862869263 }, { "cosine_similarity": 0.6500873863164675, "epoch": 0.5517241379310345, "grad_norm": 1.316392883464819, "learning_rate": 1.813600276147739e-05, "loss": 2.0736, "reason_loss": 0.5194362998008728, "step": 592, "utility_loss": 1.5541918277740479 }, { "cosine_similarity": 0.17233117627932748, "epoch": 0.5526561043802423, "grad_norm": 1.6500430879748174, "learning_rate": 1.812909906800138e-05, "loss": 2.2675, "reason_loss": 0.5138267874717712, "step": 593, "utility_loss": 1.7537007331848145 }, { "cosine_similarity": 0.2651883323126109, "epoch": 0.5535880708294502, "grad_norm": 1.667495515793246, "learning_rate": 1.8122195374525372e-05, "loss": 2.5305, "reason_loss": 0.5547143816947937, "step": 594, "utility_loss": 1.975816249847412 }, { "cosine_similarity": 0.5288044360515303, "epoch": 0.554520037278658, "grad_norm": 1.516221922215422, "learning_rate": 1.811529168104936e-05, "loss": 2.1868, "reason_loss": 0.5389065146446228, "step": 595, "utility_loss": 1.647853970527649 }, { "cosine_similarity": 0.3451576500828194, "epoch": 0.5554520037278659, "grad_norm": 1.6862777422040998, "learning_rate": 1.8108387987573354e-05, "loss": 2.2303, "reason_loss": 0.5233259201049805, "step": 596, "utility_loss": 1.7069284915924072 }, { "cosine_similarity": 0.5758488161891434, "epoch": 0.5563839701770736, "grad_norm": 1.6747687955542854, "learning_rate": 1.8101484294097343e-05, "loss": 2.4058, "reason_loss": 0.5184226036071777, "step": 597, "utility_loss": 1.8873343467712402 }, { "cosine_similarity": 0.1485806152061493, "epoch": 0.5573159366262814, "grad_norm": 1.4688648885353877, "learning_rate": 1.8094580600621335e-05, "loss": 2.1634, "reason_loss": 0.5345396995544434, "step": 598, "utility_loss": 1.6288484334945679 }, { "cosine_similarity": -0.06520990894735534, "epoch": 0.5582479030754893, "grad_norm": 1.5225122712711747, "learning_rate": 1.8087676907145324e-05, "loss": 2.5308, "reason_loss": 0.5451313257217407, "step": 599, "utility_loss": 1.9856246709823608 }, { "cosine_similarity": 0.12811579999764566, "epoch": 0.5591798695246971, "grad_norm": 1.406484739654437, "learning_rate": 1.8080773213669313e-05, "loss": 2.3263, "reason_loss": 0.5292607545852661, "step": 600, "utility_loss": 1.7970035076141357 }, { "cosine_similarity": 0.23727466379927908, "epoch": 0.5601118359739049, "grad_norm": 1.944371834308297, "learning_rate": 1.8073869520193306e-05, "loss": 2.1784, "reason_loss": 0.5285534262657166, "step": 601, "utility_loss": 1.649802327156067 }, { "cosine_similarity": 0.05161277468371211, "epoch": 0.5610438024231128, "grad_norm": 1.1102253552607737, "learning_rate": 1.8066965826717295e-05, "loss": 1.9447, "reason_loss": 0.5403935313224792, "step": 602, "utility_loss": 1.4042813777923584 }, { "cosine_similarity": 0.248951229160776, "epoch": 0.5619757688723206, "grad_norm": 1.6250961489333817, "learning_rate": 1.8060062133241284e-05, "loss": 2.1353, "reason_loss": 0.5412812232971191, "step": 603, "utility_loss": 1.5939888954162598 }, { "cosine_similarity": 0.218891519283298, "epoch": 0.5629077353215284, "grad_norm": 2.2934909677722857, "learning_rate": 1.8053158439765276e-05, "loss": 2.5102, "reason_loss": 0.5335261821746826, "step": 604, "utility_loss": 1.976627230644226 }, { "cosine_similarity": 0.30722468799341685, "epoch": 0.5638397017707363, "grad_norm": 1.548923608977999, "learning_rate": 1.8046254746289265e-05, "loss": 2.3621, "reason_loss": 0.5044299364089966, "step": 605, "utility_loss": 1.8576269149780273 }, { "cosine_similarity": 0.3517631800854138, "epoch": 0.5647716682199441, "grad_norm": 1.6220440414494057, "learning_rate": 1.8039351052813258e-05, "loss": 2.1827, "reason_loss": 0.5709881782531738, "step": 606, "utility_loss": 1.6116740703582764 }, { "cosine_similarity": 0.4732773149345988, "epoch": 0.5657036346691519, "grad_norm": 1.6086376055752096, "learning_rate": 1.8032447359337247e-05, "loss": 2.3979, "reason_loss": 0.5306233763694763, "step": 607, "utility_loss": 1.8672858476638794 }, { "cosine_similarity": 0.06246540105239209, "epoch": 0.5666356011183598, "grad_norm": 1.8035655030926077, "learning_rate": 1.802554366586124e-05, "loss": 2.1137, "reason_loss": 0.5591509342193604, "step": 608, "utility_loss": 1.5545613765716553 }, { "cosine_similarity": 0.3116359766350087, "epoch": 0.5675675675675675, "grad_norm": 1.6296068587841441, "learning_rate": 1.801863997238523e-05, "loss": 2.2611, "reason_loss": 0.5372958183288574, "step": 609, "utility_loss": 1.7238216400146484 }, { "cosine_similarity": 0.29569159004389023, "epoch": 0.5684995340167754, "grad_norm": 1.9835581083621536, "learning_rate": 1.8011736278909217e-05, "loss": 2.1507, "reason_loss": 0.5200641751289368, "step": 610, "utility_loss": 1.6306350231170654 }, { "cosine_similarity": 0.16491599152816946, "epoch": 0.5694315004659832, "grad_norm": 1.569441200619222, "learning_rate": 1.8004832585433206e-05, "loss": 2.376, "reason_loss": 0.5154308080673218, "step": 611, "utility_loss": 1.8605730533599854 }, { "cosine_similarity": 0.4980563502173958, "epoch": 0.570363466915191, "grad_norm": 1.6185477383319067, "learning_rate": 1.79979288919572e-05, "loss": 2.2527, "reason_loss": 0.5065970420837402, "step": 612, "utility_loss": 1.7460899353027344 }, { "cosine_similarity": 0.35245845559228245, "epoch": 0.5712954333643989, "grad_norm": 1.8583560200084752, "learning_rate": 1.7991025198481188e-05, "loss": 2.3305, "reason_loss": 0.558685839176178, "step": 613, "utility_loss": 1.7718193531036377 }, { "cosine_similarity": 0.23828601998958973, "epoch": 0.5722273998136067, "grad_norm": 1.5060137046225268, "learning_rate": 1.798412150500518e-05, "loss": 2.4052, "reason_loss": 0.540657639503479, "step": 614, "utility_loss": 1.864502191543579 }, { "cosine_similarity": 0.2414936427417823, "epoch": 0.5731593662628145, "grad_norm": 1.3282415986125427, "learning_rate": 1.797721781152917e-05, "loss": 2.0056, "reason_loss": 0.5194897651672363, "step": 615, "utility_loss": 1.486134648323059 }, { "cosine_similarity": 0.34038353595584914, "epoch": 0.5740913327120224, "grad_norm": 1.6535309892404217, "learning_rate": 1.7970314118053162e-05, "loss": 2.4933, "reason_loss": 0.5410176515579224, "step": 616, "utility_loss": 1.9523143768310547 }, { "cosine_similarity": 0.6495483219761586, "epoch": 0.5750232991612302, "grad_norm": 1.63735610737452, "learning_rate": 1.796341042457715e-05, "loss": 2.4557, "reason_loss": 0.5101308822631836, "step": 617, "utility_loss": 1.9455927610397339 }, { "cosine_similarity": 0.45936518179995606, "epoch": 0.575955265610438, "grad_norm": 1.489725344218733, "learning_rate": 1.795650673110114e-05, "loss": 2.3761, "reason_loss": 0.534064769744873, "step": 618, "utility_loss": 1.8420772552490234 }, { "cosine_similarity": 0.4895900621206968, "epoch": 0.5768872320596459, "grad_norm": 1.5189516601158866, "learning_rate": 1.7949603037625132e-05, "loss": 2.1281, "reason_loss": 0.5184173583984375, "step": 619, "utility_loss": 1.6096558570861816 }, { "cosine_similarity": 0.06439286094533785, "epoch": 0.5778191985088537, "grad_norm": 1.9692686799829486, "learning_rate": 1.794269934414912e-05, "loss": 2.0939, "reason_loss": 0.5122203826904297, "step": 620, "utility_loss": 1.5816917419433594 }, { "cosine_similarity": 0.2620767443772115, "epoch": 0.5787511649580616, "grad_norm": 1.2035404641794398, "learning_rate": 1.793579565067311e-05, "loss": 1.951, "reason_loss": 0.5236625671386719, "step": 621, "utility_loss": 1.427363634109497 }, { "cosine_similarity": 0.44141861690513967, "epoch": 0.5796831314072693, "grad_norm": 1.311406850561135, "learning_rate": 1.79288919571971e-05, "loss": 2.3636, "reason_loss": 0.5689903497695923, "step": 622, "utility_loss": 1.7946419715881348 }, { "cosine_similarity": 0.1345122395365785, "epoch": 0.5806150978564771, "grad_norm": 1.4886051243778837, "learning_rate": 1.7921988263721092e-05, "loss": 1.9137, "reason_loss": 0.540082573890686, "step": 623, "utility_loss": 1.3736491203308105 }, { "cosine_similarity": 0.0032025153871792136, "epoch": 0.581547064305685, "grad_norm": 1.779045766006105, "learning_rate": 1.7915084570245084e-05, "loss": 2.3505, "reason_loss": 0.5534555315971375, "step": 624, "utility_loss": 1.7969958782196045 }, { "cosine_similarity": 0.18941968471460666, "epoch": 0.5824790307548928, "grad_norm": 1.4333296329930767, "learning_rate": 1.7908180876769073e-05, "loss": 2.4331, "reason_loss": 0.569053053855896, "step": 625, "utility_loss": 1.8640023469924927 }, { "cosine_similarity": -0.04193615338681259, "epoch": 0.5834109972041006, "grad_norm": 1.731597244709523, "learning_rate": 1.7901277183293066e-05, "loss": 2.5242, "reason_loss": 0.5534231662750244, "step": 626, "utility_loss": 1.9708260297775269 }, { "cosine_similarity": 0.35194698736383834, "epoch": 0.5843429636533085, "grad_norm": 1.5235818004172768, "learning_rate": 1.7894373489817055e-05, "loss": 2.228, "reason_loss": 0.5732808113098145, "step": 627, "utility_loss": 1.6546744108200073 }, { "cosine_similarity": 0.11560810155410774, "epoch": 0.5852749301025163, "grad_norm": 1.5625956990604146, "learning_rate": 1.7887469796341044e-05, "loss": 2.2965, "reason_loss": 0.545698881149292, "step": 628, "utility_loss": 1.7507572174072266 }, { "cosine_similarity": 0.060390007903410814, "epoch": 0.5862068965517241, "grad_norm": 1.7283578396894206, "learning_rate": 1.7880566102865033e-05, "loss": 2.2568, "reason_loss": 0.5209529399871826, "step": 629, "utility_loss": 1.7358529567718506 }, { "cosine_similarity": 0.28946921567873696, "epoch": 0.587138863000932, "grad_norm": 2.2367579193882943, "learning_rate": 1.7873662409389025e-05, "loss": 2.356, "reason_loss": 0.5199569463729858, "step": 630, "utility_loss": 1.8360838890075684 }, { "cosine_similarity": 0.25733400270045337, "epoch": 0.5880708294501398, "grad_norm": 1.616574729246342, "learning_rate": 1.7866758715913014e-05, "loss": 2.3173, "reason_loss": 0.5250175595283508, "step": 631, "utility_loss": 1.792291283607483 }, { "cosine_similarity": -0.08172656269630807, "epoch": 0.5890027958993477, "grad_norm": 1.342535628792821, "learning_rate": 1.7859855022437003e-05, "loss": 2.1328, "reason_loss": 0.5331897735595703, "step": 632, "utility_loss": 1.5996018648147583 }, { "cosine_similarity": 0.0685018312517267, "epoch": 0.5899347623485555, "grad_norm": 1.9694765337995184, "learning_rate": 1.7852951328960996e-05, "loss": 2.3882, "reason_loss": 0.5368295311927795, "step": 633, "utility_loss": 1.8514034748077393 }, { "cosine_similarity": 0.30150000148679323, "epoch": 0.5908667287977633, "grad_norm": 1.3417228028928285, "learning_rate": 1.7846047635484988e-05, "loss": 2.2666, "reason_loss": 0.5678608417510986, "step": 634, "utility_loss": 1.6987321376800537 }, { "cosine_similarity": 0.38990787092555396, "epoch": 0.5917986952469712, "grad_norm": 1.4336094676808657, "learning_rate": 1.7839143942008977e-05, "loss": 2.3523, "reason_loss": 0.5580404996871948, "step": 635, "utility_loss": 1.794264316558838 }, { "cosine_similarity": 0.23479336538964848, "epoch": 0.5927306616961789, "grad_norm": 1.5377644292689667, "learning_rate": 1.7832240248532966e-05, "loss": 2.1641, "reason_loss": 0.5349169373512268, "step": 636, "utility_loss": 1.6291530132293701 }, { "cosine_similarity": 0.14151254146907152, "epoch": 0.5936626281453867, "grad_norm": 1.4920148607929657, "learning_rate": 1.782533655505696e-05, "loss": 2.4532, "reason_loss": 0.5628265142440796, "step": 637, "utility_loss": 1.8904157876968384 }, { "cosine_similarity": 0.370698201084727, "epoch": 0.5945945945945946, "grad_norm": 2.109514691599318, "learning_rate": 1.7818432861580948e-05, "loss": 2.3753, "reason_loss": 0.5274957418441772, "step": 638, "utility_loss": 1.8478014469146729 }, { "cosine_similarity": 0.1393776429061298, "epoch": 0.5955265610438024, "grad_norm": 1.5391440587497638, "learning_rate": 1.7811529168104937e-05, "loss": 2.3523, "reason_loss": 0.5388529300689697, "step": 639, "utility_loss": 1.8134106397628784 }, { "cosine_similarity": 0.4133226884547328, "epoch": 0.5964585274930102, "grad_norm": 2.110111392878653, "learning_rate": 1.7804625474628926e-05, "loss": 2.1168, "reason_loss": 0.5121226906776428, "step": 640, "utility_loss": 1.6046364307403564 }, { "cosine_similarity": 0.2876881059836191, "epoch": 0.5973904939422181, "grad_norm": 1.634943651950318, "learning_rate": 1.7797721781152918e-05, "loss": 2.1554, "reason_loss": 0.5272014141082764, "step": 641, "utility_loss": 1.6282461881637573 }, { "cosine_similarity": 0.24083130871286978, "epoch": 0.5983224603914259, "grad_norm": 1.235949298205631, "learning_rate": 1.7790818087676907e-05, "loss": 2.1624, "reason_loss": 0.5610195398330688, "step": 642, "utility_loss": 1.6014280319213867 }, { "cosine_similarity": 0.27537806057238856, "epoch": 0.5992544268406338, "grad_norm": 1.6227638501033497, "learning_rate": 1.77839143942009e-05, "loss": 2.2079, "reason_loss": 0.5385842323303223, "step": 643, "utility_loss": 1.6693434715270996 }, { "cosine_similarity": 0.2666804056911889, "epoch": 0.6001863932898416, "grad_norm": 1.4208373045857263, "learning_rate": 1.777701070072489e-05, "loss": 2.1766, "reason_loss": 0.5255281329154968, "step": 644, "utility_loss": 1.6510316133499146 }, { "cosine_similarity": 0.1964367140154596, "epoch": 0.6011183597390494, "grad_norm": 1.5566040872891613, "learning_rate": 1.777010700724888e-05, "loss": 2.2697, "reason_loss": 0.5212392807006836, "step": 645, "utility_loss": 1.7484638690948486 }, { "cosine_similarity": 0.3497966961303715, "epoch": 0.6020503261882573, "grad_norm": 1.4310036867765974, "learning_rate": 1.776320331377287e-05, "loss": 1.839, "reason_loss": 0.5452812910079956, "step": 646, "utility_loss": 1.293751835823059 }, { "cosine_similarity": 0.24383791423462833, "epoch": 0.6029822926374651, "grad_norm": 1.8132722495050884, "learning_rate": 1.775629962029686e-05, "loss": 2.2048, "reason_loss": 0.5596185922622681, "step": 647, "utility_loss": 1.6451857089996338 }, { "cosine_similarity": 0.137434948279648, "epoch": 0.6039142590866728, "grad_norm": 1.6505392515685933, "learning_rate": 1.774939592682085e-05, "loss": 2.0888, "reason_loss": 0.5570360422134399, "step": 648, "utility_loss": 1.5317800045013428 }, { "cosine_similarity": 0.18936786817115056, "epoch": 0.6048462255358807, "grad_norm": 1.7696376648559025, "learning_rate": 1.774249223334484e-05, "loss": 2.0011, "reason_loss": 0.5248401165008545, "step": 649, "utility_loss": 1.4762669801712036 }, { "cosine_similarity": 0.028681832733260946, "epoch": 0.6057781919850885, "grad_norm": 1.329761743060737, "learning_rate": 1.773558853986883e-05, "loss": 1.9023, "reason_loss": 0.5345120429992676, "step": 650, "utility_loss": 1.367749571800232 }, { "cosine_similarity": 0.09491981884581302, "epoch": 0.6067101584342963, "grad_norm": 1.6827276539682396, "learning_rate": 1.7728684846392822e-05, "loss": 2.1606, "reason_loss": 0.5548595786094666, "step": 651, "utility_loss": 1.6056911945343018 }, { "cosine_similarity": 0.136788623995134, "epoch": 0.6076421248835042, "grad_norm": 1.5143937127359122, "learning_rate": 1.772178115291681e-05, "loss": 2.0295, "reason_loss": 0.5437123775482178, "step": 652, "utility_loss": 1.4857592582702637 }, { "cosine_similarity": 0.2981417788521377, "epoch": 0.608574091332712, "grad_norm": 1.4379060216059023, "learning_rate": 1.7714877459440804e-05, "loss": 2.2806, "reason_loss": 0.5198557376861572, "step": 653, "utility_loss": 1.7607754468917847 }, { "cosine_similarity": 0.2543252064109213, "epoch": 0.6095060577819198, "grad_norm": 1.4100476747168607, "learning_rate": 1.7707973765964793e-05, "loss": 2.2943, "reason_loss": 0.5200709700584412, "step": 654, "utility_loss": 1.7742409706115723 }, { "cosine_similarity": 0.039328841529373965, "epoch": 0.6104380242311277, "grad_norm": 1.7378978257458475, "learning_rate": 1.7701070072488782e-05, "loss": 2.2682, "reason_loss": 0.5617741346359253, "step": 655, "utility_loss": 1.7064019441604614 }, { "cosine_similarity": 0.37557670243670166, "epoch": 0.6113699906803355, "grad_norm": 1.768920204400828, "learning_rate": 1.7694166379012774e-05, "loss": 2.2059, "reason_loss": 0.5335974097251892, "step": 656, "utility_loss": 1.6723103523254395 }, { "cosine_similarity": 0.36495213743607996, "epoch": 0.6123019571295434, "grad_norm": 1.8169499710429369, "learning_rate": 1.7687262685536763e-05, "loss": 2.3658, "reason_loss": 0.5363283753395081, "step": 657, "utility_loss": 1.8294252157211304 }, { "cosine_similarity": 0.035942520775543337, "epoch": 0.6132339235787512, "grad_norm": 1.4360747120189916, "learning_rate": 1.7680358992060752e-05, "loss": 2.2251, "reason_loss": 0.4962531626224518, "step": 658, "utility_loss": 1.728834629058838 }, { "cosine_similarity": 0.22060802889639028, "epoch": 0.614165890027959, "grad_norm": 1.223937464571289, "learning_rate": 1.7673455298584745e-05, "loss": 1.9786, "reason_loss": 0.4914226830005646, "step": 659, "utility_loss": 1.4871420860290527 }, { "cosine_similarity": 0.15486983594560272, "epoch": 0.6150978564771669, "grad_norm": 1.7734123147822125, "learning_rate": 1.7666551605108734e-05, "loss": 1.9512, "reason_loss": 0.5333490967750549, "step": 660, "utility_loss": 1.4178152084350586 }, { "cosine_similarity": 0.45976616050692104, "epoch": 0.6160298229263746, "grad_norm": 1.7021281074633396, "learning_rate": 1.7659647911632726e-05, "loss": 1.9743, "reason_loss": 0.5009932518005371, "step": 661, "utility_loss": 1.4733357429504395 }, { "cosine_similarity": 0.06959631814464233, "epoch": 0.6169617893755824, "grad_norm": 1.2979198799742686, "learning_rate": 1.7652744218156715e-05, "loss": 1.8626, "reason_loss": 0.5049738883972168, "step": 662, "utility_loss": 1.3575831651687622 }, { "cosine_similarity": 0.009145014475064954, "epoch": 0.6178937558247903, "grad_norm": 1.3796339933363835, "learning_rate": 1.7645840524680708e-05, "loss": 1.9912, "reason_loss": 0.5452728271484375, "step": 663, "utility_loss": 1.44596529006958 }, { "cosine_similarity": 0.3658748995110882, "epoch": 0.6188257222739981, "grad_norm": 1.7765854989866303, "learning_rate": 1.7638936831204697e-05, "loss": 2.1914, "reason_loss": 0.5314642190933228, "step": 664, "utility_loss": 1.6599242687225342 }, { "cosine_similarity": 0.14167270967998163, "epoch": 0.6197576887232059, "grad_norm": 1.3259896559181898, "learning_rate": 1.7632033137728686e-05, "loss": 2.3561, "reason_loss": 0.5245733857154846, "step": 665, "utility_loss": 1.8315430879592896 }, { "cosine_similarity": 0.2509371193026025, "epoch": 0.6206896551724138, "grad_norm": 1.5185915751211818, "learning_rate": 1.7625129444252678e-05, "loss": 2.2756, "reason_loss": 0.5346598625183105, "step": 666, "utility_loss": 1.7409491539001465 }, { "cosine_similarity": 0.3413726082043005, "epoch": 0.6216216216216216, "grad_norm": 1.3189518434927208, "learning_rate": 1.7618225750776667e-05, "loss": 2.484, "reason_loss": 0.518608808517456, "step": 667, "utility_loss": 1.9654327630996704 }, { "cosine_similarity": 0.16293951870039133, "epoch": 0.6225535880708295, "grad_norm": 1.453137233133649, "learning_rate": 1.7611322057300656e-05, "loss": 2.1452, "reason_loss": 0.4973848760128021, "step": 668, "utility_loss": 1.6478164196014404 }, { "cosine_similarity": 0.16833209559851345, "epoch": 0.6234855545200373, "grad_norm": 1.7748935992195611, "learning_rate": 1.7604418363824645e-05, "loss": 2.3353, "reason_loss": 0.5375195145606995, "step": 669, "utility_loss": 1.797823429107666 }, { "cosine_similarity": 0.5269578782205507, "epoch": 0.6244175209692451, "grad_norm": 1.8411414262752326, "learning_rate": 1.7597514670348638e-05, "loss": 2.2458, "reason_loss": 0.5069767236709595, "step": 670, "utility_loss": 1.7388108968734741 }, { "cosine_similarity": 0.5149203504494121, "epoch": 0.625349487418453, "grad_norm": 1.3818084678907387, "learning_rate": 1.759061097687263e-05, "loss": 2.1672, "reason_loss": 0.5464365482330322, "step": 671, "utility_loss": 1.6207737922668457 }, { "cosine_similarity": 0.20235912334929024, "epoch": 0.6262814538676608, "grad_norm": 1.5048210045758323, "learning_rate": 1.758370728339662e-05, "loss": 2.2513, "reason_loss": 0.5463340878486633, "step": 672, "utility_loss": 1.7049496173858643 }, { "cosine_similarity": 0.5558328320664379, "epoch": 0.6272134203168686, "grad_norm": 1.3185242169205849, "learning_rate": 1.7576803589920608e-05, "loss": 2.1964, "reason_loss": 0.5272239446640015, "step": 673, "utility_loss": 1.669182300567627 }, { "cosine_similarity": 0.4611918115284551, "epoch": 0.6281453867660765, "grad_norm": 1.690172271489729, "learning_rate": 1.75698998964446e-05, "loss": 2.1589, "reason_loss": 0.5197817087173462, "step": 674, "utility_loss": 1.6391420364379883 }, { "cosine_similarity": -0.11157059635778231, "epoch": 0.6290773532152842, "grad_norm": 1.4530233009276992, "learning_rate": 1.756299620296859e-05, "loss": 1.9954, "reason_loss": 0.5054156184196472, "step": 675, "utility_loss": 1.4900275468826294 }, { "cosine_similarity": 0.5299780419667501, "epoch": 0.630009319664492, "grad_norm": 1.5370776141027145, "learning_rate": 1.755609250949258e-05, "loss": 2.5345, "reason_loss": 0.5390161275863647, "step": 676, "utility_loss": 1.9955329895019531 }, { "cosine_similarity": 0.4137067574489367, "epoch": 0.6309412861136999, "grad_norm": 1.6567630769959012, "learning_rate": 1.754918881601657e-05, "loss": 2.3915, "reason_loss": 0.5114865303039551, "step": 677, "utility_loss": 1.8800601959228516 }, { "cosine_similarity": 0.2557456744597143, "epoch": 0.6318732525629077, "grad_norm": 1.4764676756442143, "learning_rate": 1.754228512254056e-05, "loss": 2.5626, "reason_loss": 0.5035150051116943, "step": 678, "utility_loss": 2.0590980052948 }, { "cosine_similarity": 0.48044611748352467, "epoch": 0.6328052190121156, "grad_norm": 1.7139166433019934, "learning_rate": 1.753538142906455e-05, "loss": 2.4562, "reason_loss": 0.539840579032898, "step": 679, "utility_loss": 1.9163625240325928 }, { "cosine_similarity": 0.44297663837072626, "epoch": 0.6337371854613234, "grad_norm": 1.6761524100488148, "learning_rate": 1.752847773558854e-05, "loss": 2.3768, "reason_loss": 0.5648411512374878, "step": 680, "utility_loss": 1.8119473457336426 }, { "cosine_similarity": 0.3301525962837972, "epoch": 0.6346691519105312, "grad_norm": 1.3552088743475321, "learning_rate": 1.7521574042112534e-05, "loss": 2.3476, "reason_loss": 0.5635179877281189, "step": 681, "utility_loss": 1.7840920686721802 }, { "cosine_similarity": 0.2339677005415686, "epoch": 0.6356011183597391, "grad_norm": 1.407027343394337, "learning_rate": 1.7514670348636523e-05, "loss": 2.3068, "reason_loss": 0.5351284146308899, "step": 682, "utility_loss": 1.771637201309204 }, { "cosine_similarity": 0.21220437474832382, "epoch": 0.6365330848089469, "grad_norm": 1.371078451766995, "learning_rate": 1.7507766655160512e-05, "loss": 2.2388, "reason_loss": 0.5455830097198486, "step": 683, "utility_loss": 1.6932553052902222 }, { "cosine_similarity": 0.16721392333751614, "epoch": 0.6374650512581547, "grad_norm": 1.7590904790506567, "learning_rate": 1.75008629616845e-05, "loss": 2.1974, "reason_loss": 0.5519358515739441, "step": 684, "utility_loss": 1.6454766988754272 }, { "cosine_similarity": -0.030544416214387214, "epoch": 0.6383970177073626, "grad_norm": 1.3986217009999662, "learning_rate": 1.7493959268208494e-05, "loss": 2.3473, "reason_loss": 0.524081289768219, "step": 685, "utility_loss": 1.8231925964355469 }, { "cosine_similarity": 0.07830537270756978, "epoch": 0.6393289841565704, "grad_norm": 1.5437066077996913, "learning_rate": 1.7487055574732483e-05, "loss": 2.1084, "reason_loss": 0.5264165997505188, "step": 686, "utility_loss": 1.5820119380950928 }, { "cosine_similarity": 0.21005371650923244, "epoch": 0.6402609506057781, "grad_norm": 1.5918106838229644, "learning_rate": 1.748015188125647e-05, "loss": 2.0928, "reason_loss": 0.5426279306411743, "step": 687, "utility_loss": 1.5501480102539062 }, { "cosine_similarity": 0.07525076688656321, "epoch": 0.641192917054986, "grad_norm": 1.612771551998157, "learning_rate": 1.7473248187780464e-05, "loss": 2.1617, "reason_loss": 0.5157860517501831, "step": 688, "utility_loss": 1.645925760269165 }, { "cosine_similarity": 0.48246682391045687, "epoch": 0.6421248835041938, "grad_norm": 1.3850414091087448, "learning_rate": 1.7466344494304453e-05, "loss": 2.3921, "reason_loss": 0.5296571254730225, "step": 689, "utility_loss": 1.86240553855896 }, { "cosine_similarity": -0.05673714847673801, "epoch": 0.6430568499534017, "grad_norm": 1.5948082213459818, "learning_rate": 1.7459440800828446e-05, "loss": 2.1373, "reason_loss": 0.4991689920425415, "step": 690, "utility_loss": 1.6380846500396729 }, { "cosine_similarity": 0.3720751700241731, "epoch": 0.6439888164026095, "grad_norm": 1.7512637607278556, "learning_rate": 1.7452537107352435e-05, "loss": 1.9553, "reason_loss": 0.5366426110267639, "step": 691, "utility_loss": 1.418702244758606 }, { "cosine_similarity": 0.7107908551562797, "epoch": 0.6449207828518173, "grad_norm": 1.438383085160133, "learning_rate": 1.7445633413876427e-05, "loss": 2.0157, "reason_loss": 0.5154249668121338, "step": 692, "utility_loss": 1.5003247261047363 }, { "cosine_similarity": 0.2112049731955697, "epoch": 0.6458527493010252, "grad_norm": 1.4992398629942831, "learning_rate": 1.7438729720400416e-05, "loss": 2.4502, "reason_loss": 0.5235666632652283, "step": 693, "utility_loss": 1.9265968799591064 }, { "cosine_similarity": 0.07361858102302693, "epoch": 0.646784715750233, "grad_norm": 1.3610958234836628, "learning_rate": 1.7431826026924405e-05, "loss": 1.8837, "reason_loss": 0.5441418886184692, "step": 694, "utility_loss": 1.3395625352859497 }, { "cosine_similarity": 0.37377229229508085, "epoch": 0.6477166821994408, "grad_norm": 1.3791159172957101, "learning_rate": 1.7424922333448394e-05, "loss": 2.2269, "reason_loss": 0.5174171924591064, "step": 695, "utility_loss": 1.70952570438385 }, { "cosine_similarity": 0.6067065115884646, "epoch": 0.6486486486486487, "grad_norm": 1.578875582181154, "learning_rate": 1.7418018639972387e-05, "loss": 2.2126, "reason_loss": 0.565454363822937, "step": 696, "utility_loss": 1.6471501588821411 }, { "cosine_similarity": 0.3717639777522554, "epoch": 0.6495806150978565, "grad_norm": 1.906711069977616, "learning_rate": 1.7411114946496376e-05, "loss": 1.9687, "reason_loss": 0.5403132438659668, "step": 697, "utility_loss": 1.4283466339111328 }, { "cosine_similarity": -0.3551578092883665, "epoch": 0.6505125815470643, "grad_norm": 1.7788377088539131, "learning_rate": 1.7404211253020368e-05, "loss": 2.4898, "reason_loss": 0.5354414582252502, "step": 698, "utility_loss": 1.9543355703353882 }, { "cosine_similarity": 0.027666945147672154, "epoch": 0.6514445479962722, "grad_norm": 2.296278558986944, "learning_rate": 1.7397307559544357e-05, "loss": 2.4034, "reason_loss": 0.5379719734191895, "step": 699, "utility_loss": 1.8654359579086304 }, { "cosine_similarity": 0.4949500162931217, "epoch": 0.65237651444548, "grad_norm": 1.8211985511522888, "learning_rate": 1.739040386606835e-05, "loss": 2.2505, "reason_loss": 0.5112956762313843, "step": 700, "utility_loss": 1.7391552925109863 }, { "cosine_similarity": 0.4415598803701013, "epoch": 0.6533084808946877, "grad_norm": 1.3686301211506169, "learning_rate": 1.738350017259234e-05, "loss": 2.2404, "reason_loss": 0.49557995796203613, "step": 701, "utility_loss": 1.74481201171875 }, { "cosine_similarity": 0.220027999393033, "epoch": 0.6542404473438956, "grad_norm": 1.8236902168451847, "learning_rate": 1.7376596479116328e-05, "loss": 2.4616, "reason_loss": 0.5743576884269714, "step": 702, "utility_loss": 1.8872473239898682 }, { "cosine_similarity": 0.36719679580077164, "epoch": 0.6551724137931034, "grad_norm": 1.389841943542611, "learning_rate": 1.736969278564032e-05, "loss": 2.4434, "reason_loss": 0.5639318227767944, "step": 703, "utility_loss": 1.8794893026351929 }, { "cosine_similarity": 0.09546720734817236, "epoch": 0.6561043802423113, "grad_norm": 1.6352326989795851, "learning_rate": 1.736278909216431e-05, "loss": 2.3134, "reason_loss": 0.5309697985649109, "step": 704, "utility_loss": 1.7824289798736572 }, { "cosine_similarity": 0.23222429108859335, "epoch": 0.6570363466915191, "grad_norm": 1.2417272016490897, "learning_rate": 1.7355885398688298e-05, "loss": 2.1539, "reason_loss": 0.49870967864990234, "step": 705, "utility_loss": 1.65521240234375 }, { "cosine_similarity": 0.3677314575790239, "epoch": 0.6579683131407269, "grad_norm": 1.551139896159573, "learning_rate": 1.734898170521229e-05, "loss": 2.0989, "reason_loss": 0.5020847320556641, "step": 706, "utility_loss": 1.596808910369873 }, { "cosine_similarity": 0.023891852852677222, "epoch": 0.6589002795899348, "grad_norm": 1.520059651965292, "learning_rate": 1.734207801173628e-05, "loss": 2.5286, "reason_loss": 0.5547000169754028, "step": 707, "utility_loss": 1.9739221334457397 }, { "cosine_similarity": 0.09146842881418851, "epoch": 0.6598322460391426, "grad_norm": 1.6626349942711225, "learning_rate": 1.7335174318260272e-05, "loss": 1.9091, "reason_loss": 0.5265274047851562, "step": 708, "utility_loss": 1.3826210498809814 }, { "cosine_similarity": 0.8034042221144444, "epoch": 0.6607642124883504, "grad_norm": 1.4646492451719089, "learning_rate": 1.732827062478426e-05, "loss": 2.5454, "reason_loss": 0.5369274020195007, "step": 709, "utility_loss": 2.008498191833496 }, { "cosine_similarity": 0.23644902941789378, "epoch": 0.6616961789375583, "grad_norm": 1.4238082716809013, "learning_rate": 1.7321366931308253e-05, "loss": 2.1707, "reason_loss": 0.5434994697570801, "step": 710, "utility_loss": 1.627187728881836 }, { "cosine_similarity": -0.11144992985664248, "epoch": 0.6626281453867661, "grad_norm": 1.6545783843688773, "learning_rate": 1.7314463237832242e-05, "loss": 2.2321, "reason_loss": 0.5291093587875366, "step": 711, "utility_loss": 1.7029905319213867 }, { "cosine_similarity": 0.19517779082207765, "epoch": 0.6635601118359739, "grad_norm": 1.292803993125257, "learning_rate": 1.730755954435623e-05, "loss": 2.4644, "reason_loss": 0.5094757080078125, "step": 712, "utility_loss": 1.9549462795257568 }, { "cosine_similarity": 0.1399476795490432, "epoch": 0.6644920782851818, "grad_norm": 1.7070081910955737, "learning_rate": 1.730065585088022e-05, "loss": 2.0265, "reason_loss": 0.5225555896759033, "step": 713, "utility_loss": 1.503924012184143 }, { "cosine_similarity": 0.17318086369431002, "epoch": 0.6654240447343895, "grad_norm": 1.353794200764033, "learning_rate": 1.7293752157404213e-05, "loss": 2.3452, "reason_loss": 0.5062888860702515, "step": 714, "utility_loss": 1.8388996124267578 }, { "cosine_similarity": 0.04771207708653075, "epoch": 0.6663560111835974, "grad_norm": 1.3567328143049795, "learning_rate": 1.7286848463928202e-05, "loss": 2.157, "reason_loss": 0.5257084369659424, "step": 715, "utility_loss": 1.6312482357025146 }, { "cosine_similarity": 0.14265450845222022, "epoch": 0.6672879776328052, "grad_norm": 1.3597054621990716, "learning_rate": 1.7279944770452194e-05, "loss": 1.9879, "reason_loss": 0.5240734815597534, "step": 716, "utility_loss": 1.4638036489486694 }, { "cosine_similarity": 0.21585609406894748, "epoch": 0.668219944082013, "grad_norm": 1.3053814099848835, "learning_rate": 1.7273041076976183e-05, "loss": 2.3595, "reason_loss": 0.5257412195205688, "step": 717, "utility_loss": 1.8337243795394897 }, { "cosine_similarity": 0.3089435333159034, "epoch": 0.6691519105312209, "grad_norm": 1.2563689525310424, "learning_rate": 1.7266137383500176e-05, "loss": 2.2043, "reason_loss": 0.5502508878707886, "step": 718, "utility_loss": 1.6540381908416748 }, { "cosine_similarity": 0.11503665889525846, "epoch": 0.6700838769804287, "grad_norm": 1.5240733293637052, "learning_rate": 1.7259233690024165e-05, "loss": 2.3145, "reason_loss": 0.5154790878295898, "step": 719, "utility_loss": 1.799038052558899 }, { "cosine_similarity": 0.2945031003978058, "epoch": 0.6710158434296365, "grad_norm": 1.5545455468032885, "learning_rate": 1.7252329996548154e-05, "loss": 2.3093, "reason_loss": 0.5443589091300964, "step": 720, "utility_loss": 1.7649229764938354 }, { "cosine_similarity": 0.12453730220746187, "epoch": 0.6719478098788444, "grad_norm": 1.6448148996040155, "learning_rate": 1.7245426303072146e-05, "loss": 2.1016, "reason_loss": 0.5560145378112793, "step": 721, "utility_loss": 1.5455610752105713 }, { "cosine_similarity": 0.32968159743101977, "epoch": 0.6728797763280522, "grad_norm": 1.370470686000444, "learning_rate": 1.7238522609596135e-05, "loss": 2.3183, "reason_loss": 0.4972226023674011, "step": 722, "utility_loss": 1.821035623550415 }, { "cosine_similarity": 0.2064681563138357, "epoch": 0.67381174277726, "grad_norm": 1.3765665326027912, "learning_rate": 1.7231618916120124e-05, "loss": 2.0784, "reason_loss": 0.5443596839904785, "step": 723, "utility_loss": 1.5340489149093628 }, { "cosine_similarity": 0.25138902182991835, "epoch": 0.6747437092264679, "grad_norm": 2.1902861990831473, "learning_rate": 1.7224715222644113e-05, "loss": 2.3124, "reason_loss": 0.5323059558868408, "step": 724, "utility_loss": 1.7800836563110352 }, { "cosine_similarity": 0.2525658935658094, "epoch": 0.6756756756756757, "grad_norm": 1.463892433396284, "learning_rate": 1.7217811529168106e-05, "loss": 2.5046, "reason_loss": 0.5206477642059326, "step": 725, "utility_loss": 1.9839420318603516 }, { "cosine_similarity": 0.18987715355927748, "epoch": 0.6766076421248836, "grad_norm": 1.8087497011333236, "learning_rate": 1.72109078356921e-05, "loss": 2.2458, "reason_loss": 0.5234672427177429, "step": 726, "utility_loss": 1.72232985496521 }, { "cosine_similarity": 0.18584370951257798, "epoch": 0.6775396085740913, "grad_norm": 1.4526340800587643, "learning_rate": 1.7204004142216087e-05, "loss": 2.0391, "reason_loss": 0.5314146280288696, "step": 727, "utility_loss": 1.507727026939392 }, { "cosine_similarity": 0.10963173344844639, "epoch": 0.6784715750232991, "grad_norm": 1.3577253977325492, "learning_rate": 1.719710044874008e-05, "loss": 2.1764, "reason_loss": 0.511958122253418, "step": 728, "utility_loss": 1.6643939018249512 }, { "cosine_similarity": 0.07133268165574344, "epoch": 0.679403541472507, "grad_norm": 1.8090568256556196, "learning_rate": 1.719019675526407e-05, "loss": 2.0066, "reason_loss": 0.5099217891693115, "step": 729, "utility_loss": 1.496639609336853 }, { "cosine_similarity": 0.33236857149021237, "epoch": 0.6803355079217148, "grad_norm": 1.6361838295171491, "learning_rate": 1.7183293061788058e-05, "loss": 2.0748, "reason_loss": 0.5347063541412354, "step": 730, "utility_loss": 1.5401157140731812 }, { "cosine_similarity": 0.029335295289948153, "epoch": 0.6812674743709226, "grad_norm": 1.3167776961897866, "learning_rate": 1.7176389368312047e-05, "loss": 2.1517, "reason_loss": 0.5147091150283813, "step": 731, "utility_loss": 1.6370277404785156 }, { "cosine_similarity": 0.2699495189472932, "epoch": 0.6821994408201305, "grad_norm": 1.3739369547701739, "learning_rate": 1.716948567483604e-05, "loss": 2.2097, "reason_loss": 0.5264922976493835, "step": 732, "utility_loss": 1.683243751525879 }, { "cosine_similarity": 0.11921106426405369, "epoch": 0.6831314072693383, "grad_norm": 1.5848037385758793, "learning_rate": 1.716258198136003e-05, "loss": 2.1705, "reason_loss": 0.5356240272521973, "step": 733, "utility_loss": 1.6348729133605957 }, { "cosine_similarity": 0.14780775491414322, "epoch": 0.6840633737185461, "grad_norm": 1.3699093762717245, "learning_rate": 1.7155678287884017e-05, "loss": 2.3433, "reason_loss": 0.5303477644920349, "step": 734, "utility_loss": 1.8129358291625977 }, { "cosine_similarity": 0.42657093439283056, "epoch": 0.684995340167754, "grad_norm": 1.7205367808393728, "learning_rate": 1.714877459440801e-05, "loss": 2.0466, "reason_loss": 0.5357128381729126, "step": 735, "utility_loss": 1.5108377933502197 }, { "cosine_similarity": 0.54682543885671, "epoch": 0.6859273066169618, "grad_norm": 1.3188716693425022, "learning_rate": 1.7141870900932002e-05, "loss": 2.1645, "reason_loss": 0.5326061248779297, "step": 736, "utility_loss": 1.6318657398223877 }, { "cosine_similarity": 0.09611374126505255, "epoch": 0.6868592730661697, "grad_norm": 2.1101419571319617, "learning_rate": 1.713496720745599e-05, "loss": 2.2106, "reason_loss": 0.5345814824104309, "step": 737, "utility_loss": 1.6759953498840332 }, { "cosine_similarity": -0.38922603615264784, "epoch": 0.6877912395153775, "grad_norm": 1.8633224756065188, "learning_rate": 1.712806351397998e-05, "loss": 2.2973, "reason_loss": 0.5015980005264282, "step": 738, "utility_loss": 1.7956559658050537 }, { "cosine_similarity": 0.018944839740534517, "epoch": 0.6887232059645852, "grad_norm": 1.421404341975345, "learning_rate": 1.7121159820503973e-05, "loss": 2.3255, "reason_loss": 0.5392179489135742, "step": 739, "utility_loss": 1.7862660884857178 }, { "cosine_similarity": 0.2257702011566864, "epoch": 0.6896551724137931, "grad_norm": 1.2376408611853311, "learning_rate": 1.7114256127027962e-05, "loss": 1.9307, "reason_loss": 0.4979419708251953, "step": 740, "utility_loss": 1.4327738285064697 }, { "cosine_similarity": 0.15123003314997563, "epoch": 0.6905871388630009, "grad_norm": 1.1997261270547308, "learning_rate": 1.710735243355195e-05, "loss": 2.0169, "reason_loss": 0.5071520805358887, "step": 741, "utility_loss": 1.5097792148590088 }, { "cosine_similarity": -0.028956466046054896, "epoch": 0.6915191053122087, "grad_norm": 1.4201230323484981, "learning_rate": 1.710044874007594e-05, "loss": 1.9927, "reason_loss": 0.5083653330802917, "step": 742, "utility_loss": 1.4843111038208008 }, { "cosine_similarity": -0.18557130942722697, "epoch": 0.6924510717614166, "grad_norm": 1.2812051658109496, "learning_rate": 1.7093545046599932e-05, "loss": 2.21, "reason_loss": 0.49628522992134094, "step": 743, "utility_loss": 1.7136709690093994 }, { "cosine_similarity": -0.2701709547404777, "epoch": 0.6933830382106244, "grad_norm": 1.644333424110964, "learning_rate": 1.708664135312392e-05, "loss": 2.1096, "reason_loss": 0.5314714312553406, "step": 744, "utility_loss": 1.5781383514404297 }, { "cosine_similarity": 0.19471669395729982, "epoch": 0.6943150046598322, "grad_norm": 1.457337959308818, "learning_rate": 1.7079737659647914e-05, "loss": 2.3353, "reason_loss": 0.5341716408729553, "step": 745, "utility_loss": 1.801114559173584 }, { "cosine_similarity": 0.30313303403155917, "epoch": 0.6952469711090401, "grad_norm": 1.4540038724438624, "learning_rate": 1.7072833966171903e-05, "loss": 2.1884, "reason_loss": 0.49359381198883057, "step": 746, "utility_loss": 1.6948189735412598 }, { "cosine_similarity": 0.5897556596373378, "epoch": 0.6961789375582479, "grad_norm": 1.3565205118387846, "learning_rate": 1.7065930272695895e-05, "loss": 2.036, "reason_loss": 0.5331989526748657, "step": 747, "utility_loss": 1.5028204917907715 }, { "cosine_similarity": 0.07366231745794038, "epoch": 0.6971109040074557, "grad_norm": 1.3805667930314902, "learning_rate": 1.7059026579219884e-05, "loss": 2.1341, "reason_loss": 0.517788290977478, "step": 748, "utility_loss": 1.6163301467895508 }, { "cosine_similarity": 0.4571085175681629, "epoch": 0.6980428704566636, "grad_norm": 1.307514435486339, "learning_rate": 1.7052122885743873e-05, "loss": 2.1858, "reason_loss": 0.5397221446037292, "step": 749, "utility_loss": 1.6460392475128174 }, { "cosine_similarity": 0.008967387967214059, "epoch": 0.6989748369058714, "grad_norm": 1.7233692131702312, "learning_rate": 1.7045219192267866e-05, "loss": 1.9591, "reason_loss": 0.5168782472610474, "step": 750, "utility_loss": 1.4422640800476074 }, { "cosine_similarity": 0.2201691177072337, "epoch": 0.6999068033550793, "grad_norm": 1.2683037695110413, "learning_rate": 1.7038315498791855e-05, "loss": 2.0122, "reason_loss": 0.5370646119117737, "step": 751, "utility_loss": 1.4751503467559814 }, { "cosine_similarity": 0.4143028656034257, "epoch": 0.700838769804287, "grad_norm": 2.5389874952655735, "learning_rate": 1.7031411805315844e-05, "loss": 2.1131, "reason_loss": 0.5062525272369385, "step": 752, "utility_loss": 1.6068620681762695 }, { "cosine_similarity": 0.18312604050829456, "epoch": 0.7017707362534948, "grad_norm": 1.4380721484738737, "learning_rate": 1.7024508111839836e-05, "loss": 2.425, "reason_loss": 0.5189947485923767, "step": 753, "utility_loss": 1.906032681465149 }, { "cosine_similarity": -0.10696951633121331, "epoch": 0.7027027027027027, "grad_norm": 1.461968351606211, "learning_rate": 1.7017604418363825e-05, "loss": 2.3732, "reason_loss": 0.5153653621673584, "step": 754, "utility_loss": 1.8578739166259766 }, { "cosine_similarity": 0.6557431007681741, "epoch": 0.7036346691519105, "grad_norm": 1.2984362674764234, "learning_rate": 1.7010700724887818e-05, "loss": 2.325, "reason_loss": 0.5371112823486328, "step": 755, "utility_loss": 1.7878751754760742 }, { "cosine_similarity": 0.25647622071193316, "epoch": 0.7045666356011183, "grad_norm": 1.4832770975389113, "learning_rate": 1.7003797031411807e-05, "loss": 1.9595, "reason_loss": 0.5171271562576294, "step": 756, "utility_loss": 1.4423902034759521 }, { "cosine_similarity": -0.009642371089199209, "epoch": 0.7054986020503262, "grad_norm": 1.5008730306891302, "learning_rate": 1.69968933379358e-05, "loss": 1.9316, "reason_loss": 0.5225212574005127, "step": 757, "utility_loss": 1.4090824127197266 }, { "cosine_similarity": 0.07804266971919824, "epoch": 0.706430568499534, "grad_norm": 1.7269321145066425, "learning_rate": 1.6989989644459788e-05, "loss": 2.3423, "reason_loss": 0.551005482673645, "step": 758, "utility_loss": 1.791316032409668 }, { "cosine_similarity": 0.8252036629053281, "epoch": 0.7073625349487418, "grad_norm": 1.3855260626139023, "learning_rate": 1.6983085950983777e-05, "loss": 2.2758, "reason_loss": 0.5355021357536316, "step": 759, "utility_loss": 1.7403101921081543 }, { "cosine_similarity": 0.2711809028454966, "epoch": 0.7082945013979497, "grad_norm": 1.4421347418884733, "learning_rate": 1.6976182257507766e-05, "loss": 2.0411, "reason_loss": 0.5163918733596802, "step": 760, "utility_loss": 1.5246949195861816 }, { "cosine_similarity": 0.5646044922707253, "epoch": 0.7092264678471575, "grad_norm": 1.5142760883808375, "learning_rate": 1.696927856403176e-05, "loss": 2.017, "reason_loss": 0.5152632594108582, "step": 761, "utility_loss": 1.5017647743225098 }, { "cosine_similarity": 0.4675866912017908, "epoch": 0.7101584342963654, "grad_norm": 1.4627141447940712, "learning_rate": 1.6962374870555748e-05, "loss": 2.8639, "reason_loss": 0.5299705266952515, "step": 762, "utility_loss": 2.333892822265625 }, { "cosine_similarity": 0.08799231396388667, "epoch": 0.7110904007455732, "grad_norm": 1.473007291194205, "learning_rate": 1.695547117707974e-05, "loss": 2.3218, "reason_loss": 0.4788839817047119, "step": 763, "utility_loss": 1.8428828716278076 }, { "cosine_similarity": 0.36646816832252144, "epoch": 0.712022367194781, "grad_norm": 1.6063394211323276, "learning_rate": 1.694856748360373e-05, "loss": 2.2747, "reason_loss": 0.5047387480735779, "step": 764, "utility_loss": 1.7699191570281982 }, { "cosine_similarity": 0.3969495851309499, "epoch": 0.7129543336439889, "grad_norm": 1.260274020907556, "learning_rate": 1.694166379012772e-05, "loss": 2.095, "reason_loss": 0.5072081089019775, "step": 765, "utility_loss": 1.5877561569213867 }, { "cosine_similarity": 0.12828051310940666, "epoch": 0.7138863000931966, "grad_norm": 1.886060255544534, "learning_rate": 1.693476009665171e-05, "loss": 2.3109, "reason_loss": 0.5377556681632996, "step": 766, "utility_loss": 1.7731138467788696 }, { "cosine_similarity": 0.48125723885880656, "epoch": 0.7148182665424044, "grad_norm": 1.527864241257264, "learning_rate": 1.69278564031757e-05, "loss": 2.1156, "reason_loss": 0.5019251108169556, "step": 767, "utility_loss": 1.6137219667434692 }, { "cosine_similarity": 0.5194783743027069, "epoch": 0.7157502329916123, "grad_norm": 1.2581488539549903, "learning_rate": 1.6920952709699692e-05, "loss": 2.1965, "reason_loss": 0.5148411989212036, "step": 768, "utility_loss": 1.6816545724868774 }, { "cosine_similarity": 0.2524033336999521, "epoch": 0.7166821994408201, "grad_norm": 1.1803696253572626, "learning_rate": 1.691404901622368e-05, "loss": 2.0785, "reason_loss": 0.5291386246681213, "step": 769, "utility_loss": 1.5493855476379395 }, { "cosine_similarity": 0.2442774501338973, "epoch": 0.7176141658900279, "grad_norm": 1.562001506195485, "learning_rate": 1.690714532274767e-05, "loss": 1.9863, "reason_loss": 0.5056861639022827, "step": 770, "utility_loss": 1.4805870056152344 }, { "cosine_similarity": 0.8164081914840406, "epoch": 0.7185461323392358, "grad_norm": 1.8125059739678775, "learning_rate": 1.690024162927166e-05, "loss": 2.0489, "reason_loss": 0.5333068370819092, "step": 771, "utility_loss": 1.5155510902404785 }, { "cosine_similarity": 0.5292830421398048, "epoch": 0.7194780987884436, "grad_norm": 1.4205328986213586, "learning_rate": 1.689333793579565e-05, "loss": 2.2549, "reason_loss": 0.5592381954193115, "step": 772, "utility_loss": 1.6956640481948853 }, { "cosine_similarity": 0.5864440096757124, "epoch": 0.7204100652376515, "grad_norm": 1.3256699310940023, "learning_rate": 1.6886434242319644e-05, "loss": 2.077, "reason_loss": 0.5218977928161621, "step": 773, "utility_loss": 1.5550823211669922 }, { "cosine_similarity": 0.4123477681944653, "epoch": 0.7213420316868593, "grad_norm": 1.2594686907237402, "learning_rate": 1.6879530548843633e-05, "loss": 2.0554, "reason_loss": 0.5417502522468567, "step": 774, "utility_loss": 1.5136103630065918 }, { "cosine_similarity": 0.22804025701247538, "epoch": 0.7222739981360671, "grad_norm": 1.425897975922589, "learning_rate": 1.6872626855367622e-05, "loss": 2.5231, "reason_loss": 0.5268067121505737, "step": 775, "utility_loss": 1.9962533712387085 }, { "cosine_similarity": 0.17840083906429388, "epoch": 0.723205964585275, "grad_norm": 1.4460284002477934, "learning_rate": 1.6865723161891615e-05, "loss": 2.1266, "reason_loss": 0.5358353853225708, "step": 776, "utility_loss": 1.5907551050186157 }, { "cosine_similarity": 0.29658582141083933, "epoch": 0.7241379310344828, "grad_norm": 1.4534043459182482, "learning_rate": 1.6858819468415604e-05, "loss": 1.8386, "reason_loss": 0.506074845790863, "step": 777, "utility_loss": 1.332484483718872 }, { "cosine_similarity": 0.7100696737486459, "epoch": 0.7250698974836906, "grad_norm": 1.6745094301580934, "learning_rate": 1.6851915774939593e-05, "loss": 1.94, "reason_loss": 0.5359783172607422, "step": 778, "utility_loss": 1.4039762020111084 }, { "cosine_similarity": 0.15770331002104876, "epoch": 0.7260018639328985, "grad_norm": 2.3925121798315803, "learning_rate": 1.6845012081463585e-05, "loss": 2.3536, "reason_loss": 0.5176923274993896, "step": 779, "utility_loss": 1.83591628074646 }, { "cosine_similarity": -0.04685493512676446, "epoch": 0.7269338303821062, "grad_norm": 1.4832680932530302, "learning_rate": 1.6838108387987574e-05, "loss": 1.8222, "reason_loss": 0.4870513081550598, "step": 780, "utility_loss": 1.335184931755066 }, { "cosine_similarity": 0.4858065095551362, "epoch": 0.727865796831314, "grad_norm": 1.3402780472820692, "learning_rate": 1.6831204694511563e-05, "loss": 2.2877, "reason_loss": 0.5239320993423462, "step": 781, "utility_loss": 1.763803482055664 }, { "cosine_similarity": -0.06835553719599921, "epoch": 0.7287977632805219, "grad_norm": 1.3271713690446412, "learning_rate": 1.6824301001035556e-05, "loss": 2.1405, "reason_loss": 0.5238552093505859, "step": 782, "utility_loss": 1.616629958152771 }, { "cosine_similarity": 0.5168266661169525, "epoch": 0.7297297297297297, "grad_norm": 1.2367261578564976, "learning_rate": 1.6817397307559548e-05, "loss": 1.7681, "reason_loss": 0.5231452584266663, "step": 783, "utility_loss": 1.2449476718902588 }, { "cosine_similarity": 0.05028089070109857, "epoch": 0.7306616961789375, "grad_norm": 1.7480296033476137, "learning_rate": 1.6810493614083537e-05, "loss": 2.0171, "reason_loss": 0.5267734527587891, "step": 784, "utility_loss": 1.4903056621551514 }, { "cosine_similarity": 0.1147138876624165, "epoch": 0.7315936626281454, "grad_norm": 2.039810554851798, "learning_rate": 1.6803589920607526e-05, "loss": 2.5758, "reason_loss": 0.5267464518547058, "step": 785, "utility_loss": 2.049027681350708 }, { "cosine_similarity": 0.5039092045696488, "epoch": 0.7325256290773532, "grad_norm": 1.9005763120409198, "learning_rate": 1.6796686227131515e-05, "loss": 2.4868, "reason_loss": 0.5267195701599121, "step": 786, "utility_loss": 1.9600416421890259 }, { "cosine_similarity": 0.20967967406385135, "epoch": 0.7334575955265611, "grad_norm": 1.6240587412278782, "learning_rate": 1.6789782533655508e-05, "loss": 2.0934, "reason_loss": 0.5104259252548218, "step": 787, "utility_loss": 1.5830081701278687 }, { "cosine_similarity": -0.23977087395252425, "epoch": 0.7343895619757689, "grad_norm": 2.0149004105494326, "learning_rate": 1.6782878840179497e-05, "loss": 2.3288, "reason_loss": 0.5340535044670105, "step": 788, "utility_loss": 1.7947945594787598 }, { "cosine_similarity": 0.03657272186231695, "epoch": 0.7353215284249767, "grad_norm": 2.188510424986267, "learning_rate": 1.6775975146703486e-05, "loss": 2.1656, "reason_loss": 0.5493285655975342, "step": 789, "utility_loss": 1.616241216659546 }, { "cosine_similarity": 0.5790269471385986, "epoch": 0.7362534948741846, "grad_norm": 1.573969332483205, "learning_rate": 1.6769071453227478e-05, "loss": 2.3856, "reason_loss": 0.533882737159729, "step": 790, "utility_loss": 1.8517457246780396 }, { "cosine_similarity": 0.4243367025055931, "epoch": 0.7371854613233924, "grad_norm": 1.2951140051528691, "learning_rate": 1.6762167759751467e-05, "loss": 1.9115, "reason_loss": 0.532309889793396, "step": 791, "utility_loss": 1.3791446685791016 }, { "cosine_similarity": 0.44553164635051123, "epoch": 0.7381174277726001, "grad_norm": 1.3806983273924447, "learning_rate": 1.675526406627546e-05, "loss": 2.0226, "reason_loss": 0.5444380044937134, "step": 792, "utility_loss": 1.4781241416931152 }, { "cosine_similarity": 0.19144429598257212, "epoch": 0.739049394221808, "grad_norm": 1.5963139546145018, "learning_rate": 1.674836037279945e-05, "loss": 2.2743, "reason_loss": 0.5282073020935059, "step": 793, "utility_loss": 1.7460806369781494 }, { "cosine_similarity": -0.08554780858330562, "epoch": 0.7399813606710158, "grad_norm": 1.4873281195836006, "learning_rate": 1.674145667932344e-05, "loss": 2.0085, "reason_loss": 0.501490592956543, "step": 794, "utility_loss": 1.5070371627807617 }, { "cosine_similarity": 0.3009540546900733, "epoch": 0.7409133271202236, "grad_norm": 1.465983966581474, "learning_rate": 1.673455298584743e-05, "loss": 2.1559, "reason_loss": 0.5076472759246826, "step": 795, "utility_loss": 1.6482582092285156 }, { "cosine_similarity": 0.5169799730951288, "epoch": 0.7418452935694315, "grad_norm": 1.261557180449773, "learning_rate": 1.672764929237142e-05, "loss": 1.7991, "reason_loss": 0.5076098442077637, "step": 796, "utility_loss": 1.2915291786193848 }, { "cosine_similarity": 0.13037377054941687, "epoch": 0.7427772600186393, "grad_norm": 1.4368986307171456, "learning_rate": 1.672074559889541e-05, "loss": 2.4132, "reason_loss": 0.5420913696289062, "step": 797, "utility_loss": 1.8710823059082031 }, { "cosine_similarity": 0.2420281102037887, "epoch": 0.7437092264678472, "grad_norm": 1.3522069007424224, "learning_rate": 1.67138419054194e-05, "loss": 2.0841, "reason_loss": 0.5195660591125488, "step": 798, "utility_loss": 1.56455659866333 }, { "cosine_similarity": 0.08453736426815854, "epoch": 0.744641192917055, "grad_norm": 2.3746887512364796, "learning_rate": 1.670693821194339e-05, "loss": 2.1398, "reason_loss": 0.5214860439300537, "step": 799, "utility_loss": 1.618312954902649 }, { "cosine_similarity": 0.46222003411957635, "epoch": 0.7455731593662628, "grad_norm": 1.816738438223548, "learning_rate": 1.6700034518467382e-05, "loss": 2.0826, "reason_loss": 0.5083814859390259, "step": 800, "utility_loss": 1.574173927307129 }, { "cosine_similarity": 0.09710702512031257, "epoch": 0.7465051258154707, "grad_norm": 1.4900468724622942, "learning_rate": 1.669313082499137e-05, "loss": 2.0788, "reason_loss": 0.5253528952598572, "step": 801, "utility_loss": 1.553435206413269 }, { "cosine_similarity": 0.28388049609928456, "epoch": 0.7474370922646785, "grad_norm": 1.2709134456583837, "learning_rate": 1.6686227131515363e-05, "loss": 2.0541, "reason_loss": 0.5126663446426392, "step": 802, "utility_loss": 1.5414061546325684 }, { "cosine_similarity": -0.03103373081559838, "epoch": 0.7483690587138863, "grad_norm": 1.398350890396117, "learning_rate": 1.6679323438039353e-05, "loss": 1.9129, "reason_loss": 0.5321924090385437, "step": 803, "utility_loss": 1.3807203769683838 }, { "cosine_similarity": 0.35166000398421, "epoch": 0.7493010251630942, "grad_norm": 1.5569328037766996, "learning_rate": 1.667241974456334e-05, "loss": 1.9333, "reason_loss": 0.5176260471343994, "step": 804, "utility_loss": 1.4156568050384521 }, { "cosine_similarity": 0.5644286566475012, "epoch": 0.750232991612302, "grad_norm": 1.3050734111911435, "learning_rate": 1.6665516051087334e-05, "loss": 2.0887, "reason_loss": 0.5124764442443848, "step": 805, "utility_loss": 1.5762125253677368 }, { "cosine_similarity": 0.4484633810419713, "epoch": 0.7511649580615097, "grad_norm": 1.792827937876891, "learning_rate": 1.6658612357611323e-05, "loss": 2.0704, "reason_loss": 0.5445359945297241, "step": 806, "utility_loss": 1.5259042978286743 }, { "cosine_similarity": 0.05426415221539827, "epoch": 0.7520969245107176, "grad_norm": 1.5872952731532153, "learning_rate": 1.6651708664135312e-05, "loss": 2.4191, "reason_loss": 0.5334673523902893, "step": 807, "utility_loss": 1.8856242895126343 }, { "cosine_similarity": 0.18783565533581975, "epoch": 0.7530288909599254, "grad_norm": 1.4565529715672552, "learning_rate": 1.6644804970659305e-05, "loss": 2.5022, "reason_loss": 0.5539669990539551, "step": 808, "utility_loss": 1.94820237159729 }, { "cosine_similarity": 0.13553588416875148, "epoch": 0.7539608574091333, "grad_norm": 1.3686030332258146, "learning_rate": 1.6637901277183294e-05, "loss": 2.335, "reason_loss": 0.5364843606948853, "step": 809, "utility_loss": 1.7985397577285767 }, { "cosine_similarity": 0.10992124055724209, "epoch": 0.7548928238583411, "grad_norm": 1.312566700783272, "learning_rate": 1.6630997583707286e-05, "loss": 2.0109, "reason_loss": 0.5549148321151733, "step": 810, "utility_loss": 1.4559930562973022 }, { "cosine_similarity": 0.16367770499363332, "epoch": 0.7558247903075489, "grad_norm": 1.284310549687762, "learning_rate": 1.6624093890231275e-05, "loss": 2.1731, "reason_loss": 0.532652735710144, "step": 811, "utility_loss": 1.6404759883880615 }, { "cosine_similarity": 0.2259398273188345, "epoch": 0.7567567567567568, "grad_norm": 1.6760452274364908, "learning_rate": 1.6617190196755267e-05, "loss": 2.2588, "reason_loss": 0.5108683109283447, "step": 812, "utility_loss": 1.747890591621399 }, { "cosine_similarity": 0.28174446182604607, "epoch": 0.7576887232059646, "grad_norm": 1.4325578241447618, "learning_rate": 1.6610286503279256e-05, "loss": 2.2669, "reason_loss": 0.5249216556549072, "step": 813, "utility_loss": 1.7419531345367432 }, { "cosine_similarity": -0.07585268375525618, "epoch": 0.7586206896551724, "grad_norm": 1.3015212636836448, "learning_rate": 1.6603382809803246e-05, "loss": 2.3237, "reason_loss": 0.5222841501235962, "step": 814, "utility_loss": 1.8014638423919678 }, { "cosine_similarity": 0.5333642758653117, "epoch": 0.7595526561043803, "grad_norm": 1.368123512951761, "learning_rate": 1.6596479116327235e-05, "loss": 1.8359, "reason_loss": 0.46653667092323303, "step": 815, "utility_loss": 1.369368314743042 }, { "cosine_similarity": 0.31440210263237356, "epoch": 0.7604846225535881, "grad_norm": 1.2792405809042893, "learning_rate": 1.6589575422851227e-05, "loss": 1.6631, "reason_loss": 0.5135912299156189, "step": 816, "utility_loss": 1.1495001316070557 }, { "cosine_similarity": 0.16347958996901413, "epoch": 0.7614165890027959, "grad_norm": 1.4788304454902412, "learning_rate": 1.6582671729375216e-05, "loss": 2.2429, "reason_loss": 0.5445472598075867, "step": 817, "utility_loss": 1.6983650922775269 }, { "cosine_similarity": 0.26941957865361565, "epoch": 0.7623485554520038, "grad_norm": 1.1141510546535105, "learning_rate": 1.657576803589921e-05, "loss": 1.7906, "reason_loss": 0.5404378771781921, "step": 818, "utility_loss": 1.250161051750183 }, { "cosine_similarity": 0.5933031141433848, "epoch": 0.7632805219012115, "grad_norm": 1.2670192740499502, "learning_rate": 1.6568864342423197e-05, "loss": 1.9694, "reason_loss": 0.5062586665153503, "step": 819, "utility_loss": 1.4631133079528809 }, { "cosine_similarity": 0.09537636959975142, "epoch": 0.7642124883504194, "grad_norm": 1.2549933528430184, "learning_rate": 1.656196064894719e-05, "loss": 1.9204, "reason_loss": 0.5483211278915405, "step": 820, "utility_loss": 1.3720952272415161 }, { "cosine_similarity": 0.441750226740855, "epoch": 0.7651444547996272, "grad_norm": 1.3263162814472467, "learning_rate": 1.655505695547118e-05, "loss": 2.0979, "reason_loss": 0.5012762546539307, "step": 821, "utility_loss": 1.596635341644287 }, { "cosine_similarity": 0.3357095763224105, "epoch": 0.766076421248835, "grad_norm": 1.8072206496532361, "learning_rate": 1.6548153261995168e-05, "loss": 2.1185, "reason_loss": 0.5284889936447144, "step": 822, "utility_loss": 1.5900523662567139 }, { "cosine_similarity": -0.13451707691218157, "epoch": 0.7670083876980429, "grad_norm": 1.623006874251328, "learning_rate": 1.654124956851916e-05, "loss": 2.2896, "reason_loss": 0.5040380954742432, "step": 823, "utility_loss": 1.785559892654419 }, { "cosine_similarity": 0.06268694235225981, "epoch": 0.7679403541472507, "grad_norm": 1.6666976021680813, "learning_rate": 1.653434587504315e-05, "loss": 2.5954, "reason_loss": 0.5410939455032349, "step": 824, "utility_loss": 2.054288148880005 }, { "cosine_similarity": -0.034800463454870194, "epoch": 0.7688723205964585, "grad_norm": 1.9409290034993913, "learning_rate": 1.652744218156714e-05, "loss": 2.3825, "reason_loss": 0.5271710753440857, "step": 825, "utility_loss": 1.8553721904754639 }, { "cosine_similarity": -0.008664068431300109, "epoch": 0.7698042870456664, "grad_norm": 1.1745096514318627, "learning_rate": 1.6520538488091128e-05, "loss": 1.9185, "reason_loss": 0.513282299041748, "step": 826, "utility_loss": 1.405256986618042 }, { "cosine_similarity": 0.1666070261739518, "epoch": 0.7707362534948742, "grad_norm": 2.5351073001686237, "learning_rate": 1.651363479461512e-05, "loss": 2.2657, "reason_loss": 0.4905385971069336, "step": 827, "utility_loss": 1.7751214504241943 }, { "cosine_similarity": 0.27192508945604715, "epoch": 0.771668219944082, "grad_norm": 1.3190505309303846, "learning_rate": 1.6506731101139112e-05, "loss": 2.1872, "reason_loss": 0.5282942056655884, "step": 828, "utility_loss": 1.6589548587799072 }, { "cosine_similarity": 0.27121183881218225, "epoch": 0.7726001863932899, "grad_norm": 2.068330760530035, "learning_rate": 1.64998274076631e-05, "loss": 1.8916, "reason_loss": 0.5132781267166138, "step": 829, "utility_loss": 1.3783516883850098 }, { "cosine_similarity": 0.5491555868106651, "epoch": 0.7735321528424977, "grad_norm": 1.6648090623817748, "learning_rate": 1.6492923714187094e-05, "loss": 2.4273, "reason_loss": 0.5251809358596802, "step": 830, "utility_loss": 1.9020795822143555 }, { "cosine_similarity": 0.7583303622601187, "epoch": 0.7744641192917054, "grad_norm": 1.1893753954723225, "learning_rate": 1.6486020020711083e-05, "loss": 1.9748, "reason_loss": 0.539460301399231, "step": 831, "utility_loss": 1.4352952241897583 }, { "cosine_similarity": 0.07234209135178322, "epoch": 0.7753960857409133, "grad_norm": 1.352866004734849, "learning_rate": 1.6479116327235072e-05, "loss": 1.9408, "reason_loss": 0.5159143805503845, "step": 832, "utility_loss": 1.424925446510315 }, { "cosine_similarity": 0.5497654583174325, "epoch": 0.7763280521901211, "grad_norm": 1.2802973561327318, "learning_rate": 1.647221263375906e-05, "loss": 2.2199, "reason_loss": 0.49534082412719727, "step": 833, "utility_loss": 1.724582552909851 }, { "cosine_similarity": 0.12944123466365576, "epoch": 0.777260018639329, "grad_norm": 1.1567502950257618, "learning_rate": 1.6465308940283053e-05, "loss": 2.1112, "reason_loss": 0.5208456516265869, "step": 834, "utility_loss": 1.5903337001800537 }, { "cosine_similarity": 0.5633826504889677, "epoch": 0.7781919850885368, "grad_norm": 1.7425136555522838, "learning_rate": 1.6458405246807042e-05, "loss": 2.2046, "reason_loss": 0.5396255254745483, "step": 835, "utility_loss": 1.664944052696228 }, { "cosine_similarity": 0.17477344323443963, "epoch": 0.7791239515377446, "grad_norm": 2.263109508421637, "learning_rate": 1.645150155333103e-05, "loss": 2.158, "reason_loss": 0.5119957327842712, "step": 836, "utility_loss": 1.6459832191467285 }, { "cosine_similarity": 0.6197453118733839, "epoch": 0.7800559179869525, "grad_norm": 1.2935585881618623, "learning_rate": 1.6444597859855024e-05, "loss": 2.3245, "reason_loss": 0.5168744325637817, "step": 837, "utility_loss": 1.8076651096343994 }, { "cosine_similarity": -0.059736884233480496, "epoch": 0.7809878844361603, "grad_norm": 1.7161774039695046, "learning_rate": 1.6437694166379013e-05, "loss": 2.0238, "reason_loss": 0.5363942384719849, "step": 838, "utility_loss": 1.4874080419540405 }, { "cosine_similarity": 0.3904618727655038, "epoch": 0.7819198508853681, "grad_norm": 1.4453303347907422, "learning_rate": 1.6430790472903005e-05, "loss": 2.3648, "reason_loss": 0.5077948570251465, "step": 839, "utility_loss": 1.8570115566253662 }, { "cosine_similarity": 0.45672156347645254, "epoch": 0.782851817334576, "grad_norm": 1.4263937727698506, "learning_rate": 1.6423886779426994e-05, "loss": 2.2297, "reason_loss": 0.5633323192596436, "step": 840, "utility_loss": 1.6663758754730225 }, { "cosine_similarity": 0.2586427176258794, "epoch": 0.7837837837837838, "grad_norm": 1.6542408296084408, "learning_rate": 1.6416983085950987e-05, "loss": 2.2115, "reason_loss": 0.5531784296035767, "step": 841, "utility_loss": 1.6583678722381592 }, { "cosine_similarity": 0.3502869472319847, "epoch": 0.7847157502329916, "grad_norm": 1.2967912474127787, "learning_rate": 1.6410079392474976e-05, "loss": 1.9075, "reason_loss": 0.535262942314148, "step": 842, "utility_loss": 1.3722827434539795 }, { "cosine_similarity": 0.18436403917286934, "epoch": 0.7856477166821995, "grad_norm": 1.6362957004815464, "learning_rate": 1.6403175698998965e-05, "loss": 2.2329, "reason_loss": 0.5370278358459473, "step": 843, "utility_loss": 1.6958556175231934 }, { "cosine_similarity": 0.09592678489414119, "epoch": 0.7865796831314072, "grad_norm": 1.377631423543959, "learning_rate": 1.6396272005522954e-05, "loss": 1.8947, "reason_loss": 0.535686194896698, "step": 844, "utility_loss": 1.3590216636657715 }, { "cosine_similarity": -0.2607452408813707, "epoch": 0.7875116495806151, "grad_norm": 1.6876656043758507, "learning_rate": 1.6389368312046946e-05, "loss": 2.1406, "reason_loss": 0.5327192544937134, "step": 845, "utility_loss": 1.6078566312789917 }, { "cosine_similarity": 0.053133756969759444, "epoch": 0.7884436160298229, "grad_norm": 1.5377948780651338, "learning_rate": 1.6382464618570935e-05, "loss": 2.3189, "reason_loss": 0.5461667776107788, "step": 846, "utility_loss": 1.7726880311965942 }, { "cosine_similarity": 0.135775561153551, "epoch": 0.7893755824790307, "grad_norm": 1.5049355176693067, "learning_rate": 1.6375560925094928e-05, "loss": 1.8397, "reason_loss": 0.5110247731208801, "step": 847, "utility_loss": 1.3286958932876587 }, { "cosine_similarity": 0.3757992285319896, "epoch": 0.7903075489282386, "grad_norm": 1.297752233760655, "learning_rate": 1.6368657231618917e-05, "loss": 1.9183, "reason_loss": 0.5220776200294495, "step": 848, "utility_loss": 1.396185040473938 }, { "cosine_similarity": 0.24905233195913523, "epoch": 0.7912395153774464, "grad_norm": 1.4364377889707103, "learning_rate": 1.636175353814291e-05, "loss": 1.8244, "reason_loss": 0.5059149861335754, "step": 849, "utility_loss": 1.3184492588043213 }, { "cosine_similarity": -0.0020431882106230516, "epoch": 0.7921714818266542, "grad_norm": 1.3259671392152954, "learning_rate": 1.63548498446669e-05, "loss": 2.1211, "reason_loss": 0.5339948534965515, "step": 850, "utility_loss": 1.587146520614624 }, { "cosine_similarity": 0.11625171612454753, "epoch": 0.7931034482758621, "grad_norm": 1.289577593614738, "learning_rate": 1.6347946151190887e-05, "loss": 2.4457, "reason_loss": 0.5565953254699707, "step": 851, "utility_loss": 1.8890697956085205 }, { "cosine_similarity": 0.03932386571580653, "epoch": 0.7940354147250699, "grad_norm": 2.43914056394766, "learning_rate": 1.634104245771488e-05, "loss": 2.6335, "reason_loss": 0.5163828134536743, "step": 852, "utility_loss": 2.117112636566162 }, { "cosine_similarity": 0.20594799598778135, "epoch": 0.7949673811742777, "grad_norm": 1.6366357417442783, "learning_rate": 1.633413876423887e-05, "loss": 2.1489, "reason_loss": 0.5010437965393066, "step": 853, "utility_loss": 1.6478873491287231 }, { "cosine_similarity": 0.34919014550986066, "epoch": 0.7958993476234856, "grad_norm": 1.5364793408076023, "learning_rate": 1.6327235070762858e-05, "loss": 2.5239, "reason_loss": 0.5182921886444092, "step": 854, "utility_loss": 2.0055789947509766 }, { "cosine_similarity": 0.19927493993607695, "epoch": 0.7968313140726934, "grad_norm": 2.3664224903647213, "learning_rate": 1.632033137728685e-05, "loss": 2.1677, "reason_loss": 0.5426814556121826, "step": 855, "utility_loss": 1.6249778270721436 }, { "cosine_similarity": -0.003611076764769337, "epoch": 0.7977632805219013, "grad_norm": 1.6863372101371792, "learning_rate": 1.631342768381084e-05, "loss": 2.4365, "reason_loss": 0.5438927412033081, "step": 856, "utility_loss": 1.8926501274108887 }, { "cosine_similarity": -0.01047369227612024, "epoch": 0.798695246971109, "grad_norm": 1.0072734214048065, "learning_rate": 1.6306523990334832e-05, "loss": 1.6774, "reason_loss": 0.522380530834198, "step": 857, "utility_loss": 1.1550372838974 }, { "cosine_similarity": 0.43444165943733015, "epoch": 0.7996272134203168, "grad_norm": 1.1334190313481352, "learning_rate": 1.629962029685882e-05, "loss": 1.8384, "reason_loss": 0.5105188488960266, "step": 858, "utility_loss": 1.3278651237487793 }, { "cosine_similarity": 0.22929124595180514, "epoch": 0.8005591798695247, "grad_norm": 2.040395747376774, "learning_rate": 1.6292716603382813e-05, "loss": 2.501, "reason_loss": 0.5869053602218628, "step": 859, "utility_loss": 1.9140607118606567 }, { "cosine_similarity": 0.15198883026232501, "epoch": 0.8014911463187325, "grad_norm": 1.438684315714728, "learning_rate": 1.6285812909906802e-05, "loss": 2.0596, "reason_loss": 0.5198137760162354, "step": 860, "utility_loss": 1.539765477180481 }, { "cosine_similarity": 0.18894260005385347, "epoch": 0.8024231127679403, "grad_norm": 1.3357768818414204, "learning_rate": 1.627890921643079e-05, "loss": 2.1436, "reason_loss": 0.49582040309906006, "step": 861, "utility_loss": 1.6477564573287964 }, { "cosine_similarity": 0.06609138888930251, "epoch": 0.8033550792171482, "grad_norm": 1.0840330961944609, "learning_rate": 1.627200552295478e-05, "loss": 1.8933, "reason_loss": 0.541733980178833, "step": 862, "utility_loss": 1.3516039848327637 }, { "cosine_similarity": 0.1793677071248785, "epoch": 0.804287045666356, "grad_norm": 1.3716537373490645, "learning_rate": 1.6265101829478773e-05, "loss": 1.9981, "reason_loss": 0.5297866463661194, "step": 863, "utility_loss": 1.4682872295379639 }, { "cosine_similarity": 0.08864622420168587, "epoch": 0.8052190121155638, "grad_norm": 1.5183224760357499, "learning_rate": 1.6258198136002762e-05, "loss": 2.147, "reason_loss": 0.4732222855091095, "step": 864, "utility_loss": 1.6737818717956543 }, { "cosine_similarity": 0.18906549460341562, "epoch": 0.8061509785647717, "grad_norm": 2.210782951548166, "learning_rate": 1.6251294442526754e-05, "loss": 2.2158, "reason_loss": 0.5165759921073914, "step": 865, "utility_loss": 1.699198603630066 }, { "cosine_similarity": 0.14541769598716436, "epoch": 0.8070829450139795, "grad_norm": 1.4482346343557073, "learning_rate": 1.6244390749050743e-05, "loss": 2.3341, "reason_loss": 0.5287860035896301, "step": 866, "utility_loss": 1.8053300380706787 }, { "cosine_similarity": 0.14769445854047952, "epoch": 0.8080149114631874, "grad_norm": 1.3877637445928148, "learning_rate": 1.6237487055574736e-05, "loss": 2.0801, "reason_loss": 0.5122088193893433, "step": 867, "utility_loss": 1.5678846836090088 }, { "cosine_similarity": 0.12049240335677958, "epoch": 0.8089468779123952, "grad_norm": 1.4550056792553943, "learning_rate": 1.6230583362098725e-05, "loss": 2.3897, "reason_loss": 0.5455363988876343, "step": 868, "utility_loss": 1.8441808223724365 }, { "cosine_similarity": 0.16537333181716166, "epoch": 0.809878844361603, "grad_norm": 1.7211396109030155, "learning_rate": 1.6223679668622714e-05, "loss": 2.3426, "reason_loss": 0.5384219884872437, "step": 869, "utility_loss": 1.804155945777893 }, { "cosine_similarity": 0.05508571388055195, "epoch": 0.8108108108108109, "grad_norm": 1.4768351291765094, "learning_rate": 1.6216775975146706e-05, "loss": 2.2436, "reason_loss": 0.5264263153076172, "step": 870, "utility_loss": 1.717126488685608 }, { "cosine_similarity": 0.3425541744153571, "epoch": 0.8117427772600186, "grad_norm": 1.5489079429961394, "learning_rate": 1.6209872281670695e-05, "loss": 1.9559, "reason_loss": 0.5456163287162781, "step": 871, "utility_loss": 1.410315752029419 }, { "cosine_similarity": 0.12810513968247447, "epoch": 0.8126747437092264, "grad_norm": 1.4338903561030167, "learning_rate": 1.6202968588194684e-05, "loss": 2.1315, "reason_loss": 0.5202457904815674, "step": 872, "utility_loss": 1.6112890243530273 }, { "cosine_similarity": -0.25744046892789163, "epoch": 0.8136067101584343, "grad_norm": 1.7771919748719336, "learning_rate": 1.6196064894718673e-05, "loss": 2.2763, "reason_loss": 0.5032246112823486, "step": 873, "utility_loss": 1.7730591297149658 }, { "cosine_similarity": -0.13466745985445774, "epoch": 0.8145386766076421, "grad_norm": 1.2258211234268626, "learning_rate": 1.6189161201242666e-05, "loss": 1.8409, "reason_loss": 0.5312086939811707, "step": 874, "utility_loss": 1.3097366094589233 }, { "cosine_similarity": -0.33740686968009054, "epoch": 0.8154706430568499, "grad_norm": 1.409862124319221, "learning_rate": 1.6182257507766658e-05, "loss": 2.2725, "reason_loss": 0.5220919847488403, "step": 875, "utility_loss": 1.7503776550292969 }, { "cosine_similarity": -0.003381730534347299, "epoch": 0.8164026095060578, "grad_norm": 1.3828598071345652, "learning_rate": 1.6175353814290647e-05, "loss": 2.1184, "reason_loss": 0.519080400466919, "step": 876, "utility_loss": 1.599355936050415 }, { "cosine_similarity": 0.24223578746207244, "epoch": 0.8173345759552656, "grad_norm": 1.4484689370212878, "learning_rate": 1.6168450120814636e-05, "loss": 1.9174, "reason_loss": 0.5372572541236877, "step": 877, "utility_loss": 1.3801114559173584 }, { "cosine_similarity": -0.25896378705889644, "epoch": 0.8182665424044734, "grad_norm": 1.5018383962032946, "learning_rate": 1.616154642733863e-05, "loss": 2.0329, "reason_loss": 0.4841494560241699, "step": 878, "utility_loss": 1.5487422943115234 }, { "cosine_similarity": 0.22064199026270306, "epoch": 0.8191985088536813, "grad_norm": 1.201674063498759, "learning_rate": 1.6154642733862618e-05, "loss": 1.9252, "reason_loss": 0.5186326503753662, "step": 879, "utility_loss": 1.4065284729003906 }, { "cosine_similarity": 0.08818669552612528, "epoch": 0.8201304753028891, "grad_norm": 1.2034326128821278, "learning_rate": 1.6147739040386607e-05, "loss": 1.8499, "reason_loss": 0.49916911125183105, "step": 880, "utility_loss": 1.3507435321807861 }, { "cosine_similarity": 0.23677079019051964, "epoch": 0.821062441752097, "grad_norm": 1.5901038084066295, "learning_rate": 1.61408353469106e-05, "loss": 2.2265, "reason_loss": 0.5325629711151123, "step": 881, "utility_loss": 1.6939102411270142 }, { "cosine_similarity": 0.27783589387548263, "epoch": 0.8219944082013048, "grad_norm": 1.4834863434237597, "learning_rate": 1.6133931653434588e-05, "loss": 2.1898, "reason_loss": 0.526611328125, "step": 882, "utility_loss": 1.6631965637207031 }, { "cosine_similarity": 0.48300009871805094, "epoch": 0.8229263746505125, "grad_norm": 1.2519780609529414, "learning_rate": 1.6127027959958577e-05, "loss": 2.0439, "reason_loss": 0.5453062653541565, "step": 883, "utility_loss": 1.498557686805725 }, { "cosine_similarity": 0.39065307073780564, "epoch": 0.8238583410997204, "grad_norm": 1.229626885146603, "learning_rate": 1.612012426648257e-05, "loss": 2.0003, "reason_loss": 0.5269237160682678, "step": 884, "utility_loss": 1.47340989112854 }, { "cosine_similarity": 0.4479212699592161, "epoch": 0.8247903075489282, "grad_norm": 1.4202553997381477, "learning_rate": 1.6113220573006562e-05, "loss": 2.2171, "reason_loss": 0.5255386829376221, "step": 885, "utility_loss": 1.6915333271026611 }, { "cosine_similarity": 0.34559669238217183, "epoch": 0.825722273998136, "grad_norm": 1.482771617182045, "learning_rate": 1.610631687953055e-05, "loss": 1.9515, "reason_loss": 0.5098735094070435, "step": 886, "utility_loss": 1.441637635231018 }, { "cosine_similarity": 0.47703739529348704, "epoch": 0.8266542404473439, "grad_norm": 1.482577364830142, "learning_rate": 1.609941318605454e-05, "loss": 1.9042, "reason_loss": 0.5098311901092529, "step": 887, "utility_loss": 1.3943859338760376 }, { "cosine_similarity": 0.8491904927476657, "epoch": 0.8275862068965517, "grad_norm": 1.9085524326724503, "learning_rate": 1.6092509492578533e-05, "loss": 2.058, "reason_loss": 0.5135733485221863, "step": 888, "utility_loss": 1.5443968772888184 }, { "cosine_similarity": 0.22171809068085158, "epoch": 0.8285181733457595, "grad_norm": 1.3137732268443176, "learning_rate": 1.608560579910252e-05, "loss": 2.0197, "reason_loss": 0.512012779712677, "step": 889, "utility_loss": 1.507690191268921 }, { "cosine_similarity": 0.2342439213161338, "epoch": 0.8294501397949674, "grad_norm": 1.4826255148554113, "learning_rate": 1.607870210562651e-05, "loss": 2.1194, "reason_loss": 0.5293402075767517, "step": 890, "utility_loss": 1.5900596380233765 }, { "cosine_similarity": 0.025033441658360384, "epoch": 0.8303821062441752, "grad_norm": 1.481104551938159, "learning_rate": 1.60717984121505e-05, "loss": 1.9802, "reason_loss": 0.5393192768096924, "step": 891, "utility_loss": 1.4408583641052246 }, { "cosine_similarity": 0.2006275277946287, "epoch": 0.8313140726933831, "grad_norm": 1.3107550912573336, "learning_rate": 1.6064894718674492e-05, "loss": 1.7464, "reason_loss": 0.5832493305206299, "step": 892, "utility_loss": 1.163118600845337 }, { "cosine_similarity": 0.19865583798877406, "epoch": 0.8322460391425909, "grad_norm": 1.1537879933940356, "learning_rate": 1.605799102519848e-05, "loss": 1.9451, "reason_loss": 0.516869306564331, "step": 893, "utility_loss": 1.4282407760620117 }, { "cosine_similarity": 0.3308278077281915, "epoch": 0.8331780055917987, "grad_norm": 1.2517266377001892, "learning_rate": 1.6051087331722474e-05, "loss": 2.0405, "reason_loss": 0.5225299596786499, "step": 894, "utility_loss": 1.517940878868103 }, { "cosine_similarity": 0.37895484232094834, "epoch": 0.8341099720410066, "grad_norm": 1.435332696590044, "learning_rate": 1.6044183638246463e-05, "loss": 2.1999, "reason_loss": 0.5364679098129272, "step": 895, "utility_loss": 1.6634124517440796 }, { "cosine_similarity": 0.18281961689624157, "epoch": 0.8350419384902144, "grad_norm": 1.4441709490543972, "learning_rate": 1.6037279944770455e-05, "loss": 2.4323, "reason_loss": 0.5279226899147034, "step": 896, "utility_loss": 1.9044020175933838 }, { "cosine_similarity": 0.03311021689753742, "epoch": 0.8359739049394221, "grad_norm": 1.42041009169911, "learning_rate": 1.6030376251294444e-05, "loss": 1.8116, "reason_loss": 0.49793851375579834, "step": 897, "utility_loss": 1.3136128187179565 }, { "cosine_similarity": 0.20197957853948198, "epoch": 0.83690587138863, "grad_norm": 1.1736309213257552, "learning_rate": 1.6023472557818433e-05, "loss": 1.7282, "reason_loss": 0.5361611843109131, "step": 898, "utility_loss": 1.192002534866333 }, { "cosine_similarity": -0.03291036883532565, "epoch": 0.8378378378378378, "grad_norm": 1.4015447017412046, "learning_rate": 1.6016568864342426e-05, "loss": 2.2886, "reason_loss": 0.5099312663078308, "step": 899, "utility_loss": 1.7786285877227783 }, { "cosine_similarity": 0.3418221070887694, "epoch": 0.8387698042870456, "grad_norm": 1.6323629482859927, "learning_rate": 1.6009665170866415e-05, "loss": 2.3215, "reason_loss": 0.5155770182609558, "step": 900, "utility_loss": 1.8058781623840332 }, { "cosine_similarity": 0.25907989482855664, "epoch": 0.8397017707362535, "grad_norm": 1.605189302430356, "learning_rate": 1.6002761477390404e-05, "loss": 2.2551, "reason_loss": 0.5375561714172363, "step": 901, "utility_loss": 1.7175183296203613 }, { "cosine_similarity": 0.38157973879569024, "epoch": 0.8406337371854613, "grad_norm": 1.3392986790275077, "learning_rate": 1.5995857783914396e-05, "loss": 2.0045, "reason_loss": 0.5145851373672485, "step": 902, "utility_loss": 1.489917278289795 }, { "cosine_similarity": 0.38418968768911527, "epoch": 0.8415657036346692, "grad_norm": 1.1957913596646477, "learning_rate": 1.5988954090438385e-05, "loss": 1.7873, "reason_loss": 0.5279034376144409, "step": 903, "utility_loss": 1.2594408988952637 }, { "cosine_similarity": 0.11424414374583869, "epoch": 0.842497670083877, "grad_norm": 1.1896152210388373, "learning_rate": 1.5982050396962378e-05, "loss": 2.0919, "reason_loss": 0.5215993523597717, "step": 904, "utility_loss": 1.5702736377716064 }, { "cosine_similarity": 0.14681538786971662, "epoch": 0.8434296365330848, "grad_norm": 1.4304175354844457, "learning_rate": 1.5975146703486367e-05, "loss": 2.0457, "reason_loss": 0.49058419466018677, "step": 905, "utility_loss": 1.5550718307495117 }, { "cosine_similarity": 0.08808955960523725, "epoch": 0.8443616029822927, "grad_norm": 1.3411852085331284, "learning_rate": 1.5968243010010356e-05, "loss": 2.1632, "reason_loss": 0.5543804168701172, "step": 906, "utility_loss": 1.6088593006134033 }, { "cosine_similarity": 0.1219375382688162, "epoch": 0.8452935694315005, "grad_norm": 1.5445978570446062, "learning_rate": 1.5961339316534348e-05, "loss": 2.3865, "reason_loss": 0.5463793277740479, "step": 907, "utility_loss": 1.8401076793670654 }, { "cosine_similarity": 0.41342296122768457, "epoch": 0.8462255358807083, "grad_norm": 1.4369317356300646, "learning_rate": 1.5954435623058337e-05, "loss": 2.1831, "reason_loss": 0.5182272791862488, "step": 908, "utility_loss": 1.66483473777771 }, { "cosine_similarity": -0.39090630200592313, "epoch": 0.8471575023299162, "grad_norm": 1.3222009599527655, "learning_rate": 1.5947531929582326e-05, "loss": 1.8267, "reason_loss": 0.49789297580718994, "step": 909, "utility_loss": 1.3288055658340454 }, { "cosine_similarity": 0.43739615598706655, "epoch": 0.848089468779124, "grad_norm": 3.28411399042869, "learning_rate": 1.594062823610632e-05, "loss": 1.9478, "reason_loss": 0.5193644762039185, "step": 910, "utility_loss": 1.4284725189208984 }, { "cosine_similarity": 0.4198897968859528, "epoch": 0.8490214352283317, "grad_norm": 1.2239449795968287, "learning_rate": 1.5933724542630308e-05, "loss": 2.0936, "reason_loss": 0.5365056991577148, "step": 911, "utility_loss": 1.55708646774292 }, { "cosine_similarity": 0.3715117250289283, "epoch": 0.8499534016775396, "grad_norm": 1.246968317204697, "learning_rate": 1.59268208491543e-05, "loss": 2.1683, "reason_loss": 0.486498087644577, "step": 912, "utility_loss": 1.6818163394927979 }, { "cosine_similarity": 0.019320077079706694, "epoch": 0.8508853681267474, "grad_norm": 1.2780204457126894, "learning_rate": 1.591991715567829e-05, "loss": 2.1531, "reason_loss": 0.5164871215820312, "step": 913, "utility_loss": 1.6366381645202637 }, { "cosine_similarity": -0.009047965324542398, "epoch": 0.8518173345759553, "grad_norm": 1.4222981959342251, "learning_rate": 1.591301346220228e-05, "loss": 1.9976, "reason_loss": 0.5102956891059875, "step": 914, "utility_loss": 1.4872829914093018 }, { "cosine_similarity": 0.07408921558681021, "epoch": 0.8527493010251631, "grad_norm": 3.915952341638121, "learning_rate": 1.590610976872627e-05, "loss": 2.0877, "reason_loss": 0.5295779705047607, "step": 915, "utility_loss": 1.5581551790237427 }, { "cosine_similarity": 0.3038462895497893, "epoch": 0.8536812674743709, "grad_norm": 1.3917294754077616, "learning_rate": 1.589920607525026e-05, "loss": 2.4516, "reason_loss": 0.519474983215332, "step": 916, "utility_loss": 1.9321686029434204 }, { "cosine_similarity": 0.5229442283438698, "epoch": 0.8546132339235788, "grad_norm": 1.486353642248869, "learning_rate": 1.589230238177425e-05, "loss": 2.2998, "reason_loss": 0.5318280458450317, "step": 917, "utility_loss": 1.7679834365844727 }, { "cosine_similarity": -0.036529478970690685, "epoch": 0.8555452003727866, "grad_norm": 1.2224142112948928, "learning_rate": 1.588539868829824e-05, "loss": 1.8636, "reason_loss": 0.5122095942497253, "step": 918, "utility_loss": 1.3514134883880615 }, { "cosine_similarity": -0.5430366904368586, "epoch": 0.8564771668219944, "grad_norm": 1.3559358113646236, "learning_rate": 1.587849499482223e-05, "loss": 2.366, "reason_loss": 0.5360242128372192, "step": 919, "utility_loss": 1.8299486637115479 }, { "cosine_similarity": -0.06901460979317052, "epoch": 0.8574091332712023, "grad_norm": 1.2054438819100504, "learning_rate": 1.5871591301346222e-05, "loss": 2.0091, "reason_loss": 0.5339469909667969, "step": 920, "utility_loss": 1.4751681089401245 }, { "cosine_similarity": 0.15401252566000434, "epoch": 0.8583410997204101, "grad_norm": 1.263476928230569, "learning_rate": 1.586468760787021e-05, "loss": 1.8651, "reason_loss": 0.49135178327560425, "step": 921, "utility_loss": 1.373701572418213 }, { "cosine_similarity": 0.7568673432812332, "epoch": 0.8592730661696178, "grad_norm": 1.3522973111374577, "learning_rate": 1.5857783914394204e-05, "loss": 2.3446, "reason_loss": 0.4990406334400177, "step": 922, "utility_loss": 1.845568299293518 }, { "cosine_similarity": 0.18959839824325198, "epoch": 0.8602050326188257, "grad_norm": 1.5262535416810339, "learning_rate": 1.5850880220918193e-05, "loss": 2.0492, "reason_loss": 0.5271984934806824, "step": 923, "utility_loss": 1.5220410823822021 }, { "cosine_similarity": -0.020775662883026593, "epoch": 0.8611369990680335, "grad_norm": 1.1723644701821032, "learning_rate": 1.5843976527442182e-05, "loss": 1.9716, "reason_loss": 0.5144271850585938, "step": 924, "utility_loss": 1.4571560621261597 }, { "cosine_similarity": 0.19225827170681098, "epoch": 0.8620689655172413, "grad_norm": 1.8264726146596195, "learning_rate": 1.5837072833966174e-05, "loss": 2.1375, "reason_loss": 0.5189777612686157, "step": 925, "utility_loss": 1.6185367107391357 }, { "cosine_similarity": 0.3846784264696435, "epoch": 0.8630009319664492, "grad_norm": 1.2450307608736992, "learning_rate": 1.5830169140490164e-05, "loss": 2.2019, "reason_loss": 0.49950313568115234, "step": 926, "utility_loss": 1.7023557424545288 }, { "cosine_similarity": -0.017412585687161437, "epoch": 0.863932898415657, "grad_norm": 1.139021209480942, "learning_rate": 1.5823265447014153e-05, "loss": 1.8758, "reason_loss": 0.524536669254303, "step": 927, "utility_loss": 1.3513046503067017 }, { "cosine_similarity": -0.0737790822511054, "epoch": 0.8648648648648649, "grad_norm": 1.423625478602621, "learning_rate": 1.5816361753538145e-05, "loss": 2.3325, "reason_loss": 0.5024152398109436, "step": 928, "utility_loss": 1.8301291465759277 }, { "cosine_similarity": 0.5199950254320519, "epoch": 0.8657968313140727, "grad_norm": 1.6682586630056278, "learning_rate": 1.5809458060062134e-05, "loss": 2.0569, "reason_loss": 0.5024251937866211, "step": 929, "utility_loss": 1.5545146465301514 }, { "cosine_similarity": 0.2430638093451206, "epoch": 0.8667287977632805, "grad_norm": 1.4522616092753062, "learning_rate": 1.5802554366586126e-05, "loss": 1.9737, "reason_loss": 0.5017093420028687, "step": 930, "utility_loss": 1.4720045328140259 }, { "cosine_similarity": 0.1262791965508589, "epoch": 0.8676607642124884, "grad_norm": 1.2725063141865476, "learning_rate": 1.5795650673110115e-05, "loss": 2.3311, "reason_loss": 0.5242868661880493, "step": 931, "utility_loss": 1.8068345785140991 }, { "cosine_similarity": 0.2534217394908787, "epoch": 0.8685927306616962, "grad_norm": 3.929684571512446, "learning_rate": 1.5788746979634108e-05, "loss": 2.1908, "reason_loss": 0.5224035978317261, "step": 932, "utility_loss": 1.668418288230896 }, { "cosine_similarity": 0.16406206014047503, "epoch": 0.869524697110904, "grad_norm": 1.1981367673151646, "learning_rate": 1.5781843286158097e-05, "loss": 1.918, "reason_loss": 0.5391570925712585, "step": 933, "utility_loss": 1.3788728713989258 }, { "cosine_similarity": 0.24428674755844276, "epoch": 0.8704566635601119, "grad_norm": 1.281442355957762, "learning_rate": 1.5774939592682086e-05, "loss": 2.0766, "reason_loss": 0.5069922208786011, "step": 934, "utility_loss": 1.569617748260498 }, { "cosine_similarity": 0.35659597614719585, "epoch": 0.8713886300093197, "grad_norm": 1.1413200897049012, "learning_rate": 1.5768035899206075e-05, "loss": 2.0337, "reason_loss": 0.5243185758590698, "step": 935, "utility_loss": 1.5094022750854492 }, { "cosine_similarity": 0.40793406822231004, "epoch": 0.8723205964585274, "grad_norm": 1.6844205045521377, "learning_rate": 1.5761132205730067e-05, "loss": 2.302, "reason_loss": 0.5295814871788025, "step": 936, "utility_loss": 1.7724401950836182 }, { "cosine_similarity": 0.003607330310257527, "epoch": 0.8732525629077353, "grad_norm": 1.4457771429161512, "learning_rate": 1.5754228512254056e-05, "loss": 2.1632, "reason_loss": 0.5120607614517212, "step": 937, "utility_loss": 1.6511857509613037 }, { "cosine_similarity": 0.19411794987652947, "epoch": 0.8741845293569431, "grad_norm": 1.3750056207265073, "learning_rate": 1.5747324818778046e-05, "loss": 1.7983, "reason_loss": 0.5022905468940735, "step": 938, "utility_loss": 1.296044111251831 }, { "cosine_similarity": 0.422890199965373, "epoch": 0.875116495806151, "grad_norm": 1.2414744561972069, "learning_rate": 1.5740421125302038e-05, "loss": 2.1102, "reason_loss": 0.5178453922271729, "step": 939, "utility_loss": 1.592348337173462 }, { "cosine_similarity": 0.35032589172124584, "epoch": 0.8760484622553588, "grad_norm": 1.4398971984435003, "learning_rate": 1.5733517431826027e-05, "loss": 2.2797, "reason_loss": 0.527381181716919, "step": 940, "utility_loss": 1.7523179054260254 }, { "cosine_similarity": 0.45310867921171527, "epoch": 0.8769804287045666, "grad_norm": 1.6986180965265565, "learning_rate": 1.572661373835002e-05, "loss": 2.276, "reason_loss": 0.5014745593070984, "step": 941, "utility_loss": 1.774533987045288 }, { "cosine_similarity": 0.38159625728434443, "epoch": 0.8779123951537745, "grad_norm": 1.2489748427619032, "learning_rate": 1.571971004487401e-05, "loss": 2.0776, "reason_loss": 0.5248359441757202, "step": 942, "utility_loss": 1.5527538061141968 }, { "cosine_similarity": 0.24149899515718307, "epoch": 0.8788443616029823, "grad_norm": 1.2679271621680859, "learning_rate": 1.5712806351398e-05, "loss": 2.1145, "reason_loss": 0.5200295448303223, "step": 943, "utility_loss": 1.5944771766662598 }, { "cosine_similarity": 0.15962473741351205, "epoch": 0.8797763280521901, "grad_norm": 1.488188338934083, "learning_rate": 1.570590265792199e-05, "loss": 2.1391, "reason_loss": 0.5560997128486633, "step": 944, "utility_loss": 1.5830186605453491 }, { "cosine_similarity": 0.4446132821364237, "epoch": 0.880708294501398, "grad_norm": 1.4030296094177257, "learning_rate": 1.569899896444598e-05, "loss": 1.7188, "reason_loss": 0.5130500793457031, "step": 945, "utility_loss": 1.205771803855896 }, { "cosine_similarity": 0.6677920959654481, "epoch": 0.8816402609506058, "grad_norm": 1.6367135442891503, "learning_rate": 1.5692095270969968e-05, "loss": 2.1598, "reason_loss": 0.5353392958641052, "step": 946, "utility_loss": 1.6244468688964844 }, { "cosine_similarity": 0.2597599251487165, "epoch": 0.8825722273998136, "grad_norm": 1.1810181227371752, "learning_rate": 1.568519157749396e-05, "loss": 2.1806, "reason_loss": 0.5295809507369995, "step": 947, "utility_loss": 1.6510233879089355 }, { "cosine_similarity": -0.05188826973529927, "epoch": 0.8835041938490215, "grad_norm": 2.2321823574870123, "learning_rate": 1.567828788401795e-05, "loss": 2.4019, "reason_loss": 0.5207107067108154, "step": 948, "utility_loss": 1.8811522722244263 }, { "cosine_similarity": 0.21887669374244262, "epoch": 0.8844361602982292, "grad_norm": 1.146947644682379, "learning_rate": 1.5671384190541942e-05, "loss": 1.9303, "reason_loss": 0.5465517044067383, "step": 949, "utility_loss": 1.3837124109268188 }, { "cosine_similarity": 0.33522301231937773, "epoch": 0.8853681267474371, "grad_norm": 1.454228745922491, "learning_rate": 1.566448049706593e-05, "loss": 2.3075, "reason_loss": 0.5202068090438843, "step": 950, "utility_loss": 1.7873060703277588 }, { "cosine_similarity": 0.2665291258779064, "epoch": 0.8863000931966449, "grad_norm": 1.5090992395088514, "learning_rate": 1.5657576803589923e-05, "loss": 2.0094, "reason_loss": 0.5133486986160278, "step": 951, "utility_loss": 1.4960196018218994 }, { "cosine_similarity": 0.3016147978505362, "epoch": 0.8872320596458527, "grad_norm": 1.7080275460224035, "learning_rate": 1.5650673110113912e-05, "loss": 2.2157, "reason_loss": 0.5025068521499634, "step": 952, "utility_loss": 1.7131898403167725 }, { "cosine_similarity": 0.08561953083657771, "epoch": 0.8881640260950606, "grad_norm": 1.2096336738980265, "learning_rate": 1.56437694166379e-05, "loss": 2.0968, "reason_loss": 0.5663344860076904, "step": 953, "utility_loss": 1.5304820537567139 }, { "cosine_similarity": 0.20969912112668446, "epoch": 0.8890959925442684, "grad_norm": 1.4147009352760858, "learning_rate": 1.5636865723161894e-05, "loss": 2.0567, "reason_loss": 0.5285232067108154, "step": 954, "utility_loss": 1.5281612873077393 }, { "cosine_similarity": 0.035355960025668466, "epoch": 0.8900279589934762, "grad_norm": 1.338620887346818, "learning_rate": 1.5629962029685883e-05, "loss": 2.0452, "reason_loss": 0.5516839027404785, "step": 955, "utility_loss": 1.4934790134429932 }, { "cosine_similarity": 0.21302433114247266, "epoch": 0.8909599254426841, "grad_norm": 1.2874879956278933, "learning_rate": 1.5623058336209872e-05, "loss": 2.0585, "reason_loss": 0.5283451676368713, "step": 956, "utility_loss": 1.5301499366760254 }, { "cosine_similarity": -0.010860982504161388, "epoch": 0.8918918918918919, "grad_norm": 1.5419108941313135, "learning_rate": 1.5616154642733864e-05, "loss": 2.299, "reason_loss": 0.5590137243270874, "step": 957, "utility_loss": 1.7399754524230957 }, { "cosine_similarity": 0.3266085984151757, "epoch": 0.8928238583410997, "grad_norm": 1.3411535360199707, "learning_rate": 1.5609250949257853e-05, "loss": 2.3177, "reason_loss": 0.5208139419555664, "step": 958, "utility_loss": 1.7968522310256958 }, { "cosine_similarity": 0.18175806303484837, "epoch": 0.8937558247903076, "grad_norm": 1.13245895430078, "learning_rate": 1.5602347255781846e-05, "loss": 1.6959, "reason_loss": 0.4917140305042267, "step": 959, "utility_loss": 1.2042019367218018 }, { "cosine_similarity": 0.0686461149254474, "epoch": 0.8946877912395154, "grad_norm": 1.2894534528780581, "learning_rate": 1.5595443562305835e-05, "loss": 2.4561, "reason_loss": 0.5148290395736694, "step": 960, "utility_loss": 1.9412651062011719 }, { "cosine_similarity": -0.05742898721031899, "epoch": 0.8956197576887233, "grad_norm": 1.7734847439033845, "learning_rate": 1.5588539868829827e-05, "loss": 2.2947, "reason_loss": 0.535015344619751, "step": 961, "utility_loss": 1.7596544027328491 }, { "cosine_similarity": -0.01938910126718101, "epoch": 0.896551724137931, "grad_norm": 1.269551701964355, "learning_rate": 1.5581636175353816e-05, "loss": 2.1685, "reason_loss": 0.49692484736442566, "step": 962, "utility_loss": 1.6715385913848877 }, { "cosine_similarity": -0.011376390761600916, "epoch": 0.8974836905871388, "grad_norm": 1.3101910508215355, "learning_rate": 1.5574732481877805e-05, "loss": 1.9285, "reason_loss": 0.520148754119873, "step": 963, "utility_loss": 1.4083034992218018 }, { "cosine_similarity": 0.1330766966218037, "epoch": 0.8984156570363467, "grad_norm": 1.4402101158249352, "learning_rate": 1.5567828788401794e-05, "loss": 1.7099, "reason_loss": 0.4898744225502014, "step": 964, "utility_loss": 1.220052719116211 }, { "cosine_similarity": 0.2550193407040692, "epoch": 0.8993476234855545, "grad_norm": 1.480723609189109, "learning_rate": 1.5560925094925787e-05, "loss": 2.0452, "reason_loss": 0.5258780717849731, "step": 965, "utility_loss": 1.5193450450897217 }, { "cosine_similarity": 0.04229250809648878, "epoch": 0.9002795899347623, "grad_norm": 1.180508630322174, "learning_rate": 1.5554021401449776e-05, "loss": 1.8018, "reason_loss": 0.5074405074119568, "step": 966, "utility_loss": 1.294389009475708 }, { "cosine_similarity": 0.010579973797475451, "epoch": 0.9012115563839702, "grad_norm": 1.1165805107940407, "learning_rate": 1.5547117707973768e-05, "loss": 2.1338, "reason_loss": 0.5227648615837097, "step": 967, "utility_loss": 1.6110334396362305 }, { "cosine_similarity": -0.1449801990920641, "epoch": 0.902143522833178, "grad_norm": 1.5137245334176195, "learning_rate": 1.5540214014497757e-05, "loss": 2.0669, "reason_loss": 0.5130971670150757, "step": 968, "utility_loss": 1.5538277626037598 }, { "cosine_similarity": -0.02664324036219329, "epoch": 0.9030754892823858, "grad_norm": 1.6809283368096957, "learning_rate": 1.553331032102175e-05, "loss": 2.303, "reason_loss": 0.5164497494697571, "step": 969, "utility_loss": 1.7865376472473145 }, { "cosine_similarity": 0.4702469737078669, "epoch": 0.9040074557315937, "grad_norm": 1.4925993266812085, "learning_rate": 1.552640662754574e-05, "loss": 2.2589, "reason_loss": 0.4904974699020386, "step": 970, "utility_loss": 1.7684431076049805 }, { "cosine_similarity": 0.21840009182619416, "epoch": 0.9049394221808015, "grad_norm": 1.9252696515506302, "learning_rate": 1.5519502934069728e-05, "loss": 2.0687, "reason_loss": 0.5164822340011597, "step": 971, "utility_loss": 1.5522150993347168 }, { "cosine_similarity": 0.26833050746004783, "epoch": 0.9058713886300093, "grad_norm": 1.5236751526723493, "learning_rate": 1.551259924059372e-05, "loss": 2.1907, "reason_loss": 0.5267958641052246, "step": 972, "utility_loss": 1.6639368534088135 }, { "cosine_similarity": -0.14810177468747732, "epoch": 0.9068033550792172, "grad_norm": 1.5824650906559299, "learning_rate": 1.550569554711771e-05, "loss": 1.8233, "reason_loss": 0.5495967864990234, "step": 973, "utility_loss": 1.2737276554107666 }, { "cosine_similarity": 0.7573813474992065, "epoch": 0.907735321528425, "grad_norm": 1.619782109503455, "learning_rate": 1.54987918536417e-05, "loss": 1.9514, "reason_loss": 0.5412192344665527, "step": 974, "utility_loss": 1.4101401567459106 }, { "cosine_similarity": -0.19108654943904615, "epoch": 0.9086672879776329, "grad_norm": 1.7733752305573645, "learning_rate": 1.5491888160165687e-05, "loss": 2.3246, "reason_loss": 0.5335723161697388, "step": 975, "utility_loss": 1.7909791469573975 }, { "cosine_similarity": 0.4098548695328826, "epoch": 0.9095992544268406, "grad_norm": 1.3416869994947724, "learning_rate": 1.548498446668968e-05, "loss": 2.2332, "reason_loss": 0.5045666694641113, "step": 976, "utility_loss": 1.7286193370819092 }, { "cosine_similarity": 0.4149751908298699, "epoch": 0.9105312208760484, "grad_norm": 1.0992912949790392, "learning_rate": 1.5478080773213672e-05, "loss": 2.0377, "reason_loss": 0.5167431831359863, "step": 977, "utility_loss": 1.5209076404571533 }, { "cosine_similarity": 0.12807120208145642, "epoch": 0.9114631873252563, "grad_norm": 1.656422588163697, "learning_rate": 1.547117707973766e-05, "loss": 1.4996, "reason_loss": 0.5098803043365479, "step": 978, "utility_loss": 0.9896960854530334 }, { "cosine_similarity": 0.12436488735416991, "epoch": 0.9123951537744641, "grad_norm": 1.956030018940321, "learning_rate": 1.5464273386261654e-05, "loss": 1.8277, "reason_loss": 0.5236122012138367, "step": 979, "utility_loss": 1.3041083812713623 }, { "cosine_similarity": 0.4869521588858568, "epoch": 0.9133271202236719, "grad_norm": 1.4715337229438452, "learning_rate": 1.5457369692785643e-05, "loss": 2.038, "reason_loss": 0.5388494729995728, "step": 980, "utility_loss": 1.499180555343628 }, { "cosine_similarity": 0.16548165632490483, "epoch": 0.9142590866728798, "grad_norm": 1.4477543322265813, "learning_rate": 1.5450465999309632e-05, "loss": 2.1907, "reason_loss": 0.517529308795929, "step": 981, "utility_loss": 1.6731626987457275 }, { "cosine_similarity": 0.013086207881661762, "epoch": 0.9151910531220876, "grad_norm": 1.3178917149779734, "learning_rate": 1.544356230583362e-05, "loss": 2.3256, "reason_loss": 0.5011806488037109, "step": 982, "utility_loss": 1.824455738067627 }, { "cosine_similarity": 0.2640474950308117, "epoch": 0.9161230195712954, "grad_norm": 1.6971804912549846, "learning_rate": 1.5436658612357613e-05, "loss": 2.0412, "reason_loss": 0.5267577767372131, "step": 983, "utility_loss": 1.5144476890563965 }, { "cosine_similarity": 0.5234492493764499, "epoch": 0.9170549860205033, "grad_norm": 1.295641816186903, "learning_rate": 1.5429754918881602e-05, "loss": 2.0224, "reason_loss": 0.5153931975364685, "step": 984, "utility_loss": 1.507023572921753 }, { "cosine_similarity": 0.09806501611354888, "epoch": 0.9179869524697111, "grad_norm": 1.4446220179387563, "learning_rate": 1.542285122540559e-05, "loss": 2.0711, "reason_loss": 0.5352205634117126, "step": 985, "utility_loss": 1.5358586311340332 }, { "cosine_similarity": 0.1896234613005848, "epoch": 0.918918918918919, "grad_norm": 1.7546891140681964, "learning_rate": 1.5415947531929584e-05, "loss": 2.495, "reason_loss": 0.49783337116241455, "step": 986, "utility_loss": 1.9972035884857178 }, { "cosine_similarity": 0.42087903775529095, "epoch": 0.9198508853681268, "grad_norm": 1.244571367050983, "learning_rate": 1.5409043838453576e-05, "loss": 1.8302, "reason_loss": 0.522179901599884, "step": 987, "utility_loss": 1.308053970336914 }, { "cosine_similarity": 0.351527632428559, "epoch": 0.9207828518173345, "grad_norm": 1.099375218288772, "learning_rate": 1.5402140144977565e-05, "loss": 2.2276, "reason_loss": 0.5425937175750732, "step": 988, "utility_loss": 1.6849768161773682 }, { "cosine_similarity": 0.1300708413063024, "epoch": 0.9217148182665424, "grad_norm": 1.4129262168607652, "learning_rate": 1.5395236451501554e-05, "loss": 2.1256, "reason_loss": 0.500077486038208, "step": 989, "utility_loss": 1.6254868507385254 }, { "cosine_similarity": 0.30177408671393713, "epoch": 0.9226467847157502, "grad_norm": 1.2829502040857539, "learning_rate": 1.5388332758025547e-05, "loss": 1.9218, "reason_loss": 0.5211941003799438, "step": 990, "utility_loss": 1.4005593061447144 }, { "cosine_similarity": 0.4628405383506702, "epoch": 0.923578751164958, "grad_norm": 1.4233443396043615, "learning_rate": 1.5381429064549536e-05, "loss": 2.2895, "reason_loss": 0.5045596957206726, "step": 991, "utility_loss": 1.7849262952804565 }, { "cosine_similarity": 0.17798751352031444, "epoch": 0.9245107176141659, "grad_norm": 1.352858505449574, "learning_rate": 1.5374525371073525e-05, "loss": 2.2418, "reason_loss": 0.5116474628448486, "step": 992, "utility_loss": 1.7301560640335083 }, { "cosine_similarity": 0.4534040375673104, "epoch": 0.9254426840633737, "grad_norm": 1.9218475397229207, "learning_rate": 1.5367621677597514e-05, "loss": 2.3344, "reason_loss": 0.4797041416168213, "step": 993, "utility_loss": 1.8546490669250488 }, { "cosine_similarity": 0.4753766762017648, "epoch": 0.9263746505125815, "grad_norm": 1.2563401494417916, "learning_rate": 1.5360717984121506e-05, "loss": 2.2205, "reason_loss": 0.5494781732559204, "step": 994, "utility_loss": 1.67104172706604 }, { "cosine_similarity": 0.319136367115894, "epoch": 0.9273066169617894, "grad_norm": 1.622817515853541, "learning_rate": 1.5353814290645495e-05, "loss": 2.0747, "reason_loss": 0.510596513748169, "step": 995, "utility_loss": 1.5640605688095093 }, { "cosine_similarity": 0.24189249714120503, "epoch": 0.9282385834109972, "grad_norm": 1.5806052289820594, "learning_rate": 1.5346910597169488e-05, "loss": 2.1364, "reason_loss": 0.5245083570480347, "step": 996, "utility_loss": 1.611914873123169 }, { "cosine_similarity": 0.2440826700450572, "epoch": 0.9291705498602051, "grad_norm": 1.80615589967663, "learning_rate": 1.5340006903693477e-05, "loss": 2.1543, "reason_loss": 0.5075006484985352, "step": 997, "utility_loss": 1.6468186378479004 }, { "cosine_similarity": 0.22695844267281742, "epoch": 0.9301025163094129, "grad_norm": 1.7752750516644047, "learning_rate": 1.533310321021747e-05, "loss": 2.1493, "reason_loss": 0.5069904327392578, "step": 998, "utility_loss": 1.6423377990722656 }, { "cosine_similarity": -0.024161798330782535, "epoch": 0.9310344827586207, "grad_norm": 1.4435742762821167, "learning_rate": 1.5326199516741458e-05, "loss": 1.8574, "reason_loss": 0.5230615139007568, "step": 999, "utility_loss": 1.3343511819839478 }, { "cosine_similarity": -0.032149682459183385, "epoch": 0.9319664492078286, "grad_norm": 1.4733142988094796, "learning_rate": 1.5319295823265447e-05, "loss": 2.3447, "reason_loss": 0.5132569670677185, "step": 1000, "utility_loss": 1.831453800201416 }, { "cosine_similarity": 0.27785384025145254, "epoch": 0.9328984156570364, "grad_norm": 1.353643144668385, "learning_rate": 1.531239212978944e-05, "loss": 2.0742, "reason_loss": 0.5043401718139648, "step": 1001, "utility_loss": 1.569897174835205 }, { "cosine_similarity": 0.2723253710850547, "epoch": 0.9338303821062441, "grad_norm": 1.3607388996996332, "learning_rate": 1.530548843631343e-05, "loss": 2.1379, "reason_loss": 0.5222967863082886, "step": 1002, "utility_loss": 1.6156309843063354 }, { "cosine_similarity": 0.15035718006623888, "epoch": 0.934762348555452, "grad_norm": 1.1595572220045849, "learning_rate": 1.5298584742837418e-05, "loss": 1.9745, "reason_loss": 0.520175576210022, "step": 1003, "utility_loss": 1.4542829990386963 }, { "cosine_similarity": 0.4310515957546148, "epoch": 0.9356943150046598, "grad_norm": 1.3056654571189468, "learning_rate": 1.529168104936141e-05, "loss": 2.3855, "reason_loss": 0.5300445556640625, "step": 1004, "utility_loss": 1.8554980754852295 }, { "cosine_similarity": -0.05970426748338339, "epoch": 0.9366262814538676, "grad_norm": 1.4655383601341108, "learning_rate": 1.52847773558854e-05, "loss": 2.1834, "reason_loss": 0.5258980393409729, "step": 1005, "utility_loss": 1.6574972867965698 }, { "cosine_similarity": 0.1262649456171518, "epoch": 0.9375582479030755, "grad_norm": 2.3417037939533047, "learning_rate": 1.527787366240939e-05, "loss": 2.2549, "reason_loss": 0.5227569341659546, "step": 1006, "utility_loss": 1.732172966003418 }, { "cosine_similarity": 0.293869772573215, "epoch": 0.9384902143522833, "grad_norm": 1.30805073169078, "learning_rate": 1.527096996893338e-05, "loss": 2.1767, "reason_loss": 0.5084245204925537, "step": 1007, "utility_loss": 1.6683014631271362 }, { "cosine_similarity": 0.4910323551664043, "epoch": 0.9394221808014911, "grad_norm": 1.2032298984501495, "learning_rate": 1.526406627545737e-05, "loss": 2.4194, "reason_loss": 0.4806249737739563, "step": 1008, "utility_loss": 1.9387367963790894 }, { "cosine_similarity": 0.15156106675519193, "epoch": 0.940354147250699, "grad_norm": 1.1222729607759285, "learning_rate": 1.5257162581981362e-05, "loss": 1.7637, "reason_loss": 0.5383565425872803, "step": 1009, "utility_loss": 1.2253687381744385 }, { "cosine_similarity": 0.03163321926905126, "epoch": 0.9412861136999068, "grad_norm": 1.0908167119549645, "learning_rate": 1.5250258888505351e-05, "loss": 1.7189, "reason_loss": 0.5198472738265991, "step": 1010, "utility_loss": 1.1990891695022583 }, { "cosine_similarity": 0.1814922410057099, "epoch": 0.9422180801491147, "grad_norm": 1.3947800655891733, "learning_rate": 1.5243355195029342e-05, "loss": 1.6989, "reason_loss": 0.5376548767089844, "step": 1011, "utility_loss": 1.1612138748168945 }, { "cosine_similarity": 0.6373328233621621, "epoch": 0.9431500465983225, "grad_norm": 1.1926323502238714, "learning_rate": 1.5236451501553333e-05, "loss": 2.5989, "reason_loss": 0.5248677730560303, "step": 1012, "utility_loss": 2.074021577835083 }, { "cosine_similarity": 0.06101730837635493, "epoch": 0.9440820130475303, "grad_norm": 1.0343989527799224, "learning_rate": 1.5229547808077323e-05, "loss": 1.6665, "reason_loss": 0.5129683017730713, "step": 1013, "utility_loss": 1.153571367263794 }, { "cosine_similarity": 0.26877419771668276, "epoch": 0.9450139794967382, "grad_norm": 1.681149647212136, "learning_rate": 1.5222644114601312e-05, "loss": 2.1518, "reason_loss": 0.5092158317565918, "step": 1014, "utility_loss": 1.6425830125808716 }, { "cosine_similarity": 0.40637786404269227, "epoch": 0.9459459459459459, "grad_norm": 1.3436546402730933, "learning_rate": 1.5215740421125303e-05, "loss": 2.2048, "reason_loss": 0.5373548269271851, "step": 1015, "utility_loss": 1.6674535274505615 }, { "cosine_similarity": 0.549140850226911, "epoch": 0.9468779123951537, "grad_norm": 1.4466397370635293, "learning_rate": 1.5208836727649294e-05, "loss": 1.9993, "reason_loss": 0.5126357078552246, "step": 1016, "utility_loss": 1.4866690635681152 }, { "cosine_similarity": 0.1328916003643187, "epoch": 0.9478098788443616, "grad_norm": 1.4270707812939063, "learning_rate": 1.5201933034173285e-05, "loss": 2.0095, "reason_loss": 0.5079113245010376, "step": 1017, "utility_loss": 1.50162672996521 }, { "cosine_similarity": 0.15823528132700204, "epoch": 0.9487418452935694, "grad_norm": 1.2957863541250896, "learning_rate": 1.5195029340697274e-05, "loss": 1.9842, "reason_loss": 0.5295058488845825, "step": 1018, "utility_loss": 1.4547216892242432 }, { "cosine_similarity": 0.04993166321909506, "epoch": 0.9496738117427772, "grad_norm": 1.5543318179467518, "learning_rate": 1.5188125647221266e-05, "loss": 2.1478, "reason_loss": 0.5273449420928955, "step": 1019, "utility_loss": 1.6204590797424316 }, { "cosine_similarity": 0.46607088433026084, "epoch": 0.9506057781919851, "grad_norm": 1.467118520950751, "learning_rate": 1.5181221953745255e-05, "loss": 2.1159, "reason_loss": 0.499634325504303, "step": 1020, "utility_loss": 1.616242527961731 }, { "cosine_similarity": 0.06870432308256758, "epoch": 0.9515377446411929, "grad_norm": 1.2336712926973368, "learning_rate": 1.5174318260269246e-05, "loss": 2.1883, "reason_loss": 0.5211055278778076, "step": 1021, "utility_loss": 1.667213797569275 }, { "cosine_similarity": -0.1390988453117231, "epoch": 0.9524697110904008, "grad_norm": 1.4149989135610488, "learning_rate": 1.5167414566793235e-05, "loss": 2.1726, "reason_loss": 0.522865891456604, "step": 1022, "utility_loss": 1.6497442722320557 }, { "cosine_similarity": 0.06638507174973986, "epoch": 0.9534016775396086, "grad_norm": 1.3542968545237162, "learning_rate": 1.5160510873317227e-05, "loss": 2.213, "reason_loss": 0.5072717666625977, "step": 1023, "utility_loss": 1.705704689025879 }, { "cosine_similarity": 0.16984975633348923, "epoch": 0.9543336439888164, "grad_norm": 1.4479358690303, "learning_rate": 1.5153607179841216e-05, "loss": 2.1337, "reason_loss": 0.5228193998336792, "step": 1024, "utility_loss": 1.6109113693237305 }, { "cosine_similarity": 0.20731501421772205, "epoch": 0.9552656104380243, "grad_norm": 1.2679580499077028, "learning_rate": 1.5146703486365207e-05, "loss": 2.251, "reason_loss": 0.5284132957458496, "step": 1025, "utility_loss": 1.7226145267486572 }, { "cosine_similarity": 0.17784156454462588, "epoch": 0.9561975768872321, "grad_norm": 1.1228884546019506, "learning_rate": 1.5139799792889196e-05, "loss": 2.0806, "reason_loss": 0.5221454501152039, "step": 1026, "utility_loss": 1.5584211349487305 }, { "cosine_similarity": -0.03503495355327605, "epoch": 0.9571295433364398, "grad_norm": 1.3232790956676546, "learning_rate": 1.5132896099413189e-05, "loss": 2.2008, "reason_loss": 0.5228800177574158, "step": 1027, "utility_loss": 1.67789888381958 }, { "cosine_similarity": 0.18769477713853802, "epoch": 0.9580615097856477, "grad_norm": 1.4461989796741845, "learning_rate": 1.5125992405937178e-05, "loss": 2.185, "reason_loss": 0.5653321743011475, "step": 1028, "utility_loss": 1.6197099685668945 }, { "cosine_similarity": -0.08004789768697657, "epoch": 0.9589934762348555, "grad_norm": 1.1634528297362687, "learning_rate": 1.5119088712461167e-05, "loss": 1.9512, "reason_loss": 0.526321530342102, "step": 1029, "utility_loss": 1.4249073266983032 }, { "cosine_similarity": 0.20213104289682313, "epoch": 0.9599254426840633, "grad_norm": 1.3573874841385904, "learning_rate": 1.5112185018985159e-05, "loss": 2.0367, "reason_loss": 0.5088675618171692, "step": 1030, "utility_loss": 1.5277941226959229 }, { "cosine_similarity": 0.0645757314132491, "epoch": 0.9608574091332712, "grad_norm": 1.2764447197421147, "learning_rate": 1.510528132550915e-05, "loss": 2.1398, "reason_loss": 0.5443450808525085, "step": 1031, "utility_loss": 1.5955029726028442 }, { "cosine_similarity": -0.012031638902275314, "epoch": 0.961789375582479, "grad_norm": 1.3296314949098873, "learning_rate": 1.5098377632033139e-05, "loss": 2.1931, "reason_loss": 0.5054205656051636, "step": 1032, "utility_loss": 1.6876660585403442 }, { "cosine_similarity": 0.28456364655143257, "epoch": 0.9627213420316869, "grad_norm": 1.4562727629140546, "learning_rate": 1.5091473938557128e-05, "loss": 2.0773, "reason_loss": 0.5273296236991882, "step": 1033, "utility_loss": 1.5499955415725708 }, { "cosine_similarity": 0.2672920867982851, "epoch": 0.9636533084808947, "grad_norm": 1.9552281945046996, "learning_rate": 1.508457024508112e-05, "loss": 2.0614, "reason_loss": 0.49016863107681274, "step": 1034, "utility_loss": 1.5712627172470093 }, { "cosine_similarity": 0.23584064446418893, "epoch": 0.9645852749301025, "grad_norm": 1.584637378030262, "learning_rate": 1.5077666551605111e-05, "loss": 2.3484, "reason_loss": 0.5126910209655762, "step": 1035, "utility_loss": 1.8357274532318115 }, { "cosine_similarity": 0.4803565258547805, "epoch": 0.9655172413793104, "grad_norm": 3.3962710339698092, "learning_rate": 1.50707628581291e-05, "loss": 2.1251, "reason_loss": 0.5021305680274963, "step": 1036, "utility_loss": 1.6229362487792969 }, { "cosine_similarity": 0.1714064354504707, "epoch": 0.9664492078285182, "grad_norm": 1.337733977975215, "learning_rate": 1.5063859164653089e-05, "loss": 1.7447, "reason_loss": 0.516743540763855, "step": 1037, "utility_loss": 1.2280054092407227 }, { "cosine_similarity": -0.013506254046599116, "epoch": 0.967381174277726, "grad_norm": 1.0902240079611831, "learning_rate": 1.5056955471177081e-05, "loss": 1.8423, "reason_loss": 0.5130891799926758, "step": 1038, "utility_loss": 1.3292464017868042 }, { "cosine_similarity": 0.03270434810198631, "epoch": 0.9683131407269339, "grad_norm": 1.538424524596422, "learning_rate": 1.505005177770107e-05, "loss": 2.1622, "reason_loss": 0.5518155097961426, "step": 1039, "utility_loss": 1.6104333400726318 }, { "cosine_similarity": -0.005404914755231267, "epoch": 0.9692451071761417, "grad_norm": 1.9560502900634786, "learning_rate": 1.5043148084225061e-05, "loss": 1.7373, "reason_loss": 0.48784974217414856, "step": 1040, "utility_loss": 1.249401569366455 }, { "cosine_similarity": 0.1196714569269746, "epoch": 0.9701770736253494, "grad_norm": 1.313378423420695, "learning_rate": 1.5036244390749054e-05, "loss": 1.9711, "reason_loss": 0.5017799139022827, "step": 1041, "utility_loss": 1.4693384170532227 }, { "cosine_similarity": 0.15337099125957931, "epoch": 0.9711090400745573, "grad_norm": 1.8061824856395416, "learning_rate": 1.5029340697273043e-05, "loss": 2.2959, "reason_loss": 0.510030210018158, "step": 1042, "utility_loss": 1.785852074623108 }, { "cosine_similarity": 0.021951773477454566, "epoch": 0.9720410065237651, "grad_norm": 1.7366014395246898, "learning_rate": 1.5022437003797032e-05, "loss": 2.2275, "reason_loss": 0.48999983072280884, "step": 1043, "utility_loss": 1.737520694732666 }, { "cosine_similarity": 0.2443829087009089, "epoch": 0.972972972972973, "grad_norm": 1.6538841814175862, "learning_rate": 1.5015533310321023e-05, "loss": 1.9521, "reason_loss": 0.49656927585601807, "step": 1044, "utility_loss": 1.455517292022705 }, { "cosine_similarity": 0.11342004160366521, "epoch": 0.9739049394221808, "grad_norm": 1.297899011742496, "learning_rate": 1.5008629616845013e-05, "loss": 2.2586, "reason_loss": 0.5154814720153809, "step": 1045, "utility_loss": 1.7431409358978271 }, { "cosine_similarity": 0.09077529021441073, "epoch": 0.9748369058713886, "grad_norm": 1.3291398252872284, "learning_rate": 1.5001725923369004e-05, "loss": 2.1923, "reason_loss": 0.5496464967727661, "step": 1046, "utility_loss": 1.6426552534103394 }, { "cosine_similarity": -0.05188814473986472, "epoch": 0.9757688723205965, "grad_norm": 1.7198258177622112, "learning_rate": 1.4994822229892993e-05, "loss": 2.6839, "reason_loss": 0.5595493316650391, "step": 1047, "utility_loss": 2.1243114471435547 }, { "cosine_similarity": 0.19964577222877342, "epoch": 0.9767008387698043, "grad_norm": 1.2085288584438245, "learning_rate": 1.4987918536416984e-05, "loss": 2.0016, "reason_loss": 0.5096719264984131, "step": 1048, "utility_loss": 1.4919753074645996 }, { "cosine_similarity": 0.1051539952226604, "epoch": 0.9776328052190121, "grad_norm": 1.4428537690145202, "learning_rate": 1.4981014842940974e-05, "loss": 2.3996, "reason_loss": 0.5384706258773804, "step": 1049, "utility_loss": 1.8611650466918945 }, { "cosine_similarity": 0.03054013859615437, "epoch": 0.97856477166822, "grad_norm": 1.6502370581504924, "learning_rate": 1.4974111149464965e-05, "loss": 2.0798, "reason_loss": 0.5492303371429443, "step": 1050, "utility_loss": 1.5305263996124268 }, { "cosine_similarity": -0.1610913215211141, "epoch": 0.9794967381174278, "grad_norm": 1.6347061370798355, "learning_rate": 1.4967207455988954e-05, "loss": 2.0394, "reason_loss": 0.4867359697818756, "step": 1051, "utility_loss": 1.5526297092437744 }, { "cosine_similarity": 0.14324409712116248, "epoch": 0.9804287045666356, "grad_norm": 1.4026026834454017, "learning_rate": 1.4960303762512947e-05, "loss": 1.9145, "reason_loss": 0.5244504809379578, "step": 1052, "utility_loss": 1.3900024890899658 }, { "cosine_similarity": 0.2863361662932841, "epoch": 0.9813606710158435, "grad_norm": 1.4425893675218742, "learning_rate": 1.4953400069036936e-05, "loss": 2.2094, "reason_loss": 0.5117427706718445, "step": 1053, "utility_loss": 1.6976613998413086 }, { "cosine_similarity": 0.0022349149753083278, "epoch": 0.9822926374650512, "grad_norm": 1.2315517135513538, "learning_rate": 1.4946496375560926e-05, "loss": 2.1527, "reason_loss": 0.5053653717041016, "step": 1054, "utility_loss": 1.6473629474639893 }, { "cosine_similarity": -0.052736442077397576, "epoch": 0.983224603914259, "grad_norm": 1.1346122023931848, "learning_rate": 1.4939592682084915e-05, "loss": 2.2954, "reason_loss": 0.5381675958633423, "step": 1055, "utility_loss": 1.7572758197784424 }, { "cosine_similarity": 0.199185226475467, "epoch": 0.9841565703634669, "grad_norm": 1.0567775033845834, "learning_rate": 1.4932688988608908e-05, "loss": 1.8584, "reason_loss": 0.517786979675293, "step": 1056, "utility_loss": 1.3406262397766113 }, { "cosine_similarity": 0.48750915137342976, "epoch": 0.9850885368126747, "grad_norm": 1.6126253252513032, "learning_rate": 1.4925785295132897e-05, "loss": 2.4557, "reason_loss": 0.526129961013794, "step": 1057, "utility_loss": 1.9296122789382935 }, { "cosine_similarity": 0.31293390250791703, "epoch": 0.9860205032618826, "grad_norm": 1.4515485703101638, "learning_rate": 1.4918881601656888e-05, "loss": 2.0973, "reason_loss": 0.5040540099143982, "step": 1058, "utility_loss": 1.593245267868042 }, { "cosine_similarity": 0.1001623390225958, "epoch": 0.9869524697110904, "grad_norm": 1.149729820024087, "learning_rate": 1.4911977908180878e-05, "loss": 2.4634, "reason_loss": 0.5250274538993835, "step": 1059, "utility_loss": 1.938403844833374 }, { "cosine_similarity": 0.12686424730783838, "epoch": 0.9878844361602982, "grad_norm": 1.6308931010159917, "learning_rate": 1.4905074214704869e-05, "loss": 1.9734, "reason_loss": 0.4868544042110443, "step": 1060, "utility_loss": 1.4865505695343018 }, { "cosine_similarity": 0.24752741296058964, "epoch": 0.9888164026095061, "grad_norm": 1.2396281324539702, "learning_rate": 1.4898170521228858e-05, "loss": 2.1365, "reason_loss": 0.5158849358558655, "step": 1061, "utility_loss": 1.6206185817718506 }, { "cosine_similarity": 0.42992452580215573, "epoch": 0.9897483690587139, "grad_norm": 1.5731062742353548, "learning_rate": 1.4891266827752849e-05, "loss": 2.0854, "reason_loss": 0.5080804824829102, "step": 1062, "utility_loss": 1.5772794485092163 }, { "cosine_similarity": 0.32529090288869605, "epoch": 0.9906803355079217, "grad_norm": 1.1499510567183946, "learning_rate": 1.488436313427684e-05, "loss": 1.8175, "reason_loss": 0.5294050574302673, "step": 1063, "utility_loss": 1.2881014347076416 }, { "cosine_similarity": 0.07090732440406078, "epoch": 0.9916123019571296, "grad_norm": 1.3407692551872428, "learning_rate": 1.487745944080083e-05, "loss": 2.2545, "reason_loss": 0.5038859844207764, "step": 1064, "utility_loss": 1.7506253719329834 }, { "cosine_similarity": 0.1821511640209479, "epoch": 0.9925442684063374, "grad_norm": 1.0878585571422839, "learning_rate": 1.487055574732482e-05, "loss": 1.6492, "reason_loss": 0.5239315629005432, "step": 1065, "utility_loss": 1.1252355575561523 }, { "cosine_similarity": 0.22184467215704104, "epoch": 0.9934762348555451, "grad_norm": 1.2953960600848626, "learning_rate": 1.486365205384881e-05, "loss": 2.0859, "reason_loss": 0.5488088130950928, "step": 1066, "utility_loss": 1.537064790725708 }, { "cosine_similarity": 0.0487536617843412, "epoch": 0.994408201304753, "grad_norm": 1.269369831357942, "learning_rate": 1.4856748360372801e-05, "loss": 1.8329, "reason_loss": 0.493789404630661, "step": 1067, "utility_loss": 1.33914053440094 }, { "cosine_similarity": 0.21599100597316467, "epoch": 0.9953401677539608, "grad_norm": 1.455796488884524, "learning_rate": 1.4849844666896792e-05, "loss": 2.2319, "reason_loss": 0.5415908098220825, "step": 1068, "utility_loss": 1.690324306488037 }, { "cosine_similarity": 0.09205604757541896, "epoch": 0.9962721342031687, "grad_norm": 1.277705547547336, "learning_rate": 1.484294097342078e-05, "loss": 2.3566, "reason_loss": 0.4983454942703247, "step": 1069, "utility_loss": 1.858295202255249 }, { "cosine_similarity": 0.35978215142937625, "epoch": 0.9972041006523765, "grad_norm": 1.1839807856679947, "learning_rate": 1.4836037279944773e-05, "loss": 1.9896, "reason_loss": 0.518933117389679, "step": 1070, "utility_loss": 1.4706999063491821 }, { "cosine_similarity": 0.44971391596433696, "epoch": 0.9981360671015843, "grad_norm": 1.2576978088676778, "learning_rate": 1.4829133586468762e-05, "loss": 1.9229, "reason_loss": 0.4932750463485718, "step": 1071, "utility_loss": 1.4296042919158936 }, { "cosine_similarity": 0.13084814151333335, "epoch": 0.9990680335507922, "grad_norm": 1.4293123267225154, "learning_rate": 1.4822229892992753e-05, "loss": 2.4204, "reason_loss": 0.513018012046814, "step": 1072, "utility_loss": 1.907369613647461 }, { "cosine_similarity": 0.1809791077785465, "epoch": 1.0, "grad_norm": 1.1102006632867627, "learning_rate": 1.4815326199516742e-05, "loss": 2.0419, "reason_loss": 0.49825993180274963, "step": 1073, "utility_loss": 1.5436546802520752 }, { "cosine_similarity": 0.24541778255568847, "epoch": 1.0009319664492078, "grad_norm": 1.3974421152853447, "learning_rate": 1.4808422506040734e-05, "loss": 1.9519, "reason_loss": 0.46745356917381287, "step": 1074, "utility_loss": 1.484492301940918 }, { "cosine_similarity": 0.1082264157569821, "epoch": 1.0018639328984156, "grad_norm": 1.4735943968614356, "learning_rate": 1.4801518812564723e-05, "loss": 2.0766, "reason_loss": 0.4969664216041565, "step": 1075, "utility_loss": 1.5796229839324951 }, { "cosine_similarity": 0.7525193370502978, "epoch": 1.0027958993476236, "grad_norm": 1.0814433716669791, "learning_rate": 1.4794615119088714e-05, "loss": 1.6935, "reason_loss": 0.4675397276878357, "step": 1076, "utility_loss": 1.2260032892227173 }, { "cosine_similarity": 0.17376948111166002, "epoch": 1.0037278657968314, "grad_norm": 1.2261895429638678, "learning_rate": 1.4787711425612703e-05, "loss": 1.7071, "reason_loss": 0.5185325145721436, "step": 1077, "utility_loss": 1.1886065006256104 }, { "cosine_similarity": -0.2359851847741031, "epoch": 1.0046598322460392, "grad_norm": 1.0798700650052384, "learning_rate": 1.4780807732136696e-05, "loss": 1.9902, "reason_loss": 0.5200790166854858, "step": 1078, "utility_loss": 1.4701343774795532 }, { "cosine_similarity": 0.2903839479062801, "epoch": 1.005591798695247, "grad_norm": 1.0871188167747514, "learning_rate": 1.4773904038660685e-05, "loss": 1.63, "reason_loss": 0.5044764280319214, "step": 1079, "utility_loss": 1.1254935264587402 }, { "cosine_similarity": 0.10099540663001332, "epoch": 1.0065237651444547, "grad_norm": 1.0746672797550794, "learning_rate": 1.4767000345184674e-05, "loss": 1.493, "reason_loss": 0.5137522220611572, "step": 1080, "utility_loss": 0.9792569875717163 }, { "cosine_similarity": 0.19113197265971724, "epoch": 1.0074557315936625, "grad_norm": 1.1676420097595277, "learning_rate": 1.4760096651708666e-05, "loss": 2.028, "reason_loss": 0.4658985733985901, "step": 1081, "utility_loss": 1.562082052230835 }, { "cosine_similarity": 0.08959065931140832, "epoch": 1.0083876980428705, "grad_norm": 1.1608616820176934, "learning_rate": 1.4753192958232657e-05, "loss": 1.742, "reason_loss": 0.5088455677032471, "step": 1082, "utility_loss": 1.2331262826919556 }, { "cosine_similarity": -0.23867627876982925, "epoch": 1.0093196644920783, "grad_norm": 1.3732495849365463, "learning_rate": 1.4746289264756646e-05, "loss": 1.8195, "reason_loss": 0.5179746150970459, "step": 1083, "utility_loss": 1.3015412092208862 }, { "cosine_similarity": 0.14446698455817, "epoch": 1.0102516309412861, "grad_norm": 1.2521486051949449, "learning_rate": 1.4739385571280635e-05, "loss": 1.6287, "reason_loss": 0.5079947710037231, "step": 1084, "utility_loss": 1.1206965446472168 }, { "cosine_similarity": 0.21447919051349226, "epoch": 1.011183597390494, "grad_norm": 1.1933128929225105, "learning_rate": 1.4732481877804627e-05, "loss": 1.8941, "reason_loss": 0.5430785417556763, "step": 1085, "utility_loss": 1.3510136604309082 }, { "cosine_similarity": 0.1443249404143199, "epoch": 1.0121155638397017, "grad_norm": 1.1159794223238069, "learning_rate": 1.4725578184328618e-05, "loss": 1.7268, "reason_loss": 0.4820949137210846, "step": 1086, "utility_loss": 1.2447062730789185 }, { "cosine_similarity": 0.014015668190987872, "epoch": 1.0130475302889097, "grad_norm": 0.9410301411652964, "learning_rate": 1.4718674490852607e-05, "loss": 1.691, "reason_loss": 0.5408718585968018, "step": 1087, "utility_loss": 1.15011465549469 }, { "cosine_similarity": -0.16530201342880035, "epoch": 1.0139794967381175, "grad_norm": 1.1706563147868423, "learning_rate": 1.4711770797376596e-05, "loss": 1.7154, "reason_loss": 0.546662449836731, "step": 1088, "utility_loss": 1.1687049865722656 }, { "cosine_similarity": 0.1080514794529915, "epoch": 1.0149114631873253, "grad_norm": 1.7718502674706968, "learning_rate": 1.4704867103900589e-05, "loss": 1.8968, "reason_loss": 0.5497881174087524, "step": 1089, "utility_loss": 1.3470258712768555 }, { "cosine_similarity": 0.3375005814460472, "epoch": 1.015843429636533, "grad_norm": 1.3177240253117515, "learning_rate": 1.4697963410424578e-05, "loss": 1.8453, "reason_loss": 0.5177121162414551, "step": 1090, "utility_loss": 1.3275855779647827 }, { "cosine_similarity": 0.31394406147942727, "epoch": 1.0167753960857409, "grad_norm": 1.0971535446182759, "learning_rate": 1.4691059716948568e-05, "loss": 1.738, "reason_loss": 0.48716771602630615, "step": 1091, "utility_loss": 1.2507984638214111 }, { "cosine_similarity": 0.1934325479415027, "epoch": 1.0177073625349486, "grad_norm": 1.3194725892726518, "learning_rate": 1.468415602347256e-05, "loss": 1.9362, "reason_loss": 0.5162621736526489, "step": 1092, "utility_loss": 1.4199305772781372 }, { "cosine_similarity": -0.17192435355756613, "epoch": 1.0186393289841567, "grad_norm": 1.1639903612961235, "learning_rate": 1.467725232999655e-05, "loss": 2.076, "reason_loss": 0.5074446201324463, "step": 1093, "utility_loss": 1.568597674369812 }, { "cosine_similarity": 0.2748474355688506, "epoch": 1.0195712954333644, "grad_norm": 1.4936926623801814, "learning_rate": 1.4670348636520539e-05, "loss": 1.8672, "reason_loss": 0.5127832293510437, "step": 1094, "utility_loss": 1.354439377784729 }, { "cosine_similarity": -0.3486080666648341, "epoch": 1.0205032618825722, "grad_norm": 1.1180918500620796, "learning_rate": 1.466344494304453e-05, "loss": 1.5563, "reason_loss": 0.4993787109851837, "step": 1095, "utility_loss": 1.0568817853927612 }, { "cosine_similarity": 0.15755707509193942, "epoch": 1.02143522833178, "grad_norm": 1.0620837782643895, "learning_rate": 1.465654124956852e-05, "loss": 1.6529, "reason_loss": 0.4798993468284607, "step": 1096, "utility_loss": 1.1730222702026367 }, { "cosine_similarity": 0.5313643579942988, "epoch": 1.0223671947809878, "grad_norm": 1.1655793395373795, "learning_rate": 1.4649637556092511e-05, "loss": 1.6808, "reason_loss": 0.47167956829071045, "step": 1097, "utility_loss": 1.2091667652130127 }, { "cosine_similarity": 0.33446593800133056, "epoch": 1.0232991612301958, "grad_norm": 1.2607325071784508, "learning_rate": 1.46427338626165e-05, "loss": 1.5004, "reason_loss": 0.5051648616790771, "step": 1098, "utility_loss": 0.9952676892280579 }, { "cosine_similarity": -0.06960480099398209, "epoch": 1.0242311276794036, "grad_norm": 1.5675578126684984, "learning_rate": 1.463583016914049e-05, "loss": 2.0301, "reason_loss": 0.49016958475112915, "step": 1099, "utility_loss": 1.5398861169815063 }, { "cosine_similarity": 0.11465650660201916, "epoch": 1.0251630941286114, "grad_norm": 1.2833326617892062, "learning_rate": 1.4628926475664481e-05, "loss": 1.7328, "reason_loss": 0.5107004046440125, "step": 1100, "utility_loss": 1.2221038341522217 }, { "cosine_similarity": 0.18525622114174853, "epoch": 1.0260950605778192, "grad_norm": 1.1130873487038169, "learning_rate": 1.4622022782188472e-05, "loss": 1.6416, "reason_loss": 0.4962010383605957, "step": 1101, "utility_loss": 1.1453945636749268 }, { "cosine_similarity": 0.1704157705505215, "epoch": 1.027027027027027, "grad_norm": 1.5186247846164307, "learning_rate": 1.4615119088712461e-05, "loss": 1.9334, "reason_loss": 0.5023212432861328, "step": 1102, "utility_loss": 1.4311227798461914 }, { "cosine_similarity": 0.2548156963300366, "epoch": 1.0279589934762348, "grad_norm": 1.4300004031978368, "learning_rate": 1.4608215395236454e-05, "loss": 1.7265, "reason_loss": 0.5261505842208862, "step": 1103, "utility_loss": 1.2003238201141357 }, { "cosine_similarity": -0.14507775770692768, "epoch": 1.0288909599254428, "grad_norm": 1.0251920935794074, "learning_rate": 1.4601311701760443e-05, "loss": 1.8869, "reason_loss": 0.4923466444015503, "step": 1104, "utility_loss": 1.3945529460906982 }, { "cosine_similarity": 0.07764110995144487, "epoch": 1.0298229263746506, "grad_norm": 1.4166608987830756, "learning_rate": 1.4594408008284433e-05, "loss": 1.8028, "reason_loss": 0.49951982498168945, "step": 1105, "utility_loss": 1.3033157587051392 }, { "cosine_similarity": 0.012708457346372173, "epoch": 1.0307548928238583, "grad_norm": 1.2208071064693837, "learning_rate": 1.4587504314808423e-05, "loss": 1.6919, "reason_loss": 0.5125074982643127, "step": 1106, "utility_loss": 1.1793692111968994 }, { "cosine_similarity": -0.09425964223041121, "epoch": 1.0316868592730661, "grad_norm": 1.3494274113807594, "learning_rate": 1.4580600621332415e-05, "loss": 1.7167, "reason_loss": 0.4833499491214752, "step": 1107, "utility_loss": 1.2333401441574097 }, { "cosine_similarity": 0.2821359334902432, "epoch": 1.032618825722274, "grad_norm": 1.2829935574277638, "learning_rate": 1.4573696927856404e-05, "loss": 1.9486, "reason_loss": 0.5097252130508423, "step": 1108, "utility_loss": 1.438826322555542 }, { "cosine_similarity": 0.08245533823840459, "epoch": 1.0335507921714817, "grad_norm": 1.290157893161906, "learning_rate": 1.4566793234380395e-05, "loss": 1.6682, "reason_loss": 0.4955798387527466, "step": 1109, "utility_loss": 1.1726462841033936 }, { "cosine_similarity": 0.07835849195844723, "epoch": 1.0344827586206897, "grad_norm": 1.202592007329211, "learning_rate": 1.4559889540904385e-05, "loss": 1.7596, "reason_loss": 0.4908748269081116, "step": 1110, "utility_loss": 1.2687498331069946 }, { "cosine_similarity": -0.027379324673881517, "epoch": 1.0354147250698975, "grad_norm": 1.3433646327087718, "learning_rate": 1.4552985847428376e-05, "loss": 1.5755, "reason_loss": 0.5011141300201416, "step": 1111, "utility_loss": 1.0743486881256104 }, { "cosine_similarity": 0.23153520254664176, "epoch": 1.0363466915191053, "grad_norm": 1.115184759627974, "learning_rate": 1.4546082153952365e-05, "loss": 1.806, "reason_loss": 0.4847267270088196, "step": 1112, "utility_loss": 1.321258544921875 }, { "cosine_similarity": 0.5190692550715914, "epoch": 1.037278657968313, "grad_norm": 1.2484325100181066, "learning_rate": 1.4539178460476356e-05, "loss": 1.7172, "reason_loss": 0.5112935900688171, "step": 1113, "utility_loss": 1.2058753967285156 }, { "cosine_similarity": 0.2525872413831634, "epoch": 1.0382106244175209, "grad_norm": 1.2846957797596394, "learning_rate": 1.4532274767000347e-05, "loss": 1.9321, "reason_loss": 0.5047398209571838, "step": 1114, "utility_loss": 1.4273557662963867 }, { "cosine_similarity": 0.5219262502760111, "epoch": 1.0391425908667289, "grad_norm": 1.3098854736436651, "learning_rate": 1.4525371073524337e-05, "loss": 1.9529, "reason_loss": 0.5213174819946289, "step": 1115, "utility_loss": 1.4315694570541382 }, { "cosine_similarity": 0.10936034018981518, "epoch": 1.0400745573159367, "grad_norm": 1.2126579846183099, "learning_rate": 1.4518467380048326e-05, "loss": 1.5975, "reason_loss": 0.47502148151397705, "step": 1116, "utility_loss": 1.1224809885025024 }, { "cosine_similarity": 0.4756996724068436, "epoch": 1.0410065237651445, "grad_norm": 1.2666530485254435, "learning_rate": 1.4511563686572317e-05, "loss": 1.8928, "reason_loss": 0.5231167078018188, "step": 1117, "utility_loss": 1.369686245918274 }, { "cosine_similarity": 0.15172720076937862, "epoch": 1.0419384902143523, "grad_norm": 1.4334662573950863, "learning_rate": 1.4504659993096308e-05, "loss": 2.0249, "reason_loss": 0.48744165897369385, "step": 1118, "utility_loss": 1.5374462604522705 }, { "cosine_similarity": 0.08106373488999608, "epoch": 1.04287045666356, "grad_norm": 1.313948859638009, "learning_rate": 1.4497756299620299e-05, "loss": 2.2261, "reason_loss": 0.5281422734260559, "step": 1119, "utility_loss": 1.6979352235794067 }, { "cosine_similarity": 0.22221262145559456, "epoch": 1.0438024231127678, "grad_norm": 1.1797677144778072, "learning_rate": 1.4490852606144288e-05, "loss": 1.961, "reason_loss": 0.5022127032279968, "step": 1120, "utility_loss": 1.458796739578247 }, { "cosine_similarity": 0.06695028573128911, "epoch": 1.0447343895619758, "grad_norm": 1.0809884444532203, "learning_rate": 1.448394891266828e-05, "loss": 1.645, "reason_loss": 0.49476367235183716, "step": 1121, "utility_loss": 1.150212049484253 }, { "cosine_similarity": 0.03720931957111108, "epoch": 1.0456663560111836, "grad_norm": 1.1500457318112158, "learning_rate": 1.4477045219192269e-05, "loss": 1.7144, "reason_loss": 0.5145710110664368, "step": 1122, "utility_loss": 1.1998207569122314 }, { "cosine_similarity": 0.13094974656548805, "epoch": 1.0465983224603914, "grad_norm": 1.2486464397859613, "learning_rate": 1.447014152571626e-05, "loss": 2.0843, "reason_loss": 0.49737781286239624, "step": 1123, "utility_loss": 1.5869020223617554 }, { "cosine_similarity": 0.6368445876224363, "epoch": 1.0475302889095992, "grad_norm": 1.1489870907651396, "learning_rate": 1.4463237832240249e-05, "loss": 2.0031, "reason_loss": 0.5321593284606934, "step": 1124, "utility_loss": 1.4709389209747314 }, { "cosine_similarity": 0.37393340031430133, "epoch": 1.048462255358807, "grad_norm": 1.1181399877590856, "learning_rate": 1.4456334138764241e-05, "loss": 1.9087, "reason_loss": 0.4930714964866638, "step": 1125, "utility_loss": 1.4155796766281128 }, { "cosine_similarity": 0.301027818835531, "epoch": 1.049394221808015, "grad_norm": 1.2413286467040283, "learning_rate": 1.444943044528823e-05, "loss": 2.0107, "reason_loss": 0.4986580014228821, "step": 1126, "utility_loss": 1.512013554573059 }, { "cosine_similarity": 0.2597640133933015, "epoch": 1.0503261882572228, "grad_norm": 1.0372529767319023, "learning_rate": 1.4442526751812221e-05, "loss": 1.6297, "reason_loss": 0.5373709201812744, "step": 1127, "utility_loss": 1.092289924621582 }, { "cosine_similarity": -0.08765370239546878, "epoch": 1.0512581547064306, "grad_norm": 1.1374912683413463, "learning_rate": 1.443562305833621e-05, "loss": 1.6869, "reason_loss": 0.4980924725532532, "step": 1128, "utility_loss": 1.1887917518615723 }, { "cosine_similarity": 0.0258284299567384, "epoch": 1.0521901211556384, "grad_norm": 1.196079420362743, "learning_rate": 1.4428719364860203e-05, "loss": 1.9919, "reason_loss": 0.49672558903694153, "step": 1129, "utility_loss": 1.49521005153656 }, { "cosine_similarity": -0.15561015792176766, "epoch": 1.0531220876048462, "grad_norm": 1.5319297403362366, "learning_rate": 1.4421815671384192e-05, "loss": 1.7856, "reason_loss": 0.49569958448410034, "step": 1130, "utility_loss": 1.289896845817566 }, { "cosine_similarity": 0.05325015770974519, "epoch": 1.054054054054054, "grad_norm": 1.53112214843865, "learning_rate": 1.441491197790818e-05, "loss": 2.1079, "reason_loss": 0.5238756537437439, "step": 1131, "utility_loss": 1.5840718746185303 }, { "cosine_similarity": -0.11705370058031873, "epoch": 1.054986020503262, "grad_norm": 1.2512577518404755, "learning_rate": 1.4408008284432173e-05, "loss": 1.4451, "reason_loss": 0.5055972337722778, "step": 1132, "utility_loss": 0.9394642114639282 }, { "cosine_similarity": -0.049037456192394836, "epoch": 1.0559179869524697, "grad_norm": 1.1025661926382575, "learning_rate": 1.4401104590956164e-05, "loss": 1.9005, "reason_loss": 0.4991281032562256, "step": 1133, "utility_loss": 1.401364803314209 }, { "cosine_similarity": 0.11310320422700085, "epoch": 1.0568499534016775, "grad_norm": 1.1344419110409376, "learning_rate": 1.4394200897480153e-05, "loss": 1.9063, "reason_loss": 0.5297058820724487, "step": 1134, "utility_loss": 1.3765572309494019 }, { "cosine_similarity": 0.18058632180148865, "epoch": 1.0577819198508853, "grad_norm": 1.3188289441726249, "learning_rate": 1.4387297204004142e-05, "loss": 1.8747, "reason_loss": 0.5391352772712708, "step": 1135, "utility_loss": 1.3355330228805542 }, { "cosine_similarity": -0.014614913747772226, "epoch": 1.058713886300093, "grad_norm": 1.1695287315895018, "learning_rate": 1.4380393510528134e-05, "loss": 1.7473, "reason_loss": 0.49588173627853394, "step": 1136, "utility_loss": 1.2514491081237793 }, { "cosine_similarity": -0.09110780871201628, "epoch": 1.0596458527493011, "grad_norm": 1.0358457454345893, "learning_rate": 1.4373489817052125e-05, "loss": 1.962, "reason_loss": 0.5315226316452026, "step": 1137, "utility_loss": 1.4304560422897339 }, { "cosine_similarity": -0.1167456993842772, "epoch": 1.060577819198509, "grad_norm": 1.3228501002290276, "learning_rate": 1.4366586123576114e-05, "loss": 1.9013, "reason_loss": 0.5082974433898926, "step": 1138, "utility_loss": 1.3930318355560303 }, { "cosine_similarity": 0.4613284820048864, "epoch": 1.0615097856477167, "grad_norm": 1.1595050411793923, "learning_rate": 1.4359682430100103e-05, "loss": 1.788, "reason_loss": 0.5065302848815918, "step": 1139, "utility_loss": 1.2815086841583252 }, { "cosine_similarity": -0.008123262652685856, "epoch": 1.0624417520969245, "grad_norm": 1.7430017886615978, "learning_rate": 1.4352778736624096e-05, "loss": 2.2849, "reason_loss": 0.5210397243499756, "step": 1140, "utility_loss": 1.7638989686965942 }, { "cosine_similarity": 0.08754744697505587, "epoch": 1.0633737185461323, "grad_norm": 1.1701200501784839, "learning_rate": 1.4345875043148085e-05, "loss": 1.8241, "reason_loss": 0.486200213432312, "step": 1141, "utility_loss": 1.3378897905349731 }, { "cosine_similarity": -0.0863570482043751, "epoch": 1.06430568499534, "grad_norm": 1.0089924497046039, "learning_rate": 1.4338971349672075e-05, "loss": 1.3609, "reason_loss": 0.48539626598358154, "step": 1142, "utility_loss": 0.8755095601081848 }, { "cosine_similarity": -0.23486851168981895, "epoch": 1.065237651444548, "grad_norm": 1.042431294899933, "learning_rate": 1.4332067656196068e-05, "loss": 1.779, "reason_loss": 0.5283496379852295, "step": 1143, "utility_loss": 1.2506201267242432 }, { "cosine_similarity": -0.09225228498395546, "epoch": 1.0661696178937559, "grad_norm": 1.2520587145389022, "learning_rate": 1.4325163962720057e-05, "loss": 1.9134, "reason_loss": 0.49733927845954895, "step": 1144, "utility_loss": 1.4161038398742676 }, { "cosine_similarity": 0.15495977945409392, "epoch": 1.0671015843429636, "grad_norm": 1.2668157456092526, "learning_rate": 1.4318260269244046e-05, "loss": 1.7331, "reason_loss": 0.5166051387786865, "step": 1145, "utility_loss": 1.2164642810821533 }, { "cosine_similarity": 0.15515145003041025, "epoch": 1.0680335507921714, "grad_norm": 1.2645658662675583, "learning_rate": 1.4311356575768037e-05, "loss": 1.8961, "reason_loss": 0.5066432356834412, "step": 1146, "utility_loss": 1.3894599676132202 }, { "cosine_similarity": 0.06976112335399758, "epoch": 1.0689655172413792, "grad_norm": 1.143304624627996, "learning_rate": 1.4304452882292027e-05, "loss": 1.9554, "reason_loss": 0.504087507724762, "step": 1147, "utility_loss": 1.4513182640075684 }, { "cosine_similarity": 0.23993155706739833, "epoch": 1.0698974836905872, "grad_norm": 1.127174071415146, "learning_rate": 1.4297549188816018e-05, "loss": 1.7847, "reason_loss": 0.48761898279190063, "step": 1148, "utility_loss": 1.297081708908081 }, { "cosine_similarity": 0.3521897477030198, "epoch": 1.070829450139795, "grad_norm": 1.626769945590913, "learning_rate": 1.4290645495340007e-05, "loss": 1.9838, "reason_loss": 0.4874151349067688, "step": 1149, "utility_loss": 1.4963403940200806 }, { "cosine_similarity": -0.018853090115179764, "epoch": 1.0717614165890028, "grad_norm": 1.1964348956204722, "learning_rate": 1.4283741801864e-05, "loss": 1.7503, "reason_loss": 0.510216474533081, "step": 1150, "utility_loss": 1.2400351762771606 }, { "cosine_similarity": 0.20494661887172905, "epoch": 1.0726933830382106, "grad_norm": 1.6900856303029792, "learning_rate": 1.4276838108387989e-05, "loss": 2.104, "reason_loss": 0.4845235347747803, "step": 1151, "utility_loss": 1.619429111480713 }, { "cosine_similarity": -0.01613777858267245, "epoch": 1.0736253494874184, "grad_norm": 1.4884285540141042, "learning_rate": 1.426993441491198e-05, "loss": 1.8484, "reason_loss": 0.5148859620094299, "step": 1152, "utility_loss": 1.3335237503051758 }, { "cosine_similarity": 0.1421204564947339, "epoch": 1.0745573159366262, "grad_norm": 1.337425545870763, "learning_rate": 1.4263030721435968e-05, "loss": 1.8442, "reason_loss": 0.5227769613265991, "step": 1153, "utility_loss": 1.3214564323425293 }, { "cosine_similarity": -0.17109874159605046, "epoch": 1.0754892823858342, "grad_norm": 1.1915908937259523, "learning_rate": 1.425612702795996e-05, "loss": 1.7345, "reason_loss": 0.48662763833999634, "step": 1154, "utility_loss": 1.2479135990142822 }, { "cosine_similarity": 0.2230402078500212, "epoch": 1.076421248835042, "grad_norm": 1.30580056919387, "learning_rate": 1.424922333448395e-05, "loss": 2.1771, "reason_loss": 0.5203782320022583, "step": 1155, "utility_loss": 1.6566946506500244 }, { "cosine_similarity": 0.3179578403256457, "epoch": 1.0773532152842498, "grad_norm": 1.1940670151719084, "learning_rate": 1.424231964100794e-05, "loss": 1.9694, "reason_loss": 0.5029889345169067, "step": 1156, "utility_loss": 1.46638822555542 }, { "cosine_similarity": 0.04982356919295185, "epoch": 1.0782851817334576, "grad_norm": 1.140685739744995, "learning_rate": 1.423541594753193e-05, "loss": 2.0984, "reason_loss": 0.5017058849334717, "step": 1157, "utility_loss": 1.5967313051223755 }, { "cosine_similarity": 0.3476804734691717, "epoch": 1.0792171481826653, "grad_norm": 1.257156161608374, "learning_rate": 1.4228512254055922e-05, "loss": 2.1374, "reason_loss": 0.5022702217102051, "step": 1158, "utility_loss": 1.6351605653762817 }, { "cosine_similarity": 0.31286816658096966, "epoch": 1.0801491146318734, "grad_norm": 1.188029372811871, "learning_rate": 1.4221608560579911e-05, "loss": 1.5647, "reason_loss": 0.5024628043174744, "step": 1159, "utility_loss": 1.0622671842575073 }, { "cosine_similarity": 0.09972475103706999, "epoch": 1.0810810810810811, "grad_norm": 1.260449116092744, "learning_rate": 1.4214704867103902e-05, "loss": 1.6696, "reason_loss": 0.4882940649986267, "step": 1160, "utility_loss": 1.181267261505127 }, { "cosine_similarity": 0.07547457087734918, "epoch": 1.082013047530289, "grad_norm": 1.4328777835320436, "learning_rate": 1.4207801173627892e-05, "loss": 1.4666, "reason_loss": 0.5001401901245117, "step": 1161, "utility_loss": 0.9664120674133301 }, { "cosine_similarity": 0.34180007854697597, "epoch": 1.0829450139794967, "grad_norm": 1.0705230956012826, "learning_rate": 1.4200897480151883e-05, "loss": 1.6049, "reason_loss": 0.5334417819976807, "step": 1162, "utility_loss": 1.0714857578277588 }, { "cosine_similarity": 0.4130551661464063, "epoch": 1.0838769804287045, "grad_norm": 1.4566425589835348, "learning_rate": 1.4193993786675872e-05, "loss": 2.0762, "reason_loss": 0.5379344820976257, "step": 1163, "utility_loss": 1.538237452507019 }, { "cosine_similarity": 0.27037147669217065, "epoch": 1.0848089468779123, "grad_norm": 1.1125441840109538, "learning_rate": 1.4187090093199863e-05, "loss": 1.7841, "reason_loss": 0.49141889810562134, "step": 1164, "utility_loss": 1.292704701423645 }, { "cosine_similarity": 0.24792879983728813, "epoch": 1.0857409133271203, "grad_norm": 1.2764580002917545, "learning_rate": 1.4180186399723854e-05, "loss": 1.8242, "reason_loss": 0.482441782951355, "step": 1165, "utility_loss": 1.3417872190475464 }, { "cosine_similarity": -0.017835937193839392, "epoch": 1.086672879776328, "grad_norm": 1.0759906387413407, "learning_rate": 1.4173282706247844e-05, "loss": 1.6407, "reason_loss": 0.5028229355812073, "step": 1166, "utility_loss": 1.1378459930419922 }, { "cosine_similarity": 0.6926144973085804, "epoch": 1.0876048462255359, "grad_norm": 1.5399937994755915, "learning_rate": 1.4166379012771833e-05, "loss": 1.9616, "reason_loss": 0.5241611003875732, "step": 1167, "utility_loss": 1.437476396560669 }, { "cosine_similarity": 0.2919339956304396, "epoch": 1.0885368126747437, "grad_norm": 1.2476146333859233, "learning_rate": 1.4159475319295824e-05, "loss": 1.8672, "reason_loss": 0.4947349429130554, "step": 1168, "utility_loss": 1.372440218925476 }, { "cosine_similarity": 0.15919101727505458, "epoch": 1.0894687791239515, "grad_norm": 1.3169260762339134, "learning_rate": 1.4152571625819815e-05, "loss": 2.0884, "reason_loss": 0.5626168847084045, "step": 1169, "utility_loss": 1.525805115699768 }, { "cosine_similarity": 0.21426054933707203, "epoch": 1.0904007455731595, "grad_norm": 1.0087123288073796, "learning_rate": 1.4145667932343806e-05, "loss": 1.8008, "reason_loss": 0.5148321390151978, "step": 1170, "utility_loss": 1.2859256267547607 }, { "cosine_similarity": 0.11299417426938302, "epoch": 1.0913327120223673, "grad_norm": 1.3016311812295682, "learning_rate": 1.4138764238867795e-05, "loss": 1.7127, "reason_loss": 0.5288886427879333, "step": 1171, "utility_loss": 1.1838152408599854 }, { "cosine_similarity": 0.11814981582196903, "epoch": 1.092264678471575, "grad_norm": 1.0889770788200708, "learning_rate": 1.4131860545391787e-05, "loss": 1.7018, "reason_loss": 0.4989125430583954, "step": 1172, "utility_loss": 1.2028460502624512 }, { "cosine_similarity": 0.2105676442452842, "epoch": 1.0931966449207828, "grad_norm": 1.4343192619312075, "learning_rate": 1.4124956851915776e-05, "loss": 1.5124, "reason_loss": 0.49383744597435, "step": 1173, "utility_loss": 1.0185257196426392 }, { "cosine_similarity": 0.3587404066021657, "epoch": 1.0941286113699906, "grad_norm": 1.2537352253859646, "learning_rate": 1.4118053158439767e-05, "loss": 1.5699, "reason_loss": 0.4777856171131134, "step": 1174, "utility_loss": 1.092151403427124 }, { "cosine_similarity": 0.4723972760334214, "epoch": 1.0950605778191984, "grad_norm": 1.2184965971425485, "learning_rate": 1.4111149464963756e-05, "loss": 1.9672, "reason_loss": 0.5077734589576721, "step": 1175, "utility_loss": 1.4594646692276 }, { "cosine_similarity": 0.19888618660631996, "epoch": 1.0959925442684064, "grad_norm": 1.269461580170373, "learning_rate": 1.4104245771487748e-05, "loss": 1.8525, "reason_loss": 0.5071288347244263, "step": 1176, "utility_loss": 1.3454099893569946 }, { "cosine_similarity": 0.31643487588290514, "epoch": 1.0969245107176142, "grad_norm": 1.2560891236913438, "learning_rate": 1.4097342078011737e-05, "loss": 2.1303, "reason_loss": 0.5187797546386719, "step": 1177, "utility_loss": 1.611527681350708 }, { "cosine_similarity": 0.41597870560636574, "epoch": 1.097856477166822, "grad_norm": 1.2815871781367505, "learning_rate": 1.4090438384535728e-05, "loss": 1.573, "reason_loss": 0.5010433197021484, "step": 1178, "utility_loss": 1.0720030069351196 }, { "cosine_similarity": 0.1753780164138308, "epoch": 1.0987884436160298, "grad_norm": 1.1164711376724457, "learning_rate": 1.4083534691059717e-05, "loss": 1.9601, "reason_loss": 0.4937341809272766, "step": 1179, "utility_loss": 1.466383934020996 }, { "cosine_similarity": 0.24278714241969493, "epoch": 1.0997204100652376, "grad_norm": 1.169336466342267, "learning_rate": 1.407663099758371e-05, "loss": 1.603, "reason_loss": 0.516646683216095, "step": 1180, "utility_loss": 1.0863946676254272 }, { "cosine_similarity": 0.14160940073496428, "epoch": 1.1006523765144456, "grad_norm": 1.3524132577022754, "learning_rate": 1.4069727304107699e-05, "loss": 1.7039, "reason_loss": 0.4961528480052948, "step": 1181, "utility_loss": 1.20771062374115 }, { "cosine_similarity": 0.33358981424977235, "epoch": 1.1015843429636534, "grad_norm": 1.0618820487231067, "learning_rate": 1.4062823610631688e-05, "loss": 1.5265, "reason_loss": 0.5084003806114197, "step": 1182, "utility_loss": 1.018117904663086 }, { "cosine_similarity": 0.17509328131052393, "epoch": 1.1025163094128612, "grad_norm": 1.2176288717876305, "learning_rate": 1.405591991715568e-05, "loss": 1.7015, "reason_loss": 0.47844618558883667, "step": 1183, "utility_loss": 1.2230384349822998 }, { "cosine_similarity": 0.2705230875048379, "epoch": 1.103448275862069, "grad_norm": 1.2276891004892245, "learning_rate": 1.404901622367967e-05, "loss": 2.0126, "reason_loss": 0.5209113359451294, "step": 1184, "utility_loss": 1.491723895072937 }, { "cosine_similarity": 0.5467076947925713, "epoch": 1.1043802423112767, "grad_norm": 1.2912318883015814, "learning_rate": 1.404211253020366e-05, "loss": 1.7074, "reason_loss": 0.5104424953460693, "step": 1185, "utility_loss": 1.1969352960586548 }, { "cosine_similarity": 0.3950500215460423, "epoch": 1.1053122087604845, "grad_norm": 1.2454403174389987, "learning_rate": 1.4035208836727649e-05, "loss": 1.8565, "reason_loss": 0.5335943698883057, "step": 1186, "utility_loss": 1.3229199647903442 }, { "cosine_similarity": 0.15962749356798622, "epoch": 1.1062441752096925, "grad_norm": 1.2504740824435185, "learning_rate": 1.4028305143251641e-05, "loss": 1.93, "reason_loss": 0.506783664226532, "step": 1187, "utility_loss": 1.423187017440796 }, { "cosine_similarity": 0.046693920433219134, "epoch": 1.1071761416589003, "grad_norm": 1.341134806951387, "learning_rate": 1.402140144977563e-05, "loss": 1.9272, "reason_loss": 0.5215636491775513, "step": 1188, "utility_loss": 1.4056065082550049 }, { "cosine_similarity": 0.43999045105547024, "epoch": 1.1081081081081081, "grad_norm": 1.1285875558618312, "learning_rate": 1.4014497756299621e-05, "loss": 1.882, "reason_loss": 0.4852113723754883, "step": 1189, "utility_loss": 1.3967663049697876 }, { "cosine_similarity": 0.2490195322315866, "epoch": 1.109040074557316, "grad_norm": 1.2192122579814297, "learning_rate": 1.400759406282361e-05, "loss": 1.56, "reason_loss": 0.491463840007782, "step": 1190, "utility_loss": 1.0685476064682007 }, { "cosine_similarity": 0.12088012315251496, "epoch": 1.1099720410065237, "grad_norm": 1.1026305378202708, "learning_rate": 1.4000690369347603e-05, "loss": 1.8634, "reason_loss": 0.5129579305648804, "step": 1191, "utility_loss": 1.3503942489624023 }, { "cosine_similarity": -0.029614340867260048, "epoch": 1.1109040074557317, "grad_norm": 1.2197165777219567, "learning_rate": 1.3993786675871592e-05, "loss": 1.7613, "reason_loss": 0.5060437917709351, "step": 1192, "utility_loss": 1.255208969116211 }, { "cosine_similarity": 0.22629366528869488, "epoch": 1.1118359739049395, "grad_norm": 1.1871466985315497, "learning_rate": 1.3986882982395582e-05, "loss": 1.8794, "reason_loss": 0.5042709112167358, "step": 1193, "utility_loss": 1.3751695156097412 }, { "cosine_similarity": 0.4533509046109919, "epoch": 1.1127679403541473, "grad_norm": 1.3110589629438392, "learning_rate": 1.3979979288919575e-05, "loss": 1.9152, "reason_loss": 0.5213080644607544, "step": 1194, "utility_loss": 1.3939270973205566 }, { "cosine_similarity": 0.2683236723611503, "epoch": 1.113699906803355, "grad_norm": 1.2129237852312282, "learning_rate": 1.3973075595443564e-05, "loss": 1.7301, "reason_loss": 0.5353124141693115, "step": 1195, "utility_loss": 1.1947882175445557 }, { "cosine_similarity": 0.18721923353777314, "epoch": 1.1146318732525629, "grad_norm": 1.6379659541545002, "learning_rate": 1.3966171901967553e-05, "loss": 1.4804, "reason_loss": 0.5106667876243591, "step": 1196, "utility_loss": 0.9697433710098267 }, { "cosine_similarity": 0.27185177192529464, "epoch": 1.1155638397017706, "grad_norm": 1.0583529851740958, "learning_rate": 1.3959268208491544e-05, "loss": 2.1495, "reason_loss": 0.513333797454834, "step": 1197, "utility_loss": 1.6361663341522217 }, { "cosine_similarity": 0.2426450771877536, "epoch": 1.1164958061509787, "grad_norm": 1.2021615344559273, "learning_rate": 1.3952364515015534e-05, "loss": 2.2277, "reason_loss": 0.5009990930557251, "step": 1198, "utility_loss": 1.7267385721206665 }, { "cosine_similarity": -0.11286466682581667, "epoch": 1.1174277726001864, "grad_norm": 1.4060233496769163, "learning_rate": 1.3945460821539525e-05, "loss": 1.9566, "reason_loss": 0.512428343296051, "step": 1199, "utility_loss": 1.4441604614257812 }, { "cosine_similarity": 0.14598618807956046, "epoch": 1.1183597390493942, "grad_norm": 1.1706188667570168, "learning_rate": 1.3938557128063514e-05, "loss": 1.6374, "reason_loss": 0.49164003133773804, "step": 1200, "utility_loss": 1.1457284688949585 }, { "cosine_similarity": 0.5297344674665383, "epoch": 1.119291705498602, "grad_norm": 1.4571615736554453, "learning_rate": 1.3931653434587507e-05, "loss": 1.9682, "reason_loss": 0.5287073850631714, "step": 1201, "utility_loss": 1.4395421743392944 }, { "cosine_similarity": 0.16554277872667394, "epoch": 1.1202236719478098, "grad_norm": 1.1627944935456418, "learning_rate": 1.3924749741111496e-05, "loss": 2.3017, "reason_loss": 0.47901278734207153, "step": 1202, "utility_loss": 1.8226443529129028 }, { "cosine_similarity": 0.2632234782023209, "epoch": 1.1211556383970178, "grad_norm": 1.448452905146536, "learning_rate": 1.3917846047635486e-05, "loss": 1.8735, "reason_loss": 0.4908633828163147, "step": 1203, "utility_loss": 1.3826746940612793 }, { "cosine_similarity": 0.04399571162119571, "epoch": 1.1220876048462256, "grad_norm": 1.2808409358057637, "learning_rate": 1.3910942354159475e-05, "loss": 1.7201, "reason_loss": 0.4946115016937256, "step": 1204, "utility_loss": 1.2254961729049683 }, { "cosine_similarity": 0.24312818469712807, "epoch": 1.1230195712954334, "grad_norm": 1.3425157052852215, "learning_rate": 1.3904038660683468e-05, "loss": 1.8768, "reason_loss": 0.5190862417221069, "step": 1205, "utility_loss": 1.3577311038970947 }, { "cosine_similarity": -0.08071581802099247, "epoch": 1.1239515377446412, "grad_norm": 1.2405453594361497, "learning_rate": 1.3897134967207457e-05, "loss": 1.9649, "reason_loss": 0.4855182468891144, "step": 1206, "utility_loss": 1.479428768157959 }, { "cosine_similarity": 0.015295661749457805, "epoch": 1.124883504193849, "grad_norm": 1.2349968233540392, "learning_rate": 1.3890231273731448e-05, "loss": 1.7909, "reason_loss": 0.5127513408660889, "step": 1207, "utility_loss": 1.2781257629394531 }, { "cosine_similarity": 0.32620763076323583, "epoch": 1.1258154706430568, "grad_norm": 1.3432410309388017, "learning_rate": 1.3883327580255437e-05, "loss": 1.9229, "reason_loss": 0.5041407346725464, "step": 1208, "utility_loss": 1.4187750816345215 }, { "cosine_similarity": -0.099752517965315, "epoch": 1.1267474370922648, "grad_norm": 1.4164042323121817, "learning_rate": 1.3876423886779429e-05, "loss": 2.0116, "reason_loss": 0.48405706882476807, "step": 1209, "utility_loss": 1.527575969696045 }, { "cosine_similarity": -0.024841914575756657, "epoch": 1.1276794035414726, "grad_norm": 1.447291440009601, "learning_rate": 1.3869520193303418e-05, "loss": 1.8894, "reason_loss": 0.49939292669296265, "step": 1210, "utility_loss": 1.3900177478790283 }, { "cosine_similarity": 0.20502066535895055, "epoch": 1.1286113699906803, "grad_norm": 1.248302880094282, "learning_rate": 1.3862616499827409e-05, "loss": 1.8946, "reason_loss": 0.5101995468139648, "step": 1211, "utility_loss": 1.384443759918213 }, { "cosine_similarity": 0.2036608991489653, "epoch": 1.1295433364398881, "grad_norm": 0.9185821452036245, "learning_rate": 1.38557128063514e-05, "loss": 1.3877, "reason_loss": 0.4988054037094116, "step": 1212, "utility_loss": 0.8888669013977051 }, { "cosine_similarity": 0.4770679676617768, "epoch": 1.130475302889096, "grad_norm": 1.4198993856805493, "learning_rate": 1.384880911287539e-05, "loss": 2.2949, "reason_loss": 0.5309514403343201, "step": 1213, "utility_loss": 1.7639880180358887 }, { "cosine_similarity": 0.08669560485686847, "epoch": 1.131407269338304, "grad_norm": 1.3071481921484809, "learning_rate": 1.384190541939938e-05, "loss": 1.9414, "reason_loss": 0.508479118347168, "step": 1214, "utility_loss": 1.4328981637954712 }, { "cosine_similarity": 0.2184917409973243, "epoch": 1.1323392357875117, "grad_norm": 1.0748049397753798, "learning_rate": 1.383500172592337e-05, "loss": 1.7982, "reason_loss": 0.4931451082229614, "step": 1215, "utility_loss": 1.3050137758255005 }, { "cosine_similarity": 0.05872793609369321, "epoch": 1.1332712022367195, "grad_norm": 1.3659010516630785, "learning_rate": 1.382809803244736e-05, "loss": 2.1263, "reason_loss": 0.498487651348114, "step": 1216, "utility_loss": 1.6278491020202637 }, { "cosine_similarity": 0.009287185035041397, "epoch": 1.1342031686859273, "grad_norm": 1.5143695876328573, "learning_rate": 1.3821194338971351e-05, "loss": 1.6804, "reason_loss": 0.49579840898513794, "step": 1217, "utility_loss": 1.184584617614746 }, { "cosine_similarity": 0.1643538890141076, "epoch": 1.135135135135135, "grad_norm": 1.2012699086767973, "learning_rate": 1.381429064549534e-05, "loss": 2.0114, "reason_loss": 0.5066003203392029, "step": 1218, "utility_loss": 1.5047683715820312 }, { "cosine_similarity": 0.15212483516010422, "epoch": 1.1360671015843429, "grad_norm": 0.9570762238941625, "learning_rate": 1.3807386952019331e-05, "loss": 1.7613, "reason_loss": 0.49417808651924133, "step": 1219, "utility_loss": 1.2671632766723633 }, { "cosine_similarity": 0.10948483538838034, "epoch": 1.1369990680335509, "grad_norm": 1.270987520968199, "learning_rate": 1.3800483258543322e-05, "loss": 2.0056, "reason_loss": 0.5284931659698486, "step": 1220, "utility_loss": 1.477128505706787 }, { "cosine_similarity": 0.010251705879951127, "epoch": 1.1379310344827587, "grad_norm": 1.2401717870555764, "learning_rate": 1.3793579565067313e-05, "loss": 1.7484, "reason_loss": 0.5332733392715454, "step": 1221, "utility_loss": 1.2150797843933105 }, { "cosine_similarity": 0.07732210969106716, "epoch": 1.1388630009319665, "grad_norm": 1.1915663262204204, "learning_rate": 1.3786675871591302e-05, "loss": 1.6369, "reason_loss": 0.48158377408981323, "step": 1222, "utility_loss": 1.1553452014923096 }, { "cosine_similarity": 0.4677232210823734, "epoch": 1.1397949673811743, "grad_norm": 1.180250267829394, "learning_rate": 1.3779772178115294e-05, "loss": 1.9386, "reason_loss": 0.4945714473724365, "step": 1223, "utility_loss": 1.4439820051193237 }, { "cosine_similarity": 0.2304693790147672, "epoch": 1.140726933830382, "grad_norm": 1.4151061332312835, "learning_rate": 1.3772868484639283e-05, "loss": 2.0499, "reason_loss": 0.4900789260864258, "step": 1224, "utility_loss": 1.559781789779663 }, { "cosine_similarity": 0.265774217131656, "epoch": 1.14165890027959, "grad_norm": 1.1023178296699274, "learning_rate": 1.3765964791163274e-05, "loss": 1.7622, "reason_loss": 0.5249992609024048, "step": 1225, "utility_loss": 1.2372270822525024 }, { "cosine_similarity": 0.06497854518185289, "epoch": 1.1425908667287978, "grad_norm": 1.0175308082382555, "learning_rate": 1.3759061097687263e-05, "loss": 1.607, "reason_loss": 0.5399290323257446, "step": 1226, "utility_loss": 1.0670835971832275 }, { "cosine_similarity": 0.22506507880690413, "epoch": 1.1435228331780056, "grad_norm": 1.1035413082512127, "learning_rate": 1.3752157404211255e-05, "loss": 1.4229, "reason_loss": 0.5007145404815674, "step": 1227, "utility_loss": 0.9221951961517334 }, { "cosine_similarity": 0.15057704739812747, "epoch": 1.1444547996272134, "grad_norm": 2.138134285312285, "learning_rate": 1.3745253710735244e-05, "loss": 2.4615, "reason_loss": 0.5317467451095581, "step": 1228, "utility_loss": 1.9297184944152832 }, { "cosine_similarity": 0.18534654730166383, "epoch": 1.1453867660764212, "grad_norm": 1.406434666680661, "learning_rate": 1.3738350017259235e-05, "loss": 1.4592, "reason_loss": 0.476613312959671, "step": 1229, "utility_loss": 0.982568085193634 }, { "cosine_similarity": -0.2230954102310775, "epoch": 1.146318732525629, "grad_norm": 1.388591992661466, "learning_rate": 1.3731446323783224e-05, "loss": 1.9479, "reason_loss": 0.5020931959152222, "step": 1230, "utility_loss": 1.4457999467849731 }, { "cosine_similarity": 0.039777104309313624, "epoch": 1.147250698974837, "grad_norm": 1.2652542951567523, "learning_rate": 1.3724542630307217e-05, "loss": 1.7642, "reason_loss": 0.5455604791641235, "step": 1231, "utility_loss": 1.2185895442962646 }, { "cosine_similarity": 0.10336237161968145, "epoch": 1.1481826654240448, "grad_norm": 1.2218189947519942, "learning_rate": 1.3717638936831206e-05, "loss": 1.9581, "reason_loss": 0.4721214771270752, "step": 1232, "utility_loss": 1.485959768295288 }, { "cosine_similarity": -0.2679887296533443, "epoch": 1.1491146318732526, "grad_norm": 1.2015952657050903, "learning_rate": 1.3710735243355195e-05, "loss": 1.4825, "reason_loss": 0.49473828077316284, "step": 1233, "utility_loss": 0.9877695441246033 }, { "cosine_similarity": 0.06824835705984493, "epoch": 1.1500465983224604, "grad_norm": 1.3577480776195217, "learning_rate": 1.3703831549879187e-05, "loss": 1.6981, "reason_loss": 0.5103579759597778, "step": 1234, "utility_loss": 1.18770432472229 }, { "cosine_similarity": 0.2800081296901437, "epoch": 1.1509785647716682, "grad_norm": 1.3806789550717264, "learning_rate": 1.3696927856403178e-05, "loss": 1.916, "reason_loss": 0.5080949068069458, "step": 1235, "utility_loss": 1.4078943729400635 }, { "cosine_similarity": -0.024883331092988584, "epoch": 1.1519105312208762, "grad_norm": 1.0765561269328194, "learning_rate": 1.3690024162927167e-05, "loss": 1.9991, "reason_loss": 0.49978548288345337, "step": 1236, "utility_loss": 1.4993081092834473 }, { "cosine_similarity": 0.11078593579455917, "epoch": 1.152842497670084, "grad_norm": 1.1593489483774158, "learning_rate": 1.3683120469451156e-05, "loss": 1.9546, "reason_loss": 0.49687033891677856, "step": 1237, "utility_loss": 1.4577412605285645 }, { "cosine_similarity": 0.14841807828332548, "epoch": 1.1537744641192917, "grad_norm": 1.1510908450497432, "learning_rate": 1.3676216775975148e-05, "loss": 1.8911, "reason_loss": 0.5036906599998474, "step": 1238, "utility_loss": 1.3874026536941528 }, { "cosine_similarity": -0.09361238058471708, "epoch": 1.1547064305684995, "grad_norm": 1.2522681516115988, "learning_rate": 1.3669313082499137e-05, "loss": 1.7773, "reason_loss": 0.5033348798751831, "step": 1239, "utility_loss": 1.2739918231964111 }, { "cosine_similarity": 0.2514668220910077, "epoch": 1.1556383970177073, "grad_norm": 1.4083485170686005, "learning_rate": 1.3662409389023128e-05, "loss": 1.7829, "reason_loss": 0.5074604153633118, "step": 1240, "utility_loss": 1.2754206657409668 }, { "cosine_similarity": -0.03795479790931121, "epoch": 1.156570363466915, "grad_norm": 1.2091982131819776, "learning_rate": 1.365550569554712e-05, "loss": 1.7823, "reason_loss": 0.5261683464050293, "step": 1241, "utility_loss": 1.2561132907867432 }, { "cosine_similarity": 0.014120862858487205, "epoch": 1.157502329916123, "grad_norm": 1.3208724438122943, "learning_rate": 1.364860200207111e-05, "loss": 1.9959, "reason_loss": 0.502264142036438, "step": 1242, "utility_loss": 1.493600606918335 }, { "cosine_similarity": 0.013844159963230998, "epoch": 1.158434296365331, "grad_norm": 1.6024265217915015, "learning_rate": 1.3641698308595099e-05, "loss": 1.7286, "reason_loss": 0.5311128497123718, "step": 1243, "utility_loss": 1.197510004043579 }, { "cosine_similarity": 0.06486059643330688, "epoch": 1.1593662628145387, "grad_norm": 1.3542084132116616, "learning_rate": 1.363479461511909e-05, "loss": 2.0432, "reason_loss": 0.5257854461669922, "step": 1244, "utility_loss": 1.5174155235290527 }, { "cosine_similarity": -0.034711625659129886, "epoch": 1.1602982292637465, "grad_norm": 1.1715154504421659, "learning_rate": 1.3627890921643082e-05, "loss": 1.8342, "reason_loss": 0.4953833520412445, "step": 1245, "utility_loss": 1.3388354778289795 }, { "cosine_similarity": 0.08965142205806781, "epoch": 1.1612301957129543, "grad_norm": 1.028121810699531, "learning_rate": 1.362098722816707e-05, "loss": 1.7033, "reason_loss": 0.506205677986145, "step": 1246, "utility_loss": 1.1971087455749512 }, { "cosine_similarity": 0.3335735332654444, "epoch": 1.1621621621621623, "grad_norm": 1.1332038426156088, "learning_rate": 1.361408353469106e-05, "loss": 1.8338, "reason_loss": 0.529113233089447, "step": 1247, "utility_loss": 1.3046948909759521 }, { "cosine_similarity": -0.20052375380559298, "epoch": 1.16309412861137, "grad_norm": 1.6490197051487243, "learning_rate": 1.360717984121505e-05, "loss": 1.6447, "reason_loss": 0.5045652389526367, "step": 1248, "utility_loss": 1.1401457786560059 }, { "cosine_similarity": -0.08870646058124852, "epoch": 1.1640260950605779, "grad_norm": 2.0935435293217517, "learning_rate": 1.3600276147739041e-05, "loss": 2.3528, "reason_loss": 0.49728769063949585, "step": 1249, "utility_loss": 1.8555079698562622 }, { "cosine_similarity": 0.13083443054617783, "epoch": 1.1649580615097856, "grad_norm": 1.163955018958082, "learning_rate": 1.3593372454263032e-05, "loss": 1.8258, "reason_loss": 0.4733515977859497, "step": 1250, "utility_loss": 1.3524465560913086 }, { "cosine_similarity": 0.4056217612178402, "epoch": 1.1658900279589934, "grad_norm": 1.1592774907615964, "learning_rate": 1.3586468760787021e-05, "loss": 1.7355, "reason_loss": 0.47504064440727234, "step": 1251, "utility_loss": 1.2604737281799316 }, { "cosine_similarity": -0.019077006077548676, "epoch": 1.1668219944082012, "grad_norm": 1.1389959209176743, "learning_rate": 1.3579565067311014e-05, "loss": 1.5239, "reason_loss": 0.47376078367233276, "step": 1252, "utility_loss": 1.0501761436462402 }, { "cosine_similarity": -0.027033681434200794, "epoch": 1.167753960857409, "grad_norm": 1.257649177046181, "learning_rate": 1.3572661373835003e-05, "loss": 1.79, "reason_loss": 0.48681390285491943, "step": 1253, "utility_loss": 1.303168773651123 }, { "cosine_similarity": 0.054444298779441014, "epoch": 1.168685927306617, "grad_norm": 1.1908055355089786, "learning_rate": 1.3565757680358993e-05, "loss": 1.8915, "reason_loss": 0.48376187682151794, "step": 1254, "utility_loss": 1.4077211618423462 }, { "cosine_similarity": 0.15787787575218207, "epoch": 1.1696178937558248, "grad_norm": 1.3745930553691839, "learning_rate": 1.3558853986882982e-05, "loss": 1.9049, "reason_loss": 0.484552264213562, "step": 1255, "utility_loss": 1.4203391075134277 }, { "cosine_similarity": 0.16365884553301005, "epoch": 1.1705498602050326, "grad_norm": 1.4609101764277657, "learning_rate": 1.3551950293406975e-05, "loss": 1.9938, "reason_loss": 0.5098702907562256, "step": 1256, "utility_loss": 1.4839797019958496 }, { "cosine_similarity": 0.25286402776832123, "epoch": 1.1714818266542404, "grad_norm": 1.104417231931667, "learning_rate": 1.3545046599930964e-05, "loss": 2.0172, "reason_loss": 0.497200071811676, "step": 1257, "utility_loss": 1.519979476928711 }, { "cosine_similarity": 0.08631515513665533, "epoch": 1.1724137931034484, "grad_norm": 1.1290728321928512, "learning_rate": 1.3538142906454955e-05, "loss": 1.519, "reason_loss": 0.4769759774208069, "step": 1258, "utility_loss": 1.0420438051223755 }, { "cosine_similarity": 0.2229329947752906, "epoch": 1.1733457595526562, "grad_norm": 1.0387580215108005, "learning_rate": 1.3531239212978944e-05, "loss": 1.704, "reason_loss": 0.5283603668212891, "step": 1259, "utility_loss": 1.1756155490875244 }, { "cosine_similarity": 0.28542113857437373, "epoch": 1.174277726001864, "grad_norm": 1.0843522789005633, "learning_rate": 1.3524335519502936e-05, "loss": 1.7803, "reason_loss": 0.49408435821533203, "step": 1260, "utility_loss": 1.2862606048583984 }, { "cosine_similarity": 0.3257889695102764, "epoch": 1.1752096924510718, "grad_norm": 1.5323157880611311, "learning_rate": 1.3517431826026925e-05, "loss": 1.6915, "reason_loss": 0.49241143465042114, "step": 1261, "utility_loss": 1.1991137266159058 }, { "cosine_similarity": 0.29763940064958094, "epoch": 1.1761416589002796, "grad_norm": 1.3458303149247681, "learning_rate": 1.3510528132550916e-05, "loss": 1.7383, "reason_loss": 0.49074476957321167, "step": 1262, "utility_loss": 1.247593879699707 }, { "cosine_similarity": 0.36463774847737773, "epoch": 1.1770736253494873, "grad_norm": 1.0596190604009696, "learning_rate": 1.3503624439074907e-05, "loss": 1.7872, "reason_loss": 0.47894486784935, "step": 1263, "utility_loss": 1.3082267045974731 }, { "cosine_similarity": 0.2997586100458138, "epoch": 1.1780055917986951, "grad_norm": 1.1789642001938603, "learning_rate": 1.3496720745598897e-05, "loss": 1.8331, "reason_loss": 0.5164303779602051, "step": 1264, "utility_loss": 1.3166942596435547 }, { "cosine_similarity": 0.05206340010111586, "epoch": 1.1789375582479031, "grad_norm": 1.0975047569057685, "learning_rate": 1.3489817052122886e-05, "loss": 1.8639, "reason_loss": 0.4908619523048401, "step": 1265, "utility_loss": 1.3730583190917969 }, { "cosine_similarity": 0.09616133512837081, "epoch": 1.179869524697111, "grad_norm": 1.1817942576989766, "learning_rate": 1.3482913358646877e-05, "loss": 1.7077, "reason_loss": 0.5134609341621399, "step": 1266, "utility_loss": 1.1942079067230225 }, { "cosine_similarity": 0.4332799605951463, "epoch": 1.1808014911463187, "grad_norm": 1.2066372310680706, "learning_rate": 1.3476009665170868e-05, "loss": 1.894, "reason_loss": 0.5274332761764526, "step": 1267, "utility_loss": 1.3665239810943604 }, { "cosine_similarity": 0.20824265980935838, "epoch": 1.1817334575955265, "grad_norm": 1.4019745041031695, "learning_rate": 1.3469105971694858e-05, "loss": 1.8769, "reason_loss": 0.5152505040168762, "step": 1268, "utility_loss": 1.3616374731063843 }, { "cosine_similarity": 0.03846551620388525, "epoch": 1.1826654240447343, "grad_norm": 1.1297653902179512, "learning_rate": 1.3462202278218848e-05, "loss": 1.8153, "reason_loss": 0.5235704183578491, "step": 1269, "utility_loss": 1.2917311191558838 }, { "cosine_similarity": 0.0856680016492253, "epoch": 1.1835973904939423, "grad_norm": 1.166239865970818, "learning_rate": 1.3455298584742838e-05, "loss": 1.855, "reason_loss": 0.4903591275215149, "step": 1270, "utility_loss": 1.3646060228347778 }, { "cosine_similarity": 0.2438370355510403, "epoch": 1.18452935694315, "grad_norm": 1.034939224895192, "learning_rate": 1.3448394891266829e-05, "loss": 1.5455, "reason_loss": 0.5303598642349243, "step": 1271, "utility_loss": 1.0150914192199707 }, { "cosine_similarity": 0.1945804349299467, "epoch": 1.1854613233923579, "grad_norm": 1.0874483156286696, "learning_rate": 1.344149119779082e-05, "loss": 1.5625, "reason_loss": 0.5048030614852905, "step": 1272, "utility_loss": 1.0576804876327515 }, { "cosine_similarity": 0.23593921135320972, "epoch": 1.1863932898415657, "grad_norm": 1.2232985739997637, "learning_rate": 1.3434587504314809e-05, "loss": 1.6965, "reason_loss": 0.46846383810043335, "step": 1273, "utility_loss": 1.22800874710083 }, { "cosine_similarity": 0.12278503870227073, "epoch": 1.1873252562907735, "grad_norm": 1.237330387395931, "learning_rate": 1.3427683810838801e-05, "loss": 1.7106, "reason_loss": 0.4885047674179077, "step": 1274, "utility_loss": 1.2221406698226929 }, { "cosine_similarity": 0.39087514363224973, "epoch": 1.1882572227399812, "grad_norm": 1.432404387721035, "learning_rate": 1.342078011736279e-05, "loss": 1.921, "reason_loss": 0.5011680126190186, "step": 1275, "utility_loss": 1.4198095798492432 }, { "cosine_similarity": -0.148634990635076, "epoch": 1.1891891891891893, "grad_norm": 1.1521377761666258, "learning_rate": 1.3413876423886781e-05, "loss": 1.569, "reason_loss": 0.5181715488433838, "step": 1276, "utility_loss": 1.050801396369934 }, { "cosine_similarity": 0.17443937411606292, "epoch": 1.190121155638397, "grad_norm": 1.1872451497130299, "learning_rate": 1.340697273041077e-05, "loss": 1.8616, "reason_loss": 0.5371356010437012, "step": 1277, "utility_loss": 1.3244796991348267 }, { "cosine_similarity": 0.3515050557568323, "epoch": 1.1910531220876048, "grad_norm": 1.053910612129471, "learning_rate": 1.3400069036934762e-05, "loss": 1.806, "reason_loss": 0.5297192335128784, "step": 1278, "utility_loss": 1.2762738466262817 }, { "cosine_similarity": 0.09078568936820283, "epoch": 1.1919850885368126, "grad_norm": 1.260512372216086, "learning_rate": 1.3393165343458751e-05, "loss": 1.829, "reason_loss": 0.49282699823379517, "step": 1279, "utility_loss": 1.336150884628296 }, { "cosine_similarity": 0.019539399517102603, "epoch": 1.1929170549860204, "grad_norm": 1.1383787406602706, "learning_rate": 1.3386261649982742e-05, "loss": 1.4913, "reason_loss": 0.4936378300189972, "step": 1280, "utility_loss": 0.997641384601593 }, { "cosine_similarity": 0.18072503400106535, "epoch": 1.1938490214352284, "grad_norm": 1.1712517161899878, "learning_rate": 1.3379357956506733e-05, "loss": 1.9546, "reason_loss": 0.5110056400299072, "step": 1281, "utility_loss": 1.4435622692108154 }, { "cosine_similarity": 0.6334627542213357, "epoch": 1.1947809878844362, "grad_norm": 1.2260103055061415, "learning_rate": 1.3372454263030724e-05, "loss": 1.8333, "reason_loss": 0.4928759038448334, "step": 1282, "utility_loss": 1.3403805494308472 }, { "cosine_similarity": 0.20232172170389084, "epoch": 1.195712954333644, "grad_norm": 1.071176670736296, "learning_rate": 1.3365550569554713e-05, "loss": 1.563, "reason_loss": 0.5284441709518433, "step": 1283, "utility_loss": 1.0345478057861328 }, { "cosine_similarity": 0.07881884436347279, "epoch": 1.1966449207828518, "grad_norm": 1.173967870625619, "learning_rate": 1.3358646876078702e-05, "loss": 1.8907, "reason_loss": 0.5196015238761902, "step": 1284, "utility_loss": 1.3711321353912354 }, { "cosine_similarity": 0.2871841423274987, "epoch": 1.1975768872320596, "grad_norm": 1.284607229610137, "learning_rate": 1.3351743182602694e-05, "loss": 1.9874, "reason_loss": 0.5130667686462402, "step": 1285, "utility_loss": 1.4743354320526123 }, { "cosine_similarity": 0.15713590999054783, "epoch": 1.1985088536812674, "grad_norm": 0.9833025850162062, "learning_rate": 1.3344839489126685e-05, "loss": 1.6451, "reason_loss": 0.5338679552078247, "step": 1286, "utility_loss": 1.111212968826294 }, { "cosine_similarity": 0.02667939408773371, "epoch": 1.1994408201304754, "grad_norm": 1.2623669414169367, "learning_rate": 1.3337935795650674e-05, "loss": 1.8588, "reason_loss": 0.4697561264038086, "step": 1287, "utility_loss": 1.3890364170074463 }, { "cosine_similarity": 0.01867922070481744, "epoch": 1.2003727865796832, "grad_norm": 1.0840126959479994, "learning_rate": 1.3331032102174663e-05, "loss": 1.6116, "reason_loss": 0.49570193886756897, "step": 1288, "utility_loss": 1.115900993347168 }, { "cosine_similarity": 0.37055605429733846, "epoch": 1.201304753028891, "grad_norm": 0.9670582531779386, "learning_rate": 1.3324128408698655e-05, "loss": 1.1638, "reason_loss": 0.4817644953727722, "step": 1289, "utility_loss": 0.682047963142395 }, { "cosine_similarity": 0.743669927976803, "epoch": 1.2022367194780987, "grad_norm": 1.3671777738822462, "learning_rate": 1.3317224715222644e-05, "loss": 1.8599, "reason_loss": 0.4999830722808838, "step": 1290, "utility_loss": 1.3599263429641724 }, { "cosine_similarity": -0.14866024720461096, "epoch": 1.2031686859273065, "grad_norm": 1.622834979385821, "learning_rate": 1.3310321021746635e-05, "loss": 1.6248, "reason_loss": 0.5212887525558472, "step": 1291, "utility_loss": 1.1035128831863403 }, { "cosine_similarity": 0.1602203696007959, "epoch": 1.2041006523765145, "grad_norm": 2.3061579470038, "learning_rate": 1.3303417328270628e-05, "loss": 1.7676, "reason_loss": 0.45996391773223877, "step": 1292, "utility_loss": 1.307645320892334 }, { "cosine_similarity": -0.03909283516633406, "epoch": 1.2050326188257223, "grad_norm": 1.3877538668848084, "learning_rate": 1.3296513634794617e-05, "loss": 1.8784, "reason_loss": 0.5163048505783081, "step": 1293, "utility_loss": 1.362088918685913 }, { "cosine_similarity": 0.4314299490790082, "epoch": 1.2059645852749301, "grad_norm": 1.3553395939436381, "learning_rate": 1.3289609941318606e-05, "loss": 2.2903, "reason_loss": 0.5401628613471985, "step": 1294, "utility_loss": 1.7501246929168701 }, { "cosine_similarity": 0.20517885746434708, "epoch": 1.206896551724138, "grad_norm": 1.243318144682896, "learning_rate": 1.3282706247842596e-05, "loss": 1.6239, "reason_loss": 0.48635804653167725, "step": 1295, "utility_loss": 1.1375501155853271 }, { "cosine_similarity": 0.23384254730033335, "epoch": 1.2078285181733457, "grad_norm": 1.4914176983575007, "learning_rate": 1.3275802554366589e-05, "loss": 1.7476, "reason_loss": 0.518887996673584, "step": 1296, "utility_loss": 1.2287415266036987 }, { "cosine_similarity": 0.1350807064874851, "epoch": 1.2087604846225535, "grad_norm": 1.267940842419738, "learning_rate": 1.3268898860890578e-05, "loss": 1.7427, "reason_loss": 0.5058103799819946, "step": 1297, "utility_loss": 1.2368654012680054 }, { "cosine_similarity": 0.2221680932216513, "epoch": 1.2096924510717615, "grad_norm": 1.157190468843649, "learning_rate": 1.3261995167414567e-05, "loss": 1.8124, "reason_loss": 0.491862416267395, "step": 1298, "utility_loss": 1.3205180168151855 }, { "cosine_similarity": 0.1953317663893946, "epoch": 1.2106244175209693, "grad_norm": 1.18060334078055, "learning_rate": 1.3255091473938558e-05, "loss": 1.7179, "reason_loss": 0.515149712562561, "step": 1299, "utility_loss": 1.202782392501831 }, { "cosine_similarity": 0.2706253229079693, "epoch": 1.211556383970177, "grad_norm": 0.98918019301779, "learning_rate": 1.3248187780462548e-05, "loss": 1.608, "reason_loss": 0.5325948596000671, "step": 1300, "utility_loss": 1.075371265411377 }, { "cosine_similarity": 0.0463071153231333, "epoch": 1.2124883504193849, "grad_norm": 1.1505275313618757, "learning_rate": 1.3241284086986539e-05, "loss": 2.018, "reason_loss": 0.5182716846466064, "step": 1301, "utility_loss": 1.4997446537017822 }, { "cosine_similarity": 0.09244841246152251, "epoch": 1.2134203168685926, "grad_norm": 3.129329400588395, "learning_rate": 1.3234380393510528e-05, "loss": 1.9176, "reason_loss": 0.5262768268585205, "step": 1302, "utility_loss": 1.3913395404815674 }, { "cosine_similarity": 0.02197934171889935, "epoch": 1.2143522833178007, "grad_norm": 1.1006447400443236, "learning_rate": 1.322747670003452e-05, "loss": 1.6372, "reason_loss": 0.48976415395736694, "step": 1303, "utility_loss": 1.1474120616912842 }, { "cosine_similarity": 0.15998346912090178, "epoch": 1.2152842497670084, "grad_norm": 1.1770718150629436, "learning_rate": 1.322057300655851e-05, "loss": 1.9074, "reason_loss": 0.4791542887687683, "step": 1304, "utility_loss": 1.4282073974609375 }, { "cosine_similarity": 0.17219455127229863, "epoch": 1.2162162162162162, "grad_norm": 1.4119049452380712, "learning_rate": 1.32136693130825e-05, "loss": 1.959, "reason_loss": 0.5220723748207092, "step": 1305, "utility_loss": 1.4369083642959595 }, { "cosine_similarity": 0.35767284224902257, "epoch": 1.217148182665424, "grad_norm": 1.4544866778577794, "learning_rate": 1.320676561960649e-05, "loss": 1.7706, "reason_loss": 0.5090195536613464, "step": 1306, "utility_loss": 1.2615594863891602 }, { "cosine_similarity": 0.011210740422398063, "epoch": 1.2180801491146318, "grad_norm": 1.3499426843546363, "learning_rate": 1.3199861926130482e-05, "loss": 1.9866, "reason_loss": 0.5026766657829285, "step": 1307, "utility_loss": 1.4838736057281494 }, { "cosine_similarity": -0.047654481529497444, "epoch": 1.2190121155638396, "grad_norm": 1.424419214575723, "learning_rate": 1.3192958232654471e-05, "loss": 1.9767, "reason_loss": 0.4912029206752777, "step": 1308, "utility_loss": 1.4854774475097656 }, { "cosine_similarity": -0.11054328000773951, "epoch": 1.2199440820130476, "grad_norm": 1.1279355681948076, "learning_rate": 1.3186054539178462e-05, "loss": 1.573, "reason_loss": 0.49774402379989624, "step": 1309, "utility_loss": 1.0752465724945068 }, { "cosine_similarity": 0.4145060476724225, "epoch": 1.2208760484622554, "grad_norm": 1.3076225400958956, "learning_rate": 1.317915084570245e-05, "loss": 1.6583, "reason_loss": 0.47772565484046936, "step": 1310, "utility_loss": 1.18059504032135 }, { "cosine_similarity": 0.14070105783888712, "epoch": 1.2218080149114632, "grad_norm": 1.2905218953494961, "learning_rate": 1.3172247152226443e-05, "loss": 1.8233, "reason_loss": 0.5209530591964722, "step": 1311, "utility_loss": 1.302351713180542 }, { "cosine_similarity": 0.2625084595124106, "epoch": 1.222739981360671, "grad_norm": 1.1168385980809603, "learning_rate": 1.3165343458750432e-05, "loss": 1.9595, "reason_loss": 0.5557016134262085, "step": 1312, "utility_loss": 1.4037837982177734 }, { "cosine_similarity": 0.13427488076625507, "epoch": 1.2236719478098788, "grad_norm": 1.1636816700363044, "learning_rate": 1.3158439765274423e-05, "loss": 1.5958, "reason_loss": 0.4667941927909851, "step": 1313, "utility_loss": 1.129051923751831 }, { "cosine_similarity": 0.3816247213299287, "epoch": 1.2246039142590868, "grad_norm": 1.1508453744010707, "learning_rate": 1.3151536071798414e-05, "loss": 1.7418, "reason_loss": 0.5352809429168701, "step": 1314, "utility_loss": 1.2064870595932007 }, { "cosine_similarity": 0.18690123728708216, "epoch": 1.2255358807082946, "grad_norm": 1.1914389937285619, "learning_rate": 1.3144632378322404e-05, "loss": 1.7514, "reason_loss": 0.5431493520736694, "step": 1315, "utility_loss": 1.2082372903823853 }, { "cosine_similarity": 0.16593351320686794, "epoch": 1.2264678471575023, "grad_norm": 1.243229225174144, "learning_rate": 1.3137728684846393e-05, "loss": 1.7759, "reason_loss": 0.4870587885379791, "step": 1316, "utility_loss": 1.2888836860656738 }, { "cosine_similarity": 0.13390903368053872, "epoch": 1.2273998136067101, "grad_norm": 1.202460068885034, "learning_rate": 1.3130824991370384e-05, "loss": 1.7808, "reason_loss": 0.4983629584312439, "step": 1317, "utility_loss": 1.2824254035949707 }, { "cosine_similarity": 0.2068433262190376, "epoch": 1.228331780055918, "grad_norm": 1.098413878725346, "learning_rate": 1.3123921297894375e-05, "loss": 1.7515, "reason_loss": 0.4920496642589569, "step": 1318, "utility_loss": 1.259411096572876 }, { "cosine_similarity": -0.011967707160340884, "epoch": 1.2292637465051257, "grad_norm": 1.8785391830193345, "learning_rate": 1.3117017604418366e-05, "loss": 1.9678, "reason_loss": 0.5064708590507507, "step": 1319, "utility_loss": 1.4613072872161865 }, { "cosine_similarity": 0.20184474974865502, "epoch": 1.2301957129543337, "grad_norm": 1.482474787554017, "learning_rate": 1.3110113910942355e-05, "loss": 1.9137, "reason_loss": 0.5026677846908569, "step": 1320, "utility_loss": 1.4110339879989624 }, { "cosine_similarity": 0.26484778130484715, "epoch": 1.2311276794035415, "grad_norm": 1.2286611537006578, "learning_rate": 1.3103210217466345e-05, "loss": 1.6958, "reason_loss": 0.5432488918304443, "step": 1321, "utility_loss": 1.1525202989578247 }, { "cosine_similarity": -0.09802731036441442, "epoch": 1.2320596458527493, "grad_norm": 1.107783010832998, "learning_rate": 1.3096306523990336e-05, "loss": 1.5298, "reason_loss": 0.5017268657684326, "step": 1322, "utility_loss": 1.0280518531799316 }, { "cosine_similarity": -0.1538417793643042, "epoch": 1.232991612301957, "grad_norm": 1.2238262014726538, "learning_rate": 1.3089402830514327e-05, "loss": 1.8632, "reason_loss": 0.5123724937438965, "step": 1323, "utility_loss": 1.3507919311523438 }, { "cosine_similarity": 0.10311893599229621, "epoch": 1.2339235787511649, "grad_norm": 1.1651491507259826, "learning_rate": 1.3082499137038316e-05, "loss": 1.7946, "reason_loss": 0.5208158493041992, "step": 1324, "utility_loss": 1.2737572193145752 }, { "cosine_similarity": 0.27621925986273654, "epoch": 1.2348555452003729, "grad_norm": 1.2510700535521921, "learning_rate": 1.3075595443562308e-05, "loss": 1.6109, "reason_loss": 0.4947303533554077, "step": 1325, "utility_loss": 1.1162195205688477 }, { "cosine_similarity": 0.03327583530752671, "epoch": 1.2357875116495807, "grad_norm": 1.265491550703644, "learning_rate": 1.3068691750086297e-05, "loss": 1.7298, "reason_loss": 0.4967612028121948, "step": 1326, "utility_loss": 1.233069658279419 }, { "cosine_similarity": 0.2764140285689402, "epoch": 1.2367194780987885, "grad_norm": 1.2122954486032305, "learning_rate": 1.3061788056610288e-05, "loss": 2.2217, "reason_loss": 0.49880194664001465, "step": 1327, "utility_loss": 1.7229398488998413 }, { "cosine_similarity": 0.008950107630073747, "epoch": 1.2376514445479962, "grad_norm": 1.0362308444249664, "learning_rate": 1.3054884363134277e-05, "loss": 1.5056, "reason_loss": 0.5111600756645203, "step": 1328, "utility_loss": 0.9944434762001038 }, { "cosine_similarity": 0.02645654124626, "epoch": 1.238583410997204, "grad_norm": 1.2224440794738198, "learning_rate": 1.304798066965827e-05, "loss": 1.5761, "reason_loss": 0.5120253562927246, "step": 1329, "utility_loss": 1.0640789270401 }, { "cosine_similarity": 0.09720599302950612, "epoch": 1.2395153774464118, "grad_norm": 1.3776872563048743, "learning_rate": 1.3041076976182258e-05, "loss": 1.6393, "reason_loss": 0.4920811653137207, "step": 1330, "utility_loss": 1.147180438041687 }, { "cosine_similarity": 0.004730144922543473, "epoch": 1.2404473438956198, "grad_norm": 1.2397062985116694, "learning_rate": 1.3034173282706248e-05, "loss": 2.0573, "reason_loss": 0.5052160620689392, "step": 1331, "utility_loss": 1.5521328449249268 }, { "cosine_similarity": -0.004815246100440449, "epoch": 1.2413793103448276, "grad_norm": 1.1722838092117092, "learning_rate": 1.302726958923024e-05, "loss": 1.8891, "reason_loss": 0.4848223626613617, "step": 1332, "utility_loss": 1.4042969942092896 }, { "cosine_similarity": 0.16867255130155792, "epoch": 1.2423112767940354, "grad_norm": 1.250059675530502, "learning_rate": 1.302036589575423e-05, "loss": 1.6033, "reason_loss": 0.4862925708293915, "step": 1333, "utility_loss": 1.117006540298462 }, { "cosine_similarity": 0.10015207390780076, "epoch": 1.2432432432432432, "grad_norm": 1.1732195670305026, "learning_rate": 1.301346220227822e-05, "loss": 1.8015, "reason_loss": 0.501275897026062, "step": 1334, "utility_loss": 1.3001813888549805 }, { "cosine_similarity": 0.007893341199160714, "epoch": 1.244175209692451, "grad_norm": 1.3078633188143252, "learning_rate": 1.3006558508802209e-05, "loss": 1.9188, "reason_loss": 0.5137481689453125, "step": 1335, "utility_loss": 1.405093789100647 }, { "cosine_similarity": 0.3202390893687084, "epoch": 1.245107176141659, "grad_norm": 1.2039105618665271, "learning_rate": 1.2999654815326201e-05, "loss": 2.0891, "reason_loss": 0.48642560839653015, "step": 1336, "utility_loss": 1.6026633977890015 }, { "cosine_similarity": -0.1371963566215024, "epoch": 1.2460391425908668, "grad_norm": 1.12312582606836, "learning_rate": 1.2992751121850192e-05, "loss": 1.7115, "reason_loss": 0.5006451606750488, "step": 1337, "utility_loss": 1.2108752727508545 }, { "cosine_similarity": -0.1675452744430163, "epoch": 1.2469711090400746, "grad_norm": 1.1227007394581792, "learning_rate": 1.2985847428374181e-05, "loss": 1.9695, "reason_loss": 0.4878193140029907, "step": 1338, "utility_loss": 1.4816406965255737 }, { "cosine_similarity": -0.11639713540295395, "epoch": 1.2479030754892824, "grad_norm": 1.2183751760527153, "learning_rate": 1.297894373489817e-05, "loss": 1.7202, "reason_loss": 0.5359830260276794, "step": 1339, "utility_loss": 1.1842039823532104 }, { "cosine_similarity": 0.24054313688144877, "epoch": 1.2488350419384902, "grad_norm": 1.3904615842880501, "learning_rate": 1.2972040041422162e-05, "loss": 2.0411, "reason_loss": 0.516893208026886, "step": 1340, "utility_loss": 1.5241739749908447 }, { "cosine_similarity": 0.19965469114246218, "epoch": 1.249767008387698, "grad_norm": 1.3014736081736946, "learning_rate": 1.2965136347946151e-05, "loss": 1.9279, "reason_loss": 0.470916748046875, "step": 1341, "utility_loss": 1.4569838047027588 }, { "cosine_similarity": -0.18110889666960975, "epoch": 1.250698974836906, "grad_norm": 1.5649479696398925, "learning_rate": 1.2958232654470142e-05, "loss": 1.9182, "reason_loss": 0.5080807209014893, "step": 1342, "utility_loss": 1.410167932510376 }, { "cosine_similarity": -0.14832004093710358, "epoch": 1.2516309412861137, "grad_norm": 1.466058414790139, "learning_rate": 1.2951328960994135e-05, "loss": 1.5894, "reason_loss": 0.4782549738883972, "step": 1343, "utility_loss": 1.1111562252044678 }, { "cosine_similarity": -0.014194607639964305, "epoch": 1.2525629077353215, "grad_norm": 1.578707723206034, "learning_rate": 1.2944425267518124e-05, "loss": 1.9427, "reason_loss": 0.48215270042419434, "step": 1344, "utility_loss": 1.460590124130249 }, { "cosine_similarity": -0.13171762763521078, "epoch": 1.2534948741845293, "grad_norm": 1.190971546144224, "learning_rate": 1.2937521574042113e-05, "loss": 1.7048, "reason_loss": 0.4960939288139343, "step": 1345, "utility_loss": 1.2087254524230957 }, { "cosine_similarity": 0.03107028553952617, "epoch": 1.254426840633737, "grad_norm": 1.1149000276202354, "learning_rate": 1.2930617880566103e-05, "loss": 1.9092, "reason_loss": 0.4939919114112854, "step": 1346, "utility_loss": 1.4152064323425293 }, { "cosine_similarity": 0.08411452577522201, "epoch": 1.2553588070829451, "grad_norm": 1.2099786916072948, "learning_rate": 1.2923714187090096e-05, "loss": 2.0476, "reason_loss": 0.48796913027763367, "step": 1347, "utility_loss": 1.5596284866333008 }, { "cosine_similarity": 0.4808410862918331, "epoch": 1.256290773532153, "grad_norm": 1.246302418770171, "learning_rate": 1.2916810493614085e-05, "loss": 1.8415, "reason_loss": 0.5207837820053101, "step": 1348, "utility_loss": 1.3207049369812012 }, { "cosine_similarity": 0.38325175914736953, "epoch": 1.2572227399813607, "grad_norm": 1.0010069941753088, "learning_rate": 1.2909906800138074e-05, "loss": 1.827, "reason_loss": 0.5177526473999023, "step": 1349, "utility_loss": 1.3092267513275146 }, { "cosine_similarity": 0.027829112690448625, "epoch": 1.2581547064305685, "grad_norm": 1.0769492635846423, "learning_rate": 1.2903003106662065e-05, "loss": 1.5348, "reason_loss": 0.49185726046562195, "step": 1350, "utility_loss": 1.042985439300537 }, { "cosine_similarity": 0.516374656471606, "epoch": 1.2590866728797763, "grad_norm": 1.0763656995954858, "learning_rate": 1.2896099413186055e-05, "loss": 1.476, "reason_loss": 0.5192269682884216, "step": 1351, "utility_loss": 0.9567625522613525 }, { "cosine_similarity": 0.41720259547635324, "epoch": 1.260018639328984, "grad_norm": 1.3236919368205546, "learning_rate": 1.2889195719710046e-05, "loss": 2.0477, "reason_loss": 0.5077528357505798, "step": 1352, "utility_loss": 1.5399738550186157 }, { "cosine_similarity": 0.14362427614545983, "epoch": 1.2609506057781918, "grad_norm": 1.232290855969698, "learning_rate": 1.2882292026234035e-05, "loss": 1.8612, "reason_loss": 0.5312519073486328, "step": 1353, "utility_loss": 1.3299732208251953 }, { "cosine_similarity": -0.04604529275499894, "epoch": 1.2618825722273999, "grad_norm": 1.1787992585620286, "learning_rate": 1.2875388332758028e-05, "loss": 2.0721, "reason_loss": 0.5339577198028564, "step": 1354, "utility_loss": 1.5381639003753662 }, { "cosine_similarity": 0.29921659456824484, "epoch": 1.2628145386766076, "grad_norm": 1.3481877359101562, "learning_rate": 1.2868484639282017e-05, "loss": 1.7704, "reason_loss": 0.49796023964881897, "step": 1355, "utility_loss": 1.2724705934524536 }, { "cosine_similarity": 0.29918954338352866, "epoch": 1.2637465051258154, "grad_norm": 1.2507246398052028, "learning_rate": 1.2861580945806007e-05, "loss": 1.844, "reason_loss": 0.539916455745697, "step": 1356, "utility_loss": 1.30411958694458 }, { "cosine_similarity": 0.14511195617655293, "epoch": 1.2646784715750232, "grad_norm": 1.131222765978877, "learning_rate": 1.2854677252329996e-05, "loss": 1.904, "reason_loss": 0.4907279312610626, "step": 1357, "utility_loss": 1.4132404327392578 }, { "cosine_similarity": 0.33853957586857575, "epoch": 1.2656104380242312, "grad_norm": 1.1419980158122545, "learning_rate": 1.2847773558853989e-05, "loss": 1.6086, "reason_loss": 0.5077886581420898, "step": 1358, "utility_loss": 1.100803017616272 }, { "cosine_similarity": 0.11315365882538118, "epoch": 1.266542404473439, "grad_norm": 1.1025052343342643, "learning_rate": 1.2840869865377978e-05, "loss": 1.4213, "reason_loss": 0.4668729603290558, "step": 1359, "utility_loss": 0.9544147253036499 }, { "cosine_similarity": 0.11179723890557366, "epoch": 1.2674743709226468, "grad_norm": 1.0462594796797182, "learning_rate": 1.2833966171901969e-05, "loss": 1.7392, "reason_loss": 0.49482446908950806, "step": 1360, "utility_loss": 1.2443573474884033 }, { "cosine_similarity": 0.18252257561404125, "epoch": 1.2684063373718546, "grad_norm": 1.0996927136705037, "learning_rate": 1.2827062478425958e-05, "loss": 1.8568, "reason_loss": 0.5235320329666138, "step": 1361, "utility_loss": 1.333228349685669 }, { "cosine_similarity": 0.3962637927246402, "epoch": 1.2693383038210624, "grad_norm": 1.2726993910898379, "learning_rate": 1.282015878494995e-05, "loss": 1.8522, "reason_loss": 0.4947126507759094, "step": 1362, "utility_loss": 1.3575363159179688 }, { "cosine_similarity": 0.11401189646827249, "epoch": 1.2702702702702702, "grad_norm": 1.0373489602237407, "learning_rate": 1.2813255091473939e-05, "loss": 1.4975, "reason_loss": 0.4790950417518616, "step": 1363, "utility_loss": 1.0184065103530884 }, { "cosine_similarity": 0.16599468091457142, "epoch": 1.271202236719478, "grad_norm": 1.2178641450989642, "learning_rate": 1.280635139799793e-05, "loss": 2.0041, "reason_loss": 0.4921734035015106, "step": 1364, "utility_loss": 1.5119240283966064 }, { "cosine_similarity": 0.0925579029920847, "epoch": 1.272134203168686, "grad_norm": 1.2513989090422954, "learning_rate": 1.279944770452192e-05, "loss": 1.5864, "reason_loss": 0.5035254955291748, "step": 1365, "utility_loss": 1.082856297492981 }, { "cosine_similarity": 0.2494498065420367, "epoch": 1.2730661696178938, "grad_norm": 1.3855373046414836, "learning_rate": 1.2792544011045911e-05, "loss": 1.7658, "reason_loss": 0.5087770223617554, "step": 1366, "utility_loss": 1.2569777965545654 }, { "cosine_similarity": 0.3830956503085776, "epoch": 1.2739981360671015, "grad_norm": 1.3033870273573822, "learning_rate": 1.27856403175699e-05, "loss": 2.1779, "reason_loss": 0.5017102956771851, "step": 1367, "utility_loss": 1.6761785745620728 }, { "cosine_similarity": 0.08155826994030946, "epoch": 1.2749301025163093, "grad_norm": 1.2255422226135662, "learning_rate": 1.2778736624093891e-05, "loss": 1.9539, "reason_loss": 0.5280758142471313, "step": 1368, "utility_loss": 1.425819754600525 }, { "cosine_similarity": -0.19297812130660355, "epoch": 1.2758620689655173, "grad_norm": 1.1592141585773612, "learning_rate": 1.2771832930617882e-05, "loss": 2.0182, "reason_loss": 0.4924653470516205, "step": 1369, "utility_loss": 1.5256962776184082 }, { "cosine_similarity": -0.013651273967768673, "epoch": 1.2767940354147251, "grad_norm": 1.22774307085962, "learning_rate": 1.2764929237141873e-05, "loss": 1.7402, "reason_loss": 0.5040500164031982, "step": 1370, "utility_loss": 1.2361161708831787 }, { "cosine_similarity": 0.4477052677817876, "epoch": 1.277726001863933, "grad_norm": 1.2604528524952074, "learning_rate": 1.2758025543665862e-05, "loss": 1.8312, "reason_loss": 0.4976021945476532, "step": 1371, "utility_loss": 1.333629846572876 }, { "cosine_similarity": 0.5397377737065079, "epoch": 1.2786579683131407, "grad_norm": 1.03135143493862, "learning_rate": 1.2751121850189854e-05, "loss": 2.0306, "reason_loss": 0.5035915970802307, "step": 1372, "utility_loss": 1.5270450115203857 }, { "cosine_similarity": -0.1104951009237102, "epoch": 1.2795899347623485, "grad_norm": 1.247629709956434, "learning_rate": 1.2744218156713843e-05, "loss": 2.0857, "reason_loss": 0.4751870036125183, "step": 1373, "utility_loss": 1.6104854345321655 }, { "cosine_similarity": 0.3163211164507577, "epoch": 1.2805219012115563, "grad_norm": 1.0808713599162094, "learning_rate": 1.2737314463237834e-05, "loss": 1.6436, "reason_loss": 0.5084066390991211, "step": 1374, "utility_loss": 1.1351678371429443 }, { "cosine_similarity": 0.1623356396235974, "epoch": 1.281453867660764, "grad_norm": 0.9445165049996074, "learning_rate": 1.2730410769761823e-05, "loss": 1.9012, "reason_loss": 0.4766274094581604, "step": 1375, "utility_loss": 1.424546480178833 }, { "cosine_similarity": -0.07602142108722092, "epoch": 1.282385834109972, "grad_norm": 1.2642890790971761, "learning_rate": 1.2723507076285815e-05, "loss": 1.6417, "reason_loss": 0.5127208828926086, "step": 1376, "utility_loss": 1.128972053527832 }, { "cosine_similarity": 0.4111739655694207, "epoch": 1.2833178005591799, "grad_norm": 1.3042249263419443, "learning_rate": 1.2716603382809804e-05, "loss": 1.787, "reason_loss": 0.4734501838684082, "step": 1377, "utility_loss": 1.3135117292404175 }, { "cosine_similarity": 0.34958761019436446, "epoch": 1.2842497670083877, "grad_norm": 1.252176752263432, "learning_rate": 1.2709699689333795e-05, "loss": 1.6501, "reason_loss": 0.5137372016906738, "step": 1378, "utility_loss": 1.1363321542739868 }, { "cosine_similarity": 0.3110049930951638, "epoch": 1.2851817334575955, "grad_norm": 1.5224778088512234, "learning_rate": 1.2702795995857784e-05, "loss": 1.7365, "reason_loss": 0.5082594156265259, "step": 1379, "utility_loss": 1.228290319442749 }, { "cosine_similarity": 0.14290959887526386, "epoch": 1.2861136999068035, "grad_norm": 1.3510634397870345, "learning_rate": 1.2695892302381776e-05, "loss": 1.8417, "reason_loss": 0.5354738831520081, "step": 1380, "utility_loss": 1.3062362670898438 }, { "cosine_similarity": 0.07161375360014174, "epoch": 1.2870456663560113, "grad_norm": 1.221858014683552, "learning_rate": 1.2688988608905766e-05, "loss": 1.9959, "reason_loss": 0.5115225315093994, "step": 1381, "utility_loss": 1.4843697547912598 }, { "cosine_similarity": 0.5703029986783443, "epoch": 1.287977632805219, "grad_norm": 1.0896248393423762, "learning_rate": 1.2682084915429755e-05, "loss": 1.5916, "reason_loss": 0.5022422075271606, "step": 1382, "utility_loss": 1.0893127918243408 }, { "cosine_similarity": 0.24997669852924795, "epoch": 1.2889095992544268, "grad_norm": 1.2646568530689437, "learning_rate": 1.2675181221953747e-05, "loss": 1.818, "reason_loss": 0.5268816947937012, "step": 1383, "utility_loss": 1.2910783290863037 }, { "cosine_similarity": -0.0400964661612554, "epoch": 1.2898415657036346, "grad_norm": 1.2737010691955661, "learning_rate": 1.2668277528477738e-05, "loss": 1.8537, "reason_loss": 0.5106798410415649, "step": 1384, "utility_loss": 1.3430066108703613 }, { "cosine_similarity": 0.12791755626949364, "epoch": 1.2907735321528424, "grad_norm": 1.2193536344262892, "learning_rate": 1.2661373835001727e-05, "loss": 1.7942, "reason_loss": 0.5133605003356934, "step": 1385, "utility_loss": 1.2808691263198853 }, { "cosine_similarity": 0.2390358061372223, "epoch": 1.2917054986020502, "grad_norm": 1.1978899100306495, "learning_rate": 1.2654470141525716e-05, "loss": 1.9513, "reason_loss": 0.4910928010940552, "step": 1386, "utility_loss": 1.4601702690124512 }, { "cosine_similarity": -0.01584952194265315, "epoch": 1.2926374650512582, "grad_norm": 1.2714815542501168, "learning_rate": 1.2647566448049708e-05, "loss": 1.5718, "reason_loss": 0.49250736832618713, "step": 1387, "utility_loss": 1.0793302059173584 }, { "cosine_similarity": 0.262015434849436, "epoch": 1.293569431500466, "grad_norm": 1.2090246798285036, "learning_rate": 1.2640662754573699e-05, "loss": 1.5242, "reason_loss": 0.5163494348526001, "step": 1388, "utility_loss": 1.0078120231628418 }, { "cosine_similarity": 0.4624752230111939, "epoch": 1.2945013979496738, "grad_norm": 1.3248296465861946, "learning_rate": 1.2633759061097688e-05, "loss": 1.6428, "reason_loss": 0.505292534828186, "step": 1389, "utility_loss": 1.137521505355835 }, { "cosine_similarity": -0.02305408448943513, "epoch": 1.2954333643988816, "grad_norm": 1.1396674256236503, "learning_rate": 1.2626855367621677e-05, "loss": 1.7321, "reason_loss": 0.49454885721206665, "step": 1390, "utility_loss": 1.2375147342681885 }, { "cosine_similarity": 0.3088572121324208, "epoch": 1.2963653308480896, "grad_norm": 1.4894560486538466, "learning_rate": 1.261995167414567e-05, "loss": 1.7651, "reason_loss": 0.4872030019760132, "step": 1391, "utility_loss": 1.277907371520996 }, { "cosine_similarity": 0.3536526593042614, "epoch": 1.2972972972972974, "grad_norm": 1.297410468802267, "learning_rate": 1.2613047980669658e-05, "loss": 2.0115, "reason_loss": 0.5100244283676147, "step": 1392, "utility_loss": 1.5015192031860352 }, { "cosine_similarity": 0.3575375643617717, "epoch": 1.2982292637465052, "grad_norm": 1.3807557599381954, "learning_rate": 1.260614428719365e-05, "loss": 1.7222, "reason_loss": 0.5116353034973145, "step": 1393, "utility_loss": 1.2105445861816406 }, { "cosine_similarity": 0.005496752110741205, "epoch": 1.299161230195713, "grad_norm": 1.151993719855253, "learning_rate": 1.2599240593717642e-05, "loss": 1.7579, "reason_loss": 0.5240166783332825, "step": 1394, "utility_loss": 1.233915090560913 }, { "cosine_similarity": 0.00843985771688399, "epoch": 1.3000931966449207, "grad_norm": 1.1560844758485707, "learning_rate": 1.259233690024163e-05, "loss": 1.7582, "reason_loss": 0.5030604600906372, "step": 1395, "utility_loss": 1.25511634349823 }, { "cosine_similarity": 0.09100106009008296, "epoch": 1.3010251630941285, "grad_norm": 1.3841084304274474, "learning_rate": 1.258543320676562e-05, "loss": 1.989, "reason_loss": 0.49832645058631897, "step": 1396, "utility_loss": 1.490648627281189 }, { "cosine_similarity": 0.2237959079363047, "epoch": 1.3019571295433363, "grad_norm": 1.101230864795273, "learning_rate": 1.257852951328961e-05, "loss": 1.6445, "reason_loss": 0.5075079202651978, "step": 1397, "utility_loss": 1.1370313167572021 }, { "cosine_similarity": 0.3497866272019102, "epoch": 1.3028890959925443, "grad_norm": 1.0769808184689862, "learning_rate": 1.2571625819813603e-05, "loss": 1.2894, "reason_loss": 0.49022990465164185, "step": 1398, "utility_loss": 0.7991273999214172 }, { "cosine_similarity": 0.027065279852155656, "epoch": 1.303821062441752, "grad_norm": 1.2483931927450307, "learning_rate": 1.2564722126337592e-05, "loss": 1.755, "reason_loss": 0.491653174161911, "step": 1399, "utility_loss": 1.26334547996521 }, { "cosine_similarity": -0.0989716948750505, "epoch": 1.30475302889096, "grad_norm": 0.9965958941878693, "learning_rate": 1.2557818432861581e-05, "loss": 1.5994, "reason_loss": 0.5048888921737671, "step": 1400, "utility_loss": 1.0944745540618896 }, { "cosine_similarity": -0.13857392098657592, "epoch": 1.3056849953401677, "grad_norm": 1.319445227895055, "learning_rate": 1.2550914739385572e-05, "loss": 1.5691, "reason_loss": 0.5141233205795288, "step": 1401, "utility_loss": 1.0549771785736084 }, { "cosine_similarity": 0.16651416243982364, "epoch": 1.3066169617893757, "grad_norm": 1.111522322078229, "learning_rate": 1.2544011045909562e-05, "loss": 1.6685, "reason_loss": 0.5404092073440552, "step": 1402, "utility_loss": 1.128122091293335 }, { "cosine_similarity": 0.28128135281678196, "epoch": 1.3075489282385835, "grad_norm": 1.2296288649923424, "learning_rate": 1.2537107352433553e-05, "loss": 1.773, "reason_loss": 0.478617787361145, "step": 1403, "utility_loss": 1.2943551540374756 }, { "cosine_similarity": 0.28164137992494825, "epoch": 1.3084808946877913, "grad_norm": 1.215553139401925, "learning_rate": 1.2530203658957542e-05, "loss": 1.9159, "reason_loss": 0.5239308476448059, "step": 1404, "utility_loss": 1.3919589519500732 }, { "cosine_similarity": 0.2237780648561012, "epoch": 1.309412861136999, "grad_norm": 1.5481385316288672, "learning_rate": 1.2523299965481535e-05, "loss": 1.7747, "reason_loss": 0.4984341263771057, "step": 1405, "utility_loss": 1.2762649059295654 }, { "cosine_similarity": 0.39839523405962773, "epoch": 1.3103448275862069, "grad_norm": 1.3878671549576225, "learning_rate": 1.2516396272005524e-05, "loss": 2.0245, "reason_loss": 0.49763917922973633, "step": 1406, "utility_loss": 1.5268230438232422 }, { "cosine_similarity": 0.03667722362060217, "epoch": 1.3112767940354146, "grad_norm": 1.3337609031798825, "learning_rate": 1.2509492578529514e-05, "loss": 1.9457, "reason_loss": 0.4836156964302063, "step": 1407, "utility_loss": 1.4620509147644043 }, { "cosine_similarity": 0.18453157569578482, "epoch": 1.3122087604846224, "grad_norm": 1.2511182570869879, "learning_rate": 1.2502588885053503e-05, "loss": 2.0767, "reason_loss": 0.49834567308425903, "step": 1408, "utility_loss": 1.5783268213272095 }, { "cosine_similarity": -0.16517535886474177, "epoch": 1.3131407269338304, "grad_norm": 1.4369067418823451, "learning_rate": 1.2495685191577496e-05, "loss": 1.7749, "reason_loss": 0.49247121810913086, "step": 1409, "utility_loss": 1.2823898792266846 }, { "cosine_similarity": 0.17891323050890648, "epoch": 1.3140726933830382, "grad_norm": 1.3314482539730335, "learning_rate": 1.2488781498101485e-05, "loss": 1.8979, "reason_loss": 0.5235434174537659, "step": 1410, "utility_loss": 1.374333143234253 }, { "cosine_similarity": 0.5164069608977432, "epoch": 1.315004659832246, "grad_norm": 1.4056414868851728, "learning_rate": 1.2481877804625476e-05, "loss": 1.8027, "reason_loss": 0.5027375221252441, "step": 1411, "utility_loss": 1.2999271154403687 }, { "cosine_similarity": 0.5617596417374744, "epoch": 1.3159366262814538, "grad_norm": 1.3721767228910682, "learning_rate": 1.2474974111149465e-05, "loss": 1.5849, "reason_loss": 0.4801362156867981, "step": 1412, "utility_loss": 1.104813575744629 }, { "cosine_similarity": -0.09951759336254695, "epoch": 1.3168685927306618, "grad_norm": 1.3241889171046537, "learning_rate": 1.2468070417673457e-05, "loss": 1.9404, "reason_loss": 0.49439185857772827, "step": 1413, "utility_loss": 1.4459803104400635 }, { "cosine_similarity": 0.0826871308684401, "epoch": 1.3178005591798696, "grad_norm": 1.641806527779013, "learning_rate": 1.2461166724197446e-05, "loss": 1.7622, "reason_loss": 0.5288430452346802, "step": 1414, "utility_loss": 1.2333652973175049 }, { "cosine_similarity": 0.740592210762767, "epoch": 1.3187325256290774, "grad_norm": 1.1239216489072865, "learning_rate": 1.2454263030721437e-05, "loss": 2.0426, "reason_loss": 0.5126285552978516, "step": 1415, "utility_loss": 1.5300118923187256 }, { "cosine_similarity": 0.09856182721204948, "epoch": 1.3196644920782852, "grad_norm": 1.5686280149544078, "learning_rate": 1.2447359337245428e-05, "loss": 1.7722, "reason_loss": 0.4790421724319458, "step": 1416, "utility_loss": 1.2931361198425293 }, { "cosine_similarity": -0.11790754644917911, "epoch": 1.320596458527493, "grad_norm": 1.0661862951475196, "learning_rate": 1.2440455643769418e-05, "loss": 1.7392, "reason_loss": 0.46123433113098145, "step": 1417, "utility_loss": 1.277936339378357 }, { "cosine_similarity": 0.042843921627986566, "epoch": 1.3215284249767008, "grad_norm": 1.1595625865211727, "learning_rate": 1.2433551950293407e-05, "loss": 1.7718, "reason_loss": 0.5016121864318848, "step": 1418, "utility_loss": 1.2701964378356934 }, { "cosine_similarity": -0.04922056846207705, "epoch": 1.3224603914259085, "grad_norm": 1.1428526301316997, "learning_rate": 1.2426648256817398e-05, "loss": 1.9005, "reason_loss": 0.4850781559944153, "step": 1419, "utility_loss": 1.4154284000396729 }, { "cosine_similarity": 0.5235701296193358, "epoch": 1.3233923578751166, "grad_norm": 1.2241351547901946, "learning_rate": 1.2419744563341389e-05, "loss": 1.8892, "reason_loss": 0.49591895937919617, "step": 1420, "utility_loss": 1.3933013677597046 }, { "cosine_similarity": 0.2540347961650161, "epoch": 1.3243243243243243, "grad_norm": 1.3837506171456804, "learning_rate": 1.241284086986538e-05, "loss": 2.0103, "reason_loss": 0.4735363721847534, "step": 1421, "utility_loss": 1.5367431640625 }, { "cosine_similarity": 0.06248180603747934, "epoch": 1.3252562907735321, "grad_norm": 1.090673295827376, "learning_rate": 1.2405937176389369e-05, "loss": 1.5258, "reason_loss": 0.5020819306373596, "step": 1422, "utility_loss": 1.0237314701080322 }, { "cosine_similarity": 0.3311166178596249, "epoch": 1.32618825722274, "grad_norm": 1.241365690216852, "learning_rate": 1.2399033482913361e-05, "loss": 1.9026, "reason_loss": 0.5149672031402588, "step": 1423, "utility_loss": 1.3876323699951172 }, { "cosine_similarity": 0.26485650755231316, "epoch": 1.327120223671948, "grad_norm": 1.2003281938779657, "learning_rate": 1.239212978943735e-05, "loss": 1.7318, "reason_loss": 0.49437516927719116, "step": 1424, "utility_loss": 1.2374310493469238 }, { "cosine_similarity": -0.09809560983262412, "epoch": 1.3280521901211557, "grad_norm": 0.9866406848595985, "learning_rate": 1.238522609596134e-05, "loss": 1.5903, "reason_loss": 0.5098246335983276, "step": 1425, "utility_loss": 1.0804698467254639 }, { "cosine_similarity": 0.04377927332551189, "epoch": 1.3289841565703635, "grad_norm": 1.2595706940184441, "learning_rate": 1.237832240248533e-05, "loss": 1.9052, "reason_loss": 0.5366163849830627, "step": 1426, "utility_loss": 1.3686305284500122 }, { "cosine_similarity": 0.17801709106949934, "epoch": 1.3299161230195713, "grad_norm": 1.17540104758309, "learning_rate": 1.2371418709009322e-05, "loss": 1.7841, "reason_loss": 0.49963951110839844, "step": 1427, "utility_loss": 1.2844465970993042 }, { "cosine_similarity": 0.3951342420764964, "epoch": 1.330848089468779, "grad_norm": 1.0959928527621872, "learning_rate": 1.2364515015533311e-05, "loss": 1.8546, "reason_loss": 0.46799027919769287, "step": 1428, "utility_loss": 1.3866015672683716 }, { "cosine_similarity": 0.2810896512007562, "epoch": 1.3317800559179869, "grad_norm": 1.1348162850439611, "learning_rate": 1.2357611322057302e-05, "loss": 1.9481, "reason_loss": 0.5256019234657288, "step": 1429, "utility_loss": 1.422533392906189 }, { "cosine_similarity": 0.1658038910950947, "epoch": 1.3327120223671947, "grad_norm": 1.3720727520428433, "learning_rate": 1.2350707628581291e-05, "loss": 1.8856, "reason_loss": 0.500815749168396, "step": 1430, "utility_loss": 1.3847362995147705 }, { "cosine_similarity": 0.24980292386256106, "epoch": 1.3336439888164027, "grad_norm": 1.2124494762795686, "learning_rate": 1.2343803935105284e-05, "loss": 1.8331, "reason_loss": 0.4942387640476227, "step": 1431, "utility_loss": 1.3388793468475342 }, { "cosine_similarity": 0.20322248190192368, "epoch": 1.3345759552656105, "grad_norm": 1.1611563031464274, "learning_rate": 1.2336900241629273e-05, "loss": 1.9854, "reason_loss": 0.5338807702064514, "step": 1432, "utility_loss": 1.4515380859375 }, { "cosine_similarity": 0.012537988162780811, "epoch": 1.3355079217148182, "grad_norm": 1.2552340003388895, "learning_rate": 1.2329996548153262e-05, "loss": 1.5853, "reason_loss": 0.5106322169303894, "step": 1433, "utility_loss": 1.0746662616729736 }, { "cosine_similarity": 0.23568904915171315, "epoch": 1.336439888164026, "grad_norm": 1.4378608949798029, "learning_rate": 1.2323092854677254e-05, "loss": 1.4015, "reason_loss": 0.5236907601356506, "step": 1434, "utility_loss": 0.8778533339500427 }, { "cosine_similarity": -0.19773581029854054, "epoch": 1.337371854613234, "grad_norm": 1.3456785549192318, "learning_rate": 1.2316189161201245e-05, "loss": 1.7582, "reason_loss": 0.49704694747924805, "step": 1435, "utility_loss": 1.2611864805221558 }, { "cosine_similarity": 0.36434268904428624, "epoch": 1.3383038210624418, "grad_norm": 1.1273891390374498, "learning_rate": 1.2309285467725234e-05, "loss": 1.7916, "reason_loss": 0.5146712064743042, "step": 1436, "utility_loss": 1.2769064903259277 }, { "cosine_similarity": 0.16738624596896476, "epoch": 1.3392357875116496, "grad_norm": 1.6714342944934377, "learning_rate": 1.2302381774249223e-05, "loss": 1.7411, "reason_loss": 0.4950256943702698, "step": 1437, "utility_loss": 1.2461152076721191 }, { "cosine_similarity": 0.35631879629637225, "epoch": 1.3401677539608574, "grad_norm": 1.158069694091623, "learning_rate": 1.2295478080773215e-05, "loss": 1.7143, "reason_loss": 0.5033054351806641, "step": 1438, "utility_loss": 1.2109968662261963 }, { "cosine_similarity": 0.1295746881602834, "epoch": 1.3410997204100652, "grad_norm": 1.1969554051657603, "learning_rate": 1.2288574387297206e-05, "loss": 2.083, "reason_loss": 0.48583656549453735, "step": 1439, "utility_loss": 1.5971825122833252 }, { "cosine_similarity": 0.040527465746509665, "epoch": 1.342031686859273, "grad_norm": 1.0945353230191572, "learning_rate": 1.2281670693821195e-05, "loss": 2.1412, "reason_loss": 0.5095082521438599, "step": 1440, "utility_loss": 1.631702184677124 }, { "cosine_similarity": 0.011428246962429854, "epoch": 1.3429636533084808, "grad_norm": 1.976155996308262, "learning_rate": 1.2274767000345184e-05, "loss": 2.0195, "reason_loss": 0.5290924310684204, "step": 1441, "utility_loss": 1.4903936386108398 }, { "cosine_similarity": 0.02896595980126354, "epoch": 1.3438956197576888, "grad_norm": 1.2826281846685896, "learning_rate": 1.2267863306869176e-05, "loss": 1.8425, "reason_loss": 0.4850728511810303, "step": 1442, "utility_loss": 1.3574039936065674 }, { "cosine_similarity": 0.04227889577973056, "epoch": 1.3448275862068966, "grad_norm": 1.011309764520394, "learning_rate": 1.2260959613393166e-05, "loss": 1.5912, "reason_loss": 0.5148023962974548, "step": 1443, "utility_loss": 1.0763784646987915 }, { "cosine_similarity": 0.12163635482985932, "epoch": 1.3457595526561044, "grad_norm": 1.379941604057342, "learning_rate": 1.2254055919917156e-05, "loss": 1.8708, "reason_loss": 0.5326549410820007, "step": 1444, "utility_loss": 1.33811616897583 }, { "cosine_similarity": -0.12358116004429735, "epoch": 1.3466915191053122, "grad_norm": 1.1203093078546622, "learning_rate": 1.2247152226441149e-05, "loss": 1.9693, "reason_loss": 0.47691965103149414, "step": 1445, "utility_loss": 1.492384672164917 }, { "cosine_similarity": 0.14350841896539354, "epoch": 1.3476234855545202, "grad_norm": 1.1773142356424338, "learning_rate": 1.2240248532965138e-05, "loss": 1.9523, "reason_loss": 0.49674150347709656, "step": 1446, "utility_loss": 1.4555344581604004 }, { "cosine_similarity": 0.15389626019337072, "epoch": 1.348555452003728, "grad_norm": 1.2650427474561232, "learning_rate": 1.2233344839489127e-05, "loss": 1.9647, "reason_loss": 0.4694141745567322, "step": 1447, "utility_loss": 1.4952434301376343 }, { "cosine_similarity": 0.28851224050706764, "epoch": 1.3494874184529357, "grad_norm": 1.3210386465454984, "learning_rate": 1.2226441146013117e-05, "loss": 1.604, "reason_loss": 0.5076749324798584, "step": 1448, "utility_loss": 1.096282958984375 }, { "cosine_similarity": 0.22449942275746732, "epoch": 1.3504193849021435, "grad_norm": 1.0624989796823647, "learning_rate": 1.221953745253711e-05, "loss": 1.7021, "reason_loss": 0.49745607376098633, "step": 1449, "utility_loss": 1.2046003341674805 }, { "cosine_similarity": 0.09169826878555988, "epoch": 1.3513513513513513, "grad_norm": 1.0999946412316275, "learning_rate": 1.2212633759061099e-05, "loss": 1.6099, "reason_loss": 0.5167757272720337, "step": 1450, "utility_loss": 1.0931272506713867 }, { "cosine_similarity": 0.3278525507087339, "epoch": 1.352283317800559, "grad_norm": 1.3772044689335947, "learning_rate": 1.2205730065585088e-05, "loss": 1.7805, "reason_loss": 0.5137604475021362, "step": 1451, "utility_loss": 1.2667593955993652 }, { "cosine_similarity": -0.006831248435570993, "epoch": 1.353215284249767, "grad_norm": 1.4740785811809687, "learning_rate": 1.2198826372109079e-05, "loss": 1.6916, "reason_loss": 0.5294027328491211, "step": 1452, "utility_loss": 1.1622004508972168 }, { "cosine_similarity": 0.06059492702449031, "epoch": 1.354147250698975, "grad_norm": 1.0066528029645108, "learning_rate": 1.219192267863307e-05, "loss": 1.3336, "reason_loss": 0.510655403137207, "step": 1453, "utility_loss": 0.822965145111084 }, { "cosine_similarity": 0.17674841563571078, "epoch": 1.3550792171481827, "grad_norm": 0.9166322477279498, "learning_rate": 1.218501898515706e-05, "loss": 1.4528, "reason_loss": 0.47618114948272705, "step": 1454, "utility_loss": 0.9765984416007996 }, { "cosine_similarity": 0.34498122608635273, "epoch": 1.3560111835973905, "grad_norm": 1.3556076419331584, "learning_rate": 1.217811529168105e-05, "loss": 1.6194, "reason_loss": 0.49376657605171204, "step": 1455, "utility_loss": 1.1256530284881592 }, { "cosine_similarity": 0.26050190221161384, "epoch": 1.3569431500465983, "grad_norm": 1.1933637474278354, "learning_rate": 1.2171211598205042e-05, "loss": 1.6482, "reason_loss": 0.4946909546852112, "step": 1456, "utility_loss": 1.1534862518310547 }, { "cosine_similarity": 0.16419750256646606, "epoch": 1.3578751164958063, "grad_norm": 1.5079906094214128, "learning_rate": 1.216430790472903e-05, "loss": 1.7869, "reason_loss": 0.5330086946487427, "step": 1457, "utility_loss": 1.2538948059082031 }, { "cosine_similarity": 0.041190159584815283, "epoch": 1.358807082945014, "grad_norm": 1.2359917118975874, "learning_rate": 1.2157404211253021e-05, "loss": 1.707, "reason_loss": 0.518071174621582, "step": 1458, "utility_loss": 1.1889050006866455 }, { "cosine_similarity": 0.4320849835154755, "epoch": 1.3597390493942219, "grad_norm": 1.4535946885508473, "learning_rate": 1.215050051777701e-05, "loss": 2.1872, "reason_loss": 0.5345273017883301, "step": 1459, "utility_loss": 1.6526648998260498 }, { "cosine_similarity": 0.22783872410204506, "epoch": 1.3606710158434296, "grad_norm": 1.2058932874128938, "learning_rate": 1.2143596824301003e-05, "loss": 1.9197, "reason_loss": 0.4799976050853729, "step": 1460, "utility_loss": 1.43965744972229 }, { "cosine_similarity": 0.211416843930886, "epoch": 1.3616029822926374, "grad_norm": 1.0255562767970787, "learning_rate": 1.2136693130824992e-05, "loss": 1.5602, "reason_loss": 0.5032278299331665, "step": 1461, "utility_loss": 1.056933879852295 }, { "cosine_similarity": 0.055257460892724396, "epoch": 1.3625349487418452, "grad_norm": 1.397199533328584, "learning_rate": 1.2129789437348983e-05, "loss": 1.7858, "reason_loss": 0.49681785702705383, "step": 1462, "utility_loss": 1.28898286819458 }, { "cosine_similarity": -0.11950050742353555, "epoch": 1.363466915191053, "grad_norm": 1.2345388502370507, "learning_rate": 1.2122885743872973e-05, "loss": 1.9884, "reason_loss": 0.5196645855903625, "step": 1463, "utility_loss": 1.4687436819076538 }, { "cosine_similarity": 0.11587056752471737, "epoch": 1.364398881640261, "grad_norm": 1.033287003813047, "learning_rate": 1.2115982050396964e-05, "loss": 1.5623, "reason_loss": 0.5004627704620361, "step": 1464, "utility_loss": 1.0618270635604858 }, { "cosine_similarity": 0.300467226243491, "epoch": 1.3653308480894688, "grad_norm": 1.2615181772785866, "learning_rate": 1.2109078356920953e-05, "loss": 1.7081, "reason_loss": 0.5207167267799377, "step": 1465, "utility_loss": 1.1874005794525146 }, { "cosine_similarity": 0.3908913849678693, "epoch": 1.3662628145386766, "grad_norm": 1.1834455172887917, "learning_rate": 1.2102174663444944e-05, "loss": 2.0266, "reason_loss": 0.5098281502723694, "step": 1466, "utility_loss": 1.516775131225586 }, { "cosine_similarity": 0.04220486327689521, "epoch": 1.3671947809878844, "grad_norm": 1.333936080124143, "learning_rate": 1.2095270969968935e-05, "loss": 1.6424, "reason_loss": 0.4872857928276062, "step": 1467, "utility_loss": 1.1551408767700195 }, { "cosine_similarity": 0.2103586675726871, "epoch": 1.3681267474370924, "grad_norm": 1.355027574815171, "learning_rate": 1.2088367276492925e-05, "loss": 2.0596, "reason_loss": 0.4891524612903595, "step": 1468, "utility_loss": 1.5703983306884766 }, { "cosine_similarity": 0.015936550138970256, "epoch": 1.3690587138863002, "grad_norm": 1.312496623207534, "learning_rate": 1.2081463583016914e-05, "loss": 1.7741, "reason_loss": 0.5050867795944214, "step": 1469, "utility_loss": 1.2690074443817139 }, { "cosine_similarity": 0.213588087966121, "epoch": 1.369990680335508, "grad_norm": 1.2270970981775973, "learning_rate": 1.2074559889540905e-05, "loss": 2.1995, "reason_loss": 0.5077807903289795, "step": 1470, "utility_loss": 1.6916766166687012 }, { "cosine_similarity": 0.01968637904867167, "epoch": 1.3709226467847158, "grad_norm": 1.311003811490058, "learning_rate": 1.2067656196064896e-05, "loss": 1.666, "reason_loss": 0.48854586482048035, "step": 1471, "utility_loss": 1.1774828433990479 }, { "cosine_similarity": 0.2877249203090094, "epoch": 1.3718546132339235, "grad_norm": 1.1454430799188764, "learning_rate": 1.2060752502588887e-05, "loss": 2.0697, "reason_loss": 0.5013315677642822, "step": 1472, "utility_loss": 1.5683400630950928 }, { "cosine_similarity": 0.16748127475760002, "epoch": 1.3727865796831313, "grad_norm": 1.073740501486431, "learning_rate": 1.2053848809112876e-05, "loss": 1.8404, "reason_loss": 0.5083709359169006, "step": 1473, "utility_loss": 1.3320317268371582 }, { "cosine_similarity": 0.4949768025630614, "epoch": 1.3737185461323391, "grad_norm": 1.0981173327186509, "learning_rate": 1.2046945115636868e-05, "loss": 1.7216, "reason_loss": 0.49974966049194336, "step": 1474, "utility_loss": 1.221827745437622 }, { "cosine_similarity": 0.312268843078042, "epoch": 1.3746505125815471, "grad_norm": 1.3128883137294136, "learning_rate": 1.2040041422160857e-05, "loss": 1.7624, "reason_loss": 0.4821481704711914, "step": 1475, "utility_loss": 1.2802809476852417 }, { "cosine_similarity": 0.18022553325297427, "epoch": 1.375582479030755, "grad_norm": 1.0694335039759828, "learning_rate": 1.2033137728684848e-05, "loss": 1.7191, "reason_loss": 0.48587897419929504, "step": 1476, "utility_loss": 1.2332487106323242 }, { "cosine_similarity": 0.03838384250870828, "epoch": 1.3765144454799627, "grad_norm": 1.041637630438419, "learning_rate": 1.2026234035208837e-05, "loss": 1.5458, "reason_loss": 0.4590262174606323, "step": 1477, "utility_loss": 1.0867819786071777 }, { "cosine_similarity": 0.17081540934377087, "epoch": 1.3774464119291705, "grad_norm": 1.0995001167929732, "learning_rate": 1.201933034173283e-05, "loss": 1.8908, "reason_loss": 0.49972712993621826, "step": 1478, "utility_loss": 1.3910610675811768 }, { "cosine_similarity": 0.31911222937281936, "epoch": 1.3783783783783785, "grad_norm": 1.1149952845515345, "learning_rate": 1.2012426648256818e-05, "loss": 1.4233, "reason_loss": 0.4856643080711365, "step": 1479, "utility_loss": 0.9376369714736938 }, { "cosine_similarity": 0.02419972804290893, "epoch": 1.3793103448275863, "grad_norm": 1.0318622529767278, "learning_rate": 1.2005522954780809e-05, "loss": 1.9119, "reason_loss": 0.48638463020324707, "step": 1480, "utility_loss": 1.4254872798919678 }, { "cosine_similarity": -0.03862856393069311, "epoch": 1.380242311276794, "grad_norm": 1.4457228719394686, "learning_rate": 1.1998619261304798e-05, "loss": 1.7908, "reason_loss": 0.4935677945613861, "step": 1481, "utility_loss": 1.2971830368041992 }, { "cosine_similarity": 0.2377628429345274, "epoch": 1.3811742777260019, "grad_norm": 1.230142803226124, "learning_rate": 1.199171556782879e-05, "loss": 1.8669, "reason_loss": 0.49627846479415894, "step": 1482, "utility_loss": 1.370664358139038 }, { "cosine_similarity": 0.2112823119097806, "epoch": 1.3821062441752097, "grad_norm": 1.1743413919715413, "learning_rate": 1.198481187435278e-05, "loss": 1.7795, "reason_loss": 0.49270522594451904, "step": 1483, "utility_loss": 1.2867865562438965 }, { "cosine_similarity": -0.048458016039170236, "epoch": 1.3830382106244175, "grad_norm": 1.1747140389878912, "learning_rate": 1.1977908180876769e-05, "loss": 1.9823, "reason_loss": 0.5325769186019897, "step": 1484, "utility_loss": 1.4497573375701904 }, { "cosine_similarity": 0.22197259473634354, "epoch": 1.3839701770736252, "grad_norm": 1.4152715659120911, "learning_rate": 1.1971004487400761e-05, "loss": 1.9279, "reason_loss": 0.4909580647945404, "step": 1485, "utility_loss": 1.4369792938232422 }, { "cosine_similarity": 0.15932759752083112, "epoch": 1.3849021435228333, "grad_norm": 1.2121302962888718, "learning_rate": 1.1964100793924752e-05, "loss": 1.8178, "reason_loss": 0.524604320526123, "step": 1486, "utility_loss": 1.2932047843933105 }, { "cosine_similarity": 0.09774264084468981, "epoch": 1.385834109972041, "grad_norm": 1.1366059189436732, "learning_rate": 1.195719710044874e-05, "loss": 1.4046, "reason_loss": 0.5070022940635681, "step": 1487, "utility_loss": 0.897550106048584 }, { "cosine_similarity": 0.09868535106676862, "epoch": 1.3867660764212488, "grad_norm": 0.9805799676165871, "learning_rate": 1.195029340697273e-05, "loss": 1.5554, "reason_loss": 0.49659815430641174, "step": 1488, "utility_loss": 1.0587786436080933 }, { "cosine_similarity": 0.31119001009965985, "epoch": 1.3876980428704566, "grad_norm": 1.2349726727947454, "learning_rate": 1.1943389713496722e-05, "loss": 1.6898, "reason_loss": 0.5116568803787231, "step": 1489, "utility_loss": 1.1781911849975586 }, { "cosine_similarity": 0.624188686268309, "epoch": 1.3886300093196646, "grad_norm": 1.1417014675951385, "learning_rate": 1.1936486020020713e-05, "loss": 1.9207, "reason_loss": 0.5059956312179565, "step": 1490, "utility_loss": 1.4146728515625 }, { "cosine_similarity": -0.039734683041638935, "epoch": 1.3895619757688724, "grad_norm": 1.164109675821008, "learning_rate": 1.1929582326544702e-05, "loss": 1.6152, "reason_loss": 0.5157514214515686, "step": 1491, "utility_loss": 1.099457859992981 }, { "cosine_similarity": 0.3661612222383563, "epoch": 1.3904939422180802, "grad_norm": 1.2454957144336352, "learning_rate": 1.1922678633068691e-05, "loss": 1.8436, "reason_loss": 0.497933030128479, "step": 1492, "utility_loss": 1.3457129001617432 }, { "cosine_similarity": 0.43335091077907995, "epoch": 1.391425908667288, "grad_norm": 1.2961232866949535, "learning_rate": 1.1915774939592684e-05, "loss": 1.8701, "reason_loss": 0.5183240175247192, "step": 1493, "utility_loss": 1.3518235683441162 }, { "cosine_similarity": 0.1904559394054259, "epoch": 1.3923578751164958, "grad_norm": 1.2247110407265056, "learning_rate": 1.1908871246116673e-05, "loss": 1.6902, "reason_loss": 0.5137947797775269, "step": 1494, "utility_loss": 1.1763768196105957 }, { "cosine_similarity": 0.22267625523767923, "epoch": 1.3932898415657036, "grad_norm": 1.2314740363691195, "learning_rate": 1.1901967552640663e-05, "loss": 1.739, "reason_loss": 0.5336364507675171, "step": 1495, "utility_loss": 1.205333948135376 }, { "cosine_similarity": 0.36257401189615257, "epoch": 1.3942218080149114, "grad_norm": 1.2854087307615558, "learning_rate": 1.1895063859164656e-05, "loss": 1.8276, "reason_loss": 0.5204356908798218, "step": 1496, "utility_loss": 1.3071317672729492 }, { "cosine_similarity": 0.12390677533928181, "epoch": 1.3951537744641194, "grad_norm": 1.3082001574127338, "learning_rate": 1.1888160165688645e-05, "loss": 1.8386, "reason_loss": 0.5187304019927979, "step": 1497, "utility_loss": 1.3199048042297363 }, { "cosine_similarity": 0.17384417071898256, "epoch": 1.3960857409133272, "grad_norm": 1.1681848594729694, "learning_rate": 1.1881256472212634e-05, "loss": 1.5799, "reason_loss": 0.4921547770500183, "step": 1498, "utility_loss": 1.0877323150634766 }, { "cosine_similarity": 0.1423521333381355, "epoch": 1.397017707362535, "grad_norm": 1.1334070094198094, "learning_rate": 1.1874352778736625e-05, "loss": 1.7129, "reason_loss": 0.5118272304534912, "step": 1499, "utility_loss": 1.2010691165924072 }, { "cosine_similarity": 0.3369950684919774, "epoch": 1.3979496738117427, "grad_norm": 1.4024735191234754, "learning_rate": 1.1867449085260617e-05, "loss": 1.7436, "reason_loss": 0.5505474805831909, "step": 1500, "utility_loss": 1.1930086612701416 }, { "cosine_similarity": 0.3861299136999904, "epoch": 1.3988816402609507, "grad_norm": 1.4380582259698793, "learning_rate": 1.1860545391784606e-05, "loss": 1.6821, "reason_loss": 0.5120897889137268, "step": 1501, "utility_loss": 1.1699788570404053 }, { "cosine_similarity": 0.05928845027221307, "epoch": 1.3998136067101585, "grad_norm": 1.3024663355402115, "learning_rate": 1.1853641698308595e-05, "loss": 1.752, "reason_loss": 0.48911377787590027, "step": 1502, "utility_loss": 1.26290762424469 }, { "cosine_similarity": 0.19163030607459894, "epoch": 1.4007455731593663, "grad_norm": 1.1058593520220954, "learning_rate": 1.1846738004832586e-05, "loss": 1.7404, "reason_loss": 0.5014702677726746, "step": 1503, "utility_loss": 1.2388801574707031 }, { "cosine_similarity": -0.12998560431349654, "epoch": 1.401677539608574, "grad_norm": 1.2539473614791345, "learning_rate": 1.1839834311356576e-05, "loss": 1.6705, "reason_loss": 0.491205632686615, "step": 1504, "utility_loss": 1.1792917251586914 }, { "cosine_similarity": 0.23223467337489878, "epoch": 1.402609506057782, "grad_norm": 1.3930017644275556, "learning_rate": 1.1832930617880567e-05, "loss": 2.055, "reason_loss": 0.5579665303230286, "step": 1505, "utility_loss": 1.4970605373382568 }, { "cosine_similarity": 0.07051023309916993, "epoch": 1.4035414725069897, "grad_norm": 1.1468815809723993, "learning_rate": 1.1826026924404556e-05, "loss": 1.9663, "reason_loss": 0.5076407194137573, "step": 1506, "utility_loss": 1.4586877822875977 }, { "cosine_similarity": 0.31474660640786495, "epoch": 1.4044734389561975, "grad_norm": 1.416696611438082, "learning_rate": 1.1819123230928549e-05, "loss": 2.0431, "reason_loss": 0.5156676769256592, "step": 1507, "utility_loss": 1.5273913145065308 }, { "cosine_similarity": 0.019318411034342498, "epoch": 1.4054054054054055, "grad_norm": 1.418186686646786, "learning_rate": 1.1812219537452538e-05, "loss": 1.5558, "reason_loss": 0.4882845878601074, "step": 1508, "utility_loss": 1.0675389766693115 }, { "cosine_similarity": 0.1638315302683356, "epoch": 1.4063373718546133, "grad_norm": 1.6427496829939692, "learning_rate": 1.1805315843976528e-05, "loss": 2.3039, "reason_loss": 0.5035383701324463, "step": 1509, "utility_loss": 1.800379991531372 }, { "cosine_similarity": 0.10612420186617018, "epoch": 1.407269338303821, "grad_norm": 1.1943750546087044, "learning_rate": 1.1798412150500517e-05, "loss": 1.6315, "reason_loss": 0.4885439872741699, "step": 1510, "utility_loss": 1.1429235935211182 }, { "cosine_similarity": 0.2470066111875919, "epoch": 1.4082013047530288, "grad_norm": 1.4033481868320072, "learning_rate": 1.179150845702451e-05, "loss": 1.7198, "reason_loss": 0.49927371740341187, "step": 1511, "utility_loss": 1.2205512523651123 }, { "cosine_similarity": 0.12380201013368299, "epoch": 1.4091332712022366, "grad_norm": 1.0193033320325136, "learning_rate": 1.1784604763548499e-05, "loss": 1.3923, "reason_loss": 0.4801866412162781, "step": 1512, "utility_loss": 0.9121102094650269 }, { "cosine_similarity": 0.14228517012170028, "epoch": 1.4100652376514446, "grad_norm": 1.3140056281246955, "learning_rate": 1.177770107007249e-05, "loss": 1.8883, "reason_loss": 0.5046203136444092, "step": 1513, "utility_loss": 1.3837168216705322 }, { "cosine_similarity": 0.0018829864419056667, "epoch": 1.4109972041006524, "grad_norm": 1.3686072863669598, "learning_rate": 1.177079737659648e-05, "loss": 1.975, "reason_loss": 0.4981886148452759, "step": 1514, "utility_loss": 1.4768047332763672 }, { "cosine_similarity": 0.17847287030324757, "epoch": 1.4119291705498602, "grad_norm": 1.3195599765042523, "learning_rate": 1.1763893683120471e-05, "loss": 1.5358, "reason_loss": 0.5328026413917542, "step": 1515, "utility_loss": 1.0029776096343994 }, { "cosine_similarity": 0.014912441029334226, "epoch": 1.412861136999068, "grad_norm": 1.4190814543971506, "learning_rate": 1.175698998964446e-05, "loss": 1.7307, "reason_loss": 0.4849294424057007, "step": 1516, "utility_loss": 1.2458195686340332 }, { "cosine_similarity": 0.5271774953770827, "epoch": 1.4137931034482758, "grad_norm": 0.9836687178150676, "learning_rate": 1.1750086296168451e-05, "loss": 1.8068, "reason_loss": 0.5088579654693604, "step": 1517, "utility_loss": 1.2979562282562256 }, { "cosine_similarity": 0.42516387676332185, "epoch": 1.4147250698974836, "grad_norm": 1.120214288503743, "learning_rate": 1.1743182602692442e-05, "loss": 1.7612, "reason_loss": 0.5090606212615967, "step": 1518, "utility_loss": 1.2521724700927734 }, { "cosine_similarity": -0.022710600103891622, "epoch": 1.4156570363466916, "grad_norm": 1.607138299527646, "learning_rate": 1.1736278909216432e-05, "loss": 1.574, "reason_loss": 0.5052825212478638, "step": 1519, "utility_loss": 1.0687367916107178 }, { "cosine_similarity": 0.24171958696638532, "epoch": 1.4165890027958994, "grad_norm": 1.1335819807094534, "learning_rate": 1.1729375215740421e-05, "loss": 1.6659, "reason_loss": 0.4930368661880493, "step": 1520, "utility_loss": 1.1728938817977905 }, { "cosine_similarity": 0.09646287328360789, "epoch": 1.4175209692451072, "grad_norm": 1.0132620908553756, "learning_rate": 1.1722471522264412e-05, "loss": 1.8668, "reason_loss": 0.4995367228984833, "step": 1521, "utility_loss": 1.3672394752502441 }, { "cosine_similarity": 0.0038021018750313933, "epoch": 1.418452935694315, "grad_norm": 1.9047395594956664, "learning_rate": 1.1715567828788403e-05, "loss": 2.1112, "reason_loss": 0.5043997168540955, "step": 1522, "utility_loss": 1.6067878007888794 }, { "cosine_similarity": 0.40473871365292113, "epoch": 1.4193849021435228, "grad_norm": 1.118436272097355, "learning_rate": 1.1708664135312394e-05, "loss": 1.6886, "reason_loss": 0.4805943965911865, "step": 1523, "utility_loss": 1.2079596519470215 }, { "cosine_similarity": 0.2516541924651147, "epoch": 1.4203168685927308, "grad_norm": 2.0643200670337976, "learning_rate": 1.1701760441836383e-05, "loss": 1.7194, "reason_loss": 0.497133731842041, "step": 1524, "utility_loss": 1.2222392559051514 }, { "cosine_similarity": 0.08295842091926611, "epoch": 1.4212488350419386, "grad_norm": 1.3860530514093323, "learning_rate": 1.1694856748360375e-05, "loss": 1.8317, "reason_loss": 0.5275465846061707, "step": 1525, "utility_loss": 1.304159164428711 }, { "cosine_similarity": -0.004068477542286379, "epoch": 1.4221808014911463, "grad_norm": 1.488126778724687, "learning_rate": 1.1687953054884364e-05, "loss": 2.0969, "reason_loss": 0.49711084365844727, "step": 1526, "utility_loss": 1.5998320579528809 }, { "cosine_similarity": 0.09535430920082544, "epoch": 1.4231127679403541, "grad_norm": 1.3116726715915534, "learning_rate": 1.1681049361408355e-05, "loss": 1.8315, "reason_loss": 0.5037946701049805, "step": 1527, "utility_loss": 1.327738881111145 }, { "cosine_similarity": -0.06796331088894264, "epoch": 1.424044734389562, "grad_norm": 1.3611959294539768, "learning_rate": 1.1674145667932344e-05, "loss": 1.7248, "reason_loss": 0.49084341526031494, "step": 1528, "utility_loss": 1.233914852142334 }, { "cosine_similarity": 0.21955759739199404, "epoch": 1.4249767008387697, "grad_norm": 1.1969509714010411, "learning_rate": 1.1667241974456336e-05, "loss": 1.5122, "reason_loss": 0.5127012133598328, "step": 1529, "utility_loss": 0.9995023608207703 }, { "cosine_similarity": 0.2955936192756811, "epoch": 1.4259086672879777, "grad_norm": 1.4428821883585934, "learning_rate": 1.1660338280980325e-05, "loss": 1.8385, "reason_loss": 0.49294906854629517, "step": 1530, "utility_loss": 1.3455883264541626 }, { "cosine_similarity": 0.3350898189295874, "epoch": 1.4268406337371855, "grad_norm": 1.1897183676382819, "learning_rate": 1.1653434587504316e-05, "loss": 1.5981, "reason_loss": 0.4898490905761719, "step": 1531, "utility_loss": 1.108292818069458 }, { "cosine_similarity": 0.32116189421516467, "epoch": 1.4277726001863933, "grad_norm": 1.4728361895018258, "learning_rate": 1.1646530894028305e-05, "loss": 2.0625, "reason_loss": 0.4950961470603943, "step": 1532, "utility_loss": 1.5674279928207397 }, { "cosine_similarity": 0.0015395278567970354, "epoch": 1.428704566635601, "grad_norm": 1.3805184301834235, "learning_rate": 1.1639627200552298e-05, "loss": 1.829, "reason_loss": 0.5322232246398926, "step": 1533, "utility_loss": 1.296800971031189 }, { "cosine_similarity": 0.43571713419244107, "epoch": 1.4296365330848089, "grad_norm": 1.2692935702029224, "learning_rate": 1.1632723507076287e-05, "loss": 1.8166, "reason_loss": 0.47101473808288574, "step": 1534, "utility_loss": 1.3455941677093506 }, { "cosine_similarity": 0.7008723813527276, "epoch": 1.4305684995340169, "grad_norm": 1.1471876726049643, "learning_rate": 1.1625819813600276e-05, "loss": 1.7458, "reason_loss": 0.532579779624939, "step": 1535, "utility_loss": 1.2131767272949219 }, { "cosine_similarity": 0.07045565864848942, "epoch": 1.4315004659832247, "grad_norm": 1.3116440274253245, "learning_rate": 1.1618916120124268e-05, "loss": 1.4844, "reason_loss": 0.5127708315849304, "step": 1536, "utility_loss": 0.9716777205467224 }, { "cosine_similarity": 0.3324634458758801, "epoch": 1.4324324324324325, "grad_norm": 1.2206146018008075, "learning_rate": 1.1612012426648259e-05, "loss": 1.976, "reason_loss": 0.4885861873626709, "step": 1537, "utility_loss": 1.4873950481414795 }, { "cosine_similarity": 0.23976440495353077, "epoch": 1.4333643988816402, "grad_norm": 1.0606429795195333, "learning_rate": 1.1605108733172248e-05, "loss": 1.7386, "reason_loss": 0.5428722500801086, "step": 1538, "utility_loss": 1.1956883668899536 }, { "cosine_similarity": 0.24151633797042954, "epoch": 1.434296365330848, "grad_norm": 1.2391937658348544, "learning_rate": 1.1598205039696237e-05, "loss": 1.868, "reason_loss": 0.47679603099823, "step": 1539, "utility_loss": 1.391160488128662 }, { "cosine_similarity": -0.27906862781120023, "epoch": 1.4352283317800558, "grad_norm": 1.0758889443296833, "learning_rate": 1.159130134622023e-05, "loss": 1.748, "reason_loss": 0.4578178822994232, "step": 1540, "utility_loss": 1.2901737689971924 }, { "cosine_similarity": 0.473557740541205, "epoch": 1.4361602982292636, "grad_norm": 1.0435986565173903, "learning_rate": 1.158439765274422e-05, "loss": 1.6802, "reason_loss": 0.49630218744277954, "step": 1541, "utility_loss": 1.183938980102539 }, { "cosine_similarity": 0.06530258459285157, "epoch": 1.4370922646784716, "grad_norm": 1.4281453921292981, "learning_rate": 1.1577493959268209e-05, "loss": 1.8653, "reason_loss": 0.5181498527526855, "step": 1542, "utility_loss": 1.3471177816390991 }, { "cosine_similarity": 0.3000147696527014, "epoch": 1.4380242311276794, "grad_norm": 2.263120840367362, "learning_rate": 1.1570590265792198e-05, "loss": 1.9882, "reason_loss": 0.5098974704742432, "step": 1543, "utility_loss": 1.4783124923706055 }, { "cosine_similarity": 0.18789193214144387, "epoch": 1.4389561975768872, "grad_norm": 1.1393590773819007, "learning_rate": 1.156368657231619e-05, "loss": 1.7222, "reason_loss": 0.4901179075241089, "step": 1544, "utility_loss": 1.2320916652679443 }, { "cosine_similarity": 0.22885316145761925, "epoch": 1.439888164026095, "grad_norm": 1.1972019149288178, "learning_rate": 1.155678287884018e-05, "loss": 1.6216, "reason_loss": 0.4854848384857178, "step": 1545, "utility_loss": 1.1361465454101562 }, { "cosine_similarity": 0.10218353286089008, "epoch": 1.440820130475303, "grad_norm": 1.0999309203739676, "learning_rate": 1.154987918536417e-05, "loss": 1.783, "reason_loss": 0.489152193069458, "step": 1546, "utility_loss": 1.2938549518585205 }, { "cosine_similarity": -0.06819870980293957, "epoch": 1.4417520969245108, "grad_norm": 0.9497940225355074, "learning_rate": 1.1542975491888163e-05, "loss": 1.5919, "reason_loss": 0.4966927170753479, "step": 1547, "utility_loss": 1.0951675176620483 }, { "cosine_similarity": 0.20113700291014341, "epoch": 1.4426840633737186, "grad_norm": 1.5251176654137963, "learning_rate": 1.1536071798412152e-05, "loss": 1.6966, "reason_loss": 0.4874255061149597, "step": 1548, "utility_loss": 1.2091959714889526 }, { "cosine_similarity": 0.6202422959544457, "epoch": 1.4436160298229264, "grad_norm": 1.199352669734896, "learning_rate": 1.152916810493614e-05, "loss": 1.8648, "reason_loss": 0.49671435356140137, "step": 1549, "utility_loss": 1.3681297302246094 }, { "cosine_similarity": 0.12017356363359705, "epoch": 1.4445479962721341, "grad_norm": 1.2071735557719903, "learning_rate": 1.1522264411460132e-05, "loss": 1.8525, "reason_loss": 0.5011345148086548, "step": 1550, "utility_loss": 1.3513224124908447 }, { "cosine_similarity": 0.12404405243177798, "epoch": 1.445479962721342, "grad_norm": 1.484685562530661, "learning_rate": 1.1515360717984124e-05, "loss": 1.8162, "reason_loss": 0.5019217133522034, "step": 1551, "utility_loss": 1.314241886138916 }, { "cosine_similarity": 0.4240074715485593, "epoch": 1.4464119291705497, "grad_norm": 1.1225113430092217, "learning_rate": 1.1508457024508113e-05, "loss": 1.688, "reason_loss": 0.49642348289489746, "step": 1552, "utility_loss": 1.1915719509124756 }, { "cosine_similarity": 0.3087289291093144, "epoch": 1.4473438956197577, "grad_norm": 1.0664897111825689, "learning_rate": 1.1501553331032102e-05, "loss": 1.5183, "reason_loss": 0.4779502749443054, "step": 1553, "utility_loss": 1.0403807163238525 }, { "cosine_similarity": 0.05516543160237899, "epoch": 1.4482758620689655, "grad_norm": 1.1456038487869313, "learning_rate": 1.1494649637556094e-05, "loss": 1.9499, "reason_loss": 0.49498921632766724, "step": 1554, "utility_loss": 1.4549221992492676 }, { "cosine_similarity": 0.21213358452858105, "epoch": 1.4492078285181733, "grad_norm": 1.2287631337039138, "learning_rate": 1.1487745944080084e-05, "loss": 1.632, "reason_loss": 0.5120542049407959, "step": 1555, "utility_loss": 1.1199798583984375 }, { "cosine_similarity": 0.29688658959981873, "epoch": 1.450139794967381, "grad_norm": 1.0674328350279405, "learning_rate": 1.1480842250604074e-05, "loss": 1.9111, "reason_loss": 0.5061725378036499, "step": 1556, "utility_loss": 1.4049019813537598 }, { "cosine_similarity": 0.19446061182360388, "epoch": 1.4510717614165891, "grad_norm": 0.9993633393178188, "learning_rate": 1.1473938557128063e-05, "loss": 1.9364, "reason_loss": 0.48884275555610657, "step": 1557, "utility_loss": 1.4475336074829102 }, { "cosine_similarity": 0.24168343067901732, "epoch": 1.452003727865797, "grad_norm": 1.1573036129127618, "learning_rate": 1.1467034863652056e-05, "loss": 1.8553, "reason_loss": 0.4818773865699768, "step": 1558, "utility_loss": 1.3734071254730225 }, { "cosine_similarity": 0.14174801739601414, "epoch": 1.4529356943150047, "grad_norm": 1.362225049003562, "learning_rate": 1.1460131170176045e-05, "loss": 1.717, "reason_loss": 0.48467808961868286, "step": 1559, "utility_loss": 1.2322888374328613 }, { "cosine_similarity": 0.13472819677380582, "epoch": 1.4538676607642125, "grad_norm": 1.2584260626656323, "learning_rate": 1.1453227476700035e-05, "loss": 1.5683, "reason_loss": 0.5081832408905029, "step": 1560, "utility_loss": 1.060070276260376 }, { "cosine_similarity": 0.10812948113637101, "epoch": 1.4547996272134203, "grad_norm": 1.61952859188802, "learning_rate": 1.1446323783224025e-05, "loss": 1.7773, "reason_loss": 0.4842378497123718, "step": 1561, "utility_loss": 1.2930816411972046 }, { "cosine_similarity": 0.05583096333027191, "epoch": 1.455731593662628, "grad_norm": 1.1484977486916097, "learning_rate": 1.1439420089748017e-05, "loss": 1.8333, "reason_loss": 0.510992705821991, "step": 1562, "utility_loss": 1.322274923324585 }, { "cosine_similarity": -0.08407210615441867, "epoch": 1.4566635601118358, "grad_norm": 1.607937088312425, "learning_rate": 1.1432516396272006e-05, "loss": 1.7628, "reason_loss": 0.49298200011253357, "step": 1563, "utility_loss": 1.2698240280151367 }, { "cosine_similarity": 0.14645774997832345, "epoch": 1.4575955265610439, "grad_norm": 1.3671989756019027, "learning_rate": 1.1425612702795997e-05, "loss": 2.1583, "reason_loss": 0.5063073635101318, "step": 1564, "utility_loss": 1.6520347595214844 }, { "cosine_similarity": 0.2315159744965296, "epoch": 1.4585274930102516, "grad_norm": 1.1345730980711268, "learning_rate": 1.1418709009319987e-05, "loss": 1.6491, "reason_loss": 0.4835493266582489, "step": 1565, "utility_loss": 1.165510654449463 }, { "cosine_similarity": 0.0018579220641274817, "epoch": 1.4594594594594594, "grad_norm": 1.161321789574384, "learning_rate": 1.1411805315843978e-05, "loss": 1.8569, "reason_loss": 0.5371553897857666, "step": 1566, "utility_loss": 1.3197071552276611 }, { "cosine_similarity": -0.033546515989442936, "epoch": 1.4603914259086672, "grad_norm": 1.4496291009121547, "learning_rate": 1.1404901622367967e-05, "loss": 1.6181, "reason_loss": 0.46747761964797974, "step": 1567, "utility_loss": 1.1506342887878418 }, { "cosine_similarity": -0.05372999597578682, "epoch": 1.4613233923578752, "grad_norm": 1.3069168668085795, "learning_rate": 1.1397997928891958e-05, "loss": 1.945, "reason_loss": 0.5039142966270447, "step": 1568, "utility_loss": 1.4410690069198608 }, { "cosine_similarity": 0.13306046393475046, "epoch": 1.462255358807083, "grad_norm": 1.3415068031265482, "learning_rate": 1.1391094235415949e-05, "loss": 2.0317, "reason_loss": 0.5097321271896362, "step": 1569, "utility_loss": 1.5219602584838867 }, { "cosine_similarity": 0.11646124940877883, "epoch": 1.4631873252562908, "grad_norm": 1.339127560023719, "learning_rate": 1.138419054193994e-05, "loss": 1.9513, "reason_loss": 0.47507989406585693, "step": 1570, "utility_loss": 1.4761818647384644 }, { "cosine_similarity": 0.0830591867320864, "epoch": 1.4641192917054986, "grad_norm": 1.1207345754470033, "learning_rate": 1.1377286848463928e-05, "loss": 1.6379, "reason_loss": 0.5105645656585693, "step": 1571, "utility_loss": 1.127382516860962 }, { "cosine_similarity": 0.15415020091819878, "epoch": 1.4650512581547064, "grad_norm": 1.085843037771063, "learning_rate": 1.137038315498792e-05, "loss": 1.7871, "reason_loss": 0.504307746887207, "step": 1572, "utility_loss": 1.2827616930007935 }, { "cosine_similarity": -0.0053425770894494976, "epoch": 1.4659832246039142, "grad_norm": 1.1376326251869235, "learning_rate": 1.136347946151191e-05, "loss": 1.6465, "reason_loss": 0.49095419049263, "step": 1573, "utility_loss": 1.155504584312439 }, { "cosine_similarity": 0.17742803861517664, "epoch": 1.466915191053122, "grad_norm": 1.1757149047129436, "learning_rate": 1.13565757680359e-05, "loss": 1.7931, "reason_loss": 0.5258285999298096, "step": 1574, "utility_loss": 1.2673015594482422 }, { "cosine_similarity": 0.1277005096344228, "epoch": 1.46784715750233, "grad_norm": 1.3582507217767104, "learning_rate": 1.134967207455989e-05, "loss": 1.8086, "reason_loss": 0.5079910159111023, "step": 1575, "utility_loss": 1.3005647659301758 }, { "cosine_similarity": 0.08456255400890643, "epoch": 1.4687791239515378, "grad_norm": 1.5188767261778096, "learning_rate": 1.1342768381083882e-05, "loss": 1.6913, "reason_loss": 0.4804665446281433, "step": 1576, "utility_loss": 1.2108738422393799 }, { "cosine_similarity": 0.4670496501540556, "epoch": 1.4697110904007455, "grad_norm": 1.4161450279687555, "learning_rate": 1.1335864687607871e-05, "loss": 1.8266, "reason_loss": 0.4957011938095093, "step": 1577, "utility_loss": 1.3309049606323242 }, { "cosine_similarity": 0.19935133550345147, "epoch": 1.4706430568499533, "grad_norm": 1.0987639151849515, "learning_rate": 1.1328960994131862e-05, "loss": 1.8996, "reason_loss": 0.5142394304275513, "step": 1578, "utility_loss": 1.3853671550750732 }, { "cosine_similarity": 0.17287392238632193, "epoch": 1.4715750232991613, "grad_norm": 1.6741904444449534, "learning_rate": 1.1322057300655851e-05, "loss": 2.0725, "reason_loss": 0.5156292915344238, "step": 1579, "utility_loss": 1.5568385124206543 }, { "cosine_similarity": 0.04841669690242868, "epoch": 1.4725069897483691, "grad_norm": 1.2319703486359022, "learning_rate": 1.1315153607179843e-05, "loss": 1.7619, "reason_loss": 0.5092266798019409, "step": 1580, "utility_loss": 1.2526662349700928 }, { "cosine_similarity": 0.09963593899647978, "epoch": 1.473438956197577, "grad_norm": 2.5167387243894637, "learning_rate": 1.1308249913703832e-05, "loss": 1.5566, "reason_loss": 0.5071241855621338, "step": 1581, "utility_loss": 1.049504280090332 }, { "cosine_similarity": 0.3724318000049825, "epoch": 1.4743709226467847, "grad_norm": 1.5624846680074609, "learning_rate": 1.1301346220227823e-05, "loss": 1.6743, "reason_loss": 0.4824298024177551, "step": 1582, "utility_loss": 1.1918926239013672 }, { "cosine_similarity": 0.1751527245096971, "epoch": 1.4753028890959925, "grad_norm": 1.1909788923771196, "learning_rate": 1.1294442526751812e-05, "loss": 1.6637, "reason_loss": 0.49050986766815186, "step": 1583, "utility_loss": 1.173143982887268 }, { "cosine_similarity": 0.12691858399896655, "epoch": 1.4762348555452003, "grad_norm": 1.2955656969538922, "learning_rate": 1.1287538833275805e-05, "loss": 2.1802, "reason_loss": 0.5128642916679382, "step": 1584, "utility_loss": 1.6673157215118408 }, { "cosine_similarity": 0.14816565843220397, "epoch": 1.477166821994408, "grad_norm": 1.1358623189464196, "learning_rate": 1.1280635139799794e-05, "loss": 1.7739, "reason_loss": 0.520097017288208, "step": 1585, "utility_loss": 1.253812313079834 }, { "cosine_similarity": 0.21217992650559744, "epoch": 1.478098788443616, "grad_norm": 1.2221686987244136, "learning_rate": 1.1273731446323783e-05, "loss": 1.8788, "reason_loss": 0.5291640758514404, "step": 1586, "utility_loss": 1.3496617078781128 }, { "cosine_similarity": 0.040363882387843796, "epoch": 1.4790307548928239, "grad_norm": 1.0238852780108865, "learning_rate": 1.1266827752847775e-05, "loss": 1.5955, "reason_loss": 0.49456870555877686, "step": 1587, "utility_loss": 1.1009033918380737 }, { "cosine_similarity": 0.13435390335782252, "epoch": 1.4799627213420317, "grad_norm": 1.1645193591161598, "learning_rate": 1.1259924059371766e-05, "loss": 1.9486, "reason_loss": 0.4929544925689697, "step": 1588, "utility_loss": 1.4556922912597656 }, { "cosine_similarity": 0.4682011285558273, "epoch": 1.4808946877912395, "grad_norm": 1.160745018768675, "learning_rate": 1.1253020365895755e-05, "loss": 1.7305, "reason_loss": 0.5448885560035706, "step": 1589, "utility_loss": 1.1855814456939697 }, { "cosine_similarity": 0.08645283489516252, "epoch": 1.4818266542404475, "grad_norm": 1.0927287017535203, "learning_rate": 1.1246116672419744e-05, "loss": 1.9444, "reason_loss": 0.5021460056304932, "step": 1590, "utility_loss": 1.442291498184204 }, { "cosine_similarity": 0.1298796359521359, "epoch": 1.4827586206896552, "grad_norm": 1.3784465137273343, "learning_rate": 1.1239212978943736e-05, "loss": 1.8288, "reason_loss": 0.5266547799110413, "step": 1591, "utility_loss": 1.3021568059921265 }, { "cosine_similarity": 0.08041021798577473, "epoch": 1.483690587138863, "grad_norm": 1.280984894154267, "learning_rate": 1.1232309285467727e-05, "loss": 2.1401, "reason_loss": 0.49273085594177246, "step": 1592, "utility_loss": 1.647327184677124 }, { "cosine_similarity": 0.15529674888443282, "epoch": 1.4846225535880708, "grad_norm": 1.0546055959638454, "learning_rate": 1.1225405591991716e-05, "loss": 1.4805, "reason_loss": 0.5269570350646973, "step": 1593, "utility_loss": 0.9535740613937378 }, { "cosine_similarity": 0.4408816307130722, "epoch": 1.4855545200372786, "grad_norm": 1.0911497371570675, "learning_rate": 1.1218501898515709e-05, "loss": 2.035, "reason_loss": 0.48341503739356995, "step": 1594, "utility_loss": 1.5515402555465698 }, { "cosine_similarity": 0.34442129527660104, "epoch": 1.4864864864864864, "grad_norm": 1.281376986240337, "learning_rate": 1.1211598205039698e-05, "loss": 1.7566, "reason_loss": 0.4900578558444977, "step": 1595, "utility_loss": 1.2665321826934814 }, { "cosine_similarity": 0.46741409420193036, "epoch": 1.4874184529356942, "grad_norm": 1.2079932601476695, "learning_rate": 1.1204694511563687e-05, "loss": 1.8144, "reason_loss": 0.5057473182678223, "step": 1596, "utility_loss": 1.308671474456787 }, { "cosine_similarity": 0.43933656939545673, "epoch": 1.4883504193849022, "grad_norm": 1.2118842908411984, "learning_rate": 1.1197790818087677e-05, "loss": 1.7696, "reason_loss": 0.49267953634262085, "step": 1597, "utility_loss": 1.276906967163086 }, { "cosine_similarity": 0.10671671288861534, "epoch": 1.48928238583411, "grad_norm": 1.2790660773971487, "learning_rate": 1.119088712461167e-05, "loss": 1.6483, "reason_loss": 0.49880433082580566, "step": 1598, "utility_loss": 1.1494977474212646 }, { "cosine_similarity": 0.0028375258150007666, "epoch": 1.4902143522833178, "grad_norm": 1.4086155189645706, "learning_rate": 1.1183983431135659e-05, "loss": 1.7764, "reason_loss": 0.5012183785438538, "step": 1599, "utility_loss": 1.2752299308776855 }, { "cosine_similarity": 0.31420968263654114, "epoch": 1.4911463187325256, "grad_norm": 1.1868047183425763, "learning_rate": 1.1177079737659648e-05, "loss": 1.5207, "reason_loss": 0.5168207287788391, "step": 1600, "utility_loss": 1.0038806200027466 }, { "cosine_similarity": 0.6084736645475723, "epoch": 1.4920782851817336, "grad_norm": 1.136251362162119, "learning_rate": 1.1170176044183639e-05, "loss": 2.0909, "reason_loss": 0.5276640057563782, "step": 1601, "utility_loss": 1.5632683038711548 }, { "cosine_similarity": 0.08681490615411863, "epoch": 1.4930102516309414, "grad_norm": 1.1079959048062127, "learning_rate": 1.1163272350707631e-05, "loss": 1.8348, "reason_loss": 0.5017684698104858, "step": 1602, "utility_loss": 1.3330695629119873 }, { "cosine_similarity": 0.06871564380308182, "epoch": 1.4939422180801492, "grad_norm": 1.3922564534550417, "learning_rate": 1.115636865723162e-05, "loss": 1.6702, "reason_loss": 0.4989606440067291, "step": 1603, "utility_loss": 1.1712112426757812 }, { "cosine_similarity": 0.034154433252810305, "epoch": 1.494874184529357, "grad_norm": 1.2950258642672443, "learning_rate": 1.1149464963755609e-05, "loss": 1.7392, "reason_loss": 0.5233021378517151, "step": 1604, "utility_loss": 1.2158910036087036 }, { "cosine_similarity": 0.4629086935550524, "epoch": 1.4958061509785647, "grad_norm": 1.1891416791751435, "learning_rate": 1.1142561270279602e-05, "loss": 2.17, "reason_loss": 0.5083920359611511, "step": 1605, "utility_loss": 1.661644458770752 }, { "cosine_similarity": 0.009840977510340835, "epoch": 1.4967381174277725, "grad_norm": 1.386800037795068, "learning_rate": 1.113565757680359e-05, "loss": 2.2118, "reason_loss": 0.49357423186302185, "step": 1606, "utility_loss": 1.7182233333587646 }, { "cosine_similarity": 0.032334099037260755, "epoch": 1.4976700838769803, "grad_norm": 1.1628863981095245, "learning_rate": 1.1128753883327581e-05, "loss": 1.7694, "reason_loss": 0.5150015354156494, "step": 1607, "utility_loss": 1.2543761730194092 }, { "cosine_similarity": -0.05442283246596344, "epoch": 1.4986020503261883, "grad_norm": 1.1937399466257044, "learning_rate": 1.112185018985157e-05, "loss": 2.0088, "reason_loss": 0.537948489189148, "step": 1608, "utility_loss": 1.4708276987075806 }, { "cosine_similarity": -0.03510648493046052, "epoch": 1.499534016775396, "grad_norm": 0.9970500824562541, "learning_rate": 1.1114946496375563e-05, "loss": 1.4451, "reason_loss": 0.48131364583969116, "step": 1609, "utility_loss": 0.9638274908065796 }, { "cosine_similarity": 0.5188568829745361, "epoch": 1.500465983224604, "grad_norm": 1.4682587475186286, "learning_rate": 1.1108042802899552e-05, "loss": 1.9134, "reason_loss": 0.5107119083404541, "step": 1610, "utility_loss": 1.4026520252227783 }, { "cosine_similarity": 0.17627300693906, "epoch": 1.501397949673812, "grad_norm": 1.1662519503582334, "learning_rate": 1.1101139109423543e-05, "loss": 1.9483, "reason_loss": 0.4693697690963745, "step": 1611, "utility_loss": 1.4789628982543945 }, { "cosine_similarity": 0.18213793941473794, "epoch": 1.5023299161230197, "grad_norm": 1.3196799026681145, "learning_rate": 1.1094235415947532e-05, "loss": 1.8894, "reason_loss": 0.5314580202102661, "step": 1612, "utility_loss": 1.3579720258712769 }, { "cosine_similarity": 0.11502527377213972, "epoch": 1.5032618825722275, "grad_norm": 1.2010738848423839, "learning_rate": 1.1087331722471524e-05, "loss": 1.7448, "reason_loss": 0.4878396689891815, "step": 1613, "utility_loss": 1.2569389343261719 }, { "cosine_similarity": 0.08579173966203761, "epoch": 1.5041938490214353, "grad_norm": 1.3064642110454923, "learning_rate": 1.1080428028995513e-05, "loss": 1.8631, "reason_loss": 0.5144976377487183, "step": 1614, "utility_loss": 1.348646640777588 }, { "cosine_similarity": -0.08191253386683425, "epoch": 1.505125815470643, "grad_norm": 1.1689867824543831, "learning_rate": 1.1073524335519504e-05, "loss": 1.486, "reason_loss": 0.4940672814846039, "step": 1615, "utility_loss": 0.9919307231903076 }, { "cosine_similarity": 0.33254893145747905, "epoch": 1.5060577819198508, "grad_norm": 1.2523912340806254, "learning_rate": 1.1066620642043494e-05, "loss": 1.7338, "reason_loss": 0.4954532980918884, "step": 1616, "utility_loss": 1.2383475303649902 }, { "cosine_similarity": 0.06837881675976502, "epoch": 1.5069897483690586, "grad_norm": 1.1003706830057987, "learning_rate": 1.1059716948567485e-05, "loss": 1.4536, "reason_loss": 0.4829987585544586, "step": 1617, "utility_loss": 0.9706459641456604 }, { "cosine_similarity": 0.2805883742579105, "epoch": 1.5079217148182664, "grad_norm": 1.5814659023736075, "learning_rate": 1.1052813255091474e-05, "loss": 1.7104, "reason_loss": 0.5181862115859985, "step": 1618, "utility_loss": 1.1921800374984741 }, { "cosine_similarity": 0.12963508912096613, "epoch": 1.5088536812674742, "grad_norm": 1.038715336111997, "learning_rate": 1.1045909561615465e-05, "loss": 1.8126, "reason_loss": 0.4995010495185852, "step": 1619, "utility_loss": 1.3130531311035156 }, { "cosine_similarity": -0.16812839124188836, "epoch": 1.5097856477166822, "grad_norm": 1.1117210590199473, "learning_rate": 1.1039005868139456e-05, "loss": 1.8763, "reason_loss": 0.5051188468933105, "step": 1620, "utility_loss": 1.3711488246917725 }, { "cosine_similarity": 0.014805385120180701, "epoch": 1.51071761416589, "grad_norm": 1.0692089562090912, "learning_rate": 1.1032102174663446e-05, "loss": 1.8701, "reason_loss": 0.47195690870285034, "step": 1621, "utility_loss": 1.3981044292449951 }, { "cosine_similarity": 0.4056069808965911, "epoch": 1.511649580615098, "grad_norm": 1.128152714963113, "learning_rate": 1.1025198481187435e-05, "loss": 1.7082, "reason_loss": 0.48538339138031006, "step": 1622, "utility_loss": 1.2228641510009766 }, { "cosine_similarity": 0.5671910083405765, "epoch": 1.5125815470643058, "grad_norm": 1.401814217129504, "learning_rate": 1.1018294787711426e-05, "loss": 1.8269, "reason_loss": 0.49055030941963196, "step": 1623, "utility_loss": 1.3363670110702515 }, { "cosine_similarity": 0.20981295831479663, "epoch": 1.5135135135135136, "grad_norm": 1.268017477110797, "learning_rate": 1.1011391094235417e-05, "loss": 1.49, "reason_loss": 0.5023717284202576, "step": 1624, "utility_loss": 0.9876699447631836 }, { "cosine_similarity": 0.17068350679099345, "epoch": 1.5144454799627214, "grad_norm": 1.0378952365966003, "learning_rate": 1.1004487400759408e-05, "loss": 1.5817, "reason_loss": 0.4701060354709625, "step": 1625, "utility_loss": 1.1116266250610352 }, { "cosine_similarity": 0.43093188684497846, "epoch": 1.5153774464119292, "grad_norm": 1.3411033219590147, "learning_rate": 1.0997583707283397e-05, "loss": 1.7273, "reason_loss": 0.47714805603027344, "step": 1626, "utility_loss": 1.2501940727233887 }, { "cosine_similarity": 0.4913586742931616, "epoch": 1.516309412861137, "grad_norm": 1.0627194853136417, "learning_rate": 1.0990680013807389e-05, "loss": 1.4018, "reason_loss": 0.48006486892700195, "step": 1627, "utility_loss": 0.9217157959938049 }, { "cosine_similarity": 0.1214043246548676, "epoch": 1.5172413793103448, "grad_norm": 1.079316095328374, "learning_rate": 1.0983776320331378e-05, "loss": 1.5339, "reason_loss": 0.47983264923095703, "step": 1628, "utility_loss": 1.0540287494659424 }, { "cosine_similarity": 0.34613521709198636, "epoch": 1.5181733457595525, "grad_norm": 1.1143952743063368, "learning_rate": 1.0976872626855369e-05, "loss": 1.6304, "reason_loss": 0.49280011653900146, "step": 1629, "utility_loss": 1.1375977993011475 }, { "cosine_similarity": 0.04461836725816552, "epoch": 1.5191053122087603, "grad_norm": 1.3043290858860506, "learning_rate": 1.0969968933379358e-05, "loss": 1.6813, "reason_loss": 0.5116918087005615, "step": 1630, "utility_loss": 1.169602870941162 }, { "cosine_similarity": 0.5721432625812969, "epoch": 1.5200372786579683, "grad_norm": 1.1580746035229494, "learning_rate": 1.096306523990335e-05, "loss": 2.8021, "reason_loss": 0.4795922040939331, "step": 1631, "utility_loss": 2.3225059509277344 }, { "cosine_similarity": 0.8592782785027002, "epoch": 1.5209692451071761, "grad_norm": 1.2314394704851164, "learning_rate": 1.095616154642734e-05, "loss": 1.7599, "reason_loss": 0.4781641364097595, "step": 1632, "utility_loss": 1.2817237377166748 }, { "cosine_similarity": -0.07327906497824353, "epoch": 1.521901211556384, "grad_norm": 1.2506702881765983, "learning_rate": 1.094925785295133e-05, "loss": 1.8049, "reason_loss": 0.5279524922370911, "step": 1633, "utility_loss": 1.2769677639007568 }, { "cosine_similarity": 0.714337606464944, "epoch": 1.522833178005592, "grad_norm": 1.392602164034125, "learning_rate": 1.094235415947532e-05, "loss": 1.6571, "reason_loss": 0.49751871824264526, "step": 1634, "utility_loss": 1.1595641374588013 }, { "cosine_similarity": 0.9350636406141124, "epoch": 1.5237651444547997, "grad_norm": 1.043306451831823, "learning_rate": 1.0935450465999312e-05, "loss": 1.7097, "reason_loss": 0.4628380835056305, "step": 1635, "utility_loss": 1.24681556224823 }, { "cosine_similarity": 0.4986308316024928, "epoch": 1.5246971109040075, "grad_norm": 1.126499279582979, "learning_rate": 1.09285467725233e-05, "loss": 1.6204, "reason_loss": 0.5061957836151123, "step": 1636, "utility_loss": 1.1141901016235352 }, { "cosine_similarity": 0.17233210638782592, "epoch": 1.5256290773532153, "grad_norm": 1.2391472030505881, "learning_rate": 1.092164307904729e-05, "loss": 1.6424, "reason_loss": 0.47417813539505005, "step": 1637, "utility_loss": 1.1682639122009277 }, { "cosine_similarity": 0.11263362271474622, "epoch": 1.526561043802423, "grad_norm": 1.1614293469235906, "learning_rate": 1.0914739385571282e-05, "loss": 1.4591, "reason_loss": 0.48422175645828247, "step": 1638, "utility_loss": 0.9749086499214172 }, { "cosine_similarity": 0.047803730353396315, "epoch": 1.5274930102516309, "grad_norm": 1.3528606037283462, "learning_rate": 1.0907835692095273e-05, "loss": 1.8758, "reason_loss": 0.5095975399017334, "step": 1639, "utility_loss": 1.3662126064300537 }, { "cosine_similarity": 0.11055418673947297, "epoch": 1.5284249767008387, "grad_norm": 1.2450821963823118, "learning_rate": 1.0900931998619262e-05, "loss": 1.6352, "reason_loss": 0.49890851974487305, "step": 1640, "utility_loss": 1.1363214254379272 }, { "cosine_similarity": 0.11720323516000349, "epoch": 1.5293569431500464, "grad_norm": 1.2426131889858887, "learning_rate": 1.0894028305143251e-05, "loss": 1.8177, "reason_loss": 0.5157740116119385, "step": 1641, "utility_loss": 1.3018794059753418 }, { "cosine_similarity": -0.09181971980201506, "epoch": 1.5302889095992545, "grad_norm": 1.119341532889977, "learning_rate": 1.0887124611667243e-05, "loss": 1.773, "reason_loss": 0.5034475922584534, "step": 1642, "utility_loss": 1.2695248126983643 }, { "cosine_similarity": 0.2996920056459797, "epoch": 1.5312208760484622, "grad_norm": 1.2009418863798955, "learning_rate": 1.0880220918191234e-05, "loss": 1.7874, "reason_loss": 0.5381148457527161, "step": 1643, "utility_loss": 1.24928879737854 }, { "cosine_similarity": 0.19583114021457987, "epoch": 1.53215284249767, "grad_norm": 1.2529342452488434, "learning_rate": 1.0873317224715223e-05, "loss": 1.7768, "reason_loss": 0.5002395510673523, "step": 1644, "utility_loss": 1.2766036987304688 }, { "cosine_similarity": 0.12362197838352365, "epoch": 1.533084808946878, "grad_norm": 1.1252261036725832, "learning_rate": 1.0866413531239216e-05, "loss": 1.7955, "reason_loss": 0.5013770461082458, "step": 1645, "utility_loss": 1.2941007614135742 }, { "cosine_similarity": 0.3866096964844007, "epoch": 1.5340167753960858, "grad_norm": 1.1024660508996331, "learning_rate": 1.0859509837763205e-05, "loss": 1.6495, "reason_loss": 0.5262025594711304, "step": 1646, "utility_loss": 1.123342514038086 }, { "cosine_similarity": 0.26161584068101634, "epoch": 1.5349487418452936, "grad_norm": 1.3326764431862028, "learning_rate": 1.0852606144287194e-05, "loss": 1.8517, "reason_loss": 0.4919874370098114, "step": 1647, "utility_loss": 1.3596835136413574 }, { "cosine_similarity": 0.038831044670595456, "epoch": 1.5358807082945014, "grad_norm": 1.2131515075800137, "learning_rate": 1.0845702450811184e-05, "loss": 1.9079, "reason_loss": 0.5029914379119873, "step": 1648, "utility_loss": 1.404891848564148 }, { "cosine_similarity": 0.03834554918646499, "epoch": 1.5368126747437092, "grad_norm": 1.1694493644640105, "learning_rate": 1.0838798757335177e-05, "loss": 1.8413, "reason_loss": 0.5119550228118896, "step": 1649, "utility_loss": 1.3293488025665283 }, { "cosine_similarity": 0.12153703924832558, "epoch": 1.537744641192917, "grad_norm": 1.1810689615759447, "learning_rate": 1.0831895063859166e-05, "loss": 1.5229, "reason_loss": 0.4992918074131012, "step": 1650, "utility_loss": 1.0235673189163208 }, { "cosine_similarity": 0.5090424218481753, "epoch": 1.5386766076421248, "grad_norm": 1.5329422730655036, "learning_rate": 1.0824991370383155e-05, "loss": 1.9007, "reason_loss": 0.5167462825775146, "step": 1651, "utility_loss": 1.3839857578277588 }, { "cosine_similarity": 0.11245952098125972, "epoch": 1.5396085740913326, "grad_norm": 1.2359454946609683, "learning_rate": 1.0818087676907146e-05, "loss": 1.9282, "reason_loss": 0.5120190382003784, "step": 1652, "utility_loss": 1.4161372184753418 }, { "cosine_similarity": 0.21107194392495388, "epoch": 1.5405405405405406, "grad_norm": 1.2419830958457396, "learning_rate": 1.0811183983431136e-05, "loss": 1.8693, "reason_loss": 0.5254915356636047, "step": 1653, "utility_loss": 1.343847632408142 }, { "cosine_similarity": 0.08728069590572958, "epoch": 1.5414725069897484, "grad_norm": 1.3240382185326292, "learning_rate": 1.0804280289955127e-05, "loss": 1.6582, "reason_loss": 0.48571622371673584, "step": 1654, "utility_loss": 1.172454595565796 }, { "cosine_similarity": 0.03336908975612278, "epoch": 1.5424044734389561, "grad_norm": 1.587903885061422, "learning_rate": 1.0797376596479116e-05, "loss": 1.7704, "reason_loss": 0.5037938356399536, "step": 1655, "utility_loss": 1.2666069269180298 }, { "cosine_similarity": 0.01016858788217672, "epoch": 1.5433364398881642, "grad_norm": 1.2687145045874413, "learning_rate": 1.0790472903003109e-05, "loss": 1.6537, "reason_loss": 0.4971214830875397, "step": 1656, "utility_loss": 1.1565533876419067 }, { "cosine_similarity": 0.014179575401935814, "epoch": 1.544268406337372, "grad_norm": 1.3994567380788918, "learning_rate": 1.0783569209527098e-05, "loss": 2.1057, "reason_loss": 0.48060309886932373, "step": 1657, "utility_loss": 1.625105619430542 }, { "cosine_similarity": 0.043152809034476484, "epoch": 1.5452003727865797, "grad_norm": 1.3542555260716003, "learning_rate": 1.0776665516051088e-05, "loss": 2.0272, "reason_loss": 0.5056413412094116, "step": 1658, "utility_loss": 1.5215139389038086 }, { "cosine_similarity": 0.1943521166069174, "epoch": 1.5461323392357875, "grad_norm": 1.1455666970927094, "learning_rate": 1.0769761822575077e-05, "loss": 1.628, "reason_loss": 0.5046058893203735, "step": 1659, "utility_loss": 1.1234174966812134 }, { "cosine_similarity": 0.22069591709337788, "epoch": 1.5470643056849953, "grad_norm": 1.1302658827606655, "learning_rate": 1.076285812909907e-05, "loss": 1.629, "reason_loss": 0.5235393047332764, "step": 1660, "utility_loss": 1.1054832935333252 }, { "cosine_similarity": 0.1433932298111285, "epoch": 1.547996272134203, "grad_norm": 1.30799883184367, "learning_rate": 1.0755954435623059e-05, "loss": 1.8262, "reason_loss": 0.49267393350601196, "step": 1661, "utility_loss": 1.33353590965271 }, { "cosine_similarity": 0.16499523105809552, "epoch": 1.5489282385834109, "grad_norm": 1.428782940918589, "learning_rate": 1.074905074214705e-05, "loss": 1.6922, "reason_loss": 0.48653995990753174, "step": 1662, "utility_loss": 1.2056224346160889 }, { "cosine_similarity": 0.166787846329586, "epoch": 1.5498602050326187, "grad_norm": 1.1787529694151608, "learning_rate": 1.0742147048671039e-05, "loss": 1.7974, "reason_loss": 0.4889344573020935, "step": 1663, "utility_loss": 1.3084971904754639 }, { "cosine_similarity": 0.06386509199948656, "epoch": 1.5507921714818267, "grad_norm": 1.300366880096378, "learning_rate": 1.0735243355195031e-05, "loss": 1.7948, "reason_loss": 0.5020724534988403, "step": 1664, "utility_loss": 1.292771577835083 }, { "cosine_similarity": 0.00944684113172475, "epoch": 1.5517241379310345, "grad_norm": 1.2052261219683473, "learning_rate": 1.072833966171902e-05, "loss": 1.7493, "reason_loss": 0.4789375364780426, "step": 1665, "utility_loss": 1.2703170776367188 }, { "cosine_similarity": 0.0964738002358289, "epoch": 1.5526561043802423, "grad_norm": 1.1149125356111578, "learning_rate": 1.072143596824301e-05, "loss": 1.6305, "reason_loss": 0.4894223213195801, "step": 1666, "utility_loss": 1.1410315036773682 }, { "cosine_similarity": 0.22505096738050576, "epoch": 1.5535880708294503, "grad_norm": 1.183048319069169, "learning_rate": 1.0714532274767002e-05, "loss": 1.7765, "reason_loss": 0.5072016716003418, "step": 1667, "utility_loss": 1.2693099975585938 }, { "cosine_similarity": 0.021718601937149012, "epoch": 1.554520037278658, "grad_norm": 1.2702906118679056, "learning_rate": 1.0707628581290992e-05, "loss": 1.5754, "reason_loss": 0.4934687316417694, "step": 1668, "utility_loss": 1.0819387435913086 }, { "cosine_similarity": 0.20581593109928756, "epoch": 1.5554520037278659, "grad_norm": 0.9684096002130611, "learning_rate": 1.0700724887814981e-05, "loss": 1.6309, "reason_loss": 0.49676716327667236, "step": 1669, "utility_loss": 1.134131669998169 }, { "cosine_similarity": 0.3040357495170814, "epoch": 1.5563839701770736, "grad_norm": 1.3537309171705798, "learning_rate": 1.0693821194338972e-05, "loss": 1.5729, "reason_loss": 0.4851784110069275, "step": 1670, "utility_loss": 1.0877488851547241 }, { "cosine_similarity": 0.2580626231690669, "epoch": 1.5573159366262814, "grad_norm": 1.1552522127841485, "learning_rate": 1.0686917500862963e-05, "loss": 1.753, "reason_loss": 0.4682004451751709, "step": 1671, "utility_loss": 1.2848142385482788 }, { "cosine_similarity": -0.1196678548494313, "epoch": 1.5582479030754892, "grad_norm": 1.2567282604112398, "learning_rate": 1.0680013807386953e-05, "loss": 1.8864, "reason_loss": 0.5355730056762695, "step": 1672, "utility_loss": 1.350816011428833 }, { "cosine_similarity": 0.3234394063516881, "epoch": 1.559179869524697, "grad_norm": 1.076111685218276, "learning_rate": 1.0673110113910943e-05, "loss": 1.4981, "reason_loss": 0.5231062173843384, "step": 1673, "utility_loss": 0.9749981164932251 }, { "cosine_similarity": 0.09240060697330675, "epoch": 1.5601118359739048, "grad_norm": 1.222930002831737, "learning_rate": 1.0666206420434933e-05, "loss": 1.5595, "reason_loss": 0.4831109642982483, "step": 1674, "utility_loss": 1.0763566493988037 }, { "cosine_similarity": -0.07032965936382343, "epoch": 1.5610438024231128, "grad_norm": 1.58097244633191, "learning_rate": 1.0659302726958924e-05, "loss": 1.7806, "reason_loss": 0.5280326008796692, "step": 1675, "utility_loss": 1.2525756359100342 }, { "cosine_similarity": 0.08743391360120194, "epoch": 1.5619757688723206, "grad_norm": 1.3546627831701419, "learning_rate": 1.0652399033482915e-05, "loss": 1.7387, "reason_loss": 0.49300962686538696, "step": 1676, "utility_loss": 1.2456765174865723 }, { "cosine_similarity": 0.0971971941984686, "epoch": 1.5629077353215284, "grad_norm": 1.3187824910475678, "learning_rate": 1.0645495340006904e-05, "loss": 1.4953, "reason_loss": 0.4830762445926666, "step": 1677, "utility_loss": 1.012271523475647 }, { "cosine_similarity": -0.25660688186873454, "epoch": 1.5638397017707364, "grad_norm": 1.023476584156457, "learning_rate": 1.0638591646530896e-05, "loss": 1.5309, "reason_loss": 0.4940739870071411, "step": 1678, "utility_loss": 1.036781907081604 }, { "cosine_similarity": 0.6233527655534871, "epoch": 1.5647716682199442, "grad_norm": 1.1780127790437607, "learning_rate": 1.0631687953054885e-05, "loss": 1.5918, "reason_loss": 0.4760923981666565, "step": 1679, "utility_loss": 1.1156636476516724 }, { "cosine_similarity": 0.29097382798559035, "epoch": 1.565703634669152, "grad_norm": 1.159300669710748, "learning_rate": 1.0624784259578876e-05, "loss": 2.0201, "reason_loss": 0.5224757194519043, "step": 1680, "utility_loss": 1.4976361989974976 }, { "cosine_similarity": 0.22465333557490735, "epoch": 1.5666356011183598, "grad_norm": 0.9898177324034508, "learning_rate": 1.0617880566102865e-05, "loss": 1.7281, "reason_loss": 0.48567888140678406, "step": 1681, "utility_loss": 1.2424352169036865 }, { "cosine_similarity": 0.025905651446103153, "epoch": 1.5675675675675675, "grad_norm": 1.0739864946091628, "learning_rate": 1.0610976872626857e-05, "loss": 1.5437, "reason_loss": 0.47150057554244995, "step": 1682, "utility_loss": 1.0721653699874878 }, { "cosine_similarity": 0.5529836979624432, "epoch": 1.5684995340167753, "grad_norm": 1.1073709298535777, "learning_rate": 1.0604073179150846e-05, "loss": 1.8469, "reason_loss": 0.4781529903411865, "step": 1683, "utility_loss": 1.368720293045044 }, { "cosine_similarity": 0.5119396386221483, "epoch": 1.5694315004659831, "grad_norm": 1.0962244967919128, "learning_rate": 1.0597169485674837e-05, "loss": 2.0231, "reason_loss": 0.5082684755325317, "step": 1684, "utility_loss": 1.5147842168807983 }, { "cosine_similarity": 0.15515998173972573, "epoch": 1.570363466915191, "grad_norm": 1.1696236067149628, "learning_rate": 1.0590265792198828e-05, "loss": 2.1529, "reason_loss": 0.5039326548576355, "step": 1685, "utility_loss": 1.6489938497543335 }, { "cosine_similarity": 0.06067768076349129, "epoch": 1.571295433364399, "grad_norm": 1.1240295640078222, "learning_rate": 1.0583362098722819e-05, "loss": 1.7229, "reason_loss": 0.49338245391845703, "step": 1686, "utility_loss": 1.229560136795044 }, { "cosine_similarity": 0.29309410764209276, "epoch": 1.5722273998136067, "grad_norm": 1.2573511506900075, "learning_rate": 1.0576458405246808e-05, "loss": 1.9264, "reason_loss": 0.49599939584732056, "step": 1687, "utility_loss": 1.4304195642471313 }, { "cosine_similarity": 0.10120103659138688, "epoch": 1.5731593662628145, "grad_norm": 1.160225037051412, "learning_rate": 1.0569554711770797e-05, "loss": 1.6538, "reason_loss": 0.5204838514328003, "step": 1688, "utility_loss": 1.1333295106887817 }, { "cosine_similarity": 0.1885278975552702, "epoch": 1.5740913327120225, "grad_norm": 1.48009962062246, "learning_rate": 1.056265101829479e-05, "loss": 1.8228, "reason_loss": 0.5143601894378662, "step": 1689, "utility_loss": 1.308457374572754 }, { "cosine_similarity": 0.0712782903828135, "epoch": 1.5750232991612303, "grad_norm": 1.075032793215879, "learning_rate": 1.055574732481878e-05, "loss": 1.6948, "reason_loss": 0.5073586106300354, "step": 1690, "utility_loss": 1.1874608993530273 }, { "cosine_similarity": 0.43654907229719053, "epoch": 1.575955265610438, "grad_norm": 1.3358944646562785, "learning_rate": 1.0548843631342769e-05, "loss": 1.9831, "reason_loss": 0.5244916677474976, "step": 1691, "utility_loss": 1.4586331844329834 }, { "cosine_similarity": 0.08435451349238313, "epoch": 1.5768872320596459, "grad_norm": 1.0614530703815575, "learning_rate": 1.0541939937866758e-05, "loss": 1.3465, "reason_loss": 0.4993355870246887, "step": 1692, "utility_loss": 0.8471419811248779 }, { "cosine_similarity": 0.15928606182271388, "epoch": 1.5778191985088537, "grad_norm": 1.0546800770547233, "learning_rate": 1.053503624439075e-05, "loss": 1.9086, "reason_loss": 0.536292314529419, "step": 1693, "utility_loss": 1.3723077774047852 }, { "cosine_similarity": 0.047542100691981004, "epoch": 1.5787511649580614, "grad_norm": 1.0608566305966318, "learning_rate": 1.0528132550914741e-05, "loss": 1.695, "reason_loss": 0.4801405668258667, "step": 1694, "utility_loss": 1.214818000793457 }, { "cosine_similarity": 0.3200905327742759, "epoch": 1.5796831314072692, "grad_norm": 1.4293309777828098, "learning_rate": 1.052122885743873e-05, "loss": 1.7298, "reason_loss": 0.5044160485267639, "step": 1695, "utility_loss": 1.2254245281219482 }, { "cosine_similarity": 0.3225769045822906, "epoch": 1.580615097856477, "grad_norm": 1.3265524577187269, "learning_rate": 1.0514325163962723e-05, "loss": 1.8269, "reason_loss": 0.491697758436203, "step": 1696, "utility_loss": 1.3352320194244385 }, { "cosine_similarity": 0.02727635455877135, "epoch": 1.581547064305685, "grad_norm": 1.0770233123146682, "learning_rate": 1.0507421470486712e-05, "loss": 1.5976, "reason_loss": 0.5038206577301025, "step": 1697, "utility_loss": 1.0938141345977783 }, { "cosine_similarity": 0.18772623188639662, "epoch": 1.5824790307548928, "grad_norm": 1.1635826001855836, "learning_rate": 1.05005177770107e-05, "loss": 1.7833, "reason_loss": 0.5144532918930054, "step": 1698, "utility_loss": 1.268812894821167 }, { "cosine_similarity": 0.01947471055668784, "epoch": 1.5834109972041006, "grad_norm": 1.4244799850153895, "learning_rate": 1.0493614083534691e-05, "loss": 1.8217, "reason_loss": 0.47152385115623474, "step": 1699, "utility_loss": 1.3502196073532104 }, { "cosine_similarity": 0.061723640227054524, "epoch": 1.5843429636533086, "grad_norm": 1.121512902633989, "learning_rate": 1.0486710390058684e-05, "loss": 1.8589, "reason_loss": 0.520115315914154, "step": 1700, "utility_loss": 1.3387694358825684 }, { "cosine_similarity": -0.1049547152668137, "epoch": 1.5852749301025164, "grad_norm": 1.0225268883863288, "learning_rate": 1.0479806696582673e-05, "loss": 1.5545, "reason_loss": 0.4909660816192627, "step": 1701, "utility_loss": 1.0635318756103516 }, { "cosine_similarity": 0.003215649867415857, "epoch": 1.5862068965517242, "grad_norm": 1.3330643848671744, "learning_rate": 1.0472903003106662e-05, "loss": 1.5832, "reason_loss": 0.5042651891708374, "step": 1702, "utility_loss": 1.0789573192596436 }, { "cosine_similarity": 0.06547121099281951, "epoch": 1.587138863000932, "grad_norm": 1.1899884827071994, "learning_rate": 1.0465999309630653e-05, "loss": 1.6686, "reason_loss": 0.4786255657672882, "step": 1703, "utility_loss": 1.1899328231811523 }, { "cosine_similarity": 0.18541593269658815, "epoch": 1.5880708294501398, "grad_norm": 1.102924171406813, "learning_rate": 1.0459095616154643e-05, "loss": 1.7462, "reason_loss": 0.5397909879684448, "step": 1704, "utility_loss": 1.2064554691314697 }, { "cosine_similarity": 0.18761865060918625, "epoch": 1.5890027958993476, "grad_norm": 1.3983143162242564, "learning_rate": 1.0452191922678634e-05, "loss": 1.6893, "reason_loss": 0.49734699726104736, "step": 1705, "utility_loss": 1.191953182220459 }, { "cosine_similarity": -0.23038303552954859, "epoch": 1.5899347623485554, "grad_norm": 1.1118313096467494, "learning_rate": 1.0445288229202623e-05, "loss": 1.5306, "reason_loss": 0.4787019193172455, "step": 1706, "utility_loss": 1.051923155784607 }, { "cosine_similarity": 0.037690345148686945, "epoch": 1.5908667287977631, "grad_norm": 1.2320394602410236, "learning_rate": 1.0438384535726616e-05, "loss": 1.7997, "reason_loss": 0.48826757073402405, "step": 1707, "utility_loss": 1.3114683628082275 }, { "cosine_similarity": 0.22571987369761776, "epoch": 1.5917986952469712, "grad_norm": 1.5624065878759472, "learning_rate": 1.0431480842250605e-05, "loss": 1.8933, "reason_loss": 0.4991370141506195, "step": 1708, "utility_loss": 1.3941644430160522 }, { "cosine_similarity": -0.06019957225767226, "epoch": 1.592730661696179, "grad_norm": 1.581325416670256, "learning_rate": 1.0424577148774595e-05, "loss": 1.7355, "reason_loss": 0.516807496547699, "step": 1709, "utility_loss": 1.2186733484268188 }, { "cosine_similarity": 0.14351069119564785, "epoch": 1.5936626281453867, "grad_norm": 1.2745099688812265, "learning_rate": 1.0417673455298584e-05, "loss": 1.8817, "reason_loss": 0.5116588473320007, "step": 1710, "utility_loss": 1.370045781135559 }, { "cosine_similarity": 0.3690162823026321, "epoch": 1.5945945945945947, "grad_norm": 1.1908497883390297, "learning_rate": 1.0410769761822577e-05, "loss": 1.5934, "reason_loss": 0.49140918254852295, "step": 1711, "utility_loss": 1.1019997596740723 }, { "cosine_similarity": 0.47615238194284104, "epoch": 1.5955265610438025, "grad_norm": 1.0628332131843967, "learning_rate": 1.0403866068346566e-05, "loss": 1.6595, "reason_loss": 0.5059722661972046, "step": 1712, "utility_loss": 1.1535601615905762 }, { "cosine_similarity": 0.5192600322112103, "epoch": 1.5964585274930103, "grad_norm": 1.2525957116473951, "learning_rate": 1.0396962374870557e-05, "loss": 1.5966, "reason_loss": 0.508966863155365, "step": 1713, "utility_loss": 1.087641716003418 }, { "cosine_similarity": -0.3885864693262812, "epoch": 1.597390493942218, "grad_norm": 1.4877728203297553, "learning_rate": 1.0390058681394546e-05, "loss": 1.8601, "reason_loss": 0.4791127145290375, "step": 1714, "utility_loss": 1.380939245223999 }, { "cosine_similarity": 0.18395088022813919, "epoch": 1.598322460391426, "grad_norm": 1.1272520646013389, "learning_rate": 1.0383154987918538e-05, "loss": 1.8146, "reason_loss": 0.48829185962677, "step": 1715, "utility_loss": 1.3263381719589233 }, { "cosine_similarity": 0.2890555127781709, "epoch": 1.5992544268406337, "grad_norm": 1.082162452152418, "learning_rate": 1.0376251294442527e-05, "loss": 1.5738, "reason_loss": 0.5040277242660522, "step": 1716, "utility_loss": 1.0698161125183105 }, { "cosine_similarity": 0.376652431005912, "epoch": 1.6001863932898415, "grad_norm": 1.0739422150578766, "learning_rate": 1.0369347600966518e-05, "loss": 1.792, "reason_loss": 0.5048184394836426, "step": 1717, "utility_loss": 1.2871962785720825 }, { "cosine_similarity": 0.07409530101193935, "epoch": 1.6011183597390493, "grad_norm": 1.2794481706187286, "learning_rate": 1.0362443907490509e-05, "loss": 1.8493, "reason_loss": 0.480646014213562, "step": 1718, "utility_loss": 1.3686203956604004 }, { "cosine_similarity": 0.1863145985194117, "epoch": 1.6020503261882573, "grad_norm": 1.1579572981799022, "learning_rate": 1.03555402140145e-05, "loss": 1.8614, "reason_loss": 0.5018996000289917, "step": 1719, "utility_loss": 1.3595186471939087 }, { "cosine_similarity": -0.028867740090426575, "epoch": 1.602982292637465, "grad_norm": 1.2875132134547245, "learning_rate": 1.0348636520538488e-05, "loss": 1.7654, "reason_loss": 0.5254454612731934, "step": 1720, "utility_loss": 1.2399945259094238 }, { "cosine_similarity": 0.3531816754741787, "epoch": 1.6039142590866728, "grad_norm": 1.0757487445076397, "learning_rate": 1.0341732827062479e-05, "loss": 1.937, "reason_loss": 0.52321857213974, "step": 1721, "utility_loss": 1.4138141870498657 }, { "cosine_similarity": 0.014126111215549459, "epoch": 1.6048462255358809, "grad_norm": 0.9654284137247552, "learning_rate": 1.033482913358647e-05, "loss": 1.3573, "reason_loss": 0.4932299852371216, "step": 1722, "utility_loss": 0.8640838861465454 }, { "cosine_similarity": 0.04249329296487194, "epoch": 1.6057781919850886, "grad_norm": 1.047225868395879, "learning_rate": 1.032792544011046e-05, "loss": 1.4409, "reason_loss": 0.5006061792373657, "step": 1723, "utility_loss": 0.9402969479560852 }, { "cosine_similarity": 0.18044838823308812, "epoch": 1.6067101584342964, "grad_norm": 1.2253761968094374, "learning_rate": 1.032102174663445e-05, "loss": 1.9113, "reason_loss": 0.5104994773864746, "step": 1724, "utility_loss": 1.4007729291915894 }, { "cosine_similarity": -0.12586584822565708, "epoch": 1.6076421248835042, "grad_norm": 1.1717907211185503, "learning_rate": 1.031411805315844e-05, "loss": 1.8279, "reason_loss": 0.47071558237075806, "step": 1725, "utility_loss": 1.3571586608886719 }, { "cosine_similarity": 0.3088433251291629, "epoch": 1.608574091332712, "grad_norm": 1.2654132915276792, "learning_rate": 1.0307214359682431e-05, "loss": 1.7049, "reason_loss": 0.47589707374572754, "step": 1726, "utility_loss": 1.228992223739624 }, { "cosine_similarity": 0.05900346421157055, "epoch": 1.6095060577819198, "grad_norm": 1.1152839620494892, "learning_rate": 1.0300310666206422e-05, "loss": 1.7523, "reason_loss": 0.49608859419822693, "step": 1727, "utility_loss": 1.2562466859817505 }, { "cosine_similarity": -0.20405702505728765, "epoch": 1.6104380242311276, "grad_norm": 1.4229756088358116, "learning_rate": 1.029340697273041e-05, "loss": 1.7182, "reason_loss": 0.4994383454322815, "step": 1728, "utility_loss": 1.2187838554382324 }, { "cosine_similarity": -0.06327580714408583, "epoch": 1.6113699906803354, "grad_norm": 1.1064998038850338, "learning_rate": 1.0286503279254403e-05, "loss": 1.5287, "reason_loss": 0.5021952390670776, "step": 1729, "utility_loss": 1.0264735221862793 }, { "cosine_similarity": 0.11167650673395568, "epoch": 1.6123019571295434, "grad_norm": 1.1094104146440695, "learning_rate": 1.0279599585778392e-05, "loss": 1.7645, "reason_loss": 0.48601675033569336, "step": 1730, "utility_loss": 1.278470516204834 }, { "cosine_similarity": -0.15808266748416533, "epoch": 1.6132339235787512, "grad_norm": 1.1979625762086692, "learning_rate": 1.0272695892302383e-05, "loss": 1.7661, "reason_loss": 0.5004129409790039, "step": 1731, "utility_loss": 1.265679121017456 }, { "cosine_similarity": 0.5752299866736775, "epoch": 1.614165890027959, "grad_norm": 1.1301578898207378, "learning_rate": 1.0265792198826372e-05, "loss": 1.7269, "reason_loss": 0.5226719975471497, "step": 1732, "utility_loss": 1.2042773962020874 }, { "cosine_similarity": 0.35840496153225937, "epoch": 1.615097856477167, "grad_norm": 1.1146871087377894, "learning_rate": 1.0258888505350364e-05, "loss": 1.5974, "reason_loss": 0.4885406196117401, "step": 1733, "utility_loss": 1.1088955402374268 }, { "cosine_similarity": 0.11979038990817874, "epoch": 1.6160298229263748, "grad_norm": 0.9490318678574544, "learning_rate": 1.0251984811874353e-05, "loss": 1.3987, "reason_loss": 0.47819143533706665, "step": 1734, "utility_loss": 0.9205478429794312 }, { "cosine_similarity": 0.13045198240761322, "epoch": 1.6169617893755825, "grad_norm": 1.305329559369028, "learning_rate": 1.0245081118398344e-05, "loss": 1.7058, "reason_loss": 0.510162353515625, "step": 1735, "utility_loss": 1.195595622062683 }, { "cosine_similarity": -0.08525825651524449, "epoch": 1.6178937558247903, "grad_norm": 1.26042754810303, "learning_rate": 1.0238177424922335e-05, "loss": 1.7429, "reason_loss": 0.4782986044883728, "step": 1736, "utility_loss": 1.26460862159729 }, { "cosine_similarity": 0.4146631519289583, "epoch": 1.6188257222739981, "grad_norm": 1.342348805381906, "learning_rate": 1.0231273731446326e-05, "loss": 1.614, "reason_loss": 0.494926393032074, "step": 1737, "utility_loss": 1.1190707683563232 }, { "cosine_similarity": 0.008712191134783246, "epoch": 1.619757688723206, "grad_norm": 0.9782154310894333, "learning_rate": 1.0224370037970315e-05, "loss": 1.5471, "reason_loss": 0.49067819118499756, "step": 1738, "utility_loss": 1.0564244985580444 }, { "cosine_similarity": -0.00974872931510758, "epoch": 1.6206896551724137, "grad_norm": 1.1228301718809108, "learning_rate": 1.0217466344494304e-05, "loss": 1.5033, "reason_loss": 0.4923495948314667, "step": 1739, "utility_loss": 1.010924220085144 }, { "cosine_similarity": 0.09301067923297149, "epoch": 1.6216216216216215, "grad_norm": 1.283291703351943, "learning_rate": 1.0210562651018296e-05, "loss": 1.5322, "reason_loss": 0.5348157286643982, "step": 1740, "utility_loss": 0.9973360300064087 }, { "cosine_similarity": 0.18515624632266325, "epoch": 1.6225535880708295, "grad_norm": 1.1524742951201905, "learning_rate": 1.0203658957542287e-05, "loss": 1.7628, "reason_loss": 0.5162487626075745, "step": 1741, "utility_loss": 1.2465629577636719 }, { "cosine_similarity": 0.11150528311165914, "epoch": 1.6234855545200373, "grad_norm": 1.4443116136538368, "learning_rate": 1.0196755264066276e-05, "loss": 1.87, "reason_loss": 0.5224733352661133, "step": 1742, "utility_loss": 1.3475100994110107 }, { "cosine_similarity": 0.1963905116433889, "epoch": 1.624417520969245, "grad_norm": 1.28211383560405, "learning_rate": 1.0189851570590265e-05, "loss": 1.8478, "reason_loss": 0.4961739182472229, "step": 1743, "utility_loss": 1.351602554321289 }, { "cosine_similarity": 0.11865974402185106, "epoch": 1.625349487418453, "grad_norm": 1.2512622894067422, "learning_rate": 1.0182947877114257e-05, "loss": 1.7465, "reason_loss": 0.5198931694030762, "step": 1744, "utility_loss": 1.2266314029693604 }, { "cosine_similarity": 0.07447096746081489, "epoch": 1.6262814538676609, "grad_norm": 1.299449080701983, "learning_rate": 1.0176044183638248e-05, "loss": 1.7593, "reason_loss": 0.490530788898468, "step": 1745, "utility_loss": 1.2687647342681885 }, { "cosine_similarity": -0.16621995391902972, "epoch": 1.6272134203168687, "grad_norm": 1.2341802408771938, "learning_rate": 1.0169140490162237e-05, "loss": 1.7182, "reason_loss": 0.5202178359031677, "step": 1746, "utility_loss": 1.1980172395706177 }, { "cosine_similarity": -0.029527920972715803, "epoch": 1.6281453867660765, "grad_norm": 1.6030140173747949, "learning_rate": 1.016223679668623e-05, "loss": 2.0064, "reason_loss": 0.4811115860939026, "step": 1747, "utility_loss": 1.5252776145935059 }, { "cosine_similarity": 0.004111763057074839, "epoch": 1.6290773532152842, "grad_norm": 1.1615733819852938, "learning_rate": 1.0155333103210219e-05, "loss": 1.4949, "reason_loss": 0.5207148790359497, "step": 1748, "utility_loss": 0.9741939306259155 }, { "cosine_similarity": 0.2430986278560511, "epoch": 1.630009319664492, "grad_norm": 1.324612748794508, "learning_rate": 1.0148429409734208e-05, "loss": 2.1641, "reason_loss": 0.47828707098960876, "step": 1749, "utility_loss": 1.6857820749282837 }, { "cosine_similarity": -0.04974169305096142, "epoch": 1.6309412861136998, "grad_norm": 1.1955400314684443, "learning_rate": 1.0141525716258198e-05, "loss": 1.7112, "reason_loss": 0.5155813694000244, "step": 1750, "utility_loss": 1.1955749988555908 }, { "cosine_similarity": 0.001158002914093846, "epoch": 1.6318732525629076, "grad_norm": 1.203827633709888, "learning_rate": 1.0134622022782191e-05, "loss": 1.9437, "reason_loss": 0.4897559881210327, "step": 1751, "utility_loss": 1.4539704322814941 }, { "cosine_similarity": -0.07025062945221382, "epoch": 1.6328052190121156, "grad_norm": 1.097771301708426, "learning_rate": 1.012771832930618e-05, "loss": 1.7297, "reason_loss": 0.498851478099823, "step": 1752, "utility_loss": 1.230858564376831 }, { "cosine_similarity": -0.061669703295750194, "epoch": 1.6337371854613234, "grad_norm": 1.0473689983709507, "learning_rate": 1.0120814635830169e-05, "loss": 1.5519, "reason_loss": 0.5053975582122803, "step": 1753, "utility_loss": 1.0464706420898438 }, { "cosine_similarity": 0.26004832407427314, "epoch": 1.6346691519105312, "grad_norm": 1.149392081440945, "learning_rate": 1.011391094235416e-05, "loss": 1.6754, "reason_loss": 0.47946783900260925, "step": 1754, "utility_loss": 1.1959218978881836 }, { "cosine_similarity": 0.09714033595090246, "epoch": 1.6356011183597392, "grad_norm": 1.144319465140277, "learning_rate": 1.010700724887815e-05, "loss": 1.5054, "reason_loss": 0.5017536878585815, "step": 1755, "utility_loss": 1.0036087036132812 }, { "cosine_similarity": 0.13747802059788491, "epoch": 1.636533084808947, "grad_norm": 1.2564678680376737, "learning_rate": 1.0100103555402141e-05, "loss": 1.4051, "reason_loss": 0.5092630386352539, "step": 1756, "utility_loss": 0.8958224058151245 }, { "cosine_similarity": 0.10276635799474625, "epoch": 1.6374650512581548, "grad_norm": 1.184726088660451, "learning_rate": 1.009319986192613e-05, "loss": 1.7274, "reason_loss": 0.5019254684448242, "step": 1757, "utility_loss": 1.2254717350006104 }, { "cosine_similarity": 0.1680643192222828, "epoch": 1.6383970177073626, "grad_norm": 1.0969513669727788, "learning_rate": 1.0086296168450123e-05, "loss": 1.6707, "reason_loss": 0.4948582947254181, "step": 1758, "utility_loss": 1.1758819818496704 }, { "cosine_similarity": 0.24129457443969302, "epoch": 1.6393289841565704, "grad_norm": 1.330954923715016, "learning_rate": 1.0079392474974112e-05, "loss": 1.9901, "reason_loss": 0.5007997751235962, "step": 1759, "utility_loss": 1.4893091917037964 }, { "cosine_similarity": 0.14337494724219485, "epoch": 1.6402609506057781, "grad_norm": 1.3810701644153156, "learning_rate": 1.0072488781498102e-05, "loss": 2.2319, "reason_loss": 0.5132781863212585, "step": 1760, "utility_loss": 1.7186485528945923 }, { "cosine_similarity": 0.052708448276919506, "epoch": 1.641192917054986, "grad_norm": 1.3481759742222792, "learning_rate": 1.0065585088022091e-05, "loss": 1.5257, "reason_loss": 0.5106056332588196, "step": 1761, "utility_loss": 1.0151188373565674 }, { "cosine_similarity": 0.043312360692261805, "epoch": 1.6421248835041937, "grad_norm": 1.208085753654254, "learning_rate": 1.0058681394546084e-05, "loss": 1.695, "reason_loss": 0.4777125120162964, "step": 1762, "utility_loss": 1.2172493934631348 }, { "cosine_similarity": 0.12399113981747609, "epoch": 1.6430568499534017, "grad_norm": 1.1667597200611948, "learning_rate": 1.0051777701070073e-05, "loss": 2.0065, "reason_loss": 0.5119336247444153, "step": 1763, "utility_loss": 1.4945316314697266 }, { "cosine_similarity": 0.06936296513681446, "epoch": 1.6439888164026095, "grad_norm": 1.1302007575964719, "learning_rate": 1.0044874007594064e-05, "loss": 1.6012, "reason_loss": 0.5226923823356628, "step": 1764, "utility_loss": 1.0784755945205688 }, { "cosine_similarity": 0.04455063417995033, "epoch": 1.6449207828518173, "grad_norm": 1.148085026874583, "learning_rate": 1.0037970314118053e-05, "loss": 1.6339, "reason_loss": 0.49639642238616943, "step": 1765, "utility_loss": 1.137495994567871 }, { "cosine_similarity": 0.21896646478753584, "epoch": 1.6458527493010253, "grad_norm": 1.400545210558605, "learning_rate": 1.0031066620642045e-05, "loss": 1.9726, "reason_loss": 0.49149784445762634, "step": 1766, "utility_loss": 1.4810688495635986 }, { "cosine_similarity": 0.25592069313630855, "epoch": 1.646784715750233, "grad_norm": 1.2482526403823022, "learning_rate": 1.0024162927166034e-05, "loss": 1.8405, "reason_loss": 0.4898630380630493, "step": 1767, "utility_loss": 1.3506252765655518 }, { "cosine_similarity": 0.2937532840484979, "epoch": 1.647716682199441, "grad_norm": 0.9692690285457313, "learning_rate": 1.0017259233690025e-05, "loss": 1.7997, "reason_loss": 0.47555094957351685, "step": 1768, "utility_loss": 1.3241958618164062 }, { "cosine_similarity": 0.08676412814403463, "epoch": 1.6486486486486487, "grad_norm": 1.32397337026746, "learning_rate": 1.0010355540214016e-05, "loss": 1.801, "reason_loss": 0.5058045983314514, "step": 1769, "utility_loss": 1.2952396869659424 }, { "cosine_similarity": 0.1686187707654699, "epoch": 1.6495806150978565, "grad_norm": 1.0711929711770982, "learning_rate": 1.0003451846738006e-05, "loss": 1.7563, "reason_loss": 0.5134057998657227, "step": 1770, "utility_loss": 1.2429184913635254 }, { "cosine_similarity": 0.291629406889867, "epoch": 1.6505125815470643, "grad_norm": 1.1084632523340845, "learning_rate": 9.996548153261995e-06, "loss": 1.704, "reason_loss": 0.4723694622516632, "step": 1771, "utility_loss": 1.231658697128296 }, { "cosine_similarity": -0.05562951030171113, "epoch": 1.651444547996272, "grad_norm": 1.1705165617078468, "learning_rate": 9.989644459785986e-06, "loss": 2.0353, "reason_loss": 0.47668153047561646, "step": 1772, "utility_loss": 1.5586391687393188 }, { "cosine_similarity": 0.02918375935066818, "epoch": 1.6523765144454798, "grad_norm": 1.0674657032463164, "learning_rate": 9.982740766309977e-06, "loss": 1.4782, "reason_loss": 0.5211827158927917, "step": 1773, "utility_loss": 0.9569947123527527 }, { "cosine_similarity": 0.29828455633943707, "epoch": 1.6533084808946876, "grad_norm": 1.1110981534095683, "learning_rate": 9.975837072833968e-06, "loss": 1.6478, "reason_loss": 0.47960057854652405, "step": 1774, "utility_loss": 1.1682181358337402 }, { "cosine_similarity": 0.13921207825919388, "epoch": 1.6542404473438956, "grad_norm": 1.1479795775978174, "learning_rate": 9.968933379357957e-06, "loss": 1.8955, "reason_loss": 0.47142890095710754, "step": 1775, "utility_loss": 1.4240849018096924 }, { "cosine_similarity": -0.0362296342827058, "epoch": 1.6551724137931034, "grad_norm": 1.167364419531083, "learning_rate": 9.962029685881947e-06, "loss": 1.5127, "reason_loss": 0.5137395858764648, "step": 1776, "utility_loss": 0.9989375472068787 }, { "cosine_similarity": 0.2177325412686876, "epoch": 1.6561043802423114, "grad_norm": 1.2420789513323964, "learning_rate": 9.955125992405938e-06, "loss": 1.555, "reason_loss": 0.5047754049301147, "step": 1777, "utility_loss": 1.0502532720565796 }, { "cosine_similarity": 0.31950942944111693, "epoch": 1.6570363466915192, "grad_norm": 1.5327333056945236, "learning_rate": 9.948222298929929e-06, "loss": 1.7754, "reason_loss": 0.5005893707275391, "step": 1778, "utility_loss": 1.2747985124588013 }, { "cosine_similarity": 0.15073055737375363, "epoch": 1.657968313140727, "grad_norm": 1.3103002623659588, "learning_rate": 9.94131860545392e-06, "loss": 1.7538, "reason_loss": 0.5372344255447388, "step": 1779, "utility_loss": 1.2165765762329102 }, { "cosine_similarity": 0.1272786451432975, "epoch": 1.6589002795899348, "grad_norm": 1.0953650970806605, "learning_rate": 9.934414911977909e-06, "loss": 1.695, "reason_loss": 0.47735199332237244, "step": 1780, "utility_loss": 1.2176024913787842 }, { "cosine_similarity": 0.07970575560389409, "epoch": 1.6598322460391426, "grad_norm": 0.9995871132520449, "learning_rate": 9.9275112185019e-06, "loss": 1.6095, "reason_loss": 0.5089055895805359, "step": 1781, "utility_loss": 1.1005611419677734 }, { "cosine_similarity": -0.018937067430731654, "epoch": 1.6607642124883504, "grad_norm": 1.1153796670663243, "learning_rate": 9.92060752502589e-06, "loss": 1.7851, "reason_loss": 0.5159444808959961, "step": 1782, "utility_loss": 1.2691246271133423 }, { "cosine_similarity": 0.19957920943335353, "epoch": 1.6616961789375582, "grad_norm": 1.3208528055000635, "learning_rate": 9.91370383154988e-06, "loss": 1.7686, "reason_loss": 0.4880232512950897, "step": 1783, "utility_loss": 1.280529260635376 }, { "cosine_similarity": 0.17520814098226445, "epoch": 1.662628145386766, "grad_norm": 1.205085275200548, "learning_rate": 9.90680013807387e-06, "loss": 1.5985, "reason_loss": 0.48384928703308105, "step": 1784, "utility_loss": 1.114628791809082 }, { "cosine_similarity": 0.13504863661096264, "epoch": 1.6635601118359737, "grad_norm": 1.1420426144761406, "learning_rate": 9.89989644459786e-06, "loss": 2.0884, "reason_loss": 0.5022501945495605, "step": 1785, "utility_loss": 1.5861973762512207 }, { "cosine_similarity": 0.08882101703325447, "epoch": 1.6644920782851818, "grad_norm": 1.2837214233056722, "learning_rate": 9.892992751121851e-06, "loss": 2.2178, "reason_loss": 0.5007420182228088, "step": 1786, "utility_loss": 1.7170138359069824 }, { "cosine_similarity": 0.2474361159212963, "epoch": 1.6654240447343895, "grad_norm": 1.388221202354039, "learning_rate": 9.886089057645842e-06, "loss": 1.6368, "reason_loss": 0.4828765392303467, "step": 1787, "utility_loss": 1.1538807153701782 }, { "cosine_similarity": 0.28308705183583005, "epoch": 1.6663560111835976, "grad_norm": 1.144560467312751, "learning_rate": 9.879185364169833e-06, "loss": 1.8476, "reason_loss": 0.5061303377151489, "step": 1788, "utility_loss": 1.3415119647979736 }, { "cosine_similarity": 0.34772208393313336, "epoch": 1.6672879776328053, "grad_norm": 1.080388790233738, "learning_rate": 9.872281670693822e-06, "loss": 1.9272, "reason_loss": 0.5116413831710815, "step": 1789, "utility_loss": 1.4155969619750977 }, { "cosine_similarity": 0.009167776808717939, "epoch": 1.6682199440820131, "grad_norm": 1.4261096287779835, "learning_rate": 9.865377977217812e-06, "loss": 1.717, "reason_loss": 0.49996453523635864, "step": 1790, "utility_loss": 1.2170829772949219 }, { "cosine_similarity": 0.09652333962517519, "epoch": 1.669151910531221, "grad_norm": 1.088720388880664, "learning_rate": 9.858474283741803e-06, "loss": 1.6949, "reason_loss": 0.4877479076385498, "step": 1791, "utility_loss": 1.2071752548217773 }, { "cosine_similarity": 0.28126914128670866, "epoch": 1.6700838769804287, "grad_norm": 1.2486238827896383, "learning_rate": 9.851570590265794e-06, "loss": 1.727, "reason_loss": 0.4931704103946686, "step": 1792, "utility_loss": 1.2338625192642212 }, { "cosine_similarity": -0.06508384391710605, "epoch": 1.6710158434296365, "grad_norm": 1.2982684913826765, "learning_rate": 9.844666896789783e-06, "loss": 2.127, "reason_loss": 0.5116473436355591, "step": 1793, "utility_loss": 1.6153974533081055 }, { "cosine_similarity": -0.005347818876708798, "epoch": 1.6719478098788443, "grad_norm": 1.3704200956036325, "learning_rate": 9.837763203313774e-06, "loss": 1.8866, "reason_loss": 0.4652383327484131, "step": 1794, "utility_loss": 1.4213931560516357 }, { "cosine_similarity": 0.017788362316311798, "epoch": 1.672879776328052, "grad_norm": 1.313679947999218, "learning_rate": 9.830859509837763e-06, "loss": 1.8687, "reason_loss": 0.4870094358921051, "step": 1795, "utility_loss": 1.3816570043563843 }, { "cosine_similarity": 0.6189245912926822, "epoch": 1.6738117427772599, "grad_norm": 1.2588991911941485, "learning_rate": 9.823955816361753e-06, "loss": 1.8796, "reason_loss": 0.508752167224884, "step": 1796, "utility_loss": 1.3708908557891846 }, { "cosine_similarity": 0.20998402257710092, "epoch": 1.6747437092264679, "grad_norm": 1.1774861757207224, "learning_rate": 9.817052122885746e-06, "loss": 1.5327, "reason_loss": 0.46467795968055725, "step": 1797, "utility_loss": 1.0680586099624634 }, { "cosine_similarity": 0.3942476867433004, "epoch": 1.6756756756756757, "grad_norm": 1.1590268749717016, "learning_rate": 9.810148429409735e-06, "loss": 1.8767, "reason_loss": 0.5307104587554932, "step": 1798, "utility_loss": 1.3459956645965576 }, { "cosine_similarity": 0.21474158152126352, "epoch": 1.6766076421248837, "grad_norm": 1.2100601017741193, "learning_rate": 9.803244735933726e-06, "loss": 1.5529, "reason_loss": 0.44703054428100586, "step": 1799, "utility_loss": 1.105898141860962 }, { "cosine_similarity": 0.07402081403632192, "epoch": 1.6775396085740915, "grad_norm": 1.4653757369516394, "learning_rate": 9.796341042457715e-06, "loss": 1.8567, "reason_loss": 0.5032352805137634, "step": 1800, "utility_loss": 1.3534468412399292 }, { "cosine_similarity": 0.4030026729065801, "epoch": 1.6784715750232992, "grad_norm": 1.3852376236835962, "learning_rate": 9.789437348981705e-06, "loss": 1.9243, "reason_loss": 0.5077741146087646, "step": 1801, "utility_loss": 1.4165151119232178 }, { "cosine_similarity": 0.01863032241911338, "epoch": 1.679403541472507, "grad_norm": 1.138256624467283, "learning_rate": 9.782533655505696e-06, "loss": 1.6355, "reason_loss": 0.4872358441352844, "step": 1802, "utility_loss": 1.1482447385787964 }, { "cosine_similarity": 0.26850598978544044, "epoch": 1.6803355079217148, "grad_norm": 1.3603656482830009, "learning_rate": 9.775629962029687e-06, "loss": 1.7225, "reason_loss": 0.531200647354126, "step": 1803, "utility_loss": 1.191340684890747 }, { "cosine_similarity": -0.006458796069961594, "epoch": 1.6812674743709226, "grad_norm": 1.102682399776004, "learning_rate": 9.768726268553676e-06, "loss": 1.7616, "reason_loss": 0.4668443202972412, "step": 1804, "utility_loss": 1.2947602272033691 }, { "cosine_similarity": 0.3347983655175909, "epoch": 1.6821994408201304, "grad_norm": 1.1956910196753312, "learning_rate": 9.761822575077667e-06, "loss": 1.8188, "reason_loss": 0.46906396746635437, "step": 1805, "utility_loss": 1.3497533798217773 }, { "cosine_similarity": 0.21302647561772575, "epoch": 1.6831314072693382, "grad_norm": 1.2174880138241526, "learning_rate": 9.754918881601657e-06, "loss": 1.5976, "reason_loss": 0.4771721661090851, "step": 1806, "utility_loss": 1.1204547882080078 }, { "cosine_similarity": 0.0627064699615052, "epoch": 1.684063373718546, "grad_norm": 1.294747702873301, "learning_rate": 9.748015188125648e-06, "loss": 1.6663, "reason_loss": 0.4945208430290222, "step": 1807, "utility_loss": 1.171766757965088 }, { "cosine_similarity": 0.25144928459445176, "epoch": 1.684995340167754, "grad_norm": 1.1972514692385041, "learning_rate": 9.741111494649639e-06, "loss": 2.075, "reason_loss": 0.5261574983596802, "step": 1808, "utility_loss": 1.5488178730010986 }, { "cosine_similarity": 0.13389390322259467, "epoch": 1.6859273066169618, "grad_norm": 1.0932831162759216, "learning_rate": 9.734207801173628e-06, "loss": 1.7088, "reason_loss": 0.48687005043029785, "step": 1809, "utility_loss": 1.2218995094299316 }, { "cosine_similarity": 0.30863762768063696, "epoch": 1.6868592730661698, "grad_norm": 1.285389245837494, "learning_rate": 9.727304107697619e-06, "loss": 1.7063, "reason_loss": 0.4913836717605591, "step": 1810, "utility_loss": 1.2149295806884766 }, { "cosine_similarity": 0.42743398340801075, "epoch": 1.6877912395153776, "grad_norm": 1.26458328740121, "learning_rate": 9.72040041422161e-06, "loss": 1.9467, "reason_loss": 0.5127551555633545, "step": 1811, "utility_loss": 1.4339869022369385 }, { "cosine_similarity": 0.12518364749323646, "epoch": 1.6887232059645854, "grad_norm": 1.1469572985106997, "learning_rate": 9.7134967207456e-06, "loss": 1.5487, "reason_loss": 0.4563225507736206, "step": 1812, "utility_loss": 1.092370867729187 }, { "cosine_similarity": -0.20264367257890092, "epoch": 1.6896551724137931, "grad_norm": 1.4987510131844781, "learning_rate": 9.70659302726959e-06, "loss": 1.6767, "reason_loss": 0.4856392443180084, "step": 1813, "utility_loss": 1.1910130977630615 }, { "cosine_similarity": 0.12553682595445695, "epoch": 1.690587138863001, "grad_norm": 1.202194850734063, "learning_rate": 9.69968933379358e-06, "loss": 1.5287, "reason_loss": 0.4796254634857178, "step": 1814, "utility_loss": 1.0490602254867554 }, { "cosine_similarity": 0.3880877302215912, "epoch": 1.6915191053122087, "grad_norm": 1.4619902506097562, "learning_rate": 9.69278564031757e-06, "loss": 2.2148, "reason_loss": 0.4797821044921875, "step": 1815, "utility_loss": 1.735000729560852 }, { "cosine_similarity": 0.36459639517121356, "epoch": 1.6924510717614165, "grad_norm": 1.1964457103164512, "learning_rate": 9.685881946841561e-06, "loss": 1.8613, "reason_loss": 0.49614280462265015, "step": 1816, "utility_loss": 1.3651232719421387 }, { "cosine_similarity": 0.019899373237403477, "epoch": 1.6933830382106243, "grad_norm": 1.185481181498009, "learning_rate": 9.678978253365552e-06, "loss": 1.656, "reason_loss": 0.5004504919052124, "step": 1817, "utility_loss": 1.1555513143539429 }, { "cosine_similarity": 0.00020076939026350554, "epoch": 1.694315004659832, "grad_norm": 0.8966207649704905, "learning_rate": 9.672074559889541e-06, "loss": 1.4351, "reason_loss": 0.5108246803283691, "step": 1818, "utility_loss": 0.9242704510688782 }, { "cosine_similarity": 0.4940642384260876, "epoch": 1.69524697110904, "grad_norm": 1.4129571669086285, "learning_rate": 9.665170866413532e-06, "loss": 1.714, "reason_loss": 0.4728437662124634, "step": 1819, "utility_loss": 1.2411079406738281 }, { "cosine_similarity": -0.01836448256771527, "epoch": 1.696178937558248, "grad_norm": 1.247230624707747, "learning_rate": 9.658267172937523e-06, "loss": 1.4735, "reason_loss": 0.4693295955657959, "step": 1820, "utility_loss": 1.0042150020599365 }, { "cosine_similarity": 0.18202448090854176, "epoch": 1.6971109040074557, "grad_norm": 1.076797494947106, "learning_rate": 9.651363479461513e-06, "loss": 1.6783, "reason_loss": 0.4985598027706146, "step": 1821, "utility_loss": 1.1797840595245361 }, { "cosine_similarity": 0.2913477303602723, "epoch": 1.6980428704566637, "grad_norm": 1.4252935955382717, "learning_rate": 9.644459785985502e-06, "loss": 1.5893, "reason_loss": 0.4889606237411499, "step": 1822, "utility_loss": 1.1003167629241943 }, { "cosine_similarity": 0.2305734614180536, "epoch": 1.6989748369058715, "grad_norm": 1.2239184403684178, "learning_rate": 9.637556092509493e-06, "loss": 1.6701, "reason_loss": 0.5317299365997314, "step": 1823, "utility_loss": 1.1383730173110962 }, { "cosine_similarity": 0.6234995784148196, "epoch": 1.6999068033550793, "grad_norm": 1.3768708050591865, "learning_rate": 9.630652399033484e-06, "loss": 1.8926, "reason_loss": 0.5033683180809021, "step": 1824, "utility_loss": 1.389228343963623 }, { "cosine_similarity": -0.03156617041294401, "epoch": 1.700838769804287, "grad_norm": 1.1757221343478443, "learning_rate": 9.623748705557475e-06, "loss": 1.8566, "reason_loss": 0.5016586780548096, "step": 1825, "utility_loss": 1.3549848794937134 }, { "cosine_similarity": 0.15379411390747622, "epoch": 1.7017707362534948, "grad_norm": 1.3645155050851752, "learning_rate": 9.616845012081464e-06, "loss": 2.084, "reason_loss": 0.48948681354522705, "step": 1826, "utility_loss": 1.594521403312683 }, { "cosine_similarity": 0.27213743248522226, "epoch": 1.7027027027027026, "grad_norm": 1.116182187928784, "learning_rate": 9.609941318605454e-06, "loss": 1.7676, "reason_loss": 0.5380603075027466, "step": 1827, "utility_loss": 1.229589581489563 }, { "cosine_similarity": 0.21698801224834657, "epoch": 1.7036346691519104, "grad_norm": 1.356667646411938, "learning_rate": 9.603037625129445e-06, "loss": 1.9615, "reason_loss": 0.476866215467453, "step": 1828, "utility_loss": 1.484680414199829 }, { "cosine_similarity": 0.11648999367441062, "epoch": 1.7045666356011182, "grad_norm": 1.3955805102807672, "learning_rate": 9.596133931653436e-06, "loss": 2.05, "reason_loss": 0.4997524321079254, "step": 1829, "utility_loss": 1.5502703189849854 }, { "cosine_similarity": -0.01004617924379632, "epoch": 1.7054986020503262, "grad_norm": 1.2237705483894363, "learning_rate": 9.589230238177427e-06, "loss": 1.9559, "reason_loss": 0.49870356917381287, "step": 1830, "utility_loss": 1.4571717977523804 }, { "cosine_similarity": 0.25868780240688555, "epoch": 1.706430568499534, "grad_norm": 1.1277191197441179, "learning_rate": 9.582326544701416e-06, "loss": 1.8638, "reason_loss": 0.4857791066169739, "step": 1831, "utility_loss": 1.3780372142791748 }, { "cosine_similarity": 0.45979568266567983, "epoch": 1.7073625349487418, "grad_norm": 1.1297268478983402, "learning_rate": 9.575422851225406e-06, "loss": 1.7116, "reason_loss": 0.5022315382957458, "step": 1832, "utility_loss": 1.2094109058380127 }, { "cosine_similarity": -0.045451103918994706, "epoch": 1.7082945013979498, "grad_norm": 1.2388358132451083, "learning_rate": 9.568519157749397e-06, "loss": 1.587, "reason_loss": 0.5157104134559631, "step": 1833, "utility_loss": 1.0712566375732422 }, { "cosine_similarity": 0.05447395679465715, "epoch": 1.7092264678471576, "grad_norm": 1.06885780825365, "learning_rate": 9.561615464273388e-06, "loss": 1.6683, "reason_loss": 0.5039650201797485, "step": 1834, "utility_loss": 1.1643582582473755 }, { "cosine_similarity": -0.153597416519044, "epoch": 1.7101584342963654, "grad_norm": 1.0657738489491657, "learning_rate": 9.554711770797377e-06, "loss": 1.3845, "reason_loss": 0.5095059275627136, "step": 1835, "utility_loss": 0.8749579191207886 }, { "cosine_similarity": 0.13271241264723285, "epoch": 1.7110904007455732, "grad_norm": 1.2071642743863717, "learning_rate": 9.547808077321368e-06, "loss": 2.0538, "reason_loss": 0.4977130889892578, "step": 1836, "utility_loss": 1.5560991764068604 }, { "cosine_similarity": -0.08281376543350594, "epoch": 1.712022367194781, "grad_norm": 1.2353727129539744, "learning_rate": 9.540904383845358e-06, "loss": 1.6542, "reason_loss": 0.5218608975410461, "step": 1837, "utility_loss": 1.1323339939117432 }, { "cosine_similarity": 0.160172612058276, "epoch": 1.7129543336439887, "grad_norm": 1.1290259123011848, "learning_rate": 9.534000690369349e-06, "loss": 1.6458, "reason_loss": 0.45178496837615967, "step": 1838, "utility_loss": 1.194030523300171 }, { "cosine_similarity": 0.18148264877347062, "epoch": 1.7138863000931965, "grad_norm": 1.3663226498288996, "learning_rate": 9.52709699689334e-06, "loss": 1.8305, "reason_loss": 0.4893713593482971, "step": 1839, "utility_loss": 1.3411084413528442 }, { "cosine_similarity": 0.1631501035557449, "epoch": 1.7148182665424043, "grad_norm": 1.316939472676096, "learning_rate": 9.520193303417329e-06, "loss": 1.4916, "reason_loss": 0.5116761922836304, "step": 1840, "utility_loss": 0.9798774719238281 }, { "cosine_similarity": -0.037521998096966144, "epoch": 1.7157502329916123, "grad_norm": 1.334736879520943, "learning_rate": 9.51328960994132e-06, "loss": 1.903, "reason_loss": 0.5067253112792969, "step": 1841, "utility_loss": 1.3962695598602295 }, { "cosine_similarity": -0.06846770187173082, "epoch": 1.7166821994408201, "grad_norm": 1.3794744867220587, "learning_rate": 9.50638591646531e-06, "loss": 1.8164, "reason_loss": 0.49675649404525757, "step": 1842, "utility_loss": 1.319657564163208 }, { "cosine_similarity": 0.006374194888543332, "epoch": 1.717614165890028, "grad_norm": 1.2885361308464909, "learning_rate": 9.499482222989301e-06, "loss": 1.8147, "reason_loss": 0.5037623643875122, "step": 1843, "utility_loss": 1.3109763860702515 }, { "cosine_similarity": 0.10623529986866012, "epoch": 1.718546132339236, "grad_norm": 0.9123332087459152, "learning_rate": 9.49257852951329e-06, "loss": 1.5502, "reason_loss": 0.491712361574173, "step": 1844, "utility_loss": 1.0585061311721802 }, { "cosine_similarity": -0.007183861479632325, "epoch": 1.7194780987884437, "grad_norm": 1.4531342030249237, "learning_rate": 9.48567483603728e-06, "loss": 1.8763, "reason_loss": 0.4633025825023651, "step": 1845, "utility_loss": 1.4130277633666992 }, { "cosine_similarity": 0.09075684916376583, "epoch": 1.7204100652376515, "grad_norm": 1.20712497552587, "learning_rate": 9.47877114256127e-06, "loss": 1.7957, "reason_loss": 0.4871519207954407, "step": 1846, "utility_loss": 1.3085969686508179 }, { "cosine_similarity": 0.1585715404649151, "epoch": 1.7213420316868593, "grad_norm": 1.0942042492203907, "learning_rate": 9.47186744908526e-06, "loss": 1.5711, "reason_loss": 0.4879894554615021, "step": 1847, "utility_loss": 1.0830883979797363 }, { "cosine_similarity": 0.22377877616981007, "epoch": 1.722273998136067, "grad_norm": 1.3011644697758218, "learning_rate": 9.464963755609253e-06, "loss": 1.7226, "reason_loss": 0.4358994960784912, "step": 1848, "utility_loss": 1.286704421043396 }, { "cosine_similarity": 0.3138825271328214, "epoch": 1.7232059645852749, "grad_norm": 1.1317205847556595, "learning_rate": 9.458060062133242e-06, "loss": 1.4194, "reason_loss": 0.491241455078125, "step": 1849, "utility_loss": 0.9281511306762695 }, { "cosine_similarity": -0.05999905349246137, "epoch": 1.7241379310344827, "grad_norm": 1.2717507191297137, "learning_rate": 9.451156368657233e-06, "loss": 1.5812, "reason_loss": 0.4923633933067322, "step": 1850, "utility_loss": 1.0888234376907349 }, { "cosine_similarity": 0.32116102360437104, "epoch": 1.7250698974836904, "grad_norm": 1.2315791167645729, "learning_rate": 9.444252675181222e-06, "loss": 1.8779, "reason_loss": 0.4786689877510071, "step": 1851, "utility_loss": 1.3992588520050049 }, { "cosine_similarity": 0.06436679916741818, "epoch": 1.7260018639328985, "grad_norm": 1.1111290551876516, "learning_rate": 9.437348981705212e-06, "loss": 1.6999, "reason_loss": 0.5088269710540771, "step": 1852, "utility_loss": 1.1910760402679443 }, { "cosine_similarity": 0.24006255115409386, "epoch": 1.7269338303821062, "grad_norm": 1.293385558820543, "learning_rate": 9.430445288229203e-06, "loss": 1.7138, "reason_loss": 0.48424431681632996, "step": 1853, "utility_loss": 1.229507565498352 }, { "cosine_similarity": 0.015640786757672456, "epoch": 1.727865796831314, "grad_norm": 0.9953831847269908, "learning_rate": 9.423541594753194e-06, "loss": 1.6094, "reason_loss": 0.4762941896915436, "step": 1854, "utility_loss": 1.133066177368164 }, { "cosine_similarity": 0.4642294392840568, "epoch": 1.728797763280522, "grad_norm": 1.3431330171800278, "learning_rate": 9.416637901277183e-06, "loss": 1.4968, "reason_loss": 0.4903000593185425, "step": 1855, "utility_loss": 1.00650155544281 }, { "cosine_similarity": -0.053643528572357824, "epoch": 1.7297297297297298, "grad_norm": 1.053037438065906, "learning_rate": 9.409734207801174e-06, "loss": 1.5269, "reason_loss": 0.5003546476364136, "step": 1856, "utility_loss": 1.0265488624572754 }, { "cosine_similarity": -0.03158096311892526, "epoch": 1.7306616961789376, "grad_norm": 1.1026773881493164, "learning_rate": 9.402830514325164e-06, "loss": 1.61, "reason_loss": 0.5230288505554199, "step": 1857, "utility_loss": 1.0870182514190674 }, { "cosine_similarity": 0.13323313030371436, "epoch": 1.7315936626281454, "grad_norm": 1.1365769841043898, "learning_rate": 9.395926820849155e-06, "loss": 1.8296, "reason_loss": 0.4803202152252197, "step": 1858, "utility_loss": 1.3493274450302124 }, { "cosine_similarity": 0.3149412407233186, "epoch": 1.7325256290773532, "grad_norm": 1.1335993308331522, "learning_rate": 9.389023127373146e-06, "loss": 1.7582, "reason_loss": 0.4905075430870056, "step": 1859, "utility_loss": 1.2677127122879028 }, { "cosine_similarity": 0.30556781948467293, "epoch": 1.733457595526561, "grad_norm": 1.2301144767827639, "learning_rate": 9.382119433897135e-06, "loss": 1.9626, "reason_loss": 0.47422975301742554, "step": 1860, "utility_loss": 1.4884021282196045 }, { "cosine_similarity": 0.1371953699104254, "epoch": 1.7343895619757688, "grad_norm": 1.2625904587025658, "learning_rate": 9.375215740421126e-06, "loss": 1.7515, "reason_loss": 0.48338061571121216, "step": 1861, "utility_loss": 1.268089771270752 }, { "cosine_similarity": 0.06776015740211418, "epoch": 1.7353215284249766, "grad_norm": 1.0847977933419337, "learning_rate": 9.368312046945116e-06, "loss": 1.616, "reason_loss": 0.4770527482032776, "step": 1862, "utility_loss": 1.1389052867889404 }, { "cosine_similarity": 0.01721712165705671, "epoch": 1.7362534948741846, "grad_norm": 1.1352726827663677, "learning_rate": 9.361408353469107e-06, "loss": 1.4911, "reason_loss": 0.506321907043457, "step": 1863, "utility_loss": 0.9847890734672546 }, { "cosine_similarity": 0.220699702461783, "epoch": 1.7371854613233924, "grad_norm": 1.4568511864695015, "learning_rate": 9.354504659993096e-06, "loss": 1.7192, "reason_loss": 0.5119809508323669, "step": 1864, "utility_loss": 1.2072596549987793 }, { "cosine_similarity": 0.15128623888539533, "epoch": 1.7381174277726001, "grad_norm": 1.1623482992180294, "learning_rate": 9.347600966517087e-06, "loss": 1.5354, "reason_loss": 0.5017820596694946, "step": 1865, "utility_loss": 1.0336164236068726 }, { "cosine_similarity": 0.11794405463985559, "epoch": 1.7390493942218082, "grad_norm": 1.4287307365803938, "learning_rate": 9.340697273041078e-06, "loss": 1.9238, "reason_loss": 0.48618239164352417, "step": 1866, "utility_loss": 1.4376237392425537 }, { "cosine_similarity": 0.05031096821041733, "epoch": 1.739981360671016, "grad_norm": 1.0952286035750929, "learning_rate": 9.333793579565068e-06, "loss": 1.8984, "reason_loss": 0.5092940330505371, "step": 1867, "utility_loss": 1.3891286849975586 }, { "cosine_similarity": 0.27350904309294827, "epoch": 1.7409133271202237, "grad_norm": 1.3309988240918202, "learning_rate": 9.326889886089059e-06, "loss": 1.6633, "reason_loss": 0.48416921496391296, "step": 1868, "utility_loss": 1.1791260242462158 }, { "cosine_similarity": 0.27638449062553677, "epoch": 1.7418452935694315, "grad_norm": 1.0096976415472945, "learning_rate": 9.319986192613048e-06, "loss": 1.6471, "reason_loss": 0.4887022078037262, "step": 1869, "utility_loss": 1.1584343910217285 }, { "cosine_similarity": -0.036857026516912285, "epoch": 1.7427772600186393, "grad_norm": 1.6217465787059, "learning_rate": 9.313082499137039e-06, "loss": 1.9589, "reason_loss": 0.4885411262512207, "step": 1870, "utility_loss": 1.4703774452209473 }, { "cosine_similarity": 0.33263575143586, "epoch": 1.743709226467847, "grad_norm": 1.2212329442251153, "learning_rate": 9.30617880566103e-06, "loss": 1.9591, "reason_loss": 0.5077648162841797, "step": 1871, "utility_loss": 1.45135498046875 }, { "cosine_similarity": 0.03189121905379879, "epoch": 1.7446411929170549, "grad_norm": 1.2028600448982674, "learning_rate": 9.29927511218502e-06, "loss": 1.6879, "reason_loss": 0.5121455788612366, "step": 1872, "utility_loss": 1.1758003234863281 }, { "cosine_similarity": 0.29890105203486705, "epoch": 1.7455731593662627, "grad_norm": 1.3398929176913603, "learning_rate": 9.29237141870901e-06, "loss": 2.1076, "reason_loss": 0.5104067921638489, "step": 1873, "utility_loss": 1.5972340106964111 }, { "cosine_similarity": 0.12109574398693124, "epoch": 1.7465051258154707, "grad_norm": 1.551501656761227, "learning_rate": 9.285467725233e-06, "loss": 1.7761, "reason_loss": 0.47961753606796265, "step": 1874, "utility_loss": 1.2964739799499512 }, { "cosine_similarity": 0.35593164298092284, "epoch": 1.7474370922646785, "grad_norm": 1.3698459846619564, "learning_rate": 9.278564031756991e-06, "loss": 2.5786, "reason_loss": 0.49321937561035156, "step": 1875, "utility_loss": 2.0853941440582275 }, { "cosine_similarity": 0.39261995350742535, "epoch": 1.7483690587138863, "grad_norm": 1.2062079297710528, "learning_rate": 9.271660338280982e-06, "loss": 1.7381, "reason_loss": 0.5141511559486389, "step": 1876, "utility_loss": 1.2239155769348145 }, { "cosine_similarity": 0.3457845275378833, "epoch": 1.7493010251630943, "grad_norm": 1.2698287382790179, "learning_rate": 9.264756644804972e-06, "loss": 1.579, "reason_loss": 0.47624820470809937, "step": 1877, "utility_loss": 1.102787733078003 }, { "cosine_similarity": 0.1046669069238651, "epoch": 1.750232991612302, "grad_norm": 1.461592069138553, "learning_rate": 9.257852951328961e-06, "loss": 1.9872, "reason_loss": 0.4941714406013489, "step": 1878, "utility_loss": 1.4930293560028076 }, { "cosine_similarity": 0.26005668810625066, "epoch": 1.7511649580615098, "grad_norm": 1.2420028776821168, "learning_rate": 9.250949257852952e-06, "loss": 1.5798, "reason_loss": 0.5103785991668701, "step": 1879, "utility_loss": 1.0694063901901245 }, { "cosine_similarity": 0.06425224125004192, "epoch": 1.7520969245107176, "grad_norm": 1.1882255626174745, "learning_rate": 9.244045564376943e-06, "loss": 1.4174, "reason_loss": 0.502618134021759, "step": 1880, "utility_loss": 0.9147436618804932 }, { "cosine_similarity": 0.05286940303024946, "epoch": 1.7530288909599254, "grad_norm": 1.0512007687214573, "learning_rate": 9.237141870900934e-06, "loss": 1.7909, "reason_loss": 0.5082976818084717, "step": 1881, "utility_loss": 1.28263258934021 }, { "cosine_similarity": 0.16850625468947067, "epoch": 1.7539608574091332, "grad_norm": 1.1519078988794964, "learning_rate": 9.230238177424923e-06, "loss": 1.8348, "reason_loss": 0.49451446533203125, "step": 1882, "utility_loss": 1.340294361114502 }, { "cosine_similarity": 0.2553705839333683, "epoch": 1.754892823858341, "grad_norm": 1.2002493988580356, "learning_rate": 9.223334483948913e-06, "loss": 1.4369, "reason_loss": 0.5019997358322144, "step": 1883, "utility_loss": 0.9348753094673157 }, { "cosine_similarity": 0.025561977320697003, "epoch": 1.7558247903075488, "grad_norm": 1.1826921665734058, "learning_rate": 9.216430790472904e-06, "loss": 1.9262, "reason_loss": 0.490647554397583, "step": 1884, "utility_loss": 1.4355602264404297 }, { "cosine_similarity": -0.02745843857140917, "epoch": 1.7567567567567568, "grad_norm": 2.114530336680027, "learning_rate": 9.209527096996895e-06, "loss": 1.5614, "reason_loss": 0.49704957008361816, "step": 1885, "utility_loss": 1.0643384456634521 }, { "cosine_similarity": 0.11105427735083348, "epoch": 1.7576887232059646, "grad_norm": 1.1958212875341223, "learning_rate": 9.202623403520884e-06, "loss": 1.9823, "reason_loss": 0.5074888467788696, "step": 1886, "utility_loss": 1.4748437404632568 }, { "cosine_similarity": 0.048829453586897664, "epoch": 1.7586206896551724, "grad_norm": 1.0703762273226796, "learning_rate": 9.195719710044875e-06, "loss": 1.9841, "reason_loss": 0.5022884607315063, "step": 1887, "utility_loss": 1.4817637205123901 }, { "cosine_similarity": 0.05108608666838869, "epoch": 1.7595526561043804, "grad_norm": 1.3922117245481151, "learning_rate": 9.188816016568865e-06, "loss": 2.0288, "reason_loss": 0.4714611768722534, "step": 1888, "utility_loss": 1.557312250137329 }, { "cosine_similarity": 0.10301166870939298, "epoch": 1.7604846225535882, "grad_norm": 1.3107561421109521, "learning_rate": 9.181912323092856e-06, "loss": 1.8041, "reason_loss": 0.47997263073921204, "step": 1889, "utility_loss": 1.3241770267486572 }, { "cosine_similarity": 0.1919614442048862, "epoch": 1.761416589002796, "grad_norm": 1.1621077888407325, "learning_rate": 9.175008629616847e-06, "loss": 1.639, "reason_loss": 0.502743124961853, "step": 1890, "utility_loss": 1.136225700378418 }, { "cosine_similarity": 0.08736580952874202, "epoch": 1.7623485554520038, "grad_norm": 1.6643438336181682, "learning_rate": 9.168104936140836e-06, "loss": 2.1369, "reason_loss": 0.4762696921825409, "step": 1891, "utility_loss": 1.6606059074401855 }, { "cosine_similarity": 0.461807619512844, "epoch": 1.7632805219012115, "grad_norm": 1.2209627013704611, "learning_rate": 9.161201242664827e-06, "loss": 1.9858, "reason_loss": 0.4997549057006836, "step": 1892, "utility_loss": 1.4860005378723145 }, { "cosine_similarity": 0.6954773898020229, "epoch": 1.7642124883504193, "grad_norm": 1.347401337597879, "learning_rate": 9.154297549188816e-06, "loss": 1.5505, "reason_loss": 0.49928638339042664, "step": 1893, "utility_loss": 1.0511857271194458 }, { "cosine_similarity": 0.38240412275097513, "epoch": 1.7651444547996271, "grad_norm": 1.2879812483709143, "learning_rate": 9.147393855712808e-06, "loss": 1.9954, "reason_loss": 0.49558785557746887, "step": 1894, "utility_loss": 1.4998011589050293 }, { "cosine_similarity": -0.22385251114663116, "epoch": 1.766076421248835, "grad_norm": 1.2669328078334872, "learning_rate": 9.140490162236797e-06, "loss": 1.9577, "reason_loss": 0.5225119590759277, "step": 1895, "utility_loss": 1.4352095127105713 }, { "cosine_similarity": -0.2554792622754359, "epoch": 1.767008387698043, "grad_norm": 1.5105082839561939, "learning_rate": 9.133586468760788e-06, "loss": 1.7808, "reason_loss": 0.4956696331501007, "step": 1896, "utility_loss": 1.2851686477661133 }, { "cosine_similarity": 0.44058814266373897, "epoch": 1.7679403541472507, "grad_norm": 1.3520555938923196, "learning_rate": 9.126682775284779e-06, "loss": 1.887, "reason_loss": 0.48635733127593994, "step": 1897, "utility_loss": 1.4006260633468628 }, { "cosine_similarity": 0.8683785213025362, "epoch": 1.7688723205964585, "grad_norm": 1.28174303218684, "learning_rate": 9.119779081808768e-06, "loss": 2.1411, "reason_loss": 0.4825018048286438, "step": 1898, "utility_loss": 1.6585752964019775 }, { "cosine_similarity": 0.20270689665394437, "epoch": 1.7698042870456665, "grad_norm": 1.1430845176114406, "learning_rate": 9.11287538833276e-06, "loss": 1.7567, "reason_loss": 0.48911333084106445, "step": 1899, "utility_loss": 1.267571210861206 }, { "cosine_similarity": 0.20867727675042158, "epoch": 1.7707362534948743, "grad_norm": 1.3599710407579482, "learning_rate": 9.105971694856749e-06, "loss": 1.6749, "reason_loss": 0.47274455428123474, "step": 1900, "utility_loss": 1.202150821685791 }, { "cosine_similarity": 0.3706109148171629, "epoch": 1.771668219944082, "grad_norm": 1.1162932505824872, "learning_rate": 9.09906800138074e-06, "loss": 1.6367, "reason_loss": 0.4781985282897949, "step": 1901, "utility_loss": 1.158550500869751 }, { "cosine_similarity": -0.01957024374428856, "epoch": 1.7726001863932899, "grad_norm": 0.96472343842732, "learning_rate": 9.092164307904729e-06, "loss": 1.1817, "reason_loss": 0.47249460220336914, "step": 1902, "utility_loss": 0.7092463970184326 }, { "cosine_similarity": 0.42555652254504467, "epoch": 1.7735321528424977, "grad_norm": 1.1774203648458195, "learning_rate": 9.08526061442872e-06, "loss": 1.765, "reason_loss": 0.45798254013061523, "step": 1903, "utility_loss": 1.3069863319396973 }, { "cosine_similarity": 0.022984996409317345, "epoch": 1.7744641192917054, "grad_norm": 1.1924226710394674, "learning_rate": 9.07835692095271e-06, "loss": 1.6648, "reason_loss": 0.5091811418533325, "step": 1904, "utility_loss": 1.1555993556976318 }, { "cosine_similarity": 0.07549429334687935, "epoch": 1.7753960857409132, "grad_norm": 1.1983283710974606, "learning_rate": 9.071453227476701e-06, "loss": 1.9942, "reason_loss": 0.49414706230163574, "step": 1905, "utility_loss": 1.5000576972961426 }, { "cosine_similarity": 0.31357236618779344, "epoch": 1.776328052190121, "grad_norm": 1.2130121131955727, "learning_rate": 9.06454953400069e-06, "loss": 1.5604, "reason_loss": 0.5024080276489258, "step": 1906, "utility_loss": 1.0579502582550049 }, { "cosine_similarity": 0.26126065683802535, "epoch": 1.777260018639329, "grad_norm": 1.0569457213342957, "learning_rate": 9.05764584052468e-06, "loss": 1.7718, "reason_loss": 0.5049400329589844, "step": 1907, "utility_loss": 1.2668111324310303 }, { "cosine_similarity": 0.16170113597762215, "epoch": 1.7781919850885368, "grad_norm": 1.0105914796348683, "learning_rate": 9.050742147048671e-06, "loss": 1.7983, "reason_loss": 0.49445968866348267, "step": 1908, "utility_loss": 1.3038747310638428 }, { "cosine_similarity": -0.022288338286287644, "epoch": 1.7791239515377446, "grad_norm": 1.184203052824073, "learning_rate": 9.043838453572662e-06, "loss": 1.9121, "reason_loss": 0.5024404525756836, "step": 1909, "utility_loss": 1.4096481800079346 }, { "cosine_similarity": 0.048315902035742805, "epoch": 1.7800559179869526, "grad_norm": 1.2621279239415508, "learning_rate": 9.036934760096653e-06, "loss": 1.888, "reason_loss": 0.5023214817047119, "step": 1910, "utility_loss": 1.3856717348098755 }, { "cosine_similarity": 0.09260562131414637, "epoch": 1.7809878844361604, "grad_norm": 1.1733039319515775, "learning_rate": 9.030031066620642e-06, "loss": 1.9945, "reason_loss": 0.4969753921031952, "step": 1911, "utility_loss": 1.4975330829620361 }, { "cosine_similarity": 0.33732877857950044, "epoch": 1.7819198508853682, "grad_norm": 1.0169306262685005, "learning_rate": 9.023127373144633e-06, "loss": 1.3173, "reason_loss": 0.460021048784256, "step": 1912, "utility_loss": 0.8572442531585693 }, { "cosine_similarity": 0.08979988826207581, "epoch": 1.782851817334576, "grad_norm": 1.0169226004016507, "learning_rate": 9.016223679668623e-06, "loss": 1.5402, "reason_loss": 0.48645222187042236, "step": 1913, "utility_loss": 1.0537835359573364 }, { "cosine_similarity": 0.0784365629734783, "epoch": 1.7837837837837838, "grad_norm": 1.1978099636731436, "learning_rate": 9.009319986192614e-06, "loss": 1.4647, "reason_loss": 0.5068724155426025, "step": 1914, "utility_loss": 0.9578752517700195 }, { "cosine_similarity": 0.15021055253580928, "epoch": 1.7847157502329916, "grad_norm": 1.3402647733488295, "learning_rate": 9.002416292716603e-06, "loss": 1.6862, "reason_loss": 0.4645925462245941, "step": 1915, "utility_loss": 1.2215834856033325 }, { "cosine_similarity": 0.3971595270145896, "epoch": 1.7856477166821993, "grad_norm": 1.5556881441780113, "learning_rate": 8.995512599240594e-06, "loss": 1.7476, "reason_loss": 0.49988222122192383, "step": 1916, "utility_loss": 1.2477672100067139 }, { "cosine_similarity": 0.47500567121530063, "epoch": 1.7865796831314071, "grad_norm": 1.2206819513499385, "learning_rate": 8.988608905764585e-06, "loss": 1.6121, "reason_loss": 0.5119537115097046, "step": 1917, "utility_loss": 1.1001501083374023 }, { "cosine_similarity": 0.8653044714334347, "epoch": 1.7875116495806151, "grad_norm": 1.3893022224436562, "learning_rate": 8.981705212288575e-06, "loss": 1.9679, "reason_loss": 0.5087271928787231, "step": 1918, "utility_loss": 1.4591448307037354 }, { "cosine_similarity": 0.12022038386513115, "epoch": 1.788443616029823, "grad_norm": 1.2759581832799562, "learning_rate": 8.974801518812566e-06, "loss": 1.6673, "reason_loss": 0.48413705825805664, "step": 1919, "utility_loss": 1.183131217956543 }, { "cosine_similarity": -0.11934139628372595, "epoch": 1.7893755824790307, "grad_norm": 1.080022147051939, "learning_rate": 8.967897825336555e-06, "loss": 1.5638, "reason_loss": 0.5168488621711731, "step": 1920, "utility_loss": 1.0469386577606201 }, { "cosine_similarity": 0.08652644164780984, "epoch": 1.7903075489282387, "grad_norm": 1.2218255764075598, "learning_rate": 8.960994131860546e-06, "loss": 1.5178, "reason_loss": 0.5164602994918823, "step": 1921, "utility_loss": 1.0012900829315186 }, { "cosine_similarity": 0.43372738451918624, "epoch": 1.7912395153774465, "grad_norm": 1.2831934290513793, "learning_rate": 8.954090438384537e-06, "loss": 1.6194, "reason_loss": 0.49633944034576416, "step": 1922, "utility_loss": 1.1230778694152832 }, { "cosine_similarity": 0.31400241999770256, "epoch": 1.7921714818266543, "grad_norm": 1.215897292383615, "learning_rate": 8.947186744908527e-06, "loss": 1.7561, "reason_loss": 0.5002501010894775, "step": 1923, "utility_loss": 1.2558465003967285 }, { "cosine_similarity": 0.09068477703410327, "epoch": 1.793103448275862, "grad_norm": 1.1912705465522555, "learning_rate": 8.940283051432516e-06, "loss": 1.5524, "reason_loss": 0.48300936818122864, "step": 1924, "utility_loss": 1.069361686706543 }, { "cosine_similarity": 0.12608737315117763, "epoch": 1.7940354147250699, "grad_norm": 1.454269636514692, "learning_rate": 8.933379357956507e-06, "loss": 2.1195, "reason_loss": 0.47662872076034546, "step": 1925, "utility_loss": 1.6428565979003906 }, { "cosine_similarity": -0.11734707504894712, "epoch": 1.7949673811742777, "grad_norm": 1.0735645349945153, "learning_rate": 8.926475664480498e-06, "loss": 1.4603, "reason_loss": 0.4764289855957031, "step": 1926, "utility_loss": 0.9839063882827759 }, { "cosine_similarity": 0.3301704298488267, "epoch": 1.7958993476234855, "grad_norm": 1.1627675005052842, "learning_rate": 8.919571971004489e-06, "loss": 1.6676, "reason_loss": 0.4878947138786316, "step": 1927, "utility_loss": 1.179690957069397 }, { "cosine_similarity": 0.34654974987272796, "epoch": 1.7968313140726933, "grad_norm": 1.0220708143815145, "learning_rate": 8.91266827752848e-06, "loss": 1.5962, "reason_loss": 0.49873459339141846, "step": 1928, "utility_loss": 1.0974702835083008 }, { "cosine_similarity": 0.17261328785237412, "epoch": 1.7977632805219013, "grad_norm": 1.1394663253109385, "learning_rate": 8.905764584052468e-06, "loss": 1.8357, "reason_loss": 0.48063403367996216, "step": 1929, "utility_loss": 1.3550848960876465 }, { "cosine_similarity": -0.0060355014273873, "epoch": 1.798695246971109, "grad_norm": 1.103828315300412, "learning_rate": 8.898860890576459e-06, "loss": 1.8612, "reason_loss": 0.508445143699646, "step": 1930, "utility_loss": 1.3527100086212158 }, { "cosine_similarity": 0.154744795580186, "epoch": 1.7996272134203168, "grad_norm": 1.1167031650202577, "learning_rate": 8.89195719710045e-06, "loss": 1.6982, "reason_loss": 0.49982452392578125, "step": 1931, "utility_loss": 1.198413372039795 }, { "cosine_similarity": 0.2872886098047806, "epoch": 1.8005591798695249, "grad_norm": 1.0361915951842628, "learning_rate": 8.88505350362444e-06, "loss": 1.6049, "reason_loss": 0.4957886040210724, "step": 1932, "utility_loss": 1.1090962886810303 }, { "cosine_similarity": 0.26062748606348657, "epoch": 1.8014911463187326, "grad_norm": 1.2587898332702616, "learning_rate": 8.87814981014843e-06, "loss": 1.7828, "reason_loss": 0.5051435828208923, "step": 1933, "utility_loss": 1.2776936292648315 }, { "cosine_similarity": 0.12001926028847142, "epoch": 1.8024231127679404, "grad_norm": 1.2105414157149845, "learning_rate": 8.87124611667242e-06, "loss": 1.777, "reason_loss": 0.48550868034362793, "step": 1934, "utility_loss": 1.2914843559265137 }, { "cosine_similarity": 0.19851551550252966, "epoch": 1.8033550792171482, "grad_norm": 1.459595250202876, "learning_rate": 8.864342423196411e-06, "loss": 1.5839, "reason_loss": 0.4957585632801056, "step": 1935, "utility_loss": 1.0881867408752441 }, { "cosine_similarity": 0.0960281272369665, "epoch": 1.804287045666356, "grad_norm": 1.1175624682430074, "learning_rate": 8.857438729720402e-06, "loss": 1.7753, "reason_loss": 0.4956170916557312, "step": 1936, "utility_loss": 1.2796623706817627 }, { "cosine_similarity": 0.38109054655014246, "epoch": 1.8052190121155638, "grad_norm": 1.2693999234577846, "learning_rate": 8.850535036244391e-06, "loss": 1.7463, "reason_loss": 0.48828038573265076, "step": 1937, "utility_loss": 1.2580604553222656 }, { "cosine_similarity": -0.02485421658762768, "epoch": 1.8061509785647716, "grad_norm": 1.0670526342200726, "learning_rate": 8.843631342768382e-06, "loss": 1.4584, "reason_loss": 0.46257278323173523, "step": 1938, "utility_loss": 0.995808482170105 }, { "cosine_similarity": -0.13384556405779602, "epoch": 1.8070829450139794, "grad_norm": 1.2328012259453347, "learning_rate": 8.836727649292372e-06, "loss": 1.3984, "reason_loss": 0.47784870862960815, "step": 1939, "utility_loss": 0.9205888509750366 }, { "cosine_similarity": 0.34265680067214893, "epoch": 1.8080149114631874, "grad_norm": 1.3166093862037471, "learning_rate": 8.829823955816363e-06, "loss": 2.0306, "reason_loss": 0.4840245842933655, "step": 1940, "utility_loss": 1.546582579612732 }, { "cosine_similarity": 0.04529821522816994, "epoch": 1.8089468779123952, "grad_norm": 1.5242927304009228, "learning_rate": 8.822920262340354e-06, "loss": 1.8951, "reason_loss": 0.4976516664028168, "step": 1941, "utility_loss": 1.397458791732788 }, { "cosine_similarity": -0.04244536868353298, "epoch": 1.809878844361603, "grad_norm": 1.077630488355644, "learning_rate": 8.816016568864343e-06, "loss": 1.6765, "reason_loss": 0.5123242139816284, "step": 1942, "utility_loss": 1.1641639471054077 }, { "cosine_similarity": -0.047171539842891186, "epoch": 1.810810810810811, "grad_norm": 1.2008324255366127, "learning_rate": 8.809112875388334e-06, "loss": 1.5462, "reason_loss": 0.510717511177063, "step": 1943, "utility_loss": 1.035449504852295 }, { "cosine_similarity": 0.06524626868250134, "epoch": 1.8117427772600188, "grad_norm": 1.0428166328285373, "learning_rate": 8.802209181912323e-06, "loss": 1.7172, "reason_loss": 0.5058275461196899, "step": 1944, "utility_loss": 1.211326241493225 }, { "cosine_similarity": -0.034482257919223006, "epoch": 1.8126747437092265, "grad_norm": 1.312813593753051, "learning_rate": 8.795305488436315e-06, "loss": 1.966, "reason_loss": 0.5190637111663818, "step": 1945, "utility_loss": 1.4469166994094849 }, { "cosine_similarity": 0.2953365497064445, "epoch": 1.8136067101584343, "grad_norm": 1.1244110106565985, "learning_rate": 8.788401794960304e-06, "loss": 1.56, "reason_loss": 0.48215848207473755, "step": 1946, "utility_loss": 1.077834963798523 }, { "cosine_similarity": 0.3287714675246171, "epoch": 1.8145386766076421, "grad_norm": 1.2419341696399402, "learning_rate": 8.781498101484295e-06, "loss": 1.761, "reason_loss": 0.4671798348426819, "step": 1947, "utility_loss": 1.293809175491333 }, { "cosine_similarity": 0.09899281941692811, "epoch": 1.81547064305685, "grad_norm": 1.7949033042740288, "learning_rate": 8.774594408008286e-06, "loss": 1.8383, "reason_loss": 0.48942887783050537, "step": 1948, "utility_loss": 1.3488523960113525 }, { "cosine_similarity": 0.11317640333184929, "epoch": 1.8164026095060577, "grad_norm": 1.0248127815588497, "learning_rate": 8.767690714532275e-06, "loss": 1.5416, "reason_loss": 0.47742486000061035, "step": 1949, "utility_loss": 1.0641491413116455 }, { "cosine_similarity": 0.2808487870633065, "epoch": 1.8173345759552655, "grad_norm": 1.0789488301116383, "learning_rate": 8.760787021056267e-06, "loss": 1.483, "reason_loss": 0.48016369342803955, "step": 1950, "utility_loss": 1.0028557777404785 }, { "cosine_similarity": 0.19331010798735718, "epoch": 1.8182665424044733, "grad_norm": 1.1639821068403595, "learning_rate": 8.753883327580256e-06, "loss": 1.5269, "reason_loss": 0.4957495927810669, "step": 1951, "utility_loss": 1.0311806201934814 }, { "cosine_similarity": 0.057546037368129174, "epoch": 1.8191985088536813, "grad_norm": 1.2114865616683224, "learning_rate": 8.746979634104247e-06, "loss": 1.5381, "reason_loss": 0.4745476245880127, "step": 1952, "utility_loss": 1.0635648965835571 }, { "cosine_similarity": 0.06868374946996239, "epoch": 1.820130475302889, "grad_norm": 1.4256067521543863, "learning_rate": 8.740075940628236e-06, "loss": 1.611, "reason_loss": 0.49305853247642517, "step": 1953, "utility_loss": 1.1179618835449219 }, { "cosine_similarity": 0.18014173551666737, "epoch": 1.821062441752097, "grad_norm": 1.4151271514081987, "learning_rate": 8.733172247152227e-06, "loss": 1.8307, "reason_loss": 0.4785301685333252, "step": 1954, "utility_loss": 1.3521580696105957 }, { "cosine_similarity": 0.24529300589417258, "epoch": 1.8219944082013049, "grad_norm": 1.1087469865314252, "learning_rate": 8.726268553676217e-06, "loss": 1.4794, "reason_loss": 0.4945463538169861, "step": 1955, "utility_loss": 0.9848231673240662 }, { "cosine_similarity": 0.11624989416307037, "epoch": 1.8229263746505127, "grad_norm": 1.4340927214191999, "learning_rate": 8.719364860200208e-06, "loss": 1.7444, "reason_loss": 0.46979469060897827, "step": 1956, "utility_loss": 1.274646282196045 }, { "cosine_similarity": 0.13093982329324647, "epoch": 1.8238583410997204, "grad_norm": 1.199491976305772, "learning_rate": 8.712461166724197e-06, "loss": 2.0457, "reason_loss": 0.5009313821792603, "step": 1957, "utility_loss": 1.5447474718093872 }, { "cosine_similarity": 0.11317070080943509, "epoch": 1.8247903075489282, "grad_norm": 1.5633929534736555, "learning_rate": 8.705557473248188e-06, "loss": 1.7433, "reason_loss": 0.47300517559051514, "step": 1958, "utility_loss": 1.270295262336731 }, { "cosine_similarity": 0.08494972269503169, "epoch": 1.825722273998136, "grad_norm": 1.2602278688942428, "learning_rate": 8.698653779772179e-06, "loss": 2.0372, "reason_loss": 0.5027483701705933, "step": 1959, "utility_loss": 1.534456491470337 }, { "cosine_similarity": 0.05858746919805203, "epoch": 1.8266542404473438, "grad_norm": 1.2266165654022978, "learning_rate": 8.69175008629617e-06, "loss": 2.0426, "reason_loss": 0.5035930871963501, "step": 1960, "utility_loss": 1.5390263795852661 }, { "cosine_similarity": -0.061532955482115174, "epoch": 1.8275862068965516, "grad_norm": 0.9987243214515611, "learning_rate": 8.68484639282016e-06, "loss": 1.6387, "reason_loss": 0.4932674169540405, "step": 1961, "utility_loss": 1.14540433883667 }, { "cosine_similarity": -0.06883249481663375, "epoch": 1.8285181733457594, "grad_norm": 1.2162799952299304, "learning_rate": 8.677942699344149e-06, "loss": 2.0122, "reason_loss": 0.5009198784828186, "step": 1962, "utility_loss": 1.5112323760986328 }, { "cosine_similarity": 0.24839811514845142, "epoch": 1.8294501397949674, "grad_norm": 1.3461029361051033, "learning_rate": 8.67103900586814e-06, "loss": 2.0481, "reason_loss": 0.49012047052383423, "step": 1963, "utility_loss": 1.5579688549041748 }, { "cosine_similarity": 0.1658087813248635, "epoch": 1.8303821062441752, "grad_norm": 1.1432772387094337, "learning_rate": 8.66413531239213e-06, "loss": 1.538, "reason_loss": 0.5135470628738403, "step": 1964, "utility_loss": 1.02445387840271 }, { "cosine_similarity": 0.061404281971748, "epoch": 1.8313140726933832, "grad_norm": 1.0698947589492047, "learning_rate": 8.657231618916121e-06, "loss": 1.8235, "reason_loss": 0.5004873871803284, "step": 1965, "utility_loss": 1.3230273723602295 }, { "cosine_similarity": 0.11064608162930825, "epoch": 1.832246039142591, "grad_norm": 1.2052207208970696, "learning_rate": 8.65032792544011e-06, "loss": 1.4236, "reason_loss": 0.4890146255493164, "step": 1966, "utility_loss": 0.9345775842666626 }, { "cosine_similarity": 0.3167533552532404, "epoch": 1.8331780055917988, "grad_norm": 1.1448350984302456, "learning_rate": 8.643424231964101e-06, "loss": 1.8466, "reason_loss": 0.49429380893707275, "step": 1967, "utility_loss": 1.3522608280181885 }, { "cosine_similarity": 0.3779425797314398, "epoch": 1.8341099720410066, "grad_norm": 1.3717012316852797, "learning_rate": 8.636520538488092e-06, "loss": 1.8348, "reason_loss": 0.4762752950191498, "step": 1968, "utility_loss": 1.3584966659545898 }, { "cosine_similarity": 0.17150035651516732, "epoch": 1.8350419384902144, "grad_norm": 1.1840020155213993, "learning_rate": 8.629616845012082e-06, "loss": 1.9824, "reason_loss": 0.5304815173149109, "step": 1969, "utility_loss": 1.4518868923187256 }, { "cosine_similarity": 0.14249233462090238, "epoch": 1.8359739049394221, "grad_norm": 1.1569242828999586, "learning_rate": 8.622713151536073e-06, "loss": 1.614, "reason_loss": 0.497272253036499, "step": 1970, "utility_loss": 1.11673903465271 }, { "cosine_similarity": -0.04314904910552113, "epoch": 1.83690587138863, "grad_norm": 1.0937094814595116, "learning_rate": 8.615809458060062e-06, "loss": 1.6403, "reason_loss": 0.5191529989242554, "step": 1971, "utility_loss": 1.1211376190185547 }, { "cosine_similarity": 0.4088501124188136, "epoch": 1.8378378378378377, "grad_norm": 1.268630791596574, "learning_rate": 8.608905764584053e-06, "loss": 1.6545, "reason_loss": 0.5105479955673218, "step": 1972, "utility_loss": 1.1439802646636963 }, { "cosine_similarity": 0.29916959211657485, "epoch": 1.8387698042870455, "grad_norm": 1.1893993206760827, "learning_rate": 8.602002071108044e-06, "loss": 1.7326, "reason_loss": 0.4936058223247528, "step": 1973, "utility_loss": 1.239037036895752 }, { "cosine_similarity": -0.09758478762597719, "epoch": 1.8397017707362535, "grad_norm": 1.3594340514153553, "learning_rate": 8.595098377632034e-06, "loss": 1.8384, "reason_loss": 0.5136165618896484, "step": 1974, "utility_loss": 1.3247582912445068 }, { "cosine_similarity": 0.21272259273973254, "epoch": 1.8406337371854613, "grad_norm": 1.2860847930126686, "learning_rate": 8.588194684156023e-06, "loss": 1.6568, "reason_loss": 0.509689211845398, "step": 1975, "utility_loss": 1.147074818611145 }, { "cosine_similarity": 0.07941247561312025, "epoch": 1.8415657036346693, "grad_norm": 1.144375021799713, "learning_rate": 8.581290990680014e-06, "loss": 1.4023, "reason_loss": 0.5060027241706848, "step": 1976, "utility_loss": 0.8963326811790466 }, { "cosine_similarity": 0.46249061117924656, "epoch": 1.842497670083877, "grad_norm": 1.1519040695717244, "learning_rate": 8.574387297204005e-06, "loss": 1.6461, "reason_loss": 0.507302463054657, "step": 1977, "utility_loss": 1.138832449913025 }, { "cosine_similarity": 0.29880663377766015, "epoch": 1.843429636533085, "grad_norm": 1.146245384910278, "learning_rate": 8.567483603727996e-06, "loss": 1.6443, "reason_loss": 0.4927760064601898, "step": 1978, "utility_loss": 1.1514942646026611 }, { "cosine_similarity": -0.12729804617047152, "epoch": 1.8443616029822927, "grad_norm": 1.530576058603922, "learning_rate": 8.560579910251986e-06, "loss": 1.77, "reason_loss": 0.4666973352432251, "step": 1979, "utility_loss": 1.3033041954040527 }, { "cosine_similarity": 0.08071265144305134, "epoch": 1.8452935694315005, "grad_norm": 1.1000907791801198, "learning_rate": 8.553676216775975e-06, "loss": 1.6639, "reason_loss": 0.4761190414428711, "step": 1980, "utility_loss": 1.1878139972686768 }, { "cosine_similarity": 0.030717001919488732, "epoch": 1.8462255358807083, "grad_norm": 1.24740878734999, "learning_rate": 8.546772523299966e-06, "loss": 1.8195, "reason_loss": 0.4898419678211212, "step": 1981, "utility_loss": 1.329700231552124 }, { "cosine_similarity": 0.13186233679117973, "epoch": 1.847157502329916, "grad_norm": 1.078570509826801, "learning_rate": 8.539868829823957e-06, "loss": 1.6274, "reason_loss": 0.48294490575790405, "step": 1982, "utility_loss": 1.1444156169891357 }, { "cosine_similarity": 0.24368874361306048, "epoch": 1.8480894687791238, "grad_norm": 1.2412564788175253, "learning_rate": 8.532965136347948e-06, "loss": 1.6967, "reason_loss": 0.47612762451171875, "step": 1983, "utility_loss": 1.2205891609191895 }, { "cosine_similarity": 0.2501541019662889, "epoch": 1.8490214352283316, "grad_norm": 1.2871202829826751, "learning_rate": 8.526061442871937e-06, "loss": 1.6016, "reason_loss": 0.49612513184547424, "step": 1984, "utility_loss": 1.1055179834365845 }, { "cosine_similarity": 0.27756689178500327, "epoch": 1.8499534016775396, "grad_norm": 1.238485689825371, "learning_rate": 8.519157749395927e-06, "loss": 1.4498, "reason_loss": 0.48478975892066956, "step": 1985, "utility_loss": 0.9650014638900757 }, { "cosine_similarity": 0.2986929533774704, "epoch": 1.8508853681267474, "grad_norm": 1.6594400934905005, "learning_rate": 8.512254055919918e-06, "loss": 1.7234, "reason_loss": 0.5302340984344482, "step": 1986, "utility_loss": 1.193185567855835 }, { "cosine_similarity": -0.031506452384043204, "epoch": 1.8518173345759554, "grad_norm": 1.0920860412747648, "learning_rate": 8.505350362443909e-06, "loss": 1.5836, "reason_loss": 0.5099972486495972, "step": 1987, "utility_loss": 1.0736300945281982 }, { "cosine_similarity": -0.11280839560031693, "epoch": 1.8527493010251632, "grad_norm": 1.191525337804539, "learning_rate": 8.4984466689679e-06, "loss": 1.5097, "reason_loss": 0.5131736993789673, "step": 1988, "utility_loss": 0.9965640306472778 }, { "cosine_similarity": 0.2749283681420014, "epoch": 1.853681267474371, "grad_norm": 1.1029835689590135, "learning_rate": 8.491542975491889e-06, "loss": 1.78, "reason_loss": 0.519775390625, "step": 1989, "utility_loss": 1.2601903676986694 }, { "cosine_similarity": 0.5278154837534408, "epoch": 1.8546132339235788, "grad_norm": 1.3399523070759418, "learning_rate": 8.48463928201588e-06, "loss": 1.84, "reason_loss": 0.49654924869537354, "step": 1990, "utility_loss": 1.343482494354248 }, { "cosine_similarity": 0.0553483374118303, "epoch": 1.8555452003727866, "grad_norm": 1.0808155577716996, "learning_rate": 8.47773558853987e-06, "loss": 1.711, "reason_loss": 0.4627007842063904, "step": 1991, "utility_loss": 1.2483222484588623 }, { "cosine_similarity": 0.16249294197946082, "epoch": 1.8564771668219944, "grad_norm": 1.2230211869233922, "learning_rate": 8.47083189506386e-06, "loss": 2.0896, "reason_loss": 0.5051379203796387, "step": 1992, "utility_loss": 1.584498405456543 }, { "cosine_similarity": 0.2894624562520806, "epoch": 1.8574091332712022, "grad_norm": 1.2602201297071143, "learning_rate": 8.46392820158785e-06, "loss": 1.9706, "reason_loss": 0.49074772000312805, "step": 1993, "utility_loss": 1.4798145294189453 }, { "cosine_similarity": 0.2812011050632843, "epoch": 1.85834109972041, "grad_norm": 1.1635293090049044, "learning_rate": 8.45702450811184e-06, "loss": 1.7803, "reason_loss": 0.4897654354572296, "step": 1994, "utility_loss": 1.2905088663101196 }, { "cosine_similarity": 0.28312111176697885, "epoch": 1.8592730661696177, "grad_norm": 1.2761463708881824, "learning_rate": 8.45012081463583e-06, "loss": 1.8, "reason_loss": 0.5057350397109985, "step": 1995, "utility_loss": 1.294247031211853 }, { "cosine_similarity": -0.13972682207553327, "epoch": 1.8602050326188257, "grad_norm": 1.1553100279470678, "learning_rate": 8.443217121159822e-06, "loss": 1.7276, "reason_loss": 0.5006837844848633, "step": 1996, "utility_loss": 1.2269597053527832 }, { "cosine_similarity": 0.28538391310210875, "epoch": 1.8611369990680335, "grad_norm": 1.1911530869143658, "learning_rate": 8.436313427683811e-06, "loss": 1.5009, "reason_loss": 0.4910120666027069, "step": 1997, "utility_loss": 1.0098934173583984 }, { "cosine_similarity": 0.342057196091111, "epoch": 1.8620689655172413, "grad_norm": 1.2185669460485227, "learning_rate": 8.429409734207802e-06, "loss": 1.6271, "reason_loss": 0.4538188576698303, "step": 1998, "utility_loss": 1.1732455492019653 }, { "cosine_similarity": 0.5386477479033979, "epoch": 1.8630009319664493, "grad_norm": 1.1861111187827356, "learning_rate": 8.422506040731793e-06, "loss": 1.9108, "reason_loss": 0.5321651697158813, "step": 1999, "utility_loss": 1.3785877227783203 }, { "cosine_similarity": 0.02128382678272208, "epoch": 1.8639328984156571, "grad_norm": 1.355211962873395, "learning_rate": 8.415602347255782e-06, "loss": 2.3052, "reason_loss": 0.47876375913619995, "step": 2000, "utility_loss": 1.8264222145080566 }, { "cosine_similarity": 0.1656183775940257, "epoch": 1.864864864864865, "grad_norm": 1.2964665233848842, "learning_rate": 8.408698653779774e-06, "loss": 1.633, "reason_loss": 0.47578316926956177, "step": 2001, "utility_loss": 1.1572513580322266 }, { "cosine_similarity": 0.3376086797937528, "epoch": 1.8657968313140727, "grad_norm": 1.6094043764250823, "learning_rate": 8.401794960303763e-06, "loss": 1.8367, "reason_loss": 0.5134425163269043, "step": 2002, "utility_loss": 1.3232383728027344 }, { "cosine_similarity": 0.15865366436056888, "epoch": 1.8667287977632805, "grad_norm": 1.3384316485806034, "learning_rate": 8.394891266827754e-06, "loss": 1.7396, "reason_loss": 0.46256133913993835, "step": 2003, "utility_loss": 1.2770578861236572 }, { "cosine_similarity": -0.06119675726822214, "epoch": 1.8676607642124883, "grad_norm": 1.4907214718365247, "learning_rate": 8.387987573351743e-06, "loss": 2.032, "reason_loss": 0.49360835552215576, "step": 2004, "utility_loss": 1.538438320159912 }, { "cosine_similarity": -0.05744439030830568, "epoch": 1.868592730661696, "grad_norm": 0.9201366104415599, "learning_rate": 8.381083879875734e-06, "loss": 1.4548, "reason_loss": 0.49742138385772705, "step": 2005, "utility_loss": 0.9573696851730347 }, { "cosine_similarity": 0.2848799124454934, "epoch": 1.8695246971109039, "grad_norm": 1.4270948023183614, "learning_rate": 8.374180186399724e-06, "loss": 1.9892, "reason_loss": 0.5081487894058228, "step": 2006, "utility_loss": 1.4810951948165894 }, { "cosine_similarity": 0.19374371396859577, "epoch": 1.8704566635601119, "grad_norm": 1.190751723565613, "learning_rate": 8.367276492923715e-06, "loss": 1.748, "reason_loss": 0.514961838722229, "step": 2007, "utility_loss": 1.2330304384231567 }, { "cosine_similarity": 0.030724648375347583, "epoch": 1.8713886300093197, "grad_norm": 1.3952116300334025, "learning_rate": 8.360372799447706e-06, "loss": 1.9292, "reason_loss": 0.5006968975067139, "step": 2008, "utility_loss": 1.428504467010498 }, { "cosine_similarity": 0.23594060735930708, "epoch": 1.8723205964585274, "grad_norm": 1.3657866513761465, "learning_rate": 8.353469105971695e-06, "loss": 1.9227, "reason_loss": 0.49935343861579895, "step": 2009, "utility_loss": 1.4233293533325195 }, { "cosine_similarity": 0.23504729761958548, "epoch": 1.8732525629077355, "grad_norm": 1.2035562429967581, "learning_rate": 8.346565412495686e-06, "loss": 1.6015, "reason_loss": 0.4911558926105499, "step": 2010, "utility_loss": 1.1103291511535645 }, { "cosine_similarity": -0.10055108993841376, "epoch": 1.8741845293569432, "grad_norm": 1.3041407501822389, "learning_rate": 8.339661719019676e-06, "loss": 1.6419, "reason_loss": 0.4750855565071106, "step": 2011, "utility_loss": 1.1667897701263428 }, { "cosine_similarity": 0.02828682579845866, "epoch": 1.875116495806151, "grad_norm": 1.3386743456598504, "learning_rate": 8.332758025543667e-06, "loss": 1.7342, "reason_loss": 0.45313194394111633, "step": 2012, "utility_loss": 1.2810616493225098 }, { "cosine_similarity": -0.020812820149418036, "epoch": 1.8760484622553588, "grad_norm": 0.967481844985986, "learning_rate": 8.325854332067656e-06, "loss": 1.4825, "reason_loss": 0.5210678577423096, "step": 2013, "utility_loss": 0.9614772200584412 }, { "cosine_similarity": -0.0865135310960026, "epoch": 1.8769804287045666, "grad_norm": 1.3033989695547532, "learning_rate": 8.318950638591647e-06, "loss": 1.5868, "reason_loss": 0.5065536499023438, "step": 2014, "utility_loss": 1.0802174806594849 }, { "cosine_similarity": 0.22454799228394332, "epoch": 1.8779123951537744, "grad_norm": 1.1248568468942721, "learning_rate": 8.312046945115638e-06, "loss": 1.5012, "reason_loss": 0.4909231662750244, "step": 2015, "utility_loss": 1.0102529525756836 }, { "cosine_similarity": -0.18432614611538645, "epoch": 1.8788443616029822, "grad_norm": 1.0280338229890673, "learning_rate": 8.305143251639628e-06, "loss": 1.4344, "reason_loss": 0.5098292827606201, "step": 2016, "utility_loss": 0.9245883226394653 }, { "cosine_similarity": 0.15408500957090393, "epoch": 1.87977632805219, "grad_norm": 1.4151799260328746, "learning_rate": 8.298239558163617e-06, "loss": 1.7979, "reason_loss": 0.47018373012542725, "step": 2017, "utility_loss": 1.3277058601379395 }, { "cosine_similarity": 0.29185124401795637, "epoch": 1.880708294501398, "grad_norm": 1.1133116936920149, "learning_rate": 8.291335864687608e-06, "loss": 1.4712, "reason_loss": 0.4671313464641571, "step": 2018, "utility_loss": 1.0040509700775146 }, { "cosine_similarity": 0.17249285678093088, "epoch": 1.8816402609506058, "grad_norm": 1.3287704490314232, "learning_rate": 8.284432171211599e-06, "loss": 1.9481, "reason_loss": 0.4818067252635956, "step": 2019, "utility_loss": 1.4663028717041016 }, { "cosine_similarity": -0.06070043619057975, "epoch": 1.8825722273998136, "grad_norm": 1.1187621968755523, "learning_rate": 8.27752847773559e-06, "loss": 1.67, "reason_loss": 0.49465179443359375, "step": 2020, "utility_loss": 1.17530357837677 }, { "cosine_similarity": 0.12846809549275237, "epoch": 1.8835041938490216, "grad_norm": 1.3445880039829485, "learning_rate": 8.27062478425958e-06, "loss": 1.8006, "reason_loss": 0.49621716141700745, "step": 2021, "utility_loss": 1.3043546676635742 }, { "cosine_similarity": 0.21471577011780513, "epoch": 1.8844361602982294, "grad_norm": 1.4380320101624906, "learning_rate": 8.26372109078357e-06, "loss": 1.793, "reason_loss": 0.4808445870876312, "step": 2022, "utility_loss": 1.3121681213378906 }, { "cosine_similarity": -0.12294248923975828, "epoch": 1.8853681267474371, "grad_norm": 1.4285843672860081, "learning_rate": 8.25681739730756e-06, "loss": 1.8526, "reason_loss": 0.4915391504764557, "step": 2023, "utility_loss": 1.3610683679580688 }, { "cosine_similarity": 0.22358443376255854, "epoch": 1.886300093196645, "grad_norm": 1.71358521010454, "learning_rate": 8.24991370383155e-06, "loss": 1.8176, "reason_loss": 0.4748792052268982, "step": 2024, "utility_loss": 1.342692494392395 }, { "cosine_similarity": 0.2765602149760946, "epoch": 1.8872320596458527, "grad_norm": 1.102620635414355, "learning_rate": 8.243010010355541e-06, "loss": 1.5428, "reason_loss": 0.4896666705608368, "step": 2025, "utility_loss": 1.0530927181243896 }, { "cosine_similarity": -0.007796991098967198, "epoch": 1.8881640260950605, "grad_norm": 1.1753086940166078, "learning_rate": 8.23610631687953e-06, "loss": 1.6893, "reason_loss": 0.5096023678779602, "step": 2026, "utility_loss": 1.1797289848327637 }, { "cosine_similarity": -0.031179622612256975, "epoch": 1.8890959925442683, "grad_norm": 1.2727528577498068, "learning_rate": 8.229202623403521e-06, "loss": 1.5026, "reason_loss": 0.4886190891265869, "step": 2027, "utility_loss": 1.0139429569244385 }, { "cosine_similarity": 0.7138665043882846, "epoch": 1.890027958993476, "grad_norm": 1.3193028333622159, "learning_rate": 8.222298929927512e-06, "loss": 1.8963, "reason_loss": 0.47818559408187866, "step": 2028, "utility_loss": 1.4180853366851807 }, { "cosine_similarity": 0.3990864695691874, "epoch": 1.890959925442684, "grad_norm": 1.091771082349415, "learning_rate": 8.215395236451503e-06, "loss": 1.7353, "reason_loss": 0.5024338960647583, "step": 2029, "utility_loss": 1.2328572273254395 }, { "cosine_similarity": 0.03758872123198374, "epoch": 1.8918918918918919, "grad_norm": 1.1314046260236659, "learning_rate": 8.208491542975493e-06, "loss": 1.5983, "reason_loss": 0.49947115778923035, "step": 2030, "utility_loss": 1.0988271236419678 }, { "cosine_similarity": 0.3810820113137088, "epoch": 1.8928238583410997, "grad_norm": 1.1551361715167745, "learning_rate": 8.201587849499482e-06, "loss": 1.6606, "reason_loss": 0.5118709802627563, "step": 2031, "utility_loss": 1.1486915349960327 }, { "cosine_similarity": 0.05657683149324844, "epoch": 1.8937558247903077, "grad_norm": 1.223845340323687, "learning_rate": 8.194684156023473e-06, "loss": 1.6965, "reason_loss": 0.49622252583503723, "step": 2032, "utility_loss": 1.2002503871917725 }, { "cosine_similarity": 0.18663389958887522, "epoch": 1.8946877912395155, "grad_norm": 0.9455469114399245, "learning_rate": 8.187780462547464e-06, "loss": 1.2506, "reason_loss": 0.49672380089759827, "step": 2033, "utility_loss": 0.7539165019989014 }, { "cosine_similarity": 0.04610396898780503, "epoch": 1.8956197576887233, "grad_norm": 1.1864557115521757, "learning_rate": 8.180876769071455e-06, "loss": 1.4932, "reason_loss": 0.4589906930923462, "step": 2034, "utility_loss": 1.0341827869415283 }, { "cosine_similarity": 0.21201806898210526, "epoch": 1.896551724137931, "grad_norm": 1.205050679885893, "learning_rate": 8.173973075595444e-06, "loss": 1.9364, "reason_loss": 0.4809500575065613, "step": 2035, "utility_loss": 1.4554167985916138 }, { "cosine_similarity": 0.06452428837724913, "epoch": 1.8974836905871388, "grad_norm": 1.3442986553842202, "learning_rate": 8.167069382119434e-06, "loss": 1.7322, "reason_loss": 0.5185494422912598, "step": 2036, "utility_loss": 1.2136752605438232 }, { "cosine_similarity": -0.035541808714215126, "epoch": 1.8984156570363466, "grad_norm": 0.9967794107631801, "learning_rate": 8.160165688643425e-06, "loss": 1.3806, "reason_loss": 0.4502575099468231, "step": 2037, "utility_loss": 0.9303236603736877 }, { "cosine_similarity": 0.12238975494805986, "epoch": 1.8993476234855544, "grad_norm": 1.2477654037394261, "learning_rate": 8.153261995167416e-06, "loss": 2.1155, "reason_loss": 0.4939592182636261, "step": 2038, "utility_loss": 1.6215417385101318 }, { "cosine_similarity": 0.13617597067820092, "epoch": 1.9002795899347622, "grad_norm": 1.285463264316606, "learning_rate": 8.146358301691407e-06, "loss": 1.9355, "reason_loss": 0.5140202641487122, "step": 2039, "utility_loss": 1.4214547872543335 }, { "cosine_similarity": 0.1374380647263666, "epoch": 1.9012115563839702, "grad_norm": 1.156990571932754, "learning_rate": 8.139454608215396e-06, "loss": 1.4469, "reason_loss": 0.48835471272468567, "step": 2040, "utility_loss": 0.9585176706314087 }, { "cosine_similarity": 0.13471052442383477, "epoch": 1.902143522833178, "grad_norm": 1.2656062667703034, "learning_rate": 8.132550914739386e-06, "loss": 1.8549, "reason_loss": 0.4919440746307373, "step": 2041, "utility_loss": 1.3629841804504395 }, { "cosine_similarity": 0.06799185959236692, "epoch": 1.9030754892823858, "grad_norm": 1.166586009488941, "learning_rate": 8.125647221263377e-06, "loss": 1.8535, "reason_loss": 0.5298038721084595, "step": 2042, "utility_loss": 1.3237310647964478 }, { "cosine_similarity": 0.09785707249744942, "epoch": 1.9040074557315938, "grad_norm": 1.0402175198201724, "learning_rate": 8.118743527787368e-06, "loss": 1.4913, "reason_loss": 0.5456560850143433, "step": 2043, "utility_loss": 0.945690393447876 }, { "cosine_similarity": 0.06025256549693253, "epoch": 1.9049394221808016, "grad_norm": 1.2302824321225296, "learning_rate": 8.111839834311357e-06, "loss": 1.9883, "reason_loss": 0.508065938949585, "step": 2044, "utility_loss": 1.4802451133728027 }, { "cosine_similarity": 0.08546169046018078, "epoch": 1.9058713886300094, "grad_norm": 1.3554241369029356, "learning_rate": 8.104936140835348e-06, "loss": 1.7894, "reason_loss": 0.4817468523979187, "step": 2045, "utility_loss": 1.307640790939331 }, { "cosine_similarity": 0.061165385350970804, "epoch": 1.9068033550792172, "grad_norm": 1.1927816713328163, "learning_rate": 8.098032447359337e-06, "loss": 1.7239, "reason_loss": 0.4933274984359741, "step": 2046, "utility_loss": 1.2305612564086914 }, { "cosine_similarity": 0.0667085208825921, "epoch": 1.907735321528425, "grad_norm": 1.1223890832439198, "learning_rate": 8.091128753883329e-06, "loss": 1.581, "reason_loss": 0.4817429184913635, "step": 2047, "utility_loss": 1.0992655754089355 }, { "cosine_similarity": -0.0978688032329595, "epoch": 1.9086672879776327, "grad_norm": 1.442352391038991, "learning_rate": 8.084225060407318e-06, "loss": 1.8658, "reason_loss": 0.5411431789398193, "step": 2048, "utility_loss": 1.3246480226516724 }, { "cosine_similarity": 0.09460865362863746, "epoch": 1.9095992544268405, "grad_norm": 1.1435178277165197, "learning_rate": 8.077321366931309e-06, "loss": 1.8717, "reason_loss": 0.4787578880786896, "step": 2049, "utility_loss": 1.3929171562194824 }, { "cosine_similarity": 0.2613192042416817, "epoch": 1.9105312208760483, "grad_norm": 1.252191128352359, "learning_rate": 8.0704176734553e-06, "loss": 1.6651, "reason_loss": 0.5091280937194824, "step": 2050, "utility_loss": 1.1559287309646606 }, { "cosine_similarity": 0.059919630040778236, "epoch": 1.9114631873252563, "grad_norm": 1.2278572472581326, "learning_rate": 8.063513979979289e-06, "loss": 1.8464, "reason_loss": 0.5164381861686707, "step": 2051, "utility_loss": 1.3299615383148193 }, { "cosine_similarity": 0.04779767201298638, "epoch": 1.9123951537744641, "grad_norm": 1.3212272723625205, "learning_rate": 8.056610286503281e-06, "loss": 1.6942, "reason_loss": 0.4700721502304077, "step": 2052, "utility_loss": 1.2241531610488892 }, { "cosine_similarity": 0.28513147917552534, "epoch": 1.913327120223672, "grad_norm": 1.3566696838564445, "learning_rate": 8.04970659302727e-06, "loss": 1.6477, "reason_loss": 0.4951246678829193, "step": 2053, "utility_loss": 1.1525620222091675 }, { "cosine_similarity": 0.11465893839994346, "epoch": 1.91425908667288, "grad_norm": 1.2166153191983486, "learning_rate": 8.04280289955126e-06, "loss": 1.6495, "reason_loss": 0.4878121614456177, "step": 2054, "utility_loss": 1.1617141962051392 }, { "cosine_similarity": -0.04339299667899646, "epoch": 1.9151910531220877, "grad_norm": 1.1445165632721843, "learning_rate": 8.03589920607525e-06, "loss": 1.8708, "reason_loss": 0.4897618591785431, "step": 2055, "utility_loss": 1.381068229675293 }, { "cosine_similarity": 0.01926041082129574, "epoch": 1.9161230195712955, "grad_norm": 0.9582694951661695, "learning_rate": 8.02899551259924e-06, "loss": 1.5697, "reason_loss": 0.4834267199039459, "step": 2056, "utility_loss": 1.0862399339675903 }, { "cosine_similarity": 0.06931437243495582, "epoch": 1.9170549860205033, "grad_norm": 1.2137010908412342, "learning_rate": 8.022091819123231e-06, "loss": 1.9738, "reason_loss": 0.4986410140991211, "step": 2057, "utility_loss": 1.4751591682434082 }, { "cosine_similarity": 0.036980324272591826, "epoch": 1.917986952469711, "grad_norm": 1.1594216468166554, "learning_rate": 8.015188125647222e-06, "loss": 1.9932, "reason_loss": 0.5337894558906555, "step": 2058, "utility_loss": 1.4594452381134033 }, { "cosine_similarity": 0.08432384992905907, "epoch": 1.9189189189189189, "grad_norm": 1.2077440316780739, "learning_rate": 8.008284432171213e-06, "loss": 1.8063, "reason_loss": 0.5135763883590698, "step": 2059, "utility_loss": 1.292731523513794 }, { "cosine_similarity": 0.0658815750863478, "epoch": 1.9198508853681266, "grad_norm": 1.4162019110406325, "learning_rate": 8.001380738695202e-06, "loss": 1.642, "reason_loss": 0.49320855736732483, "step": 2060, "utility_loss": 1.1487840414047241 }, { "cosine_similarity": -0.02400705821069479, "epoch": 1.9207828518173344, "grad_norm": 1.3769740315096664, "learning_rate": 7.994477045219193e-06, "loss": 1.8678, "reason_loss": 0.4921419024467468, "step": 2061, "utility_loss": 1.375678300857544 }, { "cosine_similarity": 0.15405942523786678, "epoch": 1.9217148182665424, "grad_norm": 0.9969148597054321, "learning_rate": 7.987573351743183e-06, "loss": 1.7114, "reason_loss": 0.4905279576778412, "step": 2062, "utility_loss": 1.2208683490753174 }, { "cosine_similarity": 0.23442298490740263, "epoch": 1.9226467847157502, "grad_norm": 1.260738125565948, "learning_rate": 7.980669658267174e-06, "loss": 1.8248, "reason_loss": 0.4881060719490051, "step": 2063, "utility_loss": 1.336739182472229 }, { "cosine_similarity": 0.2525445667545903, "epoch": 1.923578751164958, "grad_norm": 1.0306000212924553, "learning_rate": 7.973765964791163e-06, "loss": 1.6469, "reason_loss": 0.49123454093933105, "step": 2064, "utility_loss": 1.1556448936462402 }, { "cosine_similarity": 0.014063115029877121, "epoch": 1.924510717614166, "grad_norm": 1.2847202332397467, "learning_rate": 7.966862271315154e-06, "loss": 1.7231, "reason_loss": 0.4814974367618561, "step": 2065, "utility_loss": 1.2416057586669922 }, { "cosine_similarity": 0.034201233838082025, "epoch": 1.9254426840633738, "grad_norm": 1.0454062830628208, "learning_rate": 7.959958577839145e-06, "loss": 1.3353, "reason_loss": 0.48566240072250366, "step": 2066, "utility_loss": 0.8496063947677612 }, { "cosine_similarity": 0.12084890090469284, "epoch": 1.9263746505125816, "grad_norm": 1.3284276574353269, "learning_rate": 7.953054884363135e-06, "loss": 1.6507, "reason_loss": 0.4988923966884613, "step": 2067, "utility_loss": 1.1518272161483765 }, { "cosine_similarity": 0.19077553387916335, "epoch": 1.9273066169617894, "grad_norm": 1.1934706962792947, "learning_rate": 7.946151190887124e-06, "loss": 1.7871, "reason_loss": 0.539750337600708, "step": 2068, "utility_loss": 1.2473922967910767 }, { "cosine_similarity": 0.23322319920890378, "epoch": 1.9282385834109972, "grad_norm": 1.0237171513664605, "learning_rate": 7.939247497411115e-06, "loss": 1.5129, "reason_loss": 0.4600222408771515, "step": 2069, "utility_loss": 1.0528861284255981 }, { "cosine_similarity": -0.22187225380075856, "epoch": 1.929170549860205, "grad_norm": 1.080052320029694, "learning_rate": 7.932343803935106e-06, "loss": 1.7904, "reason_loss": 0.49953415989875793, "step": 2070, "utility_loss": 1.2908461093902588 }, { "cosine_similarity": -0.21908402093335044, "epoch": 1.9301025163094128, "grad_norm": 1.1408439433067417, "learning_rate": 7.925440110459096e-06, "loss": 1.7587, "reason_loss": 0.5223506689071655, "step": 2071, "utility_loss": 1.2363070249557495 }, { "cosine_similarity": 0.04750414964547024, "epoch": 1.9310344827586206, "grad_norm": 1.2329261631209576, "learning_rate": 7.918536416983087e-06, "loss": 1.743, "reason_loss": 0.49407464265823364, "step": 2072, "utility_loss": 1.248937964439392 }, { "cosine_similarity": 0.6265067221651061, "epoch": 1.9319664492078286, "grad_norm": 1.3500408688888952, "learning_rate": 7.911632723507076e-06, "loss": 1.6362, "reason_loss": 0.4863038659095764, "step": 2073, "utility_loss": 1.1498477458953857 }, { "cosine_similarity": 0.34520142700381345, "epoch": 1.9328984156570364, "grad_norm": 1.2871719304515734, "learning_rate": 7.904729030031067e-06, "loss": 1.5448, "reason_loss": 0.5108394622802734, "step": 2074, "utility_loss": 1.0339449644088745 }, { "cosine_similarity": 0.16773087243612808, "epoch": 1.9338303821062441, "grad_norm": 1.1431681973031498, "learning_rate": 7.897825336555058e-06, "loss": 1.7271, "reason_loss": 0.4981078505516052, "step": 2075, "utility_loss": 1.2289459705352783 }, { "cosine_similarity": 0.06564836034809998, "epoch": 1.9347623485554521, "grad_norm": 1.1878946500458263, "learning_rate": 7.890921643079048e-06, "loss": 1.5542, "reason_loss": 0.4986535310745239, "step": 2076, "utility_loss": 1.0555216073989868 }, { "cosine_similarity": -0.1495406636549025, "epoch": 1.93569431500466, "grad_norm": 0.9738093937534268, "learning_rate": 7.884017949603038e-06, "loss": 1.4369, "reason_loss": 0.491157591342926, "step": 2077, "utility_loss": 0.9457060694694519 }, { "cosine_similarity": 0.05119423330315064, "epoch": 1.9366262814538677, "grad_norm": 1.0861072888227399, "learning_rate": 7.877114256127028e-06, "loss": 1.8098, "reason_loss": 0.4915600121021271, "step": 2078, "utility_loss": 1.318237543106079 }, { "cosine_similarity": 0.09352708943743018, "epoch": 1.9375582479030755, "grad_norm": 1.2385633358048873, "learning_rate": 7.870210562651019e-06, "loss": 1.7246, "reason_loss": 0.5000343322753906, "step": 2079, "utility_loss": 1.2245447635650635 }, { "cosine_similarity": 0.04163916109744907, "epoch": 1.9384902143522833, "grad_norm": 1.189202269890527, "learning_rate": 7.86330686917501e-06, "loss": 1.7255, "reason_loss": 0.4918442368507385, "step": 2080, "utility_loss": 1.2336574792861938 }, { "cosine_similarity": -0.08506962191552367, "epoch": 1.939422180801491, "grad_norm": 1.8059440691928204, "learning_rate": 7.856403175699e-06, "loss": 1.7081, "reason_loss": 0.4589470624923706, "step": 2081, "utility_loss": 1.2491652965545654 }, { "cosine_similarity": 0.3415966061934442, "epoch": 1.9403541472506989, "grad_norm": 1.0645479954261767, "learning_rate": 7.84949948222299e-06, "loss": 1.7253, "reason_loss": 0.4912906587123871, "step": 2082, "utility_loss": 1.2339787483215332 }, { "cosine_similarity": 0.18627274726559648, "epoch": 1.9412861136999067, "grad_norm": 1.1959155579056573, "learning_rate": 7.84259578874698e-06, "loss": 1.826, "reason_loss": 0.4922850430011749, "step": 2083, "utility_loss": 1.3336766958236694 }, { "cosine_similarity": 0.16400882644130202, "epoch": 1.9422180801491147, "grad_norm": 1.1248444987786663, "learning_rate": 7.835692095270971e-06, "loss": 1.4163, "reason_loss": 0.5007659792900085, "step": 2084, "utility_loss": 0.9155222773551941 }, { "cosine_similarity": 0.08743736603884844, "epoch": 1.9431500465983225, "grad_norm": 1.0822734714636015, "learning_rate": 7.828788401794962e-06, "loss": 1.8084, "reason_loss": 0.5399782657623291, "step": 2085, "utility_loss": 1.2684569358825684 }, { "cosine_similarity": 0.06609689823914303, "epoch": 1.9440820130475303, "grad_norm": 1.1811521466328814, "learning_rate": 7.82188470831895e-06, "loss": 1.5664, "reason_loss": 0.5144270062446594, "step": 2086, "utility_loss": 1.0519955158233643 }, { "cosine_similarity": 0.08787146870940295, "epoch": 1.9450139794967383, "grad_norm": 1.5305648853463927, "learning_rate": 7.814981014842941e-06, "loss": 2.1874, "reason_loss": 0.4750002324581146, "step": 2087, "utility_loss": 1.7123544216156006 }, { "cosine_similarity": 0.04998221915393868, "epoch": 1.945945945945946, "grad_norm": 1.1357361751263884, "learning_rate": 7.808077321366932e-06, "loss": 1.8341, "reason_loss": 0.4946889281272888, "step": 2088, "utility_loss": 1.3394418954849243 }, { "cosine_similarity": 0.01958222888374131, "epoch": 1.9468779123951538, "grad_norm": 1.2618321507194235, "learning_rate": 7.801173627890923e-06, "loss": 1.7821, "reason_loss": 0.4903068244457245, "step": 2089, "utility_loss": 1.291820764541626 }, { "cosine_similarity": 0.35441036363452694, "epoch": 1.9478098788443616, "grad_norm": 1.2228726641646626, "learning_rate": 7.794269934414914e-06, "loss": 1.6502, "reason_loss": 0.5105428099632263, "step": 2090, "utility_loss": 1.1396899223327637 }, { "cosine_similarity": 0.2226663549020032, "epoch": 1.9487418452935694, "grad_norm": 0.9881216765117012, "learning_rate": 7.787366240938903e-06, "loss": 1.6745, "reason_loss": 0.4758422076702118, "step": 2091, "utility_loss": 1.198677659034729 }, { "cosine_similarity": -0.07124703110028562, "epoch": 1.9496738117427772, "grad_norm": 1.0804633512204167, "learning_rate": 7.780462547462893e-06, "loss": 1.5286, "reason_loss": 0.46867966651916504, "step": 2092, "utility_loss": 1.0599321126937866 }, { "cosine_similarity": -0.06169346507496867, "epoch": 1.950605778191985, "grad_norm": 1.3022256233843676, "learning_rate": 7.773558853986884e-06, "loss": 1.9491, "reason_loss": 0.4903939962387085, "step": 2093, "utility_loss": 1.4587364196777344 }, { "cosine_similarity": 0.42112754633750754, "epoch": 1.9515377446411928, "grad_norm": 1.0451508549043038, "learning_rate": 7.766655160510875e-06, "loss": 1.6826, "reason_loss": 0.4525470733642578, "step": 2094, "utility_loss": 1.230100154876709 }, { "cosine_similarity": 0.21366890038909822, "epoch": 1.9524697110904008, "grad_norm": 1.19993886852947, "learning_rate": 7.759751467034864e-06, "loss": 1.5662, "reason_loss": 0.4995950758457184, "step": 2095, "utility_loss": 1.0666532516479492 }, { "cosine_similarity": 0.03471777665580617, "epoch": 1.9534016775396086, "grad_norm": 1.150004610413054, "learning_rate": 7.752847773558855e-06, "loss": 1.7189, "reason_loss": 0.49044230580329895, "step": 2096, "utility_loss": 1.2284398078918457 }, { "cosine_similarity": 0.1150241608680302, "epoch": 1.9543336439888164, "grad_norm": 0.9926539563411986, "learning_rate": 7.745944080082844e-06, "loss": 1.6379, "reason_loss": 0.5328667759895325, "step": 2097, "utility_loss": 1.1050240993499756 }, { "cosine_similarity": 0.07296879787036563, "epoch": 1.9552656104380244, "grad_norm": 1.215777726880124, "learning_rate": 7.739040386606836e-06, "loss": 1.7892, "reason_loss": 0.49853184819221497, "step": 2098, "utility_loss": 1.2906808853149414 }, { "cosine_similarity": 0.07877290415226883, "epoch": 1.9561975768872322, "grad_norm": 1.0539157230259495, "learning_rate": 7.732136693130827e-06, "loss": 1.5435, "reason_loss": 0.4969417452812195, "step": 2099, "utility_loss": 1.0465104579925537 }, { "cosine_similarity": -0.02677302221472255, "epoch": 1.95712954333644, "grad_norm": 1.1867866724966394, "learning_rate": 7.725232999654816e-06, "loss": 1.8271, "reason_loss": 0.4985504746437073, "step": 2100, "utility_loss": 1.3285866975784302 }, { "cosine_similarity": 0.036827066938171, "epoch": 1.9580615097856477, "grad_norm": 1.1596694766725266, "learning_rate": 7.718329306178807e-06, "loss": 1.8282, "reason_loss": 0.48135697841644287, "step": 2101, "utility_loss": 1.34683358669281 }, { "cosine_similarity": 0.2111753071885252, "epoch": 1.9589934762348555, "grad_norm": 1.3301436907522606, "learning_rate": 7.711425612702796e-06, "loss": 1.4689, "reason_loss": 0.4740029573440552, "step": 2102, "utility_loss": 0.9948973655700684 }, { "cosine_similarity": 0.14935298613274697, "epoch": 1.9599254426840633, "grad_norm": 1.0753075009806843, "learning_rate": 7.704521919226788e-06, "loss": 1.9105, "reason_loss": 0.4818619191646576, "step": 2103, "utility_loss": 1.4286726713180542 }, { "cosine_similarity": 0.18218813095925776, "epoch": 1.9608574091332711, "grad_norm": 1.1466399627589092, "learning_rate": 7.697618225750777e-06, "loss": 1.9066, "reason_loss": 0.5403600335121155, "step": 2104, "utility_loss": 1.3662463426589966 }, { "cosine_similarity": 0.0805980406714678, "epoch": 1.961789375582479, "grad_norm": 1.2536090428547728, "learning_rate": 7.690714532274768e-06, "loss": 1.9317, "reason_loss": 0.4976034164428711, "step": 2105, "utility_loss": 1.4341181516647339 }, { "cosine_similarity": 0.24939249468657734, "epoch": 1.962721342031687, "grad_norm": 1.1393071795408638, "learning_rate": 7.683810838798757e-06, "loss": 1.6109, "reason_loss": 0.48835140466690063, "step": 2106, "utility_loss": 1.1225769519805908 }, { "cosine_similarity": 0.14865572584951908, "epoch": 1.9636533084808947, "grad_norm": 1.1371232763520311, "learning_rate": 7.676907145322748e-06, "loss": 1.633, "reason_loss": 0.4738008379936218, "step": 2107, "utility_loss": 1.159217119216919 }, { "cosine_similarity": 0.09721483730666304, "epoch": 1.9645852749301025, "grad_norm": 1.114064973096787, "learning_rate": 7.670003451846738e-06, "loss": 2.0532, "reason_loss": 0.5470859408378601, "step": 2108, "utility_loss": 1.5061473846435547 }, { "cosine_similarity": 0.0891630052693417, "epoch": 1.9655172413793105, "grad_norm": 1.1056481757922565, "learning_rate": 7.663099758370729e-06, "loss": 1.5633, "reason_loss": 0.4913313686847687, "step": 2109, "utility_loss": 1.071990966796875 }, { "cosine_similarity": 0.0922582291891362, "epoch": 1.9664492078285183, "grad_norm": 1.4575735915603223, "learning_rate": 7.65619606489472e-06, "loss": 1.6356, "reason_loss": 0.5077744722366333, "step": 2110, "utility_loss": 1.127822995185852 }, { "cosine_similarity": 0.5829288842963488, "epoch": 1.967381174277726, "grad_norm": 1.2106536738296938, "learning_rate": 7.649292371418709e-06, "loss": 1.7165, "reason_loss": 0.5244224071502686, "step": 2111, "utility_loss": 1.1920819282531738 }, { "cosine_similarity": 0.03516858731082016, "epoch": 1.9683131407269339, "grad_norm": 1.2741457086702248, "learning_rate": 7.6423886779427e-06, "loss": 1.9029, "reason_loss": 0.5053166151046753, "step": 2112, "utility_loss": 1.397544026374817 }, { "cosine_similarity": 0.21021030451300335, "epoch": 1.9692451071761417, "grad_norm": 1.3504591846392575, "learning_rate": 7.63548498446669e-06, "loss": 1.8073, "reason_loss": 0.4878193736076355, "step": 2113, "utility_loss": 1.3195228576660156 }, { "cosine_similarity": -0.008742989741353456, "epoch": 1.9701770736253494, "grad_norm": 1.1391333355540334, "learning_rate": 7.628581290990681e-06, "loss": 1.5824, "reason_loss": 0.5001473426818848, "step": 2114, "utility_loss": 1.0822155475616455 }, { "cosine_similarity": -0.1114021601262675, "epoch": 1.9711090400745572, "grad_norm": 1.04872214242499, "learning_rate": 7.621677597514671e-06, "loss": 1.5912, "reason_loss": 0.506327748298645, "step": 2115, "utility_loss": 1.0848479270935059 }, { "cosine_similarity": 0.3112250963778802, "epoch": 1.972041006523765, "grad_norm": 1.1612545493277315, "learning_rate": 7.614773904038662e-06, "loss": 1.7858, "reason_loss": 0.5006334185600281, "step": 2116, "utility_loss": 1.2851732969284058 }, { "cosine_similarity": -0.17676645970395513, "epoch": 1.972972972972973, "grad_norm": 1.1709787952332713, "learning_rate": 7.6078702105626516e-06, "loss": 1.4807, "reason_loss": 0.4943365752696991, "step": 2117, "utility_loss": 0.9863741397857666 }, { "cosine_similarity": 0.05506683253414609, "epoch": 1.9739049394221808, "grad_norm": 1.2182938146854738, "learning_rate": 7.600966517086642e-06, "loss": 1.5634, "reason_loss": 0.5135818719863892, "step": 2118, "utility_loss": 1.0498288869857788 }, { "cosine_similarity": 0.16217178963513842, "epoch": 1.9748369058713886, "grad_norm": 1.2777017589726114, "learning_rate": 7.594062823610633e-06, "loss": 1.8949, "reason_loss": 0.48485803604125977, "step": 2119, "utility_loss": 1.4100441932678223 }, { "cosine_similarity": 0.03257648192483853, "epoch": 1.9757688723205966, "grad_norm": 1.1981286347433497, "learning_rate": 7.587159130134623e-06, "loss": 1.7303, "reason_loss": 0.45636802911758423, "step": 2120, "utility_loss": 1.2739452123641968 }, { "cosine_similarity": 0.002512590920304665, "epoch": 1.9767008387698044, "grad_norm": 1.4230128452502058, "learning_rate": 7.580255436658614e-06, "loss": 1.7492, "reason_loss": 0.5012975335121155, "step": 2121, "utility_loss": 1.247894287109375 }, { "cosine_similarity": 0.29565666227060705, "epoch": 1.9776328052190122, "grad_norm": 1.3861694418970745, "learning_rate": 7.5733517431826035e-06, "loss": 1.7857, "reason_loss": 0.4977037310600281, "step": 2122, "utility_loss": 1.288030982017517 }, { "cosine_similarity": 0.0835769128107909, "epoch": 1.97856477166822, "grad_norm": 1.193439040070212, "learning_rate": 7.566448049706594e-06, "loss": 1.9073, "reason_loss": 0.4999157190322876, "step": 2123, "utility_loss": 1.4074265956878662 }, { "cosine_similarity": -0.1027866645626926, "epoch": 1.9794967381174278, "grad_norm": 1.3320156946278134, "learning_rate": 7.559544356230583e-06, "loss": 1.7779, "reason_loss": 0.4926240146160126, "step": 2124, "utility_loss": 1.2852755784988403 }, { "cosine_similarity": -0.11979283142085721, "epoch": 1.9804287045666356, "grad_norm": 1.270361114130431, "learning_rate": 7.552640662754575e-06, "loss": 1.9628, "reason_loss": 0.4590233862400055, "step": 2125, "utility_loss": 1.503809928894043 }, { "cosine_similarity": 0.363441893889094, "epoch": 1.9813606710158433, "grad_norm": 1.1320215501219602, "learning_rate": 7.545736969278564e-06, "loss": 1.788, "reason_loss": 0.47027939558029175, "step": 2126, "utility_loss": 1.3177173137664795 }, { "cosine_similarity": 0.15766029435551296, "epoch": 1.9822926374650511, "grad_norm": 1.38549919695841, "learning_rate": 7.5388332758025555e-06, "loss": 1.7786, "reason_loss": 0.49521589279174805, "step": 2127, "utility_loss": 1.283428430557251 }, { "cosine_similarity": 0.08129523856263328, "epoch": 1.983224603914259, "grad_norm": 1.0676038035424755, "learning_rate": 7.5319295823265445e-06, "loss": 2.006, "reason_loss": 0.5141687393188477, "step": 2128, "utility_loss": 1.4918160438537598 }, { "cosine_similarity": -0.10743992373721391, "epoch": 1.984156570363467, "grad_norm": 1.0514027710695968, "learning_rate": 7.525025888850535e-06, "loss": 1.7885, "reason_loss": 0.5114859342575073, "step": 2129, "utility_loss": 1.2770559787750244 }, { "cosine_similarity": -0.04266873370636973, "epoch": 1.9850885368126747, "grad_norm": 1.0494827777939102, "learning_rate": 7.518122195374527e-06, "loss": 1.7618, "reason_loss": 0.4967689514160156, "step": 2130, "utility_loss": 1.2650330066680908 }, { "cosine_similarity": 0.1058393820466861, "epoch": 1.9860205032618827, "grad_norm": 1.0471439216262295, "learning_rate": 7.511218501898516e-06, "loss": 1.4969, "reason_loss": 0.4789983034133911, "step": 2131, "utility_loss": 1.017866849899292 }, { "cosine_similarity": 0.15235738966421472, "epoch": 1.9869524697110905, "grad_norm": 1.1407477931623087, "learning_rate": 7.504314808422507e-06, "loss": 1.703, "reason_loss": 0.48075544834136963, "step": 2132, "utility_loss": 1.222272276878357 }, { "cosine_similarity": 0.020697726531295316, "epoch": 1.9878844361602983, "grad_norm": 1.0791822237173623, "learning_rate": 7.4974111149464965e-06, "loss": 1.5962, "reason_loss": 0.4569224715232849, "step": 2133, "utility_loss": 1.139296293258667 }, { "cosine_similarity": -0.008908623181766771, "epoch": 1.988816402609506, "grad_norm": 1.1053782620837493, "learning_rate": 7.490507421470487e-06, "loss": 1.6114, "reason_loss": 0.4749569594860077, "step": 2134, "utility_loss": 1.136476755142212 }, { "cosine_similarity": 0.07862246284901366, "epoch": 1.9897483690587139, "grad_norm": 1.0940872031103623, "learning_rate": 7.483603727994477e-06, "loss": 1.5893, "reason_loss": 0.49390852451324463, "step": 2135, "utility_loss": 1.0954341888427734 }, { "cosine_similarity": 0.015515824815278271, "epoch": 1.9906803355079217, "grad_norm": 1.1654035917161982, "learning_rate": 7.476700034518468e-06, "loss": 1.7208, "reason_loss": 0.4675758481025696, "step": 2136, "utility_loss": 1.2532234191894531 }, { "cosine_similarity": -0.2693549782221923, "epoch": 1.9916123019571295, "grad_norm": 1.54906722793214, "learning_rate": 7.469796341042458e-06, "loss": 1.6142, "reason_loss": 0.5052294135093689, "step": 2137, "utility_loss": 1.1089787483215332 }, { "cosine_similarity": 0.16220795698424562, "epoch": 1.9925442684063372, "grad_norm": 0.9867717351506611, "learning_rate": 7.4628926475664485e-06, "loss": 1.648, "reason_loss": 0.48272544145584106, "step": 2138, "utility_loss": 1.165318489074707 }, { "cosine_similarity": 0.19170351413074094, "epoch": 1.993476234855545, "grad_norm": 1.0709483450410704, "learning_rate": 7.455988954090439e-06, "loss": 1.6862, "reason_loss": 0.491473525762558, "step": 2139, "utility_loss": 1.1947572231292725 }, { "cosine_similarity": -0.0394028426908788, "epoch": 1.994408201304753, "grad_norm": 1.0197135781197881, "learning_rate": 7.449085260614429e-06, "loss": 1.6116, "reason_loss": 0.5147231221199036, "step": 2140, "utility_loss": 1.0969243049621582 }, { "cosine_similarity": 0.12627730231506568, "epoch": 1.9953401677539608, "grad_norm": 1.1802152509691837, "learning_rate": 7.44218156713842e-06, "loss": 1.684, "reason_loss": 0.47701331973075867, "step": 2141, "utility_loss": 1.206987738609314 }, { "cosine_similarity": -0.11943057225830951, "epoch": 1.9962721342031688, "grad_norm": 1.0584273091263474, "learning_rate": 7.43527787366241e-06, "loss": 1.6472, "reason_loss": 0.4878911077976227, "step": 2142, "utility_loss": 1.1592929363250732 }, { "cosine_similarity": 0.04723640816902136, "epoch": 1.9972041006523766, "grad_norm": 1.2392420451461883, "learning_rate": 7.4283741801864004e-06, "loss": 1.6764, "reason_loss": 0.46890783309936523, "step": 2143, "utility_loss": 1.2074716091156006 }, { "cosine_similarity": 0.12406724598100269, "epoch": 1.9981360671015844, "grad_norm": 1.0684955992012697, "learning_rate": 7.42147048671039e-06, "loss": 1.3879, "reason_loss": 0.5304180383682251, "step": 2144, "utility_loss": 0.8574621677398682 }, { "cosine_similarity": 0.28131555790952634, "epoch": 1.9990680335507922, "grad_norm": 1.1377666419336365, "learning_rate": 7.414566793234381e-06, "loss": 1.4443, "reason_loss": 0.51756352186203, "step": 2145, "utility_loss": 0.9267770648002625 }, { "cosine_similarity": 0.0055834426262278145, "epoch": 2.0, "grad_norm": 1.1367640009844195, "learning_rate": 7.407663099758371e-06, "loss": 1.4574, "reason_loss": 0.4617825448513031, "step": 2146, "utility_loss": 0.9956390857696533 }, { "cosine_similarity": 0.3277431993698084, "epoch": 2.000931966449208, "grad_norm": 1.031903117973587, "learning_rate": 7.400759406282362e-06, "loss": 1.8551, "reason_loss": 0.48136991262435913, "step": 2147, "utility_loss": 1.373731017112732 }, { "cosine_similarity": 0.1876336584844848, "epoch": 2.0018639328984156, "grad_norm": 1.2805382486927253, "learning_rate": 7.3938557128063516e-06, "loss": 1.4992, "reason_loss": 0.4647969901561737, "step": 2148, "utility_loss": 1.0344114303588867 }, { "cosine_similarity": -0.044730454706661614, "epoch": 2.0027958993476234, "grad_norm": 1.0860870297967433, "learning_rate": 7.386952019330342e-06, "loss": 1.5637, "reason_loss": 0.4697873592376709, "step": 2149, "utility_loss": 1.0938940048217773 }, { "cosine_similarity": -0.07332692300294599, "epoch": 2.003727865796831, "grad_norm": 0.8778482457133454, "learning_rate": 7.380048325854333e-06, "loss": 1.4467, "reason_loss": 0.46641913056373596, "step": 2150, "utility_loss": 0.9802919626235962 }, { "cosine_similarity": -0.13203222200183162, "epoch": 2.004659832246039, "grad_norm": 1.0592378663985655, "learning_rate": 7.373144632378323e-06, "loss": 1.486, "reason_loss": 0.5109826326370239, "step": 2151, "utility_loss": 0.9750542640686035 }, { "cosine_similarity": -0.011519050776382536, "epoch": 2.005591798695247, "grad_norm": 1.1287870661933026, "learning_rate": 7.366240938902314e-06, "loss": 1.2444, "reason_loss": 0.4771624505519867, "step": 2152, "utility_loss": 0.7672688961029053 }, { "cosine_similarity": 0.0679269154630991, "epoch": 2.006523765144455, "grad_norm": 1.016878788521321, "learning_rate": 7.3593372454263035e-06, "loss": 1.3523, "reason_loss": 0.5221277475357056, "step": 2153, "utility_loss": 0.830198347568512 }, { "cosine_similarity": 0.15607001770896953, "epoch": 2.0074557315936628, "grad_norm": 0.9373626378394062, "learning_rate": 7.352433551950294e-06, "loss": 1.2257, "reason_loss": 0.5330639481544495, "step": 2154, "utility_loss": 0.6926330924034119 }, { "cosine_similarity": 0.17488157520604516, "epoch": 2.0083876980428705, "grad_norm": 1.0718863396484681, "learning_rate": 7.345529858474284e-06, "loss": 1.4544, "reason_loss": 0.4782502055168152, "step": 2155, "utility_loss": 0.9761109352111816 }, { "cosine_similarity": 0.27990492395591604, "epoch": 2.0093196644920783, "grad_norm": 1.441316615985403, "learning_rate": 7.338626164998275e-06, "loss": 1.5622, "reason_loss": 0.5098925828933716, "step": 2156, "utility_loss": 1.0522699356079102 }, { "cosine_similarity": -0.008336449389424521, "epoch": 2.010251630941286, "grad_norm": 1.2722999434331095, "learning_rate": 7.331722471522265e-06, "loss": 1.453, "reason_loss": 0.504146933555603, "step": 2157, "utility_loss": 0.9488191604614258 }, { "cosine_similarity": 0.06232343421142531, "epoch": 2.011183597390494, "grad_norm": 1.0762759956879673, "learning_rate": 7.3248187780462555e-06, "loss": 1.4364, "reason_loss": 0.4713141918182373, "step": 2158, "utility_loss": 0.9651221632957458 }, { "cosine_similarity": 0.18216430962227628, "epoch": 2.0121155638397017, "grad_norm": 1.1486653262313327, "learning_rate": 7.317915084570245e-06, "loss": 1.4427, "reason_loss": 0.48144784569740295, "step": 2159, "utility_loss": 0.9612221717834473 }, { "cosine_similarity": 0.002251682548135195, "epoch": 2.0130475302889095, "grad_norm": 1.2194788404643129, "learning_rate": 7.311011391094236e-06, "loss": 1.3282, "reason_loss": 0.49254709482192993, "step": 2160, "utility_loss": 0.8356254696846008 }, { "cosine_similarity": 0.2233597355188342, "epoch": 2.0139794967381173, "grad_norm": 1.108914179254389, "learning_rate": 7.304107697618227e-06, "loss": 1.6323, "reason_loss": 0.5162135362625122, "step": 2161, "utility_loss": 1.1160800457000732 }, { "cosine_similarity": 0.20878534076525007, "epoch": 2.014911463187325, "grad_norm": 1.2830781127180508, "learning_rate": 7.297204004142217e-06, "loss": 1.148, "reason_loss": 0.464424729347229, "step": 2162, "utility_loss": 0.6836252212524414 }, { "cosine_similarity": -0.010548605914548516, "epoch": 2.0158434296365333, "grad_norm": 1.0916510473004661, "learning_rate": 7.2903003106662075e-06, "loss": 1.6467, "reason_loss": 0.48789772391319275, "step": 2163, "utility_loss": 1.1588200330734253 }, { "cosine_similarity": -0.007534237474613168, "epoch": 2.016775396085741, "grad_norm": 1.23091013781351, "learning_rate": 7.283396617190197e-06, "loss": 1.7338, "reason_loss": 0.47614431381225586, "step": 2164, "utility_loss": 1.2576534748077393 }, { "cosine_similarity": 0.1782601449164907, "epoch": 2.017707362534949, "grad_norm": 1.0569550616073375, "learning_rate": 7.276492923714188e-06, "loss": 1.3647, "reason_loss": 0.5067057609558105, "step": 2165, "utility_loss": 0.8580224514007568 }, { "cosine_similarity": 0.07281675100781235, "epoch": 2.0186393289841567, "grad_norm": 1.157991196978645, "learning_rate": 7.269589230238178e-06, "loss": 1.3608, "reason_loss": 0.47321775555610657, "step": 2166, "utility_loss": 0.8875651955604553 }, { "cosine_similarity": 0.2598738182310768, "epoch": 2.0195712954333644, "grad_norm": 1.09186864492075, "learning_rate": 7.262685536762169e-06, "loss": 1.928, "reason_loss": 0.5052133798599243, "step": 2167, "utility_loss": 1.4227559566497803 }, { "cosine_similarity": -0.12580830304843346, "epoch": 2.0205032618825722, "grad_norm": 1.104069112976049, "learning_rate": 7.255781843286159e-06, "loss": 1.4333, "reason_loss": 0.5168942809104919, "step": 2168, "utility_loss": 0.9164156913757324 }, { "cosine_similarity": 0.1565441678091998, "epoch": 2.02143522833178, "grad_norm": 1.0385296767074559, "learning_rate": 7.248878149810149e-06, "loss": 1.4217, "reason_loss": 0.4892072081565857, "step": 2169, "utility_loss": 0.9325255155563354 }, { "cosine_similarity": 0.11792933421798706, "epoch": 2.022367194780988, "grad_norm": 1.4151503821496028, "learning_rate": 7.24197445633414e-06, "loss": 1.708, "reason_loss": 0.47913044691085815, "step": 2170, "utility_loss": 1.2288904190063477 }, { "cosine_similarity": 0.18109634352385576, "epoch": 2.0232991612301956, "grad_norm": 1.150842665359134, "learning_rate": 7.23507076285813e-06, "loss": 1.4396, "reason_loss": 0.48347410559654236, "step": 2171, "utility_loss": 0.9561498761177063 }, { "cosine_similarity": 0.12227491874389239, "epoch": 2.0242311276794034, "grad_norm": 1.0533555512886594, "learning_rate": 7.228167069382121e-06, "loss": 1.4442, "reason_loss": 0.49363696575164795, "step": 2172, "utility_loss": 0.9505146741867065 }, { "cosine_similarity": 0.13734091088700218, "epoch": 2.025163094128611, "grad_norm": 0.874538008598196, "learning_rate": 7.2212633759061106e-06, "loss": 1.3788, "reason_loss": 0.4988851845264435, "step": 2173, "utility_loss": 0.8799082040786743 }, { "cosine_similarity": 0.2802686857698922, "epoch": 2.0260950605778194, "grad_norm": 1.1067447934688035, "learning_rate": 7.214359682430101e-06, "loss": 1.3693, "reason_loss": 0.4775351583957672, "step": 2174, "utility_loss": 0.8917849063873291 }, { "cosine_similarity": -0.08023576526158396, "epoch": 2.027027027027027, "grad_norm": 1.2559614067693554, "learning_rate": 7.20745598895409e-06, "loss": 1.8281, "reason_loss": 0.4613972008228302, "step": 2175, "utility_loss": 1.3667125701904297 }, { "cosine_similarity": -0.030672285398053244, "epoch": 2.027958993476235, "grad_norm": 1.2002776816238443, "learning_rate": 7.200552295478082e-06, "loss": 1.4359, "reason_loss": 0.4723745584487915, "step": 2176, "utility_loss": 0.9635157585144043 }, { "cosine_similarity": -0.0730281763300959, "epoch": 2.0288909599254428, "grad_norm": 1.1838139632315425, "learning_rate": 7.193648602002071e-06, "loss": 1.6993, "reason_loss": 0.5015928149223328, "step": 2177, "utility_loss": 1.1977267265319824 }, { "cosine_similarity": -0.3363143526581334, "epoch": 2.0298229263746506, "grad_norm": 1.4108627926592507, "learning_rate": 7.1867449085260625e-06, "loss": 1.4905, "reason_loss": 0.5016295313835144, "step": 2178, "utility_loss": 0.9889180660247803 }, { "cosine_similarity": -0.08237175237366258, "epoch": 2.0307548928238583, "grad_norm": 1.2553156118138988, "learning_rate": 7.1798412150500516e-06, "loss": 1.4952, "reason_loss": 0.5057691335678101, "step": 2179, "utility_loss": 0.9894630312919617 }, { "cosine_similarity": 0.20874375889872784, "epoch": 2.031686859273066, "grad_norm": 0.9257498387027525, "learning_rate": 7.172937521574042e-06, "loss": 1.2658, "reason_loss": 0.4999467730522156, "step": 2180, "utility_loss": 0.7658476233482361 }, { "cosine_similarity": 0.3209656572001257, "epoch": 2.032618825722274, "grad_norm": 1.095084532307375, "learning_rate": 7.166033828098034e-06, "loss": 1.4253, "reason_loss": 0.5087014436721802, "step": 2181, "utility_loss": 0.9165546894073486 }, { "cosine_similarity": 0.11663399224503375, "epoch": 2.0335507921714817, "grad_norm": 1.0176543500182655, "learning_rate": 7.159130134622023e-06, "loss": 1.106, "reason_loss": 0.501057505607605, "step": 2182, "utility_loss": 0.604988157749176 }, { "cosine_similarity": -0.28099632097946203, "epoch": 2.0344827586206895, "grad_norm": 1.412569388659629, "learning_rate": 7.152226441146014e-06, "loss": 1.7361, "reason_loss": 0.4973401427268982, "step": 2183, "utility_loss": 1.2387303113937378 }, { "cosine_similarity": 0.14201562986834762, "epoch": 2.0354147250698973, "grad_norm": 1.7230016542801103, "learning_rate": 7.1453227476700035e-06, "loss": 1.6481, "reason_loss": 0.4736683964729309, "step": 2184, "utility_loss": 1.1744153499603271 }, { "cosine_similarity": 0.2478645106717436, "epoch": 2.0363466915191055, "grad_norm": 1.2361313897097852, "learning_rate": 7.138419054193994e-06, "loss": 1.2779, "reason_loss": 0.44923269748687744, "step": 2185, "utility_loss": 0.8286929130554199 }, { "cosine_similarity": -0.06829906621757699, "epoch": 2.0372786579683133, "grad_norm": 1.2431739270854327, "learning_rate": 7.131515360717984e-06, "loss": 1.3768, "reason_loss": 0.4773288071155548, "step": 2186, "utility_loss": 0.8994839191436768 }, { "cosine_similarity": 0.2680194307596057, "epoch": 2.038210624417521, "grad_norm": 1.1239839263065718, "learning_rate": 7.124611667241975e-06, "loss": 1.6065, "reason_loss": 0.5109223127365112, "step": 2187, "utility_loss": 1.0956225395202637 }, { "cosine_similarity": 0.20922206760569378, "epoch": 2.039142590866729, "grad_norm": 1.2354859078146159, "learning_rate": 7.117707973765965e-06, "loss": 1.6357, "reason_loss": 0.4916754364967346, "step": 2188, "utility_loss": 1.1440740823745728 }, { "cosine_similarity": 0.1682625319568884, "epoch": 2.0400745573159367, "grad_norm": 0.9260835138008907, "learning_rate": 7.1108042802899555e-06, "loss": 1.2425, "reason_loss": 0.47393855452537537, "step": 2189, "utility_loss": 0.7685294151306152 }, { "cosine_similarity": 0.24725934364763805, "epoch": 2.0410065237651445, "grad_norm": 1.4273269687394095, "learning_rate": 7.103900586813946e-06, "loss": 1.6017, "reason_loss": 0.4922603964805603, "step": 2190, "utility_loss": 1.1094634532928467 }, { "cosine_similarity": 0.32523694899410494, "epoch": 2.0419384902143523, "grad_norm": 1.2857133421036002, "learning_rate": 7.096996893337936e-06, "loss": 1.5416, "reason_loss": 0.5271738171577454, "step": 2191, "utility_loss": 1.0144370794296265 }, { "cosine_similarity": 0.37022479365138167, "epoch": 2.04287045666356, "grad_norm": 1.1214248459346439, "learning_rate": 7.090093199861927e-06, "loss": 1.7904, "reason_loss": 0.5008265376091003, "step": 2192, "utility_loss": 1.289549708366394 }, { "cosine_similarity": 0.16125820793079323, "epoch": 2.043802423112768, "grad_norm": 1.1146757601845445, "learning_rate": 7.083189506385917e-06, "loss": 1.3623, "reason_loss": 0.4774605929851532, "step": 2193, "utility_loss": 0.884857714176178 }, { "cosine_similarity": 0.44033885596260913, "epoch": 2.0447343895619756, "grad_norm": 1.0444444483817714, "learning_rate": 7.0762858129099075e-06, "loss": 1.6744, "reason_loss": 0.4941718578338623, "step": 2194, "utility_loss": 1.1802119016647339 }, { "cosine_similarity": 0.20315901106249093, "epoch": 2.0456663560111834, "grad_norm": 1.220978476818249, "learning_rate": 7.069382119433897e-06, "loss": 1.7241, "reason_loss": 0.5409271717071533, "step": 2195, "utility_loss": 1.1831929683685303 }, { "cosine_similarity": 0.12316067511256051, "epoch": 2.0465983224603916, "grad_norm": 1.0510500245291827, "learning_rate": 7.062478425957888e-06, "loss": 1.3592, "reason_loss": 0.516180157661438, "step": 2196, "utility_loss": 0.8430265784263611 }, { "cosine_similarity": 0.22312389830164361, "epoch": 2.0475302889095994, "grad_norm": 1.1056446748048663, "learning_rate": 7.055574732481878e-06, "loss": 1.4516, "reason_loss": 0.4723650813102722, "step": 2197, "utility_loss": 0.9792007207870483 }, { "cosine_similarity": 0.1759053454846447, "epoch": 2.048462255358807, "grad_norm": 1.0679534522069711, "learning_rate": 7.048671039005869e-06, "loss": 1.1735, "reason_loss": 0.5173155069351196, "step": 2198, "utility_loss": 0.6562333703041077 }, { "cosine_similarity": 0.05691244946062487, "epoch": 2.049394221808015, "grad_norm": 1.0882198032924688, "learning_rate": 7.041767345529859e-06, "loss": 1.4754, "reason_loss": 0.5276132822036743, "step": 2199, "utility_loss": 0.9478106498718262 }, { "cosine_similarity": 0.046530119600216406, "epoch": 2.050326188257223, "grad_norm": 1.273647808587827, "learning_rate": 7.034863652053849e-06, "loss": 1.8315, "reason_loss": 0.4829106330871582, "step": 2200, "utility_loss": 1.3486156463623047 }, { "cosine_similarity": 0.0824283627521087, "epoch": 2.0512581547064306, "grad_norm": 1.0805154620728596, "learning_rate": 7.02795995857784e-06, "loss": 1.1375, "reason_loss": 0.4834393262863159, "step": 2201, "utility_loss": 0.6540476679801941 }, { "cosine_similarity": -0.2660550279760336, "epoch": 2.0521901211556384, "grad_norm": 1.321317377571165, "learning_rate": 7.02105626510183e-06, "loss": 1.5884, "reason_loss": 0.5080610513687134, "step": 2202, "utility_loss": 1.0803040266036987 }, { "cosine_similarity": 0.15922937335688203, "epoch": 2.053122087604846, "grad_norm": 1.0232878492339732, "learning_rate": 7.014152571625821e-06, "loss": 1.4129, "reason_loss": 0.48735010623931885, "step": 2203, "utility_loss": 0.9255408048629761 }, { "cosine_similarity": -0.04779432981128099, "epoch": 2.054054054054054, "grad_norm": 1.0595397570407505, "learning_rate": 7.0072488781498106e-06, "loss": 1.4122, "reason_loss": 0.4678645431995392, "step": 2204, "utility_loss": 0.9443588256835938 }, { "cosine_similarity": 0.07204480167174247, "epoch": 2.0549860205032617, "grad_norm": 1.298022466155197, "learning_rate": 7.000345184673801e-06, "loss": 1.3725, "reason_loss": 0.4886223375797272, "step": 2205, "utility_loss": 0.8839027285575867 }, { "cosine_similarity": 0.23320123080741273, "epoch": 2.0559179869524695, "grad_norm": 1.033824107155529, "learning_rate": 6.993441491197791e-06, "loss": 1.4565, "reason_loss": 0.5100136995315552, "step": 2206, "utility_loss": 0.946517288684845 }, { "cosine_similarity": 0.059088757448047605, "epoch": 2.0568499534016778, "grad_norm": 1.2922077905875762, "learning_rate": 6.986537797721782e-06, "loss": 1.5481, "reason_loss": 0.4928751587867737, "step": 2207, "utility_loss": 1.0551807880401611 }, { "cosine_similarity": 0.041058827058334565, "epoch": 2.0577819198508855, "grad_norm": 1.6550736278347624, "learning_rate": 6.979634104245772e-06, "loss": 1.5347, "reason_loss": 0.47686195373535156, "step": 2208, "utility_loss": 1.0578385591506958 }, { "cosine_similarity": 0.050786996341509634, "epoch": 2.0587138863000933, "grad_norm": 0.9688495151854546, "learning_rate": 6.9727304107697625e-06, "loss": 1.4173, "reason_loss": 0.49647724628448486, "step": 2209, "utility_loss": 0.920785665512085 }, { "cosine_similarity": 0.08763779466828538, "epoch": 2.059645852749301, "grad_norm": 0.9202792386189733, "learning_rate": 6.965826717293753e-06, "loss": 1.5094, "reason_loss": 0.47258707880973816, "step": 2210, "utility_loss": 1.0368566513061523 }, { "cosine_similarity": 0.05346479606986849, "epoch": 2.060577819198509, "grad_norm": 1.2688829654103202, "learning_rate": 6.958923023817743e-06, "loss": 1.5294, "reason_loss": 0.4803463816642761, "step": 2211, "utility_loss": 1.0490244626998901 }, { "cosine_similarity": 0.15465655022377253, "epoch": 2.0615097856477167, "grad_norm": 0.9793164971655216, "learning_rate": 6.952019330341734e-06, "loss": 1.2729, "reason_loss": 0.4716976284980774, "step": 2212, "utility_loss": 0.8012173771858215 }, { "cosine_similarity": -0.21593629056229902, "epoch": 2.0624417520969245, "grad_norm": 1.3356420776742075, "learning_rate": 6.945115636865724e-06, "loss": 1.3892, "reason_loss": 0.5091490745544434, "step": 2213, "utility_loss": 0.8800612688064575 }, { "cosine_similarity": -0.031405131447160936, "epoch": 2.0633737185461323, "grad_norm": 1.1497713078499854, "learning_rate": 6.9382119433897145e-06, "loss": 1.3553, "reason_loss": 0.47757214307785034, "step": 2214, "utility_loss": 0.8777774572372437 }, { "cosine_similarity": 0.030591282518017154, "epoch": 2.06430568499534, "grad_norm": 1.2046334635290694, "learning_rate": 6.931308249913704e-06, "loss": 1.4736, "reason_loss": 0.5145754814147949, "step": 2215, "utility_loss": 0.9589858651161194 }, { "cosine_similarity": -0.08412020287831724, "epoch": 2.065237651444548, "grad_norm": 1.1412135930239007, "learning_rate": 6.924404556437695e-06, "loss": 1.2465, "reason_loss": 0.4513168931007385, "step": 2216, "utility_loss": 0.7951728105545044 }, { "cosine_similarity": -0.03645112154556538, "epoch": 2.0661696178937556, "grad_norm": 1.2272283090555125, "learning_rate": 6.917500862961685e-06, "loss": 1.3397, "reason_loss": 0.5105926990509033, "step": 2217, "utility_loss": 0.8291012644767761 }, { "cosine_similarity": 0.14303708683617636, "epoch": 2.0671015843429634, "grad_norm": 1.115547841022857, "learning_rate": 6.910597169485676e-06, "loss": 1.6341, "reason_loss": 0.45473337173461914, "step": 2218, "utility_loss": 1.1793277263641357 }, { "cosine_similarity": 0.08054045007245017, "epoch": 2.0680335507921717, "grad_norm": 1.5315885627325996, "learning_rate": 6.903693476009666e-06, "loss": 1.5639, "reason_loss": 0.4921426773071289, "step": 2219, "utility_loss": 1.0718069076538086 }, { "cosine_similarity": 0.11961650863634396, "epoch": 2.0689655172413794, "grad_norm": 1.2607396564145712, "learning_rate": 6.896789782533656e-06, "loss": 1.257, "reason_loss": 0.48349621891975403, "step": 2220, "utility_loss": 0.7734963893890381 }, { "cosine_similarity": 0.19109835013849644, "epoch": 2.0698974836905872, "grad_norm": 1.1537815410519772, "learning_rate": 6.889886089057647e-06, "loss": 1.5959, "reason_loss": 0.47714272141456604, "step": 2221, "utility_loss": 1.1187376976013184 }, { "cosine_similarity": 0.245178261997951, "epoch": 2.070829450139795, "grad_norm": 1.3124713738178468, "learning_rate": 6.882982395581637e-06, "loss": 1.5315, "reason_loss": 0.48610830307006836, "step": 2222, "utility_loss": 1.045413851737976 }, { "cosine_similarity": 0.07438879780315752, "epoch": 2.071761416589003, "grad_norm": 1.231894300043486, "learning_rate": 6.876078702105628e-06, "loss": 1.4196, "reason_loss": 0.4830669164657593, "step": 2223, "utility_loss": 0.936488151550293 }, { "cosine_similarity": -0.03274801201117539, "epoch": 2.0726933830382106, "grad_norm": 1.32604505152634, "learning_rate": 6.869175008629618e-06, "loss": 1.7105, "reason_loss": 0.4657355844974518, "step": 2224, "utility_loss": 1.2447510957717896 }, { "cosine_similarity": 0.04597328032901473, "epoch": 2.0736253494874184, "grad_norm": 1.1579864635561172, "learning_rate": 6.862271315153608e-06, "loss": 1.4554, "reason_loss": 0.46789854764938354, "step": 2225, "utility_loss": 0.9874532222747803 }, { "cosine_similarity": 0.09264698010340368, "epoch": 2.074557315936626, "grad_norm": 1.1348708570951958, "learning_rate": 6.855367621677597e-06, "loss": 1.6246, "reason_loss": 0.44587820768356323, "step": 2226, "utility_loss": 1.178720474243164 }, { "cosine_similarity": 0.050445730582291966, "epoch": 2.075489282385834, "grad_norm": 1.422663586484961, "learning_rate": 6.848463928201589e-06, "loss": 1.5208, "reason_loss": 0.5084042549133301, "step": 2227, "utility_loss": 1.0123720169067383 }, { "cosine_similarity": 0.28315342699286133, "epoch": 2.0764212488350418, "grad_norm": 0.9551938103887821, "learning_rate": 6.841560234725578e-06, "loss": 1.5096, "reason_loss": 0.4838784337043762, "step": 2228, "utility_loss": 1.0257571935653687 }, { "cosine_similarity": 0.14199928307127854, "epoch": 2.0773532152842495, "grad_norm": 1.51541200853217, "learning_rate": 6.834656541249569e-06, "loss": 1.4498, "reason_loss": 0.48346981406211853, "step": 2229, "utility_loss": 0.9662842154502869 }, { "cosine_similarity": 0.14355135759043924, "epoch": 2.0782851817334578, "grad_norm": 1.0850906254666304, "learning_rate": 6.82775284777356e-06, "loss": 1.258, "reason_loss": 0.47360578179359436, "step": 2230, "utility_loss": 0.7843853831291199 }, { "cosine_similarity": 0.05398885754661218, "epoch": 2.0792171481826656, "grad_norm": 1.0566427299112668, "learning_rate": 6.820849154297549e-06, "loss": 1.3208, "reason_loss": 0.5335067510604858, "step": 2231, "utility_loss": 0.7873053550720215 }, { "cosine_similarity": -0.037330100573537674, "epoch": 2.0801491146318734, "grad_norm": 1.223068324535243, "learning_rate": 6.813945460821541e-06, "loss": 1.5061, "reason_loss": 0.46556779742240906, "step": 2232, "utility_loss": 1.0405099391937256 }, { "cosine_similarity": 0.09086284575593832, "epoch": 2.081081081081081, "grad_norm": 1.1149491358753867, "learning_rate": 6.80704176734553e-06, "loss": 1.1451, "reason_loss": 0.47020673751831055, "step": 2233, "utility_loss": 0.6748559474945068 }, { "cosine_similarity": 0.15154947609190256, "epoch": 2.082013047530289, "grad_norm": 1.2690962371127952, "learning_rate": 6.800138073869521e-06, "loss": 1.5478, "reason_loss": 0.4771241545677185, "step": 2234, "utility_loss": 1.0706591606140137 }, { "cosine_similarity": -0.15337215935011664, "epoch": 2.0829450139794967, "grad_norm": 1.182905154393656, "learning_rate": 6.7932343803935106e-06, "loss": 1.4751, "reason_loss": 0.4632797837257385, "step": 2235, "utility_loss": 1.011770248413086 }, { "cosine_similarity": 0.11532119927569706, "epoch": 2.0838769804287045, "grad_norm": 1.2901268343687846, "learning_rate": 6.786330686917501e-06, "loss": 1.3354, "reason_loss": 0.4918275475502014, "step": 2236, "utility_loss": 0.8435800075531006 }, { "cosine_similarity": 0.201042992155535, "epoch": 2.0848089468779123, "grad_norm": 1.1866826185103874, "learning_rate": 6.779426993441491e-06, "loss": 1.6128, "reason_loss": 0.5244809985160828, "step": 2237, "utility_loss": 1.0883097648620605 }, { "cosine_similarity": 0.29649284608265397, "epoch": 2.08574091332712, "grad_norm": 1.2119108838989765, "learning_rate": 6.772523299965482e-06, "loss": 1.6161, "reason_loss": 0.520176887512207, "step": 2238, "utility_loss": 1.0959097146987915 }, { "cosine_similarity": 0.11642812343393794, "epoch": 2.086672879776328, "grad_norm": 1.086680199380452, "learning_rate": 6.765619606489472e-06, "loss": 1.408, "reason_loss": 0.49075543880462646, "step": 2239, "utility_loss": 0.9172749519348145 }, { "cosine_similarity": 0.1504504680755715, "epoch": 2.0876048462255357, "grad_norm": 1.4934195469165708, "learning_rate": 6.7587159130134625e-06, "loss": 1.5885, "reason_loss": 0.498779833316803, "step": 2240, "utility_loss": 1.089747428894043 }, { "cosine_similarity": 0.014050567746125444, "epoch": 2.088536812674744, "grad_norm": 1.2827079293027466, "learning_rate": 6.751812219537453e-06, "loss": 1.6207, "reason_loss": 0.48425865173339844, "step": 2241, "utility_loss": 1.1364235877990723 }, { "cosine_similarity": 0.17975211447512168, "epoch": 2.0894687791239517, "grad_norm": 1.2436276957426409, "learning_rate": 6.744908526061443e-06, "loss": 1.4281, "reason_loss": 0.49716347455978394, "step": 2242, "utility_loss": 0.9309661388397217 }, { "cosine_similarity": 0.04140680676367346, "epoch": 2.0904007455731595, "grad_norm": 1.1076511488822385, "learning_rate": 6.738004832585434e-06, "loss": 1.2768, "reason_loss": 0.4840949773788452, "step": 2243, "utility_loss": 0.792698323726654 }, { "cosine_similarity": 0.10703740032754598, "epoch": 2.0913327120223673, "grad_norm": 1.311138146254791, "learning_rate": 6.731101139109424e-06, "loss": 1.3599, "reason_loss": 0.48506960272789, "step": 2244, "utility_loss": 0.8748140335083008 }, { "cosine_similarity": -0.09171971326021172, "epoch": 2.092264678471575, "grad_norm": 0.9902181327754824, "learning_rate": 6.7241974456334145e-06, "loss": 1.2919, "reason_loss": 0.49653059244155884, "step": 2245, "utility_loss": 0.7953688502311707 }, { "cosine_similarity": 0.02073953095652366, "epoch": 2.093196644920783, "grad_norm": 1.3103791986135747, "learning_rate": 6.717293752157404e-06, "loss": 1.4382, "reason_loss": 0.4925681948661804, "step": 2246, "utility_loss": 0.9456204771995544 }, { "cosine_similarity": 0.2978503843305991, "epoch": 2.0941286113699906, "grad_norm": 1.4899498631451344, "learning_rate": 6.710390058681395e-06, "loss": 1.564, "reason_loss": 0.525496244430542, "step": 2247, "utility_loss": 1.0385419130325317 }, { "cosine_similarity": 0.12249266891927263, "epoch": 2.0950605778191984, "grad_norm": 0.9514614614814305, "learning_rate": 6.703486365205385e-06, "loss": 1.1922, "reason_loss": 0.4740194082260132, "step": 2248, "utility_loss": 0.7181954383850098 }, { "cosine_similarity": 0.06092360538938178, "epoch": 2.095992544268406, "grad_norm": 1.1000846949441654, "learning_rate": 6.696582671729376e-06, "loss": 1.3135, "reason_loss": 0.4941428303718567, "step": 2249, "utility_loss": 0.819334864616394 }, { "cosine_similarity": 0.13214350489324625, "epoch": 2.096924510717614, "grad_norm": 1.4195279798267797, "learning_rate": 6.6896789782533665e-06, "loss": 1.596, "reason_loss": 0.4987204372882843, "step": 2250, "utility_loss": 1.0972329378128052 }, { "cosine_similarity": 0.11704505630426326, "epoch": 2.0978564771668218, "grad_norm": 1.2866287415896456, "learning_rate": 6.682775284777356e-06, "loss": 1.42, "reason_loss": 0.49688130617141724, "step": 2251, "utility_loss": 0.9231119155883789 }, { "cosine_similarity": 0.01546459219825708, "epoch": 2.09878844361603, "grad_norm": 1.168232937677231, "learning_rate": 6.675871591301347e-06, "loss": 1.8636, "reason_loss": 0.49083906412124634, "step": 2252, "utility_loss": 1.3727160692214966 }, { "cosine_similarity": 0.02656879624497741, "epoch": 2.099720410065238, "grad_norm": 1.160709680635777, "learning_rate": 6.668967897825337e-06, "loss": 1.4186, "reason_loss": 0.49440664052963257, "step": 2253, "utility_loss": 0.9242342710494995 }, { "cosine_similarity": 0.14315330790249692, "epoch": 2.1006523765144456, "grad_norm": 1.4962254445357062, "learning_rate": 6.662064204349328e-06, "loss": 1.7285, "reason_loss": 0.4975162744522095, "step": 2254, "utility_loss": 1.2310285568237305 }, { "cosine_similarity": 0.10349650034785902, "epoch": 2.1015843429636534, "grad_norm": 0.9869801478815466, "learning_rate": 6.655160510873318e-06, "loss": 1.4079, "reason_loss": 0.5221444368362427, "step": 2255, "utility_loss": 0.8857911825180054 }, { "cosine_similarity": 0.170935974027015, "epoch": 2.102516309412861, "grad_norm": 1.1978776708131014, "learning_rate": 6.648256817397308e-06, "loss": 1.9365, "reason_loss": 0.5198826789855957, "step": 2256, "utility_loss": 1.4166122674942017 }, { "cosine_similarity": -0.012174147758584029, "epoch": 2.103448275862069, "grad_norm": 1.1875461336973832, "learning_rate": 6.641353123921298e-06, "loss": 1.8781, "reason_loss": 0.4756908118724823, "step": 2257, "utility_loss": 1.4024336338043213 }, { "cosine_similarity": -0.06246885462379004, "epoch": 2.1043802423112767, "grad_norm": 1.0240782750049442, "learning_rate": 6.634449430445289e-06, "loss": 1.3283, "reason_loss": 0.5106946229934692, "step": 2258, "utility_loss": 0.8175631761550903 }, { "cosine_similarity": 0.0673525233373269, "epoch": 2.1053122087604845, "grad_norm": 1.015378288526708, "learning_rate": 6.627545736969279e-06, "loss": 1.6143, "reason_loss": 0.49807286262512207, "step": 2259, "utility_loss": 1.1162711381912231 }, { "cosine_similarity": -0.052530440705539284, "epoch": 2.1062441752096923, "grad_norm": 1.1506596079572855, "learning_rate": 6.6206420434932696e-06, "loss": 1.2943, "reason_loss": 0.47000470757484436, "step": 2260, "utility_loss": 0.8243294954299927 }, { "cosine_similarity": 0.17081105462055018, "epoch": 2.1071761416589, "grad_norm": 1.139230337146722, "learning_rate": 6.61373835001726e-06, "loss": 1.5971, "reason_loss": 0.5015926361083984, "step": 2261, "utility_loss": 1.0954840183258057 }, { "cosine_similarity": 0.03351924753098649, "epoch": 2.108108108108108, "grad_norm": 1.2803950853214763, "learning_rate": 6.60683465654125e-06, "loss": 1.6949, "reason_loss": 0.4802168905735016, "step": 2262, "utility_loss": 1.2147066593170166 }, { "cosine_similarity": -0.019837557011802374, "epoch": 2.109040074557316, "grad_norm": 1.0899350006035422, "learning_rate": 6.599930963065241e-06, "loss": 1.4831, "reason_loss": 0.5029987692832947, "step": 2263, "utility_loss": 0.980087399482727 }, { "cosine_similarity": 0.09142296955795423, "epoch": 2.109972041006524, "grad_norm": 1.1206764190259537, "learning_rate": 6.593027269589231e-06, "loss": 1.1834, "reason_loss": 0.47388017177581787, "step": 2264, "utility_loss": 0.7095456123352051 }, { "cosine_similarity": -0.0551755245142314, "epoch": 2.1109040074557317, "grad_norm": 1.0666058460087426, "learning_rate": 6.5861235761132215e-06, "loss": 1.3884, "reason_loss": 0.4876154959201813, "step": 2265, "utility_loss": 0.9007759690284729 }, { "cosine_similarity": 0.13106973215606552, "epoch": 2.1118359739049395, "grad_norm": 0.984701965792291, "learning_rate": 6.579219882637211e-06, "loss": 1.4147, "reason_loss": 0.4803142845630646, "step": 2266, "utility_loss": 0.9343518614768982 }, { "cosine_similarity": 0.146776099348854, "epoch": 2.1127679403541473, "grad_norm": 1.3532991858559549, "learning_rate": 6.572316189161202e-06, "loss": 1.8063, "reason_loss": 0.4952125549316406, "step": 2267, "utility_loss": 1.3110922574996948 }, { "cosine_similarity": 0.049648284166291905, "epoch": 2.113699906803355, "grad_norm": 1.140447125956021, "learning_rate": 6.565412495685192e-06, "loss": 1.6199, "reason_loss": 0.48967328667640686, "step": 2268, "utility_loss": 1.1302448511123657 }, { "cosine_similarity": 0.15110007565169725, "epoch": 2.114631873252563, "grad_norm": 1.0147756409738475, "learning_rate": 6.558508802209183e-06, "loss": 1.4341, "reason_loss": 0.49408528208732605, "step": 2269, "utility_loss": 0.9400097131729126 }, { "cosine_similarity": 0.10229402955803342, "epoch": 2.1155638397017706, "grad_norm": 1.304052723241347, "learning_rate": 6.551605108733173e-06, "loss": 1.8054, "reason_loss": 0.5422918200492859, "step": 2270, "utility_loss": 1.2630926370620728 }, { "cosine_similarity": 0.013725445574747861, "epoch": 2.1164958061509784, "grad_norm": 1.1586093510316415, "learning_rate": 6.544701415257163e-06, "loss": 1.4607, "reason_loss": 0.5317641496658325, "step": 2271, "utility_loss": 0.928913950920105 }, { "cosine_similarity": -0.07718239304774872, "epoch": 2.117427772600186, "grad_norm": 1.0731287582987024, "learning_rate": 6.537797721781154e-06, "loss": 1.6117, "reason_loss": 0.4796972870826721, "step": 2272, "utility_loss": 1.132016658782959 }, { "cosine_similarity": -0.0823794483401004, "epoch": 2.118359739049394, "grad_norm": 1.1308889695255, "learning_rate": 6.530894028305144e-06, "loss": 1.3357, "reason_loss": 0.4831485152244568, "step": 2273, "utility_loss": 0.852537989616394 }, { "cosine_similarity": 0.228777441632483, "epoch": 2.1192917054986022, "grad_norm": 1.533139570137581, "learning_rate": 6.523990334829135e-06, "loss": 1.5387, "reason_loss": 0.5031462907791138, "step": 2274, "utility_loss": 1.0355257987976074 }, { "cosine_similarity": 0.17199291818216875, "epoch": 2.12022367194781, "grad_norm": 0.9577784731958301, "learning_rate": 6.517086641353124e-06, "loss": 1.2488, "reason_loss": 0.4597164988517761, "step": 2275, "utility_loss": 0.7891296148300171 }, { "cosine_similarity": 0.19696890218731905, "epoch": 2.121155638397018, "grad_norm": 1.1678871256342436, "learning_rate": 6.510182947877115e-06, "loss": 1.5326, "reason_loss": 0.48598217964172363, "step": 2276, "utility_loss": 1.0466595888137817 }, { "cosine_similarity": 0.18511528236581204, "epoch": 2.1220876048462256, "grad_norm": 1.188276614137066, "learning_rate": 6.503279254401104e-06, "loss": 1.4979, "reason_loss": 0.48094791173934937, "step": 2277, "utility_loss": 1.0169446468353271 }, { "cosine_similarity": 0.09103507566569659, "epoch": 2.1230195712954334, "grad_norm": 1.278108306394111, "learning_rate": 6.496375560925096e-06, "loss": 1.6128, "reason_loss": 0.5160059928894043, "step": 2278, "utility_loss": 1.0968139171600342 }, { "cosine_similarity": 0.10219944638701087, "epoch": 2.123951537744641, "grad_norm": 1.2961359270613986, "learning_rate": 6.489471867449085e-06, "loss": 1.1943, "reason_loss": 0.4794624149799347, "step": 2279, "utility_loss": 0.7148720622062683 }, { "cosine_similarity": 0.09943343205851825, "epoch": 2.124883504193849, "grad_norm": 0.8979266858026741, "learning_rate": 6.482568173973076e-06, "loss": 1.1797, "reason_loss": 0.5206936597824097, "step": 2280, "utility_loss": 0.6589757204055786 }, { "cosine_similarity": 0.23406321339833097, "epoch": 2.1258154706430568, "grad_norm": 1.3368992546773648, "learning_rate": 6.475664480497067e-06, "loss": 1.2964, "reason_loss": 0.45330750942230225, "step": 2281, "utility_loss": 0.8431288599967957 }, { "cosine_similarity": 0.1390484571750319, "epoch": 2.1267474370922645, "grad_norm": 1.3290201012944303, "learning_rate": 6.468760787021056e-06, "loss": 1.4268, "reason_loss": 0.48241978883743286, "step": 2282, "utility_loss": 0.9443780779838562 }, { "cosine_similarity": 0.13427221059036692, "epoch": 2.1276794035414723, "grad_norm": 1.3281127203471108, "learning_rate": 6.461857093545048e-06, "loss": 1.8944, "reason_loss": 0.4902702867984772, "step": 2283, "utility_loss": 1.4041199684143066 }, { "cosine_similarity": -0.17976355201426036, "epoch": 2.12861136999068, "grad_norm": 1.4456538715990765, "learning_rate": 6.454953400069037e-06, "loss": 1.5167, "reason_loss": 0.48224937915802, "step": 2284, "utility_loss": 1.0344979763031006 }, { "cosine_similarity": -0.008661805500812794, "epoch": 2.1295433364398884, "grad_norm": 1.0646427624791162, "learning_rate": 6.448049706593028e-06, "loss": 1.2223, "reason_loss": 0.4691608250141144, "step": 2285, "utility_loss": 0.7531017065048218 }, { "cosine_similarity": 0.3609337790138562, "epoch": 2.130475302889096, "grad_norm": 1.332618891935055, "learning_rate": 6.441146013117018e-06, "loss": 1.8682, "reason_loss": 0.5005480051040649, "step": 2286, "utility_loss": 1.3676203489303589 }, { "cosine_similarity": 0.17668890296945436, "epoch": 2.131407269338304, "grad_norm": 1.20692378354556, "learning_rate": 6.434242319641008e-06, "loss": 1.4994, "reason_loss": 0.5073174238204956, "step": 2287, "utility_loss": 0.9920977354049683 }, { "cosine_similarity": 0.07143723445136473, "epoch": 2.1323392357875117, "grad_norm": 1.2301626126207632, "learning_rate": 6.427338626164998e-06, "loss": 1.3317, "reason_loss": 0.47757720947265625, "step": 2288, "utility_loss": 0.8541666269302368 }, { "cosine_similarity": 0.2799037163514522, "epoch": 2.1332712022367195, "grad_norm": 1.2074816562678286, "learning_rate": 6.420434932688989e-06, "loss": 1.6389, "reason_loss": 0.4951079487800598, "step": 2289, "utility_loss": 1.1438121795654297 }, { "cosine_similarity": 0.03933232064837218, "epoch": 2.1342031686859273, "grad_norm": 1.3433431405138094, "learning_rate": 6.413531239212979e-06, "loss": 1.643, "reason_loss": 0.4747377038002014, "step": 2290, "utility_loss": 1.1682865619659424 }, { "cosine_similarity": 0.10969198750498832, "epoch": 2.135135135135135, "grad_norm": 1.1512099590328282, "learning_rate": 6.4066275457369696e-06, "loss": 1.4923, "reason_loss": 0.4808337986469269, "step": 2291, "utility_loss": 1.0114214420318604 }, { "cosine_similarity": 0.00782339222386027, "epoch": 2.136067101584343, "grad_norm": 1.104606165038516, "learning_rate": 6.39972385226096e-06, "loss": 1.2494, "reason_loss": 0.5383262634277344, "step": 2292, "utility_loss": 0.7111074328422546 }, { "cosine_similarity": 0.015248519387326754, "epoch": 2.1369990680335507, "grad_norm": 1.1618007507260804, "learning_rate": 6.39282015878495e-06, "loss": 1.6221, "reason_loss": 0.4410094618797302, "step": 2293, "utility_loss": 1.181121826171875 }, { "cosine_similarity": 0.03843693664644097, "epoch": 2.1379310344827585, "grad_norm": 1.1327588242640694, "learning_rate": 6.385916465308941e-06, "loss": 1.4212, "reason_loss": 0.5038673877716064, "step": 2294, "utility_loss": 0.9173789024353027 }, { "cosine_similarity": 0.08254847951396563, "epoch": 2.1388630009319662, "grad_norm": 1.2965927408643327, "learning_rate": 6.379012771832931e-06, "loss": 1.7069, "reason_loss": 0.4960600435733795, "step": 2295, "utility_loss": 1.210851788520813 }, { "cosine_similarity": 0.04052277706708962, "epoch": 2.1397949673811745, "grad_norm": 1.1645618179669814, "learning_rate": 6.3721090783569215e-06, "loss": 1.3678, "reason_loss": 0.5055952072143555, "step": 2296, "utility_loss": 0.8621964454650879 }, { "cosine_similarity": 0.06447935276176722, "epoch": 2.1407269338303823, "grad_norm": 1.5230860456811381, "learning_rate": 6.365205384880911e-06, "loss": 1.6776, "reason_loss": 0.49273669719696045, "step": 2297, "utility_loss": 1.1848888397216797 }, { "cosine_similarity": 0.2905027341661047, "epoch": 2.14165890027959, "grad_norm": 1.137449727561903, "learning_rate": 6.358301691404902e-06, "loss": 1.7318, "reason_loss": 0.5028207302093506, "step": 2298, "utility_loss": 1.2289408445358276 }, { "cosine_similarity": 0.2022913799855354, "epoch": 2.142590866728798, "grad_norm": 1.3319161333323577, "learning_rate": 6.351397997928892e-06, "loss": 1.4112, "reason_loss": 0.4569109082221985, "step": 2299, "utility_loss": 0.954249382019043 }, { "cosine_similarity": 0.09030554716447678, "epoch": 2.1435228331780056, "grad_norm": 1.3973886976898713, "learning_rate": 6.344494304452883e-06, "loss": 1.471, "reason_loss": 0.5076994895935059, "step": 2300, "utility_loss": 0.9632569551467896 }, { "cosine_similarity": 0.1561705672064915, "epoch": 2.1444547996272134, "grad_norm": 1.2787816392472244, "learning_rate": 6.3375906109768735e-06, "loss": 1.3444, "reason_loss": 0.48075640201568604, "step": 2301, "utility_loss": 0.8636486530303955 }, { "cosine_similarity": 0.11382779620506711, "epoch": 2.145386766076421, "grad_norm": 1.115434534602042, "learning_rate": 6.330686917500863e-06, "loss": 1.3764, "reason_loss": 0.5207265019416809, "step": 2302, "utility_loss": 0.855722963809967 }, { "cosine_similarity": 0.01906497394604235, "epoch": 2.146318732525629, "grad_norm": 1.0606419525036959, "learning_rate": 6.323783224024854e-06, "loss": 1.3742, "reason_loss": 0.4812238812446594, "step": 2303, "utility_loss": 0.8929437398910522 }, { "cosine_similarity": 0.014672107840458509, "epoch": 2.147250698974837, "grad_norm": 1.0054326925471493, "learning_rate": 6.316879530548844e-06, "loss": 1.4219, "reason_loss": 0.4666874408721924, "step": 2304, "utility_loss": 0.9551831483840942 }, { "cosine_similarity": 0.07072046885873338, "epoch": 2.1481826654240446, "grad_norm": 1.1989559229588922, "learning_rate": 6.309975837072835e-06, "loss": 1.6375, "reason_loss": 0.4846010208129883, "step": 2305, "utility_loss": 1.152897834777832 }, { "cosine_similarity": -0.12349559790670549, "epoch": 2.1491146318732524, "grad_norm": 1.2621487372473759, "learning_rate": 6.303072143596825e-06, "loss": 1.36, "reason_loss": 0.45564284920692444, "step": 2306, "utility_loss": 0.9043222069740295 }, { "cosine_similarity": 0.05595065021418206, "epoch": 2.1500465983224606, "grad_norm": 0.9995672351034113, "learning_rate": 6.296168450120815e-06, "loss": 1.3658, "reason_loss": 0.5263322591781616, "step": 2307, "utility_loss": 0.8395161628723145 }, { "cosine_similarity": 0.19466722532330305, "epoch": 2.1509785647716684, "grad_norm": 1.233021999373667, "learning_rate": 6.289264756644805e-06, "loss": 1.5842, "reason_loss": 0.49889904260635376, "step": 2308, "utility_loss": 1.0852770805358887 }, { "cosine_similarity": 0.11602526562588025, "epoch": 2.151910531220876, "grad_norm": 1.1869117690278332, "learning_rate": 6.282361063168796e-06, "loss": 1.4278, "reason_loss": 0.4964302182197571, "step": 2309, "utility_loss": 0.931391716003418 }, { "cosine_similarity": -0.03497259784822501, "epoch": 2.152842497670084, "grad_norm": 1.1171282179199113, "learning_rate": 6.275457369692786e-06, "loss": 1.4059, "reason_loss": 0.46137821674346924, "step": 2310, "utility_loss": 0.9445317387580872 }, { "cosine_similarity": 0.17978345119593492, "epoch": 2.1537744641192917, "grad_norm": 1.1762386021206475, "learning_rate": 6.268553676216777e-06, "loss": 1.4654, "reason_loss": 0.5167402625083923, "step": 2311, "utility_loss": 0.9486262202262878 }, { "cosine_similarity": 0.02740299428956504, "epoch": 2.1547064305684995, "grad_norm": 1.371919521587519, "learning_rate": 6.261649982740767e-06, "loss": 1.6441, "reason_loss": 0.4952009320259094, "step": 2312, "utility_loss": 1.1489266157150269 }, { "cosine_similarity": 0.03474073052732873, "epoch": 2.1556383970177073, "grad_norm": 1.2200152361410876, "learning_rate": 6.254746289264757e-06, "loss": 1.748, "reason_loss": 0.518767237663269, "step": 2313, "utility_loss": 1.2292025089263916 }, { "cosine_similarity": 0.16383699866352994, "epoch": 2.156570363466915, "grad_norm": 1.282746469819626, "learning_rate": 6.247842595788748e-06, "loss": 1.2257, "reason_loss": 0.47589632868766785, "step": 2314, "utility_loss": 0.7498067021369934 }, { "cosine_similarity": 0.15890409228255478, "epoch": 2.157502329916123, "grad_norm": 1.207306231773159, "learning_rate": 6.240938902312738e-06, "loss": 1.4315, "reason_loss": 0.49961936473846436, "step": 2315, "utility_loss": 0.9318329095840454 }, { "cosine_similarity": 0.227411084156967, "epoch": 2.1584342963653307, "grad_norm": 1.252011885878634, "learning_rate": 6.2340352088367286e-06, "loss": 1.7656, "reason_loss": 0.5023448467254639, "step": 2316, "utility_loss": 1.2632694244384766 }, { "cosine_similarity": 0.10131114308200467, "epoch": 2.1593662628145385, "grad_norm": 1.3457103419616636, "learning_rate": 6.2271315153607184e-06, "loss": 1.4352, "reason_loss": 0.45487165451049805, "step": 2317, "utility_loss": 0.9803242087364197 }, { "cosine_similarity": 0.16325115355152697, "epoch": 2.1602982292637467, "grad_norm": 1.21044449112803, "learning_rate": 6.220227821884709e-06, "loss": 1.4681, "reason_loss": 0.48171812295913696, "step": 2318, "utility_loss": 0.9863547086715698 }, { "cosine_similarity": 0.047990557580279564, "epoch": 2.1612301957129545, "grad_norm": 1.1331982286747693, "learning_rate": 6.213324128408699e-06, "loss": 1.3056, "reason_loss": 0.4635430574417114, "step": 2319, "utility_loss": 0.8421038389205933 }, { "cosine_similarity": -0.09530129385041398, "epoch": 2.1621621621621623, "grad_norm": 1.1227768703818921, "learning_rate": 6.20642043493269e-06, "loss": 1.4901, "reason_loss": 0.47234997153282166, "step": 2320, "utility_loss": 1.017714023590088 }, { "cosine_similarity": 0.3688873236472074, "epoch": 2.16309412861137, "grad_norm": 1.5256744229913652, "learning_rate": 6.1995167414566805e-06, "loss": 1.8787, "reason_loss": 0.4953218400478363, "step": 2321, "utility_loss": 1.3834108114242554 }, { "cosine_similarity": 0.17701794175423455, "epoch": 2.164026095060578, "grad_norm": 1.1564589767717361, "learning_rate": 6.19261304798067e-06, "loss": 1.5421, "reason_loss": 0.5326105356216431, "step": 2322, "utility_loss": 1.0095134973526 }, { "cosine_similarity": -0.08373878944865314, "epoch": 2.1649580615097856, "grad_norm": 1.2769376552354699, "learning_rate": 6.185709354504661e-06, "loss": 2.1637, "reason_loss": 0.4759092330932617, "step": 2323, "utility_loss": 1.6877577304840088 }, { "cosine_similarity": 0.22922774356019135, "epoch": 2.1658900279589934, "grad_norm": 1.0542482632252317, "learning_rate": 6.178805661028651e-06, "loss": 1.3169, "reason_loss": 0.4772723317146301, "step": 2324, "utility_loss": 0.8395986557006836 }, { "cosine_similarity": 0.31483530605863835, "epoch": 2.1668219944082012, "grad_norm": 1.0274345482624256, "learning_rate": 6.171901967552642e-06, "loss": 1.3343, "reason_loss": 0.5274391174316406, "step": 2325, "utility_loss": 0.8068290948867798 }, { "cosine_similarity": 0.18963263730517826, "epoch": 2.167753960857409, "grad_norm": 1.3167053187259372, "learning_rate": 6.164998274076631e-06, "loss": 1.5421, "reason_loss": 0.5085176825523376, "step": 2326, "utility_loss": 1.0336253643035889 }, { "cosine_similarity": 0.13618181017555758, "epoch": 2.168685927306617, "grad_norm": 1.1724106685795264, "learning_rate": 6.158094580600622e-06, "loss": 1.3984, "reason_loss": 0.4883524775505066, "step": 2327, "utility_loss": 0.9100583791732788 }, { "cosine_similarity": -0.001226846484957333, "epoch": 2.1696178937558246, "grad_norm": 1.3851844916408484, "learning_rate": 6.151190887124611e-06, "loss": 1.3161, "reason_loss": 0.4870220422744751, "step": 2328, "utility_loss": 0.8290908336639404 }, { "cosine_similarity": -0.23251266787351138, "epoch": 2.170549860205033, "grad_norm": 1.1562260937960733, "learning_rate": 6.144287193648603e-06, "loss": 1.5928, "reason_loss": 0.5007132291793823, "step": 2329, "utility_loss": 1.0920933485031128 }, { "cosine_similarity": 0.11557466605986508, "epoch": 2.1714818266542406, "grad_norm": 0.9910856077239159, "learning_rate": 6.137383500172592e-06, "loss": 1.6165, "reason_loss": 0.4890815019607544, "step": 2330, "utility_loss": 1.1274371147155762 }, { "cosine_similarity": 0.047507072884370095, "epoch": 2.1724137931034484, "grad_norm": 1.1029272471323064, "learning_rate": 6.130479806696583e-06, "loss": 1.753, "reason_loss": 0.4818960428237915, "step": 2331, "utility_loss": 1.2710916996002197 }, { "cosine_similarity": 0.30244818754922015, "epoch": 2.173345759552656, "grad_norm": 1.2260459038702471, "learning_rate": 6.123576113220574e-06, "loss": 1.5809, "reason_loss": 0.49060720205307007, "step": 2332, "utility_loss": 1.0902812480926514 }, { "cosine_similarity": 0.14974251165120941, "epoch": 2.174277726001864, "grad_norm": 1.1677623708640505, "learning_rate": 6.116672419744563e-06, "loss": 1.443, "reason_loss": 0.4815565347671509, "step": 2333, "utility_loss": 0.961449146270752 }, { "cosine_similarity": -0.026557759090805082, "epoch": 2.1752096924510718, "grad_norm": 1.187072629337628, "learning_rate": 6.109768726268555e-06, "loss": 1.6177, "reason_loss": 0.48609310388565063, "step": 2334, "utility_loss": 1.1315836906433105 }, { "cosine_similarity": -0.04608147121508434, "epoch": 2.1761416589002796, "grad_norm": 1.6048293700181253, "learning_rate": 6.102865032792544e-06, "loss": 1.4291, "reason_loss": 0.46543076634407043, "step": 2335, "utility_loss": 0.9636878371238708 }, { "cosine_similarity": 0.04566683954564349, "epoch": 2.1770736253494873, "grad_norm": 1.266678350139517, "learning_rate": 6.095961339316535e-06, "loss": 1.6005, "reason_loss": 0.4590277075767517, "step": 2336, "utility_loss": 1.141444444656372 }, { "cosine_similarity": -0.1349620362009822, "epoch": 2.178005591798695, "grad_norm": 1.1809380630641955, "learning_rate": 6.089057645840525e-06, "loss": 1.3507, "reason_loss": 0.5000265836715698, "step": 2337, "utility_loss": 0.8506386280059814 }, { "cosine_similarity": 0.13218008901152392, "epoch": 2.178937558247903, "grad_norm": 1.023289903928054, "learning_rate": 6.082153952364515e-06, "loss": 1.3161, "reason_loss": 0.47971782088279724, "step": 2338, "utility_loss": 0.8363785743713379 }, { "cosine_similarity": -0.09667438693986738, "epoch": 2.1798695246971107, "grad_norm": 1.4362795087025113, "learning_rate": 6.075250258888505e-06, "loss": 1.4711, "reason_loss": 0.5128360390663147, "step": 2339, "utility_loss": 0.9582870006561279 }, { "cosine_similarity": -0.1491125083553109, "epoch": 2.180801491146319, "grad_norm": 1.031591802215214, "learning_rate": 6.068346565412496e-06, "loss": 1.4644, "reason_loss": 0.45427656173706055, "step": 2340, "utility_loss": 1.010119915008545 }, { "cosine_similarity": -0.01868422127196003, "epoch": 2.1817334575955267, "grad_norm": 1.2481307977028397, "learning_rate": 6.061442871936487e-06, "loss": 1.4307, "reason_loss": 0.49543753266334534, "step": 2341, "utility_loss": 0.9352982044219971 }, { "cosine_similarity": 0.024707166940333473, "epoch": 2.1826654240447345, "grad_norm": 1.3056735642222292, "learning_rate": 6.054539178460477e-06, "loss": 1.6035, "reason_loss": 0.5197217464447021, "step": 2342, "utility_loss": 1.083742380142212 }, { "cosine_similarity": 0.1297919388974837, "epoch": 2.1835973904939423, "grad_norm": 1.0491293490257754, "learning_rate": 6.047635484984467e-06, "loss": 1.7236, "reason_loss": 0.49506813287734985, "step": 2343, "utility_loss": 1.2285466194152832 }, { "cosine_similarity": 0.26634647172955633, "epoch": 2.18452935694315, "grad_norm": 1.099748697781264, "learning_rate": 6.040731791508457e-06, "loss": 1.6337, "reason_loss": 0.5051359534263611, "step": 2344, "utility_loss": 1.1285725831985474 }, { "cosine_similarity": -0.15435803102750945, "epoch": 2.185461323392358, "grad_norm": 1.1538477733939654, "learning_rate": 6.033828098032448e-06, "loss": 1.576, "reason_loss": 0.5067543387413025, "step": 2345, "utility_loss": 1.0692834854125977 }, { "cosine_similarity": 0.2654674562476242, "epoch": 2.1863932898415657, "grad_norm": 1.24823980373656, "learning_rate": 6.026924404556438e-06, "loss": 1.4903, "reason_loss": 0.47718536853790283, "step": 2346, "utility_loss": 1.0131608247756958 }, { "cosine_similarity": 0.08762626827356508, "epoch": 2.1873252562907735, "grad_norm": 1.2746715136583149, "learning_rate": 6.0200207110804286e-06, "loss": 1.563, "reason_loss": 0.47432631254196167, "step": 2347, "utility_loss": 1.0887095928192139 }, { "cosine_similarity": 0.023187209581934615, "epoch": 2.1882572227399812, "grad_norm": 1.0703079567417118, "learning_rate": 6.0131170176044184e-06, "loss": 1.4505, "reason_loss": 0.44790419936180115, "step": 2348, "utility_loss": 1.00260591506958 }, { "cosine_similarity": 0.16430329148968265, "epoch": 2.189189189189189, "grad_norm": 1.090573316803895, "learning_rate": 6.006213324128409e-06, "loss": 1.5327, "reason_loss": 0.44792041182518005, "step": 2349, "utility_loss": 1.0847454071044922 }, { "cosine_similarity": 0.18647077620853944, "epoch": 2.190121155638397, "grad_norm": 1.2885314575180038, "learning_rate": 5.999309630652399e-06, "loss": 1.3646, "reason_loss": 0.49476882815361023, "step": 2350, "utility_loss": 0.8698384165763855 }, { "cosine_similarity": 0.008867057783681478, "epoch": 2.191053122087605, "grad_norm": 0.956289630886661, "learning_rate": 5.99240593717639e-06, "loss": 1.6539, "reason_loss": 0.4786525070667267, "step": 2351, "utility_loss": 1.1752545833587646 }, { "cosine_similarity": 0.28021652915535544, "epoch": 2.191985088536813, "grad_norm": 1.3176264178061583, "learning_rate": 5.9855022437003805e-06, "loss": 1.9862, "reason_loss": 0.5153250098228455, "step": 2352, "utility_loss": 1.470905065536499 }, { "cosine_similarity": -0.02842350273704619, "epoch": 2.1929170549860206, "grad_norm": 1.3733500693526863, "learning_rate": 5.97859855022437e-06, "loss": 1.253, "reason_loss": 0.49465692043304443, "step": 2353, "utility_loss": 0.7583439946174622 }, { "cosine_similarity": -0.07365515419138818, "epoch": 2.1938490214352284, "grad_norm": 1.1546091311175761, "learning_rate": 5.971694856748361e-06, "loss": 1.601, "reason_loss": 0.5224317908287048, "step": 2354, "utility_loss": 1.0785670280456543 }, { "cosine_similarity": 0.26019307345018955, "epoch": 2.194780987884436, "grad_norm": 1.228295941389961, "learning_rate": 5.964791163272351e-06, "loss": 1.3361, "reason_loss": 0.4992687404155731, "step": 2355, "utility_loss": 0.8367959856987 }, { "cosine_similarity": 0.055387682151094006, "epoch": 2.195712954333644, "grad_norm": 1.0704709391693779, "learning_rate": 5.957887469796342e-06, "loss": 1.3737, "reason_loss": 0.5265425443649292, "step": 2356, "utility_loss": 0.8471269607543945 }, { "cosine_similarity": 0.047831110060811456, "epoch": 2.196644920782852, "grad_norm": 1.2081400549147927, "learning_rate": 5.950983776320332e-06, "loss": 1.823, "reason_loss": 0.48631536960601807, "step": 2357, "utility_loss": 1.336727499961853 }, { "cosine_similarity": -0.006359851392148423, "epoch": 2.1975768872320596, "grad_norm": 1.0910324721869409, "learning_rate": 5.944080082844322e-06, "loss": 1.7062, "reason_loss": 0.4818105399608612, "step": 2358, "utility_loss": 1.2244160175323486 }, { "cosine_similarity": 0.08568652748763095, "epoch": 2.1985088536812674, "grad_norm": 1.1091157867887498, "learning_rate": 5.937176389368312e-06, "loss": 1.6386, "reason_loss": 0.4810738265514374, "step": 2359, "utility_loss": 1.1575586795806885 }, { "cosine_similarity": 0.29056182928882357, "epoch": 2.199440820130475, "grad_norm": 1.3031128991137124, "learning_rate": 5.930272695892303e-06, "loss": 1.629, "reason_loss": 0.5002049207687378, "step": 2360, "utility_loss": 1.1288061141967773 }, { "cosine_similarity": -0.014186675547283362, "epoch": 2.200372786579683, "grad_norm": 1.0999812912955553, "learning_rate": 5.923369002416293e-06, "loss": 1.6335, "reason_loss": 0.4572851061820984, "step": 2361, "utility_loss": 1.1762409210205078 }, { "cosine_similarity": -0.041373379973909274, "epoch": 2.201304753028891, "grad_norm": 1.2829514753455187, "learning_rate": 5.916465308940284e-06, "loss": 1.3393, "reason_loss": 0.4633931815624237, "step": 2362, "utility_loss": 0.8759068250656128 }, { "cosine_similarity": 0.17003196546152463, "epoch": 2.202236719478099, "grad_norm": 1.168022900669488, "learning_rate": 5.909561615464274e-06, "loss": 1.644, "reason_loss": 0.46790674328804016, "step": 2363, "utility_loss": 1.1761302947998047 }, { "cosine_similarity": 0.2979316298579878, "epoch": 2.2031686859273067, "grad_norm": 1.2210111418165384, "learning_rate": 5.902657921988264e-06, "loss": 1.6081, "reason_loss": 0.4729699492454529, "step": 2364, "utility_loss": 1.1350904703140259 }, { "cosine_similarity": 0.22036042608909953, "epoch": 2.2041006523765145, "grad_norm": 1.3001168335818754, "learning_rate": 5.895754228512255e-06, "loss": 1.3662, "reason_loss": 0.5007537007331848, "step": 2365, "utility_loss": 0.8653977513313293 }, { "cosine_similarity": 0.1257098326125199, "epoch": 2.2050326188257223, "grad_norm": 1.2021226366005096, "learning_rate": 5.888850535036245e-06, "loss": 1.3206, "reason_loss": 0.479439914226532, "step": 2366, "utility_loss": 0.8411495089530945 }, { "cosine_similarity": 0.11984405688774424, "epoch": 2.20596458527493, "grad_norm": 1.269778848634158, "learning_rate": 5.881946841560236e-06, "loss": 1.4159, "reason_loss": 0.47457242012023926, "step": 2367, "utility_loss": 0.9413328766822815 }, { "cosine_similarity": 0.1538696134025514, "epoch": 2.206896551724138, "grad_norm": 1.385767913058677, "learning_rate": 5.8750431480842255e-06, "loss": 1.7971, "reason_loss": 0.45986437797546387, "step": 2368, "utility_loss": 1.3372150659561157 }, { "cosine_similarity": 0.02708972618674628, "epoch": 2.2078285181733457, "grad_norm": 1.1621951425777661, "learning_rate": 5.868139454608216e-06, "loss": 1.2361, "reason_loss": 0.4746575355529785, "step": 2369, "utility_loss": 0.7613942623138428 }, { "cosine_similarity": 0.1798822911478514, "epoch": 2.2087604846225535, "grad_norm": 1.285195083932955, "learning_rate": 5.861235761132206e-06, "loss": 1.5779, "reason_loss": 0.4889775514602661, "step": 2370, "utility_loss": 1.0889060497283936 }, { "cosine_similarity": 0.1902447062887943, "epoch": 2.2096924510717613, "grad_norm": 1.1906591021789446, "learning_rate": 5.854332067656197e-06, "loss": 1.5089, "reason_loss": 0.4760817289352417, "step": 2371, "utility_loss": 1.0328022241592407 }, { "cosine_similarity": -0.09527069767290047, "epoch": 2.210624417520969, "grad_norm": 1.193651326333328, "learning_rate": 5.8474283741801875e-06, "loss": 1.2845, "reason_loss": 0.4840652346611023, "step": 2372, "utility_loss": 0.8004423379898071 }, { "cosine_similarity": 0.019827041379455557, "epoch": 2.2115563839701773, "grad_norm": 1.1826233207980492, "learning_rate": 5.8405246807041774e-06, "loss": 1.6756, "reason_loss": 0.46151238679885864, "step": 2373, "utility_loss": 1.2140382528305054 }, { "cosine_similarity": 0.046684200539475335, "epoch": 2.212488350419385, "grad_norm": 1.108222710826659, "learning_rate": 5.833620987228168e-06, "loss": 1.3534, "reason_loss": 0.48272377252578735, "step": 2374, "utility_loss": 0.8707221746444702 }, { "cosine_similarity": -0.09033625234241567, "epoch": 2.213420316868593, "grad_norm": 1.1953141011272315, "learning_rate": 5.826717293752158e-06, "loss": 1.4212, "reason_loss": 0.49610525369644165, "step": 2375, "utility_loss": 0.9251031875610352 }, { "cosine_similarity": 0.03409406736026529, "epoch": 2.2143522833178007, "grad_norm": 1.68469899072879, "learning_rate": 5.819813600276149e-06, "loss": 1.3697, "reason_loss": 0.5067148804664612, "step": 2376, "utility_loss": 0.8629720211029053 }, { "cosine_similarity": 0.09586111730078363, "epoch": 2.2152842497670084, "grad_norm": 1.0438064534234495, "learning_rate": 5.812909906800138e-06, "loss": 1.5083, "reason_loss": 0.48577696084976196, "step": 2377, "utility_loss": 1.0224871635437012 }, { "cosine_similarity": -0.02076071342735418, "epoch": 2.2162162162162162, "grad_norm": 1.113604527593195, "learning_rate": 5.806006213324129e-06, "loss": 1.3517, "reason_loss": 0.47282874584198, "step": 2378, "utility_loss": 0.8788639307022095 }, { "cosine_similarity": 0.03287023807942953, "epoch": 2.217148182665424, "grad_norm": 1.1592672209090324, "learning_rate": 5.7991025198481184e-06, "loss": 1.494, "reason_loss": 0.5158436298370361, "step": 2379, "utility_loss": 0.9781885147094727 }, { "cosine_similarity": 0.1666219631665925, "epoch": 2.218080149114632, "grad_norm": 1.0811285957513124, "learning_rate": 5.79219882637211e-06, "loss": 1.3732, "reason_loss": 0.4776552617549896, "step": 2380, "utility_loss": 0.8955855965614319 }, { "cosine_similarity": 0.2914894288104433, "epoch": 2.2190121155638396, "grad_norm": 1.0994409719411202, "learning_rate": 5.785295132896099e-06, "loss": 1.6575, "reason_loss": 0.5129759907722473, "step": 2381, "utility_loss": 1.144554853439331 }, { "cosine_similarity": 0.36546790478260166, "epoch": 2.2199440820130474, "grad_norm": 1.379261293353175, "learning_rate": 5.77839143942009e-06, "loss": 1.7318, "reason_loss": 0.4627159833908081, "step": 2382, "utility_loss": 1.2691278457641602 }, { "cosine_similarity": -0.05066531174321488, "epoch": 2.220876048462255, "grad_norm": 1.0428998392468807, "learning_rate": 5.771487745944081e-06, "loss": 1.231, "reason_loss": 0.45805028080940247, "step": 2383, "utility_loss": 0.772932231426239 }, { "cosine_similarity": 0.1702194964747387, "epoch": 2.2218080149114634, "grad_norm": 1.369307301547545, "learning_rate": 5.76458405246807e-06, "loss": 1.2047, "reason_loss": 0.4791508913040161, "step": 2384, "utility_loss": 0.7255293130874634 }, { "cosine_similarity": 0.19019054596174007, "epoch": 2.222739981360671, "grad_norm": 1.076953365870322, "learning_rate": 5.757680358992062e-06, "loss": 1.3063, "reason_loss": 0.46028125286102295, "step": 2385, "utility_loss": 0.8460631370544434 }, { "cosine_similarity": 0.12700032263913028, "epoch": 2.223671947809879, "grad_norm": 1.08975446981184, "learning_rate": 5.750776665516051e-06, "loss": 1.693, "reason_loss": 0.4861310124397278, "step": 2386, "utility_loss": 1.2068734169006348 }, { "cosine_similarity": 0.15939031951587312, "epoch": 2.2246039142590868, "grad_norm": 1.3138415653737008, "learning_rate": 5.743872972040042e-06, "loss": 1.5764, "reason_loss": 0.504952073097229, "step": 2387, "utility_loss": 1.071455955505371 }, { "cosine_similarity": -0.017760780713772663, "epoch": 2.2255358807082946, "grad_norm": 1.114364606129218, "learning_rate": 5.736969278564032e-06, "loss": 1.4582, "reason_loss": 0.48065561056137085, "step": 2388, "utility_loss": 0.9775429964065552 }, { "cosine_similarity": 0.10110500345512695, "epoch": 2.2264678471575023, "grad_norm": 1.124026085716249, "learning_rate": 5.730065585088022e-06, "loss": 1.577, "reason_loss": 0.48251742124557495, "step": 2389, "utility_loss": 1.094443440437317 }, { "cosine_similarity": 0.05678561539698684, "epoch": 2.22739981360671, "grad_norm": 0.9890145497208352, "learning_rate": 5.723161891612012e-06, "loss": 1.2459, "reason_loss": 0.4810398519039154, "step": 2390, "utility_loss": 0.7648272514343262 }, { "cosine_similarity": 0.16593325564477138, "epoch": 2.228331780055918, "grad_norm": 1.0565405558557506, "learning_rate": 5.716258198136003e-06, "loss": 1.3228, "reason_loss": 0.5101047158241272, "step": 2391, "utility_loss": 0.8126497268676758 }, { "cosine_similarity": -0.10545934400146921, "epoch": 2.2292637465051257, "grad_norm": 1.2132537889665886, "learning_rate": 5.709354504659994e-06, "loss": 1.442, "reason_loss": 0.5086460113525391, "step": 2392, "utility_loss": 0.9333264231681824 }, { "cosine_similarity": -0.0020663418957791, "epoch": 2.2301957129543335, "grad_norm": 1.259333832080703, "learning_rate": 5.702450811183984e-06, "loss": 1.5125, "reason_loss": 0.5201419591903687, "step": 2393, "utility_loss": 0.9924044609069824 }, { "cosine_similarity": 0.04179300650863178, "epoch": 2.2311276794035413, "grad_norm": 1.2015129147357009, "learning_rate": 5.695547117707974e-06, "loss": 1.5202, "reason_loss": 0.4904862642288208, "step": 2394, "utility_loss": 1.0297175645828247 }, { "cosine_similarity": -0.08518239369508879, "epoch": 2.2320596458527495, "grad_norm": 1.0409420130271347, "learning_rate": 5.688643424231964e-06, "loss": 1.2106, "reason_loss": 0.47536489367485046, "step": 2395, "utility_loss": 0.7352536916732788 }, { "cosine_similarity": 0.0792761762722393, "epoch": 2.2329916123019573, "grad_norm": 1.4196392373264128, "learning_rate": 5.681739730755955e-06, "loss": 1.3833, "reason_loss": 0.47552046179771423, "step": 2396, "utility_loss": 0.9077891111373901 }, { "cosine_similarity": 0.11118537582799773, "epoch": 2.233923578751165, "grad_norm": 1.2177639108452838, "learning_rate": 5.674836037279945e-06, "loss": 1.4264, "reason_loss": 0.47308528423309326, "step": 2397, "utility_loss": 0.953344464302063 }, { "cosine_similarity": 0.05537952117324805, "epoch": 2.234855545200373, "grad_norm": 1.0408945229029574, "learning_rate": 5.667932343803936e-06, "loss": 1.2706, "reason_loss": 0.4960018992424011, "step": 2398, "utility_loss": 0.7745901346206665 }, { "cosine_similarity": 0.1428783805169334, "epoch": 2.2357875116495807, "grad_norm": 1.3100280227158676, "learning_rate": 5.6610286503279255e-06, "loss": 1.5864, "reason_loss": 0.4696207046508789, "step": 2399, "utility_loss": 1.116739273071289 }, { "cosine_similarity": 0.03578876723830443, "epoch": 2.2367194780987885, "grad_norm": 1.1687903088338893, "learning_rate": 5.654124956851916e-06, "loss": 1.716, "reason_loss": 0.4898262619972229, "step": 2400, "utility_loss": 1.2262228727340698 }, { "cosine_similarity": 0.1325824335917992, "epoch": 2.2376514445479962, "grad_norm": 1.1292564000972134, "learning_rate": 5.647221263375906e-06, "loss": 1.7096, "reason_loss": 0.48072922229766846, "step": 2401, "utility_loss": 1.2288758754730225 }, { "cosine_similarity": 0.15087461505201752, "epoch": 2.238583410997204, "grad_norm": 1.016021456081121, "learning_rate": 5.640317569899897e-06, "loss": 1.4761, "reason_loss": 0.4502209424972534, "step": 2402, "utility_loss": 1.025846004486084 }, { "cosine_similarity": -0.008804556561853744, "epoch": 2.239515377446412, "grad_norm": 1.225985680395278, "learning_rate": 5.6334138764238875e-06, "loss": 1.5146, "reason_loss": 0.45454713702201843, "step": 2403, "utility_loss": 1.0600709915161133 }, { "cosine_similarity": 0.30250625195401215, "epoch": 2.2404473438956196, "grad_norm": 1.2900827142639673, "learning_rate": 5.6265101829478774e-06, "loss": 1.1362, "reason_loss": 0.47322678565979004, "step": 2404, "utility_loss": 0.6629776954650879 }, { "cosine_similarity": 0.012418963707303358, "epoch": 2.2413793103448274, "grad_norm": 1.288208494559314, "learning_rate": 5.619606489471868e-06, "loss": 1.4228, "reason_loss": 0.47182130813598633, "step": 2405, "utility_loss": 0.9509984254837036 }, { "cosine_similarity": 0.05415506013130113, "epoch": 2.2423112767940356, "grad_norm": 1.1712961987325778, "learning_rate": 5.612702795995858e-06, "loss": 1.3232, "reason_loss": 0.5036570429801941, "step": 2406, "utility_loss": 0.8195158243179321 }, { "cosine_similarity": 0.32916780077136715, "epoch": 2.2432432432432434, "grad_norm": 1.1784720635483736, "learning_rate": 5.605799102519849e-06, "loss": 1.9012, "reason_loss": 0.48267272114753723, "step": 2407, "utility_loss": 1.4185242652893066 }, { "cosine_similarity": 0.09835531058429925, "epoch": 2.244175209692451, "grad_norm": 1.3583060905059257, "learning_rate": 5.598895409043839e-06, "loss": 1.3637, "reason_loss": 0.5154014825820923, "step": 2408, "utility_loss": 0.8483274579048157 }, { "cosine_similarity": 0.08468435217766544, "epoch": 2.245107176141659, "grad_norm": 1.6322355957076866, "learning_rate": 5.591991715567829e-06, "loss": 1.4278, "reason_loss": 0.48058179020881653, "step": 2409, "utility_loss": 0.9472286701202393 }, { "cosine_similarity": 0.07000141562318478, "epoch": 2.246039142590867, "grad_norm": 1.1987892784102727, "learning_rate": 5.585088022091819e-06, "loss": 1.4659, "reason_loss": 0.501175045967102, "step": 2410, "utility_loss": 0.9647434949874878 }, { "cosine_similarity": 0.01625152107042762, "epoch": 2.2469711090400746, "grad_norm": 1.546815300537271, "learning_rate": 5.57818432861581e-06, "loss": 1.492, "reason_loss": 0.5007954835891724, "step": 2411, "utility_loss": 0.9911922216415405 }, { "cosine_similarity": -0.11271359503193441, "epoch": 2.2479030754892824, "grad_norm": 1.092420558765873, "learning_rate": 5.571280635139801e-06, "loss": 1.4521, "reason_loss": 0.46202269196510315, "step": 2412, "utility_loss": 0.9900956153869629 }, { "cosine_similarity": 0.01921515052693402, "epoch": 2.24883504193849, "grad_norm": 1.0353801664429807, "learning_rate": 5.564376941663791e-06, "loss": 1.387, "reason_loss": 0.5007978677749634, "step": 2413, "utility_loss": 0.886165201663971 }, { "cosine_similarity": 0.14927506675715876, "epoch": 2.249767008387698, "grad_norm": 1.2150789724448705, "learning_rate": 5.557473248187781e-06, "loss": 1.4376, "reason_loss": 0.4918900728225708, "step": 2414, "utility_loss": 0.9457395076751709 }, { "cosine_similarity": 0.17401685614423684, "epoch": 2.2506989748369057, "grad_norm": 0.9749051159106775, "learning_rate": 5.550569554711771e-06, "loss": 1.4686, "reason_loss": 0.49619030952453613, "step": 2415, "utility_loss": 0.9724470376968384 }, { "cosine_similarity": 0.21573675351743676, "epoch": 2.2516309412861135, "grad_norm": 1.6527150533042225, "learning_rate": 5.543665861235762e-06, "loss": 1.6221, "reason_loss": 0.4883817732334137, "step": 2416, "utility_loss": 1.1337594985961914 }, { "cosine_similarity": 0.20634099179765, "epoch": 2.2525629077353218, "grad_norm": 1.2726793429017564, "learning_rate": 5.536762167759752e-06, "loss": 1.3925, "reason_loss": 0.49919793009757996, "step": 2417, "utility_loss": 0.8932546377182007 }, { "cosine_similarity": -0.058060739656488586, "epoch": 2.2534948741845295, "grad_norm": 1.3642765911433852, "learning_rate": 5.529858474283743e-06, "loss": 1.629, "reason_loss": 0.5101028084754944, "step": 2418, "utility_loss": 1.1188616752624512 }, { "cosine_similarity": 0.12810218759376726, "epoch": 2.2544268406337373, "grad_norm": 1.1433775707633858, "learning_rate": 5.5229547808077325e-06, "loss": 1.4222, "reason_loss": 0.46944090723991394, "step": 2419, "utility_loss": 0.9527575969696045 }, { "cosine_similarity": 0.04928823597396639, "epoch": 2.255358807082945, "grad_norm": 1.3149130286219157, "learning_rate": 5.516051087331723e-06, "loss": 1.3555, "reason_loss": 0.4785821735858917, "step": 2420, "utility_loss": 0.876915693283081 }, { "cosine_similarity": 0.0042277963870336705, "epoch": 2.256290773532153, "grad_norm": 1.3512860450235038, "learning_rate": 5.509147393855713e-06, "loss": 1.2686, "reason_loss": 0.49078646302223206, "step": 2421, "utility_loss": 0.7778550982475281 }, { "cosine_similarity": -0.15497499249996657, "epoch": 2.2572227399813607, "grad_norm": 1.042923987613776, "learning_rate": 5.502243700379704e-06, "loss": 1.3111, "reason_loss": 0.48827701807022095, "step": 2422, "utility_loss": 0.8228346109390259 }, { "cosine_similarity": 0.15847994577705304, "epoch": 2.2581547064305685, "grad_norm": 1.283040382008291, "learning_rate": 5.4953400069036946e-06, "loss": 1.3209, "reason_loss": 0.4805469512939453, "step": 2423, "utility_loss": 0.8403872847557068 }, { "cosine_similarity": 0.09280094120673602, "epoch": 2.2590866728797763, "grad_norm": 1.4679841825154054, "learning_rate": 5.4884363134276845e-06, "loss": 1.5149, "reason_loss": 0.48640185594558716, "step": 2424, "utility_loss": 1.0285292863845825 }, { "cosine_similarity": 0.05452547053763168, "epoch": 2.260018639328984, "grad_norm": 1.0562624519624433, "learning_rate": 5.481532619951675e-06, "loss": 1.4029, "reason_loss": 0.498982310295105, "step": 2425, "utility_loss": 0.9039105772972107 }, { "cosine_similarity": 0.15394790543656492, "epoch": 2.260950605778192, "grad_norm": 1.4536629235550917, "learning_rate": 5.474628926475665e-06, "loss": 1.6928, "reason_loss": 0.483865886926651, "step": 2426, "utility_loss": 1.2089401483535767 }, { "cosine_similarity": 0.06395117795490741, "epoch": 2.2618825722273996, "grad_norm": 1.2030879282204288, "learning_rate": 5.467725232999656e-06, "loss": 1.6667, "reason_loss": 0.4801778793334961, "step": 2427, "utility_loss": 1.1865179538726807 }, { "cosine_similarity": 0.04578347441887206, "epoch": 2.262814538676608, "grad_norm": 1.1251002577968618, "learning_rate": 5.460821539523645e-06, "loss": 1.4946, "reason_loss": 0.4921422004699707, "step": 2428, "utility_loss": 1.002436876296997 }, { "cosine_similarity": 0.14258277617657217, "epoch": 2.2637465051258157, "grad_norm": 1.167544687721844, "learning_rate": 5.4539178460476364e-06, "loss": 1.4227, "reason_loss": 0.4780746102333069, "step": 2429, "utility_loss": 0.9445862770080566 }, { "cosine_similarity": -0.022567528729189715, "epoch": 2.2646784715750234, "grad_norm": 1.054990457097712, "learning_rate": 5.4470141525716255e-06, "loss": 1.1637, "reason_loss": 0.48359793424606323, "step": 2430, "utility_loss": 0.6800865530967712 }, { "cosine_similarity": 0.20519973976540914, "epoch": 2.2656104380242312, "grad_norm": 1.1718992686020757, "learning_rate": 5.440110459095617e-06, "loss": 1.3757, "reason_loss": 0.4417516589164734, "step": 2431, "utility_loss": 0.9339949488639832 }, { "cosine_similarity": 0.10570903811364697, "epoch": 2.266542404473439, "grad_norm": 1.041682312841757, "learning_rate": 5.433206765619608e-06, "loss": 1.5093, "reason_loss": 0.5179803371429443, "step": 2432, "utility_loss": 0.9913282990455627 }, { "cosine_similarity": 0.07078492369160899, "epoch": 2.267474370922647, "grad_norm": 1.1845618432541314, "learning_rate": 5.426303072143597e-06, "loss": 1.4238, "reason_loss": 0.48598819971084595, "step": 2433, "utility_loss": 0.9377738237380981 }, { "cosine_similarity": 0.06097202512045572, "epoch": 2.2684063373718546, "grad_norm": 1.1719792191647032, "learning_rate": 5.419399378667588e-06, "loss": 1.3749, "reason_loss": 0.4930737316608429, "step": 2434, "utility_loss": 0.8818130493164062 }, { "cosine_similarity": -0.06444565259906868, "epoch": 2.2693383038210624, "grad_norm": 1.2140902260574076, "learning_rate": 5.4124956851915774e-06, "loss": 1.5643, "reason_loss": 0.5150666832923889, "step": 2435, "utility_loss": 1.0491929054260254 }, { "cosine_similarity": 0.0401354202006552, "epoch": 2.27027027027027, "grad_norm": 1.207187625080345, "learning_rate": 5.405591991715568e-06, "loss": 1.2501, "reason_loss": 0.47060203552246094, "step": 2436, "utility_loss": 0.7795184850692749 }, { "cosine_similarity": 0.09516023833209322, "epoch": 2.271202236719478, "grad_norm": 1.2289594197495866, "learning_rate": 5.398688298239558e-06, "loss": 1.5922, "reason_loss": 0.48498424887657166, "step": 2437, "utility_loss": 1.1071984767913818 }, { "cosine_similarity": 0.09269195756127181, "epoch": 2.2721342031686858, "grad_norm": 1.0557835028605036, "learning_rate": 5.391784604763549e-06, "loss": 1.1959, "reason_loss": 0.4990094006061554, "step": 2438, "utility_loss": 0.696851372718811 }, { "cosine_similarity": 0.03961709262988621, "epoch": 2.273066169617894, "grad_norm": 1.0544429755673055, "learning_rate": 5.384880911287539e-06, "loss": 1.3004, "reason_loss": 0.45244014263153076, "step": 2439, "utility_loss": 0.8479874134063721 }, { "cosine_similarity": -0.05728651932814815, "epoch": 2.2739981360671018, "grad_norm": 1.1471077989919274, "learning_rate": 5.377977217811529e-06, "loss": 1.6107, "reason_loss": 0.508588433265686, "step": 2440, "utility_loss": 1.1021063327789307 }, { "cosine_similarity": 0.1574463221157047, "epoch": 2.2749301025163096, "grad_norm": 1.1676453661659125, "learning_rate": 5.371073524335519e-06, "loss": 1.3363, "reason_loss": 0.48160815238952637, "step": 2441, "utility_loss": 0.8546661138534546 }, { "cosine_similarity": 0.05556719852159632, "epoch": 2.2758620689655173, "grad_norm": 1.0157434059253325, "learning_rate": 5.36416983085951e-06, "loss": 1.54, "reason_loss": 0.49775877594947815, "step": 2442, "utility_loss": 1.0422732830047607 }, { "cosine_similarity": -0.05563477252594206, "epoch": 2.276794035414725, "grad_norm": 1.2302751286709748, "learning_rate": 5.357266137383501e-06, "loss": 1.3564, "reason_loss": 0.4820318818092346, "step": 2443, "utility_loss": 0.8743219375610352 }, { "cosine_similarity": 0.10551826648196504, "epoch": 2.277726001863933, "grad_norm": 1.0810619547002167, "learning_rate": 5.350362443907491e-06, "loss": 1.5829, "reason_loss": 0.5120160579681396, "step": 2444, "utility_loss": 1.070892333984375 }, { "cosine_similarity": 0.1629732820373553, "epoch": 2.2786579683131407, "grad_norm": 1.1193387320348136, "learning_rate": 5.343458750431481e-06, "loss": 1.7475, "reason_loss": 0.47775977849960327, "step": 2445, "utility_loss": 1.269774079322815 }, { "cosine_similarity": 0.024831353715397228, "epoch": 2.2795899347623485, "grad_norm": 1.1912721868204543, "learning_rate": 5.336555056955471e-06, "loss": 1.6353, "reason_loss": 0.48704296350479126, "step": 2446, "utility_loss": 1.1482163667678833 }, { "cosine_similarity": 0.03524697296119619, "epoch": 2.2805219012115563, "grad_norm": 1.2845207455192025, "learning_rate": 5.329651363479462e-06, "loss": 1.7702, "reason_loss": 0.49055126309394836, "step": 2447, "utility_loss": 1.2796039581298828 }, { "cosine_similarity": 0.23589940657910752, "epoch": 2.281453867660764, "grad_norm": 1.2367938590820737, "learning_rate": 5.322747670003452e-06, "loss": 1.7419, "reason_loss": 0.48782259225845337, "step": 2448, "utility_loss": 1.2540346384048462 }, { "cosine_similarity": 0.3013868793660229, "epoch": 2.282385834109972, "grad_norm": 1.3222931872388903, "learning_rate": 5.315843976527443e-06, "loss": 1.5155, "reason_loss": 0.49251091480255127, "step": 2449, "utility_loss": 1.0229744911193848 }, { "cosine_similarity": 0.10904113531211096, "epoch": 2.28331780055918, "grad_norm": 1.2334347983443656, "learning_rate": 5.3089402830514325e-06, "loss": 1.747, "reason_loss": 0.49717071652412415, "step": 2450, "utility_loss": 1.249835729598999 }, { "cosine_similarity": 0.054640976062601765, "epoch": 2.284249767008388, "grad_norm": 1.1234874818295042, "learning_rate": 5.302036589575423e-06, "loss": 1.5828, "reason_loss": 0.5055137276649475, "step": 2451, "utility_loss": 1.0772521495819092 }, { "cosine_similarity": 0.10839594667820307, "epoch": 2.2851817334575957, "grad_norm": 1.2170831344361197, "learning_rate": 5.295132896099414e-06, "loss": 1.4285, "reason_loss": 0.5041075348854065, "step": 2452, "utility_loss": 0.9244006872177124 }, { "cosine_similarity": 0.11728716379965481, "epoch": 2.2861136999068035, "grad_norm": 1.0183812633550062, "learning_rate": 5.288229202623404e-06, "loss": 1.3686, "reason_loss": 0.4730728268623352, "step": 2453, "utility_loss": 0.8954837322235107 }, { "cosine_similarity": 0.05016691232499226, "epoch": 2.2870456663560113, "grad_norm": 1.1629880718702725, "learning_rate": 5.281325509147395e-06, "loss": 1.6517, "reason_loss": 0.4800572395324707, "step": 2454, "utility_loss": 1.1716443300247192 }, { "cosine_similarity": -0.0353173937168463, "epoch": 2.287977632805219, "grad_norm": 1.271911490749903, "learning_rate": 5.2744218156713845e-06, "loss": 1.6872, "reason_loss": 0.49481824040412903, "step": 2455, "utility_loss": 1.192400574684143 }, { "cosine_similarity": -0.015653776135688188, "epoch": 2.288909599254427, "grad_norm": 1.1081268049519097, "learning_rate": 5.267518122195375e-06, "loss": 1.5802, "reason_loss": 0.5060640573501587, "step": 2456, "utility_loss": 1.074145793914795 }, { "cosine_similarity": -0.006635252517551288, "epoch": 2.2898415657036346, "grad_norm": 1.1604700587113157, "learning_rate": 5.260614428719365e-06, "loss": 1.6517, "reason_loss": 0.4799801707267761, "step": 2457, "utility_loss": 1.1716984510421753 }, { "cosine_similarity": 0.17010653916473217, "epoch": 2.2907735321528424, "grad_norm": 1.1140182840201875, "learning_rate": 5.253710735243356e-06, "loss": 1.3508, "reason_loss": 0.48242324590682983, "step": 2458, "utility_loss": 0.8683934807777405 }, { "cosine_similarity": 0.23070210660402704, "epoch": 2.29170549860205, "grad_norm": 1.2755407580423352, "learning_rate": 5.246807041767346e-06, "loss": 1.352, "reason_loss": 0.483288049697876, "step": 2459, "utility_loss": 0.8687260150909424 }, { "cosine_similarity": 0.043972878234732976, "epoch": 2.292637465051258, "grad_norm": 1.2095037313062529, "learning_rate": 5.2399033482913364e-06, "loss": 1.3969, "reason_loss": 0.4993041157722473, "step": 2460, "utility_loss": 0.8976444005966187 }, { "cosine_similarity": 0.07304932904555121, "epoch": 2.293569431500466, "grad_norm": 1.0430283184329958, "learning_rate": 5.232999654815326e-06, "loss": 1.473, "reason_loss": 0.4922930896282196, "step": 2461, "utility_loss": 0.9806984663009644 }, { "cosine_similarity": 0.11566480992326712, "epoch": 2.294501397949674, "grad_norm": 1.2661755713386031, "learning_rate": 5.226095961339317e-06, "loss": 1.3243, "reason_loss": 0.4812960922718048, "step": 2462, "utility_loss": 0.8429654240608215 }, { "cosine_similarity": 0.22365953124897006, "epoch": 2.295433364398882, "grad_norm": 1.4328205523104305, "learning_rate": 5.219192267863308e-06, "loss": 1.3427, "reason_loss": 0.47938624024391174, "step": 2463, "utility_loss": 0.8633536100387573 }, { "cosine_similarity": 0.25718998187580305, "epoch": 2.2963653308480896, "grad_norm": 1.0721499914398518, "learning_rate": 5.212288574387298e-06, "loss": 1.4325, "reason_loss": 0.48106715083122253, "step": 2464, "utility_loss": 0.951421320438385 }, { "cosine_similarity": 0.15219462842425638, "epoch": 2.2972972972972974, "grad_norm": 1.179847441555689, "learning_rate": 5.205384880911288e-06, "loss": 1.7939, "reason_loss": 0.4798743426799774, "step": 2465, "utility_loss": 1.3140751123428345 }, { "cosine_similarity": 0.031843268630526986, "epoch": 2.298229263746505, "grad_norm": 1.0829108563837855, "learning_rate": 5.198481187435278e-06, "loss": 1.4397, "reason_loss": 0.48872119188308716, "step": 2466, "utility_loss": 0.9510072469711304 }, { "cosine_similarity": -0.05536940833185534, "epoch": 2.299161230195713, "grad_norm": 1.2719632154527019, "learning_rate": 5.191577493959269e-06, "loss": 1.3612, "reason_loss": 0.4795752763748169, "step": 2467, "utility_loss": 0.8816277980804443 }, { "cosine_similarity": -0.008054030071109228, "epoch": 2.3000931966449207, "grad_norm": 1.2639208517396272, "learning_rate": 5.184673800483259e-06, "loss": 1.6171, "reason_loss": 0.47854769229888916, "step": 2468, "utility_loss": 1.1385760307312012 }, { "cosine_similarity": 0.12478959866927822, "epoch": 2.3010251630941285, "grad_norm": 1.0033880308985959, "learning_rate": 5.17777010700725e-06, "loss": 1.4253, "reason_loss": 0.44991400837898254, "step": 2469, "utility_loss": 0.9754037261009216 }, { "cosine_similarity": 0.18292537880648435, "epoch": 2.3019571295433363, "grad_norm": 1.2651740883214773, "learning_rate": 5.1708664135312395e-06, "loss": 1.5763, "reason_loss": 0.5229330062866211, "step": 2470, "utility_loss": 1.053332805633545 }, { "cosine_similarity": 0.07395247942405084, "epoch": 2.302889095992544, "grad_norm": 1.1333110042421157, "learning_rate": 5.16396272005523e-06, "loss": 1.6081, "reason_loss": 0.5118167400360107, "step": 2471, "utility_loss": 1.096279263496399 }, { "cosine_similarity": -0.06175733767432694, "epoch": 2.3038210624417523, "grad_norm": 1.1229355526802565, "learning_rate": 5.15705902657922e-06, "loss": 1.6013, "reason_loss": 0.48865002393722534, "step": 2472, "utility_loss": 1.1126139163970947 }, { "cosine_similarity": -0.025539178120969702, "epoch": 2.3047530288909597, "grad_norm": 1.0649120008521857, "learning_rate": 5.150155333103211e-06, "loss": 1.4717, "reason_loss": 0.5095689296722412, "step": 2473, "utility_loss": 0.9621528387069702 }, { "cosine_similarity": 0.16343734634662807, "epoch": 2.305684995340168, "grad_norm": 1.2491352004641372, "learning_rate": 5.143251639627202e-06, "loss": 1.4309, "reason_loss": 0.4640616476535797, "step": 2474, "utility_loss": 0.96681809425354 }, { "cosine_similarity": 0.16713416047448715, "epoch": 2.3066169617893757, "grad_norm": 1.0623936996202092, "learning_rate": 5.1363479461511915e-06, "loss": 1.5408, "reason_loss": 0.48647773265838623, "step": 2475, "utility_loss": 1.0543484687805176 }, { "cosine_similarity": 0.14744714413467486, "epoch": 2.3075489282385835, "grad_norm": 0.9901629729850756, "learning_rate": 5.129444252675182e-06, "loss": 1.5462, "reason_loss": 0.46063393354415894, "step": 2476, "utility_loss": 1.0855507850646973 }, { "cosine_similarity": 0.051767331577596415, "epoch": 2.3084808946877913, "grad_norm": 1.2384474136029047, "learning_rate": 5.122540559199172e-06, "loss": 1.4323, "reason_loss": 0.48795896768569946, "step": 2477, "utility_loss": 0.9443178176879883 }, { "cosine_similarity": 0.031070408187434777, "epoch": 2.309412861136999, "grad_norm": 1.2150194018398972, "learning_rate": 5.115636865723163e-06, "loss": 1.5117, "reason_loss": 0.49076542258262634, "step": 2478, "utility_loss": 1.020946741104126 }, { "cosine_similarity": 0.07539485861755296, "epoch": 2.310344827586207, "grad_norm": 1.2379376612242425, "learning_rate": 5.108733172247152e-06, "loss": 1.4833, "reason_loss": 0.45818907022476196, "step": 2479, "utility_loss": 1.0250978469848633 }, { "cosine_similarity": 0.12660956656436542, "epoch": 2.3112767940354146, "grad_norm": 1.1584804078753468, "learning_rate": 5.1018294787711435e-06, "loss": 1.1704, "reason_loss": 0.49051186442375183, "step": 2480, "utility_loss": 0.6798880696296692 }, { "cosine_similarity": 0.11114866783073415, "epoch": 2.3122087604846224, "grad_norm": 1.3177955035493787, "learning_rate": 5.0949257852951325e-06, "loss": 1.7599, "reason_loss": 0.48186418414115906, "step": 2481, "utility_loss": 1.2779935598373413 }, { "cosine_similarity": 0.13533535399597915, "epoch": 2.31314072693383, "grad_norm": 1.1357554934581586, "learning_rate": 5.088022091819124e-06, "loss": 1.3369, "reason_loss": 0.5250566005706787, "step": 2482, "utility_loss": 0.8118686676025391 }, { "cosine_similarity": -0.1475511026042998, "epoch": 2.3140726933830384, "grad_norm": 1.2094376744144275, "learning_rate": 5.081118398343115e-06, "loss": 1.4224, "reason_loss": 0.49500852823257446, "step": 2483, "utility_loss": 0.9273642301559448 }, { "cosine_similarity": 0.024630764447836143, "epoch": 2.315004659832246, "grad_norm": 1.2966813116411593, "learning_rate": 5.074214704867104e-06, "loss": 1.4405, "reason_loss": 0.4768793284893036, "step": 2484, "utility_loss": 0.9636404514312744 }, { "cosine_similarity": 0.12876024343441708, "epoch": 2.315936626281454, "grad_norm": 1.1386726090544035, "learning_rate": 5.0673110113910954e-06, "loss": 1.5687, "reason_loss": 0.4677773118019104, "step": 2485, "utility_loss": 1.1008988618850708 }, { "cosine_similarity": -0.027691676301498738, "epoch": 2.316868592730662, "grad_norm": 1.226134460128019, "learning_rate": 5.0604073179150845e-06, "loss": 1.4355, "reason_loss": 0.46775558590888977, "step": 2486, "utility_loss": 0.9677708148956299 }, { "cosine_similarity": 0.20454295936236117, "epoch": 2.3178005591798696, "grad_norm": 1.1275915737596816, "learning_rate": 5.053503624439075e-06, "loss": 1.5845, "reason_loss": 0.5003060698509216, "step": 2487, "utility_loss": 1.084201455116272 }, { "cosine_similarity": 0.13239309194128623, "epoch": 2.3187325256290774, "grad_norm": 1.5462613209863445, "learning_rate": 5.046599930963065e-06, "loss": 1.5873, "reason_loss": 0.476982980966568, "step": 2488, "utility_loss": 1.1102776527404785 }, { "cosine_similarity": 0.01891511026433129, "epoch": 2.319664492078285, "grad_norm": 1.3095039924856304, "learning_rate": 5.039696237487056e-06, "loss": 1.4679, "reason_loss": 0.495851069688797, "step": 2489, "utility_loss": 0.9720110297203064 }, { "cosine_similarity": -0.04037882968890408, "epoch": 2.320596458527493, "grad_norm": 1.2420745378648874, "learning_rate": 5.032792544011046e-06, "loss": 1.6426, "reason_loss": 0.460853636264801, "step": 2490, "utility_loss": 1.1817634105682373 }, { "cosine_similarity": 0.018145401250506364, "epoch": 2.3215284249767008, "grad_norm": 1.187776086792087, "learning_rate": 5.0258888505350364e-06, "loss": 1.9533, "reason_loss": 0.5082945227622986, "step": 2491, "utility_loss": 1.4450396299362183 }, { "cosine_similarity": 0.015327287585462646, "epoch": 2.3224603914259085, "grad_norm": 1.1414165056953531, "learning_rate": 5.018985157059026e-06, "loss": 1.666, "reason_loss": 0.5172191858291626, "step": 2492, "utility_loss": 1.148773431777954 }, { "cosine_similarity": 0.06120621194994683, "epoch": 2.3233923578751163, "grad_norm": 1.2220150291777607, "learning_rate": 5.012081463583017e-06, "loss": 1.7122, "reason_loss": 0.5056097507476807, "step": 2493, "utility_loss": 1.2065855264663696 }, { "cosine_similarity": 0.05164256660743694, "epoch": 2.3243243243243246, "grad_norm": 1.2267081849696209, "learning_rate": 5.005177770107008e-06, "loss": 1.6686, "reason_loss": 0.4877767562866211, "step": 2494, "utility_loss": 1.1808067560195923 }, { "cosine_similarity": -0.018921912755932784, "epoch": 2.325256290773532, "grad_norm": 1.0668349627141156, "learning_rate": 4.998274076630998e-06, "loss": 1.5664, "reason_loss": 0.5099471211433411, "step": 2495, "utility_loss": 1.0564541816711426 }, { "cosine_similarity": 0.0017354949826441251, "epoch": 2.32618825722274, "grad_norm": 1.1641409317648779, "learning_rate": 4.991370383154988e-06, "loss": 1.4003, "reason_loss": 0.4900231957435608, "step": 2496, "utility_loss": 0.9103097915649414 }, { "cosine_similarity": -0.12841096575022923, "epoch": 2.327120223671948, "grad_norm": 1.183822940285698, "learning_rate": 4.984466689678978e-06, "loss": 1.3331, "reason_loss": 0.47326967120170593, "step": 2497, "utility_loss": 0.8598340749740601 }, { "cosine_similarity": -0.1317592077312834, "epoch": 2.3280521901211557, "grad_norm": 1.3294283796920237, "learning_rate": 4.977562996202969e-06, "loss": 1.6684, "reason_loss": 0.512451171875, "step": 2498, "utility_loss": 1.1559057235717773 }, { "cosine_similarity": 0.12051733358680077, "epoch": 2.3289841565703635, "grad_norm": 1.3864918359532314, "learning_rate": 4.97065930272696e-06, "loss": 1.4229, "reason_loss": 0.4831312596797943, "step": 2499, "utility_loss": 0.9397270679473877 }, { "cosine_similarity": 0.06259289256722589, "epoch": 2.3299161230195713, "grad_norm": 1.305857163421668, "learning_rate": 4.96375560925095e-06, "loss": 1.432, "reason_loss": 0.47232121229171753, "step": 2500, "utility_loss": 0.9597094058990479 }, { "cosine_similarity": 0.05888467529117276, "epoch": 2.330848089468779, "grad_norm": 1.4039798536578012, "learning_rate": 4.95685191577494e-06, "loss": 1.4562, "reason_loss": 0.5129084587097168, "step": 2501, "utility_loss": 0.9432783126831055 }, { "cosine_similarity": 0.09948352245184075, "epoch": 2.331780055917987, "grad_norm": 1.389910802953763, "learning_rate": 4.94994822229893e-06, "loss": 1.5035, "reason_loss": 0.46196627616882324, "step": 2502, "utility_loss": 1.0415706634521484 }, { "cosine_similarity": 0.10273734846608634, "epoch": 2.3327120223671947, "grad_norm": 1.2825694388057047, "learning_rate": 4.943044528822921e-06, "loss": 1.5412, "reason_loss": 0.487018346786499, "step": 2503, "utility_loss": 1.0542271137237549 }, { "cosine_similarity": -0.04416054888633978, "epoch": 2.3336439888164024, "grad_norm": 1.4207483940534051, "learning_rate": 4.936140835346911e-06, "loss": 1.3195, "reason_loss": 0.4653107225894928, "step": 2504, "utility_loss": 0.8541464805603027 }, { "cosine_similarity": 0.06758022718886993, "epoch": 2.3345759552656107, "grad_norm": 1.111856232728743, "learning_rate": 4.929237141870902e-06, "loss": 1.4435, "reason_loss": 0.5237400531768799, "step": 2505, "utility_loss": 0.9197700023651123 }, { "cosine_similarity": 0.07472110823565792, "epoch": 2.335507921714818, "grad_norm": 1.2876908345252633, "learning_rate": 4.9223334483948915e-06, "loss": 1.3752, "reason_loss": 0.4862704277038574, "step": 2506, "utility_loss": 0.8889626264572144 }, { "cosine_similarity": 0.012537807826191133, "epoch": 2.3364398881640263, "grad_norm": 1.2282661784925208, "learning_rate": 4.915429754918881e-06, "loss": 1.4844, "reason_loss": 0.49340710043907166, "step": 2507, "utility_loss": 0.9909811019897461 }, { "cosine_similarity": 0.16166688980445204, "epoch": 2.337371854613234, "grad_norm": 1.3615552751562146, "learning_rate": 4.908526061442873e-06, "loss": 1.5402, "reason_loss": 0.5078963041305542, "step": 2508, "utility_loss": 1.0322837829589844 }, { "cosine_similarity": -0.010433220434158367, "epoch": 2.338303821062442, "grad_norm": 1.1498465806220706, "learning_rate": 4.901622367966863e-06, "loss": 1.7895, "reason_loss": 0.5290201306343079, "step": 2509, "utility_loss": 1.2604517936706543 }, { "cosine_similarity": -0.05648259355817125, "epoch": 2.3392357875116496, "grad_norm": 1.176842871391183, "learning_rate": 4.894718674490853e-06, "loss": 1.2883, "reason_loss": 0.5117052793502808, "step": 2510, "utility_loss": 0.7765785455703735 }, { "cosine_similarity": 0.07628000584787843, "epoch": 2.3401677539608574, "grad_norm": 1.3287016781783243, "learning_rate": 4.8878149810148435e-06, "loss": 1.5775, "reason_loss": 0.49149221181869507, "step": 2511, "utility_loss": 1.0859829187393188 }, { "cosine_similarity": -0.08652590096350542, "epoch": 2.341099720410065, "grad_norm": 1.1265074412863028, "learning_rate": 4.880911287538833e-06, "loss": 1.382, "reason_loss": 0.4809950590133667, "step": 2512, "utility_loss": 0.9009553790092468 }, { "cosine_similarity": 0.05141576427690214, "epoch": 2.342031686859273, "grad_norm": 1.1652741743124786, "learning_rate": 4.874007594062824e-06, "loss": 1.2992, "reason_loss": 0.49390357732772827, "step": 2513, "utility_loss": 0.8052810430526733 }, { "cosine_similarity": -0.040466821742688716, "epoch": 2.3429636533084808, "grad_norm": 1.0389971737214707, "learning_rate": 4.867103900586814e-06, "loss": 1.4539, "reason_loss": 0.4869260787963867, "step": 2514, "utility_loss": 0.966951847076416 }, { "cosine_similarity": 0.09844559697843634, "epoch": 2.3438956197576886, "grad_norm": 1.2694375056221892, "learning_rate": 4.860200207110805e-06, "loss": 1.5167, "reason_loss": 0.523196816444397, "step": 2515, "utility_loss": 0.9935473203659058 }, { "cosine_similarity": 0.015507536384682939, "epoch": 2.344827586206897, "grad_norm": 1.4080934831638685, "learning_rate": 4.853296513634795e-06, "loss": 1.5004, "reason_loss": 0.48237279057502747, "step": 2516, "utility_loss": 1.0180728435516357 }, { "cosine_similarity": 0.14629919821354376, "epoch": 2.345759552656104, "grad_norm": 1.082534797162814, "learning_rate": 4.846392820158785e-06, "loss": 1.6997, "reason_loss": 0.49411270022392273, "step": 2517, "utility_loss": 1.2056143283843994 }, { "cosine_similarity": -0.004393706126791714, "epoch": 2.3466915191053124, "grad_norm": 1.111799304051371, "learning_rate": 4.839489126682776e-06, "loss": 1.3622, "reason_loss": 0.4842909574508667, "step": 2518, "utility_loss": 0.8779542446136475 }, { "cosine_similarity": -0.006903563578635937, "epoch": 2.34762348555452, "grad_norm": 1.0411105326955368, "learning_rate": 4.832585433206766e-06, "loss": 1.3824, "reason_loss": 0.5016399621963501, "step": 2519, "utility_loss": 0.8807855248451233 }, { "cosine_similarity": -0.025943417841145167, "epoch": 2.348555452003728, "grad_norm": 1.2056713019144907, "learning_rate": 4.825681739730757e-06, "loss": 1.3534, "reason_loss": 0.49292558431625366, "step": 2520, "utility_loss": 0.8604986667633057 }, { "cosine_similarity": 0.1838421232179861, "epoch": 2.3494874184529357, "grad_norm": 1.2741992094966477, "learning_rate": 4.8187780462547465e-06, "loss": 1.4651, "reason_loss": 0.47565957903862, "step": 2521, "utility_loss": 0.9893984198570251 }, { "cosine_similarity": 0.14054556253820155, "epoch": 2.3504193849021435, "grad_norm": 1.1243653933768774, "learning_rate": 4.811874352778737e-06, "loss": 1.0781, "reason_loss": 0.4843006134033203, "step": 2522, "utility_loss": 0.593786358833313 }, { "cosine_similarity": 0.15462026803883053, "epoch": 2.3513513513513513, "grad_norm": 1.046363381391075, "learning_rate": 4.804970659302727e-06, "loss": 1.8207, "reason_loss": 0.47712188959121704, "step": 2523, "utility_loss": 1.3435604572296143 }, { "cosine_similarity": 0.11458699236364812, "epoch": 2.352283317800559, "grad_norm": 1.2475745211522182, "learning_rate": 4.798066965826718e-06, "loss": 1.7692, "reason_loss": 0.4973975121974945, "step": 2524, "utility_loss": 1.2717666625976562 }, { "cosine_similarity": 0.18985453840265507, "epoch": 2.353215284249767, "grad_norm": 1.1060248723357327, "learning_rate": 4.791163272350708e-06, "loss": 1.6579, "reason_loss": 0.49781695008277893, "step": 2525, "utility_loss": 1.1600656509399414 }, { "cosine_similarity": 0.061791862499013396, "epoch": 2.3541472506989747, "grad_norm": 1.1171499918662375, "learning_rate": 4.7842595788746985e-06, "loss": 1.3963, "reason_loss": 0.5088406801223755, "step": 2526, "utility_loss": 0.8874825239181519 }, { "cosine_similarity": -0.04052305422863238, "epoch": 2.355079217148183, "grad_norm": 1.1390428198620572, "learning_rate": 4.777355885398688e-06, "loss": 1.5525, "reason_loss": 0.502377986907959, "step": 2527, "utility_loss": 1.0501291751861572 }, { "cosine_similarity": 0.06758217944872093, "epoch": 2.3560111835973903, "grad_norm": 1.2652456558845946, "learning_rate": 4.770452191922679e-06, "loss": 1.5122, "reason_loss": 0.4762539863586426, "step": 2528, "utility_loss": 1.035987138748169 }, { "cosine_similarity": -0.22486654869873568, "epoch": 2.3569431500465985, "grad_norm": 1.4537267538936112, "learning_rate": 4.76354849844667e-06, "loss": 1.6551, "reason_loss": 0.48996442556381226, "step": 2529, "utility_loss": 1.1651215553283691 }, { "cosine_similarity": 0.04531207281244104, "epoch": 2.3578751164958063, "grad_norm": 1.4030645082502122, "learning_rate": 4.75664480497066e-06, "loss": 1.6518, "reason_loss": 0.49647000432014465, "step": 2530, "utility_loss": 1.1553772687911987 }, { "cosine_similarity": 0.08895224220707948, "epoch": 2.358807082945014, "grad_norm": 1.004640991967533, "learning_rate": 4.7497411114946505e-06, "loss": 1.3378, "reason_loss": 0.5038957595825195, "step": 2531, "utility_loss": 0.8339385986328125 }, { "cosine_similarity": 0.027049810022316538, "epoch": 2.359739049394222, "grad_norm": 1.1762723713676415, "learning_rate": 4.74283741801864e-06, "loss": 1.4726, "reason_loss": 0.4870237708091736, "step": 2532, "utility_loss": 0.9855920076370239 }, { "cosine_similarity": 0.06484613649012086, "epoch": 2.3606710158434296, "grad_norm": 1.156528447685511, "learning_rate": 4.73593372454263e-06, "loss": 1.405, "reason_loss": 0.4691840410232544, "step": 2533, "utility_loss": 0.9357859492301941 }, { "cosine_similarity": 0.20113724606710867, "epoch": 2.3616029822926374, "grad_norm": 1.2571403861570634, "learning_rate": 4.729030031066621e-06, "loss": 1.5925, "reason_loss": 0.493091881275177, "step": 2534, "utility_loss": 1.099437952041626 }, { "cosine_similarity": -0.00658611484130181, "epoch": 2.362534948741845, "grad_norm": 1.4003406352642274, "learning_rate": 4.722126337590611e-06, "loss": 1.2429, "reason_loss": 0.4824884235858917, "step": 2535, "utility_loss": 0.7604389190673828 }, { "cosine_similarity": 0.17597722403542776, "epoch": 2.363466915191053, "grad_norm": 1.1955291580819456, "learning_rate": 4.715222644114602e-06, "loss": 1.533, "reason_loss": 0.46683141589164734, "step": 2536, "utility_loss": 1.0661449432373047 }, { "cosine_similarity": -0.08203910547907711, "epoch": 2.364398881640261, "grad_norm": 1.2486288851822707, "learning_rate": 4.7083189506385915e-06, "loss": 1.3701, "reason_loss": 0.49637550115585327, "step": 2537, "utility_loss": 0.8737234473228455 }, { "cosine_similarity": 0.20559330284494393, "epoch": 2.3653308480894686, "grad_norm": 1.3704241692986179, "learning_rate": 4.701415257162582e-06, "loss": 1.4146, "reason_loss": 0.49750739336013794, "step": 2538, "utility_loss": 0.9170923233032227 }, { "cosine_similarity": -0.004164760240332464, "epoch": 2.3662628145386764, "grad_norm": 1.1492363102402376, "learning_rate": 4.694511563686573e-06, "loss": 1.5361, "reason_loss": 0.4524915814399719, "step": 2539, "utility_loss": 1.0836200714111328 }, { "cosine_similarity": 0.05491894975182509, "epoch": 2.3671947809878846, "grad_norm": 1.0666877654551228, "learning_rate": 4.687607870210563e-06, "loss": 1.5409, "reason_loss": 0.4656161963939667, "step": 2540, "utility_loss": 1.0752705335617065 }, { "cosine_similarity": 0.021765706675204097, "epoch": 2.3681267474370924, "grad_norm": 1.1324029687445232, "learning_rate": 4.6807041767345536e-06, "loss": 2.0716, "reason_loss": 0.4693768322467804, "step": 2541, "utility_loss": 1.6022597551345825 }, { "cosine_similarity": -0.07752760782380366, "epoch": 2.3690587138863, "grad_norm": 1.1803333112164214, "learning_rate": 4.6738004832585435e-06, "loss": 1.247, "reason_loss": 0.49491095542907715, "step": 2542, "utility_loss": 0.7520627379417419 }, { "cosine_similarity": 0.06979775237872408, "epoch": 2.369990680335508, "grad_norm": 1.4386645186117353, "learning_rate": 4.666896789782534e-06, "loss": 1.6576, "reason_loss": 0.5436016321182251, "step": 2543, "utility_loss": 1.114021897315979 }, { "cosine_similarity": -0.03052892211877846, "epoch": 2.3709226467847158, "grad_norm": 1.12295107288247, "learning_rate": 4.659993096306524e-06, "loss": 1.5129, "reason_loss": 0.5020712018013, "step": 2544, "utility_loss": 1.01084303855896 }, { "cosine_similarity": -0.09687663483324085, "epoch": 2.3718546132339235, "grad_norm": 1.2319501780486255, "learning_rate": 4.653089402830515e-06, "loss": 1.269, "reason_loss": 0.48062431812286377, "step": 2545, "utility_loss": 0.7883880138397217 }, { "cosine_similarity": -0.007012119748040617, "epoch": 2.3727865796831313, "grad_norm": 1.2829790348813335, "learning_rate": 4.646185709354505e-06, "loss": 1.4773, "reason_loss": 0.5003791451454163, "step": 2546, "utility_loss": 0.976887583732605 }, { "cosine_similarity": 0.33594582574383436, "epoch": 2.373718546132339, "grad_norm": 1.136448921245562, "learning_rate": 4.6392820158784954e-06, "loss": 1.2987, "reason_loss": 0.4793267250061035, "step": 2547, "utility_loss": 0.8193637132644653 }, { "cosine_similarity": -0.012034275524694141, "epoch": 2.374650512581547, "grad_norm": 1.0383787453591906, "learning_rate": 4.632378322402486e-06, "loss": 1.256, "reason_loss": 0.45716673135757446, "step": 2548, "utility_loss": 0.7988090515136719 }, { "cosine_similarity": 0.08618087854589114, "epoch": 2.3755824790307547, "grad_norm": 1.2864056673760083, "learning_rate": 4.625474628926476e-06, "loss": 1.5749, "reason_loss": 0.469485342502594, "step": 2549, "utility_loss": 1.1053788661956787 }, { "cosine_similarity": 0.22060175847362493, "epoch": 2.3765144454799625, "grad_norm": 1.1315200100690261, "learning_rate": 4.618570935450467e-06, "loss": 1.2127, "reason_loss": 0.47254809737205505, "step": 2550, "utility_loss": 0.7401409149169922 }, { "cosine_similarity": 0.12191746489702832, "epoch": 2.3774464119291707, "grad_norm": 1.2139293180678488, "learning_rate": 4.611667241974457e-06, "loss": 1.4681, "reason_loss": 0.48989951610565186, "step": 2551, "utility_loss": 0.9781947135925293 }, { "cosine_similarity": 0.06663861852465748, "epoch": 2.3783783783783785, "grad_norm": 1.308042772873832, "learning_rate": 4.604763548498447e-06, "loss": 1.3342, "reason_loss": 0.46059900522232056, "step": 2552, "utility_loss": 0.8735579252243042 }, { "cosine_similarity": 0.07314734334511114, "epoch": 2.3793103448275863, "grad_norm": 1.0705182643861777, "learning_rate": 4.597859855022437e-06, "loss": 1.5038, "reason_loss": 0.4862782061100006, "step": 2553, "utility_loss": 1.0174736976623535 }, { "cosine_similarity": -0.11253732925307164, "epoch": 2.380242311276794, "grad_norm": 1.2354727912710912, "learning_rate": 4.590956161546428e-06, "loss": 1.5863, "reason_loss": 0.4750904440879822, "step": 2554, "utility_loss": 1.1112351417541504 }, { "cosine_similarity": 0.09200815594435022, "epoch": 2.381174277726002, "grad_norm": 1.304805843379635, "learning_rate": 4.584052468070418e-06, "loss": 1.4841, "reason_loss": 0.47027578949928284, "step": 2555, "utility_loss": 1.013784646987915 }, { "cosine_similarity": 0.034289811612560825, "epoch": 2.3821062441752097, "grad_norm": 1.1296251050736115, "learning_rate": 4.577148774594408e-06, "loss": 1.4443, "reason_loss": 0.48597657680511475, "step": 2556, "utility_loss": 0.9583240151405334 }, { "cosine_similarity": 0.0516447810561257, "epoch": 2.3830382106244175, "grad_norm": 1.260130510345343, "learning_rate": 4.5702450811183985e-06, "loss": 1.7014, "reason_loss": 0.47923409938812256, "step": 2557, "utility_loss": 1.2221882343292236 }, { "cosine_similarity": 0.18171351304508143, "epoch": 2.3839701770736252, "grad_norm": 1.3465281492313472, "learning_rate": 4.563341387642389e-06, "loss": 1.4125, "reason_loss": 0.4792296886444092, "step": 2558, "utility_loss": 0.933293342590332 }, { "cosine_similarity": 0.09247981161555, "epoch": 2.384902143522833, "grad_norm": 1.1904269597574162, "learning_rate": 4.55643769416638e-06, "loss": 1.3573, "reason_loss": 0.5090090036392212, "step": 2559, "utility_loss": 0.8483191132545471 }, { "cosine_similarity": 0.14943339782009346, "epoch": 2.385834109972041, "grad_norm": 1.2870751463887136, "learning_rate": 4.54953400069037e-06, "loss": 1.3997, "reason_loss": 0.4677984416484833, "step": 2560, "utility_loss": 0.9318636655807495 }, { "cosine_similarity": 0.11397457236270576, "epoch": 2.3867660764212486, "grad_norm": 1.3213761936021744, "learning_rate": 4.54263030721436e-06, "loss": 1.5978, "reason_loss": 0.5108364820480347, "step": 2561, "utility_loss": 1.0869419574737549 }, { "cosine_similarity": -0.0024696401140482677, "epoch": 2.387698042870457, "grad_norm": 1.0590206092841299, "learning_rate": 4.5357266137383505e-06, "loss": 1.2259, "reason_loss": 0.49373140931129456, "step": 2562, "utility_loss": 0.7321350574493408 }, { "cosine_similarity": 0.2056646776139398, "epoch": 2.3886300093196646, "grad_norm": 1.1379031965454751, "learning_rate": 4.52882292026234e-06, "loss": 1.5041, "reason_loss": 0.5072962641716003, "step": 2563, "utility_loss": 0.9968093633651733 }, { "cosine_similarity": 0.020274682883331938, "epoch": 2.3895619757688724, "grad_norm": 1.2988120546361668, "learning_rate": 4.521919226786331e-06, "loss": 1.5737, "reason_loss": 0.5047875642776489, "step": 2564, "utility_loss": 1.068932056427002 }, { "cosine_similarity": 0.11999903723705678, "epoch": 2.39049394221808, "grad_norm": 1.1591014811847686, "learning_rate": 4.515015533310321e-06, "loss": 1.4671, "reason_loss": 0.5003659725189209, "step": 2565, "utility_loss": 0.9666877388954163 }, { "cosine_similarity": 0.12357684532159967, "epoch": 2.391425908667288, "grad_norm": 1.1504225874170824, "learning_rate": 4.508111839834312e-06, "loss": 1.7169, "reason_loss": 0.4728217124938965, "step": 2566, "utility_loss": 1.2440521717071533 }, { "cosine_similarity": 0.07391477664298389, "epoch": 2.392357875116496, "grad_norm": 1.0131075372212779, "learning_rate": 4.501208146358302e-06, "loss": 1.2124, "reason_loss": 0.4962399899959564, "step": 2567, "utility_loss": 0.7161415815353394 }, { "cosine_similarity": 0.10310506232156806, "epoch": 2.3932898415657036, "grad_norm": 1.2584719068065464, "learning_rate": 4.494304452882292e-06, "loss": 1.5094, "reason_loss": 0.47753000259399414, "step": 2568, "utility_loss": 1.0319030284881592 }, { "cosine_similarity": 0.14307520916136324, "epoch": 2.3942218080149114, "grad_norm": 1.3065847548566463, "learning_rate": 4.487400759406283e-06, "loss": 1.6465, "reason_loss": 0.5158864855766296, "step": 2569, "utility_loss": 1.1305828094482422 }, { "cosine_similarity": -0.0851143821254772, "epoch": 2.395153774464119, "grad_norm": 1.1591599855599035, "learning_rate": 4.480497065930273e-06, "loss": 1.3625, "reason_loss": 0.49989163875579834, "step": 2570, "utility_loss": 0.8626498579978943 }, { "cosine_similarity": -0.007527786518284022, "epoch": 2.396085740913327, "grad_norm": 1.1406208580974726, "learning_rate": 4.473593372454264e-06, "loss": 1.554, "reason_loss": 0.47065621614456177, "step": 2571, "utility_loss": 1.0833783149719238 }, { "cosine_similarity": 0.06461134394868241, "epoch": 2.3970177073625347, "grad_norm": 1.1505180644493769, "learning_rate": 4.466689678978254e-06, "loss": 1.6983, "reason_loss": 0.5074118375778198, "step": 2572, "utility_loss": 1.1909130811691284 }, { "cosine_similarity": -0.007429470737696909, "epoch": 2.397949673811743, "grad_norm": 1.386162699709354, "learning_rate": 4.459785985502244e-06, "loss": 1.2194, "reason_loss": 0.4905247092247009, "step": 2573, "utility_loss": 0.7288592457771301 }, { "cosine_similarity": 0.17184848843919273, "epoch": 2.3988816402609507, "grad_norm": 1.3260331923603377, "learning_rate": 4.452882292026234e-06, "loss": 1.3862, "reason_loss": 0.48899635672569275, "step": 2574, "utility_loss": 0.8971694707870483 }, { "cosine_similarity": 0.1325489161381053, "epoch": 2.3998136067101585, "grad_norm": 1.2756895463333109, "learning_rate": 4.445978598550225e-06, "loss": 1.5307, "reason_loss": 0.4651077389717102, "step": 2575, "utility_loss": 1.0656380653381348 }, { "cosine_similarity": -0.0918015344313561, "epoch": 2.4007455731593663, "grad_norm": 1.1064742783826924, "learning_rate": 4.439074905074215e-06, "loss": 1.4425, "reason_loss": 0.4948730766773224, "step": 2576, "utility_loss": 0.9476374387741089 }, { "cosine_similarity": -0.0004188354146749722, "epoch": 2.401677539608574, "grad_norm": 1.1818741019777614, "learning_rate": 4.4321712115982055e-06, "loss": 1.8166, "reason_loss": 0.48809176683425903, "step": 2577, "utility_loss": 1.3285250663757324 }, { "cosine_similarity": -0.20571826232215676, "epoch": 2.402609506057782, "grad_norm": 1.2260192444619558, "learning_rate": 4.4252675181221954e-06, "loss": 1.4325, "reason_loss": 0.48993802070617676, "step": 2578, "utility_loss": 0.9425268173217773 }, { "cosine_similarity": -0.040083416108552775, "epoch": 2.4035414725069897, "grad_norm": 1.260653265852734, "learning_rate": 4.418363824646186e-06, "loss": 1.4759, "reason_loss": 0.4868384003639221, "step": 2579, "utility_loss": 0.9890535473823547 }, { "cosine_similarity": 0.0739084366422214, "epoch": 2.4044734389561975, "grad_norm": 1.433624787809809, "learning_rate": 4.411460131170177e-06, "loss": 1.6395, "reason_loss": 0.45491182804107666, "step": 2580, "utility_loss": 1.1845767498016357 }, { "cosine_similarity": -0.02766892798199054, "epoch": 2.4054054054054053, "grad_norm": 1.0207613854010433, "learning_rate": 4.404556437694167e-06, "loss": 1.1327, "reason_loss": 0.5090140700340271, "step": 2581, "utility_loss": 0.6237128376960754 }, { "cosine_similarity": -0.05893900442625285, "epoch": 2.406337371854613, "grad_norm": 1.1162633142265106, "learning_rate": 4.3976527442181575e-06, "loss": 1.3087, "reason_loss": 0.5116373300552368, "step": 2582, "utility_loss": 0.7970559597015381 }, { "cosine_similarity": 0.1361955497060044, "epoch": 2.407269338303821, "grad_norm": 1.2705330563769326, "learning_rate": 4.390749050742147e-06, "loss": 1.4745, "reason_loss": 0.5107015371322632, "step": 2583, "utility_loss": 0.9638224840164185 }, { "cosine_similarity": 0.07246859261073413, "epoch": 2.408201304753029, "grad_norm": 1.386660423619562, "learning_rate": 4.383845357266137e-06, "loss": 1.4225, "reason_loss": 0.5317810773849487, "step": 2584, "utility_loss": 0.8907631635665894 }, { "cosine_similarity": -0.04229537217482817, "epoch": 2.409133271202237, "grad_norm": 1.2810675500752835, "learning_rate": 4.376941663790128e-06, "loss": 1.5187, "reason_loss": 0.48798394203186035, "step": 2585, "utility_loss": 1.030668020248413 }, { "cosine_similarity": 0.06414661333005844, "epoch": 2.4100652376514446, "grad_norm": 1.2142006661599913, "learning_rate": 4.370037970314118e-06, "loss": 1.4226, "reason_loss": 0.4692170023918152, "step": 2586, "utility_loss": 0.9534167647361755 }, { "cosine_similarity": -0.03582636044496641, "epoch": 2.4109972041006524, "grad_norm": 1.1172346465674414, "learning_rate": 4.363134276838109e-06, "loss": 1.5723, "reason_loss": 0.4998292922973633, "step": 2587, "utility_loss": 1.072515606880188 }, { "cosine_similarity": 0.03437436527364249, "epoch": 2.4119291705498602, "grad_norm": 1.0847571011697095, "learning_rate": 4.3562305833620985e-06, "loss": 1.3279, "reason_loss": 0.4668809175491333, "step": 2588, "utility_loss": 0.8609880208969116 }, { "cosine_similarity": 0.017630003983183474, "epoch": 2.412861136999068, "grad_norm": 1.0950376733720635, "learning_rate": 4.349326889886089e-06, "loss": 1.4264, "reason_loss": 0.48980826139450073, "step": 2589, "utility_loss": 0.9365599751472473 }, { "cosine_similarity": -0.10883214087111502, "epoch": 2.413793103448276, "grad_norm": 1.2483878617518114, "learning_rate": 4.34242319641008e-06, "loss": 1.2654, "reason_loss": 0.4735987186431885, "step": 2590, "utility_loss": 0.7918386459350586 }, { "cosine_similarity": 0.07131987969554172, "epoch": 2.4147250698974836, "grad_norm": 1.3885524970662444, "learning_rate": 4.33551950293407e-06, "loss": 1.6069, "reason_loss": 0.4675898551940918, "step": 2591, "utility_loss": 1.1393227577209473 }, { "cosine_similarity": 0.14283935784732096, "epoch": 2.4156570363466914, "grad_norm": 1.2319449999475562, "learning_rate": 4.328615809458061e-06, "loss": 1.3373, "reason_loss": 0.4811195731163025, "step": 2592, "utility_loss": 0.856143593788147 }, { "cosine_similarity": 0.0514447270487306, "epoch": 2.416589002795899, "grad_norm": 1.0791705988561562, "learning_rate": 4.3217121159820505e-06, "loss": 1.6557, "reason_loss": 0.5095021724700928, "step": 2593, "utility_loss": 1.1461796760559082 }, { "cosine_similarity": -0.02866349445338026, "epoch": 2.417520969245107, "grad_norm": 1.0887719741929478, "learning_rate": 4.314808422506041e-06, "loss": 1.315, "reason_loss": 0.4952574372291565, "step": 2594, "utility_loss": 0.8197873830795288 }, { "cosine_similarity": 0.16734632897245352, "epoch": 2.418452935694315, "grad_norm": 1.2816374431567201, "learning_rate": 4.307904729030031e-06, "loss": 1.8013, "reason_loss": 0.49717381596565247, "step": 2595, "utility_loss": 1.304133653640747 }, { "cosine_similarity": 0.007760933167537894, "epoch": 2.419384902143523, "grad_norm": 1.1242286860052424, "learning_rate": 4.301001035554022e-06, "loss": 1.2658, "reason_loss": 0.5171858668327332, "step": 2596, "utility_loss": 0.748638927936554 }, { "cosine_similarity": 0.04607169533394323, "epoch": 2.4203168685927308, "grad_norm": 1.0226454161588936, "learning_rate": 4.294097342078012e-06, "loss": 1.5378, "reason_loss": 0.5112975239753723, "step": 2597, "utility_loss": 1.0265295505523682 }, { "cosine_similarity": -0.06286843740593467, "epoch": 2.4212488350419386, "grad_norm": 1.2730414444212486, "learning_rate": 4.2871936486020025e-06, "loss": 1.6272, "reason_loss": 0.4958696961402893, "step": 2598, "utility_loss": 1.1313360929489136 }, { "cosine_similarity": 0.2468331599529817, "epoch": 2.4221808014911463, "grad_norm": 1.2797716338804142, "learning_rate": 4.280289955125993e-06, "loss": 1.404, "reason_loss": 0.4600863754749298, "step": 2599, "utility_loss": 0.9439094066619873 }, { "cosine_similarity": 0.18489083119799496, "epoch": 2.423112767940354, "grad_norm": 1.224221048769388, "learning_rate": 4.273386261649983e-06, "loss": 1.5022, "reason_loss": 0.47340673208236694, "step": 2600, "utility_loss": 1.0288081169128418 }, { "cosine_similarity": 0.08533070164817312, "epoch": 2.424044734389562, "grad_norm": 1.8757065554965497, "learning_rate": 4.266482568173974e-06, "loss": 1.7391, "reason_loss": 0.4897281527519226, "step": 2601, "utility_loss": 1.2493226528167725 }, { "cosine_similarity": 0.10484934109821524, "epoch": 2.4249767008387697, "grad_norm": 1.184820216500371, "learning_rate": 4.259578874697964e-06, "loss": 1.4735, "reason_loss": 0.49560585618019104, "step": 2602, "utility_loss": 0.9778532981872559 }, { "cosine_similarity": -0.13858878569859556, "epoch": 2.4259086672879775, "grad_norm": 1.062439948017866, "learning_rate": 4.2526751812219544e-06, "loss": 1.2815, "reason_loss": 0.510216236114502, "step": 2603, "utility_loss": 0.7712792158126831 }, { "cosine_similarity": -0.11092415653166506, "epoch": 2.4268406337371853, "grad_norm": 1.1507322614475128, "learning_rate": 4.245771487745944e-06, "loss": 1.369, "reason_loss": 0.4801018536090851, "step": 2604, "utility_loss": 0.8889183402061462 }, { "cosine_similarity": 0.0457240359266403, "epoch": 2.427772600186393, "grad_norm": 1.1196426165040938, "learning_rate": 4.238867794269935e-06, "loss": 1.5397, "reason_loss": 0.5104017853736877, "step": 2605, "utility_loss": 1.0292491912841797 }, { "cosine_similarity": 0.12794369745194387, "epoch": 2.4287045666356013, "grad_norm": 1.2320779960924158, "learning_rate": 4.231964100793925e-06, "loss": 1.6488, "reason_loss": 0.4984486699104309, "step": 2606, "utility_loss": 1.1503574848175049 }, { "cosine_similarity": 0.10490384733929038, "epoch": 2.429636533084809, "grad_norm": 1.1908720621831765, "learning_rate": 4.225060407317915e-06, "loss": 1.5049, "reason_loss": 0.47840583324432373, "step": 2607, "utility_loss": 1.0264543294906616 }, { "cosine_similarity": -0.041057328930948225, "epoch": 2.430568499534017, "grad_norm": 1.461061056621729, "learning_rate": 4.2181567138419055e-06, "loss": 1.6598, "reason_loss": 0.4748988747596741, "step": 2608, "utility_loss": 1.184903860092163 }, { "cosine_similarity": 0.08299424914428181, "epoch": 2.4315004659832247, "grad_norm": 1.0196608994694534, "learning_rate": 4.211253020365896e-06, "loss": 1.3122, "reason_loss": 0.48240381479263306, "step": 2609, "utility_loss": 0.8298014998435974 }, { "cosine_similarity": -0.06460375647500752, "epoch": 2.4324324324324325, "grad_norm": 1.3201215420427286, "learning_rate": 4.204349326889887e-06, "loss": 1.623, "reason_loss": 0.4789670705795288, "step": 2610, "utility_loss": 1.143983006477356 }, { "cosine_similarity": 0.1254609439421577, "epoch": 2.4333643988816402, "grad_norm": 1.1716188982481888, "learning_rate": 4.197445633413877e-06, "loss": 1.6532, "reason_loss": 0.4946545958518982, "step": 2611, "utility_loss": 1.158545970916748 }, { "cosine_similarity": 0.11763799015183257, "epoch": 2.434296365330848, "grad_norm": 1.2103058022083684, "learning_rate": 4.190541939937867e-06, "loss": 1.4951, "reason_loss": 0.5032768249511719, "step": 2612, "utility_loss": 0.9917871952056885 }, { "cosine_similarity": 0.03627947530529511, "epoch": 2.435228331780056, "grad_norm": 1.0953614753068426, "learning_rate": 4.1836382464618575e-06, "loss": 1.2302, "reason_loss": 0.45477205514907837, "step": 2613, "utility_loss": 0.7754043340682983 }, { "cosine_similarity": 0.02212507038108125, "epoch": 2.4361602982292636, "grad_norm": 1.1156455070206293, "learning_rate": 4.176734552985847e-06, "loss": 1.3265, "reason_loss": 0.4734259843826294, "step": 2614, "utility_loss": 0.853032112121582 }, { "cosine_similarity": -0.1307019169501824, "epoch": 2.4370922646784714, "grad_norm": 1.2575121889624967, "learning_rate": 4.169830859509838e-06, "loss": 1.4672, "reason_loss": 0.48851555585861206, "step": 2615, "utility_loss": 0.9786498546600342 }, { "cosine_similarity": 0.016997175460011754, "epoch": 2.438024231127679, "grad_norm": 1.1562323944761737, "learning_rate": 4.162927166033828e-06, "loss": 1.2008, "reason_loss": 0.472566157579422, "step": 2616, "utility_loss": 0.7282019853591919 }, { "cosine_similarity": 0.027432189705250146, "epoch": 2.4389561975768874, "grad_norm": 1.3511430018476498, "learning_rate": 4.156023472557819e-06, "loss": 1.6393, "reason_loss": 0.4870866537094116, "step": 2617, "utility_loss": 1.1522489786148071 }, { "cosine_similarity": -0.03824129025332783, "epoch": 2.439888164026095, "grad_norm": 1.2195504388492988, "learning_rate": 4.149119779081809e-06, "loss": 1.6075, "reason_loss": 0.46860557794570923, "step": 2618, "utility_loss": 1.1388654708862305 }, { "cosine_similarity": 0.2632780645107828, "epoch": 2.440820130475303, "grad_norm": 1.299289998195637, "learning_rate": 4.142216085605799e-06, "loss": 1.5776, "reason_loss": 0.49546730518341064, "step": 2619, "utility_loss": 1.0820953845977783 }, { "cosine_similarity": -0.11118330313658532, "epoch": 2.441752096924511, "grad_norm": 1.4000881457817718, "learning_rate": 4.13531239212979e-06, "loss": 1.4541, "reason_loss": 0.49398890137672424, "step": 2620, "utility_loss": 0.9601423740386963 }, { "cosine_similarity": 0.07216187601633588, "epoch": 2.4426840633737186, "grad_norm": 1.1145019951369306, "learning_rate": 4.12840869865378e-06, "loss": 1.4548, "reason_loss": 0.5073672533035278, "step": 2621, "utility_loss": 0.9473903775215149 }, { "cosine_similarity": 0.04137930613664578, "epoch": 2.4436160298229264, "grad_norm": 1.1849940740232818, "learning_rate": 4.121505005177771e-06, "loss": 1.427, "reason_loss": 0.47548460960388184, "step": 2622, "utility_loss": 0.9515038728713989 }, { "cosine_similarity": 0.02959920436110106, "epoch": 2.444547996272134, "grad_norm": 1.1761199860169005, "learning_rate": 4.114601311701761e-06, "loss": 1.4949, "reason_loss": 0.4689621329307556, "step": 2623, "utility_loss": 1.025926113128662 }, { "cosine_similarity": -0.09888666142527654, "epoch": 2.445479962721342, "grad_norm": 1.1554267713633153, "learning_rate": 4.107697618225751e-06, "loss": 1.2237, "reason_loss": 0.48102739453315735, "step": 2624, "utility_loss": 0.742674708366394 }, { "cosine_similarity": 0.015054875820778364, "epoch": 2.4464119291705497, "grad_norm": 1.2369385375324409, "learning_rate": 4.100793924749741e-06, "loss": 1.6392, "reason_loss": 0.4843177795410156, "step": 2625, "utility_loss": 1.1548854112625122 }, { "cosine_similarity": 0.02003793364880418, "epoch": 2.4473438956197575, "grad_norm": 1.0948343972676327, "learning_rate": 4.093890231273732e-06, "loss": 1.4099, "reason_loss": 0.48016318678855896, "step": 2626, "utility_loss": 0.9297508001327515 }, { "cosine_similarity": -0.18930983653703531, "epoch": 2.4482758620689653, "grad_norm": 1.3191142826387088, "learning_rate": 4.086986537797722e-06, "loss": 1.5749, "reason_loss": 0.4763249456882477, "step": 2627, "utility_loss": 1.0985589027404785 }, { "cosine_similarity": -0.07936533482884259, "epoch": 2.4492078285181735, "grad_norm": 1.207319933402445, "learning_rate": 4.0800828443217126e-06, "loss": 1.3063, "reason_loss": 0.4837977886199951, "step": 2628, "utility_loss": 0.8224848508834839 }, { "cosine_similarity": 0.06052250676177402, "epoch": 2.4501397949673813, "grad_norm": 1.1199893885192014, "learning_rate": 4.073179150845703e-06, "loss": 1.283, "reason_loss": 0.5006141662597656, "step": 2629, "utility_loss": 0.7823834419250488 }, { "cosine_similarity": 0.044001499518292186, "epoch": 2.451071761416589, "grad_norm": 1.245520582927911, "learning_rate": 4.066275457369693e-06, "loss": 1.6481, "reason_loss": 0.4742882251739502, "step": 2630, "utility_loss": 1.1738173961639404 }, { "cosine_similarity": 0.03831366799665233, "epoch": 2.452003727865797, "grad_norm": 1.1217016270382212, "learning_rate": 4.059371763893684e-06, "loss": 1.6351, "reason_loss": 0.4861705005168915, "step": 2631, "utility_loss": 1.1489179134368896 }, { "cosine_similarity": 0.03553670473012864, "epoch": 2.4529356943150047, "grad_norm": 1.1183497732795613, "learning_rate": 4.052468070417674e-06, "loss": 1.3304, "reason_loss": 0.47940531373023987, "step": 2632, "utility_loss": 0.8510279655456543 }, { "cosine_similarity": 0.15022110072722505, "epoch": 2.4538676607642125, "grad_norm": 1.2660668056718445, "learning_rate": 4.0455643769416645e-06, "loss": 1.5424, "reason_loss": 0.4792076349258423, "step": 2633, "utility_loss": 1.063172459602356 }, { "cosine_similarity": 0.09922928810866553, "epoch": 2.4547996272134203, "grad_norm": 1.0199903007784443, "learning_rate": 4.0386606834656544e-06, "loss": 1.4359, "reason_loss": 0.45890870690345764, "step": 2634, "utility_loss": 0.9770045280456543 }, { "cosine_similarity": -0.0524226483400031, "epoch": 2.455731593662628, "grad_norm": 1.2256689730523664, "learning_rate": 4.031756989989644e-06, "loss": 1.4876, "reason_loss": 0.47462746500968933, "step": 2635, "utility_loss": 1.012995958328247 }, { "cosine_similarity": 0.06757488619825046, "epoch": 2.456663560111836, "grad_norm": 1.2708375761754793, "learning_rate": 4.024853296513635e-06, "loss": 1.7466, "reason_loss": 0.4566666781902313, "step": 2636, "utility_loss": 1.2899219989776611 }, { "cosine_similarity": 0.18422900384873747, "epoch": 2.4575955265610436, "grad_norm": 1.3454484862317009, "learning_rate": 4.017949603037625e-06, "loss": 1.5275, "reason_loss": 0.5003824830055237, "step": 2637, "utility_loss": 1.0271265506744385 }, { "cosine_similarity": 0.1589945717372124, "epoch": 2.4585274930102514, "grad_norm": 1.5137348261800359, "learning_rate": 4.011045909561616e-06, "loss": 1.3195, "reason_loss": 0.47602713108062744, "step": 2638, "utility_loss": 0.8434572815895081 }, { "cosine_similarity": -0.022865098134087294, "epoch": 2.4594594594594597, "grad_norm": 1.0179484923148843, "learning_rate": 4.004142216085606e-06, "loss": 1.4392, "reason_loss": 0.49481937289237976, "step": 2639, "utility_loss": 0.9444088935852051 }, { "cosine_similarity": 0.12173026554810662, "epoch": 2.4603914259086674, "grad_norm": 1.4034748791901017, "learning_rate": 3.997238522609596e-06, "loss": 1.428, "reason_loss": 0.47080618143081665, "step": 2640, "utility_loss": 0.9572309255599976 }, { "cosine_similarity": 0.012682247874015532, "epoch": 2.4613233923578752, "grad_norm": 1.109235724088455, "learning_rate": 3.990334829133587e-06, "loss": 1.5582, "reason_loss": 0.49523624777793884, "step": 2641, "utility_loss": 1.0629385709762573 }, { "cosine_similarity": 0.1316940066025505, "epoch": 2.462255358807083, "grad_norm": 1.1848506781445216, "learning_rate": 3.983431135657577e-06, "loss": 1.293, "reason_loss": 0.4785116910934448, "step": 2642, "utility_loss": 0.8145191073417664 }, { "cosine_similarity": 0.010154987142909026, "epoch": 2.463187325256291, "grad_norm": 1.0315109799593878, "learning_rate": 3.976527442181568e-06, "loss": 1.4688, "reason_loss": 0.4812154769897461, "step": 2643, "utility_loss": 0.9875362515449524 }, { "cosine_similarity": 0.07966273898123315, "epoch": 2.4641192917054986, "grad_norm": 1.0822855721631397, "learning_rate": 3.9696237487055575e-06, "loss": 1.4694, "reason_loss": 0.483590304851532, "step": 2644, "utility_loss": 0.9857622385025024 }, { "cosine_similarity": -0.06330423987247993, "epoch": 2.4650512581547064, "grad_norm": 1.3127679693219927, "learning_rate": 3.962720055229548e-06, "loss": 1.4763, "reason_loss": 0.4410789906978607, "step": 2645, "utility_loss": 1.035196304321289 }, { "cosine_similarity": -0.12961352863944336, "epoch": 2.465983224603914, "grad_norm": 1.2475280949284826, "learning_rate": 3.955816361753538e-06, "loss": 1.3879, "reason_loss": 0.5057145357131958, "step": 2646, "utility_loss": 0.8822286128997803 }, { "cosine_similarity": -0.009725080973683776, "epoch": 2.466915191053122, "grad_norm": 1.0128815601757433, "learning_rate": 3.948912668277529e-06, "loss": 1.369, "reason_loss": 0.4982820749282837, "step": 2647, "utility_loss": 0.8707156181335449 }, { "cosine_similarity": 0.16460018660789755, "epoch": 2.4678471575023297, "grad_norm": 1.296017484500723, "learning_rate": 3.942008974801519e-06, "loss": 1.3046, "reason_loss": 0.4729114770889282, "step": 2648, "utility_loss": 0.8317043781280518 }, { "cosine_similarity": 0.1319972472781448, "epoch": 2.4687791239515375, "grad_norm": 1.1335171048396244, "learning_rate": 3.9351052813255095e-06, "loss": 1.4244, "reason_loss": 0.4751632809638977, "step": 2649, "utility_loss": 0.9492339491844177 }, { "cosine_similarity": -0.10592835690062986, "epoch": 2.4697110904007458, "grad_norm": 1.0880717121946675, "learning_rate": 3.9282015878495e-06, "loss": 1.2456, "reason_loss": 0.5106579661369324, "step": 2650, "utility_loss": 0.7349295616149902 }, { "cosine_similarity": 0.10079034605779336, "epoch": 2.4706430568499536, "grad_norm": 1.3440693486181832, "learning_rate": 3.92129789437349e-06, "loss": 1.4663, "reason_loss": 0.4591714143753052, "step": 2651, "utility_loss": 1.0071489810943604 }, { "cosine_similarity": 0.04512275839594952, "epoch": 2.4715750232991613, "grad_norm": 1.1095908479555037, "learning_rate": 3.914394200897481e-06, "loss": 1.5154, "reason_loss": 0.5015189051628113, "step": 2652, "utility_loss": 1.0139002799987793 }, { "cosine_similarity": -0.00892261573502345, "epoch": 2.472506989748369, "grad_norm": 1.1184981860083942, "learning_rate": 3.907490507421471e-06, "loss": 1.5338, "reason_loss": 0.48653116822242737, "step": 2653, "utility_loss": 1.0473072528839111 }, { "cosine_similarity": 0.1279391014834495, "epoch": 2.473438956197577, "grad_norm": 1.234925980141719, "learning_rate": 3.9005868139454615e-06, "loss": 1.4034, "reason_loss": 0.49121764302253723, "step": 2654, "utility_loss": 0.9121462106704712 }, { "cosine_similarity": 0.06708622615959724, "epoch": 2.4743709226467847, "grad_norm": 1.1144220625532852, "learning_rate": 3.893683120469451e-06, "loss": 1.7842, "reason_loss": 0.5050172805786133, "step": 2655, "utility_loss": 1.2792103290557861 }, { "cosine_similarity": 0.1741064226461576, "epoch": 2.4753028890959925, "grad_norm": 1.285077373525901, "learning_rate": 3.886779426993442e-06, "loss": 2.0699, "reason_loss": 0.49734050035476685, "step": 2656, "utility_loss": 1.5725281238555908 }, { "cosine_similarity": -0.05425354440866427, "epoch": 2.4762348555452003, "grad_norm": 1.295485847023279, "learning_rate": 3.879875733517432e-06, "loss": 1.1778, "reason_loss": 0.4494721293449402, "step": 2657, "utility_loss": 0.7283003330230713 }, { "cosine_similarity": -0.18922859250181995, "epoch": 2.477166821994408, "grad_norm": 1.309922387931771, "learning_rate": 3.872972040041422e-06, "loss": 1.5119, "reason_loss": 0.46893447637557983, "step": 2658, "utility_loss": 1.043014407157898 }, { "cosine_similarity": -0.04522360035186763, "epoch": 2.478098788443616, "grad_norm": 1.2495739743407037, "learning_rate": 3.866068346565413e-06, "loss": 1.4515, "reason_loss": 0.49567922949790955, "step": 2659, "utility_loss": 0.9558113813400269 }, { "cosine_similarity": 0.1827845959272522, "epoch": 2.4790307548928237, "grad_norm": 1.1600277611464023, "learning_rate": 3.859164653089403e-06, "loss": 1.6341, "reason_loss": 0.4898681044578552, "step": 2660, "utility_loss": 1.144197940826416 }, { "cosine_similarity": 0.23394032978275048, "epoch": 2.479962721342032, "grad_norm": 1.2489029895641168, "learning_rate": 3.852260959613394e-06, "loss": 1.4491, "reason_loss": 0.4782263934612274, "step": 2661, "utility_loss": 0.9708978533744812 }, { "cosine_similarity": -0.044030583303386994, "epoch": 2.4808946877912397, "grad_norm": 1.5936115926807866, "learning_rate": 3.845357266137384e-06, "loss": 1.5602, "reason_loss": 0.5120605826377869, "step": 2662, "utility_loss": 1.0481514930725098 }, { "cosine_similarity": 0.11160913889615881, "epoch": 2.4818266542404475, "grad_norm": 1.3948662003183394, "learning_rate": 3.838453572661374e-06, "loss": 1.3972, "reason_loss": 0.5104029178619385, "step": 2663, "utility_loss": 0.8868277668952942 }, { "cosine_similarity": -0.028075436597107876, "epoch": 2.4827586206896552, "grad_norm": 1.147273035925188, "learning_rate": 3.8315498791853645e-06, "loss": 1.5085, "reason_loss": 0.4628787636756897, "step": 2664, "utility_loss": 1.0456315279006958 }, { "cosine_similarity": -0.019702920436123317, "epoch": 2.483690587138863, "grad_norm": 1.03955516224076, "learning_rate": 3.8246461857093544e-06, "loss": 1.1888, "reason_loss": 0.4780600666999817, "step": 2665, "utility_loss": 0.7107736468315125 }, { "cosine_similarity": 0.34400841663042014, "epoch": 2.484622553588071, "grad_norm": 1.3350904204963212, "learning_rate": 3.817742492233345e-06, "loss": 1.4655, "reason_loss": 0.49126529693603516, "step": 2666, "utility_loss": 0.9741978049278259 }, { "cosine_similarity": 0.08302902366493968, "epoch": 2.4855545200372786, "grad_norm": 1.2949684588700423, "learning_rate": 3.8108387987573355e-06, "loss": 1.4417, "reason_loss": 0.5105588436126709, "step": 2667, "utility_loss": 0.9311585426330566 }, { "cosine_similarity": 0.03774723711745433, "epoch": 2.4864864864864864, "grad_norm": 1.126774410959279, "learning_rate": 3.8039351052813258e-06, "loss": 1.4454, "reason_loss": 0.46244338154792786, "step": 2668, "utility_loss": 0.9829795360565186 }, { "cosine_similarity": -0.1464930174722078, "epoch": 2.487418452935694, "grad_norm": 1.2139278652774552, "learning_rate": 3.7970314118053165e-06, "loss": 1.6121, "reason_loss": 0.45602869987487793, "step": 2669, "utility_loss": 1.156084418296814 }, { "cosine_similarity": 0.07350183452677891, "epoch": 2.488350419384902, "grad_norm": 1.0322971256216047, "learning_rate": 3.790127718329307e-06, "loss": 1.4038, "reason_loss": 0.4998505115509033, "step": 2670, "utility_loss": 0.9039644002914429 }, { "cosine_similarity": 0.03022414567814981, "epoch": 2.4892823858341098, "grad_norm": 1.142196746884278, "learning_rate": 3.783224024853297e-06, "loss": 1.5859, "reason_loss": 0.5018818974494934, "step": 2671, "utility_loss": 1.0840058326721191 }, { "cosine_similarity": 0.09132122655345462, "epoch": 2.490214352283318, "grad_norm": 1.0311266264070065, "learning_rate": 3.7763203313772874e-06, "loss": 1.2388, "reason_loss": 0.4787612855434418, "step": 2672, "utility_loss": 0.7600648403167725 }, { "cosine_similarity": 0.10244219809327189, "epoch": 2.491146318732526, "grad_norm": 1.2109666121954161, "learning_rate": 3.7694166379012777e-06, "loss": 1.6648, "reason_loss": 0.5362010598182678, "step": 2673, "utility_loss": 1.1286087036132812 }, { "cosine_similarity": 0.04477430960875341, "epoch": 2.4920782851817336, "grad_norm": 1.1009514734101973, "learning_rate": 3.7625129444252676e-06, "loss": 1.4689, "reason_loss": 0.4758933484554291, "step": 2674, "utility_loss": 0.9929724335670471 }, { "cosine_similarity": 0.15495385549943172, "epoch": 2.4930102516309414, "grad_norm": 1.285311750781085, "learning_rate": 3.755609250949258e-06, "loss": 1.5255, "reason_loss": 0.5012046098709106, "step": 2675, "utility_loss": 1.024278163909912 }, { "cosine_similarity": -0.04766543882547631, "epoch": 2.493942218080149, "grad_norm": 1.2962288412812164, "learning_rate": 3.7487055574732483e-06, "loss": 1.5981, "reason_loss": 0.48457595705986023, "step": 2676, "utility_loss": 1.1135457754135132 }, { "cosine_similarity": 0.04929903740899481, "epoch": 2.494874184529357, "grad_norm": 1.1720442782572251, "learning_rate": 3.7418018639972386e-06, "loss": 1.5791, "reason_loss": 0.48865365982055664, "step": 2677, "utility_loss": 1.0904868841171265 }, { "cosine_similarity": 0.0167955224730526, "epoch": 2.4958061509785647, "grad_norm": 1.0693550772894174, "learning_rate": 3.734898170521229e-06, "loss": 1.1493, "reason_loss": 0.48762673139572144, "step": 2678, "utility_loss": 0.6616818308830261 }, { "cosine_similarity": 0.028569238774109683, "epoch": 2.4967381174277725, "grad_norm": 0.9905923343707989, "learning_rate": 3.7279944770452196e-06, "loss": 1.6345, "reason_loss": 0.5078020095825195, "step": 2679, "utility_loss": 1.1266968250274658 }, { "cosine_similarity": 0.09602482626306383, "epoch": 2.4976700838769803, "grad_norm": 1.1943207924185646, "learning_rate": 3.72109078356921e-06, "loss": 1.3929, "reason_loss": 0.5110379457473755, "step": 2680, "utility_loss": 0.8818604946136475 }, { "cosine_similarity": 0.09698813052754443, "epoch": 2.498602050326188, "grad_norm": 1.2280293167113923, "learning_rate": 3.7141870900932002e-06, "loss": 1.4334, "reason_loss": 0.49926698207855225, "step": 2681, "utility_loss": 0.934135913848877 }, { "cosine_similarity": 0.13459939878948202, "epoch": 2.499534016775396, "grad_norm": 1.1317862001669374, "learning_rate": 3.7072833966171905e-06, "loss": 1.583, "reason_loss": 0.4763046205043793, "step": 2682, "utility_loss": 1.1066465377807617 }, { "cosine_similarity": -0.0756707334286387, "epoch": 2.500465983224604, "grad_norm": 1.2101679123241866, "learning_rate": 3.700379703141181e-06, "loss": 1.6121, "reason_loss": 0.47065863013267517, "step": 2683, "utility_loss": 1.141453742980957 }, { "cosine_similarity": 0.051694095462194514, "epoch": 2.501397949673812, "grad_norm": 1.2371353869747985, "learning_rate": 3.693476009665171e-06, "loss": 1.5387, "reason_loss": 0.47546499967575073, "step": 2684, "utility_loss": 1.0632460117340088 }, { "cosine_similarity": 0.07136595787758147, "epoch": 2.5023299161230197, "grad_norm": 1.0677425178525848, "learning_rate": 3.6865723161891615e-06, "loss": 1.616, "reason_loss": 0.5063508749008179, "step": 2685, "utility_loss": 1.1096224784851074 }, { "cosine_similarity": 0.11126791977494568, "epoch": 2.5032618825722275, "grad_norm": 1.044067189738865, "learning_rate": 3.6796686227131518e-06, "loss": 1.4105, "reason_loss": 0.5085585713386536, "step": 2686, "utility_loss": 0.9019837975502014 }, { "cosine_similarity": 0.04407629958645181, "epoch": 2.5041938490214353, "grad_norm": 1.3214705875490889, "learning_rate": 3.672764929237142e-06, "loss": 1.6841, "reason_loss": 0.48576152324676514, "step": 2687, "utility_loss": 1.1983387470245361 }, { "cosine_similarity": 0.006559005978817175, "epoch": 2.505125815470643, "grad_norm": 1.0793275133873281, "learning_rate": 3.6658612357611324e-06, "loss": 1.3652, "reason_loss": 0.4843660295009613, "step": 2688, "utility_loss": 0.8808221220970154 }, { "cosine_similarity": 0.08536449824766548, "epoch": 2.506057781919851, "grad_norm": 1.066702374979441, "learning_rate": 3.6589575422851227e-06, "loss": 1.2926, "reason_loss": 0.5059462189674377, "step": 2689, "utility_loss": 0.7866787314414978 }, { "cosine_similarity": 0.013921646417795721, "epoch": 2.5069897483690586, "grad_norm": 1.2335972566934132, "learning_rate": 3.6520538488091134e-06, "loss": 1.5974, "reason_loss": 0.5029837489128113, "step": 2690, "utility_loss": 1.0944026708602905 }, { "cosine_similarity": -0.0647457034863085, "epoch": 2.5079217148182664, "grad_norm": 1.2050300911194602, "learning_rate": 3.6451501553331037e-06, "loss": 1.6242, "reason_loss": 0.49661654233932495, "step": 2691, "utility_loss": 1.1275464296340942 }, { "cosine_similarity": -0.007302219484095189, "epoch": 2.508853681267474, "grad_norm": 1.3578322484022087, "learning_rate": 3.638246461857094e-06, "loss": 1.6329, "reason_loss": 0.47410476207733154, "step": 2692, "utility_loss": 1.1587979793548584 }, { "cosine_similarity": 0.15764992596393318, "epoch": 2.509785647716682, "grad_norm": 1.1944461710267147, "learning_rate": 3.6313427683810844e-06, "loss": 1.5359, "reason_loss": 0.47257739305496216, "step": 2693, "utility_loss": 1.063307523727417 }, { "cosine_similarity": -0.050152620134067794, "epoch": 2.5107176141658902, "grad_norm": 1.1703871668570136, "learning_rate": 3.6244390749050747e-06, "loss": 1.4527, "reason_loss": 0.4982280731201172, "step": 2694, "utility_loss": 0.9545075297355652 }, { "cosine_similarity": -0.035621382353775016, "epoch": 2.511649580615098, "grad_norm": 1.1517925862736866, "learning_rate": 3.617535381429065e-06, "loss": 1.7548, "reason_loss": 0.482070654630661, "step": 2695, "utility_loss": 1.2727354764938354 }, { "cosine_similarity": 0.0882637623781122, "epoch": 2.512581547064306, "grad_norm": 1.0891878624189597, "learning_rate": 3.6106316879530553e-06, "loss": 1.5447, "reason_loss": 0.49419689178466797, "step": 2696, "utility_loss": 1.0505138635635376 }, { "cosine_similarity": -0.23521565198364933, "epoch": 2.5135135135135136, "grad_norm": 1.2650251306076126, "learning_rate": 3.603727994477045e-06, "loss": 1.1975, "reason_loss": 0.4816094636917114, "step": 2697, "utility_loss": 0.7158759236335754 }, { "cosine_similarity": 0.0254348356391117, "epoch": 2.5144454799627214, "grad_norm": 1.8163271009929987, "learning_rate": 3.5968243010010355e-06, "loss": 1.8163, "reason_loss": 0.5005919933319092, "step": 2698, "utility_loss": 1.3157472610473633 }, { "cosine_similarity": -0.12394034762468288, "epoch": 2.515377446411929, "grad_norm": 1.117651403555562, "learning_rate": 3.5899206075250258e-06, "loss": 1.2685, "reason_loss": 0.470502108335495, "step": 2699, "utility_loss": 0.7979790568351746 }, { "cosine_similarity": 0.1552563568213792, "epoch": 2.516309412861137, "grad_norm": 1.6158971192311278, "learning_rate": 3.583016914049017e-06, "loss": 1.4282, "reason_loss": 0.4751385450363159, "step": 2700, "utility_loss": 0.9530491232872009 }, { "cosine_similarity": 0.02017207371723206, "epoch": 2.5172413793103448, "grad_norm": 1.3543975992490889, "learning_rate": 3.576113220573007e-06, "loss": 1.5765, "reason_loss": 0.4578976631164551, "step": 2701, "utility_loss": 1.1186378002166748 }, { "cosine_similarity": 0.1165079561347516, "epoch": 2.5181733457595525, "grad_norm": 1.0774133431432091, "learning_rate": 3.569209527096997e-06, "loss": 1.4524, "reason_loss": 0.518653154373169, "step": 2702, "utility_loss": 0.9337809085845947 }, { "cosine_similarity": 0.1414693097773244, "epoch": 2.5191053122087603, "grad_norm": 1.2286908730357127, "learning_rate": 3.5623058336209874e-06, "loss": 1.5366, "reason_loss": 0.45577186346054077, "step": 2703, "utility_loss": 1.080796718597412 }, { "cosine_similarity": 0.21461294196686068, "epoch": 2.520037278657968, "grad_norm": 1.213901687807786, "learning_rate": 3.5554021401449777e-06, "loss": 1.319, "reason_loss": 0.46811407804489136, "step": 2704, "utility_loss": 0.8509336113929749 }, { "cosine_similarity": 0.01568803172503518, "epoch": 2.5209692451071763, "grad_norm": 1.3679126183219243, "learning_rate": 3.548498446668968e-06, "loss": 1.3921, "reason_loss": 0.5379276275634766, "step": 2705, "utility_loss": 0.8541254997253418 }, { "cosine_similarity": 0.12657728650522987, "epoch": 2.5219012115563837, "grad_norm": 1.3542625760461489, "learning_rate": 3.5415947531929584e-06, "loss": 1.7756, "reason_loss": 0.4797448217868805, "step": 2706, "utility_loss": 1.2958931922912598 }, { "cosine_similarity": 0.08468853627201582, "epoch": 2.522833178005592, "grad_norm": 1.075975932570889, "learning_rate": 3.5346910597169487e-06, "loss": 1.3822, "reason_loss": 0.49468135833740234, "step": 2707, "utility_loss": 0.8874842524528503 }, { "cosine_similarity": -0.06637930016709086, "epoch": 2.5237651444547997, "grad_norm": 1.2096793795537373, "learning_rate": 3.527787366240939e-06, "loss": 1.4906, "reason_loss": 0.49291637539863586, "step": 2708, "utility_loss": 0.9977332353591919 }, { "cosine_similarity": 0.017527544820945064, "epoch": 2.5246971109040075, "grad_norm": 1.273968054852911, "learning_rate": 3.5208836727649293e-06, "loss": 1.5592, "reason_loss": 0.4790732264518738, "step": 2709, "utility_loss": 1.0800974369049072 }, { "cosine_similarity": -0.035908006318094834, "epoch": 2.5256290773532153, "grad_norm": 1.4348220185043703, "learning_rate": 3.51397997928892e-06, "loss": 1.6349, "reason_loss": 0.4827943444252014, "step": 2710, "utility_loss": 1.1521034240722656 }, { "cosine_similarity": 0.3476409748005473, "epoch": 2.526561043802423, "grad_norm": 1.6792794464843495, "learning_rate": 3.5070762858129103e-06, "loss": 1.7327, "reason_loss": 0.4965870678424835, "step": 2711, "utility_loss": 1.2361143827438354 }, { "cosine_similarity": 0.16053449276857415, "epoch": 2.527493010251631, "grad_norm": 1.2088604070022801, "learning_rate": 3.5001725923369006e-06, "loss": 1.3193, "reason_loss": 0.4745216965675354, "step": 2712, "utility_loss": 0.8448082208633423 }, { "cosine_similarity": 0.030802204321844336, "epoch": 2.5284249767008387, "grad_norm": 1.157884309005545, "learning_rate": 3.493268898860891e-06, "loss": 1.7117, "reason_loss": 0.48460546135902405, "step": 2713, "utility_loss": 1.2271242141723633 }, { "cosine_similarity": 0.081616731413249, "epoch": 2.5293569431500464, "grad_norm": 1.2831224108398718, "learning_rate": 3.4863652053848813e-06, "loss": 1.7254, "reason_loss": 0.4741612672805786, "step": 2714, "utility_loss": 1.2512528896331787 }, { "cosine_similarity": 0.1600487874672892, "epoch": 2.5302889095992542, "grad_norm": 1.2572800780454458, "learning_rate": 3.4794615119088716e-06, "loss": 1.4974, "reason_loss": 0.5131689310073853, "step": 2715, "utility_loss": 0.9842573404312134 }, { "cosine_similarity": -0.06354953581987433, "epoch": 2.5312208760484625, "grad_norm": 1.1564065956507106, "learning_rate": 3.472557818432862e-06, "loss": 1.6312, "reason_loss": 0.46038708090782166, "step": 2716, "utility_loss": 1.1707981824874878 }, { "cosine_similarity": 0.057661585786242475, "epoch": 2.53215284249767, "grad_norm": 1.2062198713331458, "learning_rate": 3.465654124956852e-06, "loss": 1.4374, "reason_loss": 0.5199655294418335, "step": 2717, "utility_loss": 0.9174270629882812 }, { "cosine_similarity": -0.013098791727550844, "epoch": 2.533084808946878, "grad_norm": 1.2599376821720276, "learning_rate": 3.4587504314808425e-06, "loss": 1.4591, "reason_loss": 0.5125368237495422, "step": 2718, "utility_loss": 0.9465420842170715 }, { "cosine_similarity": -0.014582171200446121, "epoch": 2.534016775396086, "grad_norm": 1.164704229863628, "learning_rate": 3.451846738004833e-06, "loss": 1.3375, "reason_loss": 0.4934108853340149, "step": 2719, "utility_loss": 0.8440984487533569 }, { "cosine_similarity": 0.10473155091310943, "epoch": 2.5349487418452936, "grad_norm": 1.0340031053875285, "learning_rate": 3.4449430445288235e-06, "loss": 1.472, "reason_loss": 0.4785390794277191, "step": 2720, "utility_loss": 0.9934636354446411 }, { "cosine_similarity": -0.028149306912864904, "epoch": 2.5358807082945014, "grad_norm": 1.4346334317840026, "learning_rate": 3.438039351052814e-06, "loss": 1.4995, "reason_loss": 0.4630175530910492, "step": 2721, "utility_loss": 1.0364741086959839 }, { "cosine_similarity": 0.00017259429238930643, "epoch": 2.536812674743709, "grad_norm": 1.1788908926223909, "learning_rate": 3.431135657576804e-06, "loss": 1.2554, "reason_loss": 0.4980955123901367, "step": 2722, "utility_loss": 0.7573386430740356 }, { "cosine_similarity": 0.013403936514648074, "epoch": 2.537744641192917, "grad_norm": 1.1842529476597141, "learning_rate": 3.4242319641007945e-06, "loss": 1.55, "reason_loss": 0.46164819598197937, "step": 2723, "utility_loss": 1.088343620300293 }, { "cosine_similarity": 0.11262702022296692, "epoch": 2.5386766076421248, "grad_norm": 1.2078880066004696, "learning_rate": 3.4173282706247844e-06, "loss": 1.425, "reason_loss": 0.4620512127876282, "step": 2724, "utility_loss": 0.9628996253013611 }, { "cosine_similarity": 0.09017897247246877, "epoch": 2.5396085740913326, "grad_norm": 1.134097673203147, "learning_rate": 3.4104245771487747e-06, "loss": 1.2835, "reason_loss": 0.4772089123725891, "step": 2725, "utility_loss": 0.8062559366226196 }, { "cosine_similarity": 0.08844911084458273, "epoch": 2.5405405405405403, "grad_norm": 1.371111155495934, "learning_rate": 3.403520883672765e-06, "loss": 1.6053, "reason_loss": 0.47810864448547363, "step": 2726, "utility_loss": 1.1272156238555908 }, { "cosine_similarity": 0.14240469403451897, "epoch": 2.5414725069897486, "grad_norm": 1.1926241463231566, "learning_rate": 3.3966171901967553e-06, "loss": 1.6216, "reason_loss": 0.4798349142074585, "step": 2727, "utility_loss": 1.1417691707611084 }, { "cosine_similarity": 0.15618443503236407, "epoch": 2.542404473438956, "grad_norm": 1.1849984690041961, "learning_rate": 3.3897134967207456e-06, "loss": 1.8433, "reason_loss": 0.4681902527809143, "step": 2728, "utility_loss": 1.3751068115234375 }, { "cosine_similarity": 0.2125342480081938, "epoch": 2.543336439888164, "grad_norm": 1.4945604334181597, "learning_rate": 3.382809803244736e-06, "loss": 1.507, "reason_loss": 0.4831625819206238, "step": 2729, "utility_loss": 1.0238068103790283 }, { "cosine_similarity": 0.3664559491516699, "epoch": 2.544268406337372, "grad_norm": 1.1774554873683507, "learning_rate": 3.3759061097687266e-06, "loss": 1.5254, "reason_loss": 0.4644436538219452, "step": 2730, "utility_loss": 1.0609737634658813 }, { "cosine_similarity": 0.059265306797499355, "epoch": 2.5452003727865797, "grad_norm": 1.3621972985911752, "learning_rate": 3.369002416292717e-06, "loss": 1.7185, "reason_loss": 0.5004668235778809, "step": 2731, "utility_loss": 1.2179853916168213 }, { "cosine_similarity": -0.06389962444631879, "epoch": 2.5461323392357875, "grad_norm": 1.1945634623711963, "learning_rate": 3.3620987228167072e-06, "loss": 1.4528, "reason_loss": 0.47496506571769714, "step": 2732, "utility_loss": 0.9778734445571899 }, { "cosine_similarity": 0.05100450256844128, "epoch": 2.5470643056849953, "grad_norm": 0.9805896153783497, "learning_rate": 3.3551950293406976e-06, "loss": 1.3773, "reason_loss": 0.5005172491073608, "step": 2733, "utility_loss": 0.8767984509468079 }, { "cosine_similarity": 0.21865063570251728, "epoch": 2.547996272134203, "grad_norm": 1.371982154866672, "learning_rate": 3.348291335864688e-06, "loss": 1.4706, "reason_loss": 0.49486157298088074, "step": 2734, "utility_loss": 0.9757341742515564 }, { "cosine_similarity": 0.07844000658473041, "epoch": 2.548928238583411, "grad_norm": 1.1031553135878625, "learning_rate": 3.341387642388678e-06, "loss": 1.4951, "reason_loss": 0.4773225784301758, "step": 2735, "utility_loss": 1.0178072452545166 }, { "cosine_similarity": 0.07230320950296275, "epoch": 2.5498602050326187, "grad_norm": 1.2382053415351353, "learning_rate": 3.3344839489126685e-06, "loss": 1.3759, "reason_loss": 0.45981940627098083, "step": 2736, "utility_loss": 0.9161247611045837 }, { "cosine_similarity": 0.11305566322457469, "epoch": 2.5507921714818265, "grad_norm": 1.3360588740112134, "learning_rate": 3.327580255436659e-06, "loss": 1.737, "reason_loss": 0.4858434200286865, "step": 2737, "utility_loss": 1.25114905834198 }, { "cosine_similarity": 0.022655124022699175, "epoch": 2.5517241379310347, "grad_norm": 0.9934879361984293, "learning_rate": 3.320676561960649e-06, "loss": 1.342, "reason_loss": 0.46822112798690796, "step": 2738, "utility_loss": 0.8737832307815552 }, { "cosine_similarity": 0.015059578921612323, "epoch": 2.552656104380242, "grad_norm": 0.995251959905233, "learning_rate": 3.3137728684846394e-06, "loss": 1.3802, "reason_loss": 0.48377031087875366, "step": 2739, "utility_loss": 0.8964554071426392 }, { "cosine_similarity": 0.020933897371155558, "epoch": 2.5535880708294503, "grad_norm": 1.3873069544620582, "learning_rate": 3.30686917500863e-06, "loss": 1.5093, "reason_loss": 0.4898793697357178, "step": 2740, "utility_loss": 1.0194696187973022 }, { "cosine_similarity": -0.028249232521525582, "epoch": 2.554520037278658, "grad_norm": 1.096805463740134, "learning_rate": 3.2999654815326205e-06, "loss": 1.512, "reason_loss": 0.47589194774627686, "step": 2741, "utility_loss": 1.0360852479934692 }, { "cosine_similarity": 0.008385404010571572, "epoch": 2.555452003727866, "grad_norm": 1.1798655805145788, "learning_rate": 3.2930617880566108e-06, "loss": 1.3551, "reason_loss": 0.44918015599250793, "step": 2742, "utility_loss": 0.9059423208236694 }, { "cosine_similarity": 0.0396943792026442, "epoch": 2.5563839701770736, "grad_norm": 1.1623391204596678, "learning_rate": 3.286158094580601e-06, "loss": 1.4013, "reason_loss": 0.5125935077667236, "step": 2743, "utility_loss": 0.8886846899986267 }, { "cosine_similarity": 0.1570731079193599, "epoch": 2.5573159366262814, "grad_norm": 1.2348292026730947, "learning_rate": 3.2792544011045914e-06, "loss": 1.5885, "reason_loss": 0.4877299666404724, "step": 2744, "utility_loss": 1.1007559299468994 }, { "cosine_similarity": 0.002440770962097736, "epoch": 2.558247903075489, "grad_norm": 1.1327901502834778, "learning_rate": 3.2723507076285817e-06, "loss": 1.2653, "reason_loss": 0.48683029413223267, "step": 2745, "utility_loss": 0.7784425020217896 }, { "cosine_similarity": 0.20041452553526073, "epoch": 2.559179869524697, "grad_norm": 1.30291509738565, "learning_rate": 3.265447014152572e-06, "loss": 1.683, "reason_loss": 0.4784426987171173, "step": 2746, "utility_loss": 1.2045159339904785 }, { "cosine_similarity": 0.139242086586752, "epoch": 2.560111835973905, "grad_norm": 1.27160623428246, "learning_rate": 3.258543320676562e-06, "loss": 1.6953, "reason_loss": 0.47311946749687195, "step": 2747, "utility_loss": 1.2221384048461914 }, { "cosine_similarity": -0.022212517753865464, "epoch": 2.5610438024231126, "grad_norm": 0.9895503824489, "learning_rate": 3.251639627200552e-06, "loss": 1.4671, "reason_loss": 0.5034075379371643, "step": 2748, "utility_loss": 0.9636883735656738 }, { "cosine_similarity": -0.05235363883894089, "epoch": 2.561975768872321, "grad_norm": 1.2213164122644424, "learning_rate": 3.2447359337245425e-06, "loss": 1.5074, "reason_loss": 0.4909147322177887, "step": 2749, "utility_loss": 1.0164445638656616 }, { "cosine_similarity": 0.031720687812694784, "epoch": 2.562907735321528, "grad_norm": 1.1401298407913611, "learning_rate": 3.2378322402485337e-06, "loss": 1.4421, "reason_loss": 0.487594872713089, "step": 2750, "utility_loss": 0.9544818997383118 }, { "cosine_similarity": 0.09443725423205845, "epoch": 2.5638397017707364, "grad_norm": 0.949025421977501, "learning_rate": 3.230928546772524e-06, "loss": 1.3144, "reason_loss": 0.46731501817703247, "step": 2751, "utility_loss": 0.8470567464828491 }, { "cosine_similarity": -0.0073696315263799455, "epoch": 2.564771668219944, "grad_norm": 1.2325466308525566, "learning_rate": 3.224024853296514e-06, "loss": 1.2745, "reason_loss": 0.4877062141895294, "step": 2752, "utility_loss": 0.7868292331695557 }, { "cosine_similarity": -0.08467641985834633, "epoch": 2.565703634669152, "grad_norm": 1.0661144279263002, "learning_rate": 3.217121159820504e-06, "loss": 1.4875, "reason_loss": 0.5012122988700867, "step": 2753, "utility_loss": 0.9862571954727173 }, { "cosine_similarity": -0.07072080052605634, "epoch": 2.5666356011183598, "grad_norm": 1.1100398454242473, "learning_rate": 3.2102174663444945e-06, "loss": 1.4882, "reason_loss": 0.4976728856563568, "step": 2754, "utility_loss": 0.9905645847320557 }, { "cosine_similarity": 0.06052188936422164, "epoch": 2.5675675675675675, "grad_norm": 1.0849314282722298, "learning_rate": 3.2033137728684848e-06, "loss": 1.4079, "reason_loss": 0.4745802581310272, "step": 2755, "utility_loss": 0.9333294034004211 }, { "cosine_similarity": 0.13883325003731506, "epoch": 2.5684995340167753, "grad_norm": 1.1512428513472095, "learning_rate": 3.196410079392475e-06, "loss": 1.5795, "reason_loss": 0.4865962564945221, "step": 2756, "utility_loss": 1.0929522514343262 }, { "cosine_similarity": 0.1195288617697486, "epoch": 2.569431500465983, "grad_norm": 1.146172298383808, "learning_rate": 3.1895063859164654e-06, "loss": 1.2477, "reason_loss": 0.47629523277282715, "step": 2757, "utility_loss": 0.771409273147583 }, { "cosine_similarity": -0.059051660489166886, "epoch": 2.570363466915191, "grad_norm": 1.3629254549106002, "learning_rate": 3.1826026924404557e-06, "loss": 1.4139, "reason_loss": 0.46991369128227234, "step": 2758, "utility_loss": 0.9440272450447083 }, { "cosine_similarity": 0.019096200798479453, "epoch": 2.5712954333643987, "grad_norm": 1.069793066099441, "learning_rate": 3.175698998964446e-06, "loss": 1.3371, "reason_loss": 0.48722490668296814, "step": 2759, "utility_loss": 0.8498464226722717 }, { "cosine_similarity": 0.05946750599933286, "epoch": 2.572227399813607, "grad_norm": 1.1390165735273257, "learning_rate": 3.1687953054884367e-06, "loss": 1.2729, "reason_loss": 0.527959942817688, "step": 2760, "utility_loss": 0.744918942451477 }, { "cosine_similarity": -0.010070632878036279, "epoch": 2.5731593662628143, "grad_norm": 1.2962699191251268, "learning_rate": 3.161891612012427e-06, "loss": 1.3933, "reason_loss": 0.4987153112888336, "step": 2761, "utility_loss": 0.894565224647522 }, { "cosine_similarity": -0.02256957945991388, "epoch": 2.5740913327120225, "grad_norm": 1.2309957405121452, "learning_rate": 3.1549879185364174e-06, "loss": 1.5378, "reason_loss": 0.4787690043449402, "step": 2762, "utility_loss": 1.0590245723724365 }, { "cosine_similarity": 0.05532491607310995, "epoch": 2.5750232991612303, "grad_norm": 1.0482757924017383, "learning_rate": 3.1480842250604077e-06, "loss": 1.3232, "reason_loss": 0.5058474540710449, "step": 2763, "utility_loss": 0.8173691034317017 }, { "cosine_similarity": -0.030352026776938813, "epoch": 2.575955265610438, "grad_norm": 1.1703803371486066, "learning_rate": 3.141180531584398e-06, "loss": 1.5788, "reason_loss": 0.4845019578933716, "step": 2764, "utility_loss": 1.0943293571472168 }, { "cosine_similarity": -0.08542433128886993, "epoch": 2.576887232059646, "grad_norm": 1.5902497702823348, "learning_rate": 3.1342768381083883e-06, "loss": 1.8428, "reason_loss": 0.48869407176971436, "step": 2765, "utility_loss": 1.3541111946105957 }, { "cosine_similarity": 0.09556230012046663, "epoch": 2.5778191985088537, "grad_norm": 1.2868857326339966, "learning_rate": 3.1273731446323786e-06, "loss": 1.1756, "reason_loss": 0.4974134564399719, "step": 2766, "utility_loss": 0.6781908869743347 }, { "cosine_similarity": -0.06573962670749126, "epoch": 2.5787511649580614, "grad_norm": 1.0037958528027002, "learning_rate": 3.120469451156369e-06, "loss": 1.1036, "reason_loss": 0.45330148935317993, "step": 2767, "utility_loss": 0.6503432393074036 }, { "cosine_similarity": 0.13983589347964012, "epoch": 2.5796831314072692, "grad_norm": 1.342110868296645, "learning_rate": 3.1135657576803592e-06, "loss": 1.4905, "reason_loss": 0.47900331020355225, "step": 2768, "utility_loss": 1.011460542678833 }, { "cosine_similarity": 0.09114784195490537, "epoch": 2.580615097856477, "grad_norm": 1.1578839503616376, "learning_rate": 3.1066620642043495e-06, "loss": 1.4993, "reason_loss": 0.5108023285865784, "step": 2769, "utility_loss": 0.9885412454605103 }, { "cosine_similarity": 0.0018246362925447303, "epoch": 2.581547064305685, "grad_norm": 1.1657260233586855, "learning_rate": 3.0997583707283403e-06, "loss": 1.6087, "reason_loss": 0.42982468008995056, "step": 2770, "utility_loss": 1.1789029836654663 }, { "cosine_similarity": 0.03566100123999051, "epoch": 2.582479030754893, "grad_norm": 1.1448546377716236, "learning_rate": 3.0928546772523306e-06, "loss": 1.4694, "reason_loss": 0.500330924987793, "step": 2771, "utility_loss": 0.9690794944763184 }, { "cosine_similarity": 0.14812259673308467, "epoch": 2.5834109972041004, "grad_norm": 1.0857205767791769, "learning_rate": 3.085950983776321e-06, "loss": 1.512, "reason_loss": 0.4598851501941681, "step": 2772, "utility_loss": 1.0521345138549805 }, { "cosine_similarity": -0.02218963832279352, "epoch": 2.5843429636533086, "grad_norm": 0.9278923764607123, "learning_rate": 3.079047290300311e-06, "loss": 1.3511, "reason_loss": 0.49093925952911377, "step": 2773, "utility_loss": 0.8601281046867371 }, { "cosine_similarity": -0.05631852628109191, "epoch": 2.5852749301025164, "grad_norm": 1.0759690031014013, "learning_rate": 3.0721435968243015e-06, "loss": 1.2556, "reason_loss": 0.4683383107185364, "step": 2774, "utility_loss": 0.7872650027275085 }, { "cosine_similarity": 0.08720443702844573, "epoch": 2.586206896551724, "grad_norm": 1.0643849044632188, "learning_rate": 3.0652399033482914e-06, "loss": 1.297, "reason_loss": 0.48290392756462097, "step": 2775, "utility_loss": 0.8140462040901184 }, { "cosine_similarity": -0.09073391771827863, "epoch": 2.587138863000932, "grad_norm": 0.9988652832138809, "learning_rate": 3.0583362098722817e-06, "loss": 1.3712, "reason_loss": 0.46863651275634766, "step": 2776, "utility_loss": 0.9025415182113647 }, { "cosine_similarity": 0.0011926646494874174, "epoch": 2.5880708294501398, "grad_norm": 1.2995881148674817, "learning_rate": 3.051432516396272e-06, "loss": 1.5591, "reason_loss": 0.48113512992858887, "step": 2777, "utility_loss": 1.0780065059661865 }, { "cosine_similarity": 0.06094566679752222, "epoch": 2.5890027958993476, "grad_norm": 1.2466601764674914, "learning_rate": 3.0445288229202623e-06, "loss": 1.5047, "reason_loss": 0.48907729983329773, "step": 2778, "utility_loss": 1.0155802965164185 }, { "cosine_similarity": -0.00415408870984297, "epoch": 2.5899347623485554, "grad_norm": 1.1424850496822692, "learning_rate": 3.0376251294442526e-06, "loss": 1.449, "reason_loss": 0.5034893751144409, "step": 2779, "utility_loss": 0.9454817771911621 }, { "cosine_similarity": -0.044956459875059994, "epoch": 2.590866728797763, "grad_norm": 1.0044164261923287, "learning_rate": 3.0307214359682433e-06, "loss": 1.3874, "reason_loss": 0.48947980999946594, "step": 2780, "utility_loss": 0.8979330062866211 }, { "cosine_similarity": 0.06524339090407617, "epoch": 2.591798695246971, "grad_norm": 1.3939407695881765, "learning_rate": 3.0238177424922337e-06, "loss": 1.5638, "reason_loss": 0.48681801557540894, "step": 2781, "utility_loss": 1.0769565105438232 }, { "cosine_similarity": 0.09768484611521024, "epoch": 2.592730661696179, "grad_norm": 1.2441058232272912, "learning_rate": 3.016914049016224e-06, "loss": 1.3312, "reason_loss": 0.4993533194065094, "step": 2782, "utility_loss": 0.8318485617637634 }, { "cosine_similarity": -0.010569715363917374, "epoch": 2.5936626281453865, "grad_norm": 1.1175721146568727, "learning_rate": 3.0100103555402143e-06, "loss": 1.6317, "reason_loss": 0.49744921922683716, "step": 2783, "utility_loss": 1.134264588356018 }, { "cosine_similarity": 0.17659640253274544, "epoch": 2.5945945945945947, "grad_norm": 1.2399206088619128, "learning_rate": 3.0031066620642046e-06, "loss": 1.5769, "reason_loss": 0.4591958224773407, "step": 2784, "utility_loss": 1.11765718460083 }, { "cosine_similarity": -0.05399411394822942, "epoch": 2.5955265610438025, "grad_norm": 1.1245404239293935, "learning_rate": 2.996202968588195e-06, "loss": 1.3132, "reason_loss": 0.4734097719192505, "step": 2785, "utility_loss": 0.8398236036300659 }, { "cosine_similarity": 0.06604109465059325, "epoch": 2.5964585274930103, "grad_norm": 1.3110803300473004, "learning_rate": 2.989299275112185e-06, "loss": 1.6066, "reason_loss": 0.5203027129173279, "step": 2786, "utility_loss": 1.0862653255462646 }, { "cosine_similarity": 0.04488003311757955, "epoch": 2.597390493942218, "grad_norm": 1.056428046029404, "learning_rate": 2.9823955816361755e-06, "loss": 1.6127, "reason_loss": 0.4914840757846832, "step": 2787, "utility_loss": 1.121230125427246 }, { "cosine_similarity": -0.015255110199284317, "epoch": 2.598322460391426, "grad_norm": 0.9952272761606585, "learning_rate": 2.975491888160166e-06, "loss": 1.4656, "reason_loss": 0.5171834230422974, "step": 2788, "utility_loss": 0.9484272599220276 }, { "cosine_similarity": -0.18118645607640949, "epoch": 2.5992544268406337, "grad_norm": 1.4292258811277547, "learning_rate": 2.968588194684156e-06, "loss": 1.4835, "reason_loss": 0.4868789613246918, "step": 2789, "utility_loss": 0.9965978264808655 }, { "cosine_similarity": 0.10096585763861293, "epoch": 2.6001863932898415, "grad_norm": 1.3195759816092336, "learning_rate": 2.9616845012081464e-06, "loss": 1.4379, "reason_loss": 0.45973145961761475, "step": 2790, "utility_loss": 0.9781326055526733 }, { "cosine_similarity": 0.04719535719691321, "epoch": 2.6011183597390493, "grad_norm": 1.1784797199643477, "learning_rate": 2.954780807732137e-06, "loss": 1.4796, "reason_loss": 0.5199089050292969, "step": 2791, "utility_loss": 0.9596789479255676 }, { "cosine_similarity": 0.13312299717993575, "epoch": 2.602050326188257, "grad_norm": 1.2170433541660888, "learning_rate": 2.9478771142561275e-06, "loss": 1.5274, "reason_loss": 0.495455265045166, "step": 2792, "utility_loss": 1.0319041013717651 }, { "cosine_similarity": 0.06539418081741298, "epoch": 2.6029822926374653, "grad_norm": 1.3092917502441597, "learning_rate": 2.940973420780118e-06, "loss": 1.6243, "reason_loss": 0.4844406843185425, "step": 2793, "utility_loss": 1.139853596687317 }, { "cosine_similarity": 0.08729943353650174, "epoch": 2.6039142590866726, "grad_norm": 1.3822663950485317, "learning_rate": 2.934069727304108e-06, "loss": 1.4655, "reason_loss": 0.4913935661315918, "step": 2794, "utility_loss": 0.9740701913833618 }, { "cosine_similarity": 0.06266789053332751, "epoch": 2.604846225535881, "grad_norm": 1.108432558291037, "learning_rate": 2.9271660338280984e-06, "loss": 1.3947, "reason_loss": 0.4971734285354614, "step": 2795, "utility_loss": 0.8975231647491455 }, { "cosine_similarity": 0.01082145260511004, "epoch": 2.6057781919850886, "grad_norm": 1.4729733979264394, "learning_rate": 2.9202623403520887e-06, "loss": 1.6425, "reason_loss": 0.4594191312789917, "step": 2796, "utility_loss": 1.1830499172210693 }, { "cosine_similarity": -0.08267347943645682, "epoch": 2.6067101584342964, "grad_norm": 1.0061076288760518, "learning_rate": 2.913358646876079e-06, "loss": 1.448, "reason_loss": 0.4731975197792053, "step": 2797, "utility_loss": 0.9748317003250122 }, { "cosine_similarity": 0.03633527271943134, "epoch": 2.607642124883504, "grad_norm": 1.2893546836223004, "learning_rate": 2.906454953400069e-06, "loss": 1.711, "reason_loss": 0.4856429398059845, "step": 2798, "utility_loss": 1.2253901958465576 }, { "cosine_similarity": 0.10467263424677171, "epoch": 2.608574091332712, "grad_norm": 1.3477488807648736, "learning_rate": 2.8995512599240592e-06, "loss": 1.3654, "reason_loss": 0.49775099754333496, "step": 2799, "utility_loss": 0.867691159248352 }, { "cosine_similarity": 0.15113363472956584, "epoch": 2.60950605778192, "grad_norm": 1.1706266081632055, "learning_rate": 2.8926475664480495e-06, "loss": 1.4718, "reason_loss": 0.49207067489624023, "step": 2800, "utility_loss": 0.9797537326812744 }, { "cosine_similarity": -0.13202035069242044, "epoch": 2.6104380242311276, "grad_norm": 1.188049927075174, "learning_rate": 2.8857438729720407e-06, "loss": 1.5264, "reason_loss": 0.4881782531738281, "step": 2801, "utility_loss": 1.0382213592529297 }, { "cosine_similarity": -0.0042315385031965825, "epoch": 2.6113699906803354, "grad_norm": 1.4521820913597203, "learning_rate": 2.878840179496031e-06, "loss": 1.371, "reason_loss": 0.49755290150642395, "step": 2802, "utility_loss": 0.8734777569770813 }, { "cosine_similarity": -0.017208644466043633, "epoch": 2.612301957129543, "grad_norm": 1.2283105389509248, "learning_rate": 2.871936486020021e-06, "loss": 1.3581, "reason_loss": 0.4976523518562317, "step": 2803, "utility_loss": 0.8604916930198669 }, { "cosine_similarity": -0.08554390155783376, "epoch": 2.6132339235787514, "grad_norm": 1.3046794476576897, "learning_rate": 2.865032792544011e-06, "loss": 1.4829, "reason_loss": 0.49077868461608887, "step": 2804, "utility_loss": 0.9921503663063049 }, { "cosine_similarity": -0.01913517534762242, "epoch": 2.6141658900279587, "grad_norm": 1.156716241627186, "learning_rate": 2.8581290990680015e-06, "loss": 1.4169, "reason_loss": 0.47618865966796875, "step": 2805, "utility_loss": 0.9407221078872681 }, { "cosine_similarity": 0.02753458138845436, "epoch": 2.615097856477167, "grad_norm": 1.259997645075166, "learning_rate": 2.851225405591992e-06, "loss": 1.9334, "reason_loss": 0.4927220642566681, "step": 2806, "utility_loss": 1.4407126903533936 }, { "cosine_similarity": -0.0389464630764287, "epoch": 2.6160298229263748, "grad_norm": 1.0297930062824703, "learning_rate": 2.844321712115982e-06, "loss": 1.3208, "reason_loss": 0.4641997814178467, "step": 2807, "utility_loss": 0.8565821051597595 }, { "cosine_similarity": 0.13399122062497054, "epoch": 2.6169617893755825, "grad_norm": 1.164084395863202, "learning_rate": 2.8374180186399724e-06, "loss": 1.4868, "reason_loss": 0.4771449565887451, "step": 2808, "utility_loss": 1.009652853012085 }, { "cosine_similarity": -0.0584456143378626, "epoch": 2.6178937558247903, "grad_norm": 1.0523593735597794, "learning_rate": 2.8305143251639627e-06, "loss": 1.7247, "reason_loss": 0.5063978433609009, "step": 2809, "utility_loss": 1.2182857990264893 }, { "cosine_similarity": -0.029729575924350396, "epoch": 2.618825722273998, "grad_norm": 1.037537375850699, "learning_rate": 2.823610631687953e-06, "loss": 1.16, "reason_loss": 0.46134793758392334, "step": 2810, "utility_loss": 0.6986960172653198 }, { "cosine_similarity": -0.03616648711378442, "epoch": 2.619757688723206, "grad_norm": 1.420747181923068, "learning_rate": 2.8167069382119438e-06, "loss": 1.3579, "reason_loss": 0.4900188148021698, "step": 2811, "utility_loss": 0.867888331413269 }, { "cosine_similarity": 0.11957397631751465, "epoch": 2.6206896551724137, "grad_norm": 1.1036657647251205, "learning_rate": 2.809803244735934e-06, "loss": 1.2481, "reason_loss": 0.49288418889045715, "step": 2812, "utility_loss": 0.7552546262741089 }, { "cosine_similarity": 0.03388677007422915, "epoch": 2.6216216216216215, "grad_norm": 1.0856972178991011, "learning_rate": 2.8028995512599244e-06, "loss": 1.0539, "reason_loss": 0.5009278059005737, "step": 2813, "utility_loss": 0.5529879331588745 }, { "cosine_similarity": -0.014368327439193605, "epoch": 2.6225535880708293, "grad_norm": 1.0672474233803537, "learning_rate": 2.7959958577839147e-06, "loss": 1.5461, "reason_loss": 0.49974673986434937, "step": 2814, "utility_loss": 1.0463042259216309 }, { "cosine_similarity": 0.1182183347236877, "epoch": 2.6234855545200375, "grad_norm": 1.0365772992656934, "learning_rate": 2.789092164307905e-06, "loss": 1.3945, "reason_loss": 0.4558647871017456, "step": 2815, "utility_loss": 0.9386227130889893 }, { "cosine_similarity": 0.01535903841834859, "epoch": 2.624417520969245, "grad_norm": 1.010922853440776, "learning_rate": 2.7821884708318953e-06, "loss": 1.3199, "reason_loss": 0.4970267713069916, "step": 2816, "utility_loss": 0.8228417634963989 }, { "cosine_similarity": 0.0411883898464106, "epoch": 2.625349487418453, "grad_norm": 1.1910808118940122, "learning_rate": 2.7752847773558856e-06, "loss": 1.5853, "reason_loss": 0.485067218542099, "step": 2817, "utility_loss": 1.1001962423324585 }, { "cosine_similarity": -0.044794399009761364, "epoch": 2.626281453867661, "grad_norm": 1.2176500251084499, "learning_rate": 2.768381083879876e-06, "loss": 1.4807, "reason_loss": 0.5201753973960876, "step": 2818, "utility_loss": 0.9605438709259033 }, { "cosine_similarity": 0.08318844216455681, "epoch": 2.6272134203168687, "grad_norm": 1.0593173593095366, "learning_rate": 2.7614773904038662e-06, "loss": 1.2786, "reason_loss": 0.5039523839950562, "step": 2819, "utility_loss": 0.7746663093566895 }, { "cosine_similarity": 0.03973758319029214, "epoch": 2.6281453867660765, "grad_norm": 1.2627866766638285, "learning_rate": 2.7545736969278566e-06, "loss": 1.2912, "reason_loss": 0.4994196891784668, "step": 2820, "utility_loss": 0.7918046712875366 }, { "cosine_similarity": 0.21859024674784025, "epoch": 2.6290773532152842, "grad_norm": 1.3646274013607578, "learning_rate": 2.7476700034518473e-06, "loss": 1.7602, "reason_loss": 0.5108287334442139, "step": 2821, "utility_loss": 1.2493866682052612 }, { "cosine_similarity": 0.03183583914984147, "epoch": 2.630009319664492, "grad_norm": 1.2439475586174331, "learning_rate": 2.7407663099758376e-06, "loss": 1.5808, "reason_loss": 0.49523818492889404, "step": 2822, "utility_loss": 1.0855733156204224 }, { "cosine_similarity": -0.06814502939901379, "epoch": 2.6309412861137, "grad_norm": 1.2125722137714603, "learning_rate": 2.733862616499828e-06, "loss": 1.7477, "reason_loss": 0.48084813356399536, "step": 2823, "utility_loss": 1.2668039798736572 }, { "cosine_similarity": -0.055843334108015286, "epoch": 2.6318732525629076, "grad_norm": 1.2160872499932873, "learning_rate": 2.7269589230238182e-06, "loss": 1.5844, "reason_loss": 0.4832187294960022, "step": 2824, "utility_loss": 1.1011476516723633 }, { "cosine_similarity": 0.018903335925978832, "epoch": 2.6328052190121154, "grad_norm": 1.1546665214947802, "learning_rate": 2.7200552295478085e-06, "loss": 1.3729, "reason_loss": 0.4853014349937439, "step": 2825, "utility_loss": 0.8876107335090637 }, { "cosine_similarity": 0.20867464716500025, "epoch": 2.6337371854613236, "grad_norm": 1.1495959343776059, "learning_rate": 2.7131515360717984e-06, "loss": 1.4431, "reason_loss": 0.4850744307041168, "step": 2826, "utility_loss": 0.9580457806587219 }, { "cosine_similarity": -0.08939204776313371, "epoch": 2.634669151910531, "grad_norm": 1.1800818311494938, "learning_rate": 2.7062478425957887e-06, "loss": 1.697, "reason_loss": 0.5133359432220459, "step": 2827, "utility_loss": 1.1836421489715576 }, { "cosine_similarity": 0.13736855826322983, "epoch": 2.635601118359739, "grad_norm": 1.3265047941472672, "learning_rate": 2.699344149119779e-06, "loss": 1.6282, "reason_loss": 0.46412450075149536, "step": 2828, "utility_loss": 1.1641020774841309 }, { "cosine_similarity": 0.1439204891404166, "epoch": 2.636533084808947, "grad_norm": 1.4844059674076702, "learning_rate": 2.6924404556437693e-06, "loss": 1.2985, "reason_loss": 0.4968266189098358, "step": 2829, "utility_loss": 0.801638126373291 }, { "cosine_similarity": -0.07647604515534823, "epoch": 2.637465051258155, "grad_norm": 1.3031578334781735, "learning_rate": 2.6855367621677596e-06, "loss": 1.8612, "reason_loss": 0.47109299898147583, "step": 2830, "utility_loss": 1.3900666236877441 }, { "cosine_similarity": -0.0015394178130239478, "epoch": 2.6383970177073626, "grad_norm": 1.254993729589141, "learning_rate": 2.6786330686917504e-06, "loss": 1.409, "reason_loss": 0.4854740798473358, "step": 2831, "utility_loss": 0.9235708713531494 }, { "cosine_similarity": 0.1749073108585916, "epoch": 2.6393289841565704, "grad_norm": 1.4851973220691015, "learning_rate": 2.6717293752157407e-06, "loss": 1.5327, "reason_loss": 0.4753383994102478, "step": 2832, "utility_loss": 1.0573337078094482 }, { "cosine_similarity": 0.1655038664207937, "epoch": 2.640260950605778, "grad_norm": 1.074573274708606, "learning_rate": 2.664825681739731e-06, "loss": 1.2844, "reason_loss": 0.4872877597808838, "step": 2833, "utility_loss": 0.7971310615539551 }, { "cosine_similarity": -0.012514136694880596, "epoch": 2.641192917054986, "grad_norm": 1.1191787346913538, "learning_rate": 2.6579219882637213e-06, "loss": 1.7046, "reason_loss": 0.4823821485042572, "step": 2834, "utility_loss": 1.222259759902954 }, { "cosine_similarity": 0.13994834070190185, "epoch": 2.6421248835041937, "grad_norm": 1.1172657103215011, "learning_rate": 2.6510182947877116e-06, "loss": 1.2874, "reason_loss": 0.5335695147514343, "step": 2835, "utility_loss": 0.7538099884986877 }, { "cosine_similarity": 0.1903029479050752, "epoch": 2.6430568499534015, "grad_norm": 1.1865860643821, "learning_rate": 2.644114601311702e-06, "loss": 1.2563, "reason_loss": 0.4646522104740143, "step": 2836, "utility_loss": 0.7916842699050903 }, { "cosine_similarity": -0.06426177684615347, "epoch": 2.6439888164026097, "grad_norm": 1.2262518488462746, "learning_rate": 2.6372109078356922e-06, "loss": 1.6863, "reason_loss": 0.5019074082374573, "step": 2837, "utility_loss": 1.1843674182891846 }, { "cosine_similarity": -0.0032192367936983686, "epoch": 2.644920782851817, "grad_norm": 1.0246953596891748, "learning_rate": 2.6303072143596825e-06, "loss": 1.5257, "reason_loss": 0.5190620422363281, "step": 2838, "utility_loss": 1.006598949432373 }, { "cosine_similarity": 0.16668304333106387, "epoch": 2.6458527493010253, "grad_norm": 1.2000013160785776, "learning_rate": 2.623403520883673e-06, "loss": 1.3358, "reason_loss": 0.48348820209503174, "step": 2839, "utility_loss": 0.852358341217041 }, { "cosine_similarity": 0.02979981822337405, "epoch": 2.646784715750233, "grad_norm": 1.1140645855351845, "learning_rate": 2.616499827407663e-06, "loss": 1.4645, "reason_loss": 0.47756725549697876, "step": 2840, "utility_loss": 0.986896812915802 }, { "cosine_similarity": 0.11167956075293095, "epoch": 2.647716682199441, "grad_norm": 1.1089763001142956, "learning_rate": 2.609596133931654e-06, "loss": 1.4021, "reason_loss": 0.46176382899284363, "step": 2841, "utility_loss": 0.9403518438339233 }, { "cosine_similarity": 0.011312795398898954, "epoch": 2.6486486486486487, "grad_norm": 1.246099607398486, "learning_rate": 2.602692440455644e-06, "loss": 1.8987, "reason_loss": 0.48998603224754333, "step": 2842, "utility_loss": 1.4087374210357666 }, { "cosine_similarity": -0.016321683473676712, "epoch": 2.6495806150978565, "grad_norm": 1.0891527695618017, "learning_rate": 2.5957887469796345e-06, "loss": 1.4898, "reason_loss": 0.4925314486026764, "step": 2843, "utility_loss": 0.9972784519195557 }, { "cosine_similarity": 0.13769875464820885, "epoch": 2.6505125815470643, "grad_norm": 1.295826425702383, "learning_rate": 2.588885053503625e-06, "loss": 1.3391, "reason_loss": 0.4906969964504242, "step": 2844, "utility_loss": 0.8484410047531128 }, { "cosine_similarity": -0.12707911885276346, "epoch": 2.651444547996272, "grad_norm": 1.106633916185381, "learning_rate": 2.581981360027615e-06, "loss": 1.4422, "reason_loss": 0.48923200368881226, "step": 2845, "utility_loss": 0.9529892206192017 }, { "cosine_similarity": 0.1352924126536721, "epoch": 2.65237651444548, "grad_norm": 1.1465291464451124, "learning_rate": 2.5750776665516054e-06, "loss": 1.5687, "reason_loss": 0.4722491502761841, "step": 2846, "utility_loss": 1.0964349508285522 }, { "cosine_similarity": 0.13620190174454969, "epoch": 2.6533084808946876, "grad_norm": 1.2320990171055797, "learning_rate": 2.5681739730755957e-06, "loss": 1.6887, "reason_loss": 0.509377658367157, "step": 2847, "utility_loss": 1.1793574094772339 }, { "cosine_similarity": 0.1564839767592045, "epoch": 2.654240447343896, "grad_norm": 1.1628159747075495, "learning_rate": 2.561270279599586e-06, "loss": 1.4549, "reason_loss": 0.4817727208137512, "step": 2848, "utility_loss": 0.9730923771858215 }, { "cosine_similarity": 0.054216539718724525, "epoch": 2.655172413793103, "grad_norm": 1.2039298993824323, "learning_rate": 2.554366586123576e-06, "loss": 1.73, "reason_loss": 0.48218780755996704, "step": 2849, "utility_loss": 1.247809648513794 }, { "cosine_similarity": 0.15530656971276266, "epoch": 2.6561043802423114, "grad_norm": 1.1802417520165611, "learning_rate": 2.5474628926475662e-06, "loss": 1.6056, "reason_loss": 0.5244932174682617, "step": 2850, "utility_loss": 1.0811100006103516 }, { "cosine_similarity": 0.0625604501079114, "epoch": 2.6570363466915192, "grad_norm": 1.3959196938414922, "learning_rate": 2.5405591991715574e-06, "loss": 1.3693, "reason_loss": 0.499858021736145, "step": 2851, "utility_loss": 0.8694701790809631 }, { "cosine_similarity": 0.13978275695562709, "epoch": 2.657968313140727, "grad_norm": 1.2079027722195508, "learning_rate": 2.5336555056955477e-06, "loss": 1.28, "reason_loss": 0.47877246141433716, "step": 2852, "utility_loss": 0.8011844754219055 }, { "cosine_similarity": 0.039333898416155796, "epoch": 2.658900279589935, "grad_norm": 1.2732924099323684, "learning_rate": 2.5267518122195376e-06, "loss": 1.4736, "reason_loss": 0.4938381016254425, "step": 2853, "utility_loss": 0.9797994494438171 }, { "cosine_similarity": 0.06780174438106842, "epoch": 2.6598322460391426, "grad_norm": 1.2213085772631682, "learning_rate": 2.519848118743528e-06, "loss": 1.5852, "reason_loss": 0.48555710911750793, "step": 2854, "utility_loss": 1.0995988845825195 }, { "cosine_similarity": 0.07686678381532938, "epoch": 2.6607642124883504, "grad_norm": 1.0958693445774745, "learning_rate": 2.5129444252675182e-06, "loss": 1.4739, "reason_loss": 0.5007832050323486, "step": 2855, "utility_loss": 0.9730885028839111 }, { "cosine_similarity": 0.10808719356294828, "epoch": 2.661696178937558, "grad_norm": 0.926719641126892, "learning_rate": 2.5060407317915085e-06, "loss": 1.5541, "reason_loss": 0.49279797077178955, "step": 2856, "utility_loss": 1.061329960823059 }, { "cosine_similarity": -0.11827796399532091, "epoch": 2.662628145386766, "grad_norm": 1.1792619172457135, "learning_rate": 2.499137038315499e-06, "loss": 1.408, "reason_loss": 0.5146865248680115, "step": 2857, "utility_loss": 0.8933256268501282 }, { "cosine_similarity": -0.035063980330257344, "epoch": 2.6635601118359737, "grad_norm": 1.1620608624395454, "learning_rate": 2.492233344839489e-06, "loss": 1.4439, "reason_loss": 0.4813355803489685, "step": 2858, "utility_loss": 0.962559700012207 }, { "cosine_similarity": 0.021389125723935084, "epoch": 2.664492078285182, "grad_norm": 1.5064570396237622, "learning_rate": 2.48532965136348e-06, "loss": 1.5831, "reason_loss": 0.477359414100647, "step": 2859, "utility_loss": 1.1057007312774658 }, { "cosine_similarity": 0.161783386499666, "epoch": 2.6654240447343893, "grad_norm": 1.2590312520866698, "learning_rate": 2.47842595788747e-06, "loss": 1.603, "reason_loss": 0.5291060209274292, "step": 2860, "utility_loss": 1.0738873481750488 }, { "cosine_similarity": -0.05053823402376673, "epoch": 2.6663560111835976, "grad_norm": 1.3823079126807163, "learning_rate": 2.4715222644114605e-06, "loss": 1.2673, "reason_loss": 0.4891255497932434, "step": 2861, "utility_loss": 0.7781283855438232 }, { "cosine_similarity": -0.00428950686379621, "epoch": 2.6672879776328053, "grad_norm": 1.1811658308260948, "learning_rate": 2.464618570935451e-06, "loss": 1.5636, "reason_loss": 0.5425200462341309, "step": 2862, "utility_loss": 1.0210508108139038 }, { "cosine_similarity": 0.04412308250567977, "epoch": 2.668219944082013, "grad_norm": 1.1419159377392742, "learning_rate": 2.4577148774594407e-06, "loss": 1.558, "reason_loss": 0.4748039245605469, "step": 2863, "utility_loss": 1.083202600479126 }, { "cosine_similarity": 0.11648364673652858, "epoch": 2.669151910531221, "grad_norm": 1.0666701659146944, "learning_rate": 2.4508111839834314e-06, "loss": 1.383, "reason_loss": 0.4762996435165405, "step": 2864, "utility_loss": 0.906696081161499 }, { "cosine_similarity": -0.027470498165442864, "epoch": 2.6700838769804287, "grad_norm": 1.2824873383360742, "learning_rate": 2.4439074905074217e-06, "loss": 1.9765, "reason_loss": 0.4899531602859497, "step": 2865, "utility_loss": 1.4865301847457886 }, { "cosine_similarity": 0.04601986294768121, "epoch": 2.6710158434296365, "grad_norm": 1.1142941161646835, "learning_rate": 2.437003797031412e-06, "loss": 1.6933, "reason_loss": 0.487413227558136, "step": 2866, "utility_loss": 1.2058560848236084 }, { "cosine_similarity": -0.05188550775791305, "epoch": 2.6719478098788443, "grad_norm": 1.1535170425210384, "learning_rate": 2.4301001035554023e-06, "loss": 1.6132, "reason_loss": 0.4956096112728119, "step": 2867, "utility_loss": 1.1176364421844482 }, { "cosine_similarity": 0.0019455640577719852, "epoch": 2.672879776328052, "grad_norm": 1.116983888644737, "learning_rate": 2.4231964100793927e-06, "loss": 1.3504, "reason_loss": 0.4777905344963074, "step": 2868, "utility_loss": 0.8726401329040527 }, { "cosine_similarity": -0.04240942011094757, "epoch": 2.67381174277726, "grad_norm": 1.0094758658766363, "learning_rate": 2.416292716603383e-06, "loss": 1.1544, "reason_loss": 0.46364542841911316, "step": 2869, "utility_loss": 0.6907413005828857 }, { "cosine_similarity": 0.014641013071407949, "epoch": 2.674743709226468, "grad_norm": 1.10754433476172, "learning_rate": 2.4093890231273733e-06, "loss": 1.3964, "reason_loss": 0.47263050079345703, "step": 2870, "utility_loss": 0.9237415194511414 }, { "cosine_similarity": 0.025041679122992735, "epoch": 2.6756756756756754, "grad_norm": 1.2401334249076656, "learning_rate": 2.4024853296513636e-06, "loss": 1.5599, "reason_loss": 0.5202628970146179, "step": 2871, "utility_loss": 1.0396161079406738 }, { "cosine_similarity": -0.001040064538116202, "epoch": 2.6766076421248837, "grad_norm": 1.103865857688422, "learning_rate": 2.395581636175354e-06, "loss": 1.1391, "reason_loss": 0.5037357211112976, "step": 2872, "utility_loss": 0.6353389024734497 }, { "cosine_similarity": -0.08738576246055473, "epoch": 2.6775396085740915, "grad_norm": 1.273797117391563, "learning_rate": 2.388677942699344e-06, "loss": 1.6875, "reason_loss": 0.4853755533695221, "step": 2873, "utility_loss": 1.2021318674087524 }, { "cosine_similarity": 0.13769607459395347, "epoch": 2.6784715750232992, "grad_norm": 1.3029132425838008, "learning_rate": 2.381774249223335e-06, "loss": 1.696, "reason_loss": 0.5092376470565796, "step": 2874, "utility_loss": 1.1867600679397583 }, { "cosine_similarity": 0.0498486585418886, "epoch": 2.679403541472507, "grad_norm": 1.1729668733290248, "learning_rate": 2.3748705557473252e-06, "loss": 1.5852, "reason_loss": 0.5350751876831055, "step": 2875, "utility_loss": 1.0501201152801514 }, { "cosine_similarity": 0.03429446208984992, "epoch": 2.680335507921715, "grad_norm": 1.7876468226058395, "learning_rate": 2.367966862271315e-06, "loss": 1.4513, "reason_loss": 0.5118528008460999, "step": 2876, "utility_loss": 0.9394668340682983 }, { "cosine_similarity": -0.08727573325764926, "epoch": 2.6812674743709226, "grad_norm": 1.097770247964926, "learning_rate": 2.3610631687953054e-06, "loss": 1.2243, "reason_loss": 0.49494680762290955, "step": 2877, "utility_loss": 0.7293040752410889 }, { "cosine_similarity": 0.02803741484282751, "epoch": 2.6821994408201304, "grad_norm": 1.1033587622797172, "learning_rate": 2.3541594753192957e-06, "loss": 1.4213, "reason_loss": 0.5176862478256226, "step": 2878, "utility_loss": 0.9036335945129395 }, { "cosine_similarity": 0.1634720150412501, "epoch": 2.683131407269338, "grad_norm": 1.1257373732809526, "learning_rate": 2.3472557818432865e-06, "loss": 1.2984, "reason_loss": 0.4751036763191223, "step": 2879, "utility_loss": 0.8233362436294556 }, { "cosine_similarity": -0.061590283756545, "epoch": 2.684063373718546, "grad_norm": 1.0999782623424148, "learning_rate": 2.3403520883672768e-06, "loss": 1.446, "reason_loss": 0.48952966928482056, "step": 2880, "utility_loss": 0.9564836025238037 }, { "cosine_similarity": 0.12123254406486476, "epoch": 2.684995340167754, "grad_norm": 1.162260999556854, "learning_rate": 2.333448394891267e-06, "loss": 1.6952, "reason_loss": 0.4636542797088623, "step": 2881, "utility_loss": 1.231581449508667 }, { "cosine_similarity": 0.06733274715758482, "epoch": 2.6859273066169616, "grad_norm": 1.2670094235619533, "learning_rate": 2.3265447014152574e-06, "loss": 1.5334, "reason_loss": 0.471156507730484, "step": 2882, "utility_loss": 1.0622525215148926 }, { "cosine_similarity": 0.10254951644931377, "epoch": 2.68685927306617, "grad_norm": 1.0504666903227242, "learning_rate": 2.3196410079392477e-06, "loss": 1.7041, "reason_loss": 0.47866714000701904, "step": 2883, "utility_loss": 1.2253923416137695 }, { "cosine_similarity": 0.07003283023368978, "epoch": 2.6877912395153776, "grad_norm": 1.3387615318421358, "learning_rate": 2.312737314463238e-06, "loss": 1.4743, "reason_loss": 0.5013446807861328, "step": 2884, "utility_loss": 0.9729487895965576 }, { "cosine_similarity": -0.047855542750602366, "epoch": 2.6887232059645854, "grad_norm": 0.9654344169182227, "learning_rate": 2.3058336209872283e-06, "loss": 1.2921, "reason_loss": 0.5120774507522583, "step": 2885, "utility_loss": 0.7800336480140686 }, { "cosine_similarity": 0.047923263165940456, "epoch": 2.689655172413793, "grad_norm": 1.0458098596308811, "learning_rate": 2.2989299275112186e-06, "loss": 1.4208, "reason_loss": 0.5069702863693237, "step": 2886, "utility_loss": 0.9138404130935669 }, { "cosine_similarity": -0.053465162880841834, "epoch": 2.690587138863001, "grad_norm": 1.0917356774157754, "learning_rate": 2.292026234035209e-06, "loss": 1.3186, "reason_loss": 0.4937998652458191, "step": 2887, "utility_loss": 0.8247956037521362 }, { "cosine_similarity": 0.01337948851827733, "epoch": 2.6915191053122087, "grad_norm": 1.2746344030252805, "learning_rate": 2.2851225405591993e-06, "loss": 1.3499, "reason_loss": 0.48930877447128296, "step": 2888, "utility_loss": 0.8606116771697998 }, { "cosine_similarity": -0.0936859958866955, "epoch": 2.6924510717614165, "grad_norm": 1.13456656810166, "learning_rate": 2.27821884708319e-06, "loss": 1.3732, "reason_loss": 0.483832448720932, "step": 2889, "utility_loss": 0.8893967866897583 }, { "cosine_similarity": 0.05394975930554993, "epoch": 2.6933830382106243, "grad_norm": 1.0862680206506719, "learning_rate": 2.27131515360718e-06, "loss": 1.4993, "reason_loss": 0.4812886118888855, "step": 2890, "utility_loss": 1.0180286169052124 }, { "cosine_similarity": 0.09891417346312631, "epoch": 2.694315004659832, "grad_norm": 1.103536668212442, "learning_rate": 2.26441146013117e-06, "loss": 1.3257, "reason_loss": 0.5034728050231934, "step": 2891, "utility_loss": 0.8222016096115112 }, { "cosine_similarity": 0.09478510431180293, "epoch": 2.6952469711090403, "grad_norm": 1.1239239890997794, "learning_rate": 2.2575077666551605e-06, "loss": 1.4178, "reason_loss": 0.47562238574028015, "step": 2892, "utility_loss": 0.942183256149292 }, { "cosine_similarity": 0.031899005042814316, "epoch": 2.6961789375582477, "grad_norm": 1.0615941004591243, "learning_rate": 2.250604073179151e-06, "loss": 1.4455, "reason_loss": 0.5018312931060791, "step": 2893, "utility_loss": 0.9436427354812622 }, { "cosine_similarity": 0.04036520120011601, "epoch": 2.697110904007456, "grad_norm": 1.2516275886339832, "learning_rate": 2.2437003797031415e-06, "loss": 1.4681, "reason_loss": 0.4951944649219513, "step": 2894, "utility_loss": 0.9728741645812988 }, { "cosine_similarity": 0.08606133575016388, "epoch": 2.6980428704566637, "grad_norm": 1.0596020949157212, "learning_rate": 2.236796686227132e-06, "loss": 1.4223, "reason_loss": 0.4938706159591675, "step": 2895, "utility_loss": 0.928380012512207 }, { "cosine_similarity": -0.04139072588591164, "epoch": 2.6989748369058715, "grad_norm": 1.2368213700313564, "learning_rate": 2.229892992751122e-06, "loss": 1.9601, "reason_loss": 0.4624108672142029, "step": 2896, "utility_loss": 1.4976873397827148 }, { "cosine_similarity": -0.013319169870349146, "epoch": 2.6999068033550793, "grad_norm": 1.0395920610867944, "learning_rate": 2.2229892992751125e-06, "loss": 1.3502, "reason_loss": 0.468543142080307, "step": 2897, "utility_loss": 0.881618857383728 }, { "cosine_similarity": -0.07658275221857502, "epoch": 2.700838769804287, "grad_norm": 1.334406109928764, "learning_rate": 2.2160856057991028e-06, "loss": 1.7635, "reason_loss": 0.4869294762611389, "step": 2898, "utility_loss": 1.2765681743621826 }, { "cosine_similarity": -0.062436855345051764, "epoch": 2.701770736253495, "grad_norm": 1.1261317707802139, "learning_rate": 2.209181912323093e-06, "loss": 1.3965, "reason_loss": 0.477064847946167, "step": 2899, "utility_loss": 0.9193946123123169 }, { "cosine_similarity": -0.014570668263962609, "epoch": 2.7027027027027026, "grad_norm": 1.0965716515850459, "learning_rate": 2.2022782188470834e-06, "loss": 1.4517, "reason_loss": 0.500225305557251, "step": 2900, "utility_loss": 0.9514668583869934 }, { "cosine_similarity": -0.056089392403474034, "epoch": 2.7036346691519104, "grad_norm": 1.1782592503233424, "learning_rate": 2.1953745253710737e-06, "loss": 1.2833, "reason_loss": 0.4712125062942505, "step": 2901, "utility_loss": 0.8121200799942017 }, { "cosine_similarity": -0.02369686414968844, "epoch": 2.704566635601118, "grad_norm": 1.1373766941703165, "learning_rate": 2.188470831895064e-06, "loss": 1.4257, "reason_loss": 0.4643932580947876, "step": 2902, "utility_loss": 0.9613260626792908 }, { "cosine_similarity": 0.1467821688235572, "epoch": 2.7054986020503264, "grad_norm": 1.1374516679862794, "learning_rate": 2.1815671384190543e-06, "loss": 1.1906, "reason_loss": 0.48611003160476685, "step": 2903, "utility_loss": 0.7044970989227295 }, { "cosine_similarity": -0.02296002790098544, "epoch": 2.706430568499534, "grad_norm": 1.3438097894996703, "learning_rate": 2.1746634449430446e-06, "loss": 1.8148, "reason_loss": 0.49549418687820435, "step": 2904, "utility_loss": 1.3193416595458984 }, { "cosine_similarity": 0.025692886677722206, "epoch": 2.707362534948742, "grad_norm": 1.0601290264368082, "learning_rate": 2.167759751467035e-06, "loss": 1.3755, "reason_loss": 0.4935429096221924, "step": 2905, "utility_loss": 0.8819665908813477 }, { "cosine_similarity": 0.11401150281538483, "epoch": 2.70829450139795, "grad_norm": 1.2692657659172353, "learning_rate": 2.1608560579910252e-06, "loss": 1.5702, "reason_loss": 0.538390040397644, "step": 2906, "utility_loss": 1.0317656993865967 }, { "cosine_similarity": 0.06297017973120363, "epoch": 2.7092264678471576, "grad_norm": 1.6603546118605526, "learning_rate": 2.1539523645150156e-06, "loss": 1.5239, "reason_loss": 0.5210886001586914, "step": 2907, "utility_loss": 1.0027648210525513 }, { "cosine_similarity": 0.1952774722922699, "epoch": 2.7101584342963654, "grad_norm": 1.2537040022880812, "learning_rate": 2.147048671039006e-06, "loss": 1.2721, "reason_loss": 0.4724893569946289, "step": 2908, "utility_loss": 0.7995645403862 }, { "cosine_similarity": -0.056115255434688316, "epoch": 2.711090400745573, "grad_norm": 1.1550183843767825, "learning_rate": 2.1401449775629966e-06, "loss": 1.3167, "reason_loss": 0.4766222834587097, "step": 2909, "utility_loss": 0.8400535583496094 }, { "cosine_similarity": 0.04368885135293558, "epoch": 2.712022367194781, "grad_norm": 1.4903742817934376, "learning_rate": 2.133241284086987e-06, "loss": 1.6583, "reason_loss": 0.5142273902893066, "step": 2910, "utility_loss": 1.1440521478652954 }, { "cosine_similarity": -0.0887640544118075, "epoch": 2.7129543336439887, "grad_norm": 1.1331995964724197, "learning_rate": 2.1263375906109772e-06, "loss": 1.463, "reason_loss": 0.48444071412086487, "step": 2911, "utility_loss": 0.9785513877868652 }, { "cosine_similarity": -0.06939713032925166, "epoch": 2.7138863000931965, "grad_norm": 1.234184156634437, "learning_rate": 2.1194338971349675e-06, "loss": 1.4615, "reason_loss": 0.491409033536911, "step": 2912, "utility_loss": 0.9700465202331543 }, { "cosine_similarity": 0.03432590829954983, "epoch": 2.7148182665424043, "grad_norm": 1.0458861982907475, "learning_rate": 2.1125302036589574e-06, "loss": 1.354, "reason_loss": 0.5359128713607788, "step": 2913, "utility_loss": 0.8181294202804565 }, { "cosine_similarity": 0.011168223774545785, "epoch": 2.7157502329916126, "grad_norm": 0.9620110713856492, "learning_rate": 2.105626510182948e-06, "loss": 1.177, "reason_loss": 0.4607831537723541, "step": 2914, "utility_loss": 0.7162399291992188 }, { "cosine_similarity": 0.127470764809717, "epoch": 2.71668219944082, "grad_norm": 1.5199755870591882, "learning_rate": 2.0987228167069384e-06, "loss": 1.5225, "reason_loss": 0.4791049361228943, "step": 2915, "utility_loss": 1.0434328317642212 }, { "cosine_similarity": 0.09165035140678117, "epoch": 2.717614165890028, "grad_norm": 1.0982352889404723, "learning_rate": 2.0918191232309288e-06, "loss": 1.4179, "reason_loss": 0.48672109842300415, "step": 2916, "utility_loss": 0.931185245513916 }, { "cosine_similarity": 0.08867000794261312, "epoch": 2.718546132339236, "grad_norm": 1.1633592680137548, "learning_rate": 2.084915429754919e-06, "loss": 1.3609, "reason_loss": 0.4628942012786865, "step": 2917, "utility_loss": 0.8979698419570923 }, { "cosine_similarity": 0.16978163484109404, "epoch": 2.7194780987884437, "grad_norm": 1.1837666278272114, "learning_rate": 2.0780117362789094e-06, "loss": 1.7044, "reason_loss": 0.48462003469467163, "step": 2918, "utility_loss": 1.2198165655136108 }, { "cosine_similarity": 0.07304922804597298, "epoch": 2.7204100652376515, "grad_norm": 1.173475985629489, "learning_rate": 2.0711080428028997e-06, "loss": 1.5261, "reason_loss": 0.5112733840942383, "step": 2919, "utility_loss": 1.0148463249206543 }, { "cosine_similarity": -0.036395119851596665, "epoch": 2.7213420316868593, "grad_norm": 1.4668042299969883, "learning_rate": 2.06420434932689e-06, "loss": 1.5781, "reason_loss": 0.48038560152053833, "step": 2920, "utility_loss": 1.0977494716644287 }, { "cosine_similarity": -0.0739007722826674, "epoch": 2.722273998136067, "grad_norm": 1.1764590110386692, "learning_rate": 2.0573006558508803e-06, "loss": 1.415, "reason_loss": 0.4907997250556946, "step": 2921, "utility_loss": 0.9241846799850464 }, { "cosine_similarity": 0.01184576586379902, "epoch": 2.723205964585275, "grad_norm": 1.1804107136934217, "learning_rate": 2.0503969623748706e-06, "loss": 1.4174, "reason_loss": 0.49048078060150146, "step": 2922, "utility_loss": 0.9269422292709351 }, { "cosine_similarity": 0.0157998300298298, "epoch": 2.7241379310344827, "grad_norm": 1.2941736417986438, "learning_rate": 2.043493268898861e-06, "loss": 1.4903, "reason_loss": 0.4571399390697479, "step": 2923, "utility_loss": 1.0331852436065674 }, { "cosine_similarity": 0.14309273168523545, "epoch": 2.7250698974836904, "grad_norm": 1.3121267653184325, "learning_rate": 2.0365895754228517e-06, "loss": 1.4681, "reason_loss": 0.48862606287002563, "step": 2924, "utility_loss": 0.9794459342956543 }, { "cosine_similarity": 0.035097104389552415, "epoch": 2.7260018639328987, "grad_norm": 1.2129910941829853, "learning_rate": 2.029685881946842e-06, "loss": 1.4991, "reason_loss": 0.4792165756225586, "step": 2925, "utility_loss": 1.019834041595459 }, { "cosine_similarity": 0.012376080683165836, "epoch": 2.726933830382106, "grad_norm": 1.0619385763787041, "learning_rate": 2.0227821884708323e-06, "loss": 1.3009, "reason_loss": 0.4979579448699951, "step": 2926, "utility_loss": 0.8029733896255493 }, { "cosine_similarity": 0.05074594781866356, "epoch": 2.7278657968313142, "grad_norm": 1.4609676844653379, "learning_rate": 2.015878494994822e-06, "loss": 1.4792, "reason_loss": 0.5122126340866089, "step": 2927, "utility_loss": 0.9669599533081055 }, { "cosine_similarity": -0.013028202586552141, "epoch": 2.728797763280522, "grad_norm": 1.0198594528175202, "learning_rate": 2.0089748015188125e-06, "loss": 1.3187, "reason_loss": 0.46795424818992615, "step": 2928, "utility_loss": 0.8507808446884155 }, { "cosine_similarity": 0.024236695717648398, "epoch": 2.72972972972973, "grad_norm": 1.1892950662457982, "learning_rate": 2.002071108042803e-06, "loss": 1.2375, "reason_loss": 0.5020156502723694, "step": 2929, "utility_loss": 0.7355232834815979 }, { "cosine_similarity": 0.024616340283411146, "epoch": 2.7306616961789376, "grad_norm": 1.0949888689116223, "learning_rate": 1.9951674145667935e-06, "loss": 1.491, "reason_loss": 0.4859352111816406, "step": 2930, "utility_loss": 1.0050214529037476 }, { "cosine_similarity": -0.050878447379779576, "epoch": 2.7315936626281454, "grad_norm": 1.2569252711678003, "learning_rate": 1.988263721090784e-06, "loss": 1.6039, "reason_loss": 0.47099441289901733, "step": 2931, "utility_loss": 1.1329033374786377 }, { "cosine_similarity": -0.03398329296916855, "epoch": 2.732525629077353, "grad_norm": 1.0066858921642574, "learning_rate": 1.981360027614774e-06, "loss": 1.2204, "reason_loss": 0.4930003583431244, "step": 2932, "utility_loss": 0.7274243831634521 }, { "cosine_similarity": 0.06763836078618586, "epoch": 2.733457595526561, "grad_norm": 1.5076710864214278, "learning_rate": 1.9744563341387644e-06, "loss": 1.8101, "reason_loss": 0.4861873984336853, "step": 2933, "utility_loss": 1.323866367340088 }, { "cosine_similarity": 0.00847319722478525, "epoch": 2.7343895619757688, "grad_norm": 1.374733399121635, "learning_rate": 1.9675526406627547e-06, "loss": 1.9023, "reason_loss": 0.5027110576629639, "step": 2934, "utility_loss": 1.3996288776397705 }, { "cosine_similarity": 0.04878693916887336, "epoch": 2.7353215284249766, "grad_norm": 1.0370038425684283, "learning_rate": 1.960648947186745e-06, "loss": 1.1223, "reason_loss": 0.46456480026245117, "step": 2935, "utility_loss": 0.6576880216598511 }, { "cosine_similarity": -0.04818932132601364, "epoch": 2.736253494874185, "grad_norm": 1.2622074852373728, "learning_rate": 1.9537452537107354e-06, "loss": 1.631, "reason_loss": 0.4899148643016815, "step": 2936, "utility_loss": 1.1411025524139404 }, { "cosine_similarity": -0.03179048713111339, "epoch": 2.737185461323392, "grad_norm": 0.9736500628732533, "learning_rate": 1.9468415602347257e-06, "loss": 1.2752, "reason_loss": 0.5206304788589478, "step": 2937, "utility_loss": 0.7545249462127686 }, { "cosine_similarity": -0.20652752769018948, "epoch": 2.7381174277726004, "grad_norm": 1.3056256780186937, "learning_rate": 1.939937866758716e-06, "loss": 1.6413, "reason_loss": 0.49047890305519104, "step": 2938, "utility_loss": 1.1508463621139526 }, { "cosine_similarity": 0.0026714747861311746, "epoch": 2.739049394221808, "grad_norm": 1.1655429087051714, "learning_rate": 1.9330341732827067e-06, "loss": 1.5021, "reason_loss": 0.4832904636859894, "step": 2939, "utility_loss": 1.0188215970993042 }, { "cosine_similarity": 0.020736354233079523, "epoch": 2.739981360671016, "grad_norm": 0.9162785686409305, "learning_rate": 1.926130479806697e-06, "loss": 1.159, "reason_loss": 0.5060915946960449, "step": 2940, "utility_loss": 0.6528757810592651 }, { "cosine_similarity": -0.036535358362755045, "epoch": 2.7409133271202237, "grad_norm": 1.3292038375853794, "learning_rate": 1.919226786330687e-06, "loss": 1.5767, "reason_loss": 0.48609381914138794, "step": 2941, "utility_loss": 1.0905729532241821 }, { "cosine_similarity": 0.028622053332283178, "epoch": 2.7418452935694315, "grad_norm": 3.472962892418506, "learning_rate": 1.9123230928546772e-06, "loss": 1.3743, "reason_loss": 0.5131452083587646, "step": 2942, "utility_loss": 0.861131489276886 }, { "cosine_similarity": 0.021026663301162016, "epoch": 2.7427772600186393, "grad_norm": 1.1385900261265647, "learning_rate": 1.9054193993786677e-06, "loss": 1.4993, "reason_loss": 0.46968740224838257, "step": 2943, "utility_loss": 1.0296390056610107 }, { "cosine_similarity": 0.028444964695028743, "epoch": 2.743709226467847, "grad_norm": 1.07609057328124, "learning_rate": 1.8985157059026583e-06, "loss": 1.4002, "reason_loss": 0.4922253489494324, "step": 2944, "utility_loss": 0.9079853296279907 }, { "cosine_similarity": 0.012802557774597198, "epoch": 2.744641192917055, "grad_norm": 0.9823770953891472, "learning_rate": 1.8916120124266486e-06, "loss": 1.1785, "reason_loss": 0.4714224934577942, "step": 2945, "utility_loss": 0.7070350050926208 }, { "cosine_similarity": -0.0004082266449589635, "epoch": 2.7455731593662627, "grad_norm": 1.2162894426067283, "learning_rate": 1.8847083189506389e-06, "loss": 1.5484, "reason_loss": 0.5080429315567017, "step": 2946, "utility_loss": 1.0403242111206055 }, { "cosine_similarity": -0.07991811476985554, "epoch": 2.746505125815471, "grad_norm": 1.2217372992040005, "learning_rate": 1.877804625474629e-06, "loss": 1.5022, "reason_loss": 0.48572787642478943, "step": 2947, "utility_loss": 1.0164337158203125 }, { "cosine_similarity": -0.03029376521202815, "epoch": 2.7474370922646782, "grad_norm": 1.0329898041427383, "learning_rate": 1.8709009319986193e-06, "loss": 1.3405, "reason_loss": 0.4840795695781708, "step": 2948, "utility_loss": 0.8564015626907349 }, { "cosine_similarity": 0.034775714847272064, "epoch": 2.7483690587138865, "grad_norm": 1.017461060725722, "learning_rate": 1.8639972385226098e-06, "loss": 1.364, "reason_loss": 0.5061618089675903, "step": 2949, "utility_loss": 0.8578323125839233 }, { "cosine_similarity": 0.02458761356225099, "epoch": 2.7493010251630943, "grad_norm": 1.0133063614011917, "learning_rate": 1.8570935450466001e-06, "loss": 1.3632, "reason_loss": 0.5083695650100708, "step": 2950, "utility_loss": 0.8548197150230408 }, { "cosine_similarity": 0.040547903177750144, "epoch": 2.750232991612302, "grad_norm": 1.1810617708796811, "learning_rate": 1.8501898515705904e-06, "loss": 1.3805, "reason_loss": 0.4695778787136078, "step": 2951, "utility_loss": 0.9109613299369812 }, { "cosine_similarity": 0.03087383708676466, "epoch": 2.75116495806151, "grad_norm": 1.1277106974151316, "learning_rate": 1.8432861580945807e-06, "loss": 1.2586, "reason_loss": 0.46149396896362305, "step": 2952, "utility_loss": 0.797106146812439 }, { "cosine_similarity": 0.05308301460486124, "epoch": 2.7520969245107176, "grad_norm": 1.0731123074012345, "learning_rate": 1.836382464618571e-06, "loss": 1.4083, "reason_loss": 0.5212655067443848, "step": 2953, "utility_loss": 0.8870466947555542 }, { "cosine_similarity": -0.005956188766815869, "epoch": 2.7530288909599254, "grad_norm": 1.2911120262629756, "learning_rate": 1.8294787711425613e-06, "loss": 1.5485, "reason_loss": 0.4818303883075714, "step": 2954, "utility_loss": 1.066640019416809 }, { "cosine_similarity": -0.09176737236180539, "epoch": 2.753960857409133, "grad_norm": 1.3875494358362577, "learning_rate": 1.8225750776665519e-06, "loss": 1.5244, "reason_loss": 0.4626092314720154, "step": 2955, "utility_loss": 1.0618352890014648 }, { "cosine_similarity": 0.09752970177319867, "epoch": 2.754892823858341, "grad_norm": 1.1200378766357209, "learning_rate": 1.8156713841905422e-06, "loss": 1.5546, "reason_loss": 0.4938179850578308, "step": 2956, "utility_loss": 1.0607630014419556 }, { "cosine_similarity": 0.04519506868990859, "epoch": 2.755824790307549, "grad_norm": 1.3374880027679166, "learning_rate": 1.8087676907145325e-06, "loss": 1.2708, "reason_loss": 0.4767642617225647, "step": 2957, "utility_loss": 0.7939926981925964 }, { "cosine_similarity": 0.1017196352302685, "epoch": 2.756756756756757, "grad_norm": 1.2298902664158518, "learning_rate": 1.8018639972385226e-06, "loss": 1.5433, "reason_loss": 0.487460196018219, "step": 2958, "utility_loss": 1.0558440685272217 }, { "cosine_similarity": 0.08797993042316614, "epoch": 2.7576887232059644, "grad_norm": 1.3558665666654481, "learning_rate": 1.7949603037625129e-06, "loss": 1.5429, "reason_loss": 0.4893370568752289, "step": 2959, "utility_loss": 1.053563117980957 }, { "cosine_similarity": -0.09643169522795556, "epoch": 2.7586206896551726, "grad_norm": 1.3513037146917624, "learning_rate": 1.7880566102865034e-06, "loss": 1.3837, "reason_loss": 0.45894095301628113, "step": 2960, "utility_loss": 0.9247881174087524 }, { "cosine_similarity": -0.018300210089215584, "epoch": 2.7595526561043804, "grad_norm": 1.4127720102906975, "learning_rate": 1.7811529168104937e-06, "loss": 1.6826, "reason_loss": 0.5124727487564087, "step": 2961, "utility_loss": 1.1701526641845703 }, { "cosine_similarity": -0.06224750677847515, "epoch": 2.760484622553588, "grad_norm": 1.1901213099673154, "learning_rate": 1.774249223334484e-06, "loss": 1.5368, "reason_loss": 0.5041759014129639, "step": 2962, "utility_loss": 1.0326621532440186 }, { "cosine_similarity": 0.03445911824962675, "epoch": 2.761416589002796, "grad_norm": 1.2361232608485928, "learning_rate": 1.7673455298584743e-06, "loss": 1.4665, "reason_loss": 0.47943195700645447, "step": 2963, "utility_loss": 0.9870710372924805 }, { "cosine_similarity": 0.04987898783597709, "epoch": 2.7623485554520038, "grad_norm": 1.2193965388170542, "learning_rate": 1.7604418363824646e-06, "loss": 1.6412, "reason_loss": 0.4593918025493622, "step": 2964, "utility_loss": 1.181767225265503 }, { "cosine_similarity": -0.05051458879878995, "epoch": 2.7632805219012115, "grad_norm": 1.140274289830141, "learning_rate": 1.7535381429064552e-06, "loss": 1.4758, "reason_loss": 0.4769572913646698, "step": 2965, "utility_loss": 0.9988695383071899 }, { "cosine_similarity": 0.09185145832418468, "epoch": 2.7642124883504193, "grad_norm": 1.0907290029143213, "learning_rate": 1.7466344494304455e-06, "loss": 1.5352, "reason_loss": 0.5092748403549194, "step": 2966, "utility_loss": 1.0259678363800049 }, { "cosine_similarity": -0.0021069175557291995, "epoch": 2.765144454799627, "grad_norm": 1.0993801742386045, "learning_rate": 1.7397307559544358e-06, "loss": 1.1033, "reason_loss": 0.45932915806770325, "step": 2967, "utility_loss": 0.6439381837844849 }, { "cosine_similarity": -0.001764329968510514, "epoch": 2.766076421248835, "grad_norm": 1.2337013845254345, "learning_rate": 1.732827062478426e-06, "loss": 1.497, "reason_loss": 0.5045424699783325, "step": 2968, "utility_loss": 0.9924737215042114 }, { "cosine_similarity": -0.042680092777677155, "epoch": 2.767008387698043, "grad_norm": 1.0004394518612694, "learning_rate": 1.7259233690024164e-06, "loss": 1.6486, "reason_loss": 0.5086952447891235, "step": 2969, "utility_loss": 1.1399089097976685 }, { "cosine_similarity": -0.018428614384530317, "epoch": 2.7679403541472505, "grad_norm": 1.0272173097626003, "learning_rate": 1.719019675526407e-06, "loss": 1.2023, "reason_loss": 0.4891276955604553, "step": 2970, "utility_loss": 0.7131451368331909 }, { "cosine_similarity": 0.14643012592538207, "epoch": 2.7688723205964587, "grad_norm": 1.1674103345148288, "learning_rate": 1.7121159820503972e-06, "loss": 1.3893, "reason_loss": 0.48854702711105347, "step": 2971, "utility_loss": 0.9007762670516968 }, { "cosine_similarity": -0.008939709975768763, "epoch": 2.7698042870456665, "grad_norm": 1.1508037272993625, "learning_rate": 1.7052122885743873e-06, "loss": 2.0134, "reason_loss": 0.4876500070095062, "step": 2972, "utility_loss": 1.5257091522216797 }, { "cosine_similarity": 0.10855214837357191, "epoch": 2.7707362534948743, "grad_norm": 1.2324129489500932, "learning_rate": 1.6983085950983776e-06, "loss": 1.6853, "reason_loss": 0.48706090450286865, "step": 2973, "utility_loss": 1.1982711553573608 }, { "cosine_similarity": -0.030406363007913245, "epoch": 2.771668219944082, "grad_norm": 0.9896856693977203, "learning_rate": 1.691404901622368e-06, "loss": 1.4917, "reason_loss": 0.5029376149177551, "step": 2974, "utility_loss": 0.9888002276420593 }, { "cosine_similarity": 0.11828441796231214, "epoch": 2.77260018639329, "grad_norm": 1.1256270942459885, "learning_rate": 1.6845012081463585e-06, "loss": 1.3583, "reason_loss": 0.48954224586486816, "step": 2975, "utility_loss": 0.8687120079994202 }, { "cosine_similarity": 0.05042940312723676, "epoch": 2.7735321528424977, "grad_norm": 1.1795487209038265, "learning_rate": 1.6775975146703488e-06, "loss": 1.3288, "reason_loss": 0.4999794065952301, "step": 2976, "utility_loss": 0.8287942409515381 }, { "cosine_similarity": -0.03692474677210416, "epoch": 2.7744641192917054, "grad_norm": 1.1147804840166575, "learning_rate": 1.670693821194339e-06, "loss": 1.4633, "reason_loss": 0.4841700494289398, "step": 2977, "utility_loss": 0.9791111350059509 }, { "cosine_similarity": 0.0859558899029377, "epoch": 2.7753960857409132, "grad_norm": 1.3252626773812612, "learning_rate": 1.6637901277183294e-06, "loss": 1.7523, "reason_loss": 0.4681408405303955, "step": 2978, "utility_loss": 1.2841416597366333 }, { "cosine_similarity": 0.017408128069308994, "epoch": 2.776328052190121, "grad_norm": 1.1952318538807403, "learning_rate": 1.6568864342423197e-06, "loss": 1.3913, "reason_loss": 0.48643577098846436, "step": 2979, "utility_loss": 0.9048260450363159 }, { "cosine_similarity": 0.08710726104326899, "epoch": 2.7772600186393293, "grad_norm": 1.285737094594738, "learning_rate": 1.6499827407663102e-06, "loss": 1.5734, "reason_loss": 0.47430792450904846, "step": 2980, "utility_loss": 1.0990663766860962 }, { "cosine_similarity": 0.03858462662681009, "epoch": 2.7781919850885366, "grad_norm": 1.242269282235651, "learning_rate": 1.6430790472903005e-06, "loss": 1.6042, "reason_loss": 0.47419458627700806, "step": 2981, "utility_loss": 1.1299803256988525 }, { "cosine_similarity": 0.13113983569546028, "epoch": 2.779123951537745, "grad_norm": 1.8579978029012238, "learning_rate": 1.6361753538142908e-06, "loss": 1.9569, "reason_loss": 0.5386071801185608, "step": 2982, "utility_loss": 1.4183311462402344 }, { "cosine_similarity": 0.07852968190061722, "epoch": 2.7800559179869526, "grad_norm": 1.0547626210997838, "learning_rate": 1.629271660338281e-06, "loss": 1.1423, "reason_loss": 0.4727555215358734, "step": 2983, "utility_loss": 0.6695916652679443 }, { "cosine_similarity": 0.10465614748466931, "epoch": 2.7809878844361604, "grad_norm": 1.0959898999068869, "learning_rate": 1.6223679668622713e-06, "loss": 1.5378, "reason_loss": 0.5081540942192078, "step": 2984, "utility_loss": 1.029674768447876 }, { "cosine_similarity": 0.04429937239958908, "epoch": 2.781919850885368, "grad_norm": 1.1043250241884437, "learning_rate": 1.615464273386262e-06, "loss": 1.4327, "reason_loss": 0.48789334297180176, "step": 2985, "utility_loss": 0.944830060005188 }, { "cosine_similarity": 0.052774573372535265, "epoch": 2.782851817334576, "grad_norm": 1.3680103162669572, "learning_rate": 1.608560579910252e-06, "loss": 1.2438, "reason_loss": 0.5060716271400452, "step": 2986, "utility_loss": 0.7376952767372131 }, { "cosine_similarity": -0.006308626321671675, "epoch": 2.7837837837837838, "grad_norm": 1.2509787096812333, "learning_rate": 1.6016568864342424e-06, "loss": 1.405, "reason_loss": 0.5004342794418335, "step": 2987, "utility_loss": 0.9045641422271729 }, { "cosine_similarity": -0.05233270437256028, "epoch": 2.7847157502329916, "grad_norm": 1.1426184203731484, "learning_rate": 1.5947531929582327e-06, "loss": 1.5828, "reason_loss": 0.4783884286880493, "step": 2988, "utility_loss": 1.1043713092803955 }, { "cosine_similarity": 0.027710679924648613, "epoch": 2.7856477166821993, "grad_norm": 1.1851436628503809, "learning_rate": 1.587849499482223e-06, "loss": 1.3739, "reason_loss": 0.47359710931777954, "step": 2989, "utility_loss": 0.9003000259399414 }, { "cosine_similarity": -0.06002419644279786, "epoch": 2.786579683131407, "grad_norm": 1.214576600659119, "learning_rate": 1.5809458060062135e-06, "loss": 1.3455, "reason_loss": 0.48656654357910156, "step": 2990, "utility_loss": 0.85896897315979 }, { "cosine_similarity": 0.024847177720197403, "epoch": 2.7875116495806154, "grad_norm": 1.0471964712276032, "learning_rate": 1.5740421125302038e-06, "loss": 1.1632, "reason_loss": 0.4762861430644989, "step": 2991, "utility_loss": 0.6869181394577026 }, { "cosine_similarity": 0.0015525573242916122, "epoch": 2.7884436160298227, "grad_norm": 1.0837949565609217, "learning_rate": 1.5671384190541941e-06, "loss": 1.3758, "reason_loss": 0.4637066721916199, "step": 2992, "utility_loss": 0.9120843410491943 }, { "cosine_similarity": -0.022611374476802344, "epoch": 2.789375582479031, "grad_norm": 1.1654364011859442, "learning_rate": 1.5602347255781845e-06, "loss": 1.4954, "reason_loss": 0.5025001764297485, "step": 2993, "utility_loss": 0.992865264415741 }, { "cosine_similarity": 0.0801339839809872, "epoch": 2.7903075489282387, "grad_norm": 1.2605775006523263, "learning_rate": 1.5533310321021748e-06, "loss": 1.3725, "reason_loss": 0.500749409198761, "step": 2994, "utility_loss": 0.8717649579048157 }, { "cosine_similarity": 0.04758986791430326, "epoch": 2.7912395153774465, "grad_norm": 1.104217846129533, "learning_rate": 1.5464273386261653e-06, "loss": 1.5036, "reason_loss": 0.47612109780311584, "step": 2995, "utility_loss": 1.027506947517395 }, { "cosine_similarity": -0.04018096616832252, "epoch": 2.7921714818266543, "grad_norm": 1.254772267512829, "learning_rate": 1.5395236451501556e-06, "loss": 1.3017, "reason_loss": 0.49422287940979004, "step": 2996, "utility_loss": 0.807482898235321 }, { "cosine_similarity": -0.042392705005126666, "epoch": 2.793103448275862, "grad_norm": 1.1132115287188389, "learning_rate": 1.5326199516741457e-06, "loss": 1.2562, "reason_loss": 0.47180402278900146, "step": 2997, "utility_loss": 0.784376323223114 }, { "cosine_similarity": 0.04611044596407526, "epoch": 2.79403541472507, "grad_norm": 1.2047529252148281, "learning_rate": 1.525716258198136e-06, "loss": 1.486, "reason_loss": 0.48553186655044556, "step": 2998, "utility_loss": 1.0004398822784424 }, { "cosine_similarity": 0.06107081456511982, "epoch": 2.7949673811742777, "grad_norm": 1.181075698711305, "learning_rate": 1.5188125647221263e-06, "loss": 1.7163, "reason_loss": 0.492654949426651, "step": 2999, "utility_loss": 1.223681092262268 }, { "cosine_similarity": 0.09917169357940471, "epoch": 2.7958993476234855, "grad_norm": 1.044546389176063, "learning_rate": 1.5119088712461168e-06, "loss": 1.4688, "reason_loss": 0.46611976623535156, "step": 3000, "utility_loss": 1.0026811361312866 }, { "cosine_similarity": -0.03008531508227281, "epoch": 2.7968313140726933, "grad_norm": 1.089355525900217, "learning_rate": 1.5050051777701071e-06, "loss": 1.3122, "reason_loss": 0.49770891666412354, "step": 3001, "utility_loss": 0.8145197629928589 }, { "cosine_similarity": 0.0668310181876593, "epoch": 2.7977632805219015, "grad_norm": 0.9882289422316551, "learning_rate": 1.4981014842940974e-06, "loss": 1.5354, "reason_loss": 0.4957579970359802, "step": 3002, "utility_loss": 1.0396777391433716 }, { "cosine_similarity": 0.2774696965437306, "epoch": 2.798695246971109, "grad_norm": 1.0839413673663558, "learning_rate": 1.4911977908180878e-06, "loss": 1.3492, "reason_loss": 0.4641270041465759, "step": 3003, "utility_loss": 0.8850760459899902 }, { "cosine_similarity": -0.0050956994421004105, "epoch": 2.799627213420317, "grad_norm": 1.3223227012713987, "learning_rate": 1.484294097342078e-06, "loss": 1.3241, "reason_loss": 0.4999980926513672, "step": 3004, "utility_loss": 0.8241171836853027 }, { "cosine_similarity": 0.03364916106965308, "epoch": 2.800559179869525, "grad_norm": 1.2115211663712009, "learning_rate": 1.4773904038660686e-06, "loss": 1.3997, "reason_loss": 0.5015804171562195, "step": 3005, "utility_loss": 0.8980779647827148 }, { "cosine_similarity": 0.1450388457267009, "epoch": 2.8014911463187326, "grad_norm": 1.1623005960628687, "learning_rate": 1.470486710390059e-06, "loss": 1.4784, "reason_loss": 0.5366213917732239, "step": 3006, "utility_loss": 0.9417377710342407 }, { "cosine_similarity": -0.08124299340716003, "epoch": 2.8024231127679404, "grad_norm": 1.0591282367202652, "learning_rate": 1.4635830169140492e-06, "loss": 1.5855, "reason_loss": 0.5498919486999512, "step": 3007, "utility_loss": 1.0356281995773315 }, { "cosine_similarity": 0.007985880425316626, "epoch": 2.803355079217148, "grad_norm": 1.1928492707005205, "learning_rate": 1.4566793234380395e-06, "loss": 1.743, "reason_loss": 0.4829642176628113, "step": 3008, "utility_loss": 1.259986400604248 }, { "cosine_similarity": 0.11860875263621545, "epoch": 2.804287045666356, "grad_norm": 1.2025756770170113, "learning_rate": 1.4497756299620296e-06, "loss": 1.6107, "reason_loss": 0.4528288245201111, "step": 3009, "utility_loss": 1.1578269004821777 }, { "cosine_similarity": 0.050590198361016546, "epoch": 2.805219012115564, "grad_norm": 1.1392662855629665, "learning_rate": 1.4428719364860203e-06, "loss": 1.4528, "reason_loss": 0.478854775428772, "step": 3010, "utility_loss": 0.9739565849304199 }, { "cosine_similarity": 0.0511564637066907, "epoch": 2.8061509785647716, "grad_norm": 1.0999065259175869, "learning_rate": 1.4359682430100104e-06, "loss": 1.4508, "reason_loss": 0.4673917293548584, "step": 3011, "utility_loss": 0.9834079146385193 }, { "cosine_similarity": -0.0047725694631920385, "epoch": 2.8070829450139794, "grad_norm": 0.9173496733339215, "learning_rate": 1.4290645495340007e-06, "loss": 1.1573, "reason_loss": 0.46604517102241516, "step": 3012, "utility_loss": 0.6912107467651367 }, { "cosine_similarity": -0.029187118251840796, "epoch": 2.8080149114631876, "grad_norm": 1.188076643718183, "learning_rate": 1.422160856057991e-06, "loss": 1.5474, "reason_loss": 0.46654465794563293, "step": 3013, "utility_loss": 1.0808135271072388 }, { "cosine_similarity": 0.20332667137928498, "epoch": 2.808946877912395, "grad_norm": 1.298836209733922, "learning_rate": 1.4152571625819814e-06, "loss": 1.3496, "reason_loss": 0.48212265968322754, "step": 3014, "utility_loss": 0.8674531579017639 }, { "cosine_similarity": 0.04841070954775806, "epoch": 2.809878844361603, "grad_norm": 1.1992023597159713, "learning_rate": 1.4083534691059719e-06, "loss": 1.5528, "reason_loss": 0.4954792857170105, "step": 3015, "utility_loss": 1.057364821434021 }, { "cosine_similarity": 0.011251960714677602, "epoch": 2.810810810810811, "grad_norm": 1.1530103382168233, "learning_rate": 1.4014497756299622e-06, "loss": 1.2795, "reason_loss": 0.489895224571228, "step": 3016, "utility_loss": 0.789604663848877 }, { "cosine_similarity": -0.08729201887828437, "epoch": 2.8117427772600188, "grad_norm": 1.130353569111431, "learning_rate": 1.3945460821539525e-06, "loss": 1.4285, "reason_loss": 0.4502675533294678, "step": 3017, "utility_loss": 0.9782240390777588 }, { "cosine_similarity": 0.003855004377524353, "epoch": 2.8126747437092265, "grad_norm": 1.552731450126965, "learning_rate": 1.3876423886779428e-06, "loss": 1.6929, "reason_loss": 0.4787418842315674, "step": 3018, "utility_loss": 1.2141607999801636 }, { "cosine_similarity": 0.1656522754688364, "epoch": 2.8136067101584343, "grad_norm": 0.9990096001492821, "learning_rate": 1.3807386952019331e-06, "loss": 1.4152, "reason_loss": 0.4814315438270569, "step": 3019, "utility_loss": 0.9337761402130127 }, { "cosine_similarity": 0.0016097221423503377, "epoch": 2.814538676607642, "grad_norm": 1.2934232873364448, "learning_rate": 1.3738350017259236e-06, "loss": 1.3679, "reason_loss": 0.49068835377693176, "step": 3020, "utility_loss": 0.8772436380386353 }, { "cosine_similarity": 0.027243921971053235, "epoch": 2.81547064305685, "grad_norm": 1.4403310941189327, "learning_rate": 1.366931308249914e-06, "loss": 1.6466, "reason_loss": 0.5067567229270935, "step": 3021, "utility_loss": 1.139819622039795 }, { "cosine_similarity": 0.039610790916971, "epoch": 2.8164026095060577, "grad_norm": 1.240558006740318, "learning_rate": 1.3600276147739043e-06, "loss": 1.5207, "reason_loss": 0.5008729696273804, "step": 3022, "utility_loss": 1.0198392868041992 }, { "cosine_similarity": -0.16167470041782486, "epoch": 2.8173345759552655, "grad_norm": 1.0610588918528088, "learning_rate": 1.3531239212978944e-06, "loss": 1.287, "reason_loss": 0.4768921136856079, "step": 3023, "utility_loss": 0.810063362121582 }, { "cosine_similarity": -0.0070020939298720795, "epoch": 2.8182665424044733, "grad_norm": 1.1454156164685798, "learning_rate": 1.3462202278218847e-06, "loss": 1.2746, "reason_loss": 0.4950924217700958, "step": 3024, "utility_loss": 0.779550313949585 }, { "cosine_similarity": -0.03896258469141933, "epoch": 2.819198508853681, "grad_norm": 1.274560731730128, "learning_rate": 1.3393165343458752e-06, "loss": 1.6691, "reason_loss": 0.5023017525672913, "step": 3025, "utility_loss": 1.1667795181274414 }, { "cosine_similarity": 0.07600418665433883, "epoch": 2.8201304753028893, "grad_norm": 1.2992420098221968, "learning_rate": 1.3324128408698655e-06, "loss": 1.3763, "reason_loss": 0.5133222937583923, "step": 3026, "utility_loss": 0.8629930019378662 }, { "cosine_similarity": 0.11969479420497066, "epoch": 2.821062441752097, "grad_norm": 1.3672891172352415, "learning_rate": 1.3255091473938558e-06, "loss": 1.7645, "reason_loss": 0.4896585941314697, "step": 3027, "utility_loss": 1.274829626083374 }, { "cosine_similarity": 0.01854180559174054, "epoch": 2.821994408201305, "grad_norm": 1.333491445127148, "learning_rate": 1.3186054539178461e-06, "loss": 1.4377, "reason_loss": 0.5401530265808105, "step": 3028, "utility_loss": 0.8975744247436523 }, { "cosine_similarity": 0.07062371395522156, "epoch": 2.8229263746505127, "grad_norm": 1.0240405926230354, "learning_rate": 1.3117017604418364e-06, "loss": 1.3575, "reason_loss": 0.49300435185432434, "step": 3029, "utility_loss": 0.8644475936889648 }, { "cosine_similarity": -0.003289590116755116, "epoch": 2.8238583410997204, "grad_norm": 1.203569893007137, "learning_rate": 1.304798066965827e-06, "loss": 1.3987, "reason_loss": 0.46948787569999695, "step": 3030, "utility_loss": 0.9292171001434326 }, { "cosine_similarity": 0.1611641933827264, "epoch": 2.8247903075489282, "grad_norm": 1.1701958489994575, "learning_rate": 1.2978943734898173e-06, "loss": 1.3394, "reason_loss": 0.4982483983039856, "step": 3031, "utility_loss": 0.8411281108856201 }, { "cosine_similarity": -0.08928365940715367, "epoch": 2.825722273998136, "grad_norm": 1.2877839007516396, "learning_rate": 1.2909906800138076e-06, "loss": 1.4459, "reason_loss": 0.4883458614349365, "step": 3032, "utility_loss": 0.9575978517532349 }, { "cosine_similarity": 0.016719435042551797, "epoch": 2.826654240447344, "grad_norm": 1.1963184999232024, "learning_rate": 1.2840869865377979e-06, "loss": 1.3031, "reason_loss": 0.4834170639514923, "step": 3033, "utility_loss": 0.8196454644203186 }, { "cosine_similarity": 0.01060950643089977, "epoch": 2.8275862068965516, "grad_norm": 1.1083036582693333, "learning_rate": 1.277183293061788e-06, "loss": 1.2489, "reason_loss": 0.4748498797416687, "step": 3034, "utility_loss": 0.7740920186042786 }, { "cosine_similarity": -0.013045624654819751, "epoch": 2.8285181733457594, "grad_norm": 1.5952155500706906, "learning_rate": 1.2702795995857787e-06, "loss": 1.6863, "reason_loss": 0.5109685659408569, "step": 3035, "utility_loss": 1.1753191947937012 }, { "cosine_similarity": 0.18165505459770415, "epoch": 2.829450139794967, "grad_norm": 1.1912419436949906, "learning_rate": 1.2633759061097688e-06, "loss": 1.356, "reason_loss": 0.4614483118057251, "step": 3036, "utility_loss": 0.8945615291595459 }, { "cosine_similarity": 0.04159118515442042, "epoch": 2.8303821062441754, "grad_norm": 1.795962266635693, "learning_rate": 1.2564722126337591e-06, "loss": 1.6518, "reason_loss": 0.4938453435897827, "step": 3037, "utility_loss": 1.1579201221466064 }, { "cosine_similarity": 0.01025710349724589, "epoch": 2.831314072693383, "grad_norm": 1.2530049526540625, "learning_rate": 1.2495685191577494e-06, "loss": 1.5816, "reason_loss": 0.4951927661895752, "step": 3038, "utility_loss": 1.0863890647888184 }, { "cosine_similarity": -0.02701389150142076, "epoch": 2.832246039142591, "grad_norm": 1.2671118576097746, "learning_rate": 1.24266482568174e-06, "loss": 1.4797, "reason_loss": 0.49613669514656067, "step": 3039, "utility_loss": 0.983607292175293 }, { "cosine_similarity": 0.010809768224851198, "epoch": 2.8331780055917988, "grad_norm": 1.135068503423821, "learning_rate": 1.2357611322057302e-06, "loss": 1.4569, "reason_loss": 0.46794986724853516, "step": 3040, "utility_loss": 0.9889166951179504 }, { "cosine_similarity": 0.14835851324911284, "epoch": 2.8341099720410066, "grad_norm": 1.1563679426382816, "learning_rate": 1.2288574387297203e-06, "loss": 1.3518, "reason_loss": 0.46258655190467834, "step": 3041, "utility_loss": 0.8891687393188477 }, { "cosine_similarity": -0.06433284449820453, "epoch": 2.8350419384902144, "grad_norm": 1.0849180272861945, "learning_rate": 1.2219537452537109e-06, "loss": 1.2905, "reason_loss": 0.47676077485084534, "step": 3042, "utility_loss": 0.8137655258178711 }, { "cosine_similarity": 0.016934860138866626, "epoch": 2.835973904939422, "grad_norm": 0.9906116035068445, "learning_rate": 1.2150500517777012e-06, "loss": 1.4647, "reason_loss": 0.492034912109375, "step": 3043, "utility_loss": 0.9726332426071167 }, { "cosine_similarity": 0.030185585589200776, "epoch": 2.83690587138863, "grad_norm": 1.2568755881295346, "learning_rate": 1.2081463583016915e-06, "loss": 1.3257, "reason_loss": 0.4848136305809021, "step": 3044, "utility_loss": 0.8408974409103394 }, { "cosine_similarity": 0.018917856547218993, "epoch": 2.8378378378378377, "grad_norm": 1.171659471114361, "learning_rate": 1.2012426648256818e-06, "loss": 1.3602, "reason_loss": 0.48387038707733154, "step": 3045, "utility_loss": 0.8763582706451416 }, { "cosine_similarity": -0.11520290896210023, "epoch": 2.8387698042870455, "grad_norm": 1.0381176381046833, "learning_rate": 1.194338971349672e-06, "loss": 1.5485, "reason_loss": 0.4874058663845062, "step": 3046, "utility_loss": 1.061089038848877 }, { "cosine_similarity": -0.08907759496308108, "epoch": 2.8397017707362533, "grad_norm": 1.0415965637256621, "learning_rate": 1.1874352778736626e-06, "loss": 1.36, "reason_loss": 0.4778828024864197, "step": 3047, "utility_loss": 0.8821598887443542 }, { "cosine_similarity": 0.1421594337369264, "epoch": 2.8406337371854615, "grad_norm": 1.2307426572846092, "learning_rate": 1.1805315843976527e-06, "loss": 1.1626, "reason_loss": 0.4688504934310913, "step": 3048, "utility_loss": 0.6937906742095947 }, { "cosine_similarity": 0.15264601866839325, "epoch": 2.8415657036346693, "grad_norm": 1.397767442644918, "learning_rate": 1.1736278909216432e-06, "loss": 1.4104, "reason_loss": 0.5288873910903931, "step": 3049, "utility_loss": 0.8814877271652222 }, { "cosine_similarity": -0.042501102811221006, "epoch": 2.842497670083877, "grad_norm": 1.341955041172953, "learning_rate": 1.1667241974456335e-06, "loss": 1.3805, "reason_loss": 0.49134188890457153, "step": 3050, "utility_loss": 0.8891799449920654 }, { "cosine_similarity": 0.23517556725239094, "epoch": 2.843429636533085, "grad_norm": 1.4398216754534718, "learning_rate": 1.1598205039696239e-06, "loss": 1.3838, "reason_loss": 0.4802286922931671, "step": 3051, "utility_loss": 0.9035284519195557 }, { "cosine_similarity": 0.005497668275018134, "epoch": 2.8443616029822927, "grad_norm": 1.4002093730519283, "learning_rate": 1.1529168104936142e-06, "loss": 1.3593, "reason_loss": 0.5068398714065552, "step": 3052, "utility_loss": 0.8525072336196899 }, { "cosine_similarity": 0.015913584291258165, "epoch": 2.8452935694315005, "grad_norm": 1.1638480198252108, "learning_rate": 1.1460131170176045e-06, "loss": 1.3444, "reason_loss": 0.46203547716140747, "step": 3053, "utility_loss": 0.8824130296707153 }, { "cosine_similarity": -0.025319604379645366, "epoch": 2.8462255358807083, "grad_norm": 1.2154736396192383, "learning_rate": 1.139109423541595e-06, "loss": 1.4149, "reason_loss": 0.4813416302204132, "step": 3054, "utility_loss": 0.9335902333259583 }, { "cosine_similarity": 0.1321316990952407, "epoch": 2.847157502329916, "grad_norm": 1.5565198267896927, "learning_rate": 1.132205730065585e-06, "loss": 1.5607, "reason_loss": 0.5032450556755066, "step": 3055, "utility_loss": 1.057474136352539 }, { "cosine_similarity": 0.08499947068791636, "epoch": 2.848089468779124, "grad_norm": 1.2317764277832055, "learning_rate": 1.1253020365895754e-06, "loss": 1.4788, "reason_loss": 0.4898267090320587, "step": 3056, "utility_loss": 0.9889756441116333 }, { "cosine_similarity": 0.058220668972637404, "epoch": 2.8490214352283316, "grad_norm": 1.2165512286725784, "learning_rate": 1.118398343113566e-06, "loss": 1.6554, "reason_loss": 0.48477235436439514, "step": 3057, "utility_loss": 1.1706695556640625 }, { "cosine_similarity": 0.016535207362775976, "epoch": 2.8499534016775394, "grad_norm": 1.4123870066241175, "learning_rate": 1.1114946496375562e-06, "loss": 1.5531, "reason_loss": 0.48098379373550415, "step": 3058, "utility_loss": 1.0721118450164795 }, { "cosine_similarity": -0.028117077353715523, "epoch": 2.8508853681267476, "grad_norm": 1.3286497211472725, "learning_rate": 1.1045909561615465e-06, "loss": 1.6039, "reason_loss": 0.4880734086036682, "step": 3059, "utility_loss": 1.115799903869629 }, { "cosine_similarity": -0.013799527889685023, "epoch": 2.8518173345759554, "grad_norm": 1.274925392618893, "learning_rate": 1.0976872626855368e-06, "loss": 1.4159, "reason_loss": 0.47117194533348083, "step": 3060, "utility_loss": 0.944724977016449 }, { "cosine_similarity": -0.024265094950014023, "epoch": 2.852749301025163, "grad_norm": 1.188162711208705, "learning_rate": 1.0907835692095272e-06, "loss": 1.3366, "reason_loss": 0.48433929681777954, "step": 3061, "utility_loss": 0.8522212505340576 }, { "cosine_similarity": -0.013629008416613882, "epoch": 2.853681267474371, "grad_norm": 1.2588737422873513, "learning_rate": 1.0838798757335175e-06, "loss": 1.4062, "reason_loss": 0.473601758480072, "step": 3062, "utility_loss": 0.9325868487358093 }, { "cosine_similarity": 0.002943007148405694, "epoch": 2.854613233923579, "grad_norm": 1.3334114666202668, "learning_rate": 1.0769761822575078e-06, "loss": 1.5487, "reason_loss": 0.48856642842292786, "step": 3063, "utility_loss": 1.060111165046692 }, { "cosine_similarity": -0.04994055234124093, "epoch": 2.8555452003727866, "grad_norm": 1.1308439735760762, "learning_rate": 1.0700724887814983e-06, "loss": 1.4358, "reason_loss": 0.49906980991363525, "step": 3064, "utility_loss": 0.9367642402648926 }, { "cosine_similarity": 0.014799565128563244, "epoch": 2.8564771668219944, "grad_norm": 1.3218827083015647, "learning_rate": 1.0631687953054886e-06, "loss": 1.4181, "reason_loss": 0.4774720072746277, "step": 3065, "utility_loss": 0.9406217336654663 }, { "cosine_similarity": 0.004469894788814168, "epoch": 2.857409133271202, "grad_norm": 1.3125815694656615, "learning_rate": 1.0562651018294787e-06, "loss": 1.6246, "reason_loss": 0.5063508749008179, "step": 3066, "utility_loss": 1.1182235479354858 }, { "cosine_similarity": 0.11639182185293276, "epoch": 2.85834109972041, "grad_norm": 1.1806253229563666, "learning_rate": 1.0493614083534692e-06, "loss": 1.5274, "reason_loss": 0.4745804965496063, "step": 3067, "utility_loss": 1.0528297424316406 }, { "cosine_similarity": -0.00906126458246174, "epoch": 2.8592730661696177, "grad_norm": 1.1614416136690708, "learning_rate": 1.0424577148774595e-06, "loss": 1.3545, "reason_loss": 0.46936455368995667, "step": 3068, "utility_loss": 0.8850951194763184 }, { "cosine_similarity": 0.05620399534188444, "epoch": 2.8602050326188255, "grad_norm": 0.9855431278047809, "learning_rate": 1.0355540214014498e-06, "loss": 1.2121, "reason_loss": 0.49415743350982666, "step": 3069, "utility_loss": 0.7179874181747437 }, { "cosine_similarity": 0.007349782213544734, "epoch": 2.8611369990680338, "grad_norm": 1.3680430853447525, "learning_rate": 1.0286503279254402e-06, "loss": 1.9016, "reason_loss": 0.48337870836257935, "step": 3070, "utility_loss": 1.4182333946228027 }, { "cosine_similarity": -0.021007535101022413, "epoch": 2.862068965517241, "grad_norm": 1.336862647915619, "learning_rate": 1.0217466344494305e-06, "loss": 1.5729, "reason_loss": 0.48255783319473267, "step": 3071, "utility_loss": 1.0903428792953491 }, { "cosine_similarity": 0.04380476798621321, "epoch": 2.8630009319664493, "grad_norm": 1.2077003668132955, "learning_rate": 1.014842940973421e-06, "loss": 1.5107, "reason_loss": 0.5051778554916382, "step": 3072, "utility_loss": 1.005497932434082 }, { "cosine_similarity": 0.3344261118424182, "epoch": 2.863932898415657, "grad_norm": 1.4432941274238567, "learning_rate": 1.007939247497411e-06, "loss": 1.7138, "reason_loss": 0.48209601640701294, "step": 3073, "utility_loss": 1.231752872467041 }, { "cosine_similarity": 0.11518255467026281, "epoch": 2.864864864864865, "grad_norm": 1.2318413617601092, "learning_rate": 1.0010355540214016e-06, "loss": 1.3664, "reason_loss": 0.4895646274089813, "step": 3074, "utility_loss": 0.8768206834793091 }, { "cosine_similarity": -0.07418717461290732, "epoch": 2.8657968313140727, "grad_norm": 1.1644861901402614, "learning_rate": 9.94131860545392e-07, "loss": 1.4322, "reason_loss": 0.48758214712142944, "step": 3075, "utility_loss": 0.9445753693580627 }, { "cosine_similarity": -0.064062223724058, "epoch": 2.8667287977632805, "grad_norm": 1.1815482085022513, "learning_rate": 9.872281670693822e-07, "loss": 1.5188, "reason_loss": 0.4822841286659241, "step": 3076, "utility_loss": 1.0365335941314697 }, { "cosine_similarity": 0.04694278756632153, "epoch": 2.8676607642124883, "grad_norm": 1.1467326966837699, "learning_rate": 9.803244735933725e-07, "loss": 1.5302, "reason_loss": 0.48638278245925903, "step": 3077, "utility_loss": 1.0438001155853271 }, { "cosine_similarity": -0.03637033343602049, "epoch": 2.868592730661696, "grad_norm": 1.2231599287623145, "learning_rate": 9.734207801173628e-07, "loss": 1.7029, "reason_loss": 0.4883197844028473, "step": 3078, "utility_loss": 1.2145538330078125 }, { "cosine_similarity": -0.003629987554436543, "epoch": 2.869524697110904, "grad_norm": 0.996263752346688, "learning_rate": 9.665170866413534e-07, "loss": 1.1318, "reason_loss": 0.5108110904693604, "step": 3079, "utility_loss": 0.6209864616394043 }, { "cosine_similarity": 0.02012719596926, "epoch": 2.8704566635601116, "grad_norm": 1.2252388116243014, "learning_rate": 9.596133931653435e-07, "loss": 1.6877, "reason_loss": 0.4997907876968384, "step": 3080, "utility_loss": 1.1879315376281738 }, { "cosine_similarity": 0.054079417479437245, "epoch": 2.87138863000932, "grad_norm": 1.2328375363103188, "learning_rate": 9.527096996893339e-07, "loss": 1.5533, "reason_loss": 0.4934113323688507, "step": 3081, "utility_loss": 1.059874176979065 }, { "cosine_similarity": -0.066129923950549, "epoch": 2.872320596458527, "grad_norm": 1.2571017967706017, "learning_rate": 9.458060062133243e-07, "loss": 1.2706, "reason_loss": 0.47741973400115967, "step": 3082, "utility_loss": 0.7931606769561768 }, { "cosine_similarity": 0.04467257919377956, "epoch": 2.8732525629077355, "grad_norm": 1.2583275464243298, "learning_rate": 9.389023127373145e-07, "loss": 1.3419, "reason_loss": 0.4607210159301758, "step": 3083, "utility_loss": 0.8811987638473511 }, { "cosine_similarity": 0.05025007624344505, "epoch": 2.8741845293569432, "grad_norm": 1.1360309657937686, "learning_rate": 9.319986192613049e-07, "loss": 1.4606, "reason_loss": 0.4926113486289978, "step": 3084, "utility_loss": 0.9679673910140991 }, { "cosine_similarity": 0.020522395887766574, "epoch": 2.875116495806151, "grad_norm": 0.9992548977823575, "learning_rate": 9.250949257852952e-07, "loss": 1.1757, "reason_loss": 0.45913562178611755, "step": 3085, "utility_loss": 0.7166013717651367 }, { "cosine_similarity": 0.10082805598882003, "epoch": 2.876048462255359, "grad_norm": 1.3301395281656805, "learning_rate": 9.181912323092855e-07, "loss": 1.5404, "reason_loss": 0.4586641788482666, "step": 3086, "utility_loss": 1.0817458629608154 }, { "cosine_similarity": -0.15819015843915368, "epoch": 2.8769804287045666, "grad_norm": 1.1965707733930337, "learning_rate": 9.112875388332759e-07, "loss": 1.2796, "reason_loss": 0.5035669803619385, "step": 3087, "utility_loss": 0.7759866118431091 }, { "cosine_similarity": 0.11156112848545915, "epoch": 2.8779123951537744, "grad_norm": 1.0579429516770635, "learning_rate": 9.043838453572662e-07, "loss": 1.5553, "reason_loss": 0.46695640683174133, "step": 3088, "utility_loss": 1.0883415937423706 }, { "cosine_similarity": -0.09184336059483383, "epoch": 2.878844361602982, "grad_norm": 1.2382784706677137, "learning_rate": 8.974801518812564e-07, "loss": 1.29, "reason_loss": 0.48680317401885986, "step": 3089, "utility_loss": 0.8032218217849731 }, { "cosine_similarity": -0.022159805965972005, "epoch": 2.87977632805219, "grad_norm": 1.1661643256125285, "learning_rate": 8.905764584052469e-07, "loss": 1.3445, "reason_loss": 0.49955374002456665, "step": 3090, "utility_loss": 0.8449357748031616 }, { "cosine_similarity": -0.1487768299145513, "epoch": 2.8807082945013978, "grad_norm": 1.0739261576150227, "learning_rate": 8.836727649292372e-07, "loss": 1.4639, "reason_loss": 0.4809868335723877, "step": 3091, "utility_loss": 0.9829199910163879 }, { "cosine_similarity": 0.02892093636358707, "epoch": 2.881640260950606, "grad_norm": 1.240493245873254, "learning_rate": 8.767690714532276e-07, "loss": 1.3173, "reason_loss": 0.48113733530044556, "step": 3092, "utility_loss": 0.8362115025520325 }, { "cosine_similarity": 0.010397991190776635, "epoch": 2.8825722273998133, "grad_norm": 1.0708480924948742, "learning_rate": 8.698653779772179e-07, "loss": 1.3531, "reason_loss": 0.4937305748462677, "step": 3093, "utility_loss": 0.8593349456787109 }, { "cosine_similarity": 0.11256161859083219, "epoch": 2.8835041938490216, "grad_norm": 1.0653179527254841, "learning_rate": 8.629616845012082e-07, "loss": 1.2617, "reason_loss": 0.490359365940094, "step": 3094, "utility_loss": 0.771323025226593 }, { "cosine_similarity": -0.01920743391085991, "epoch": 2.8844361602982294, "grad_norm": 1.0894487016728016, "learning_rate": 8.560579910251986e-07, "loss": 1.4888, "reason_loss": 0.46657344698905945, "step": 3095, "utility_loss": 1.0222724676132202 }, { "cosine_similarity": 0.06785702834264074, "epoch": 2.885368126747437, "grad_norm": 1.2585528364537428, "learning_rate": 8.491542975491888e-07, "loss": 1.3002, "reason_loss": 0.5011411905288696, "step": 3096, "utility_loss": 0.7990432381629944 }, { "cosine_similarity": 0.12285204738974816, "epoch": 2.886300093196645, "grad_norm": 1.3219560508807626, "learning_rate": 8.422506040731792e-07, "loss": 1.5799, "reason_loss": 0.5036951899528503, "step": 3097, "utility_loss": 1.0762027502059937 }, { "cosine_similarity": 0.04146853230723748, "epoch": 2.8872320596458527, "grad_norm": 1.032656363937631, "learning_rate": 8.353469105971695e-07, "loss": 1.3977, "reason_loss": 0.5020105838775635, "step": 3098, "utility_loss": 0.8957239389419556 }, { "cosine_similarity": 0.0786511333723646, "epoch": 2.8881640260950605, "grad_norm": 1.1261501459202927, "learning_rate": 8.284432171211599e-07, "loss": 1.371, "reason_loss": 0.4724390506744385, "step": 3099, "utility_loss": 0.8985282182693481 }, { "cosine_similarity": -0.06351861347778881, "epoch": 2.8890959925442683, "grad_norm": 1.2061370986389088, "learning_rate": 8.215395236451503e-07, "loss": 1.4076, "reason_loss": 0.4834223985671997, "step": 3100, "utility_loss": 0.9241659045219421 }, { "cosine_similarity": 0.08248723810664249, "epoch": 2.890027958993476, "grad_norm": 1.2242647565032327, "learning_rate": 8.146358301691405e-07, "loss": 1.1925, "reason_loss": 0.4646006226539612, "step": 3101, "utility_loss": 0.7279400825500488 }, { "cosine_similarity": -0.23176006865938903, "epoch": 2.890959925442684, "grad_norm": 1.534705906455731, "learning_rate": 8.07732136693131e-07, "loss": 1.5356, "reason_loss": 0.47136688232421875, "step": 3102, "utility_loss": 1.064185619354248 }, { "cosine_similarity": 0.026418399279621658, "epoch": 2.891891891891892, "grad_norm": 1.1726773609165702, "learning_rate": 8.008284432171212e-07, "loss": 1.3079, "reason_loss": 0.46884775161743164, "step": 3103, "utility_loss": 0.8390772342681885 }, { "cosine_similarity": 0.18180132573745755, "epoch": 2.8928238583410995, "grad_norm": 1.3048542646690326, "learning_rate": 7.939247497411115e-07, "loss": 1.5091, "reason_loss": 0.46336814761161804, "step": 3104, "utility_loss": 1.0457662343978882 }, { "cosine_similarity": 0.031667573549417904, "epoch": 2.8937558247903077, "grad_norm": 0.8676394029312537, "learning_rate": 7.870210562651019e-07, "loss": 1.2913, "reason_loss": 0.5069524049758911, "step": 3105, "utility_loss": 0.7843774557113647 }, { "cosine_similarity": 0.10737298544223514, "epoch": 2.8946877912395155, "grad_norm": 1.1236297324385889, "learning_rate": 7.801173627890922e-07, "loss": 1.438, "reason_loss": 0.49580878019332886, "step": 3106, "utility_loss": 0.9421536326408386 }, { "cosine_similarity": 0.0783725096680957, "epoch": 2.8956197576887233, "grad_norm": 0.9383675836089008, "learning_rate": 7.732136693130826e-07, "loss": 1.2298, "reason_loss": 0.48104870319366455, "step": 3107, "utility_loss": 0.748755693435669 }, { "cosine_similarity": 0.10091110033968606, "epoch": 2.896551724137931, "grad_norm": 1.3501572855948178, "learning_rate": 7.663099758370728e-07, "loss": 1.7694, "reason_loss": 0.5095904469490051, "step": 3108, "utility_loss": 1.2598260641098022 }, { "cosine_similarity": 0.02015598114874375, "epoch": 2.897483690587139, "grad_norm": 1.200131657617587, "learning_rate": 7.594062823610632e-07, "loss": 1.5045, "reason_loss": 0.51050865650177, "step": 3109, "utility_loss": 0.9940199851989746 }, { "cosine_similarity": -0.011864732158807649, "epoch": 2.8984156570363466, "grad_norm": 1.0500766671710962, "learning_rate": 7.525025888850536e-07, "loss": 1.431, "reason_loss": 0.49208566546440125, "step": 3110, "utility_loss": 0.9389287829399109 }, { "cosine_similarity": 0.03328407227646444, "epoch": 2.8993476234855544, "grad_norm": 1.1748126140188424, "learning_rate": 7.455988954090439e-07, "loss": 1.2456, "reason_loss": 0.48518848419189453, "step": 3111, "utility_loss": 0.7604436874389648 }, { "cosine_similarity": -0.09761364853596619, "epoch": 2.900279589934762, "grad_norm": 1.044119456058621, "learning_rate": 7.386952019330343e-07, "loss": 1.0595, "reason_loss": 0.47570547461509705, "step": 3112, "utility_loss": 0.5837869644165039 }, { "cosine_similarity": 0.013876295983887376, "epoch": 2.90121155638397, "grad_norm": 1.2751445434655686, "learning_rate": 7.317915084570246e-07, "loss": 1.5458, "reason_loss": 0.4816577434539795, "step": 3113, "utility_loss": 1.0641558170318604 }, { "cosine_similarity": 0.010236334283907118, "epoch": 2.9021435228331782, "grad_norm": 1.2048013538971647, "learning_rate": 7.248878149810148e-07, "loss": 1.2707, "reason_loss": 0.4358241558074951, "step": 3114, "utility_loss": 0.8349118232727051 }, { "cosine_similarity": -0.04324817527340682, "epoch": 2.9030754892823856, "grad_norm": 1.1751822046958358, "learning_rate": 7.179841215050052e-07, "loss": 1.4935, "reason_loss": 0.488645076751709, "step": 3115, "utility_loss": 1.004859209060669 }, { "cosine_similarity": 0.010180311546146197, "epoch": 2.904007455731594, "grad_norm": 1.0468804648977958, "learning_rate": 7.110804280289955e-07, "loss": 1.5533, "reason_loss": 0.48963049054145813, "step": 3116, "utility_loss": 1.0636590719223022 }, { "cosine_similarity": 0.18062155700108584, "epoch": 2.9049394221808016, "grad_norm": 1.7306265194848651, "learning_rate": 7.041767345529859e-07, "loss": 1.3853, "reason_loss": 0.4909515678882599, "step": 3117, "utility_loss": 0.8943466544151306 }, { "cosine_similarity": 0.04535054617923818, "epoch": 2.9058713886300094, "grad_norm": 1.092246413748149, "learning_rate": 6.972730410769763e-07, "loss": 1.5077, "reason_loss": 0.48461389541625977, "step": 3118, "utility_loss": 1.023073434829712 }, { "cosine_similarity": -0.13518141585952517, "epoch": 2.906803355079217, "grad_norm": 1.0903242901478318, "learning_rate": 6.903693476009666e-07, "loss": 1.4098, "reason_loss": 0.48426058888435364, "step": 3119, "utility_loss": 0.9255562424659729 }, { "cosine_similarity": 0.08698666580247583, "epoch": 2.907735321528425, "grad_norm": 1.3097345648909786, "learning_rate": 6.83465654124957e-07, "loss": 1.551, "reason_loss": 0.4948591887950897, "step": 3120, "utility_loss": 1.0560944080352783 }, { "cosine_similarity": 0.03129763906987828, "epoch": 2.9086672879776327, "grad_norm": 1.3155583594340083, "learning_rate": 6.765619606489472e-07, "loss": 1.5112, "reason_loss": 0.46791356801986694, "step": 3121, "utility_loss": 1.04329514503479 }, { "cosine_similarity": 0.11419585851050987, "epoch": 2.9095992544268405, "grad_norm": 1.1715961094076597, "learning_rate": 6.696582671729376e-07, "loss": 1.3404, "reason_loss": 0.4925033450126648, "step": 3122, "utility_loss": 0.8478865027427673 }, { "cosine_similarity": 0.07336921168693385, "epoch": 2.9105312208760483, "grad_norm": 1.2328876043715533, "learning_rate": 6.627545736969279e-07, "loss": 1.484, "reason_loss": 0.4775051474571228, "step": 3123, "utility_loss": 1.0065267086029053 }, { "cosine_similarity": -0.14932264022349306, "epoch": 2.911463187325256, "grad_norm": 1.2030975986062347, "learning_rate": 6.558508802209182e-07, "loss": 1.3928, "reason_loss": 0.4633757174015045, "step": 3124, "utility_loss": 0.9294356107711792 }, { "cosine_similarity": 0.20624359833664013, "epoch": 2.9123951537744643, "grad_norm": 1.3092506863274656, "learning_rate": 6.489471867449086e-07, "loss": 1.361, "reason_loss": 0.4752463102340698, "step": 3125, "utility_loss": 0.8857995271682739 }, { "cosine_similarity": 0.04145480003005092, "epoch": 2.9133271202236717, "grad_norm": 1.048394194197549, "learning_rate": 6.420434932688989e-07, "loss": 1.1727, "reason_loss": 0.45797306299209595, "step": 3126, "utility_loss": 0.7147289514541626 }, { "cosine_similarity": 0.05654084388878611, "epoch": 2.91425908667288, "grad_norm": 1.1646095252034572, "learning_rate": 6.351397997928894e-07, "loss": 1.5793, "reason_loss": 0.46496260166168213, "step": 3127, "utility_loss": 1.1143656969070435 }, { "cosine_similarity": -0.0632085134022852, "epoch": 2.9151910531220877, "grad_norm": 1.2249769823599632, "learning_rate": 6.282361063168796e-07, "loss": 1.4113, "reason_loss": 0.46598029136657715, "step": 3128, "utility_loss": 0.9453366994857788 }, { "cosine_similarity": -0.1166545573396855, "epoch": 2.9161230195712955, "grad_norm": 1.4449994868321558, "learning_rate": 6.2133241284087e-07, "loss": 1.3132, "reason_loss": 0.4752950072288513, "step": 3129, "utility_loss": 0.8378653526306152 }, { "cosine_similarity": -0.043937946828243986, "epoch": 2.9170549860205033, "grad_norm": 1.2393036436692966, "learning_rate": 6.144287193648602e-07, "loss": 1.5638, "reason_loss": 0.5060926675796509, "step": 3130, "utility_loss": 1.057708501815796 }, { "cosine_similarity": 0.06499592778380396, "epoch": 2.917986952469711, "grad_norm": 1.29309237900128, "learning_rate": 6.075250258888506e-07, "loss": 1.297, "reason_loss": 0.4552452564239502, "step": 3131, "utility_loss": 0.8417285680770874 }, { "cosine_similarity": -0.014499543214601328, "epoch": 2.918918918918919, "grad_norm": 1.1397238407063268, "learning_rate": 6.006213324128409e-07, "loss": 1.5078, "reason_loss": 0.5095263719558716, "step": 3132, "utility_loss": 0.9982632994651794 }, { "cosine_similarity": 0.09144224999303617, "epoch": 2.9198508853681266, "grad_norm": 1.1535414342972106, "learning_rate": 5.937176389368313e-07, "loss": 1.6093, "reason_loss": 0.49302399158477783, "step": 3133, "utility_loss": 1.116245985031128 }, { "cosine_similarity": 0.05556050864229391, "epoch": 2.9207828518173344, "grad_norm": 1.296233388966137, "learning_rate": 5.868139454608216e-07, "loss": 1.3533, "reason_loss": 0.48841527104377747, "step": 3134, "utility_loss": 0.8649026155471802 }, { "cosine_similarity": -0.05153638919394136, "epoch": 2.9217148182665422, "grad_norm": 1.2123477785847245, "learning_rate": 5.799102519848119e-07, "loss": 1.4408, "reason_loss": 0.4973936378955841, "step": 3135, "utility_loss": 0.9433566927909851 }, { "cosine_similarity": -0.0006384240560382856, "epoch": 2.9226467847157505, "grad_norm": 1.0664316856073208, "learning_rate": 5.730065585088022e-07, "loss": 1.059, "reason_loss": 0.47544774413108826, "step": 3136, "utility_loss": 0.5835872888565063 }, { "cosine_similarity": -0.08400375986963128, "epoch": 2.923578751164958, "grad_norm": 1.075312127190932, "learning_rate": 5.661028650327925e-07, "loss": 1.1304, "reason_loss": 0.4639957845211029, "step": 3137, "utility_loss": 0.666392982006073 }, { "cosine_similarity": -0.10047762637203055, "epoch": 2.924510717614166, "grad_norm": 1.1777690643704875, "learning_rate": 5.59199171556783e-07, "loss": 1.3894, "reason_loss": 0.47415420413017273, "step": 3138, "utility_loss": 0.9152151346206665 }, { "cosine_similarity": -0.024243028574592748, "epoch": 2.925442684063374, "grad_norm": 1.0034065265998744, "learning_rate": 5.522954780807733e-07, "loss": 1.3807, "reason_loss": 0.4662085771560669, "step": 3139, "utility_loss": 0.91450035572052 }, { "cosine_similarity": 0.012935384666792988, "epoch": 2.9263746505125816, "grad_norm": 1.0810120266497436, "learning_rate": 5.453917846047636e-07, "loss": 1.2178, "reason_loss": 0.48983415961265564, "step": 3140, "utility_loss": 0.7279466390609741 }, { "cosine_similarity": 0.013315543074411618, "epoch": 2.9273066169617894, "grad_norm": 1.1495622837965551, "learning_rate": 5.384880911287539e-07, "loss": 1.5954, "reason_loss": 0.4567812383174896, "step": 3141, "utility_loss": 1.138644814491272 }, { "cosine_similarity": 0.039762228435771985, "epoch": 2.928238583410997, "grad_norm": 1.3212694044199476, "learning_rate": 5.315843976527443e-07, "loss": 1.6443, "reason_loss": 0.4813960790634155, "step": 3142, "utility_loss": 1.1629536151885986 }, { "cosine_similarity": 0.06922419959611727, "epoch": 2.929170549860205, "grad_norm": 1.017939806105214, "learning_rate": 5.246807041767346e-07, "loss": 1.2764, "reason_loss": 0.4966691732406616, "step": 3143, "utility_loss": 0.7797755002975464 }, { "cosine_similarity": -0.017172540728605953, "epoch": 2.9301025163094128, "grad_norm": 1.2687490144622702, "learning_rate": 5.177770107007249e-07, "loss": 1.5751, "reason_loss": 0.5300132632255554, "step": 3144, "utility_loss": 1.0450778007507324 }, { "cosine_similarity": -0.01418303427639124, "epoch": 2.9310344827586206, "grad_norm": 1.3805241102017451, "learning_rate": 5.108733172247152e-07, "loss": 1.6442, "reason_loss": 0.4920964241027832, "step": 3145, "utility_loss": 1.1521356105804443 }, { "cosine_similarity": 0.12224988600519696, "epoch": 2.9319664492078283, "grad_norm": 1.232717979345265, "learning_rate": 5.039696237487055e-07, "loss": 1.6533, "reason_loss": 0.5240076780319214, "step": 3146, "utility_loss": 1.1293097734451294 }, { "cosine_similarity": -0.07658771711227257, "epoch": 2.9328984156570366, "grad_norm": 1.1234879240833706, "learning_rate": 4.97065930272696e-07, "loss": 1.2982, "reason_loss": 0.5031974911689758, "step": 3147, "utility_loss": 0.7949535846710205 }, { "cosine_similarity": 0.1047179792522103, "epoch": 2.933830382106244, "grad_norm": 1.0338635010423924, "learning_rate": 4.901622367966863e-07, "loss": 1.6899, "reason_loss": 0.4733133912086487, "step": 3148, "utility_loss": 1.216539978981018 }, { "cosine_similarity": 0.07378064606859536, "epoch": 2.934762348555452, "grad_norm": 1.0068144623809672, "learning_rate": 4.832585433206767e-07, "loss": 1.5364, "reason_loss": 0.5109483599662781, "step": 3149, "utility_loss": 1.0254206657409668 }, { "cosine_similarity": 0.01606085005722656, "epoch": 2.93569431500466, "grad_norm": 1.3662251072955975, "learning_rate": 4.7635484984466693e-07, "loss": 1.4994, "reason_loss": 0.4750295877456665, "step": 3150, "utility_loss": 1.0243278741836548 }, { "cosine_similarity": -0.11676630994826281, "epoch": 2.9366262814538677, "grad_norm": 1.1274159766933474, "learning_rate": 4.6945115636865724e-07, "loss": 1.4417, "reason_loss": 0.4873542785644531, "step": 3151, "utility_loss": 0.9543354511260986 }, { "cosine_similarity": 0.00732807100453865, "epoch": 2.9375582479030755, "grad_norm": 1.2781531595822795, "learning_rate": 4.625474628926476e-07, "loss": 1.2619, "reason_loss": 0.5100232362747192, "step": 3152, "utility_loss": 0.7518872618675232 }, { "cosine_similarity": -0.042089446293323236, "epoch": 2.9384902143522833, "grad_norm": 1.48463951062425, "learning_rate": 4.5564376941663797e-07, "loss": 1.6283, "reason_loss": 0.4678400754928589, "step": 3153, "utility_loss": 1.1604105234146118 }, { "cosine_similarity": -0.05133457848872007, "epoch": 2.939422180801491, "grad_norm": 1.1553818862200402, "learning_rate": 4.487400759406282e-07, "loss": 1.1863, "reason_loss": 0.5125402212142944, "step": 3154, "utility_loss": 0.6737496852874756 }, { "cosine_similarity": -0.11850511417572028, "epoch": 2.940354147250699, "grad_norm": 1.1474515748102754, "learning_rate": 4.418363824646186e-07, "loss": 1.6983, "reason_loss": 0.49265679717063904, "step": 3155, "utility_loss": 1.205636739730835 }, { "cosine_similarity": 0.05389672444122491, "epoch": 2.9412861136999067, "grad_norm": 1.3007822181804236, "learning_rate": 4.3493268898860895e-07, "loss": 1.2933, "reason_loss": 0.48209482431411743, "step": 3156, "utility_loss": 0.8112384676933289 }, { "cosine_similarity": -0.004732460978839378, "epoch": 2.9422180801491145, "grad_norm": 1.4653536152500384, "learning_rate": 4.280289955125993e-07, "loss": 1.7763, "reason_loss": 0.5076661109924316, "step": 3157, "utility_loss": 1.2686002254486084 }, { "cosine_similarity": 0.06255827480224015, "epoch": 2.9431500465983227, "grad_norm": 1.0647210356286276, "learning_rate": 4.211253020365896e-07, "loss": 1.3567, "reason_loss": 0.48133584856987, "step": 3158, "utility_loss": 0.8754087090492249 }, { "cosine_similarity": -0.06737653560871741, "epoch": 2.94408201304753, "grad_norm": 1.1135323569879003, "learning_rate": 4.142216085605799e-07, "loss": 1.6053, "reason_loss": 0.4702915549278259, "step": 3159, "utility_loss": 1.134998083114624 }, { "cosine_similarity": 0.05928096715876612, "epoch": 2.9450139794967383, "grad_norm": 1.3261936097644496, "learning_rate": 4.0731791508457024e-07, "loss": 1.2894, "reason_loss": 0.4942462146282196, "step": 3160, "utility_loss": 0.7951120138168335 }, { "cosine_similarity": 0.04147608668847422, "epoch": 2.945945945945946, "grad_norm": 1.5902760526056539, "learning_rate": 4.004142216085606e-07, "loss": 1.4951, "reason_loss": 0.4732654392719269, "step": 3161, "utility_loss": 1.0217941999435425 }, { "cosine_similarity": -0.06829722544501697, "epoch": 2.946877912395154, "grad_norm": 1.263621081486847, "learning_rate": 3.9351052813255096e-07, "loss": 1.3588, "reason_loss": 0.4686039984226227, "step": 3162, "utility_loss": 0.8901984691619873 }, { "cosine_similarity": 0.058915281013096475, "epoch": 2.9478098788443616, "grad_norm": 1.4952009374244843, "learning_rate": 3.866068346565413e-07, "loss": 1.4282, "reason_loss": 0.5085610151290894, "step": 3163, "utility_loss": 0.9196329116821289 }, { "cosine_similarity": -0.07352716734750532, "epoch": 2.9487418452935694, "grad_norm": 1.5309916178289829, "learning_rate": 3.797031411805316e-07, "loss": 1.6141, "reason_loss": 0.4697134494781494, "step": 3164, "utility_loss": 1.1444284915924072 }, { "cosine_similarity": 0.030021784686656467, "epoch": 2.949673811742777, "grad_norm": 1.2456860961248124, "learning_rate": 3.7279944770452194e-07, "loss": 1.444, "reason_loss": 0.4750341773033142, "step": 3165, "utility_loss": 0.9689553380012512 }, { "cosine_similarity": 0.034250311338634806, "epoch": 2.950605778191985, "grad_norm": 1.1670560201762692, "learning_rate": 3.658957542285123e-07, "loss": 1.3359, "reason_loss": 0.4768705666065216, "step": 3166, "utility_loss": 0.8590015172958374 }, { "cosine_similarity": 0.05074010711810049, "epoch": 2.951537744641193, "grad_norm": 1.1547925000004482, "learning_rate": 3.589920607525026e-07, "loss": 1.4303, "reason_loss": 0.48912641406059265, "step": 3167, "utility_loss": 0.9411306381225586 }, { "cosine_similarity": -0.1292518579504243, "epoch": 2.9524697110904006, "grad_norm": 1.1077262536336763, "learning_rate": 3.5208836727649297e-07, "loss": 1.5667, "reason_loss": 0.5122976303100586, "step": 3168, "utility_loss": 1.0544424057006836 }, { "cosine_similarity": -0.00012335880922824665, "epoch": 2.953401677539609, "grad_norm": 1.1681454131192446, "learning_rate": 3.451846738004833e-07, "loss": 1.6168, "reason_loss": 0.4877183139324188, "step": 3169, "utility_loss": 1.1291234493255615 }, { "cosine_similarity": 0.03114757003749983, "epoch": 2.954333643988816, "grad_norm": 1.1519881096510292, "learning_rate": 3.382809803244736e-07, "loss": 1.5929, "reason_loss": 0.4905027747154236, "step": 3170, "utility_loss": 1.1023783683776855 }, { "cosine_similarity": -0.02190125776547788, "epoch": 2.9552656104380244, "grad_norm": 1.05011282062316, "learning_rate": 3.3137728684846395e-07, "loss": 1.3808, "reason_loss": 0.5087729096412659, "step": 3171, "utility_loss": 0.8719896674156189 }, { "cosine_similarity": 0.09250783083897227, "epoch": 2.956197576887232, "grad_norm": 1.0704395329913168, "learning_rate": 3.244735933724543e-07, "loss": 1.4111, "reason_loss": 0.4946363866329193, "step": 3172, "utility_loss": 0.916504442691803 }, { "cosine_similarity": 0.037947456694618936, "epoch": 2.95712954333644, "grad_norm": 1.0435622627772276, "learning_rate": 3.175698998964447e-07, "loss": 1.58, "reason_loss": 0.509810209274292, "step": 3173, "utility_loss": 1.0701481103897095 }, { "cosine_similarity": 0.030275689792818417, "epoch": 2.9580615097856477, "grad_norm": 1.2503030829464192, "learning_rate": 3.10666206420435e-07, "loss": 1.7943, "reason_loss": 0.45401763916015625, "step": 3174, "utility_loss": 1.3402692079544067 }, { "cosine_similarity": 0.03516008758203223, "epoch": 2.9589934762348555, "grad_norm": 0.9960741980549527, "learning_rate": 3.037625129444253e-07, "loss": 1.2405, "reason_loss": 0.4905625581741333, "step": 3175, "utility_loss": 0.7499422430992126 }, { "cosine_similarity": -0.011759928631597107, "epoch": 2.9599254426840633, "grad_norm": 1.0071113810884162, "learning_rate": 2.9685881946841566e-07, "loss": 1.1979, "reason_loss": 0.47203245759010315, "step": 3176, "utility_loss": 0.7258725166320801 }, { "cosine_similarity": 0.16191632452604757, "epoch": 2.960857409133271, "grad_norm": 1.0935611578215174, "learning_rate": 2.8995512599240596e-07, "loss": 1.4169, "reason_loss": 0.512481153011322, "step": 3177, "utility_loss": 0.9044226408004761 }, { "cosine_similarity": 0.2306750349676882, "epoch": 2.961789375582479, "grad_norm": 1.3723865546111118, "learning_rate": 2.8305143251639627e-07, "loss": 1.4949, "reason_loss": 0.48006778955459595, "step": 3178, "utility_loss": 1.014803171157837 }, { "cosine_similarity": -0.02440092791057789, "epoch": 2.9627213420316867, "grad_norm": 1.4202336761979593, "learning_rate": 2.7614773904038664e-07, "loss": 1.6073, "reason_loss": 0.4839140474796295, "step": 3179, "utility_loss": 1.1233716011047363 }, { "cosine_similarity": 0.03506684009981565, "epoch": 2.963653308480895, "grad_norm": 1.1135893213259302, "learning_rate": 2.6924404556437694e-07, "loss": 1.4375, "reason_loss": 0.5035898089408875, "step": 3180, "utility_loss": 0.9338917136192322 }, { "cosine_similarity": 0.02137024148049965, "epoch": 2.9645852749301023, "grad_norm": 1.6189885956253443, "learning_rate": 2.623403520883673e-07, "loss": 1.7205, "reason_loss": 0.4737897515296936, "step": 3181, "utility_loss": 1.2466824054718018 }, { "cosine_similarity": -0.019139352776337635, "epoch": 2.9655172413793105, "grad_norm": 1.0639916554390043, "learning_rate": 2.554366586123576e-07, "loss": 1.4782, "reason_loss": 0.48616844415664673, "step": 3182, "utility_loss": 0.992011547088623 }, { "cosine_similarity": -0.033309074991641055, "epoch": 2.9664492078285183, "grad_norm": 1.20600079761908, "learning_rate": 2.48532965136348e-07, "loss": 1.347, "reason_loss": 0.4957507252693176, "step": 3183, "utility_loss": 0.8512930870056152 }, { "cosine_similarity": 0.06839008646913786, "epoch": 2.967381174277726, "grad_norm": 1.194684332106799, "learning_rate": 2.4162927166033834e-07, "loss": 1.6489, "reason_loss": 0.4920650124549866, "step": 3184, "utility_loss": 1.1568424701690674 }, { "cosine_similarity": -0.04634416211672623, "epoch": 2.968313140726934, "grad_norm": 1.1866913509392383, "learning_rate": 2.3472557818432862e-07, "loss": 1.3112, "reason_loss": 0.46791693568229675, "step": 3185, "utility_loss": 0.8432658910751343 }, { "cosine_similarity": -0.06459414823424235, "epoch": 2.9692451071761417, "grad_norm": 1.4516067284516707, "learning_rate": 2.2782188470831898e-07, "loss": 1.5657, "reason_loss": 0.4777478575706482, "step": 3186, "utility_loss": 1.087942123413086 }, { "cosine_similarity": 0.07868429359295234, "epoch": 2.9701770736253494, "grad_norm": 1.440466265464633, "learning_rate": 2.209181912323093e-07, "loss": 1.4754, "reason_loss": 0.478410542011261, "step": 3187, "utility_loss": 0.997031569480896 }, { "cosine_similarity": 0.06293900178183648, "epoch": 2.9711090400745572, "grad_norm": 1.2250219525347705, "learning_rate": 2.1401449775629965e-07, "loss": 1.5059, "reason_loss": 0.4767870008945465, "step": 3188, "utility_loss": 1.0290727615356445 }, { "cosine_similarity": 0.08785691137924824, "epoch": 2.972041006523765, "grad_norm": 1.3256646070718439, "learning_rate": 2.0711080428028996e-07, "loss": 1.3413, "reason_loss": 0.49231988191604614, "step": 3189, "utility_loss": 0.8489518761634827 }, { "cosine_similarity": -0.09466377412648784, "epoch": 2.972972972972973, "grad_norm": 1.1287547842519599, "learning_rate": 2.002071108042803e-07, "loss": 1.2299, "reason_loss": 0.47540926933288574, "step": 3190, "utility_loss": 0.7545232772827148 }, { "cosine_similarity": 0.022905330937574842, "epoch": 2.973904939422181, "grad_norm": 1.6031501913654422, "learning_rate": 1.9330341732827066e-07, "loss": 1.5263, "reason_loss": 0.48980841040611267, "step": 3191, "utility_loss": 1.0364460945129395 }, { "cosine_similarity": 0.13073240993666554, "epoch": 2.9748369058713884, "grad_norm": 1.2728051769620108, "learning_rate": 1.8639972385226097e-07, "loss": 1.5494, "reason_loss": 0.4904467463493347, "step": 3192, "utility_loss": 1.0589444637298584 }, { "cosine_similarity": 0.0031887857905986113, "epoch": 2.9757688723205966, "grad_norm": 1.0729313711274877, "learning_rate": 1.794960303762513e-07, "loss": 1.4275, "reason_loss": 0.47666090726852417, "step": 3193, "utility_loss": 0.9508694410324097 }, { "cosine_similarity": -0.053924481741098276, "epoch": 2.9767008387698044, "grad_norm": 1.1293084823012698, "learning_rate": 1.7259233690024164e-07, "loss": 1.5218, "reason_loss": 0.4963819980621338, "step": 3194, "utility_loss": 1.025391697883606 }, { "cosine_similarity": -0.07418530916878088, "epoch": 2.977632805219012, "grad_norm": 1.1976442118388277, "learning_rate": 1.6568864342423198e-07, "loss": 1.2992, "reason_loss": 0.4800591468811035, "step": 3195, "utility_loss": 0.8191070556640625 }, { "cosine_similarity": 0.03632936115237116, "epoch": 2.97856477166822, "grad_norm": 1.255944674774129, "learning_rate": 1.5878494994822234e-07, "loss": 1.5216, "reason_loss": 0.49591416120529175, "step": 3196, "utility_loss": 1.0256974697113037 }, { "cosine_similarity": 0.0018435436841585044, "epoch": 2.9794967381174278, "grad_norm": 1.1240184235241273, "learning_rate": 1.5188125647221265e-07, "loss": 1.4635, "reason_loss": 0.4751388430595398, "step": 3197, "utility_loss": 0.9884001612663269 }, { "cosine_similarity": 0.12250527922731018, "epoch": 2.9804287045666356, "grad_norm": 1.3882645029524001, "learning_rate": 1.4497756299620298e-07, "loss": 1.5559, "reason_loss": 0.4998546242713928, "step": 3198, "utility_loss": 1.0560320615768433 }, { "cosine_similarity": 0.05376173044764456, "epoch": 2.9813606710158433, "grad_norm": 1.1487510677834663, "learning_rate": 1.3807386952019332e-07, "loss": 1.3666, "reason_loss": 0.5150159597396851, "step": 3199, "utility_loss": 0.8515506982803345 }, { "cosine_similarity": 0.16524084593441396, "epoch": 2.982292637465051, "grad_norm": 1.2613542799373771, "learning_rate": 1.3117017604418365e-07, "loss": 1.5792, "reason_loss": 0.4886874258518219, "step": 3200, "utility_loss": 1.0904746055603027 }, { "cosine_similarity": 0.06791309382345807, "epoch": 2.983224603914259, "grad_norm": 1.1562845633381316, "learning_rate": 1.24266482568174e-07, "loss": 1.4764, "reason_loss": 0.4946932792663574, "step": 3201, "utility_loss": 0.9816793203353882 }, { "cosine_similarity": -0.28083453453580104, "epoch": 2.984156570363467, "grad_norm": 1.352496326963234, "learning_rate": 1.1736278909216431e-07, "loss": 1.5339, "reason_loss": 0.4697483777999878, "step": 3202, "utility_loss": 1.0641999244689941 }, { "cosine_similarity": -0.022238247114898368, "epoch": 2.9850885368126745, "grad_norm": 1.1744810030054587, "learning_rate": 1.1045909561615465e-07, "loss": 1.6094, "reason_loss": 0.4777185916900635, "step": 3203, "utility_loss": 1.1316418647766113 }, { "cosine_similarity": -0.04689184549607194, "epoch": 2.9860205032618827, "grad_norm": 1.029344351526997, "learning_rate": 1.0355540214014498e-07, "loss": 1.3137, "reason_loss": 0.4963192343711853, "step": 3204, "utility_loss": 0.8174160122871399 }, { "cosine_similarity": 0.013257741612917935, "epoch": 2.9869524697110905, "grad_norm": 1.3626615931618016, "learning_rate": 9.665170866413533e-08, "loss": 1.6422, "reason_loss": 0.476842999458313, "step": 3205, "utility_loss": 1.1653598546981812 }, { "cosine_similarity": 0.019825297492293035, "epoch": 2.9878844361602983, "grad_norm": 1.1136105370246674, "learning_rate": 8.974801518812565e-08, "loss": 1.1476, "reason_loss": 0.47770604491233826, "step": 3206, "utility_loss": 0.669905424118042 }, { "cosine_similarity": 0.004667261367019573, "epoch": 2.988816402609506, "grad_norm": 1.0172486621850265, "learning_rate": 8.284432171211599e-08, "loss": 1.3262, "reason_loss": 0.5166902542114258, "step": 3207, "utility_loss": 0.8094905614852905 }, { "cosine_similarity": 0.01495770452406095, "epoch": 2.989748369058714, "grad_norm": 1.2951646649874011, "learning_rate": 7.594062823610632e-08, "loss": 1.6058, "reason_loss": 0.4531099498271942, "step": 3208, "utility_loss": 1.1527221202850342 }, { "cosine_similarity": 0.02139535828121777, "epoch": 2.9906803355079217, "grad_norm": 1.351392608945475, "learning_rate": 6.903693476009666e-08, "loss": 1.4984, "reason_loss": 0.501305878162384, "step": 3209, "utility_loss": 0.9970453977584839 }, { "cosine_similarity": 0.010731495367510727, "epoch": 2.9916123019571295, "grad_norm": 1.0018680103541882, "learning_rate": 6.2133241284087e-08, "loss": 1.4141, "reason_loss": 0.48041248321533203, "step": 3210, "utility_loss": 0.9336457252502441 }, { "cosine_similarity": -0.1229389647500183, "epoch": 2.9925442684063372, "grad_norm": 1.349076408626573, "learning_rate": 5.522954780807732e-08, "loss": 1.7577, "reason_loss": 0.48935502767562866, "step": 3211, "utility_loss": 1.2683742046356201 }, { "cosine_similarity": 0.0035811323517833337, "epoch": 2.993476234855545, "grad_norm": 1.4635772207925126, "learning_rate": 4.8325854332067665e-08, "loss": 1.3549, "reason_loss": 0.5063822269439697, "step": 3212, "utility_loss": 0.8485575318336487 }, { "cosine_similarity": 0.09130304837835974, "epoch": 2.9944082013047533, "grad_norm": 1.262860553489782, "learning_rate": 4.1422160856057994e-08, "loss": 1.5877, "reason_loss": 0.49081194400787354, "step": 3213, "utility_loss": 1.096840500831604 }, { "cosine_similarity": -0.05467761297174037, "epoch": 2.9953401677539606, "grad_norm": 1.102901188537116, "learning_rate": 3.451846738004833e-08, "loss": 1.5365, "reason_loss": 0.5027488470077515, "step": 3214, "utility_loss": 1.033722162246704 }, { "cosine_similarity": 0.033889362688717395, "epoch": 2.996272134203169, "grad_norm": 1.042901820260411, "learning_rate": 2.761477390403866e-08, "loss": 1.2209, "reason_loss": 0.48233428597450256, "step": 3215, "utility_loss": 0.7385515570640564 }, { "cosine_similarity": -0.014566744443724813, "epoch": 2.9972041006523766, "grad_norm": 1.9632803684500042, "learning_rate": 2.0711080428028997e-08, "loss": 1.2262, "reason_loss": 0.49736592173576355, "step": 3216, "utility_loss": 0.7288330793380737 }, { "cosine_similarity": 0.027663721991810945, "epoch": 2.9981360671015844, "grad_norm": 1.2064508949873514, "learning_rate": 1.380738695201933e-08, "loss": 1.357, "reason_loss": 0.4909939467906952, "step": 3217, "utility_loss": 0.8660329580307007 }, { "cosine_similarity": 0.027114969791192742, "epoch": 2.999068033550792, "grad_norm": 1.1768412415715024, "learning_rate": 6.903693476009665e-09, "loss": 1.4197, "reason_loss": 0.4619781970977783, "step": 3218, "utility_loss": 0.9577109217643738 }, { "cosine_similarity": -0.051194429517224437, "epoch": 3.0, "grad_norm": 0.9449049095313518, "learning_rate": 0.0, "loss": 1.048, "reason_loss": 0.5068051218986511, "step": 3219, "utility_loss": 0.5411568880081177 }, { "epoch": 3.0, "step": 3219, "total_flos": 0.0, "train_loss": 1.8987508281190486, "train_runtime": 123692.9015, "train_samples_per_second": 0.416, "train_steps_per_second": 0.026 } ], "logging_steps": 1, "max_steps": 3219, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }