|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 2000, |
|
"global_step": 4168, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0002399232245681382, |
|
"grad_norm": 22.3939418765977, |
|
"learning_rate": 1.199040767386091e-09, |
|
"logits/chosen": -0.4932885766029358, |
|
"logits/rejected": -0.45799916982650757, |
|
"logps/chosen": -190.0329132080078, |
|
"logps/rejected": -191.8831787109375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0023992322456813818, |
|
"grad_norm": 20.489528225103992, |
|
"learning_rate": 1.199040767386091e-08, |
|
"logits/chosen": -0.46848025918006897, |
|
"logits/rejected": -0.5136116147041321, |
|
"logps/chosen": -412.9850158691406, |
|
"logps/rejected": -341.7144775390625, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.0014108126051723957, |
|
"rewards/margins": 0.002006153343245387, |
|
"rewards/rejected": -0.0005953406216576695, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0047984644913627635, |
|
"grad_norm": 22.660301161500335, |
|
"learning_rate": 2.398081534772182e-08, |
|
"logits/chosen": -0.49284687638282776, |
|
"logits/rejected": -0.5051966309547424, |
|
"logps/chosen": -284.6512756347656, |
|
"logps/rejected": -252.2484130859375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.00024593251873739064, |
|
"rewards/margins": -0.0005931654013693333, |
|
"rewards/rejected": 0.0003472328535281122, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007197696737044146, |
|
"grad_norm": 19.268901422232762, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": -0.505753755569458, |
|
"logits/rejected": -0.5429405570030212, |
|
"logps/chosen": -292.92572021484375, |
|
"logps/rejected": -302.96075439453125, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.00018037228437606245, |
|
"rewards/margins": -0.0002901476400438696, |
|
"rewards/rejected": 0.00010977555939462036, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009596928982725527, |
|
"grad_norm": 20.284535112632803, |
|
"learning_rate": 4.796163069544364e-08, |
|
"logits/chosen": -0.5352919101715088, |
|
"logits/rejected": -0.5566824078559875, |
|
"logps/chosen": -312.46917724609375, |
|
"logps/rejected": -294.7722473144531, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -8.040445209189784e-06, |
|
"rewards/margins": 0.0011984433513134718, |
|
"rewards/rejected": -0.0012064839247614145, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01199616122840691, |
|
"grad_norm": 22.05827429093486, |
|
"learning_rate": 5.995203836930455e-08, |
|
"logits/chosen": -0.5338358879089355, |
|
"logits/rejected": -0.5201472043991089, |
|
"logps/chosen": -313.9563903808594, |
|
"logps/rejected": -271.4358825683594, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.0012641324428841472, |
|
"rewards/margins": -0.0005854673217982054, |
|
"rewards/rejected": -0.0006786651210859418, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014395393474088292, |
|
"grad_norm": 23.434459391350423, |
|
"learning_rate": 7.194244604316546e-08, |
|
"logits/chosen": -0.5112911462783813, |
|
"logits/rejected": -0.4821871817111969, |
|
"logps/chosen": -328.1177673339844, |
|
"logps/rejected": -310.93768310546875, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.0002256775478599593, |
|
"rewards/margins": -0.00044657508260570467, |
|
"rewards/rejected": 0.0006722525577060878, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.016794625719769675, |
|
"grad_norm": 18.93678967837979, |
|
"learning_rate": 8.393285371702638e-08, |
|
"logits/chosen": -0.45163947343826294, |
|
"logits/rejected": -0.448442280292511, |
|
"logps/chosen": -327.08203125, |
|
"logps/rejected": -307.91986083984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00034751114435493946, |
|
"rewards/margins": 0.00025592115707695484, |
|
"rewards/rejected": 9.158989269053563e-05, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.019193857965451054, |
|
"grad_norm": 22.83165129920526, |
|
"learning_rate": 9.592326139088728e-08, |
|
"logits/chosen": -0.5182130932807922, |
|
"logits/rejected": -0.4505055844783783, |
|
"logps/chosen": -240.3303680419922, |
|
"logps/rejected": -293.2958984375, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0007435273728333414, |
|
"rewards/margins": 0.0011482133995741606, |
|
"rewards/rejected": -0.0004046859103254974, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.021593090211132437, |
|
"grad_norm": 20.814362573731255, |
|
"learning_rate": 1.0791366906474819e-07, |
|
"logits/chosen": -0.5227237343788147, |
|
"logits/rejected": -0.5390074253082275, |
|
"logps/chosen": -374.2882385253906, |
|
"logps/rejected": -324.8116760253906, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.00026526738656684756, |
|
"rewards/margins": 0.0005599698051810265, |
|
"rewards/rejected": -0.0002947025350295007, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02399232245681382, |
|
"grad_norm": 19.779762194802167, |
|
"learning_rate": 1.199040767386091e-07, |
|
"logits/chosen": -0.4820384085178375, |
|
"logits/rejected": -0.44324564933776855, |
|
"logps/chosen": -305.8553161621094, |
|
"logps/rejected": -325.142578125, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.000371701258700341, |
|
"rewards/margins": -0.0010418787132948637, |
|
"rewards/rejected": 0.0006701773963868618, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.026391554702495202, |
|
"grad_norm": 19.60288651060619, |
|
"learning_rate": 1.3189448441247004e-07, |
|
"logits/chosen": -0.5077439546585083, |
|
"logits/rejected": -0.5146594047546387, |
|
"logps/chosen": -267.1889343261719, |
|
"logps/rejected": -267.95477294921875, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.001199588761664927, |
|
"rewards/margins": 0.00047235292731784284, |
|
"rewards/rejected": 0.0007272359216585755, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.028790786948176585, |
|
"grad_norm": 19.81671375800534, |
|
"learning_rate": 1.4388489208633092e-07, |
|
"logits/chosen": -0.48887982964515686, |
|
"logits/rejected": -0.5169537663459778, |
|
"logps/chosen": -337.0032653808594, |
|
"logps/rejected": -320.5115966796875, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.002912611234933138, |
|
"rewards/margins": 0.0012268821010366082, |
|
"rewards/rejected": 0.001685728901065886, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.031190019193857964, |
|
"grad_norm": 17.71618435991255, |
|
"learning_rate": 1.5587529976019183e-07, |
|
"logits/chosen": -0.5093793869018555, |
|
"logits/rejected": -0.49895817041397095, |
|
"logps/chosen": -251.9505157470703, |
|
"logps/rejected": -343.0433654785156, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.00024327848223038018, |
|
"rewards/margins": 0.004206613637506962, |
|
"rewards/rejected": -0.0039633349515497684, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03358925143953935, |
|
"grad_norm": 27.19256542332896, |
|
"learning_rate": 1.6786570743405277e-07, |
|
"logits/chosen": -0.4116830825805664, |
|
"logits/rejected": -0.4355412423610687, |
|
"logps/chosen": -338.66827392578125, |
|
"logps/rejected": -329.36993408203125, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0019357700366526842, |
|
"rewards/margins": 0.0008046304574236274, |
|
"rewards/rejected": -0.0027403999119997025, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03598848368522073, |
|
"grad_norm": 20.618666143121267, |
|
"learning_rate": 1.7985611510791365e-07, |
|
"logits/chosen": -0.4978245794773102, |
|
"logits/rejected": -0.4943366050720215, |
|
"logps/chosen": -263.31927490234375, |
|
"logps/rejected": -262.4169921875, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0014055909123271704, |
|
"rewards/margins": 0.005907330196350813, |
|
"rewards/rejected": -0.004501739051192999, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03838771593090211, |
|
"grad_norm": 21.439050667954167, |
|
"learning_rate": 1.9184652278177456e-07, |
|
"logits/chosen": -0.41648274660110474, |
|
"logits/rejected": -0.43770337104797363, |
|
"logps/chosen": -346.9476318359375, |
|
"logps/rejected": -274.9339294433594, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.001629197271540761, |
|
"rewards/margins": 0.012000782415270805, |
|
"rewards/rejected": -0.013629982247948647, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.040786948176583494, |
|
"grad_norm": 23.34246780774632, |
|
"learning_rate": 2.038369304556355e-07, |
|
"logits/chosen": -0.4433521330356598, |
|
"logits/rejected": -0.4420672357082367, |
|
"logps/chosen": -386.0821228027344, |
|
"logps/rejected": -372.5276794433594, |
|
"loss": 0.6869, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.00482617411762476, |
|
"rewards/margins": 0.018708640709519386, |
|
"rewards/rejected": -0.013882467523217201, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04318618042226487, |
|
"grad_norm": 20.810964783891215, |
|
"learning_rate": 2.1582733812949638e-07, |
|
"logits/chosen": -0.5174981951713562, |
|
"logits/rejected": -0.516277551651001, |
|
"logps/chosen": -276.76483154296875, |
|
"logps/rejected": -273.97021484375, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.012685242109000683, |
|
"rewards/margins": 0.007636575493961573, |
|
"rewards/rejected": -0.020321819931268692, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04558541266794626, |
|
"grad_norm": 23.227452761543166, |
|
"learning_rate": 2.278177458033573e-07, |
|
"logits/chosen": -0.43492501974105835, |
|
"logits/rejected": -0.4534667134284973, |
|
"logps/chosen": -360.90936279296875, |
|
"logps/rejected": -301.4361267089844, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0038906075060367584, |
|
"rewards/margins": 0.01148594543337822, |
|
"rewards/rejected": -0.015376554802060127, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04798464491362764, |
|
"grad_norm": 23.29182726258291, |
|
"learning_rate": 2.398081534772182e-07, |
|
"logits/chosen": -0.48784318566322327, |
|
"logits/rejected": -0.4500521719455719, |
|
"logps/chosen": -351.4430847167969, |
|
"logps/rejected": -337.54034423828125, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.007072852458804846, |
|
"rewards/margins": 0.025352124124765396, |
|
"rewards/rejected": -0.03242497891187668, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05038387715930902, |
|
"grad_norm": 24.758734034242963, |
|
"learning_rate": 2.517985611510791e-07, |
|
"logits/chosen": -0.49850010871887207, |
|
"logits/rejected": -0.5115815997123718, |
|
"logps/chosen": -288.8320007324219, |
|
"logps/rejected": -310.86944580078125, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03227672725915909, |
|
"rewards/margins": 0.020085657015442848, |
|
"rewards/rejected": -0.052362389862537384, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.052783109404990404, |
|
"grad_norm": 19.01379424647313, |
|
"learning_rate": 2.637889688249401e-07, |
|
"logits/chosen": -0.5123470425605774, |
|
"logits/rejected": -0.5222417116165161, |
|
"logps/chosen": -369.8315734863281, |
|
"logps/rejected": -361.53485107421875, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.04827170446515083, |
|
"rewards/margins": 0.008011287078261375, |
|
"rewards/rejected": -0.056282997131347656, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05518234165067178, |
|
"grad_norm": 20.8810945345681, |
|
"learning_rate": 2.7577937649880093e-07, |
|
"logits/chosen": -0.45552024245262146, |
|
"logits/rejected": -0.4496513903141022, |
|
"logps/chosen": -281.07867431640625, |
|
"logps/rejected": -317.8404235839844, |
|
"loss": 0.6731, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.056314654648303986, |
|
"rewards/margins": 0.03004314936697483, |
|
"rewards/rejected": -0.08635779470205307, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05758157389635317, |
|
"grad_norm": 22.504427621110032, |
|
"learning_rate": 2.8776978417266184e-07, |
|
"logits/chosen": -0.47351568937301636, |
|
"logits/rejected": -0.4632163941860199, |
|
"logps/chosen": -340.7323303222656, |
|
"logps/rejected": -295.4796447753906, |
|
"loss": 0.6679, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.03860332816839218, |
|
"rewards/margins": 0.047780074179172516, |
|
"rewards/rejected": -0.0863833948969841, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05998080614203455, |
|
"grad_norm": 24.675621978580526, |
|
"learning_rate": 2.997601918465228e-07, |
|
"logits/chosen": -0.4286671280860901, |
|
"logits/rejected": -0.4413766860961914, |
|
"logps/chosen": -276.46929931640625, |
|
"logps/rejected": -265.77239990234375, |
|
"loss": 0.6645, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.07923885434865952, |
|
"rewards/margins": 0.05146307870745659, |
|
"rewards/rejected": -0.1307019591331482, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06238003838771593, |
|
"grad_norm": 23.669883287520868, |
|
"learning_rate": 3.1175059952038366e-07, |
|
"logits/chosen": -0.507714033126831, |
|
"logits/rejected": -0.4842301905155182, |
|
"logps/chosen": -331.04541015625, |
|
"logps/rejected": -326.94293212890625, |
|
"loss": 0.665, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.11218921095132828, |
|
"rewards/margins": 0.07296401262283325, |
|
"rewards/rejected": -0.18515323102474213, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0647792706333973, |
|
"grad_norm": 21.62474720041908, |
|
"learning_rate": 3.2374100719424457e-07, |
|
"logits/chosen": -0.40045681595802307, |
|
"logits/rejected": -0.4459362030029297, |
|
"logps/chosen": -332.62591552734375, |
|
"logps/rejected": -274.89227294921875, |
|
"loss": 0.6565, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1307973563671112, |
|
"rewards/margins": 0.02679337002336979, |
|
"rewards/rejected": -0.15759070217609406, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0671785028790787, |
|
"grad_norm": 23.086917994516984, |
|
"learning_rate": 3.3573141486810554e-07, |
|
"logits/chosen": -0.4868457317352295, |
|
"logits/rejected": -0.4884609282016754, |
|
"logps/chosen": -350.65716552734375, |
|
"logps/rejected": -345.1634521484375, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.12245456129312515, |
|
"rewards/margins": 0.10012376308441162, |
|
"rewards/rejected": -0.22257831692695618, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06957773512476008, |
|
"grad_norm": 20.370739664485072, |
|
"learning_rate": 3.477218225419664e-07, |
|
"logits/chosen": -0.4236484169960022, |
|
"logits/rejected": -0.39932698011398315, |
|
"logps/chosen": -336.76806640625, |
|
"logps/rejected": -317.404541015625, |
|
"loss": 0.6469, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.14181901514530182, |
|
"rewards/margins": 0.08710981160402298, |
|
"rewards/rejected": -0.2289288341999054, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07197696737044146, |
|
"grad_norm": 24.492537982428285, |
|
"learning_rate": 3.597122302158273e-07, |
|
"logits/chosen": -0.4785955846309662, |
|
"logits/rejected": -0.49328359961509705, |
|
"logps/chosen": -321.550048828125, |
|
"logps/rejected": -348.0299377441406, |
|
"loss": 0.6427, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.21054868400096893, |
|
"rewards/margins": 0.11387765407562256, |
|
"rewards/rejected": -0.3244263529777527, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07437619961612284, |
|
"grad_norm": 23.952168042712074, |
|
"learning_rate": 3.7170263788968827e-07, |
|
"logits/chosen": -0.4431813359260559, |
|
"logits/rejected": -0.47212114930152893, |
|
"logps/chosen": -334.4637145996094, |
|
"logps/rejected": -310.35382080078125, |
|
"loss": 0.6542, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19401198625564575, |
|
"rewards/margins": 0.13989683985710144, |
|
"rewards/rejected": -0.3339087963104248, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07677543186180422, |
|
"grad_norm": 23.791432528920133, |
|
"learning_rate": 3.836930455635491e-07, |
|
"logits/chosen": -0.43875059485435486, |
|
"logits/rejected": -0.43469443917274475, |
|
"logps/chosen": -327.66461181640625, |
|
"logps/rejected": -307.001708984375, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.18280547857284546, |
|
"rewards/margins": 0.12870724499225616, |
|
"rewards/rejected": -0.31151270866394043, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07917466410748561, |
|
"grad_norm": 24.644356150681073, |
|
"learning_rate": 3.9568345323741003e-07, |
|
"logits/chosen": -0.3701268136501312, |
|
"logits/rejected": -0.36926591396331787, |
|
"logps/chosen": -322.92822265625, |
|
"logps/rejected": -376.3653869628906, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2948777973651886, |
|
"rewards/margins": 0.1813381314277649, |
|
"rewards/rejected": -0.4762159287929535, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08157389635316699, |
|
"grad_norm": 27.27844451104773, |
|
"learning_rate": 4.07673860911271e-07, |
|
"logits/chosen": -0.3673369288444519, |
|
"logits/rejected": -0.37854182720184326, |
|
"logps/chosen": -297.27362060546875, |
|
"logps/rejected": -340.39337158203125, |
|
"loss": 0.6298, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.22769299149513245, |
|
"rewards/margins": 0.21097370982170105, |
|
"rewards/rejected": -0.4386666715145111, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08397312859884837, |
|
"grad_norm": 25.156100705592984, |
|
"learning_rate": 4.1966426858513185e-07, |
|
"logits/chosen": -0.43728965520858765, |
|
"logits/rejected": -0.4115135669708252, |
|
"logps/chosen": -376.92926025390625, |
|
"logps/rejected": -378.50994873046875, |
|
"loss": 0.6424, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.38330426812171936, |
|
"rewards/margins": 0.1259448528289795, |
|
"rewards/rejected": -0.5092491507530212, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08637236084452975, |
|
"grad_norm": 29.275353610625938, |
|
"learning_rate": 4.3165467625899276e-07, |
|
"logits/chosen": -0.34812045097351074, |
|
"logits/rejected": -0.40864071249961853, |
|
"logps/chosen": -338.40545654296875, |
|
"logps/rejected": -294.67510986328125, |
|
"loss": 0.6342, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4027242660522461, |
|
"rewards/margins": 0.07332389056682587, |
|
"rewards/rejected": -0.47604817152023315, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08877159309021113, |
|
"grad_norm": 35.74285605742634, |
|
"learning_rate": 4.436450839328537e-07, |
|
"logits/chosen": -0.3650192320346832, |
|
"logits/rejected": -0.3437384068965912, |
|
"logps/chosen": -328.18255615234375, |
|
"logps/rejected": -361.9512023925781, |
|
"loss": 0.6233, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4244493842124939, |
|
"rewards/margins": 0.26134413480758667, |
|
"rewards/rejected": -0.6857935190200806, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09117082533589252, |
|
"grad_norm": 23.814640257789698, |
|
"learning_rate": 4.556354916067146e-07, |
|
"logits/chosen": -0.21547524631023407, |
|
"logits/rejected": -0.24726839363574982, |
|
"logps/chosen": -313.928466796875, |
|
"logps/rejected": -342.5099792480469, |
|
"loss": 0.5952, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3726133704185486, |
|
"rewards/margins": 0.25710588693618774, |
|
"rewards/rejected": -0.6297192573547363, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.0935700575815739, |
|
"grad_norm": 32.047368094271064, |
|
"learning_rate": 4.676258992805755e-07, |
|
"logits/chosen": -0.1495486944913864, |
|
"logits/rejected": -0.1404871642589569, |
|
"logps/chosen": -366.45904541015625, |
|
"logps/rejected": -347.5558776855469, |
|
"loss": 0.6166, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.5525146722793579, |
|
"rewards/margins": 0.15630534291267395, |
|
"rewards/rejected": -0.7088200449943542, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09596928982725528, |
|
"grad_norm": 25.703381812969745, |
|
"learning_rate": 4.796163069544364e-07, |
|
"logits/chosen": -0.11726783215999603, |
|
"logits/rejected": -0.12675485014915466, |
|
"logps/chosen": -342.845703125, |
|
"logps/rejected": -358.5388488769531, |
|
"loss": 0.6128, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5184834599494934, |
|
"rewards/margins": 0.35581427812576294, |
|
"rewards/rejected": -0.8742977976799011, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09836852207293666, |
|
"grad_norm": 36.75285765827503, |
|
"learning_rate": 4.916067146282974e-07, |
|
"logits/chosen": -0.06710594147443771, |
|
"logits/rejected": -0.06482669711112976, |
|
"logps/chosen": -343.30914306640625, |
|
"logps/rejected": -399.09521484375, |
|
"loss": 0.5955, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5613540410995483, |
|
"rewards/margins": 0.26459768414497375, |
|
"rewards/rejected": -0.8259517550468445, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10076775431861804, |
|
"grad_norm": 30.774833552295362, |
|
"learning_rate": 4.999992108529978e-07, |
|
"logits/chosen": 0.010618582367897034, |
|
"logits/rejected": 0.06938418000936508, |
|
"logps/chosen": -431.36004638671875, |
|
"logps/rejected": -424.60406494140625, |
|
"loss": 0.5954, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6083325147628784, |
|
"rewards/margins": 0.34742894768714905, |
|
"rewards/rejected": -0.9557614326477051, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.10316698656429943, |
|
"grad_norm": 40.58050289640907, |
|
"learning_rate": 4.999851817115532e-07, |
|
"logits/chosen": -0.0023524551652371883, |
|
"logits/rejected": 0.023655416443943977, |
|
"logps/chosen": -335.98919677734375, |
|
"logps/rejected": -377.95965576171875, |
|
"loss": 0.6082, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5256800055503845, |
|
"rewards/margins": 0.4393700957298279, |
|
"rewards/rejected": -0.9650500416755676, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.10556621880998081, |
|
"grad_norm": 32.69679410312447, |
|
"learning_rate": 4.999536171027889e-07, |
|
"logits/chosen": 0.08202698826789856, |
|
"logits/rejected": 0.08389448374509811, |
|
"logps/chosen": -385.47454833984375, |
|
"logps/rejected": -391.5417785644531, |
|
"loss": 0.6035, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.6195366382598877, |
|
"rewards/margins": 0.18123595416545868, |
|
"rewards/rejected": -0.8007725477218628, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.10796545105566219, |
|
"grad_norm": 33.910285266368, |
|
"learning_rate": 4.999045192408369e-07, |
|
"logits/chosen": 0.21913523972034454, |
|
"logits/rejected": 0.271841824054718, |
|
"logps/chosen": -342.24359130859375, |
|
"logps/rejected": -344.8548889160156, |
|
"loss": 0.6017, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.674537181854248, |
|
"rewards/margins": 0.21622857451438904, |
|
"rewards/rejected": -0.8907658457756042, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11036468330134357, |
|
"grad_norm": 29.74278380126358, |
|
"learning_rate": 4.998378915697171e-07, |
|
"logits/chosen": 0.08306100219488144, |
|
"logits/rejected": 0.06756886094808578, |
|
"logps/chosen": -364.54083251953125, |
|
"logps/rejected": -397.7304992675781, |
|
"loss": 0.5771, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.551899254322052, |
|
"rewards/margins": 0.4192637503147125, |
|
"rewards/rejected": -0.9711629748344421, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11276391554702495, |
|
"grad_norm": 29.113296545515738, |
|
"learning_rate": 4.997537387630958e-07, |
|
"logits/chosen": 0.1874076873064041, |
|
"logits/rejected": 0.20838460326194763, |
|
"logps/chosen": -306.8981018066406, |
|
"logps/rejected": -340.91058349609375, |
|
"loss": 0.5679, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6327184438705444, |
|
"rewards/margins": 0.3435761630535126, |
|
"rewards/rejected": -0.9762946963310242, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.11516314779270634, |
|
"grad_norm": 33.856336044697485, |
|
"learning_rate": 4.996520667239582e-07, |
|
"logits/chosen": 0.23304300010204315, |
|
"logits/rejected": 0.1729666292667389, |
|
"logps/chosen": -343.1372985839844, |
|
"logps/rejected": -437.03607177734375, |
|
"loss": 0.5655, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.7108132243156433, |
|
"rewards/margins": 0.39795851707458496, |
|
"rewards/rejected": -1.1087716817855835, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.11756238003838772, |
|
"grad_norm": 33.767059031037455, |
|
"learning_rate": 4.995328825841939e-07, |
|
"logits/chosen": 0.24416175484657288, |
|
"logits/rejected": 0.1543808877468109, |
|
"logps/chosen": -308.34014892578125, |
|
"logps/rejected": -369.05096435546875, |
|
"loss": 0.5854, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5864844918251038, |
|
"rewards/margins": 0.5907909274101257, |
|
"rewards/rejected": -1.17727530002594, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1199616122840691, |
|
"grad_norm": 30.71017107420923, |
|
"learning_rate": 4.993961947040967e-07, |
|
"logits/chosen": 0.08184429258108139, |
|
"logits/rejected": 0.12789519131183624, |
|
"logps/chosen": -418.55023193359375, |
|
"logps/rejected": -405.9790954589844, |
|
"loss": 0.6003, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8971430659294128, |
|
"rewards/margins": 0.32296597957611084, |
|
"rewards/rejected": -1.2201091051101685, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12236084452975048, |
|
"grad_norm": 31.250292579789402, |
|
"learning_rate": 4.992420126717784e-07, |
|
"logits/chosen": 0.08086155354976654, |
|
"logits/rejected": 0.03146091103553772, |
|
"logps/chosen": -342.9521789550781, |
|
"logps/rejected": -415.9251403808594, |
|
"loss": 0.5725, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.5589657425880432, |
|
"rewards/margins": 0.6988564729690552, |
|
"rewards/rejected": -1.2578222751617432, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.12476007677543186, |
|
"grad_norm": 37.15692038834496, |
|
"learning_rate": 4.990703473024958e-07, |
|
"logits/chosen": 0.1345275640487671, |
|
"logits/rejected": 0.2082027643918991, |
|
"logps/chosen": -403.8849792480469, |
|
"logps/rejected": -437.1533203125, |
|
"loss": 0.5927, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8195702433586121, |
|
"rewards/margins": 0.3937850296497345, |
|
"rewards/rejected": -1.213355302810669, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12715930902111325, |
|
"grad_norm": 30.89123246626415, |
|
"learning_rate": 4.98881210637893e-07, |
|
"logits/chosen": 0.4588778614997864, |
|
"logits/rejected": 0.39364755153656006, |
|
"logps/chosen": -319.67474365234375, |
|
"logps/rejected": -411.1517028808594, |
|
"loss": 0.5867, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6797133088111877, |
|
"rewards/margins": 0.5333698987960815, |
|
"rewards/rejected": -1.213083028793335, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1295585412667946, |
|
"grad_norm": 25.366271563444606, |
|
"learning_rate": 4.986746159451553e-07, |
|
"logits/chosen": 0.3952273726463318, |
|
"logits/rejected": 0.3921428620815277, |
|
"logps/chosen": -357.10552978515625, |
|
"logps/rejected": -394.90380859375, |
|
"loss": 0.5927, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6387368440628052, |
|
"rewards/margins": 0.4164732098579407, |
|
"rewards/rejected": -1.055209994316101, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.131957773512476, |
|
"grad_norm": 27.86364345269766, |
|
"learning_rate": 4.984505777160795e-07, |
|
"logits/chosen": 0.3245702385902405, |
|
"logits/rejected": 0.2541370987892151, |
|
"logps/chosen": -422.35760498046875, |
|
"logps/rejected": -467.5943908691406, |
|
"loss": 0.5969, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7628673315048218, |
|
"rewards/margins": 0.45092684030532837, |
|
"rewards/rejected": -1.2137939929962158, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1343570057581574, |
|
"grad_norm": 35.81198727683156, |
|
"learning_rate": 4.982091116660574e-07, |
|
"logits/chosen": 0.2330838441848755, |
|
"logits/rejected": 0.29881471395492554, |
|
"logps/chosen": -301.19683837890625, |
|
"logps/rejected": -291.89031982421875, |
|
"loss": 0.6129, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6757429838180542, |
|
"rewards/margins": 0.17738452553749084, |
|
"rewards/rejected": -0.8531274795532227, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.13675623800383876, |
|
"grad_norm": 32.12782969147517, |
|
"learning_rate": 4.979502347329732e-07, |
|
"logits/chosen": 0.13176020979881287, |
|
"logits/rejected": 0.10654962062835693, |
|
"logps/chosen": -430.05206298828125, |
|
"logps/rejected": -499.424072265625, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9448302984237671, |
|
"rewards/margins": 0.4683268070220947, |
|
"rewards/rejected": -1.4131572246551514, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.13915547024952016, |
|
"grad_norm": 46.904345596977215, |
|
"learning_rate": 4.976739650760151e-07, |
|
"logits/chosen": 0.23485073447227478, |
|
"logits/rejected": 0.25204676389694214, |
|
"logps/chosen": -389.0350646972656, |
|
"logps/rejected": -405.47369384765625, |
|
"loss": 0.5849, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8869741559028625, |
|
"rewards/margins": 0.34045299887657166, |
|
"rewards/rejected": -1.2274272441864014, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14155470249520152, |
|
"grad_norm": 60.23627289410932, |
|
"learning_rate": 4.97380322074402e-07, |
|
"logits/chosen": 0.11752445995807648, |
|
"logits/rejected": 0.14324410259723663, |
|
"logps/chosen": -353.82000732421875, |
|
"logps/rejected": -380.82904052734375, |
|
"loss": 0.5863, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9636253118515015, |
|
"rewards/margins": 0.30282920598983765, |
|
"rewards/rejected": -1.2664546966552734, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.14395393474088292, |
|
"grad_norm": 34.853441256045365, |
|
"learning_rate": 4.970693263260237e-07, |
|
"logits/chosen": 0.4285155236721039, |
|
"logits/rejected": 0.4016164243221283, |
|
"logps/chosen": -392.8042907714844, |
|
"logps/rejected": -403.59820556640625, |
|
"loss": 0.5774, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6405273079872131, |
|
"rewards/margins": 0.48459750413894653, |
|
"rewards/rejected": -1.1251246929168701, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1463531669865643, |
|
"grad_norm": 40.376164301943156, |
|
"learning_rate": 4.967409996459966e-07, |
|
"logits/chosen": 0.23434874415397644, |
|
"logits/rejected": 0.20591160655021667, |
|
"logps/chosen": -398.8081359863281, |
|
"logps/rejected": -416.83392333984375, |
|
"loss": 0.5663, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7015125751495361, |
|
"rewards/margins": 0.44586700201034546, |
|
"rewards/rejected": -1.1473795175552368, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.14875239923224567, |
|
"grad_norm": 34.926989445671786, |
|
"learning_rate": 4.963953650651326e-07, |
|
"logits/chosen": 0.37248092889785767, |
|
"logits/rejected": 0.4226955473423004, |
|
"logps/chosen": -476.33392333984375, |
|
"logps/rejected": -421.0924377441406, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.874337375164032, |
|
"rewards/margins": 0.430635929107666, |
|
"rewards/rejected": -1.3049733638763428, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15115163147792707, |
|
"grad_norm": 28.23482167712942, |
|
"learning_rate": 4.960324468283248e-07, |
|
"logits/chosen": 0.46470069885253906, |
|
"logits/rejected": 0.4302327036857605, |
|
"logps/chosen": -353.2744140625, |
|
"logps/rejected": -386.2238464355469, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8768332600593567, |
|
"rewards/margins": 0.392566055059433, |
|
"rewards/rejected": -1.2693992853164673, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.15355086372360843, |
|
"grad_norm": 35.808076946052864, |
|
"learning_rate": 4.956522703928451e-07, |
|
"logits/chosen": 0.4015461802482605, |
|
"logits/rejected": 0.36489754915237427, |
|
"logps/chosen": -360.6815185546875, |
|
"logps/rejected": -397.16143798828125, |
|
"loss": 0.5353, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.9075252413749695, |
|
"rewards/margins": 0.36623865365982056, |
|
"rewards/rejected": -1.2737640142440796, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.15595009596928983, |
|
"grad_norm": 46.33742443874802, |
|
"learning_rate": 4.952548624265606e-07, |
|
"logits/chosen": 0.3150482475757599, |
|
"logits/rejected": 0.34753578901290894, |
|
"logps/chosen": -431.3271484375, |
|
"logps/rejected": -444.75128173828125, |
|
"loss": 0.6031, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0419960021972656, |
|
"rewards/margins": 0.3389902114868164, |
|
"rewards/rejected": -1.3809860944747925, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.15834932821497122, |
|
"grad_norm": 35.49752890226974, |
|
"learning_rate": 4.948402508060607e-07, |
|
"logits/chosen": 0.3023494482040405, |
|
"logits/rejected": 0.30674803256988525, |
|
"logps/chosen": -356.8683776855469, |
|
"logps/rejected": -407.8005676269531, |
|
"loss": 0.5894, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8490778207778931, |
|
"rewards/margins": 0.5763091444969177, |
|
"rewards/rejected": -1.4253871440887451, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16074856046065258, |
|
"grad_norm": 40.67792337844733, |
|
"learning_rate": 4.944084646147038e-07, |
|
"logits/chosen": 0.3066056966781616, |
|
"logits/rejected": 0.4046483635902405, |
|
"logps/chosen": -455.53179931640625, |
|
"logps/rejected": -467.2894592285156, |
|
"loss": 0.6034, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9284945726394653, |
|
"rewards/margins": 0.34615594148635864, |
|
"rewards/rejected": -1.2746505737304688, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.16314779270633398, |
|
"grad_norm": 42.686512303964925, |
|
"learning_rate": 4.939595341405754e-07, |
|
"logits/chosen": 0.23917528986930847, |
|
"logits/rejected": 0.26977282762527466, |
|
"logps/chosen": -384.415283203125, |
|
"logps/rejected": -409.13250732421875, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8209155201911926, |
|
"rewards/margins": 0.4496981203556061, |
|
"rewards/rejected": -1.2706137895584106, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.16554702495201534, |
|
"grad_norm": 38.72197174915104, |
|
"learning_rate": 4.93493490874365e-07, |
|
"logits/chosen": 0.3088577091693878, |
|
"logits/rejected": 0.3489815592765808, |
|
"logps/chosen": -384.8271484375, |
|
"logps/rejected": -420.25823974609375, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.951115608215332, |
|
"rewards/margins": 0.34042900800704956, |
|
"rewards/rejected": -1.2915446758270264, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.16794625719769674, |
|
"grad_norm": 52.50419479972031, |
|
"learning_rate": 4.93010367507156e-07, |
|
"logits/chosen": 0.46556228399276733, |
|
"logits/rejected": 0.47466373443603516, |
|
"logps/chosen": -326.25604248046875, |
|
"logps/rejected": -362.116455078125, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7597115635871887, |
|
"rewards/margins": 0.6219409108161926, |
|
"rewards/rejected": -1.3816524744033813, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.17034548944337813, |
|
"grad_norm": 49.54085454981157, |
|
"learning_rate": 4.925101979281332e-07, |
|
"logits/chosen": 0.45910024642944336, |
|
"logits/rejected": 0.5596626400947571, |
|
"logps/chosen": -408.1614685058594, |
|
"logps/rejected": -418.06756591796875, |
|
"loss": 0.589, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6898539662361145, |
|
"rewards/margins": 0.6227684617042542, |
|
"rewards/rejected": -1.312622308731079, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1727447216890595, |
|
"grad_norm": 37.88267694676448, |
|
"learning_rate": 4.919930172222054e-07, |
|
"logits/chosen": 0.33932724595069885, |
|
"logits/rejected": 0.3793552815914154, |
|
"logps/chosen": -393.13995361328125, |
|
"logps/rejected": -435.6053161621094, |
|
"loss": 0.5223, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9148247838020325, |
|
"rewards/margins": 0.4767160415649414, |
|
"rewards/rejected": -1.3915406465530396, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.1751439539347409, |
|
"grad_norm": 46.27932235940938, |
|
"learning_rate": 4.914588616675445e-07, |
|
"logits/chosen": 0.21239468455314636, |
|
"logits/rejected": 0.16626520454883575, |
|
"logps/chosen": -330.5399169921875, |
|
"logps/rejected": -392.7298583984375, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6710583567619324, |
|
"rewards/margins": 0.5520154237747192, |
|
"rewards/rejected": -1.2230736017227173, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.17754318618042225, |
|
"grad_norm": 42.816149550861326, |
|
"learning_rate": 4.909077687330404e-07, |
|
"logits/chosen": 0.3929751217365265, |
|
"logits/rejected": 0.4593464434146881, |
|
"logps/chosen": -415.23651123046875, |
|
"logps/rejected": -411.86187744140625, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9089242219924927, |
|
"rewards/margins": 0.3579888939857483, |
|
"rewards/rejected": -1.2669130563735962, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.17994241842610365, |
|
"grad_norm": 44.38419728612149, |
|
"learning_rate": 4.903397770756729e-07, |
|
"logits/chosen": 0.5118775963783264, |
|
"logits/rejected": 0.4823269248008728, |
|
"logps/chosen": -395.13665771484375, |
|
"logps/rejected": -447.4171447753906, |
|
"loss": 0.5599, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9016819000244141, |
|
"rewards/margins": 0.6085169315338135, |
|
"rewards/rejected": -1.5101988315582275, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.18234165067178504, |
|
"grad_norm": 31.810333382490555, |
|
"learning_rate": 4.897549265378004e-07, |
|
"logits/chosen": 0.5458894968032837, |
|
"logits/rejected": 0.6065562963485718, |
|
"logps/chosen": -475.55438232421875, |
|
"logps/rejected": -520.8302001953125, |
|
"loss": 0.5512, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0972192287445068, |
|
"rewards/margins": 0.47927117347717285, |
|
"rewards/rejected": -1.5764904022216797, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.1847408829174664, |
|
"grad_norm": 45.12467445004805, |
|
"learning_rate": 4.891532581443643e-07, |
|
"logits/chosen": 0.7339604496955872, |
|
"logits/rejected": 0.7442577481269836, |
|
"logps/chosen": -420.70367431640625, |
|
"logps/rejected": -505.57830810546875, |
|
"loss": 0.5369, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.7960783243179321, |
|
"rewards/margins": 0.9124542474746704, |
|
"rewards/rejected": -1.7085325717926025, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1871401151631478, |
|
"grad_norm": 30.919800618919098, |
|
"learning_rate": 4.885348141000122e-07, |
|
"logits/chosen": 0.6233432292938232, |
|
"logits/rejected": 0.5392367839813232, |
|
"logps/chosen": -370.8200988769531, |
|
"logps/rejected": -448.16668701171875, |
|
"loss": 0.5658, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8282602429389954, |
|
"rewards/margins": 0.5970464944839478, |
|
"rewards/rejected": -1.4253066778182983, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.18953934740882916, |
|
"grad_norm": 37.51643254650835, |
|
"learning_rate": 4.878996377861367e-07, |
|
"logits/chosen": 0.46249571442604065, |
|
"logits/rejected": 0.44672688841819763, |
|
"logps/chosen": -357.6346130371094, |
|
"logps/rejected": -410.09722900390625, |
|
"loss": 0.5431, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0386784076690674, |
|
"rewards/margins": 0.42310237884521484, |
|
"rewards/rejected": -1.4617807865142822, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.19193857965451055, |
|
"grad_norm": 37.588059854913, |
|
"learning_rate": 4.872477737578327e-07, |
|
"logits/chosen": 0.6460080742835999, |
|
"logits/rejected": 0.6077075004577637, |
|
"logps/chosen": -421.1116638183594, |
|
"logps/rejected": -521.49462890625, |
|
"loss": 0.5268, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0067331790924072, |
|
"rewards/margins": 0.9951297044754028, |
|
"rewards/rejected": -2.0018630027770996, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19433781190019195, |
|
"grad_norm": 53.72683188605399, |
|
"learning_rate": 4.865792677407718e-07, |
|
"logits/chosen": 0.49988117814064026, |
|
"logits/rejected": 0.5366328358650208, |
|
"logps/chosen": -401.47613525390625, |
|
"logps/rejected": -420.8955993652344, |
|
"loss": 0.5938, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.075630784034729, |
|
"rewards/margins": 0.39734476804733276, |
|
"rewards/rejected": -1.4729753732681274, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.1967370441458733, |
|
"grad_norm": 38.712156598659824, |
|
"learning_rate": 4.858941666279955e-07, |
|
"logits/chosen": 0.3760056793689728, |
|
"logits/rejected": 0.4651992917060852, |
|
"logps/chosen": -437.2298889160156, |
|
"logps/rejected": -446.694580078125, |
|
"loss": 0.5887, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1204659938812256, |
|
"rewards/margins": 0.3689970076084137, |
|
"rewards/rejected": -1.4894630908966064, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.1991362763915547, |
|
"grad_norm": 44.01596534059215, |
|
"learning_rate": 4.851925184766247e-07, |
|
"logits/chosen": 0.5848439931869507, |
|
"logits/rejected": 0.6506586074829102, |
|
"logps/chosen": -403.06683349609375, |
|
"logps/rejected": -448.021728515625, |
|
"loss": 0.5785, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0916748046875, |
|
"rewards/margins": 0.6617940664291382, |
|
"rewards/rejected": -1.7534688711166382, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.20153550863723607, |
|
"grad_norm": 40.83854861930071, |
|
"learning_rate": 4.844743725044897e-07, |
|
"logits/chosen": 0.46873077750205994, |
|
"logits/rejected": 0.6275912523269653, |
|
"logps/chosen": -367.41162109375, |
|
"logps/rejected": -396.37066650390625, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7957026362419128, |
|
"rewards/margins": 0.5888250470161438, |
|
"rewards/rejected": -1.3845278024673462, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.20393474088291746, |
|
"grad_norm": 36.70662477238839, |
|
"learning_rate": 4.837397790866774e-07, |
|
"logits/chosen": 0.7435306310653687, |
|
"logits/rejected": 0.7161687612533569, |
|
"logps/chosen": -398.19744873046875, |
|
"logps/rejected": -457.5523986816406, |
|
"loss": 0.5482, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6199816465377808, |
|
"rewards/margins": 0.9293144941329956, |
|
"rewards/rejected": -1.5492960214614868, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.20633397312859886, |
|
"grad_norm": 37.32626194913683, |
|
"learning_rate": 4.829887897519974e-07, |
|
"logits/chosen": 0.8320767283439636, |
|
"logits/rejected": 0.7710009813308716, |
|
"logps/chosen": -344.98431396484375, |
|
"logps/rejected": -416.36334228515625, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7510636448860168, |
|
"rewards/margins": 0.5001067519187927, |
|
"rewards/rejected": -1.2511705160140991, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.20873320537428022, |
|
"grad_norm": 33.87982573400151, |
|
"learning_rate": 4.82221457179368e-07, |
|
"logits/chosen": 0.8840534090995789, |
|
"logits/rejected": 0.8203161358833313, |
|
"logps/chosen": -396.6270446777344, |
|
"logps/rejected": -443.01416015625, |
|
"loss": 0.5452, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.846544086933136, |
|
"rewards/margins": 0.6513880491256714, |
|
"rewards/rejected": -1.4979320764541626, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.21113243761996162, |
|
"grad_norm": 37.96993366603482, |
|
"learning_rate": 4.814378351941206e-07, |
|
"logits/chosen": 0.6460505723953247, |
|
"logits/rejected": 0.6371086835861206, |
|
"logps/chosen": -381.64019775390625, |
|
"logps/rejected": -417.2781677246094, |
|
"loss": 0.5679, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.875778317451477, |
|
"rewards/margins": 0.46770256757736206, |
|
"rewards/rejected": -1.3434808254241943, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.21353166986564298, |
|
"grad_norm": 31.715805230455267, |
|
"learning_rate": 4.806379787642241e-07, |
|
"logits/chosen": 0.8026679754257202, |
|
"logits/rejected": 0.7344800233840942, |
|
"logps/chosen": -362.9905700683594, |
|
"logps/rejected": -417.6888122558594, |
|
"loss": 0.595, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7607866525650024, |
|
"rewards/margins": 0.5089119672775269, |
|
"rewards/rejected": -1.2696987390518188, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.21593090211132437, |
|
"grad_norm": 36.199326860294505, |
|
"learning_rate": 4.798219439964293e-07, |
|
"logits/chosen": 0.6950449347496033, |
|
"logits/rejected": 0.6759757995605469, |
|
"logps/chosen": -370.34210205078125, |
|
"logps/rejected": -414.42913818359375, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8894515037536621, |
|
"rewards/margins": 0.3143045902252197, |
|
"rewards/rejected": -1.2037560939788818, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.21833013435700577, |
|
"grad_norm": 77.10525211165906, |
|
"learning_rate": 4.78989788132333e-07, |
|
"logits/chosen": 0.5585970282554626, |
|
"logits/rejected": 0.523398220539093, |
|
"logps/chosen": -336.2797546386719, |
|
"logps/rejected": -415.0740661621094, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7577199339866638, |
|
"rewards/margins": 0.744074285030365, |
|
"rewards/rejected": -1.5017942190170288, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.22072936660268713, |
|
"grad_norm": 36.180170288824705, |
|
"learning_rate": 4.781415695443631e-07, |
|
"logits/chosen": 0.8028939962387085, |
|
"logits/rejected": 0.8777521252632141, |
|
"logps/chosen": -466.35821533203125, |
|
"logps/rejected": -494.737060546875, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3651368618011475, |
|
"rewards/margins": 0.20146362483501434, |
|
"rewards/rejected": -1.5666005611419678, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.22312859884836853, |
|
"grad_norm": 38.796576126211065, |
|
"learning_rate": 4.772773477316836e-07, |
|
"logits/chosen": 0.7190769910812378, |
|
"logits/rejected": 0.7383664846420288, |
|
"logps/chosen": -441.91143798828125, |
|
"logps/rejected": -486.78497314453125, |
|
"loss": 0.5435, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1250263452529907, |
|
"rewards/margins": 0.5087109804153442, |
|
"rewards/rejected": -1.6337372064590454, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2255278310940499, |
|
"grad_norm": 45.082822996860905, |
|
"learning_rate": 4.7639718331602117e-07, |
|
"logits/chosen": 0.68000328540802, |
|
"logits/rejected": 0.6397444009780884, |
|
"logps/chosen": -399.813232421875, |
|
"logps/rejected": -472.39501953125, |
|
"loss": 0.527, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9084193110466003, |
|
"rewards/margins": 0.8160839080810547, |
|
"rewards/rejected": -1.7245031595230103, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.22792706333973128, |
|
"grad_norm": 49.635962246333484, |
|
"learning_rate": 4.7550113803741275e-07, |
|
"logits/chosen": 0.8004183769226074, |
|
"logits/rejected": 0.9182437658309937, |
|
"logps/chosen": -431.7543029785156, |
|
"logps/rejected": -411.73956298828125, |
|
"loss": 0.5558, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1316945552825928, |
|
"rewards/margins": 0.47147074341773987, |
|
"rewards/rejected": -1.6031652688980103, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.23032629558541268, |
|
"grad_norm": 45.08385639506829, |
|
"learning_rate": 4.7458927474987454e-07, |
|
"logits/chosen": 0.8087307214736938, |
|
"logits/rejected": 0.9052631258964539, |
|
"logps/chosen": -462.55047607421875, |
|
"logps/rejected": -439.76092529296875, |
|
"loss": 0.5213, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.008272409439087, |
|
"rewards/margins": 0.43184152245521545, |
|
"rewards/rejected": -1.44011390209198, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.23272552783109404, |
|
"grad_norm": 37.6951920728851, |
|
"learning_rate": 4.7366165741699347e-07, |
|
"logits/chosen": 0.5539201498031616, |
|
"logits/rejected": 0.5718740224838257, |
|
"logps/chosen": -466.7937927246094, |
|
"logps/rejected": -492.1181640625, |
|
"loss": 0.5351, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0342867374420166, |
|
"rewards/margins": 0.497918039560318, |
|
"rewards/rejected": -1.5322047472000122, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.23512476007677544, |
|
"grad_norm": 41.18778147599408, |
|
"learning_rate": 4.727183511074401e-07, |
|
"logits/chosen": 0.7255790829658508, |
|
"logits/rejected": 0.7525658011436462, |
|
"logps/chosen": -431.845947265625, |
|
"logps/rejected": -467.608642578125, |
|
"loss": 0.5518, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1927156448364258, |
|
"rewards/margins": 0.39901024103164673, |
|
"rewards/rejected": -1.5917259454727173, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2375239923224568, |
|
"grad_norm": 34.01166933720842, |
|
"learning_rate": 4.717594219904043e-07, |
|
"logits/chosen": 0.6290280818939209, |
|
"logits/rejected": 0.7923456430435181, |
|
"logps/chosen": -424.76202392578125, |
|
"logps/rejected": -428.0445251464844, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1598247289657593, |
|
"rewards/margins": 0.4864526391029358, |
|
"rewards/rejected": -1.6462774276733398, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.2399232245681382, |
|
"grad_norm": 59.80833149970133, |
|
"learning_rate": 4.7078493733095393e-07, |
|
"logits/chosen": 0.6181938052177429, |
|
"logits/rejected": 0.6077758073806763, |
|
"logps/chosen": -390.84759521484375, |
|
"logps/rejected": -466.9073181152344, |
|
"loss": 0.5317, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9729859232902527, |
|
"rewards/margins": 0.6784927845001221, |
|
"rewards/rejected": -1.6514785289764404, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2423224568138196, |
|
"grad_norm": 46.289608360862516, |
|
"learning_rate": 4.6979496548531614e-07, |
|
"logits/chosen": 1.0508053302764893, |
|
"logits/rejected": 0.9303622245788574, |
|
"logps/chosen": -406.5604553222656, |
|
"logps/rejected": -520.6412353515625, |
|
"loss": 0.5463, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0987637042999268, |
|
"rewards/margins": 0.5951114892959595, |
|
"rewards/rejected": -1.6938753128051758, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.24472168905950095, |
|
"grad_norm": 42.06088161985798, |
|
"learning_rate": 4.6878957589608293e-07, |
|
"logits/chosen": 0.758729100227356, |
|
"logits/rejected": 0.6525701284408569, |
|
"logps/chosen": -419.9432678222656, |
|
"logps/rejected": -529.0169677734375, |
|
"loss": 0.5525, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.134624719619751, |
|
"rewards/margins": 0.6982806921005249, |
|
"rewards/rejected": -1.8329054117202759, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.24712092130518235, |
|
"grad_norm": 49.59098353750529, |
|
"learning_rate": 4.6776883908733956e-07, |
|
"logits/chosen": 0.9453147053718567, |
|
"logits/rejected": 1.105104684829712, |
|
"logps/chosen": -433.534912109375, |
|
"logps/rejected": -436.529296875, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0504753589630127, |
|
"rewards/margins": 0.6852850914001465, |
|
"rewards/rejected": -1.7357604503631592, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2495201535508637, |
|
"grad_norm": 52.65729612768378, |
|
"learning_rate": 4.667328266597178e-07, |
|
"logits/chosen": 0.8473076820373535, |
|
"logits/rejected": 0.901703953742981, |
|
"logps/chosen": -422.795654296875, |
|
"logps/rejected": -475.37664794921875, |
|
"loss": 0.5247, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1214004755020142, |
|
"rewards/margins": 0.6356508135795593, |
|
"rewards/rejected": -1.7570511102676392, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2519193857965451, |
|
"grad_norm": 41.04431980215294, |
|
"learning_rate": 4.6568161128537354e-07, |
|
"logits/chosen": 0.5490779280662537, |
|
"logits/rejected": 0.7916635870933533, |
|
"logps/chosen": -411.013427734375, |
|
"logps/rejected": -405.4512634277344, |
|
"loss": 0.5305, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1067702770233154, |
|
"rewards/margins": 0.4491574168205261, |
|
"rewards/rejected": -1.5559276342391968, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2543186180422265, |
|
"grad_norm": 54.70318972479703, |
|
"learning_rate": 4.6461526670288877e-07, |
|
"logits/chosen": 0.846695065498352, |
|
"logits/rejected": 0.8700542449951172, |
|
"logps/chosen": -443.24188232421875, |
|
"logps/rejected": -477.93212890625, |
|
"loss": 0.567, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1851952075958252, |
|
"rewards/margins": 0.6953514814376831, |
|
"rewards/rejected": -1.8805465698242188, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2567178502879079, |
|
"grad_norm": 40.079917195874565, |
|
"learning_rate": 4.635338677120994e-07, |
|
"logits/chosen": 1.0208021402359009, |
|
"logits/rejected": 0.9469173550605774, |
|
"logps/chosen": -425.1080017089844, |
|
"logps/rejected": -514.3255615234375, |
|
"loss": 0.502, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.173519492149353, |
|
"rewards/margins": 0.789941132068634, |
|
"rewards/rejected": -1.9634605646133423, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2591170825335892, |
|
"grad_norm": 45.856720630657264, |
|
"learning_rate": 4.6243749016884835e-07, |
|
"logits/chosen": 0.9579218626022339, |
|
"logits/rejected": 0.887250542640686, |
|
"logps/chosen": -447.91131591796875, |
|
"logps/rejected": -593.8408813476562, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3050678968429565, |
|
"rewards/margins": 1.0051066875457764, |
|
"rewards/rejected": -2.3101744651794434, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2615163147792706, |
|
"grad_norm": 67.68719643696569, |
|
"learning_rate": 4.613262109796645e-07, |
|
"logits/chosen": 0.7985285520553589, |
|
"logits/rejected": 0.6671330332756042, |
|
"logps/chosen": -426.599365234375, |
|
"logps/rejected": -549.8866577148438, |
|
"loss": 0.5318, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1211028099060059, |
|
"rewards/margins": 0.8608362078666687, |
|
"rewards/rejected": -1.9819389581680298, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.263915547024952, |
|
"grad_norm": 42.388445435936234, |
|
"learning_rate": 4.602001080963678e-07, |
|
"logits/chosen": 0.7295267581939697, |
|
"logits/rejected": 0.7975782155990601, |
|
"logps/chosen": -435.1449279785156, |
|
"logps/rejected": -466.5536193847656, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0994889736175537, |
|
"rewards/margins": 0.6408120393753052, |
|
"rewards/rejected": -1.7403008937835693, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2663147792706334, |
|
"grad_norm": 54.680367956034765, |
|
"learning_rate": 4.590592605106017e-07, |
|
"logits/chosen": 0.5742919445037842, |
|
"logits/rejected": 0.5861102342605591, |
|
"logps/chosen": -445.10418701171875, |
|
"logps/rejected": -476.4541015625, |
|
"loss": 0.5548, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.9862637519836426, |
|
"rewards/margins": 0.6428527235984802, |
|
"rewards/rejected": -1.629116415977478, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2687140115163148, |
|
"grad_norm": 55.34501650862039, |
|
"learning_rate": 4.5790374824829165e-07, |
|
"logits/chosen": 0.7889494299888611, |
|
"logits/rejected": 0.7528579831123352, |
|
"logps/chosen": -331.51971435546875, |
|
"logps/rejected": -408.86810302734375, |
|
"loss": 0.53, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0300666093826294, |
|
"rewards/margins": 0.6660178899765015, |
|
"rewards/rejected": -1.6960846185684204, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.27111324376199614, |
|
"grad_norm": 72.99155725912874, |
|
"learning_rate": 4.5673365236403216e-07, |
|
"logits/chosen": 0.7341224551200867, |
|
"logits/rejected": 0.7539893984794617, |
|
"logps/chosen": -334.0638732910156, |
|
"logps/rejected": -441.6001892089844, |
|
"loss": 0.5285, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9043061137199402, |
|
"rewards/margins": 0.8354164958000183, |
|
"rewards/rejected": -1.7397226095199585, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.27351247600767753, |
|
"grad_norm": 46.95046590404824, |
|
"learning_rate": 4.5554905493540075e-07, |
|
"logits/chosen": 0.9667636156082153, |
|
"logits/rejected": 0.9541499018669128, |
|
"logps/chosen": -372.1925964355469, |
|
"logps/rejected": -477.1884765625, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.11563241481781, |
|
"rewards/margins": 0.9807880520820618, |
|
"rewards/rejected": -2.0964205265045166, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2759117082533589, |
|
"grad_norm": 55.69927183627371, |
|
"learning_rate": 4.5435003905720074e-07, |
|
"logits/chosen": 0.8608558773994446, |
|
"logits/rejected": 0.9462955594062805, |
|
"logps/chosen": -457.93353271484375, |
|
"logps/rejected": -495.9036560058594, |
|
"loss": 0.5209, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2782789468765259, |
|
"rewards/margins": 0.7158700227737427, |
|
"rewards/rejected": -1.9941489696502686, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2783109404990403, |
|
"grad_norm": 50.19138460750432, |
|
"learning_rate": 4.531366888356324e-07, |
|
"logits/chosen": 0.7986218929290771, |
|
"logits/rejected": 0.7014732360839844, |
|
"logps/chosen": -346.75079345703125, |
|
"logps/rejected": -484.42327880859375, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2208130359649658, |
|
"rewards/margins": 0.9798757433891296, |
|
"rewards/rejected": -2.2006888389587402, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2807101727447217, |
|
"grad_norm": 52.00626901277634, |
|
"learning_rate": 4.519090893823931e-07, |
|
"logits/chosen": 1.0265276432037354, |
|
"logits/rejected": 1.0709214210510254, |
|
"logps/chosen": -418.23541259765625, |
|
"logps/rejected": -466.9190979003906, |
|
"loss": 0.5274, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2776657342910767, |
|
"rewards/margins": 0.5967856645584106, |
|
"rewards/rejected": -1.8744516372680664, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.28310940499040305, |
|
"grad_norm": 56.1088767533042, |
|
"learning_rate": 4.5066732680870734e-07, |
|
"logits/chosen": 1.0688083171844482, |
|
"logits/rejected": 1.1207191944122314, |
|
"logps/chosen": -407.19488525390625, |
|
"logps/rejected": -445.3601989746094, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1254866123199463, |
|
"rewards/margins": 0.8538464307785034, |
|
"rewards/rejected": -1.9793332815170288, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.28550863723608444, |
|
"grad_norm": 62.65883518262796, |
|
"learning_rate": 4.494114882192862e-07, |
|
"logits/chosen": 0.7397902607917786, |
|
"logits/rejected": 0.7497339844703674, |
|
"logps/chosen": -421.90576171875, |
|
"logps/rejected": -483.19793701171875, |
|
"loss": 0.497, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.148611307144165, |
|
"rewards/margins": 0.9697946310043335, |
|
"rewards/rejected": -2.118406057357788, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.28790786948176583, |
|
"grad_norm": 49.37026206976709, |
|
"learning_rate": 4.4814166170621735e-07, |
|
"logits/chosen": 0.9949874877929688, |
|
"logits/rejected": 0.9877160787582397, |
|
"logps/chosen": -423.5523376464844, |
|
"logps/rejected": -502.5065002441406, |
|
"loss": 0.5285, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3646349906921387, |
|
"rewards/margins": 1.0118378400802612, |
|
"rewards/rejected": -2.3764731884002686, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2903071017274472, |
|
"grad_norm": 53.24677382900961, |
|
"learning_rate": 4.468579363427858e-07, |
|
"logits/chosen": 0.6013599634170532, |
|
"logits/rejected": 0.6572192311286926, |
|
"logps/chosen": -439.3816833496094, |
|
"logps/rejected": -477.80181884765625, |
|
"loss": 0.5269, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4732705354690552, |
|
"rewards/margins": 0.6750000715255737, |
|
"rewards/rejected": -2.148270845413208, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2927063339731286, |
|
"grad_norm": 68.94012810973257, |
|
"learning_rate": 4.4556040217722555e-07, |
|
"logits/chosen": 0.8849973678588867, |
|
"logits/rejected": 0.770715594291687, |
|
"logps/chosen": -383.9098205566406, |
|
"logps/rejected": -516.1448974609375, |
|
"loss": 0.5118, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0178947448730469, |
|
"rewards/margins": 0.920157790184021, |
|
"rewards/rejected": -1.938052773475647, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.29510556621880996, |
|
"grad_norm": 68.05451108564053, |
|
"learning_rate": 4.442491502264033e-07, |
|
"logits/chosen": 0.7672693133354187, |
|
"logits/rejected": 0.737251877784729, |
|
"logps/chosen": -401.1749267578125, |
|
"logps/rejected": -422.4449157714844, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.3735793828964233, |
|
"rewards/margins": 0.34203168749809265, |
|
"rewards/rejected": -1.7156111001968384, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.29750479846449135, |
|
"grad_norm": 43.95830701945629, |
|
"learning_rate": 4.429242724694338e-07, |
|
"logits/chosen": 0.8582401275634766, |
|
"logits/rejected": 0.7815409302711487, |
|
"logps/chosen": -393.76190185546875, |
|
"logps/rejected": -478.68316650390625, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0531777143478394, |
|
"rewards/margins": 0.744937539100647, |
|
"rewards/rejected": -1.7981151342391968, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.29990403071017274, |
|
"grad_norm": 41.791141385921165, |
|
"learning_rate": 4.4158586184122817e-07, |
|
"logits/chosen": 0.9962691068649292, |
|
"logits/rejected": 1.0862138271331787, |
|
"logps/chosen": -442.69677734375, |
|
"logps/rejected": -490.83477783203125, |
|
"loss": 0.5021, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0725691318511963, |
|
"rewards/margins": 0.927507221698761, |
|
"rewards/rejected": -2.0000760555267334, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.30230326295585414, |
|
"grad_norm": 45.46238343473115, |
|
"learning_rate": 4.4023401222597443e-07, |
|
"logits/chosen": 0.7613427639007568, |
|
"logits/rejected": 0.8594020009040833, |
|
"logps/chosen": -455.64910888671875, |
|
"logps/rejected": -500.81884765625, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3402540683746338, |
|
"rewards/margins": 0.6587101817131042, |
|
"rewards/rejected": -1.9989643096923828, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.30470249520153553, |
|
"grad_norm": 65.9818331003588, |
|
"learning_rate": 4.3886881845055235e-07, |
|
"logits/chosen": 0.8444356918334961, |
|
"logits/rejected": 0.8592824935913086, |
|
"logps/chosen": -395.78253173828125, |
|
"logps/rejected": -482.62066650390625, |
|
"loss": 0.5119, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1250605583190918, |
|
"rewards/margins": 0.9761988520622253, |
|
"rewards/rejected": -2.101259708404541, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.30710172744721687, |
|
"grad_norm": 36.28312911385993, |
|
"learning_rate": 4.374903762778814e-07, |
|
"logits/chosen": 0.8551861643791199, |
|
"logits/rejected": 0.8254505395889282, |
|
"logps/chosen": -419.19140625, |
|
"logps/rejected": -468.79833984375, |
|
"loss": 0.512, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1932027339935303, |
|
"rewards/margins": 0.8080015182495117, |
|
"rewards/rejected": -2.001204013824463, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.30950095969289826, |
|
"grad_norm": 55.47772550847886, |
|
"learning_rate": 4.3609878240020356e-07, |
|
"logits/chosen": 0.6213891506195068, |
|
"logits/rejected": 0.7258043885231018, |
|
"logps/chosen": -482.3876037597656, |
|
"logps/rejected": -496.20721435546875, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3729370832443237, |
|
"rewards/margins": 0.7703680396080017, |
|
"rewards/rejected": -2.1433053016662598, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.31190019193857965, |
|
"grad_norm": 47.54207300797797, |
|
"learning_rate": 4.346941344323005e-07, |
|
"logits/chosen": 0.8744233250617981, |
|
"logits/rejected": 1.0026605129241943, |
|
"logps/chosen": -437.36151123046875, |
|
"logps/rejected": -435.86431884765625, |
|
"loss": 0.57, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.547682285308838, |
|
"rewards/margins": 0.4932001233100891, |
|
"rewards/rejected": -2.0408825874328613, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.31429942418426104, |
|
"grad_norm": 55.17985718254532, |
|
"learning_rate": 4.332765309046467e-07, |
|
"logits/chosen": 1.1504853963851929, |
|
"logits/rejected": 1.1863739490509033, |
|
"logps/chosen": -447.06982421875, |
|
"logps/rejected": -475.8119201660156, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3189492225646973, |
|
"rewards/margins": 0.7006794214248657, |
|
"rewards/rejected": -2.0196290016174316, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.31669865642994244, |
|
"grad_norm": 68.77706740294221, |
|
"learning_rate": 4.3184607125649754e-07, |
|
"logits/chosen": 0.8590642809867859, |
|
"logits/rejected": 0.8680535554885864, |
|
"logps/chosen": -423.173828125, |
|
"logps/rejected": -525.0599365234375, |
|
"loss": 0.5229, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.002828598022461, |
|
"rewards/margins": 0.9719334840774536, |
|
"rewards/rejected": -1.974762201309204, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3190978886756238, |
|
"grad_norm": 48.521191388044464, |
|
"learning_rate": 4.304028558289141e-07, |
|
"logits/chosen": 0.7687040567398071, |
|
"logits/rejected": 0.7733981609344482, |
|
"logps/chosen": -416.3907165527344, |
|
"logps/rejected": -461.263671875, |
|
"loss": 0.4822, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.930250346660614, |
|
"rewards/margins": 0.7135656476020813, |
|
"rewards/rejected": -1.6438157558441162, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.32149712092130517, |
|
"grad_norm": 53.18788722564653, |
|
"learning_rate": 4.28946985857725e-07, |
|
"logits/chosen": 0.9676445126533508, |
|
"logits/rejected": 0.9193744659423828, |
|
"logps/chosen": -453.27899169921875, |
|
"logps/rejected": -556.67919921875, |
|
"loss": 0.49, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3282757997512817, |
|
"rewards/margins": 1.236495018005371, |
|
"rewards/rejected": -2.5647706985473633, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.32389635316698656, |
|
"grad_norm": 52.266257199655314, |
|
"learning_rate": 4.2747856346642445e-07, |
|
"logits/chosen": 0.7797717452049255, |
|
"logits/rejected": 0.7896989583969116, |
|
"logps/chosen": -413.337158203125, |
|
"logps/rejected": -484.93524169921875, |
|
"loss": 0.4981, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4328323602676392, |
|
"rewards/margins": 0.8061455488204956, |
|
"rewards/rejected": -2.2389779090881348, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.32629558541266795, |
|
"grad_norm": 67.5456585948929, |
|
"learning_rate": 4.2599769165900933e-07, |
|
"logits/chosen": 0.8868836164474487, |
|
"logits/rejected": 0.8673005104064941, |
|
"logps/chosen": -482.28131103515625, |
|
"logps/rejected": -500.20111083984375, |
|
"loss": 0.5599, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.8790414333343506, |
|
"rewards/margins": 0.437173068523407, |
|
"rewards/rejected": -2.3162145614624023, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.32869481765834935, |
|
"grad_norm": 54.36707308041338, |
|
"learning_rate": 4.245044743127535e-07, |
|
"logits/chosen": 1.0288692712783813, |
|
"logits/rejected": 0.9043776392936707, |
|
"logps/chosen": -427.5420837402344, |
|
"logps/rejected": -516.7747802734375, |
|
"loss": 0.52, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.374241828918457, |
|
"rewards/margins": 0.7391558885574341, |
|
"rewards/rejected": -2.1133978366851807, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3310940499040307, |
|
"grad_norm": 51.90801874687792, |
|
"learning_rate": 4.229990161709214e-07, |
|
"logits/chosen": 1.0052636861801147, |
|
"logits/rejected": 0.8439337015151978, |
|
"logps/chosen": -386.07806396484375, |
|
"logps/rejected": -503.79071044921875, |
|
"loss": 0.5598, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0417211055755615, |
|
"rewards/margins": 1.0018870830535889, |
|
"rewards/rejected": -2.0436081886291504, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3334932821497121, |
|
"grad_norm": 38.478118020714795, |
|
"learning_rate": 4.214814228354204e-07, |
|
"logits/chosen": 0.8623784780502319, |
|
"logits/rejected": 0.9146772623062134, |
|
"logps/chosen": -423.99456787109375, |
|
"logps/rejected": -512.5450439453125, |
|
"loss": 0.5136, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.995891273021698, |
|
"rewards/margins": 1.1742404699325562, |
|
"rewards/rejected": -2.1701316833496094, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.33589251439539347, |
|
"grad_norm": 51.39037792131386, |
|
"learning_rate": 4.1995180075939375e-07, |
|
"logits/chosen": 1.193550944328308, |
|
"logits/rejected": 1.1216976642608643, |
|
"logps/chosen": -450.0552673339844, |
|
"logps/rejected": -515.8976440429688, |
|
"loss": 0.4902, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1599600315093994, |
|
"rewards/margins": 0.9123829007148743, |
|
"rewards/rejected": -2.072342872619629, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.33829174664107486, |
|
"grad_norm": 45.77345081613381, |
|
"learning_rate": 4.1841025723975297e-07, |
|
"logits/chosen": 0.8184305429458618, |
|
"logits/rejected": 0.8285325765609741, |
|
"logps/chosen": -423.65093994140625, |
|
"logps/rejected": -491.8164978027344, |
|
"loss": 0.4877, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.9397039413452148, |
|
"rewards/margins": 0.8372356295585632, |
|
"rewards/rejected": -1.7769397497177124, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.34069097888675626, |
|
"grad_norm": 45.612240044732765, |
|
"learning_rate": 4.168569004096516e-07, |
|
"logits/chosen": 0.8922063112258911, |
|
"logits/rejected": 0.7613739967346191, |
|
"logps/chosen": -394.79620361328125, |
|
"logps/rejected": -518.1996459960938, |
|
"loss": 0.4936, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1862159967422485, |
|
"rewards/margins": 1.088355302810669, |
|
"rewards/rejected": -2.274571180343628, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.3430902111324376, |
|
"grad_norm": 44.59212559752669, |
|
"learning_rate": 4.152918392308997e-07, |
|
"logits/chosen": 1.0565603971481323, |
|
"logits/rejected": 1.0191848278045654, |
|
"logps/chosen": -411.9322204589844, |
|
"logps/rejected": -440.435791015625, |
|
"loss": 0.5144, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2969719171524048, |
|
"rewards/margins": 0.4736696183681488, |
|
"rewards/rejected": -1.7706416845321655, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.345489443378119, |
|
"grad_norm": 69.16302064102989, |
|
"learning_rate": 4.137151834863213e-07, |
|
"logits/chosen": 0.7613767981529236, |
|
"logits/rejected": 0.5791240930557251, |
|
"logps/chosen": -407.04779052734375, |
|
"logps/rejected": -530.9757080078125, |
|
"loss": 0.5381, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2859679460525513, |
|
"rewards/margins": 0.9325041770935059, |
|
"rewards/rejected": -2.2184720039367676, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3478886756238004, |
|
"grad_norm": 63.13121199229064, |
|
"learning_rate": 4.121270437720526e-07, |
|
"logits/chosen": 0.6463740468025208, |
|
"logits/rejected": 0.5943626761436462, |
|
"logps/chosen": -363.8845520019531, |
|
"logps/rejected": -494.2472229003906, |
|
"loss": 0.5383, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1846896409988403, |
|
"rewards/margins": 0.7935212850570679, |
|
"rewards/rejected": -1.9782108068466187, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3502879078694818, |
|
"grad_norm": 46.44657072197099, |
|
"learning_rate": 4.105275314897852e-07, |
|
"logits/chosen": 1.0462594032287598, |
|
"logits/rejected": 0.8756824731826782, |
|
"logps/chosen": -398.9894104003906, |
|
"logps/rejected": -548.1451416015625, |
|
"loss": 0.5009, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.279750108718872, |
|
"rewards/margins": 1.224387526512146, |
|
"rewards/rejected": -2.5041377544403076, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.35268714011516317, |
|
"grad_norm": 48.644815218733605, |
|
"learning_rate": 4.089167588389508e-07, |
|
"logits/chosen": 0.818069577217102, |
|
"logits/rejected": 0.9696394205093384, |
|
"logps/chosen": -520.135498046875, |
|
"logps/rejected": -567.9030151367188, |
|
"loss": 0.5297, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.386540412902832, |
|
"rewards/margins": 0.9368530511856079, |
|
"rewards/rejected": -2.3233933448791504, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.3550863723608445, |
|
"grad_norm": 66.08193974754538, |
|
"learning_rate": 4.072948388088515e-07, |
|
"logits/chosen": 1.0040993690490723, |
|
"logits/rejected": 1.0307161808013916, |
|
"logps/chosen": -460.35638427734375, |
|
"logps/rejected": -541.0, |
|
"loss": 0.5483, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4873685836791992, |
|
"rewards/margins": 0.776241660118103, |
|
"rewards/rejected": -2.263610363006592, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3574856046065259, |
|
"grad_norm": 51.68715260167406, |
|
"learning_rate": 4.056618851707334e-07, |
|
"logits/chosen": 0.8855185508728027, |
|
"logits/rejected": 0.881149411201477, |
|
"logps/chosen": -415.97491455078125, |
|
"logps/rejected": -512.08544921875, |
|
"loss": 0.4862, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.0777570009231567, |
|
"rewards/margins": 0.9350083470344543, |
|
"rewards/rejected": -2.012765407562256, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3598848368522073, |
|
"grad_norm": 53.00939469793159, |
|
"learning_rate": 4.0401801246980675e-07, |
|
"logits/chosen": 0.6939389109611511, |
|
"logits/rejected": 0.7273343205451965, |
|
"logps/chosen": -390.81207275390625, |
|
"logps/rejected": -440.28436279296875, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.332270860671997, |
|
"rewards/margins": 0.6884737014770508, |
|
"rewards/rejected": -2.020744562149048, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3622840690978887, |
|
"grad_norm": 44.14249498260212, |
|
"learning_rate": 4.0236333601721043e-07, |
|
"logits/chosen": 0.8848837614059448, |
|
"logits/rejected": 0.7293730974197388, |
|
"logps/chosen": -485.33782958984375, |
|
"logps/rejected": -541.0433959960938, |
|
"loss": 0.5271, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4377082586288452, |
|
"rewards/margins": 0.5620696544647217, |
|
"rewards/rejected": -1.9997777938842773, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.3646833013435701, |
|
"grad_norm": 45.34505301032185, |
|
"learning_rate": 4.0069797188192364e-07, |
|
"logits/chosen": 0.754276692867279, |
|
"logits/rejected": 0.7457298040390015, |
|
"logps/chosen": -448.704833984375, |
|
"logps/rejected": -508.90020751953125, |
|
"loss": 0.5177, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2745474576950073, |
|
"rewards/margins": 0.8524150848388672, |
|
"rewards/rejected": -2.126962661743164, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3670825335892514, |
|
"grad_norm": 56.130826474993995, |
|
"learning_rate": 3.9902203688262417e-07, |
|
"logits/chosen": 0.7544993758201599, |
|
"logits/rejected": 0.7632573843002319, |
|
"logps/chosen": -436.90606689453125, |
|
"logps/rejected": -496.5868225097656, |
|
"loss": 0.4975, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3305100202560425, |
|
"rewards/margins": 0.733523428440094, |
|
"rewards/rejected": -2.064033031463623, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.3694817658349328, |
|
"grad_norm": 92.58654093312839, |
|
"learning_rate": 3.9733564857949365e-07, |
|
"logits/chosen": 0.9438270330429077, |
|
"logits/rejected": 1.0349982976913452, |
|
"logps/chosen": -522.1015014648438, |
|
"logps/rejected": -569.0567626953125, |
|
"loss": 0.5159, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.598048448562622, |
|
"rewards/margins": 0.8854676485061646, |
|
"rewards/rejected": -2.483515977859497, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.3718809980806142, |
|
"grad_norm": 57.457525660594186, |
|
"learning_rate": 3.9563892526597177e-07, |
|
"logits/chosen": 0.9184654355049133, |
|
"logits/rejected": 0.8352133631706238, |
|
"logps/chosen": -383.5724182128906, |
|
"logps/rejected": -498.5684509277344, |
|
"loss": 0.5255, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2462844848632812, |
|
"rewards/margins": 0.6481317281723022, |
|
"rewards/rejected": -1.894416093826294, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3742802303262956, |
|
"grad_norm": 45.94533505834995, |
|
"learning_rate": 3.9393198596045795e-07, |
|
"logits/chosen": 0.7388312816619873, |
|
"logits/rejected": 0.5691046118736267, |
|
"logps/chosen": -397.6712341308594, |
|
"logps/rejected": -492.70135498046875, |
|
"loss": 0.55, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2556772232055664, |
|
"rewards/margins": 0.745816707611084, |
|
"rewards/rejected": -2.0014939308166504, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.376679462571977, |
|
"grad_norm": 46.62179890239987, |
|
"learning_rate": 3.922149503979628e-07, |
|
"logits/chosen": 0.7752183079719543, |
|
"logits/rejected": 0.6871098279953003, |
|
"logps/chosen": -449.6210021972656, |
|
"logps/rejected": -576.10009765625, |
|
"loss": 0.5138, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.296726942062378, |
|
"rewards/margins": 1.2676968574523926, |
|
"rewards/rejected": -2.5644240379333496, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3790786948176583, |
|
"grad_norm": 52.674578731728076, |
|
"learning_rate": 3.904879390217095e-07, |
|
"logits/chosen": 0.5881195664405823, |
|
"logits/rejected": 0.6049793362617493, |
|
"logps/chosen": -420.54901123046875, |
|
"logps/rejected": -488.27911376953125, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.272402048110962, |
|
"rewards/margins": 0.7887789011001587, |
|
"rewards/rejected": -2.061180830001831, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3814779270633397, |
|
"grad_norm": 54.61642247735503, |
|
"learning_rate": 3.8875107297468463e-07, |
|
"logits/chosen": 0.6993811726570129, |
|
"logits/rejected": 0.5093764066696167, |
|
"logps/chosen": -400.59661865234375, |
|
"logps/rejected": -579.0813598632812, |
|
"loss": 0.5155, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.130835771560669, |
|
"rewards/margins": 1.3733729124069214, |
|
"rewards/rejected": -2.50420880317688, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3838771593090211, |
|
"grad_norm": 50.06351085238108, |
|
"learning_rate": 3.87004474091141e-07, |
|
"logits/chosen": 0.8732035756111145, |
|
"logits/rejected": 0.8188701868057251, |
|
"logps/chosen": -414.146484375, |
|
"logps/rejected": -507.7533264160156, |
|
"loss": 0.544, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.422013759613037, |
|
"rewards/margins": 0.8163474202156067, |
|
"rewards/rejected": -2.23836088180542, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3862763915547025, |
|
"grad_norm": 44.008064537367936, |
|
"learning_rate": 3.8524826488805114e-07, |
|
"logits/chosen": 0.8892354965209961, |
|
"logits/rejected": 0.9115354418754578, |
|
"logps/chosen": -476.12615966796875, |
|
"logps/rejected": -511.7137145996094, |
|
"loss": 0.5559, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5090854167938232, |
|
"rewards/margins": 0.8414263725280762, |
|
"rewards/rejected": -2.3505120277404785, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3886756238003839, |
|
"grad_norm": 47.40715513873281, |
|
"learning_rate": 3.834825685565133e-07, |
|
"logits/chosen": 0.9135812520980835, |
|
"logits/rejected": 0.987476646900177, |
|
"logps/chosen": -400.1079406738281, |
|
"logps/rejected": -424.742431640625, |
|
"loss": 0.4822, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1808850765228271, |
|
"rewards/margins": 0.667824387550354, |
|
"rewards/rejected": -1.8487094640731812, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.39107485604606523, |
|
"grad_norm": 48.15655808946851, |
|
"learning_rate": 3.8170750895311007e-07, |
|
"logits/chosen": 0.6475375890731812, |
|
"logits/rejected": 0.6269491314888, |
|
"logps/chosen": -439.3553161621094, |
|
"logps/rejected": -487.8170471191406, |
|
"loss": 0.4844, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1088930368423462, |
|
"rewards/margins": 0.7641946077346802, |
|
"rewards/rejected": -1.8730875253677368, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.3934740882917466, |
|
"grad_norm": 47.39539000922524, |
|
"learning_rate": 3.7992321059122045e-07, |
|
"logits/chosen": 0.8335397839546204, |
|
"logits/rejected": 0.896551787853241, |
|
"logps/chosen": -404.9975891113281, |
|
"logps/rejected": -460.90557861328125, |
|
"loss": 0.5015, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.301875352859497, |
|
"rewards/margins": 0.7203748822212219, |
|
"rewards/rejected": -2.0222504138946533, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.395873320537428, |
|
"grad_norm": 53.72186555167991, |
|
"learning_rate": 3.7812979863228576e-07, |
|
"logits/chosen": 0.8256493806838989, |
|
"logits/rejected": 0.7532102465629578, |
|
"logps/chosen": -388.57958984375, |
|
"logps/rejected": -487.66607666015625, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3446906805038452, |
|
"rewards/margins": 0.9335054159164429, |
|
"rewards/rejected": -2.278196096420288, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3982725527831094, |
|
"grad_norm": 54.68327724847984, |
|
"learning_rate": 3.763273988770296e-07, |
|
"logits/chosen": 1.0391814708709717, |
|
"logits/rejected": 1.0717869997024536, |
|
"logps/chosen": -423.3404235839844, |
|
"logps/rejected": -514.415771484375, |
|
"loss": 0.4732, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2519047260284424, |
|
"rewards/margins": 0.946384072303772, |
|
"rewards/rejected": -2.198288679122925, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4006717850287908, |
|
"grad_norm": 56.03903784012351, |
|
"learning_rate": 3.7451613775663405e-07, |
|
"logits/chosen": 0.7413307428359985, |
|
"logits/rejected": 0.6294633150100708, |
|
"logps/chosen": -414.1393127441406, |
|
"logps/rejected": -544.715576171875, |
|
"loss": 0.5052, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2840138673782349, |
|
"rewards/margins": 1.298734426498413, |
|
"rewards/rejected": -2.5827481746673584, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.40307101727447214, |
|
"grad_norm": 66.94071518387054, |
|
"learning_rate": 3.726961423238706e-07, |
|
"logits/chosen": 0.8318290710449219, |
|
"logits/rejected": 0.7132448554039001, |
|
"logps/chosen": -409.70208740234375, |
|
"logps/rejected": -531.7227783203125, |
|
"loss": 0.4871, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.351738452911377, |
|
"rewards/margins": 1.0488694906234741, |
|
"rewards/rejected": -2.4006078243255615, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.40547024952015354, |
|
"grad_norm": 62.86881111183994, |
|
"learning_rate": 3.708675402441882e-07, |
|
"logits/chosen": 0.7547793388366699, |
|
"logits/rejected": 0.9106947183609009, |
|
"logps/chosen": -473.1553649902344, |
|
"logps/rejected": -515.6361083984375, |
|
"loss": 0.5294, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.398654580116272, |
|
"rewards/margins": 0.7868353128433228, |
|
"rewards/rejected": -2.1854898929595947, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.40786948176583493, |
|
"grad_norm": 53.05876275486071, |
|
"learning_rate": 3.6903045978675775e-07, |
|
"logits/chosen": 0.8228553533554077, |
|
"logits/rejected": 0.7816241979598999, |
|
"logps/chosen": -368.2134094238281, |
|
"logps/rejected": -469.35150146484375, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9591751098632812, |
|
"rewards/margins": 1.192636251449585, |
|
"rewards/rejected": -2.1518115997314453, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4102687140115163, |
|
"grad_norm": 42.59574425313358, |
|
"learning_rate": 3.6718502981547474e-07, |
|
"logits/chosen": 0.9947112202644348, |
|
"logits/rejected": 0.851239800453186, |
|
"logps/chosen": -440.8876037597656, |
|
"logps/rejected": -553.189453125, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3626024723052979, |
|
"rewards/margins": 0.6568693518638611, |
|
"rewards/rejected": -2.019472122192383, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.4126679462571977, |
|
"grad_norm": 43.90113535838793, |
|
"learning_rate": 3.6533137977991986e-07, |
|
"logits/chosen": 0.8267531394958496, |
|
"logits/rejected": 0.8521022796630859, |
|
"logps/chosen": -455.4795837402344, |
|
"logps/rejected": -521.9550170898438, |
|
"loss": 0.5314, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3708423376083374, |
|
"rewards/margins": 0.4586716294288635, |
|
"rewards/rejected": -1.8295139074325562, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.41506717850287905, |
|
"grad_norm": 45.98410364865412, |
|
"learning_rate": 3.6346963970627865e-07, |
|
"logits/chosen": 0.9817929267883301, |
|
"logits/rejected": 0.8475187420845032, |
|
"logps/chosen": -403.3632507324219, |
|
"logps/rejected": -512.5465087890625, |
|
"loss": 0.4825, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1158840656280518, |
|
"rewards/margins": 0.9202521443367004, |
|
"rewards/rejected": -2.0361361503601074, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.41746641074856045, |
|
"grad_norm": 73.38035343525371, |
|
"learning_rate": 3.615999401882207e-07, |
|
"logits/chosen": 1.2034145593643188, |
|
"logits/rejected": 1.1238936185836792, |
|
"logps/chosen": -395.7027282714844, |
|
"logps/rejected": -542.30224609375, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.466168761253357, |
|
"rewards/margins": 1.2128782272338867, |
|
"rewards/rejected": -2.679047107696533, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.41986564299424184, |
|
"grad_norm": 46.4298586204596, |
|
"learning_rate": 3.597224123777389e-07, |
|
"logits/chosen": 0.9583197832107544, |
|
"logits/rejected": 0.9138822555541992, |
|
"logps/chosen": -426.2366638183594, |
|
"logps/rejected": -569.9010009765625, |
|
"loss": 0.4805, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3246296644210815, |
|
"rewards/margins": 1.3053953647613525, |
|
"rewards/rejected": -2.6300246715545654, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.42226487523992323, |
|
"grad_norm": 56.64029206592269, |
|
"learning_rate": 3.5783718797595e-07, |
|
"logits/chosen": 0.8950651288032532, |
|
"logits/rejected": 1.0267428159713745, |
|
"logps/chosen": -472.55450439453125, |
|
"logps/rejected": -515.6345825195312, |
|
"loss": 0.5369, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4042866230010986, |
|
"rewards/margins": 0.8778480291366577, |
|
"rewards/rejected": -2.2821342945098877, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4246641074856046, |
|
"grad_norm": 68.41921008018974, |
|
"learning_rate": 3.559443992238558e-07, |
|
"logits/chosen": 0.8837359547615051, |
|
"logits/rejected": 0.8470293283462524, |
|
"logps/chosen": -396.31011962890625, |
|
"logps/rejected": -554.0247192382812, |
|
"loss": 0.5477, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.0590462684631348, |
|
"rewards/margins": 1.3236466646194458, |
|
"rewards/rejected": -2.382692813873291, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.42706333973128596, |
|
"grad_norm": 49.71527524549888, |
|
"learning_rate": 3.540441788930673e-07, |
|
"logits/chosen": 0.9320363998413086, |
|
"logits/rejected": 0.8679379224777222, |
|
"logps/chosen": -442.30938720703125, |
|
"logps/rejected": -540.7999877929688, |
|
"loss": 0.4735, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1069481372833252, |
|
"rewards/margins": 1.3789770603179932, |
|
"rewards/rejected": -2.4859251976013184, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.42946257197696736, |
|
"grad_norm": 52.23523772615801, |
|
"learning_rate": 3.5213666027649123e-07, |
|
"logits/chosen": 0.9266379475593567, |
|
"logits/rejected": 1.049410343170166, |
|
"logps/chosen": -466.9554748535156, |
|
"logps/rejected": -470.123046875, |
|
"loss": 0.5203, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5328459739685059, |
|
"rewards/margins": 0.5366465449333191, |
|
"rewards/rejected": -2.069492816925049, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.43186180422264875, |
|
"grad_norm": 52.52335424695513, |
|
"learning_rate": 3.5022197717898017e-07, |
|
"logits/chosen": 0.6955570578575134, |
|
"logits/rejected": 0.801531195640564, |
|
"logps/chosen": -392.8811950683594, |
|
"logps/rejected": -467.2154846191406, |
|
"loss": 0.4528, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3384829759597778, |
|
"rewards/margins": 1.0498571395874023, |
|
"rewards/rejected": -2.3883399963378906, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.43426103646833014, |
|
"grad_norm": 45.50199750515447, |
|
"learning_rate": 3.4830026390794633e-07, |
|
"logits/chosen": 0.7124713063240051, |
|
"logits/rejected": 0.7005807757377625, |
|
"logps/chosen": -501.9646911621094, |
|
"logps/rejected": -580.0416259765625, |
|
"loss": 0.4676, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6505119800567627, |
|
"rewards/margins": 1.2892248630523682, |
|
"rewards/rejected": -2.939736843109131, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.43666026871401153, |
|
"grad_norm": 37.14025291788701, |
|
"learning_rate": 3.4637165526394104e-07, |
|
"logits/chosen": 0.8327137231826782, |
|
"logits/rejected": 0.80305016040802, |
|
"logps/chosen": -420.4562072753906, |
|
"logps/rejected": -515.4019775390625, |
|
"loss": 0.5151, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4620695114135742, |
|
"rewards/margins": 0.8651070594787598, |
|
"rewards/rejected": -2.327176570892334, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.43905950095969287, |
|
"grad_norm": 51.77915662417934, |
|
"learning_rate": 3.4443628653119814e-07, |
|
"logits/chosen": 0.8448446989059448, |
|
"logits/rejected": 0.8065817952156067, |
|
"logps/chosen": -458.9383850097656, |
|
"logps/rejected": -656.3840942382812, |
|
"loss": 0.4953, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4464824199676514, |
|
"rewards/margins": 1.5652090311050415, |
|
"rewards/rejected": -3.0116915702819824, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.44145873320537427, |
|
"grad_norm": 48.92438736087046, |
|
"learning_rate": 3.424942934681453e-07, |
|
"logits/chosen": 0.8703710436820984, |
|
"logits/rejected": 0.9936625361442566, |
|
"logps/chosen": -400.05755615234375, |
|
"logps/rejected": -516.2723388671875, |
|
"loss": 0.4951, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.1408751010894775, |
|
"rewards/margins": 1.3080992698669434, |
|
"rewards/rejected": -2.448974132537842, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.44385796545105566, |
|
"grad_norm": 76.90402993904019, |
|
"learning_rate": 3.405458122978804e-07, |
|
"logits/chosen": 0.8843770027160645, |
|
"logits/rejected": 0.8394691348075867, |
|
"logps/chosen": -469.574951171875, |
|
"logps/rejected": -538.5886840820312, |
|
"loss": 0.485, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.371246099472046, |
|
"rewards/margins": 0.9312295913696289, |
|
"rewards/rejected": -2.302475929260254, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.44625719769673705, |
|
"grad_norm": 62.441258629021334, |
|
"learning_rate": 3.3859097969861633e-07, |
|
"logits/chosen": 0.9072667956352234, |
|
"logits/rejected": 0.887412428855896, |
|
"logps/chosen": -469.30010986328125, |
|
"logps/rejected": -526.6321411132812, |
|
"loss": 0.5016, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5707318782806396, |
|
"rewards/margins": 0.8752137422561646, |
|
"rewards/rejected": -2.4459455013275146, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.44865642994241844, |
|
"grad_norm": 48.66503465546773, |
|
"learning_rate": 3.366299327940936e-07, |
|
"logits/chosen": 0.8730419874191284, |
|
"logits/rejected": 0.684839129447937, |
|
"logps/chosen": -470.7222595214844, |
|
"logps/rejected": -579.5089721679688, |
|
"loss": 0.5088, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4040987491607666, |
|
"rewards/margins": 0.8575217127799988, |
|
"rewards/rejected": -2.26162052154541, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.4510556621880998, |
|
"grad_norm": 44.68082348006358, |
|
"learning_rate": 3.3466280914396117e-07, |
|
"logits/chosen": 0.8070545196533203, |
|
"logits/rejected": 0.7253775000572205, |
|
"logps/chosen": -421.2344665527344, |
|
"logps/rejected": -546.1370239257812, |
|
"loss": 0.5005, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3976895809173584, |
|
"rewards/margins": 1.0565311908721924, |
|
"rewards/rejected": -2.4542205333709717, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4534548944337812, |
|
"grad_norm": 56.702478594555046, |
|
"learning_rate": 3.326897467341281e-07, |
|
"logits/chosen": 0.7543920278549194, |
|
"logits/rejected": 0.7805048227310181, |
|
"logps/chosen": -380.0421142578125, |
|
"logps/rejected": -487.1282653808594, |
|
"loss": 0.5116, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3079397678375244, |
|
"rewards/margins": 0.9207633137702942, |
|
"rewards/rejected": -2.2287027835845947, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.45585412667946257, |
|
"grad_norm": 72.35267614756623, |
|
"learning_rate": 3.3071088396708335e-07, |
|
"logits/chosen": 0.884433925151825, |
|
"logits/rejected": 0.7826744318008423, |
|
"logps/chosen": -363.8003845214844, |
|
"logps/rejected": -480.23541259765625, |
|
"loss": 0.5032, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1955456733703613, |
|
"rewards/margins": 0.9841529726982117, |
|
"rewards/rejected": -2.179698944091797, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.45825335892514396, |
|
"grad_norm": 49.22886627829821, |
|
"learning_rate": 3.2872635965218824e-07, |
|
"logits/chosen": 1.0801002979278564, |
|
"logits/rejected": 1.012824296951294, |
|
"logps/chosen": -451.6114807128906, |
|
"logps/rejected": -564.43212890625, |
|
"loss": 0.5155, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5498759746551514, |
|
"rewards/margins": 0.9230774641036987, |
|
"rewards/rejected": -2.4729533195495605, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.46065259117082535, |
|
"grad_norm": 42.18936637177058, |
|
"learning_rate": 3.2673631299593905e-07, |
|
"logits/chosen": 0.8707852363586426, |
|
"logits/rejected": 0.9778729677200317, |
|
"logps/chosen": -452.41754150390625, |
|
"logps/rejected": -524.420654296875, |
|
"loss": 0.507, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3883720636367798, |
|
"rewards/margins": 0.848536491394043, |
|
"rewards/rejected": -2.236908435821533, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.4630518234165067, |
|
"grad_norm": 65.06391769278284, |
|
"learning_rate": 3.247408835922024e-07, |
|
"logits/chosen": 0.9964378476142883, |
|
"logits/rejected": 0.8785178065299988, |
|
"logps/chosen": -495.6791076660156, |
|
"logps/rejected": -596.3472290039062, |
|
"loss": 0.4866, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.529970407485962, |
|
"rewards/margins": 0.9682935476303101, |
|
"rewards/rejected": -2.4982638359069824, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.4654510556621881, |
|
"grad_norm": 59.642955517422095, |
|
"learning_rate": 3.2274021141242306e-07, |
|
"logits/chosen": 1.0544954538345337, |
|
"logits/rejected": 1.052119493484497, |
|
"logps/chosen": -433.45965576171875, |
|
"logps/rejected": -525.55615234375, |
|
"loss": 0.4857, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3199418783187866, |
|
"rewards/margins": 0.8681329488754272, |
|
"rewards/rejected": -2.1880745887756348, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4678502879078695, |
|
"grad_norm": 91.78366909934816, |
|
"learning_rate": 3.2073443679580613e-07, |
|
"logits/chosen": 0.7547950148582458, |
|
"logits/rejected": 0.7914108037948608, |
|
"logps/chosen": -456.7359924316406, |
|
"logps/rejected": -531.7911376953125, |
|
"loss": 0.4904, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4032281637191772, |
|
"rewards/margins": 0.7097315788269043, |
|
"rewards/rejected": -2.112959384918213, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.47024952015355087, |
|
"grad_norm": 53.077645633954475, |
|
"learning_rate": 3.1872370043947194e-07, |
|
"logits/chosen": 1.0897995233535767, |
|
"logits/rejected": 1.055633544921875, |
|
"logps/chosen": -414.7632751464844, |
|
"logps/rejected": -548.1317138671875, |
|
"loss": 0.4632, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.0568482875823975, |
|
"rewards/margins": 1.4594687223434448, |
|
"rewards/rejected": -2.5163168907165527, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.47264875239923226, |
|
"grad_norm": 51.487210109520056, |
|
"learning_rate": 3.167081433885874e-07, |
|
"logits/chosen": 1.0350861549377441, |
|
"logits/rejected": 0.972908616065979, |
|
"logps/chosen": -554.54833984375, |
|
"logps/rejected": -705.962890625, |
|
"loss": 0.4602, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8348925113677979, |
|
"rewards/margins": 1.0842039585113525, |
|
"rewards/rejected": -2.9190964698791504, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.4750479846449136, |
|
"grad_norm": 45.109957619702215, |
|
"learning_rate": 3.14687907026472e-07, |
|
"logits/chosen": 0.9095099568367004, |
|
"logits/rejected": 0.938258945941925, |
|
"logps/chosen": -434.8583068847656, |
|
"logps/rejected": -570.1888427734375, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.6028114557266235, |
|
"rewards/margins": 1.2169245481491089, |
|
"rewards/rejected": -2.8197360038757324, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.477447216890595, |
|
"grad_norm": 62.477519637457696, |
|
"learning_rate": 3.126631330646801e-07, |
|
"logits/chosen": 0.8121892213821411, |
|
"logits/rejected": 0.7717586159706116, |
|
"logps/chosen": -548.3729248046875, |
|
"logps/rejected": -626.3896484375, |
|
"loss": 0.5076, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.0559659004211426, |
|
"rewards/margins": 0.7300991415977478, |
|
"rewards/rejected": -2.786064624786377, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"grad_norm": 44.60843746328939, |
|
"learning_rate": 3.1063396353306097e-07, |
|
"logits/chosen": 0.9651338458061218, |
|
"logits/rejected": 1.1084082126617432, |
|
"logps/chosen": -459.8268127441406, |
|
"logps/rejected": -498.62554931640625, |
|
"loss": 0.494, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3309376239776611, |
|
"rewards/margins": 0.9754717946052551, |
|
"rewards/rejected": -2.3064093589782715, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"eval_logits/chosen": 1.2166770696640015, |
|
"eval_logits/rejected": 1.1608844995498657, |
|
"eval_logps/chosen": -443.1514587402344, |
|
"eval_logps/rejected": -562.7020874023438, |
|
"eval_loss": 0.49380749464035034, |
|
"eval_rewards/accuracies": 0.7910714149475098, |
|
"eval_rewards/chosen": -1.4838404655456543, |
|
"eval_rewards/margins": 1.1245745420455933, |
|
"eval_rewards/rejected": -2.608414888381958, |
|
"eval_runtime": 186.3836, |
|
"eval_samples_per_second": 23.935, |
|
"eval_steps_per_second": 0.376, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4822456813819578, |
|
"grad_norm": 68.47203736653746, |
|
"learning_rate": 3.0860054076979535e-07, |
|
"logits/chosen": 0.9950852394104004, |
|
"logits/rejected": 0.9550646543502808, |
|
"logps/chosen": -474.58184814453125, |
|
"logps/rejected": -555.65478515625, |
|
"loss": 0.4886, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.637946367263794, |
|
"rewards/margins": 1.066723108291626, |
|
"rewards/rejected": -2.70466947555542, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.4846449136276392, |
|
"grad_norm": 54.78705406465125, |
|
"learning_rate": 3.065630074114115e-07, |
|
"logits/chosen": 0.9966745376586914, |
|
"logits/rejected": 1.0476700067520142, |
|
"logps/chosen": -471.40283203125, |
|
"logps/rejected": -545.7142944335938, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3969167470932007, |
|
"rewards/margins": 1.1684801578521729, |
|
"rewards/rejected": -2.565396785736084, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.4870441458733205, |
|
"grad_norm": 57.24019044027548, |
|
"learning_rate": 3.0452150638277947e-07, |
|
"logits/chosen": 0.9877074360847473, |
|
"logits/rejected": 0.917037308216095, |
|
"logps/chosen": -396.08197021484375, |
|
"logps/rejected": -481.23004150390625, |
|
"loss": 0.5243, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.239987850189209, |
|
"rewards/margins": 0.79121333360672, |
|
"rewards/rejected": -2.031201124191284, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.4894433781190019, |
|
"grad_norm": 44.16237077814799, |
|
"learning_rate": 3.024761808870856e-07, |
|
"logits/chosen": 1.1882489919662476, |
|
"logits/rejected": 1.0277183055877686, |
|
"logps/chosen": -378.992919921875, |
|
"logps/rejected": -533.8101806640625, |
|
"loss": 0.4665, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9555326700210571, |
|
"rewards/margins": 1.6396633386611938, |
|
"rewards/rejected": -2.595196008682251, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.4918426103646833, |
|
"grad_norm": 74.81510476061555, |
|
"learning_rate": 3.004271743957875e-07, |
|
"logits/chosen": 0.7026466727256775, |
|
"logits/rejected": 0.6058215498924255, |
|
"logps/chosen": -476.6302795410156, |
|
"logps/rejected": -584.3870849609375, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.641077995300293, |
|
"rewards/margins": 0.9068584442138672, |
|
"rewards/rejected": -2.547936201095581, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4942418426103647, |
|
"grad_norm": 51.4769571654088, |
|
"learning_rate": 2.983746306385499e-07, |
|
"logits/chosen": 0.9324299693107605, |
|
"logits/rejected": 0.8517206311225891, |
|
"logps/chosen": -427.55987548828125, |
|
"logps/rejected": -562.2366943359375, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3503427505493164, |
|
"rewards/margins": 1.1821272373199463, |
|
"rewards/rejected": -2.532470226287842, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.4966410748560461, |
|
"grad_norm": 50.118950103729716, |
|
"learning_rate": 2.963186935931628e-07, |
|
"logits/chosen": 0.9580842852592468, |
|
"logits/rejected": 0.8755077123641968, |
|
"logps/chosen": -452.33465576171875, |
|
"logps/rejected": -553.51220703125, |
|
"loss": 0.4754, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.320235252380371, |
|
"rewards/margins": 1.0315730571746826, |
|
"rewards/rejected": -2.3518080711364746, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.4990403071017274, |
|
"grad_norm": 49.40016845612337, |
|
"learning_rate": 2.9425950747544176e-07, |
|
"logits/chosen": 0.839256763458252, |
|
"logits/rejected": 0.8141627311706543, |
|
"logps/chosen": -504.6307678222656, |
|
"logps/rejected": -632.181884765625, |
|
"loss": 0.4776, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5735645294189453, |
|
"rewards/margins": 1.5842703580856323, |
|
"rewards/rejected": -3.157834768295288, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5014395393474088, |
|
"grad_norm": 77.70300437123323, |
|
"learning_rate": 2.921972167291119e-07, |
|
"logits/chosen": 0.7339235544204712, |
|
"logits/rejected": 0.7084833383560181, |
|
"logps/chosen": -456.03387451171875, |
|
"logps/rejected": -566.5303955078125, |
|
"loss": 0.4949, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3032381534576416, |
|
"rewards/margins": 0.9215162992477417, |
|
"rewards/rejected": -2.2247543334960938, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5038387715930902, |
|
"grad_norm": 61.57283238764505, |
|
"learning_rate": 2.9013196601567567e-07, |
|
"logits/chosen": 0.6217154860496521, |
|
"logits/rejected": 0.5891402959823608, |
|
"logps/chosen": -400.98052978515625, |
|
"logps/rejected": -513.5938720703125, |
|
"loss": 0.5388, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1533586978912354, |
|
"rewards/margins": 1.00028395652771, |
|
"rewards/rejected": -2.153642416000366, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5062380038387716, |
|
"grad_norm": 51.27306900884652, |
|
"learning_rate": 2.8806390020426555e-07, |
|
"logits/chosen": 0.7210658192634583, |
|
"logits/rejected": 0.6568849682807922, |
|
"logps/chosen": -423.27581787109375, |
|
"logps/rejected": -528.0835571289062, |
|
"loss": 0.4891, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1000630855560303, |
|
"rewards/margins": 1.0572912693023682, |
|
"rewards/rejected": -2.1573543548583984, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.508637236084453, |
|
"grad_norm": 69.64029908977776, |
|
"learning_rate": 2.8599316436148187e-07, |
|
"logits/chosen": 0.9349791407585144, |
|
"logits/rejected": 0.8959067463874817, |
|
"logps/chosen": -427.88824462890625, |
|
"logps/rejected": -498.28277587890625, |
|
"loss": 0.4695, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4291841983795166, |
|
"rewards/margins": 0.6939970254898071, |
|
"rewards/rejected": -2.123181104660034, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5110364683301344, |
|
"grad_norm": 56.95098336670352, |
|
"learning_rate": 2.8391990374121723e-07, |
|
"logits/chosen": 0.7756115198135376, |
|
"logits/rejected": 0.6824313402175903, |
|
"logps/chosen": -424.12078857421875, |
|
"logps/rejected": -554.8558349609375, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4175843000411987, |
|
"rewards/margins": 1.0442845821380615, |
|
"rewards/rejected": -2.4618687629699707, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5134357005758158, |
|
"grad_norm": 51.346229952240485, |
|
"learning_rate": 2.818442637744669e-07, |
|
"logits/chosen": 0.8402312397956848, |
|
"logits/rejected": 0.7388736009597778, |
|
"logps/chosen": -439.6341857910156, |
|
"logps/rejected": -538.5376586914062, |
|
"loss": 0.5221, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.446385145187378, |
|
"rewards/margins": 0.9518035650253296, |
|
"rewards/rejected": -2.398188829421997, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5158349328214972, |
|
"grad_norm": 64.97786746923843, |
|
"learning_rate": 2.797663900591284e-07, |
|
"logits/chosen": 0.7467316389083862, |
|
"logits/rejected": 0.8188697695732117, |
|
"logps/chosen": -456.263671875, |
|
"logps/rejected": -513.8021850585938, |
|
"loss": 0.4718, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.460170030593872, |
|
"rewards/margins": 0.9706412553787231, |
|
"rewards/rejected": -2.4308114051818848, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5182341650671785, |
|
"grad_norm": 47.735500272298246, |
|
"learning_rate": 2.776864283497874e-07, |
|
"logits/chosen": 0.8745681643486023, |
|
"logits/rejected": 0.9194679260253906, |
|
"logps/chosen": -421.95806884765625, |
|
"logps/rejected": -566.9091796875, |
|
"loss": 0.4879, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4120038747787476, |
|
"rewards/margins": 1.492087960243225, |
|
"rewards/rejected": -2.9040918350219727, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5206333973128598, |
|
"grad_norm": 46.84287465469112, |
|
"learning_rate": 2.756045245474943e-07, |
|
"logits/chosen": 0.6313827037811279, |
|
"logits/rejected": 0.5524680018424988, |
|
"logps/chosen": -452.0630798339844, |
|
"logps/rejected": -565.3175048828125, |
|
"loss": 0.5052, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3242391347885132, |
|
"rewards/margins": 0.8884557485580444, |
|
"rewards/rejected": -2.2126948833465576, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5230326295585412, |
|
"grad_norm": 42.64036648327254, |
|
"learning_rate": 2.7352082468952977e-07, |
|
"logits/chosen": 0.7804339528083801, |
|
"logits/rejected": 0.7059229612350464, |
|
"logps/chosen": -447.0234375, |
|
"logps/rejected": -611.4049072265625, |
|
"loss": 0.5114, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.615553855895996, |
|
"rewards/margins": 1.4554349184036255, |
|
"rewards/rejected": -3.070988416671753, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5254318618042226, |
|
"grad_norm": 66.89197539611867, |
|
"learning_rate": 2.7143547493916e-07, |
|
"logits/chosen": 0.8176469802856445, |
|
"logits/rejected": 0.6826872229576111, |
|
"logps/chosen": -409.46759033203125, |
|
"logps/rejected": -586.7466430664062, |
|
"loss": 0.482, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.2512916326522827, |
|
"rewards/margins": 1.739323377609253, |
|
"rewards/rejected": -2.990614891052246, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.527831094049904, |
|
"grad_norm": 63.20140309956034, |
|
"learning_rate": 2.693486215753853e-07, |
|
"logits/chosen": 0.752414882183075, |
|
"logits/rejected": 0.6861324310302734, |
|
"logps/chosen": -419.324951171875, |
|
"logps/rejected": -514.6325073242188, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3946325778961182, |
|
"rewards/margins": 1.1481375694274902, |
|
"rewards/rejected": -2.5427699089050293, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5302303262955854, |
|
"grad_norm": 68.01427450286997, |
|
"learning_rate": 2.6726041098267805e-07, |
|
"logits/chosen": 0.5179321765899658, |
|
"logits/rejected": 0.5527491569519043, |
|
"logps/chosen": -490.6349182128906, |
|
"logps/rejected": -493.8052673339844, |
|
"loss": 0.5357, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.6353775262832642, |
|
"rewards/margins": 0.4279721677303314, |
|
"rewards/rejected": -2.063349723815918, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5326295585412668, |
|
"grad_norm": 78.46661449911845, |
|
"learning_rate": 2.6517098964071507e-07, |
|
"logits/chosen": 0.9700131416320801, |
|
"logits/rejected": 0.9586893320083618, |
|
"logps/chosen": -436.4134826660156, |
|
"logps/rejected": -495.54449462890625, |
|
"loss": 0.5427, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3165397644042969, |
|
"rewards/margins": 0.4658414423465729, |
|
"rewards/rejected": -1.782381296157837, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5350287907869482, |
|
"grad_norm": 50.019083454811444, |
|
"learning_rate": 2.630805041141023e-07, |
|
"logits/chosen": 1.065694808959961, |
|
"logits/rejected": 1.008362054824829, |
|
"logps/chosen": -395.4405822753906, |
|
"logps/rejected": -535.7208251953125, |
|
"loss": 0.4846, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1981178522109985, |
|
"rewards/margins": 1.276220679283142, |
|
"rewards/rejected": -2.4743385314941406, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5374280230326296, |
|
"grad_norm": 64.07494250184162, |
|
"learning_rate": 2.609891010420941e-07, |
|
"logits/chosen": 0.8909622430801392, |
|
"logits/rejected": 0.8934359550476074, |
|
"logps/chosen": -443.07684326171875, |
|
"logps/rejected": -564.4014892578125, |
|
"loss": 0.4609, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3731637001037598, |
|
"rewards/margins": 1.2797712087631226, |
|
"rewards/rejected": -2.6529347896575928, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.539827255278311, |
|
"grad_norm": 48.40896952203475, |
|
"learning_rate": 2.5889692712830674e-07, |
|
"logits/chosen": 0.6491595506668091, |
|
"logits/rejected": 0.6502425670623779, |
|
"logps/chosen": -392.4731140136719, |
|
"logps/rejected": -474.72479248046875, |
|
"loss": 0.4703, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1799042224884033, |
|
"rewards/margins": 0.9124962091445923, |
|
"rewards/rejected": -2.092400074005127, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5422264875239923, |
|
"grad_norm": 62.72939017230718, |
|
"learning_rate": 2.5680412913042843e-07, |
|
"logits/chosen": 0.9608882665634155, |
|
"logits/rejected": 0.8880300521850586, |
|
"logps/chosen": -418.8319396972656, |
|
"logps/rejected": -539.8108520507812, |
|
"loss": 0.4897, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3952124118804932, |
|
"rewards/margins": 1.1928045749664307, |
|
"rewards/rejected": -2.588017225265503, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5446257197696737, |
|
"grad_norm": 69.32128093551441, |
|
"learning_rate": 2.5471085384992404e-07, |
|
"logits/chosen": 0.924429714679718, |
|
"logits/rejected": 0.7656577825546265, |
|
"logps/chosen": -390.9301452636719, |
|
"logps/rejected": -579.2156982421875, |
|
"loss": 0.4899, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1683149337768555, |
|
"rewards/margins": 1.7250967025756836, |
|
"rewards/rejected": -2.893411636352539, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5470249520153551, |
|
"grad_norm": 52.7355061107119, |
|
"learning_rate": 2.526172481217381e-07, |
|
"logits/chosen": 0.9853194355964661, |
|
"logits/rejected": 0.8427760004997253, |
|
"logps/chosen": -421.3599548339844, |
|
"logps/rejected": -550.279052734375, |
|
"loss": 0.5042, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.73796808719635, |
|
"rewards/margins": 1.111445665359497, |
|
"rewards/rejected": -2.8494136333465576, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5494241842610365, |
|
"grad_norm": 53.62606714676091, |
|
"learning_rate": 2.5052345880399456e-07, |
|
"logits/chosen": 1.0646812915802002, |
|
"logits/rejected": 1.071195363998413, |
|
"logps/chosen": -427.5694274902344, |
|
"logps/rejected": -516.3193359375, |
|
"loss": 0.4427, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6495002508163452, |
|
"rewards/margins": 0.8560526967048645, |
|
"rewards/rejected": -2.5055530071258545, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5518234165067178, |
|
"grad_norm": 64.06509613900914, |
|
"learning_rate": 2.4842963276769555e-07, |
|
"logits/chosen": 1.1138678789138794, |
|
"logits/rejected": 0.9982527494430542, |
|
"logps/chosen": -427.8755798339844, |
|
"logps/rejected": -611.0533447265625, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7073419094085693, |
|
"rewards/margins": 1.392088770866394, |
|
"rewards/rejected": -3.099430799484253, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5542226487523992, |
|
"grad_norm": 52.510465339154806, |
|
"learning_rate": 2.463359168864189e-07, |
|
"logits/chosen": 0.7810562252998352, |
|
"logits/rejected": 0.8685593605041504, |
|
"logps/chosen": -498.1653747558594, |
|
"logps/rejected": -567.6756591796875, |
|
"loss": 0.5216, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.502379298210144, |
|
"rewards/margins": 1.1495414972305298, |
|
"rewards/rejected": -2.651920795440674, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5566218809980806, |
|
"grad_norm": 78.04829703471421, |
|
"learning_rate": 2.4424245802601555e-07, |
|
"logits/chosen": 0.8291500210762024, |
|
"logits/rejected": 0.7324923872947693, |
|
"logps/chosen": -424.38995361328125, |
|
"logps/rejected": -574.2205810546875, |
|
"loss": 0.4711, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3887931108474731, |
|
"rewards/margins": 0.9264582395553589, |
|
"rewards/rejected": -2.315251111984253, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.559021113243762, |
|
"grad_norm": 70.26539499857562, |
|
"learning_rate": 2.421494030343072e-07, |
|
"logits/chosen": 0.9374772310256958, |
|
"logits/rejected": 1.077742338180542, |
|
"logps/chosen": -455.1084899902344, |
|
"logps/rejected": -479.3863830566406, |
|
"loss": 0.5631, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5127017498016357, |
|
"rewards/margins": 0.7922407388687134, |
|
"rewards/rejected": -2.3049423694610596, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5614203454894434, |
|
"grad_norm": 70.2899322509318, |
|
"learning_rate": 2.400568987307861e-07, |
|
"logits/chosen": 1.0267776250839233, |
|
"logits/rejected": 1.0876456499099731, |
|
"logps/chosen": -432.3119201660156, |
|
"logps/rejected": -474.933349609375, |
|
"loss": 0.4648, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.544737458229065, |
|
"rewards/margins": 0.6480750441551208, |
|
"rewards/rejected": -2.192812442779541, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5638195777351248, |
|
"grad_norm": 54.87146723110392, |
|
"learning_rate": 2.379650918963156e-07, |
|
"logits/chosen": 0.9092999696731567, |
|
"logits/rejected": 0.8199703097343445, |
|
"logps/chosen": -423.0660705566406, |
|
"logps/rejected": -569.11572265625, |
|
"loss": 0.4852, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7659733295440674, |
|
"rewards/margins": 1.3059362173080444, |
|
"rewards/rejected": -3.0719094276428223, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5662188099808061, |
|
"grad_norm": 63.96900099199721, |
|
"learning_rate": 2.3587412926283438e-07, |
|
"logits/chosen": 0.854632556438446, |
|
"logits/rejected": 0.7670155763626099, |
|
"logps/chosen": -477.948486328125, |
|
"logps/rejected": -591.9798583984375, |
|
"loss": 0.5222, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3857605457305908, |
|
"rewards/margins": 1.5275886058807373, |
|
"rewards/rejected": -2.913348913192749, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5686180422264875, |
|
"grad_norm": 41.93285610160106, |
|
"learning_rate": 2.337841575030642e-07, |
|
"logits/chosen": 0.8104848861694336, |
|
"logits/rejected": 0.769019365310669, |
|
"logps/chosen": -484.6270446777344, |
|
"logps/rejected": -576.666259765625, |
|
"loss": 0.5056, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5848088264465332, |
|
"rewards/margins": 0.8290297389030457, |
|
"rewards/rejected": -2.4138386249542236, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5710172744721689, |
|
"grad_norm": 39.93265801907108, |
|
"learning_rate": 2.316953232202206e-07, |
|
"logits/chosen": 1.1064794063568115, |
|
"logits/rejected": 1.3124349117279053, |
|
"logps/chosen": -416.0125427246094, |
|
"logps/rejected": -411.6392517089844, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3425989151000977, |
|
"rewards/margins": 0.665269136428833, |
|
"rewards/rejected": -2.0078678131103516, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5734165067178503, |
|
"grad_norm": 50.55246328994561, |
|
"learning_rate": 2.2960777293772958e-07, |
|
"logits/chosen": 1.1529178619384766, |
|
"logits/rejected": 1.218758225440979, |
|
"logps/chosen": -385.5342102050781, |
|
"logps/rejected": -489.95843505859375, |
|
"loss": 0.4673, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3069584369659424, |
|
"rewards/margins": 1.1834717988967896, |
|
"rewards/rejected": -2.4904303550720215, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5758157389635317, |
|
"grad_norm": 45.5491994908578, |
|
"learning_rate": 2.2752165308894974e-07, |
|
"logits/chosen": 1.01101815700531, |
|
"logits/rejected": 1.0026451349258423, |
|
"logps/chosen": -371.60589599609375, |
|
"logps/rejected": -459.232421875, |
|
"loss": 0.4874, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4028453826904297, |
|
"rewards/margins": 0.9635394215583801, |
|
"rewards/rejected": -2.366384744644165, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5782149712092131, |
|
"grad_norm": 52.12193374603822, |
|
"learning_rate": 2.254371100069005e-07, |
|
"logits/chosen": 1.031686782836914, |
|
"logits/rejected": 0.8516979217529297, |
|
"logps/chosen": -401.5879821777344, |
|
"logps/rejected": -530.6275634765625, |
|
"loss": 0.4635, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.0833652019500732, |
|
"rewards/margins": 1.0837208032608032, |
|
"rewards/rejected": -2.167086124420166, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5806142034548945, |
|
"grad_norm": 59.98014211098357, |
|
"learning_rate": 2.2335428991399725e-07, |
|
"logits/chosen": 1.133502721786499, |
|
"logits/rejected": 1.043272614479065, |
|
"logps/chosen": -384.2505187988281, |
|
"logps/rejected": -592.968994140625, |
|
"loss": 0.4953, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5084893703460693, |
|
"rewards/margins": 1.9793064594268799, |
|
"rewards/rejected": -3.48779559135437, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5830134357005758, |
|
"grad_norm": 54.66091749549038, |
|
"learning_rate": 2.2127333891179458e-07, |
|
"logits/chosen": 1.1090089082717896, |
|
"logits/rejected": 1.0012341737747192, |
|
"logps/chosen": -397.206787109375, |
|
"logps/rejected": -564.4781494140625, |
|
"loss": 0.4884, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4173775911331177, |
|
"rewards/margins": 1.4264302253723145, |
|
"rewards/rejected": -2.8438076972961426, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5854126679462572, |
|
"grad_norm": 68.41100793696917, |
|
"learning_rate": 2.1919440297073782e-07, |
|
"logits/chosen": 0.9752421379089355, |
|
"logits/rejected": 0.961895763874054, |
|
"logps/chosen": -401.00982666015625, |
|
"logps/rejected": -527.8412475585938, |
|
"loss": 0.511, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5913372039794922, |
|
"rewards/margins": 1.159667730331421, |
|
"rewards/rejected": -2.751004934310913, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5878119001919386, |
|
"grad_norm": 55.28276385518376, |
|
"learning_rate": 2.1711762791992368e-07, |
|
"logits/chosen": 1.1240136623382568, |
|
"logits/rejected": 1.1612218618392944, |
|
"logps/chosen": -468.381591796875, |
|
"logps/rejected": -535.3687133789062, |
|
"loss": 0.5229, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4069411754608154, |
|
"rewards/margins": 0.9461265802383423, |
|
"rewards/rejected": -2.3530678749084473, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5902111324376199, |
|
"grad_norm": 58.95559860539494, |
|
"learning_rate": 2.1504315943687114e-07, |
|
"logits/chosen": 0.7781744003295898, |
|
"logits/rejected": 0.6136264204978943, |
|
"logps/chosen": -394.98272705078125, |
|
"logps/rejected": -580.1182861328125, |
|
"loss": 0.4566, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.204701542854309, |
|
"rewards/margins": 1.41546630859375, |
|
"rewards/rejected": -2.6201682090759277, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.5926103646833013, |
|
"grad_norm": 66.83705487462822, |
|
"learning_rate": 2.1297114303730248e-07, |
|
"logits/chosen": 1.043413519859314, |
|
"logits/rejected": 0.819706916809082, |
|
"logps/chosen": -414.18194580078125, |
|
"logps/rejected": -585.1841430664062, |
|
"loss": 0.5065, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4290785789489746, |
|
"rewards/margins": 1.1952152252197266, |
|
"rewards/rejected": -2.624293804168701, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5950095969289827, |
|
"grad_norm": 59.852697232379285, |
|
"learning_rate": 2.1090172406493616e-07, |
|
"logits/chosen": 0.944989800453186, |
|
"logits/rejected": 0.7555774450302124, |
|
"logps/chosen": -377.5307922363281, |
|
"logps/rejected": -507.8644104003906, |
|
"loss": 0.4459, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.1177617311477661, |
|
"rewards/margins": 1.0794548988342285, |
|
"rewards/rejected": -2.197216510772705, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5974088291746641, |
|
"grad_norm": 85.94904960649434, |
|
"learning_rate": 2.0883504768129146e-07, |
|
"logits/chosen": 0.9416057467460632, |
|
"logits/rejected": 0.8576537370681763, |
|
"logps/chosen": -462.7132263183594, |
|
"logps/rejected": -575.8701782226562, |
|
"loss": 0.5096, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4933173656463623, |
|
"rewards/margins": 1.1759144067764282, |
|
"rewards/rejected": -2.66923189163208, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.5998080614203455, |
|
"grad_norm": 71.86374165366914, |
|
"learning_rate": 2.0677125885550571e-07, |
|
"logits/chosen": 1.0789827108383179, |
|
"logits/rejected": 1.241758108139038, |
|
"logps/chosen": -436.0838317871094, |
|
"logps/rejected": -483.39794921875, |
|
"loss": 0.4805, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5711548328399658, |
|
"rewards/margins": 0.8767528533935547, |
|
"rewards/rejected": -2.4479074478149414, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6022072936660269, |
|
"grad_norm": 79.49096424520961, |
|
"learning_rate": 2.0471050235416587e-07, |
|
"logits/chosen": 0.6974599957466125, |
|
"logits/rejected": 0.8242738842964172, |
|
"logps/chosen": -445.80047607421875, |
|
"logps/rejected": -516.7428588867188, |
|
"loss": 0.4637, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5052564144134521, |
|
"rewards/margins": 1.1741901636123657, |
|
"rewards/rejected": -2.679446220397949, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6046065259117083, |
|
"grad_norm": 80.60526856821485, |
|
"learning_rate": 2.026529227311532e-07, |
|
"logits/chosen": 0.9884384274482727, |
|
"logits/rejected": 0.9763882756233215, |
|
"logps/chosen": -432.0250549316406, |
|
"logps/rejected": -518.0442504882812, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.796215295791626, |
|
"rewards/margins": 0.707119345664978, |
|
"rewards/rejected": -2.5033345222473145, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6070057581573897, |
|
"grad_norm": 63.1816955794022, |
|
"learning_rate": 2.005986643175036e-07, |
|
"logits/chosen": 0.997573971748352, |
|
"logits/rejected": 0.8543837666511536, |
|
"logps/chosen": -438.85906982421875, |
|
"logps/rejected": -582.8068237304688, |
|
"loss": 0.4575, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.229027509689331, |
|
"rewards/margins": 1.5200862884521484, |
|
"rewards/rejected": -2.7491135597229004, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6094049904030711, |
|
"grad_norm": 62.41950193084954, |
|
"learning_rate": 1.9854787121128328e-07, |
|
"logits/chosen": 0.9797847867012024, |
|
"logits/rejected": 1.079450249671936, |
|
"logps/chosen": -399.32000732421875, |
|
"logps/rejected": -419.75927734375, |
|
"loss": 0.513, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4550023078918457, |
|
"rewards/margins": 0.6732853651046753, |
|
"rewards/rejected": -2.1282875537872314, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6118042226487524, |
|
"grad_norm": 57.62809380177565, |
|
"learning_rate": 1.9650068726748106e-07, |
|
"logits/chosen": 1.0366195440292358, |
|
"logits/rejected": 1.073227882385254, |
|
"logps/chosen": -447.63177490234375, |
|
"logps/rejected": -578.6515502929688, |
|
"loss": 0.5197, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5430773496627808, |
|
"rewards/margins": 1.2703707218170166, |
|
"rewards/rejected": -2.813448190689087, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6142034548944337, |
|
"grad_norm": 60.009210014532215, |
|
"learning_rate": 1.9445725608791718e-07, |
|
"logits/chosen": 1.0186638832092285, |
|
"logits/rejected": 0.943347156047821, |
|
"logps/chosen": -430.02264404296875, |
|
"logps/rejected": -630.0662841796875, |
|
"loss": 0.4709, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2625811100006104, |
|
"rewards/margins": 2.038123369216919, |
|
"rewards/rejected": -3.30070424079895, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6166026871401151, |
|
"grad_norm": 49.00247372933369, |
|
"learning_rate": 1.924177210111705e-07, |
|
"logits/chosen": 0.9420596957206726, |
|
"logits/rejected": 0.9559775590896606, |
|
"logps/chosen": -391.6435241699219, |
|
"logps/rejected": -551.275146484375, |
|
"loss": 0.5049, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.323351263999939, |
|
"rewards/margins": 1.5198358297348022, |
|
"rewards/rejected": -2.843186855316162, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6190019193857965, |
|
"grad_norm": 64.66450632824152, |
|
"learning_rate": 1.9038222510252364e-07, |
|
"logits/chosen": 0.8951179385185242, |
|
"logits/rejected": 0.8913706541061401, |
|
"logps/chosen": -422.13275146484375, |
|
"logps/rejected": -494.0396423339844, |
|
"loss": 0.4789, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2230151891708374, |
|
"rewards/margins": 0.9262048006057739, |
|
"rewards/rejected": -2.1492199897766113, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6214011516314779, |
|
"grad_norm": 74.82690790372563, |
|
"learning_rate": 1.883509111439277e-07, |
|
"logits/chosen": 1.034971833229065, |
|
"logits/rejected": 0.8996318578720093, |
|
"logps/chosen": -423.81927490234375, |
|
"logps/rejected": -636.1827392578125, |
|
"loss": 0.545, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5726258754730225, |
|
"rewards/margins": 1.4758774042129517, |
|
"rewards/rejected": -3.0485031604766846, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6238003838771593, |
|
"grad_norm": 43.10672829531682, |
|
"learning_rate": 1.8632392162398665e-07, |
|
"logits/chosen": 0.7720481157302856, |
|
"logits/rejected": 0.6963208913803101, |
|
"logps/chosen": -455.3863830566406, |
|
"logps/rejected": -621.8772583007812, |
|
"loss": 0.4755, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.1479754447937012, |
|
"rewards/margins": 1.7601661682128906, |
|
"rewards/rejected": -2.908141613006592, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6261996161228407, |
|
"grad_norm": 63.482842253520616, |
|
"learning_rate": 1.84301398727962e-07, |
|
"logits/chosen": 1.1697524785995483, |
|
"logits/rejected": 1.0048816204071045, |
|
"logps/chosen": -338.16070556640625, |
|
"logps/rejected": -563.50244140625, |
|
"loss": 0.4843, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.0349814891815186, |
|
"rewards/margins": 1.9046099185943604, |
|
"rewards/rejected": -2.939591407775879, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6285988483685221, |
|
"grad_norm": 69.77697070240897, |
|
"learning_rate": 1.8228348432779966e-07, |
|
"logits/chosen": 0.8537524938583374, |
|
"logits/rejected": 0.8137442469596863, |
|
"logps/chosen": -409.3791809082031, |
|
"logps/rejected": -485.70361328125, |
|
"loss": 0.5429, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.393723964691162, |
|
"rewards/margins": 0.9283844232559204, |
|
"rewards/rejected": -2.322108268737793, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6309980806142035, |
|
"grad_norm": 85.36339641258817, |
|
"learning_rate": 1.8027031997217773e-07, |
|
"logits/chosen": 1.0899819135665894, |
|
"logits/rejected": 0.9631819725036621, |
|
"logps/chosen": -389.0103759765625, |
|
"logps/rejected": -542.0838012695312, |
|
"loss": 0.4366, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4166555404663086, |
|
"rewards/margins": 1.345792531967163, |
|
"rewards/rejected": -2.7624480724334717, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6333973128598849, |
|
"grad_norm": 49.49777693517236, |
|
"learning_rate": 1.7826204687657758e-07, |
|
"logits/chosen": 0.8809930086135864, |
|
"logits/rejected": 0.9411085247993469, |
|
"logps/chosen": -462.8880310058594, |
|
"logps/rejected": -500.64178466796875, |
|
"loss": 0.4612, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.362559199333191, |
|
"rewards/margins": 0.8943181037902832, |
|
"rewards/rejected": -2.2568774223327637, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6357965451055663, |
|
"grad_norm": 66.4479639615787, |
|
"learning_rate": 1.762588059133781e-07, |
|
"logits/chosen": 1.0005168914794922, |
|
"logits/rejected": 1.153052806854248, |
|
"logps/chosen": -480.7499084472656, |
|
"logps/rejected": -553.7113647460938, |
|
"loss": 0.466, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5144213438034058, |
|
"rewards/margins": 1.093235731124878, |
|
"rewards/rejected": -2.6076574325561523, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6381957773512476, |
|
"grad_norm": 59.578705579629094, |
|
"learning_rate": 1.7426073760197406e-07, |
|
"logits/chosen": 0.6815598607063293, |
|
"logits/rejected": 0.5531748533248901, |
|
"logps/chosen": -427.00189208984375, |
|
"logps/rejected": -631.4588623046875, |
|
"loss": 0.5038, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.3672816753387451, |
|
"rewards/margins": 1.8084214925765991, |
|
"rewards/rejected": -3.175703525543213, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6405950095969289, |
|
"grad_norm": 39.145750962317365, |
|
"learning_rate": 1.7226798209891935e-07, |
|
"logits/chosen": 0.7526305317878723, |
|
"logits/rejected": 0.9373035430908203, |
|
"logps/chosen": -448.674072265625, |
|
"logps/rejected": -522.0548095703125, |
|
"loss": 0.4582, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6043018102645874, |
|
"rewards/margins": 1.3347322940826416, |
|
"rewards/rejected": -2.9390339851379395, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6429942418426103, |
|
"grad_norm": 52.0485958912909, |
|
"learning_rate": 1.7028067918809535e-07, |
|
"logits/chosen": 0.8610442280769348, |
|
"logits/rejected": 0.7900782823562622, |
|
"logps/chosen": -388.0408935546875, |
|
"logps/rejected": -614.5856323242188, |
|
"loss": 0.4737, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.385807752609253, |
|
"rewards/margins": 1.7768402099609375, |
|
"rewards/rejected": -3.1626479625701904, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6453934740882917, |
|
"grad_norm": 70.4165432468412, |
|
"learning_rate": 1.6829896827090584e-07, |
|
"logits/chosen": 0.8667429089546204, |
|
"logits/rejected": 0.8874995112419128, |
|
"logps/chosen": -433.28289794921875, |
|
"logps/rejected": -484.38177490234375, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.5184026956558228, |
|
"rewards/margins": 0.7777020335197449, |
|
"rewards/rejected": -2.296104907989502, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6477927063339731, |
|
"grad_norm": 53.14772640733025, |
|
"learning_rate": 1.6632298835649844e-07, |
|
"logits/chosen": 0.9356837272644043, |
|
"logits/rejected": 0.7656700611114502, |
|
"logps/chosen": -452.4273376464844, |
|
"logps/rejected": -613.3473510742188, |
|
"loss": 0.4845, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4059609174728394, |
|
"rewards/margins": 1.225271463394165, |
|
"rewards/rejected": -2.6312320232391357, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6501919385796545, |
|
"grad_norm": 108.27180197735416, |
|
"learning_rate": 1.6435287805201364e-07, |
|
"logits/chosen": 1.1747074127197266, |
|
"logits/rejected": 1.0810006856918335, |
|
"logps/chosen": -463.4851989746094, |
|
"logps/rejected": -537.5053100585938, |
|
"loss": 0.5183, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7211042642593384, |
|
"rewards/margins": 0.8061060905456543, |
|
"rewards/rejected": -2.5272104740142822, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6525911708253359, |
|
"grad_norm": 49.32911252791468, |
|
"learning_rate": 1.6238877555286207e-07, |
|
"logits/chosen": 1.057152509689331, |
|
"logits/rejected": 1.0074436664581299, |
|
"logps/chosen": -444.5403747558594, |
|
"logps/rejected": -594.4525146484375, |
|
"loss": 0.4508, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.2437794208526611, |
|
"rewards/margins": 1.3522827625274658, |
|
"rewards/rejected": -2.596062183380127, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6549904030710173, |
|
"grad_norm": 50.540722326456994, |
|
"learning_rate": 1.60430818633031e-07, |
|
"logits/chosen": 0.7453646063804626, |
|
"logits/rejected": 0.7216060757637024, |
|
"logps/chosen": -429.54248046875, |
|
"logps/rejected": -559.0650634765625, |
|
"loss": 0.4506, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2170121669769287, |
|
"rewards/margins": 1.3658866882324219, |
|
"rewards/rejected": -2.5828986167907715, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6573896353166987, |
|
"grad_norm": 51.6547346118967, |
|
"learning_rate": 1.5847914463541939e-07, |
|
"logits/chosen": 1.0355870723724365, |
|
"logits/rejected": 1.0161089897155762, |
|
"logps/chosen": -369.6821594238281, |
|
"logps/rejected": -482.0846252441406, |
|
"loss": 0.4605, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3156317472457886, |
|
"rewards/margins": 0.8946720361709595, |
|
"rewards/rejected": -2.210303783416748, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6597888675623801, |
|
"grad_norm": 47.1693862463699, |
|
"learning_rate": 1.5653389046220427e-07, |
|
"logits/chosen": 0.9648367762565613, |
|
"logits/rejected": 0.8552564382553101, |
|
"logps/chosen": -382.4488830566406, |
|
"logps/rejected": -529.5360717773438, |
|
"loss": 0.4511, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.1674479246139526, |
|
"rewards/margins": 1.2194554805755615, |
|
"rewards/rejected": -2.3869035243988037, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6621880998080614, |
|
"grad_norm": 94.58211756506084, |
|
"learning_rate": 1.545951925652375e-07, |
|
"logits/chosen": 0.9964067339897156, |
|
"logits/rejected": 1.1129343509674072, |
|
"logps/chosen": -488.83245849609375, |
|
"logps/rejected": -565.7014770507812, |
|
"loss": 0.4713, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3394157886505127, |
|
"rewards/margins": 1.3574081659317017, |
|
"rewards/rejected": -2.696824312210083, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6645873320537428, |
|
"grad_norm": 56.724968356239614, |
|
"learning_rate": 1.5266318693647423e-07, |
|
"logits/chosen": 1.0629318952560425, |
|
"logits/rejected": 1.0994447469711304, |
|
"logps/chosen": -461.70098876953125, |
|
"logps/rejected": -557.0706787109375, |
|
"loss": 0.4512, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5605425834655762, |
|
"rewards/margins": 0.9814839363098145, |
|
"rewards/rejected": -2.5420267581939697, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6669865642994242, |
|
"grad_norm": 73.07132474192694, |
|
"learning_rate": 1.5073800909843353e-07, |
|
"logits/chosen": 0.8918180465698242, |
|
"logits/rejected": 1.0692389011383057, |
|
"logps/chosen": -454.5640563964844, |
|
"logps/rejected": -514.7313842773438, |
|
"loss": 0.4737, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4592220783233643, |
|
"rewards/margins": 1.138021469116211, |
|
"rewards/rejected": -2.597243309020996, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6693857965451055, |
|
"grad_norm": 89.46334651331017, |
|
"learning_rate": 1.488197940946922e-07, |
|
"logits/chosen": 0.850295901298523, |
|
"logits/rejected": 0.8481257557868958, |
|
"logps/chosen": -464.66729736328125, |
|
"logps/rejected": -544.7948608398438, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4368793964385986, |
|
"rewards/margins": 1.4174644947052002, |
|
"rewards/rejected": -2.854343891143799, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.6717850287907869, |
|
"grad_norm": 82.15838546116684, |
|
"learning_rate": 1.4690867648041167e-07, |
|
"logits/chosen": 0.6838446855545044, |
|
"logits/rejected": 0.7786232829093933, |
|
"logps/chosen": -435.9329528808594, |
|
"logps/rejected": -568.1392211914062, |
|
"loss": 0.4845, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.379408597946167, |
|
"rewards/margins": 1.6114330291748047, |
|
"rewards/rejected": -2.990841865539551, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6741842610364683, |
|
"grad_norm": 51.992079030977465, |
|
"learning_rate": 1.4500479031289987e-07, |
|
"logits/chosen": 0.6652716994285583, |
|
"logits/rejected": 0.7421627044677734, |
|
"logps/chosen": -463.707763671875, |
|
"logps/rejected": -577.2199096679688, |
|
"loss": 0.5147, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4516760110855103, |
|
"rewards/margins": 1.2704440355300903, |
|
"rewards/rejected": -2.7221200466156006, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.6765834932821497, |
|
"grad_norm": 52.5558212553436, |
|
"learning_rate": 1.4310826914220747e-07, |
|
"logits/chosen": 0.7784019112586975, |
|
"logits/rejected": 0.793055534362793, |
|
"logps/chosen": -513.3424072265625, |
|
"logps/rejected": -591.7744750976562, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6059348583221436, |
|
"rewards/margins": 0.9743717312812805, |
|
"rewards/rejected": -2.5803067684173584, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.6789827255278311, |
|
"grad_norm": 59.6538875352704, |
|
"learning_rate": 1.412192460017597e-07, |
|
"logits/chosen": 0.8990803956985474, |
|
"logits/rejected": 0.7679176926612854, |
|
"logps/chosen": -451.548828125, |
|
"logps/rejected": -565.1710205078125, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6711994409561157, |
|
"rewards/margins": 1.1422600746154785, |
|
"rewards/rejected": -2.8134593963623047, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.6813819577735125, |
|
"grad_norm": 46.30676257604635, |
|
"learning_rate": 1.3933785339902504e-07, |
|
"logits/chosen": 1.0149420499801636, |
|
"logits/rejected": 0.8020240664482117, |
|
"logps/chosen": -364.19476318359375, |
|
"logps/rejected": -524.8628540039062, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2790462970733643, |
|
"rewards/margins": 1.1484087705612183, |
|
"rewards/rejected": -2.427455425262451, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.6837811900191939, |
|
"grad_norm": 48.63204270378802, |
|
"learning_rate": 1.374642233062197e-07, |
|
"logits/chosen": 0.8761056065559387, |
|
"logits/rejected": 0.8987948298454285, |
|
"logps/chosen": -475.8661193847656, |
|
"logps/rejected": -552.35009765625, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4442270994186401, |
|
"rewards/margins": 1.1364909410476685, |
|
"rewards/rejected": -2.5807182788848877, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6861804222648752, |
|
"grad_norm": 102.522150067521, |
|
"learning_rate": 1.355984871510511e-07, |
|
"logits/chosen": 0.9235042333602905, |
|
"logits/rejected": 0.8118287920951843, |
|
"logps/chosen": -475.92120361328125, |
|
"logps/rejected": -574.1943359375, |
|
"loss": 0.4444, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.384049892425537, |
|
"rewards/margins": 0.9419873952865601, |
|
"rewards/rejected": -2.3260374069213867, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.6885796545105566, |
|
"grad_norm": 51.563583755724764, |
|
"learning_rate": 1.3374077580749783e-07, |
|
"logits/chosen": 1.0318387746810913, |
|
"logits/rejected": 0.881234347820282, |
|
"logps/chosen": -353.23553466796875, |
|
"logps/rejected": -484.5628967285156, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.253504753112793, |
|
"rewards/margins": 1.1224879026412964, |
|
"rewards/rejected": -2.3759925365448, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.690978886756238, |
|
"grad_norm": 55.691275390702216, |
|
"learning_rate": 1.3189121958663024e-07, |
|
"logits/chosen": 0.8622503280639648, |
|
"logits/rejected": 1.078961730003357, |
|
"logps/chosen": -507.03955078125, |
|
"logps/rejected": -526.7344360351562, |
|
"loss": 0.4954, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7587352991104126, |
|
"rewards/margins": 0.5359312891960144, |
|
"rewards/rejected": -2.2946667671203613, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.6933781190019194, |
|
"grad_norm": 62.99944785280698, |
|
"learning_rate": 1.3004994822746895e-07, |
|
"logits/chosen": 0.7162820100784302, |
|
"logits/rejected": 0.7219021320343018, |
|
"logps/chosen": -421.7882385253906, |
|
"logps/rejected": -524.5950927734375, |
|
"loss": 0.5103, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.39750075340271, |
|
"rewards/margins": 0.9577351808547974, |
|
"rewards/rejected": -2.3552358150482178, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.6957773512476008, |
|
"grad_norm": 79.70508618196584, |
|
"learning_rate": 1.2821709088788434e-07, |
|
"logits/chosen": 0.9651128649711609, |
|
"logits/rejected": 0.8870256543159485, |
|
"logps/chosen": -394.3436584472656, |
|
"logps/rejected": -515.9041748046875, |
|
"loss": 0.5056, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4795421361923218, |
|
"rewards/margins": 1.217334508895874, |
|
"rewards/rejected": -2.6968765258789062, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6981765834932822, |
|
"grad_norm": 65.7601063958055, |
|
"learning_rate": 1.2639277613553736e-07, |
|
"logits/chosen": 1.2349733114242554, |
|
"logits/rejected": 1.1591541767120361, |
|
"logps/chosen": -371.3346252441406, |
|
"logps/rejected": -476.70703125, |
|
"loss": 0.4757, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3660104274749756, |
|
"rewards/margins": 1.0262000560760498, |
|
"rewards/rejected": -2.3922104835510254, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7005758157389635, |
|
"grad_norm": 55.77955194376905, |
|
"learning_rate": 1.2457713193885975e-07, |
|
"logits/chosen": 0.8615692257881165, |
|
"logits/rejected": 0.6960811614990234, |
|
"logps/chosen": -353.88507080078125, |
|
"logps/rejected": -502.08135986328125, |
|
"loss": 0.4845, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4362457990646362, |
|
"rewards/margins": 1.1442558765411377, |
|
"rewards/rejected": -2.5805020332336426, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7029750479846449, |
|
"grad_norm": 62.23924834272331, |
|
"learning_rate": 1.2277028565807838e-07, |
|
"logits/chosen": 0.9941568374633789, |
|
"logits/rejected": 1.0033624172210693, |
|
"logps/chosen": -417.5264587402344, |
|
"logps/rejected": -504.75250244140625, |
|
"loss": 0.4785, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2873165607452393, |
|
"rewards/margins": 0.935228168964386, |
|
"rewards/rejected": -2.2225444316864014, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7053742802303263, |
|
"grad_norm": 78.04297561166587, |
|
"learning_rate": 1.209723640362815e-07, |
|
"logits/chosen": 0.8165215253829956, |
|
"logits/rejected": 0.7669203281402588, |
|
"logps/chosen": -452.28656005859375, |
|
"logps/rejected": -585.2645263671875, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4614063501358032, |
|
"rewards/margins": 1.4426178932189941, |
|
"rewards/rejected": -2.904024124145508, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7077735124760077, |
|
"grad_norm": 46.80948750530279, |
|
"learning_rate": 1.191834931905277e-07, |
|
"logits/chosen": 0.850195050239563, |
|
"logits/rejected": 0.769768238067627, |
|
"logps/chosen": -516.2821044921875, |
|
"logps/rejected": -631.9750366210938, |
|
"loss": 0.4734, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7611281871795654, |
|
"rewards/margins": 1.155269742012024, |
|
"rewards/rejected": -2.9163975715637207, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.710172744721689, |
|
"grad_norm": 48.50100775989009, |
|
"learning_rate": 1.1740379860299988e-07, |
|
"logits/chosen": 0.9858098030090332, |
|
"logits/rejected": 0.8773143887519836, |
|
"logps/chosen": -465.1497497558594, |
|
"logps/rejected": -579.1061401367188, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4429266452789307, |
|
"rewards/margins": 1.0109323263168335, |
|
"rewards/rejected": -2.4538588523864746, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7125719769673704, |
|
"grad_norm": 48.36951332908679, |
|
"learning_rate": 1.1563340511220254e-07, |
|
"logits/chosen": 0.8811975717544556, |
|
"logits/rejected": 0.9356629252433777, |
|
"logps/chosen": -498.65484619140625, |
|
"logps/rejected": -596.5376586914062, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5424554347991943, |
|
"rewards/margins": 1.2407790422439575, |
|
"rewards/rejected": -2.7832343578338623, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7149712092130518, |
|
"grad_norm": 60.99551216478677, |
|
"learning_rate": 1.1387243690420556e-07, |
|
"logits/chosen": 0.8725711703300476, |
|
"logits/rejected": 0.8699949383735657, |
|
"logps/chosen": -466.63568115234375, |
|
"logps/rejected": -623.0892944335938, |
|
"loss": 0.4683, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.268715262413025, |
|
"rewards/margins": 1.6545215845108032, |
|
"rewards/rejected": -2.92323637008667, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7173704414587332, |
|
"grad_norm": 80.5419760145001, |
|
"learning_rate": 1.1212101750393235e-07, |
|
"logits/chosen": 1.0529805421829224, |
|
"logits/rejected": 1.0967413187026978, |
|
"logps/chosen": -428.4676818847656, |
|
"logps/rejected": -532.6207275390625, |
|
"loss": 0.4462, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4159809350967407, |
|
"rewards/margins": 1.3065288066864014, |
|
"rewards/rejected": -2.7225098609924316, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7197696737044146, |
|
"grad_norm": 55.60067829764303, |
|
"learning_rate": 1.1037926976649562e-07, |
|
"logits/chosen": 0.8879537582397461, |
|
"logits/rejected": 0.8326207995414734, |
|
"logps/chosen": -460.4139099121094, |
|
"logps/rejected": -608.259033203125, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6177055835723877, |
|
"rewards/margins": 1.2510621547698975, |
|
"rewards/rejected": -2.8687679767608643, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.722168905950096, |
|
"grad_norm": 71.68404138373934, |
|
"learning_rate": 1.0864731586857936e-07, |
|
"logits/chosen": 0.9635574221611023, |
|
"logits/rejected": 1.0736175775527954, |
|
"logps/chosen": -470.2195739746094, |
|
"logps/rejected": -569.334716796875, |
|
"loss": 0.4425, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.437206745147705, |
|
"rewards/margins": 1.3707683086395264, |
|
"rewards/rejected": -2.8079750537872314, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7245681381957774, |
|
"grad_norm": 62.98132303512444, |
|
"learning_rate": 1.0692527729986839e-07, |
|
"logits/chosen": 0.7009516954421997, |
|
"logits/rejected": 0.7479378581047058, |
|
"logps/chosen": -449.1669006347656, |
|
"logps/rejected": -546.8016967773438, |
|
"loss": 0.4466, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.46233332157135, |
|
"rewards/margins": 1.1905874013900757, |
|
"rewards/rejected": -2.652920722961426, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7269673704414588, |
|
"grad_norm": 59.80214586775118, |
|
"learning_rate": 1.0521327485452692e-07, |
|
"logits/chosen": 1.0582497119903564, |
|
"logits/rejected": 1.0464842319488525, |
|
"logps/chosen": -431.91827392578125, |
|
"logps/rejected": -532.6561279296875, |
|
"loss": 0.4775, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3770239353179932, |
|
"rewards/margins": 1.2723242044448853, |
|
"rewards/rejected": -2.6493477821350098, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7293666026871402, |
|
"grad_norm": 73.58921534110489, |
|
"learning_rate": 1.0351142862272468e-07, |
|
"logits/chosen": 0.879109263420105, |
|
"logits/rejected": 0.9201618432998657, |
|
"logps/chosen": -403.30743408203125, |
|
"logps/rejected": -587.2562255859375, |
|
"loss": 0.4736, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4832494258880615, |
|
"rewards/margins": 1.9308059215545654, |
|
"rewards/rejected": -3.414055347442627, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7317658349328215, |
|
"grad_norm": 51.40228578276026, |
|
"learning_rate": 1.0181985798221343e-07, |
|
"logits/chosen": 1.0463981628417969, |
|
"logits/rejected": 0.901201605796814, |
|
"logps/chosen": -464.07000732421875, |
|
"logps/rejected": -615.8527221679688, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5733706951141357, |
|
"rewards/margins": 1.399388074874878, |
|
"rewards/rejected": -2.9727585315704346, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7341650671785028, |
|
"grad_norm": 65.61085222279628, |
|
"learning_rate": 1.0013868158995329e-07, |
|
"logits/chosen": 1.1604185104370117, |
|
"logits/rejected": 1.1423100233078003, |
|
"logps/chosen": -460.8193359375, |
|
"logps/rejected": -542.03515625, |
|
"loss": 0.4832, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6957588195800781, |
|
"rewards/margins": 1.095609188079834, |
|
"rewards/rejected": -2.791368007659912, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7365642994241842, |
|
"grad_norm": 49.561484954300994, |
|
"learning_rate": 9.84680173737887e-08, |
|
"logits/chosen": 1.0651283264160156, |
|
"logits/rejected": 1.0810836553573608, |
|
"logps/chosen": -474.07098388671875, |
|
"logps/rejected": -554.2447509765625, |
|
"loss": 0.4722, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7141796350479126, |
|
"rewards/margins": 1.1244512796401978, |
|
"rewards/rejected": -2.8386306762695312, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7389635316698656, |
|
"grad_norm": 64.37329189023627, |
|
"learning_rate": 9.680798252417713e-08, |
|
"logits/chosen": 1.1131503582000732, |
|
"logits/rejected": 1.0831563472747803, |
|
"logps/chosen": -374.66558837890625, |
|
"logps/rejected": -521.4501953125, |
|
"loss": 0.4751, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3161659240722656, |
|
"rewards/margins": 1.0668962001800537, |
|
"rewards/rejected": -2.3830621242523193, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.741362763915547, |
|
"grad_norm": 71.22115543081615, |
|
"learning_rate": 9.515869348596808e-08, |
|
"logits/chosen": 0.777843177318573, |
|
"logits/rejected": 0.7904726266860962, |
|
"logps/chosen": -488.55084228515625, |
|
"logps/rejected": -593.0804443359375, |
|
"loss": 0.4853, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6046578884124756, |
|
"rewards/margins": 1.401132345199585, |
|
"rewards/rejected": -3.0057902336120605, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7437619961612284, |
|
"grad_norm": 62.33912841030523, |
|
"learning_rate": 9.352026595023493e-08, |
|
"logits/chosen": 0.8172556161880493, |
|
"logits/rejected": 0.8972476124763489, |
|
"logps/chosen": -491.6143493652344, |
|
"logps/rejected": -543.5655517578125, |
|
"loss": 0.4929, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.522455096244812, |
|
"rewards/margins": 0.8369817733764648, |
|
"rewards/rejected": -2.3594369888305664, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7461612284069098, |
|
"grad_norm": 79.85474261597784, |
|
"learning_rate": 9.189281484616004e-08, |
|
"logits/chosen": 0.9176260828971863, |
|
"logits/rejected": 0.8250390887260437, |
|
"logps/chosen": -391.8580627441406, |
|
"logps/rejected": -560.777099609375, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5633355379104614, |
|
"rewards/margins": 1.1446614265441895, |
|
"rewards/rejected": -2.7079968452453613, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7485604606525912, |
|
"grad_norm": 77.21353171900161, |
|
"learning_rate": 9.027645433297249e-08, |
|
"logits/chosen": 0.7309160828590393, |
|
"logits/rejected": 0.8586199879646301, |
|
"logps/chosen": -559.169677734375, |
|
"logps/rejected": -628.1065673828125, |
|
"loss": 0.5363, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0104846954345703, |
|
"rewards/margins": 1.0945736169815063, |
|
"rewards/rejected": -3.105058431625366, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7509596928982726, |
|
"grad_norm": 74.79700171288587, |
|
"learning_rate": 8.867129779194066e-08, |
|
"logits/chosen": 0.8999913334846497, |
|
"logits/rejected": 0.9396308660507202, |
|
"logps/chosen": -338.5341796875, |
|
"logps/rejected": -501.94390869140625, |
|
"loss": 0.4703, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9048227071762085, |
|
"rewards/margins": 1.6569000482559204, |
|
"rewards/rejected": -2.561722755432129, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.753358925143954, |
|
"grad_norm": 69.60793160439464, |
|
"learning_rate": 8.707745781841866e-08, |
|
"logits/chosen": 0.7898174524307251, |
|
"logits/rejected": 0.8277270197868347, |
|
"logps/chosen": -390.6011657714844, |
|
"logps/rejected": -515.8409423828125, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4156880378723145, |
|
"rewards/margins": 1.2011487483978271, |
|
"rewards/rejected": -2.6168370246887207, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7557581573896354, |
|
"grad_norm": 42.40544564620146, |
|
"learning_rate": 8.549504621394831e-08, |
|
"logits/chosen": 0.9087181091308594, |
|
"logits/rejected": 0.8868638873100281, |
|
"logps/chosen": -385.9741516113281, |
|
"logps/rejected": -534.3232421875, |
|
"loss": 0.4084, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.074923038482666, |
|
"rewards/margins": 1.5154277086257935, |
|
"rewards/rejected": -2.59035062789917, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7581573896353166, |
|
"grad_norm": 95.93544600400732, |
|
"learning_rate": 8.392417397841703e-08, |
|
"logits/chosen": 1.0622968673706055, |
|
"logits/rejected": 1.0777418613433838, |
|
"logps/chosen": -418.54754638671875, |
|
"logps/rejected": -527.2640991210938, |
|
"loss": 0.4674, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2520979642868042, |
|
"rewards/margins": 0.9434958696365356, |
|
"rewards/rejected": -2.1955935955047607, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.760556621880998, |
|
"grad_norm": 57.426941216354265, |
|
"learning_rate": 8.236495130227083e-08, |
|
"logits/chosen": 0.9564765691757202, |
|
"logits/rejected": 1.1413500308990479, |
|
"logps/chosen": -446.05267333984375, |
|
"logps/rejected": -567.2977294921875, |
|
"loss": 0.5033, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.2580714225769043, |
|
"rewards/margins": 1.5958807468414307, |
|
"rewards/rejected": -2.853951930999756, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.7629558541266794, |
|
"grad_norm": 74.6309792254669, |
|
"learning_rate": 8.081748755878612e-08, |
|
"logits/chosen": 1.0146820545196533, |
|
"logits/rejected": 1.1107871532440186, |
|
"logps/chosen": -455.269287109375, |
|
"logps/rejected": -513.904296875, |
|
"loss": 0.4776, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5008915662765503, |
|
"rewards/margins": 1.0291327238082886, |
|
"rewards/rejected": -2.530024766921997, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.7653550863723608, |
|
"grad_norm": 53.26267383310568, |
|
"learning_rate": 7.928189129639632e-08, |
|
"logits/chosen": 0.9971107244491577, |
|
"logits/rejected": 0.9133806228637695, |
|
"logps/chosen": -416.66717529296875, |
|
"logps/rejected": -550.373046875, |
|
"loss": 0.465, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.3668549060821533, |
|
"rewards/margins": 1.2376948595046997, |
|
"rewards/rejected": -2.6045498847961426, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.7677543186180422, |
|
"grad_norm": 88.00137208224452, |
|
"learning_rate": 7.775827023107834e-08, |
|
"logits/chosen": 0.9374136924743652, |
|
"logits/rejected": 0.9528753161430359, |
|
"logps/chosen": -433.9676208496094, |
|
"logps/rejected": -541.1063232421875, |
|
"loss": 0.5202, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.500398874282837, |
|
"rewards/margins": 0.8700464367866516, |
|
"rewards/rejected": -2.370445489883423, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7701535508637236, |
|
"grad_norm": 69.77894187576233, |
|
"learning_rate": 7.624673123879682e-08, |
|
"logits/chosen": 0.5996731519699097, |
|
"logits/rejected": 0.7502093315124512, |
|
"logps/chosen": -421.07940673828125, |
|
"logps/rejected": -501.9176330566406, |
|
"loss": 0.497, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4724102020263672, |
|
"rewards/margins": 0.9041327238082886, |
|
"rewards/rejected": -2.3765430450439453, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.772552783109405, |
|
"grad_norm": 69.2905643020607, |
|
"learning_rate": 7.474738034800663e-08, |
|
"logits/chosen": 0.8716554641723633, |
|
"logits/rejected": 0.7565750479698181, |
|
"logps/chosen": -361.068359375, |
|
"logps/rejected": -492.9208068847656, |
|
"loss": 0.5148, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2246003150939941, |
|
"rewards/margins": 1.47506582736969, |
|
"rewards/rejected": -2.6996662616729736, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7749520153550864, |
|
"grad_norm": 64.68066978059316, |
|
"learning_rate": 7.326032273221606e-08, |
|
"logits/chosen": 1.0276987552642822, |
|
"logits/rejected": 0.9690669178962708, |
|
"logps/chosen": -465.96917724609375, |
|
"logps/rejected": -573.2808227539062, |
|
"loss": 0.4531, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3623851537704468, |
|
"rewards/margins": 1.2746822834014893, |
|
"rewards/rejected": -2.6370673179626465, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.7773512476007678, |
|
"grad_norm": 59.391414436729164, |
|
"learning_rate": 7.178566270260872e-08, |
|
"logits/chosen": 1.1677882671356201, |
|
"logits/rejected": 1.063370704650879, |
|
"logps/chosen": -447.7652893066406, |
|
"logps/rejected": -582.9169311523438, |
|
"loss": 0.51, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.485723614692688, |
|
"rewards/margins": 1.0630359649658203, |
|
"rewards/rejected": -2.5487594604492188, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.7797504798464492, |
|
"grad_norm": 72.96819940548642, |
|
"learning_rate": 7.032350370072709e-08, |
|
"logits/chosen": 0.8832002878189087, |
|
"logits/rejected": 0.9215946197509766, |
|
"logps/chosen": -436.1380310058594, |
|
"logps/rejected": -573.0477905273438, |
|
"loss": 0.4497, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.301335334777832, |
|
"rewards/margins": 1.4504317045211792, |
|
"rewards/rejected": -2.7517669200897217, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7821497120921305, |
|
"grad_norm": 57.49963781238441, |
|
"learning_rate": 6.887394829121596e-08, |
|
"logits/chosen": 1.0455777645111084, |
|
"logits/rejected": 1.0405381917953491, |
|
"logps/chosen": -446.6703186035156, |
|
"logps/rejected": -635.7301025390625, |
|
"loss": 0.4415, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4371367692947388, |
|
"rewards/margins": 2.05957293510437, |
|
"rewards/rejected": -3.4967098236083984, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.7845489443378119, |
|
"grad_norm": 49.50295167535732, |
|
"learning_rate": 6.743709815462833e-08, |
|
"logits/chosen": 0.8270001411437988, |
|
"logits/rejected": 0.8978917002677917, |
|
"logps/chosen": -444.87396240234375, |
|
"logps/rejected": -529.2759399414062, |
|
"loss": 0.4577, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4605473279953003, |
|
"rewards/margins": 1.1724517345428467, |
|
"rewards/rejected": -2.6329989433288574, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.7869481765834933, |
|
"grad_norm": 61.98472853588885, |
|
"learning_rate": 6.601305408029287e-08, |
|
"logits/chosen": 1.2570455074310303, |
|
"logits/rejected": 1.3103208541870117, |
|
"logps/chosen": -445.773681640625, |
|
"logps/rejected": -592.1373291015625, |
|
"loss": 0.458, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6690900325775146, |
|
"rewards/margins": 1.4836785793304443, |
|
"rewards/rejected": -3.152768611907959, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.7893474088291746, |
|
"grad_norm": 80.73715094651381, |
|
"learning_rate": 6.460191595924366e-08, |
|
"logits/chosen": 0.9210006594657898, |
|
"logits/rejected": 0.9131428003311157, |
|
"logps/chosen": -466.51190185546875, |
|
"logps/rejected": -587.1438598632812, |
|
"loss": 0.4679, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6189082860946655, |
|
"rewards/margins": 1.2230169773101807, |
|
"rewards/rejected": -2.8419253826141357, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.791746641074856, |
|
"grad_norm": 93.25084862169949, |
|
"learning_rate": 6.320378277721342e-08, |
|
"logits/chosen": 1.1030242443084717, |
|
"logits/rejected": 1.0518558025360107, |
|
"logps/chosen": -482.72283935546875, |
|
"logps/rejected": -543.3652954101562, |
|
"loss": 0.4791, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9328705072402954, |
|
"rewards/margins": 0.7048792839050293, |
|
"rewards/rejected": -2.6377501487731934, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7941458733205374, |
|
"grad_norm": 59.23305315818277, |
|
"learning_rate": 6.181875260769032e-08, |
|
"logits/chosen": 0.9242640733718872, |
|
"logits/rejected": 1.1043177843093872, |
|
"logps/chosen": -462.8280334472656, |
|
"logps/rejected": -513.4354858398438, |
|
"loss": 0.4901, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.272550344467163, |
|
"rewards/margins": 1.1968976259231567, |
|
"rewards/rejected": -2.469447612762451, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.7965451055662188, |
|
"grad_norm": 66.64302830502234, |
|
"learning_rate": 6.044692260503797e-08, |
|
"logits/chosen": 1.0391790866851807, |
|
"logits/rejected": 1.0512213706970215, |
|
"logps/chosen": -506.3793029785156, |
|
"logps/rejected": -625.9534301757812, |
|
"loss": 0.4336, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6041486263275146, |
|
"rewards/margins": 1.4524866342544556, |
|
"rewards/rejected": -3.0566353797912598, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.7989443378119002, |
|
"grad_norm": 61.47567817354362, |
|
"learning_rate": 5.9088388997680984e-08, |
|
"logits/chosen": 0.806106448173523, |
|
"logits/rejected": 0.9027592539787292, |
|
"logps/chosen": -525.6714477539062, |
|
"logps/rejected": -593.1508178710938, |
|
"loss": 0.4695, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.538500428199768, |
|
"rewards/margins": 1.357391595840454, |
|
"rewards/rejected": -2.8958919048309326, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8013435700575816, |
|
"grad_norm": 68.59118231824314, |
|
"learning_rate": 5.774324708135439e-08, |
|
"logits/chosen": 1.0598998069763184, |
|
"logits/rejected": 1.137149691581726, |
|
"logps/chosen": -395.7103576660156, |
|
"logps/rejected": -495.3836975097656, |
|
"loss": 0.4924, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4034923315048218, |
|
"rewards/margins": 1.137370228767395, |
|
"rewards/rejected": -2.540862560272217, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.803742802303263, |
|
"grad_norm": 45.96623521548647, |
|
"learning_rate": 5.641159121241953e-08, |
|
"logits/chosen": 1.1010735034942627, |
|
"logits/rejected": 0.9745647311210632, |
|
"logps/chosen": -378.5804138183594, |
|
"logps/rejected": -560.9840087890625, |
|
"loss": 0.4562, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2688277959823608, |
|
"rewards/margins": 1.444834589958191, |
|
"rewards/rejected": -2.7136621475219727, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8061420345489443, |
|
"grad_norm": 60.01965395449306, |
|
"learning_rate": 5.5093514801245106e-08, |
|
"logits/chosen": 1.02316153049469, |
|
"logits/rejected": 0.9464454650878906, |
|
"logps/chosen": -443.47076416015625, |
|
"logps/rejected": -589.4595336914062, |
|
"loss": 0.5012, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5698074102401733, |
|
"rewards/margins": 1.1526679992675781, |
|
"rewards/rejected": -2.722475528717041, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8085412667946257, |
|
"grad_norm": 45.69703636421495, |
|
"learning_rate": 5.378911030565453e-08, |
|
"logits/chosen": 0.9663570523262024, |
|
"logits/rejected": 0.9202507138252258, |
|
"logps/chosen": -502.4654846191406, |
|
"logps/rejected": -659.4030151367188, |
|
"loss": 0.4579, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.710269570350647, |
|
"rewards/margins": 1.3103970289230347, |
|
"rewards/rejected": -3.0206668376922607, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8109404990403071, |
|
"grad_norm": 56.3784671312179, |
|
"learning_rate": 5.249846922444101e-08, |
|
"logits/chosen": 1.131562352180481, |
|
"logits/rejected": 1.0653345584869385, |
|
"logps/chosen": -406.16668701171875, |
|
"logps/rejected": -580.4251098632812, |
|
"loss": 0.4642, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6179018020629883, |
|
"rewards/margins": 1.8569211959838867, |
|
"rewards/rejected": -3.474823474884033, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8133397312859885, |
|
"grad_norm": 142.3568470454038, |
|
"learning_rate": 5.122168209094865e-08, |
|
"logits/chosen": 1.1798999309539795, |
|
"logits/rejected": 1.1666319370269775, |
|
"logps/chosen": -420.8440856933594, |
|
"logps/rejected": -493.9124450683594, |
|
"loss": 0.466, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6935014724731445, |
|
"rewards/margins": 0.8110185861587524, |
|
"rewards/rejected": -2.5045199394226074, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8157389635316699, |
|
"grad_norm": 60.507992958343735, |
|
"learning_rate": 4.995883846672222e-08, |
|
"logits/chosen": 0.7668262720108032, |
|
"logits/rejected": 1.0279020071029663, |
|
"logps/chosen": -585.5093383789062, |
|
"logps/rejected": -586.260009765625, |
|
"loss": 0.4811, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7469886541366577, |
|
"rewards/margins": 0.8090783953666687, |
|
"rewards/rejected": -2.5560669898986816, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8181381957773513, |
|
"grad_norm": 49.02884505358885, |
|
"learning_rate": 4.871002693522486e-08, |
|
"logits/chosen": 0.9770414233207703, |
|
"logits/rejected": 0.9699662923812866, |
|
"logps/chosen": -450.8439025878906, |
|
"logps/rejected": -512.0349731445312, |
|
"loss": 0.4633, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4404536485671997, |
|
"rewards/margins": 0.9168438911437988, |
|
"rewards/rejected": -2.357297420501709, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8205374280230326, |
|
"grad_norm": 44.1172937326225, |
|
"learning_rate": 4.7475335095623956e-08, |
|
"logits/chosen": 1.1203324794769287, |
|
"logits/rejected": 1.0058125257492065, |
|
"logps/chosen": -458.5472106933594, |
|
"logps/rejected": -564.257080078125, |
|
"loss": 0.4673, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6246845722198486, |
|
"rewards/margins": 1.2415374517440796, |
|
"rewards/rejected": -2.8662219047546387, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.822936660268714, |
|
"grad_norm": 102.51628881366037, |
|
"learning_rate": 4.6254849556646714e-08, |
|
"logits/chosen": 0.8426109552383423, |
|
"logits/rejected": 0.8990569114685059, |
|
"logps/chosen": -488.92413330078125, |
|
"logps/rejected": -602.349609375, |
|
"loss": 0.4922, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5736935138702393, |
|
"rewards/margins": 1.458703637123108, |
|
"rewards/rejected": -3.0323970317840576, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8253358925143954, |
|
"grad_norm": 53.883343734427164, |
|
"learning_rate": 4.504865593050483e-08, |
|
"logits/chosen": 1.016815423965454, |
|
"logits/rejected": 0.9943561553955078, |
|
"logps/chosen": -467.83453369140625, |
|
"logps/rejected": -593.01953125, |
|
"loss": 0.4801, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6717674732208252, |
|
"rewards/margins": 1.153261423110962, |
|
"rewards/rejected": -2.825028896331787, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8277351247600768, |
|
"grad_norm": 74.42115483435119, |
|
"learning_rate": 4.385683882688895e-08, |
|
"logits/chosen": 0.7937738299369812, |
|
"logits/rejected": 0.9189519882202148, |
|
"logps/chosen": -491.814208984375, |
|
"logps/rejected": -503.80535888671875, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.691393494606018, |
|
"rewards/margins": 0.6737578511238098, |
|
"rewards/rejected": -2.3651511669158936, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.8301343570057581, |
|
"grad_norm": 64.82639865402723, |
|
"learning_rate": 4.2679481847033985e-08, |
|
"logits/chosen": 1.1066054105758667, |
|
"logits/rejected": 1.0938472747802734, |
|
"logps/chosen": -444.3915100097656, |
|
"logps/rejected": -583.7407836914062, |
|
"loss": 0.5008, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.487115502357483, |
|
"rewards/margins": 1.296104073524475, |
|
"rewards/rejected": -2.783219575881958, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8325335892514395, |
|
"grad_norm": 51.71343363724296, |
|
"learning_rate": 4.151666757785435e-08, |
|
"logits/chosen": 1.0025551319122314, |
|
"logits/rejected": 0.9638897180557251, |
|
"logps/chosen": -405.7477111816406, |
|
"logps/rejected": -564.5352783203125, |
|
"loss": 0.4562, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2101963758468628, |
|
"rewards/margins": 1.606525182723999, |
|
"rewards/rejected": -2.8167214393615723, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8349328214971209, |
|
"grad_norm": 55.251652522250346, |
|
"learning_rate": 4.036847758615136e-08, |
|
"logits/chosen": 0.8747108578681946, |
|
"logits/rejected": 0.9431697726249695, |
|
"logps/chosen": -473.67254638671875, |
|
"logps/rejected": -588.3966064453125, |
|
"loss": 0.5029, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.9702599048614502, |
|
"rewards/margins": 1.0397762060165405, |
|
"rewards/rejected": -3.010035991668701, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8373320537428023, |
|
"grad_norm": 43.456294897177564, |
|
"learning_rate": 3.923499241289113e-08, |
|
"logits/chosen": 0.8632869720458984, |
|
"logits/rejected": 0.9594618082046509, |
|
"logps/chosen": -519.5004272460938, |
|
"logps/rejected": -555.9515380859375, |
|
"loss": 0.525, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7276846170425415, |
|
"rewards/margins": 1.0071427822113037, |
|
"rewards/rejected": -2.7348270416259766, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8397312859884837, |
|
"grad_norm": 42.813192158424926, |
|
"learning_rate": 3.811629156755541e-08, |
|
"logits/chosen": 0.8604679107666016, |
|
"logits/rejected": 0.8224517703056335, |
|
"logps/chosen": -472.61846923828125, |
|
"logps/rejected": -593.2980346679688, |
|
"loss": 0.4953, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4025806188583374, |
|
"rewards/margins": 1.3147811889648438, |
|
"rewards/rejected": -2.7173619270324707, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8421305182341651, |
|
"grad_norm": 56.73755352202025, |
|
"learning_rate": 3.701245352256391e-08, |
|
"logits/chosen": 0.9600769877433777, |
|
"logits/rejected": 1.060494065284729, |
|
"logps/chosen": -472.34918212890625, |
|
"logps/rejected": -525.0235595703125, |
|
"loss": 0.4697, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3924285173416138, |
|
"rewards/margins": 0.8397138714790344, |
|
"rewards/rejected": -2.232142210006714, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8445297504798465, |
|
"grad_norm": 59.642963309795554, |
|
"learning_rate": 3.592355570776984e-08, |
|
"logits/chosen": 0.8754169344902039, |
|
"logits/rejected": 0.8556619882583618, |
|
"logps/chosen": -399.696044921875, |
|
"logps/rejected": -517.2654418945312, |
|
"loss": 0.464, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3005479574203491, |
|
"rewards/margins": 1.060710072517395, |
|
"rewards/rejected": -2.361258029937744, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8469289827255279, |
|
"grad_norm": 46.921029777465385, |
|
"learning_rate": 3.484967450502904e-08, |
|
"logits/chosen": 1.0747076272964478, |
|
"logits/rejected": 1.013808250427246, |
|
"logps/chosen": -374.58447265625, |
|
"logps/rejected": -558.8201904296875, |
|
"loss": 0.481, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2730499505996704, |
|
"rewards/margins": 1.3839048147201538, |
|
"rewards/rejected": -2.656954765319824, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8493282149712092, |
|
"grad_norm": 68.77532256341718, |
|
"learning_rate": 3.3790885242841296e-08, |
|
"logits/chosen": 0.7924941778182983, |
|
"logits/rejected": 0.7915457487106323, |
|
"logps/chosen": -456.20135498046875, |
|
"logps/rejected": -617.9271850585938, |
|
"loss": 0.4408, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6541321277618408, |
|
"rewards/margins": 1.639906644821167, |
|
"rewards/rejected": -3.294038772583008, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8517274472168906, |
|
"grad_norm": 91.63684779774286, |
|
"learning_rate": 3.274726219106677e-08, |
|
"logits/chosen": 0.7711794376373291, |
|
"logits/rejected": 0.769540011882782, |
|
"logps/chosen": -506.12835693359375, |
|
"logps/rejected": -617.2738037109375, |
|
"loss": 0.4757, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6386969089508057, |
|
"rewards/margins": 1.224452257156372, |
|
"rewards/rejected": -2.8631489276885986, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8541266794625719, |
|
"grad_norm": 55.91407448509537, |
|
"learning_rate": 3.171887855571642e-08, |
|
"logits/chosen": 0.9632130861282349, |
|
"logits/rejected": 0.9312864542007446, |
|
"logps/chosen": -391.71295166015625, |
|
"logps/rejected": -468.4722595214844, |
|
"loss": 0.4862, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.348204255104065, |
|
"rewards/margins": 0.7907976508140564, |
|
"rewards/rejected": -2.1390020847320557, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8565259117082533, |
|
"grad_norm": 60.804344304487145, |
|
"learning_rate": 3.070580647381643e-08, |
|
"logits/chosen": 0.9375985264778137, |
|
"logits/rejected": 0.8940775990486145, |
|
"logps/chosen": -417.53125, |
|
"logps/rejected": -574.74609375, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3990520238876343, |
|
"rewards/margins": 1.6231104135513306, |
|
"rewards/rejected": -3.0221621990203857, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8589251439539347, |
|
"grad_norm": 52.48796206102305, |
|
"learning_rate": 2.9708117008348576e-08, |
|
"logits/chosen": 1.108391523361206, |
|
"logits/rejected": 1.2146079540252686, |
|
"logps/chosen": -507.42010498046875, |
|
"logps/rejected": -544.3141479492188, |
|
"loss": 0.4548, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6256961822509766, |
|
"rewards/margins": 0.867256760597229, |
|
"rewards/rejected": -2.492953300476074, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.8613243761996161, |
|
"grad_norm": 55.95560336738377, |
|
"learning_rate": 2.8725880143264992e-08, |
|
"logits/chosen": 0.9215981364250183, |
|
"logits/rejected": 0.8801361918449402, |
|
"logps/chosen": -471.855224609375, |
|
"logps/rejected": -613.532958984375, |
|
"loss": 0.5086, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8285738229751587, |
|
"rewards/margins": 1.022980809211731, |
|
"rewards/rejected": -2.8515543937683105, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8637236084452975, |
|
"grad_norm": 128.53724782402537, |
|
"learning_rate": 2.775916477857948e-08, |
|
"logits/chosen": 1.0059984922409058, |
|
"logits/rejected": 0.9314395189285278, |
|
"logps/chosen": -408.28851318359375, |
|
"logps/rejected": -512.9013671875, |
|
"loss": 0.4721, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4966000318527222, |
|
"rewards/margins": 1.0415229797363281, |
|
"rewards/rejected": -2.538123369216919, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8661228406909789, |
|
"grad_norm": 75.32534103547118, |
|
"learning_rate": 2.680803872553408e-08, |
|
"logits/chosen": 0.9809101819992065, |
|
"logits/rejected": 0.9042765498161316, |
|
"logps/chosen": -427.94818115234375, |
|
"logps/rejected": -608.7567138671875, |
|
"loss": 0.4777, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4624723196029663, |
|
"rewards/margins": 2.0217435359954834, |
|
"rewards/rejected": -3.4842162132263184, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.8685220729366603, |
|
"grad_norm": 76.16915221643951, |
|
"learning_rate": 2.5872568701842706e-08, |
|
"logits/chosen": 1.109959363937378, |
|
"logits/rejected": 1.0451819896697998, |
|
"logps/chosen": -387.56915283203125, |
|
"logps/rejected": -513.5570068359375, |
|
"loss": 0.5199, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4730123281478882, |
|
"rewards/margins": 1.0677883625030518, |
|
"rewards/rejected": -2.5408008098602295, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.8709213051823417, |
|
"grad_norm": 89.9628440342139, |
|
"learning_rate": 2.495282032701096e-08, |
|
"logits/chosen": 0.8237798810005188, |
|
"logits/rejected": 1.0308492183685303, |
|
"logps/chosen": -327.59185791015625, |
|
"logps/rejected": -460.06597900390625, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.237448811531067, |
|
"rewards/margins": 1.4968156814575195, |
|
"rewards/rejected": -2.734264612197876, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.8733205374280231, |
|
"grad_norm": 62.06311219467785, |
|
"learning_rate": 2.4048858117733133e-08, |
|
"logits/chosen": 0.869620144367218, |
|
"logits/rejected": 0.9487771987915039, |
|
"logps/chosen": -427.9664001464844, |
|
"logps/rejected": -548.3504638671875, |
|
"loss": 0.4562, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2834144830703735, |
|
"rewards/margins": 1.766037940979004, |
|
"rewards/rejected": -3.049452543258667, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.8757197696737045, |
|
"grad_norm": 53.805369545877234, |
|
"learning_rate": 2.3160745483366938e-08, |
|
"logits/chosen": 0.9454919695854187, |
|
"logits/rejected": 0.8354522585868835, |
|
"logps/chosen": -424.94744873046875, |
|
"logps/rejected": -568.275146484375, |
|
"loss": 0.4576, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.540513277053833, |
|
"rewards/margins": 1.1268528699874878, |
|
"rewards/rejected": -2.6673665046691895, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.8781190019193857, |
|
"grad_norm": 53.95529288856619, |
|
"learning_rate": 2.2288544721485197e-08, |
|
"logits/chosen": 0.8412476778030396, |
|
"logits/rejected": 0.6873846650123596, |
|
"logps/chosen": -380.01617431640625, |
|
"logps/rejected": -553.5303955078125, |
|
"loss": 0.4669, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2762123346328735, |
|
"rewards/margins": 1.5833995342254639, |
|
"rewards/rejected": -2.859611749649048, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.8805182341650671, |
|
"grad_norm": 62.9258342669386, |
|
"learning_rate": 2.1432317013506117e-08, |
|
"logits/chosen": 0.8068563342094421, |
|
"logits/rejected": 0.879061222076416, |
|
"logps/chosen": -468.471923828125, |
|
"logps/rejected": -507.77978515625, |
|
"loss": 0.534, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8093141317367554, |
|
"rewards/margins": 0.8247050046920776, |
|
"rewards/rejected": -2.634019374847412, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.8829174664107485, |
|
"grad_norm": 70.86456981112302, |
|
"learning_rate": 2.0592122420401704e-08, |
|
"logits/chosen": 0.8949640989303589, |
|
"logits/rejected": 0.9865063428878784, |
|
"logps/chosen": -419.70965576171875, |
|
"logps/rejected": -498.1214904785156, |
|
"loss": 0.4862, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6463212966918945, |
|
"rewards/margins": 0.6911530494689941, |
|
"rewards/rejected": -2.3374743461608887, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8853166986564299, |
|
"grad_norm": 68.65130081600282, |
|
"learning_rate": 1.976801987848459e-08, |
|
"logits/chosen": 0.9888423085212708, |
|
"logits/rejected": 0.907960057258606, |
|
"logps/chosen": -467.0636291503906, |
|
"logps/rejected": -611.2190551757812, |
|
"loss": 0.4721, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6578500270843506, |
|
"rewards/margins": 1.326330542564392, |
|
"rewards/rejected": -2.984180450439453, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.8877159309021113, |
|
"grad_norm": 62.456500627505264, |
|
"learning_rate": 1.8960067195273987e-08, |
|
"logits/chosen": 0.9534702301025391, |
|
"logits/rejected": 0.9982631802558899, |
|
"logps/chosen": -386.90740966796875, |
|
"logps/rejected": -512.1735229492188, |
|
"loss": 0.4896, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3113470077514648, |
|
"rewards/margins": 1.3248114585876465, |
|
"rewards/rejected": -2.6361584663391113, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8901151631477927, |
|
"grad_norm": 49.18533154532746, |
|
"learning_rate": 1.816832104544072e-08, |
|
"logits/chosen": 0.9990129470825195, |
|
"logits/rejected": 1.0061266422271729, |
|
"logps/chosen": -477.3984375, |
|
"logps/rejected": -547.6627197265625, |
|
"loss": 0.4849, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7209774255752563, |
|
"rewards/margins": 0.9805021286010742, |
|
"rewards/rejected": -2.701479434967041, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.8925143953934741, |
|
"grad_norm": 54.05413624909213, |
|
"learning_rate": 1.7392836966831553e-08, |
|
"logits/chosen": 0.8176316022872925, |
|
"logits/rejected": 0.8249105215072632, |
|
"logps/chosen": -431.5072326660156, |
|
"logps/rejected": -552.5424194335938, |
|
"loss": 0.4505, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3977526426315308, |
|
"rewards/margins": 1.4976484775543213, |
|
"rewards/rejected": -2.8954014778137207, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.8949136276391555, |
|
"grad_norm": 78.72361123419643, |
|
"learning_rate": 1.663366935657373e-08, |
|
"logits/chosen": 1.0658419132232666, |
|
"logits/rejected": 1.1984130144119263, |
|
"logps/chosen": -401.8185729980469, |
|
"logps/rejected": -530.7760620117188, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4774563312530518, |
|
"rewards/margins": 1.1679576635360718, |
|
"rewards/rejected": -2.645414113998413, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.8973128598848369, |
|
"grad_norm": 82.78364069314664, |
|
"learning_rate": 1.5890871467258898e-08, |
|
"logits/chosen": 0.7775467038154602, |
|
"logits/rejected": 0.8516207933425903, |
|
"logps/chosen": -522.8553466796875, |
|
"logps/rejected": -584.3663940429688, |
|
"loss": 0.477, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6044390201568604, |
|
"rewards/margins": 0.9675573110580444, |
|
"rewards/rejected": -2.5719962120056152, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.8997120921305183, |
|
"grad_norm": 55.503270236646635, |
|
"learning_rate": 1.5164495403207967e-08, |
|
"logits/chosen": 0.8838014602661133, |
|
"logits/rejected": 0.7281323671340942, |
|
"logps/chosen": -484.826171875, |
|
"logps/rejected": -663.9266357421875, |
|
"loss": 0.4559, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7360010147094727, |
|
"rewards/margins": 1.4825078248977661, |
|
"rewards/rejected": -3.2185089588165283, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.9021113243761996, |
|
"grad_norm": 50.16513241204491, |
|
"learning_rate": 1.4454592116815962e-08, |
|
"logits/chosen": 0.9875132441520691, |
|
"logits/rejected": 0.8935637474060059, |
|
"logps/chosen": -448.6133728027344, |
|
"logps/rejected": -582.8692626953125, |
|
"loss": 0.4518, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.39998459815979, |
|
"rewards/margins": 1.2009289264678955, |
|
"rewards/rejected": -2.6009135246276855, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.904510556621881, |
|
"grad_norm": 46.449497272119075, |
|
"learning_rate": 1.3761211404977934e-08, |
|
"logits/chosen": 0.941249668598175, |
|
"logits/rejected": 0.9164915084838867, |
|
"logps/chosen": -420.18572998046875, |
|
"logps/rejected": -560.7182006835938, |
|
"loss": 0.4286, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4988505840301514, |
|
"rewards/margins": 1.5091602802276611, |
|
"rewards/rejected": -3.0080108642578125, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9069097888675623, |
|
"grad_norm": 69.96932776431272, |
|
"learning_rate": 1.3084401905596177e-08, |
|
"logits/chosen": 0.8017269372940063, |
|
"logits/rejected": 0.8931961059570312, |
|
"logps/chosen": -477.79229736328125, |
|
"logps/rejected": -530.8778686523438, |
|
"loss": 0.4757, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4025003910064697, |
|
"rewards/margins": 1.0767922401428223, |
|
"rewards/rejected": -2.479292631149292, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9093090211132437, |
|
"grad_norm": 57.69809813157058, |
|
"learning_rate": 1.2424211094168053e-08, |
|
"logits/chosen": 1.067690134048462, |
|
"logits/rejected": 1.153307318687439, |
|
"logps/chosen": -508.63641357421875, |
|
"logps/rejected": -606.459716796875, |
|
"loss": 0.4734, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4867782592773438, |
|
"rewards/margins": 1.1114721298217773, |
|
"rewards/rejected": -2.5982506275177, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9117082533589251, |
|
"grad_norm": 51.959676392057304, |
|
"learning_rate": 1.1780685280456143e-08, |
|
"logits/chosen": 0.9507681131362915, |
|
"logits/rejected": 0.8653473854064941, |
|
"logps/chosen": -522.01123046875, |
|
"logps/rejected": -664.1727294921875, |
|
"loss": 0.5341, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8983325958251953, |
|
"rewards/margins": 1.3663065433502197, |
|
"rewards/rejected": -3.264639377593994, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9141074856046065, |
|
"grad_norm": 65.97651430504328, |
|
"learning_rate": 1.1153869605239564e-08, |
|
"logits/chosen": 1.0778931379318237, |
|
"logits/rejected": 1.2029404640197754, |
|
"logps/chosen": -459.14239501953125, |
|
"logps/rejected": -502.3555603027344, |
|
"loss": 0.4778, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.367511510848999, |
|
"rewards/margins": 0.9244858026504517, |
|
"rewards/rejected": -2.2919974327087402, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9165067178502879, |
|
"grad_norm": 59.680109761572375, |
|
"learning_rate": 1.0543808037147606e-08, |
|
"logits/chosen": 0.9210470914840698, |
|
"logits/rejected": 0.8286763429641724, |
|
"logps/chosen": -419.98052978515625, |
|
"logps/rejected": -604.6139526367188, |
|
"loss": 0.4461, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2651748657226562, |
|
"rewards/margins": 1.8151108026504517, |
|
"rewards/rejected": -3.0802855491638184, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9189059500959693, |
|
"grad_norm": 64.15804767246028, |
|
"learning_rate": 9.95054336957557e-09, |
|
"logits/chosen": 0.8791543245315552, |
|
"logits/rejected": 0.7720547914505005, |
|
"logps/chosen": -434.31072998046875, |
|
"logps/rejected": -538.9797973632812, |
|
"loss": 0.4605, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3990988731384277, |
|
"rewards/margins": 0.9665057063102722, |
|
"rewards/rejected": -2.3656046390533447, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.9213051823416507, |
|
"grad_norm": 78.26051008225383, |
|
"learning_rate": 9.37411721768286e-09, |
|
"logits/chosen": 1.20572829246521, |
|
"logits/rejected": 1.0488499402999878, |
|
"logps/chosen": -474.15423583984375, |
|
"logps/rejected": -653.7531127929688, |
|
"loss": 0.4588, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7111165523529053, |
|
"rewards/margins": 1.3848183155059814, |
|
"rewards/rejected": -3.095935106277466, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9237044145873321, |
|
"grad_norm": 52.922202837660016, |
|
"learning_rate": 8.81457001547392e-09, |
|
"logits/chosen": 0.9619580507278442, |
|
"logits/rejected": 0.8864519000053406, |
|
"logps/chosen": -476.1001892089844, |
|
"logps/rejected": -602.1996459960938, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7220556735992432, |
|
"rewards/margins": 1.1378229856491089, |
|
"rewards/rejected": -2.8598790168762207, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9261036468330134, |
|
"grad_norm": 60.674209428344206, |
|
"learning_rate": 8.271941012961942e-09, |
|
"logits/chosen": 1.1256102323532104, |
|
"logits/rejected": 0.9962800741195679, |
|
"logps/chosen": -408.4756164550781, |
|
"logps/rejected": -597.3717041015625, |
|
"loss": 0.4604, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5593998432159424, |
|
"rewards/margins": 1.3417896032333374, |
|
"rewards/rejected": -2.9011893272399902, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9285028790786948, |
|
"grad_norm": 64.90478017897114, |
|
"learning_rate": 7.746268273415568e-09, |
|
"logits/chosen": 1.1957015991210938, |
|
"logits/rejected": 0.9952928423881531, |
|
"logps/chosen": -465.75921630859375, |
|
"logps/rejected": -572.9944458007812, |
|
"loss": 0.4651, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5789905786514282, |
|
"rewards/margins": 0.66215980052948, |
|
"rewards/rejected": -2.241150379180908, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9309021113243762, |
|
"grad_norm": 62.94124317893805, |
|
"learning_rate": 7.237588670689076e-09, |
|
"logits/chosen": 0.7113492488861084, |
|
"logits/rejected": 0.8110172152519226, |
|
"logps/chosen": -426.31005859375, |
|
"logps/rejected": -539.2099609375, |
|
"loss": 0.4801, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4343928098678589, |
|
"rewards/margins": 1.4262264966964722, |
|
"rewards/rejected": -2.86061954498291, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9333013435700576, |
|
"grad_norm": 52.267284009041056, |
|
"learning_rate": 6.745937886635606e-09, |
|
"logits/chosen": 0.9020377993583679, |
|
"logits/rejected": 0.7847568988800049, |
|
"logps/chosen": -487.7434997558594, |
|
"logps/rejected": -625.0538330078125, |
|
"loss": 0.4681, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6163032054901123, |
|
"rewards/margins": 1.4039795398712158, |
|
"rewards/rejected": -3.020282745361328, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.935700575815739, |
|
"grad_norm": 62.596711125268236, |
|
"learning_rate": 6.271350408604409e-09, |
|
"logits/chosen": 1.0403454303741455, |
|
"logits/rejected": 0.9746878743171692, |
|
"logps/chosen": -366.9835205078125, |
|
"logps/rejected": -541.3766479492188, |
|
"loss": 0.4827, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.0881052017211914, |
|
"rewards/margins": 1.4868371486663818, |
|
"rewards/rejected": -2.5749423503875732, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9380998080614203, |
|
"grad_norm": 84.37144620334973, |
|
"learning_rate": 5.813859527021487e-09, |
|
"logits/chosen": 1.1226634979248047, |
|
"logits/rejected": 1.0752464532852173, |
|
"logps/chosen": -440.1749572753906, |
|
"logps/rejected": -587.12255859375, |
|
"loss": 0.471, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.4864726066589355, |
|
"rewards/margins": 1.7284291982650757, |
|
"rewards/rejected": -3.2149016857147217, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9404990403071017, |
|
"grad_norm": 62.60828204037455, |
|
"learning_rate": 5.373497333054616e-09, |
|
"logits/chosen": 1.0327513217926025, |
|
"logits/rejected": 1.0482391119003296, |
|
"logps/chosen": -487.63287353515625, |
|
"logps/rejected": -566.2637939453125, |
|
"loss": 0.5165, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6275917291641235, |
|
"rewards/margins": 0.9259729385375977, |
|
"rewards/rejected": -2.5535647869110107, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9428982725527831, |
|
"grad_norm": 60.0912083970973, |
|
"learning_rate": 4.950294716362213e-09, |
|
"logits/chosen": 0.9793977737426758, |
|
"logits/rejected": 1.0605987310409546, |
|
"logps/chosen": -515.0064697265625, |
|
"logps/rejected": -658.4754028320312, |
|
"loss": 0.4787, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.7532989978790283, |
|
"rewards/margins": 1.5051729679107666, |
|
"rewards/rejected": -3.258471965789795, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9452975047984645, |
|
"grad_norm": 48.498819013773996, |
|
"learning_rate": 4.544281362926422e-09, |
|
"logits/chosen": 0.8250744938850403, |
|
"logits/rejected": 0.7809959650039673, |
|
"logps/chosen": -478.2774353027344, |
|
"logps/rejected": -611.0914916992188, |
|
"loss": 0.476, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3411508798599243, |
|
"rewards/margins": 1.4210319519042969, |
|
"rewards/rejected": -2.76218318939209, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9476967370441459, |
|
"grad_norm": 50.13893547137485, |
|
"learning_rate": 4.15548575297095e-09, |
|
"logits/chosen": 0.7877107858657837, |
|
"logits/rejected": 0.8041622042655945, |
|
"logps/chosen": -414.5467224121094, |
|
"logps/rejected": -560.4259643554688, |
|
"loss": 0.4403, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4216382503509521, |
|
"rewards/margins": 1.5532824993133545, |
|
"rewards/rejected": -2.9749207496643066, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9500959692898272, |
|
"grad_norm": 46.026526783579634, |
|
"learning_rate": 3.7839351589631366e-09, |
|
"logits/chosen": 0.8864864110946655, |
|
"logits/rejected": 0.6277433633804321, |
|
"logps/chosen": -406.79278564453125, |
|
"logps/rejected": -578.2540893554688, |
|
"loss": 0.4667, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3959004878997803, |
|
"rewards/margins": 1.171373724937439, |
|
"rewards/rejected": -2.567274332046509, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.9524952015355086, |
|
"grad_norm": 70.78602100703401, |
|
"learning_rate": 3.4296556437010405e-09, |
|
"logits/chosen": 0.9595912098884583, |
|
"logits/rejected": 0.9379026293754578, |
|
"logps/chosen": -398.94671630859375, |
|
"logps/rejected": -497.46832275390625, |
|
"loss": 0.4927, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5710119009017944, |
|
"rewards/margins": 1.0367610454559326, |
|
"rewards/rejected": -2.6077733039855957, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.95489443378119, |
|
"grad_norm": 61.9170193707521, |
|
"learning_rate": 3.092672058485124e-09, |
|
"logits/chosen": 1.0550943613052368, |
|
"logits/rejected": 0.9832525253295898, |
|
"logps/chosen": -420.7129821777344, |
|
"logps/rejected": -601.7310791015625, |
|
"loss": 0.499, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5211260318756104, |
|
"rewards/margins": 1.6833159923553467, |
|
"rewards/rejected": -3.204442262649536, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.9572936660268714, |
|
"grad_norm": 71.57546581039838, |
|
"learning_rate": 2.7730080413750356e-09, |
|
"logits/chosen": 1.027452826499939, |
|
"logits/rejected": 1.0642149448394775, |
|
"logps/chosen": -459.17315673828125, |
|
"logps/rejected": -596.4716796875, |
|
"loss": 0.4833, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4702363014221191, |
|
"rewards/margins": 1.3921765089035034, |
|
"rewards/rejected": -2.862412929534912, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"grad_norm": 67.29053247318343, |
|
"learning_rate": 2.4706860155316033e-09, |
|
"logits/chosen": 0.9031354188919067, |
|
"logits/rejected": 0.9704192280769348, |
|
"logps/chosen": -531.8564453125, |
|
"logps/rejected": -627.3538818359375, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.564781904220581, |
|
"rewards/margins": 0.9251095056533813, |
|
"rewards/rejected": -2.489891529083252, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"eval_logits/chosen": 1.2258787155151367, |
|
"eval_logits/rejected": 1.1678062677383423, |
|
"eval_logps/chosen": -449.6615295410156, |
|
"eval_logps/rejected": -589.1575317382812, |
|
"eval_loss": 0.4695444405078888, |
|
"eval_rewards/accuracies": 0.8107143044471741, |
|
"eval_rewards/chosen": -1.5489410161972046, |
|
"eval_rewards/margins": 1.324028730392456, |
|
"eval_rewards/rejected": -2.87296986579895, |
|
"eval_runtime": 173.0444, |
|
"eval_samples_per_second": 25.78, |
|
"eval_steps_per_second": 0.405, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9620921305182342, |
|
"grad_norm": 60.723257107266754, |
|
"learning_rate": 2.185727187643843e-09, |
|
"logits/chosen": 0.8792866468429565, |
|
"logits/rejected": 0.8162811994552612, |
|
"logps/chosen": -393.1644592285156, |
|
"logps/rejected": -572.2662963867188, |
|
"loss": 0.506, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.4554507732391357, |
|
"rewards/margins": 1.7228260040283203, |
|
"rewards/rejected": -3.178276538848877, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.9644913627639156, |
|
"grad_norm": 62.98890394630698, |
|
"learning_rate": 1.9181515464413434e-09, |
|
"logits/chosen": 0.790207028388977, |
|
"logits/rejected": 0.7221522927284241, |
|
"logps/chosen": -549.1417236328125, |
|
"logps/rejected": -689.9721069335938, |
|
"loss": 0.4792, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.363548755645752, |
|
"rewards/margins": 1.4138752222061157, |
|
"rewards/rejected": -2.777423858642578, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.966890595009597, |
|
"grad_norm": 52.084802047603894, |
|
"learning_rate": 1.6679778612923302e-09, |
|
"logits/chosen": 0.8847156763076782, |
|
"logits/rejected": 1.0453051328659058, |
|
"logps/chosen": -492.10491943359375, |
|
"logps/rejected": -566.874755859375, |
|
"loss": 0.4532, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5524340867996216, |
|
"rewards/margins": 0.765845775604248, |
|
"rewards/rejected": -2.31827974319458, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.9692898272552783, |
|
"grad_norm": 72.5825872393078, |
|
"learning_rate": 1.43522368088686e-09, |
|
"logits/chosen": 0.9875534176826477, |
|
"logits/rejected": 0.9585920572280884, |
|
"logps/chosen": -465.96588134765625, |
|
"logps/rejected": -639.5526123046875, |
|
"loss": 0.541, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6843414306640625, |
|
"rewards/margins": 1.7214224338531494, |
|
"rewards/rejected": -3.405763626098633, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.9716890595009597, |
|
"grad_norm": 97.90895810697678, |
|
"learning_rate": 1.2199053320059993e-09, |
|
"logits/chosen": 1.0351098775863647, |
|
"logits/rejected": 0.8952969312667847, |
|
"logps/chosen": -459.99090576171875, |
|
"logps/rejected": -603.3958740234375, |
|
"loss": 0.4839, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5300720930099487, |
|
"rewards/margins": 1.295023798942566, |
|
"rewards/rejected": -2.8250958919525146, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.974088291746641, |
|
"grad_norm": 45.17361142734586, |
|
"learning_rate": 1.0220379183764338e-09, |
|
"logits/chosen": 0.8794069290161133, |
|
"logits/rejected": 0.8390556573867798, |
|
"logps/chosen": -373.766845703125, |
|
"logps/rejected": -544.37939453125, |
|
"loss": 0.4625, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3079745769500732, |
|
"rewards/margins": 1.6338344812393188, |
|
"rewards/rejected": -2.9418091773986816, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.9764875239923224, |
|
"grad_norm": 56.06868332422118, |
|
"learning_rate": 8.416353196111503e-10, |
|
"logits/chosen": 1.237776517868042, |
|
"logits/rejected": 1.1337307691574097, |
|
"logps/chosen": -455.1429138183594, |
|
"logps/rejected": -541.4223022460938, |
|
"loss": 0.5325, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8374900817871094, |
|
"rewards/margins": 1.0089203119277954, |
|
"rewards/rejected": -2.8464105129241943, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.9788867562380038, |
|
"grad_norm": 70.86357349084764, |
|
"learning_rate": 6.787101902356873e-10, |
|
"logits/chosen": 1.1772403717041016, |
|
"logits/rejected": 1.1157615184783936, |
|
"logps/chosen": -455.3755798339844, |
|
"logps/rejected": -587.3627319335938, |
|
"loss": 0.4585, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5816056728363037, |
|
"rewards/margins": 1.06257164478302, |
|
"rewards/rejected": -2.644177198410034, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.9812859884836852, |
|
"grad_norm": 84.73402761529461, |
|
"learning_rate": 5.332739588005953e-10, |
|
"logits/chosen": 0.9162181615829468, |
|
"logits/rejected": 0.7842608690261841, |
|
"logps/chosen": -381.3438720703125, |
|
"logps/rejected": -545.1882934570312, |
|
"loss": 0.4867, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3479974269866943, |
|
"rewards/margins": 1.3901352882385254, |
|
"rewards/rejected": -2.7381327152252197, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.9836852207293666, |
|
"grad_norm": 67.24843168063263, |
|
"learning_rate": 4.053368270797164e-10, |
|
"logits/chosen": 1.0658302307128906, |
|
"logits/rejected": 0.9410190582275391, |
|
"logps/chosen": -431.353271484375, |
|
"logps/rejected": -582.7957153320312, |
|
"loss": 0.4446, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6311867237091064, |
|
"rewards/margins": 1.4718221426010132, |
|
"rewards/rejected": -3.103008985519409, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.986084452975048, |
|
"grad_norm": 46.4074976701673, |
|
"learning_rate": 2.949077693545354e-10, |
|
"logits/chosen": 1.0150151252746582, |
|
"logits/rejected": 0.9888280034065247, |
|
"logps/chosen": -484.53265380859375, |
|
"logps/rejected": -603.1912231445312, |
|
"loss": 0.5123, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6683940887451172, |
|
"rewards/margins": 0.8519685864448547, |
|
"rewards/rejected": -2.5203628540039062, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.9884836852207294, |
|
"grad_norm": 65.07967736819077, |
|
"learning_rate": 2.0199453178471047e-10, |
|
"logits/chosen": 0.9517459869384766, |
|
"logits/rejected": 1.0431149005889893, |
|
"logps/chosen": -513.82861328125, |
|
"logps/rejected": -587.9022827148438, |
|
"loss": 0.4816, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.5867310762405396, |
|
"rewards/margins": 1.1293456554412842, |
|
"rewards/rejected": -2.716076374053955, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.9908829174664108, |
|
"grad_norm": 52.84618869879537, |
|
"learning_rate": 1.266036318647301e-10, |
|
"logits/chosen": 0.9450265765190125, |
|
"logits/rejected": 0.9049190282821655, |
|
"logps/chosen": -501.4932556152344, |
|
"logps/rejected": -618.7479248046875, |
|
"loss": 0.466, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4727351665496826, |
|
"rewards/margins": 1.4222580194473267, |
|
"rewards/rejected": -2.894993305206299, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.9932821497120922, |
|
"grad_norm": 79.91254306595567, |
|
"learning_rate": 6.874035796672339e-11, |
|
"logits/chosen": 0.902356743812561, |
|
"logits/rejected": 0.9255275726318359, |
|
"logps/chosen": -456.47509765625, |
|
"logps/rejected": -605.6835327148438, |
|
"loss": 0.4881, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.272007703781128, |
|
"rewards/margins": 1.8344463109970093, |
|
"rewards/rejected": -3.1064541339874268, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9956813819577736, |
|
"grad_norm": 75.05159932169606, |
|
"learning_rate": 2.8408768969423458e-11, |
|
"logits/chosen": 0.8458648920059204, |
|
"logits/rejected": 0.7623052000999451, |
|
"logps/chosen": -472.890380859375, |
|
"logps/rejected": -599.908203125, |
|
"loss": 0.4585, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4442172050476074, |
|
"rewards/margins": 1.2427784204483032, |
|
"rewards/rejected": -2.6869957447052, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9980806142034548, |
|
"grad_norm": 81.34299836383774, |
|
"learning_rate": 5.611693973617271e-12, |
|
"logits/chosen": 1.1288570165634155, |
|
"logits/rejected": 1.0921885967254639, |
|
"logps/chosen": -402.1815490722656, |
|
"logps/rejected": -543.1682739257812, |
|
"loss": 0.5075, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.466627836227417, |
|
"rewards/margins": 1.192771553993225, |
|
"rewards/rejected": -2.6593992710113525, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 4168, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5209795107882678, |
|
"train_runtime": 14376.1789, |
|
"train_samples_per_second": 9.277, |
|
"train_steps_per_second": 0.29 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4168, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|