{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 4167, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.1990407673860912e-08, "logits/chosen": -1.8029143810272217, "logits/rejected": -1.7061834335327148, "logps/chosen": -186.78228759765625, "logps/rejected": -121.93081665039062, "loss": 0.4697, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.1990407673860913e-07, "logits/chosen": -1.9159948825836182, "logits/rejected": -1.380942463874817, "logps/chosen": -176.175048828125, "logps/rejected": -120.50838470458984, "loss": 0.4052, "rewards/accuracies": 0.3055555522441864, "rewards/chosen": -0.00010719310375861824, "rewards/margins": -0.00013813158147968352, "rewards/rejected": 3.093846680712886e-05, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.3980815347721825e-07, "logits/chosen": -1.930445909500122, "logits/rejected": -1.3741885423660278, "logps/chosen": -183.2718505859375, "logps/rejected": -109.85884094238281, "loss": 0.3903, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.00011072765482822433, "rewards/margins": 0.00010560165537754074, "rewards/rejected": 5.1259967221994884e-06, "step": 20 }, { "epoch": 0.01, "learning_rate": 3.5971223021582736e-07, "logits/chosen": -1.8763048648834229, "logits/rejected": -1.4949097633361816, "logps/chosen": -173.87942504882812, "logps/rejected": -129.18692016601562, "loss": 0.4002, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 5.196337588131428e-05, "rewards/margins": 7.132586324587464e-05, "rewards/rejected": -1.936249464051798e-05, "step": 30 }, { "epoch": 0.01, "learning_rate": 4.796163069544365e-07, "logits/chosen": -2.070870876312256, "logits/rejected": -1.6049951314926147, "logps/chosen": -128.12319946289062, "logps/rejected": -99.4199447631836, "loss": 0.3738, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0003283734549768269, "rewards/margins": 0.00042926621972583234, "rewards/rejected": -0.00010089283023262396, "step": 40 }, { "epoch": 0.01, "learning_rate": 5.995203836930456e-07, "logits/chosen": -2.017171859741211, "logits/rejected": -1.4538745880126953, "logps/chosen": -151.61212158203125, "logps/rejected": -106.30601501464844, "loss": 0.3839, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0006235686596482992, "rewards/margins": 0.000633338640909642, "rewards/rejected": -9.770013093657326e-06, "step": 50 }, { "epoch": 0.01, "learning_rate": 7.194244604316547e-07, "logits/chosen": -1.9818241596221924, "logits/rejected": -1.5024590492248535, "logps/chosen": -206.4155731201172, "logps/rejected": -129.6781768798828, "loss": 0.4124, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0013709899503737688, "rewards/margins": 0.0006454504909925163, "rewards/rejected": 0.0007255395175889134, "step": 60 }, { "epoch": 0.02, "learning_rate": 8.393285371702639e-07, "logits/chosen": -2.0715603828430176, "logits/rejected": -1.4825446605682373, "logps/chosen": -189.07382202148438, "logps/rejected": -106.4325942993164, "loss": 0.3996, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0014919806271791458, "rewards/margins": 0.001798570854589343, "rewards/rejected": -0.00030659048934467137, "step": 70 }, { "epoch": 0.02, "learning_rate": 9.59232613908873e-07, "logits/chosen": -2.0178894996643066, "logits/rejected": -1.4030519723892212, "logps/chosen": -158.771484375, "logps/rejected": -120.0831298828125, "loss": 0.4146, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.0014441380044445395, "rewards/margins": 0.004313913639634848, "rewards/rejected": -0.0028697755187749863, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.079136690647482e-06, "logits/chosen": -1.9421523809432983, "logits/rejected": -1.3828394412994385, "logps/chosen": -195.73452758789062, "logps/rejected": -166.6143798828125, "loss": 0.3436, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.009366961196064949, "rewards/margins": 0.009355243295431137, "rewards/rejected": -0.018722204491496086, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.1990407673860912e-06, "logits/chosen": -1.959376573562622, "logits/rejected": -1.2909282445907593, "logps/chosen": -247.2277374267578, "logps/rejected": -191.56613159179688, "loss": 0.3416, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05521208792924881, "rewards/margins": 0.025743016973137856, "rewards/rejected": -0.08095510303974152, "step": 100 }, { "epoch": 0.02, "eval_logits/chosen": -1.8827470541000366, "eval_logits/rejected": -1.7201571464538574, "eval_logps/chosen": -365.3259582519531, "eval_logps/rejected": -367.12213134765625, "eval_loss": 0.04473324120044708, "eval_rewards/accuracies": 0.5883233547210693, "eval_rewards/chosen": -0.09944818913936615, "eval_rewards/margins": 0.0166977159678936, "eval_rewards/rejected": -0.11614590138196945, "eval_runtime": 1203.8025, "eval_samples_per_second": 1.661, "eval_steps_per_second": 0.277, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.3189448441247004e-06, "logits/chosen": -1.9691740274429321, "logits/rejected": -1.3262847661972046, "logps/chosen": -273.80657958984375, "logps/rejected": -279.27740478515625, "loss": 0.3299, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.127334326505661, "rewards/margins": 0.05298153683543205, "rewards/rejected": -0.18031586706638336, "step": 110 }, { "epoch": 0.03, "learning_rate": 1.4388489208633094e-06, "logits/chosen": -1.9778773784637451, "logits/rejected": -1.5461676120758057, "logps/chosen": -313.43084716796875, "logps/rejected": -336.5901184082031, "loss": 0.3315, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15375864505767822, "rewards/margins": 0.06629587709903717, "rewards/rejected": -0.2200545072555542, "step": 120 }, { "epoch": 0.03, "learning_rate": 1.5587529976019186e-06, "logits/chosen": -2.0032668113708496, "logits/rejected": -1.5493533611297607, "logps/chosen": -361.8681335449219, "logps/rejected": -406.78179931640625, "loss": 0.3122, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18789012730121613, "rewards/margins": 0.09802448004484177, "rewards/rejected": -0.2859145998954773, "step": 130 }, { "epoch": 0.03, "learning_rate": 1.6786570743405278e-06, "logits/chosen": -2.2357876300811768, "logits/rejected": -1.7135013341903687, "logps/chosen": -299.4336242675781, "logps/rejected": -350.95953369140625, "loss": 0.3401, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12972791492938995, "rewards/margins": 0.11772496998310089, "rewards/rejected": -0.24745285511016846, "step": 140 }, { "epoch": 0.04, "learning_rate": 1.7985611510791368e-06, "logits/chosen": -2.025186061859131, "logits/rejected": -1.5389646291732788, "logps/chosen": -445.5409240722656, "logps/rejected": -510.79425048828125, "loss": 0.3098, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2933913469314575, "rewards/margins": 0.11575798690319061, "rewards/rejected": -0.40914931893348694, "step": 150 }, { "epoch": 0.04, "learning_rate": 1.918465227817746e-06, "logits/chosen": -2.004533052444458, "logits/rejected": -1.5478112697601318, "logps/chosen": -562.5319213867188, "logps/rejected": -625.5440673828125, "loss": 0.2848, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3802778124809265, "rewards/margins": 0.11718380451202393, "rewards/rejected": -0.49746161699295044, "step": 160 }, { "epoch": 0.04, "learning_rate": 2.0383693045563552e-06, "logits/chosen": -1.9026339054107666, "logits/rejected": -1.5486068725585938, "logps/chosen": -442.73638916015625, "logps/rejected": -475.7823181152344, "loss": 0.3234, "rewards/accuracies": 0.625, "rewards/chosen": -0.3179342746734619, "rewards/margins": 0.07873056083917618, "rewards/rejected": -0.3966647982597351, "step": 170 }, { "epoch": 0.04, "learning_rate": 2.158273381294964e-06, "logits/chosen": -2.192417621612549, "logits/rejected": -1.531654715538025, "logps/chosen": -356.04315185546875, "logps/rejected": -468.2540588378906, "loss": 0.2554, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1803785264492035, "rewards/margins": 0.1676339954137802, "rewards/rejected": -0.3480125367641449, "step": 180 }, { "epoch": 0.05, "learning_rate": 2.2781774580335732e-06, "logits/chosen": -1.977656602859497, "logits/rejected": -1.5833740234375, "logps/chosen": -389.6104431152344, "logps/rejected": -443.1611328125, "loss": 0.3262, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.25455620884895325, "rewards/margins": 0.08044341206550598, "rewards/rejected": -0.33499962091445923, "step": 190 }, { "epoch": 0.05, "learning_rate": 2.3980815347721824e-06, "logits/chosen": -2.15303373336792, "logits/rejected": -1.7479757070541382, "logps/chosen": -314.3880310058594, "logps/rejected": -412.335205078125, "loss": 0.2571, "rewards/accuracies": 0.625, "rewards/chosen": -0.17357322573661804, "rewards/margins": 0.12978531420230865, "rewards/rejected": -0.3033584952354431, "step": 200 }, { "epoch": 0.05, "eval_logits/chosen": -2.0364434719085693, "eval_logits/rejected": -1.8598525524139404, "eval_logps/chosen": -450.7508544921875, "eval_logps/rejected": -466.86273193359375, "eval_loss": 0.08576688915491104, "eval_rewards/accuracies": 0.4790419042110443, "eval_rewards/chosen": -0.1848730444908142, "eval_rewards/margins": 0.03101346082985401, "eval_rewards/rejected": -0.21588650345802307, "eval_runtime": 1203.1794, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 200 }, { "epoch": 0.05, "learning_rate": 2.5179856115107916e-06, "logits/chosen": -2.0329134464263916, "logits/rejected": -1.167757272720337, "logps/chosen": -466.6771545410156, "logps/rejected": -594.9791870117188, "loss": 0.2921, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3080136775970459, "rewards/margins": 0.18821842968463898, "rewards/rejected": -0.49623212218284607, "step": 210 }, { "epoch": 0.05, "learning_rate": 2.637889688249401e-06, "logits/chosen": -2.039789915084839, "logits/rejected": -1.3271219730377197, "logps/chosen": -509.66748046875, "logps/rejected": -675.979736328125, "loss": 0.2741, "rewards/accuracies": 0.75, "rewards/chosen": -0.34899237751960754, "rewards/margins": 0.21285727620124817, "rewards/rejected": -0.5618497133255005, "step": 220 }, { "epoch": 0.06, "learning_rate": 2.75779376498801e-06, "logits/chosen": -2.1223864555358887, "logits/rejected": -1.527343988418579, "logps/chosen": -338.41387939453125, "logps/rejected": -467.3946228027344, "loss": 0.2673, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1759270280599594, "rewards/margins": 0.18991851806640625, "rewards/rejected": -0.36584553122520447, "step": 230 }, { "epoch": 0.06, "learning_rate": 2.877697841726619e-06, "logits/chosen": -2.3056137561798096, "logits/rejected": -1.6976007223129272, "logps/chosen": -467.47198486328125, "logps/rejected": -594.7034912109375, "loss": 0.2651, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3085511326789856, "rewards/margins": 0.1851382851600647, "rewards/rejected": -0.4936893880367279, "step": 240 }, { "epoch": 0.06, "learning_rate": 2.9976019184652285e-06, "logits/chosen": -2.2671046257019043, "logits/rejected": -1.6860625743865967, "logps/chosen": -365.64276123046875, "logps/rejected": -459.3984375, "loss": 0.2966, "rewards/accuracies": 0.75, "rewards/chosen": -0.21584682166576385, "rewards/margins": 0.13884630799293518, "rewards/rejected": -0.35469311475753784, "step": 250 }, { "epoch": 0.06, "learning_rate": 3.1175059952038373e-06, "logits/chosen": -2.165955066680908, "logits/rejected": -1.6470870971679688, "logps/chosen": -504.0653381347656, "logps/rejected": -674.6064453125, "loss": 0.2419, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3326278030872345, "rewards/margins": 0.20763631165027618, "rewards/rejected": -0.5402641296386719, "step": 260 }, { "epoch": 0.06, "learning_rate": 3.237410071942446e-06, "logits/chosen": -2.275056838989258, "logits/rejected": -1.7461671829223633, "logps/chosen": -464.90594482421875, "logps/rejected": -606.6214599609375, "loss": 0.2304, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3181764483451843, "rewards/margins": 0.1774064600467682, "rewards/rejected": -0.4955829083919525, "step": 270 }, { "epoch": 0.07, "learning_rate": 3.3573141486810557e-06, "logits/chosen": -2.5036635398864746, "logits/rejected": -1.6616184711456299, "logps/chosen": -451.06890869140625, "logps/rejected": -593.6022338867188, "loss": 0.2653, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24220120906829834, "rewards/margins": 0.2283978909254074, "rewards/rejected": -0.47059908509254456, "step": 280 }, { "epoch": 0.07, "learning_rate": 3.4772182254196645e-06, "logits/chosen": -2.259277820587158, "logits/rejected": -1.6954368352890015, "logps/chosen": -505.272705078125, "logps/rejected": -610.1090087890625, "loss": 0.2702, "rewards/accuracies": 0.625, "rewards/chosen": -0.3104931116104126, "rewards/margins": 0.17024651169776917, "rewards/rejected": -0.48073965311050415, "step": 290 }, { "epoch": 0.07, "learning_rate": 3.5971223021582737e-06, "logits/chosen": -2.172471523284912, "logits/rejected": -1.504427433013916, "logps/chosen": -396.2477111816406, "logps/rejected": -579.3084716796875, "loss": 0.2771, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24097943305969238, "rewards/margins": 0.21324284374713898, "rewards/rejected": -0.45422229170799255, "step": 300 }, { "epoch": 0.07, "eval_logits/chosen": -2.090923309326172, "eval_logits/rejected": -1.908668041229248, "eval_logps/chosen": -507.7906494140625, "eval_logps/rejected": -527.8734741210938, "eval_loss": 0.09101365506649017, "eval_rewards/accuracies": 0.477544903755188, "eval_rewards/chosen": -0.24191293120384216, "eval_rewards/margins": 0.03498436138033867, "eval_rewards/rejected": -0.27689728140830994, "eval_runtime": 1203.0495, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 300 }, { "epoch": 0.07, "learning_rate": 3.717026378896883e-06, "logits/chosen": -2.049182415008545, "logits/rejected": -1.4486244916915894, "logps/chosen": -439.39208984375, "logps/rejected": -584.477783203125, "loss": 0.2243, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2854474186897278, "rewards/margins": 0.20249168574810028, "rewards/rejected": -0.48793911933898926, "step": 310 }, { "epoch": 0.08, "learning_rate": 3.836930455635492e-06, "logits/chosen": -2.3335776329040527, "logits/rejected": -1.786325216293335, "logps/chosen": -396.58306884765625, "logps/rejected": -475.70733642578125, "loss": 0.2766, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24405236542224884, "rewards/margins": 0.12695007026195526, "rewards/rejected": -0.3710024654865265, "step": 320 }, { "epoch": 0.08, "learning_rate": 3.956834532374101e-06, "logits/chosen": -2.1659653186798096, "logits/rejected": -1.3438094854354858, "logps/chosen": -670.764892578125, "logps/rejected": -849.9221801757812, "loss": 0.2475, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4837416112422943, "rewards/margins": 0.2372397631406784, "rewards/rejected": -0.7209814190864563, "step": 330 }, { "epoch": 0.08, "learning_rate": 4.0767386091127105e-06, "logits/chosen": -2.2137274742126465, "logits/rejected": -1.5572987794876099, "logps/chosen": -542.7764892578125, "logps/rejected": -685.880615234375, "loss": 0.2809, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.37600573897361755, "rewards/margins": 0.18942835927009583, "rewards/rejected": -0.5654340982437134, "step": 340 }, { "epoch": 0.08, "learning_rate": 4.196642685851319e-06, "logits/chosen": -2.3048481941223145, "logits/rejected": -1.8672215938568115, "logps/chosen": -409.4325866699219, "logps/rejected": -562.182861328125, "loss": 0.2495, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.29149532318115234, "rewards/margins": 0.16700473427772522, "rewards/rejected": -0.45850005745887756, "step": 350 }, { "epoch": 0.09, "learning_rate": 4.316546762589928e-06, "logits/chosen": -2.289367198944092, "logits/rejected": -1.8715400695800781, "logps/chosen": -377.6619567871094, "logps/rejected": -513.40771484375, "loss": 0.2755, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23883633315563202, "rewards/margins": 0.14426147937774658, "rewards/rejected": -0.3830978274345398, "step": 360 }, { "epoch": 0.09, "learning_rate": 4.436450839328538e-06, "logits/chosen": -2.102851390838623, "logits/rejected": -1.4640109539031982, "logps/chosen": -508.1983337402344, "logps/rejected": -678.5553588867188, "loss": 0.2274, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3818313479423523, "rewards/margins": 0.18701128661632538, "rewards/rejected": -0.5688427090644836, "step": 370 }, { "epoch": 0.09, "learning_rate": 4.5563549160671465e-06, "logits/chosen": -2.3133838176727295, "logits/rejected": -1.7321016788482666, "logps/chosen": -440.7145080566406, "logps/rejected": -574.8981323242188, "loss": 0.2404, "rewards/accuracies": 0.75, "rewards/chosen": -0.2492268830537796, "rewards/margins": 0.18998585641384125, "rewards/rejected": -0.43921273946762085, "step": 380 }, { "epoch": 0.09, "learning_rate": 4.676258992805755e-06, "logits/chosen": -2.265368938446045, "logits/rejected": -1.8561140298843384, "logps/chosen": -449.74041748046875, "logps/rejected": -564.5775146484375, "loss": 0.2747, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29022103548049927, "rewards/margins": 0.15951310098171234, "rewards/rejected": -0.4497341513633728, "step": 390 }, { "epoch": 0.1, "learning_rate": 4.796163069544365e-06, "logits/chosen": -2.2809808254241943, "logits/rejected": -1.629220962524414, "logps/chosen": -516.8795166015625, "logps/rejected": -680.0480346679688, "loss": 0.2561, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.36346548795700073, "rewards/margins": 0.21010203659534454, "rewards/rejected": -0.5735675096511841, "step": 400 }, { "epoch": 0.1, "eval_logits/chosen": -2.151092290878296, "eval_logits/rejected": -1.9571280479431152, "eval_logps/chosen": -731.9658203125, "eval_logps/rejected": -759.565185546875, "eval_loss": 0.11267108470201492, "eval_rewards/accuracies": 0.48952096700668335, "eval_rewards/chosen": -0.46608811616897583, "eval_rewards/margins": 0.0425008200109005, "eval_rewards/rejected": -0.5085889101028442, "eval_runtime": 1203.5251, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 400 }, { "epoch": 0.1, "learning_rate": 4.916067146282974e-06, "logits/chosen": -2.432021141052246, "logits/rejected": -1.629734754562378, "logps/chosen": -540.5179443359375, "logps/rejected": -710.5264892578125, "loss": 0.2134, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34152334928512573, "rewards/margins": 0.24536368250846863, "rewards/rejected": -0.586887001991272, "step": 410 }, { "epoch": 0.1, "learning_rate": 4.9999921043206356e-06, "logits/chosen": -2.293018341064453, "logits/rejected": -1.6244852542877197, "logps/chosen": -427.8416442871094, "logps/rejected": -547.2958374023438, "loss": 0.246, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2615181803703308, "rewards/margins": 0.1817948967218399, "rewards/rejected": -0.4433130621910095, "step": 420 }, { "epoch": 0.1, "learning_rate": 4.999851738074904e-06, "logits/chosen": -2.2683982849121094, "logits/rejected": -1.581102728843689, "logps/chosen": -465.10333251953125, "logps/rejected": -683.4638671875, "loss": 0.21, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2840215861797333, "rewards/margins": 0.2840023636817932, "rewards/rejected": -0.5680239796638489, "step": 430 }, { "epoch": 0.11, "learning_rate": 4.9995359236271094e-06, "logits/chosen": -2.3237693309783936, "logits/rejected": -1.8209505081176758, "logps/chosen": -515.5578002929688, "logps/rejected": -590.4849853515625, "loss": 0.2872, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.342002272605896, "rewards/margins": 0.12481825053691864, "rewards/rejected": -0.46682053804397583, "step": 440 }, { "epoch": 0.11, "learning_rate": 4.999044683142196e-06, "logits/chosen": -2.1307613849639893, "logits/rejected": -1.4179335832595825, "logps/chosen": -465.1773986816406, "logps/rejected": -620.6632080078125, "loss": 0.2678, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.30656537413597107, "rewards/margins": 0.2203582525253296, "rewards/rejected": -0.526923656463623, "step": 450 }, { "epoch": 0.11, "learning_rate": 4.998378051097111e-06, "logits/chosen": -2.061636447906494, "logits/rejected": -1.571152687072754, "logps/chosen": -650.8114013671875, "logps/rejected": -758.4025268554688, "loss": 0.2521, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.488238662481308, "rewards/margins": 0.16402384638786316, "rewards/rejected": -0.6522625684738159, "step": 460 }, { "epoch": 0.11, "learning_rate": 4.997536074278388e-06, "logits/chosen": -2.140510082244873, "logits/rejected": -1.6469488143920898, "logps/chosen": -550.5447998046875, "logps/rejected": -670.2029418945312, "loss": 0.2912, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.410614550113678, "rewards/margins": 0.16074909269809723, "rewards/rejected": -0.5713636875152588, "step": 470 }, { "epoch": 0.12, "learning_rate": 4.996518811778858e-06, "logits/chosen": -2.063178300857544, "logits/rejected": -1.5267159938812256, "logps/chosen": -537.4976806640625, "logps/rejected": -689.3345947265625, "loss": 0.2264, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3834238052368164, "rewards/margins": 0.2064622938632965, "rewards/rejected": -0.5898860692977905, "step": 480 }, { "epoch": 0.12, "learning_rate": 4.995326334993508e-06, "logits/chosen": -2.2591309547424316, "logits/rejected": -1.7779147624969482, "logps/chosen": -460.14752197265625, "logps/rejected": -540.4667358398438, "loss": 0.2671, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27436771988868713, "rewards/margins": 0.1594381481409073, "rewards/rejected": -0.4338058531284332, "step": 490 }, { "epoch": 0.12, "learning_rate": 4.993958727614462e-06, "logits/chosen": -1.9684454202651978, "logits/rejected": -1.4450188875198364, "logps/chosen": -500.50848388671875, "logps/rejected": -647.1019897460938, "loss": 0.2604, "rewards/accuracies": 0.75, "rewards/chosen": -0.34749436378479004, "rewards/margins": 0.1976911574602127, "rewards/rejected": -0.5451855063438416, "step": 500 }, { "epoch": 0.12, "eval_logits/chosen": -2.0449163913726807, "eval_logits/rejected": -1.864294171333313, "eval_logps/chosen": -587.9281005859375, "eval_logps/rejected": -612.2919311523438, "eval_loss": 0.08259331434965134, "eval_rewards/accuracies": 0.48353293538093567, "eval_rewards/chosen": -0.3220503628253937, "eval_rewards/margins": 0.03926531970500946, "eval_rewards/rejected": -0.36131569743156433, "eval_runtime": 1203.5016, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 500 }, { "epoch": 0.12, "learning_rate": 4.992416085625115e-06, "logits/chosen": -2.211124897003174, "logits/rejected": -1.4645628929138184, "logps/chosen": -502.86651611328125, "logps/rejected": -650.7592163085938, "loss": 0.2455, "rewards/accuracies": 0.75, "rewards/chosen": -0.3425281047821045, "rewards/margins": 0.20437660813331604, "rewards/rejected": -0.5469046831130981, "step": 510 }, { "epoch": 0.12, "learning_rate": 4.990698517293394e-06, "logits/chosen": -2.18019700050354, "logits/rejected": -1.7085288763046265, "logps/chosen": -345.96453857421875, "logps/rejected": -494.069580078125, "loss": 0.2906, "rewards/accuracies": 0.625, "rewards/chosen": -0.20625567436218262, "rewards/margins": 0.18142256140708923, "rewards/rejected": -0.38767823576927185, "step": 520 }, { "epoch": 0.13, "learning_rate": 4.988806143164159e-06, "logits/chosen": -2.2827541828155518, "logits/rejected": -1.7515159845352173, "logps/chosen": -438.8084411621094, "logps/rejected": -614.1343383789062, "loss": 0.2672, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2659434974193573, "rewards/margins": 0.21279501914978027, "rewards/rejected": -0.47873854637145996, "step": 530 }, { "epoch": 0.13, "learning_rate": 4.98673909605074e-06, "logits/chosen": -2.0330111980438232, "logits/rejected": -1.4639112949371338, "logps/chosen": -537.9144287109375, "logps/rejected": -713.9910278320312, "loss": 0.2527, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.44672146439552307, "rewards/margins": 0.18766596913337708, "rewards/rejected": -0.6343873739242554, "step": 540 }, { "epoch": 0.13, "learning_rate": 4.984497521025622e-06, "logits/chosen": -2.2105565071105957, "logits/rejected": -1.4069889783859253, "logps/chosen": -542.669921875, "logps/rejected": -775.8263549804688, "loss": 0.2405, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.40085896849632263, "rewards/margins": 0.2834365665912628, "rewards/rejected": -0.6842954754829407, "step": 550 }, { "epoch": 0.13, "learning_rate": 4.982081575410256e-06, "logits/chosen": -2.0892019271850586, "logits/rejected": -1.392425298690796, "logps/chosen": -382.64324951171875, "logps/rejected": -578.7179565429688, "loss": 0.2516, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23520061373710632, "rewards/margins": 0.25908973813056946, "rewards/rejected": -0.4942903518676758, "step": 560 }, { "epoch": 0.14, "learning_rate": 4.9794914287640264e-06, "logits/chosen": -2.0458247661590576, "logits/rejected": -1.3753255605697632, "logps/chosen": -428.4219665527344, "logps/rejected": -638.8211669921875, "loss": 0.2238, "rewards/accuracies": 0.875, "rewards/chosen": -0.2656584680080414, "rewards/margins": 0.25109249353408813, "rewards/rejected": -0.5167509317398071, "step": 570 }, { "epoch": 0.14, "learning_rate": 4.97672726287234e-06, "logits/chosen": -1.9591379165649414, "logits/rejected": -1.1869251728057861, "logps/chosen": -559.7215576171875, "logps/rejected": -756.046875, "loss": 0.2294, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.40815895795822144, "rewards/margins": 0.2627512514591217, "rewards/rejected": -0.6709102392196655, "step": 580 }, { "epoch": 0.14, "learning_rate": 4.973789271733877e-06, "logits/chosen": -1.920282006263733, "logits/rejected": -1.284289836883545, "logps/chosen": -598.05517578125, "logps/rejected": -780.0867919921875, "loss": 0.2434, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.42106491327285767, "rewards/margins": 0.24786987900733948, "rewards/rejected": -0.6689347624778748, "step": 590 }, { "epoch": 0.14, "learning_rate": 4.970677661546972e-06, "logits/chosen": -2.094456911087036, "logits/rejected": -1.331095576286316, "logps/chosen": -331.28814697265625, "logps/rejected": -457.91357421875, "loss": 0.2778, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16717010736465454, "rewards/margins": 0.20541410148143768, "rewards/rejected": -0.3725842237472534, "step": 600 }, { "epoch": 0.14, "eval_logits/chosen": -2.038715362548828, "eval_logits/rejected": -1.858792781829834, "eval_logps/chosen": -559.9217529296875, "eval_logps/rejected": -581.3212280273438, "eval_loss": 0.10329626500606537, "eval_rewards/accuracies": 0.47604790329933167, "eval_rewards/chosen": -0.29404398798942566, "eval_rewards/margins": 0.036301057785749435, "eval_rewards/rejected": -0.3303450644016266, "eval_runtime": 1203.5816, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 600 }, { "epoch": 0.15, "learning_rate": 4.967392650695141e-06, "logits/chosen": -2.105530023574829, "logits/rejected": -1.4126538038253784, "logps/chosen": -502.4693908691406, "logps/rejected": -687.6373291015625, "loss": 0.2302, "rewards/accuracies": 0.75, "rewards/chosen": -0.3487677574157715, "rewards/margins": 0.24097435176372528, "rewards/rejected": -0.5897420644760132, "step": 610 }, { "epoch": 0.15, "learning_rate": 4.963934469731756e-06, "logits/chosen": -2.2350103855133057, "logits/rejected": -1.4374760389328003, "logps/chosen": -585.46142578125, "logps/rejected": -764.7803344726562, "loss": 0.2396, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3805561363697052, "rewards/margins": 0.2608867883682251, "rewards/rejected": -0.6414428949356079, "step": 620 }, { "epoch": 0.15, "learning_rate": 4.960303361363863e-06, "logits/chosen": -2.2643039226531982, "logits/rejected": -1.3659436702728271, "logps/chosen": -410.9107360839844, "logps/rejected": -657.2432861328125, "loss": 0.2117, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2585026025772095, "rewards/margins": 0.2946074604988098, "rewards/rejected": -0.5531100630760193, "step": 630 }, { "epoch": 0.15, "learning_rate": 4.95649958043515e-06, "logits/chosen": -2.051509380340576, "logits/rejected": -1.5291717052459717, "logps/chosen": -479.66485595703125, "logps/rejected": -612.9735717773438, "loss": 0.3096, "rewards/accuracies": 0.75, "rewards/chosen": -0.32900354266166687, "rewards/margins": 0.15808680653572083, "rewards/rejected": -0.4870903491973877, "step": 640 }, { "epoch": 0.16, "learning_rate": 4.952523393908059e-06, "logits/chosen": -2.1708595752716064, "logits/rejected": -1.4666064977645874, "logps/chosen": -429.61993408203125, "logps/rejected": -645.1808471679688, "loss": 0.2658, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24558138847351074, "rewards/margins": 0.2705684304237366, "rewards/rejected": -0.5161498188972473, "step": 650 }, { "epoch": 0.16, "learning_rate": 4.94837508084505e-06, "logits/chosen": -1.6731560230255127, "logits/rejected": -1.053863763809204, "logps/chosen": -567.708251953125, "logps/rejected": -712.137451171875, "loss": 0.2458, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4341394007205963, "rewards/margins": 0.17748543620109558, "rewards/rejected": -0.6116248369216919, "step": 660 }, { "epoch": 0.16, "learning_rate": 4.944054932389018e-06, "logits/chosen": -1.9068609476089478, "logits/rejected": -1.1263360977172852, "logps/chosen": -494.4112854003906, "logps/rejected": -656.45849609375, "loss": 0.3016, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3296416401863098, "rewards/margins": 0.21011313796043396, "rewards/rejected": -0.5397548079490662, "step": 670 }, { "epoch": 0.16, "learning_rate": 4.9395632517428546e-06, "logits/chosen": -1.8886890411376953, "logits/rejected": -1.3575305938720703, "logps/chosen": -529.0499267578125, "logps/rejected": -631.37548828125, "loss": 0.2584, "rewards/accuracies": 0.875, "rewards/chosen": -0.36437273025512695, "rewards/margins": 0.1604866236448288, "rewards/rejected": -0.5248593091964722, "step": 680 }, { "epoch": 0.17, "learning_rate": 4.934900354148173e-06, "logits/chosen": -2.286695957183838, "logits/rejected": -1.578734278678894, "logps/chosen": -428.54754638671875, "logps/rejected": -578.58154296875, "loss": 0.2392, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2616836130619049, "rewards/margins": 0.20264342427253723, "rewards/rejected": -0.464326947927475, "step": 690 }, { "epoch": 0.17, "learning_rate": 4.930066566863182e-06, "logits/chosen": -2.213470220565796, "logits/rejected": -1.4638140201568604, "logps/chosen": -487.4681091308594, "logps/rejected": -714.6258544921875, "loss": 0.2631, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3131553530693054, "rewards/margins": 0.2786567509174347, "rewards/rejected": -0.5918121337890625, "step": 700 }, { "epoch": 0.17, "eval_logits/chosen": -2.025181293487549, "eval_logits/rejected": -1.8458251953125, "eval_logps/chosen": -624.5896606445312, "eval_logps/rejected": -653.3798217773438, "eval_loss": 0.10840175300836563, "eval_rewards/accuracies": 0.4865269362926483, "eval_rewards/chosen": -0.35871198773384094, "eval_rewards/margins": 0.04369162395596504, "eval_rewards/rejected": -0.4024035930633545, "eval_runtime": 1203.1607, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 700 }, { "epoch": 0.17, "learning_rate": 4.9250622291397144e-06, "logits/chosen": -2.1941981315612793, "logits/rejected": -1.6003679037094116, "logps/chosen": -461.31085205078125, "logps/rejected": -647.9520263671875, "loss": 0.3026, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.33714646100997925, "rewards/margins": 0.21369722485542297, "rewards/rejected": -0.5508437156677246, "step": 710 }, { "epoch": 0.17, "learning_rate": 4.919887692199423e-06, "logits/chosen": -2.0292787551879883, "logits/rejected": -1.356974720954895, "logps/chosen": -588.8004150390625, "logps/rejected": -770.320068359375, "loss": 0.2344, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4120548665523529, "rewards/margins": 0.24379810690879822, "rewards/rejected": -0.6558529734611511, "step": 720 }, { "epoch": 0.18, "learning_rate": 4.914543319209126e-06, "logits/chosen": -2.038294553756714, "logits/rejected": -1.4942939281463623, "logps/chosen": -495.69830322265625, "logps/rejected": -639.773681640625, "loss": 0.2385, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3475106954574585, "rewards/margins": 0.1999860405921936, "rewards/rejected": -0.5474967360496521, "step": 730 }, { "epoch": 0.18, "learning_rate": 4.909029485255321e-06, "logits/chosen": -1.7691395282745361, "logits/rejected": -1.154401183128357, "logps/chosen": -598.4854125976562, "logps/rejected": -780.4405517578125, "loss": 0.2614, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4482877850532532, "rewards/margins": 0.21686573326587677, "rewards/rejected": -0.6651535630226135, "step": 740 }, { "epoch": 0.18, "learning_rate": 4.903346577317859e-06, "logits/chosen": -2.172246217727661, "logits/rejected": -1.5015490055084229, "logps/chosen": -461.6849670410156, "logps/rejected": -630.1130981445312, "loss": 0.2582, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31072449684143066, "rewards/margins": 0.22744186222553253, "rewards/rejected": -0.5381664037704468, "step": 750 }, { "epoch": 0.18, "learning_rate": 4.8974949942427854e-06, "logits/chosen": -2.016395092010498, "logits/rejected": -1.4038422107696533, "logps/chosen": -401.8443298339844, "logps/rejected": -590.9119873046875, "loss": 0.2531, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24203196167945862, "rewards/margins": 0.2207801342010498, "rewards/rejected": -0.4628120958805084, "step": 760 }, { "epoch": 0.18, "learning_rate": 4.891475146714348e-06, "logits/chosen": -1.9541422128677368, "logits/rejected": -1.3009142875671387, "logps/chosen": -558.2137451171875, "logps/rejected": -718.1923828125, "loss": 0.2535, "rewards/accuracies": 0.875, "rewards/chosen": -0.4078094959259033, "rewards/margins": 0.21052256226539612, "rewards/rejected": -0.618332028388977, "step": 770 }, { "epoch": 0.19, "learning_rate": 4.8852874572261715e-06, "logits/chosen": -1.756496787071228, "logits/rejected": -1.3869259357452393, "logps/chosen": -650.5875854492188, "logps/rejected": -800.0562744140625, "loss": 0.25, "rewards/accuracies": 0.75, "rewards/chosen": -0.5272155404090881, "rewards/margins": 0.16045689582824707, "rewards/rejected": -0.68767249584198, "step": 780 }, { "epoch": 0.19, "learning_rate": 4.878932360051611e-06, "logits/chosen": -2.051021099090576, "logits/rejected": -1.3476202487945557, "logps/chosen": -510.95947265625, "logps/rejected": -655.3389892578125, "loss": 0.2505, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.31041261553764343, "rewards/margins": 0.23392383754253387, "rewards/rejected": -0.5443364381790161, "step": 790 }, { "epoch": 0.19, "learning_rate": 4.872410301213265e-06, "logits/chosen": -2.1144466400146484, "logits/rejected": -1.4822992086410522, "logps/chosen": -519.9110717773438, "logps/rejected": -679.2328491210938, "loss": 0.2264, "rewards/accuracies": 0.75, "rewards/chosen": -0.33818256855010986, "rewards/margins": 0.23063333332538605, "rewards/rejected": -0.5688159465789795, "step": 800 }, { "epoch": 0.19, "eval_logits/chosen": -2.0501108169555664, "eval_logits/rejected": -1.87258780002594, "eval_logps/chosen": -501.3898620605469, "eval_logps/rejected": -524.3302612304688, "eval_loss": 0.11576098948717117, "eval_rewards/accuracies": 0.473053902387619, "eval_rewards/chosen": -0.2355121225118637, "eval_rewards/margins": 0.037841975688934326, "eval_rewards/rejected": -0.27335408329963684, "eval_runtime": 1203.8519, "eval_samples_per_second": 1.661, "eval_steps_per_second": 0.277, "step": 800 }, { "epoch": 0.19, "learning_rate": 4.86572173845168e-06, "logits/chosen": -2.0405406951904297, "logits/rejected": -1.2668204307556152, "logps/chosen": -300.8737487792969, "logps/rejected": -487.8505859375, "loss": 0.226, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1566271334886551, "rewards/margins": 0.2406269758939743, "rewards/rejected": -0.3972541391849518, "step": 810 }, { "epoch": 0.2, "learning_rate": 4.8588671411932195e-06, "logits/chosen": -2.1229705810546875, "logits/rejected": -1.7653032541275024, "logps/chosen": -421.510986328125, "logps/rejected": -498.025390625, "loss": 0.2888, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2679327130317688, "rewards/margins": 0.11803333461284637, "rewards/rejected": -0.3859660029411316, "step": 820 }, { "epoch": 0.2, "learning_rate": 4.851846990517118e-06, "logits/chosen": -2.066283702850342, "logits/rejected": -1.4142102003097534, "logps/chosen": -519.0017700195312, "logps/rejected": -696.2020263671875, "loss": 0.2132, "rewards/accuracies": 0.875, "rewards/chosen": -0.3236960768699646, "rewards/margins": 0.24426403641700745, "rewards/rejected": -0.5679601430892944, "step": 830 }, { "epoch": 0.2, "learning_rate": 4.844661779121723e-06, "logits/chosen": -2.071094036102295, "logits/rejected": -1.4416927099227905, "logps/chosen": -422.37042236328125, "logps/rejected": -665.9434814453125, "loss": 0.2252, "rewards/accuracies": 0.875, "rewards/chosen": -0.2813955545425415, "rewards/margins": 0.2645932137966156, "rewards/rejected": -0.5459887981414795, "step": 840 }, { "epoch": 0.2, "learning_rate": 4.837312011289907e-06, "logits/chosen": -1.9781221151351929, "logits/rejected": -1.3000903129577637, "logps/chosen": -364.5234069824219, "logps/rejected": -544.6810302734375, "loss": 0.2281, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2271805703639984, "rewards/margins": 0.22540287673473358, "rewards/rejected": -0.4525834023952484, "step": 850 }, { "epoch": 0.21, "learning_rate": 4.829798202853683e-06, "logits/chosen": -2.1561391353607178, "logits/rejected": -1.5510307550430298, "logps/chosen": -463.4254455566406, "logps/rejected": -614.3165893554688, "loss": 0.2597, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35351258516311646, "rewards/margins": 0.16675546765327454, "rewards/rejected": -0.5202680826187134, "step": 860 }, { "epoch": 0.21, "learning_rate": 4.822120881157998e-06, "logits/chosen": -1.9491517543792725, "logits/rejected": -1.471637487411499, "logps/chosen": -488.54736328125, "logps/rejected": -602.2813110351562, "loss": 0.2591, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3221733868122101, "rewards/margins": 0.1643521785736084, "rewards/rejected": -0.4865254759788513, "step": 870 }, { "epoch": 0.21, "learning_rate": 4.81428058502372e-06, "logits/chosen": -1.8634504079818726, "logits/rejected": -1.4165079593658447, "logps/chosen": -496.98541259765625, "logps/rejected": -616.6558227539062, "loss": 0.2696, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3558669686317444, "rewards/margins": 0.15624353289604187, "rewards/rejected": -0.5121105313301086, "step": 880 }, { "epoch": 0.21, "learning_rate": 4.806277864709828e-06, "logits/chosen": -2.1702616214752197, "logits/rejected": -1.448373556137085, "logps/chosen": -471.16552734375, "logps/rejected": -660.8609619140625, "loss": 0.2164, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3139738440513611, "rewards/margins": 0.23638978600502014, "rewards/rejected": -0.5503636002540588, "step": 890 }, { "epoch": 0.22, "learning_rate": 4.798113281874788e-06, "logits/chosen": -2.061413526535034, "logits/rejected": -1.572426676750183, "logps/chosen": -448.4598693847656, "logps/rejected": -584.1369018554688, "loss": 0.2593, "rewards/accuracies": 0.75, "rewards/chosen": -0.27689287066459656, "rewards/margins": 0.18237103521823883, "rewards/rejected": -0.45926395058631897, "step": 900 }, { "epoch": 0.22, "eval_logits/chosen": -1.959326148033142, "eval_logits/rejected": -1.788332223892212, "eval_logps/chosen": -538.8648071289062, "eval_logps/rejected": -572.4186401367188, "eval_loss": 0.10479563474655151, "eval_rewards/accuracies": 0.4865269362926483, "eval_rewards/chosen": -0.2729870676994324, "eval_rewards/margins": 0.048455387353897095, "eval_rewards/rejected": -0.32144248485565186, "eval_runtime": 1203.161, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 900 }, { "epoch": 0.22, "learning_rate": 4.789787409537131e-06, "logits/chosen": -2.069319248199463, "logits/rejected": -1.328410267829895, "logps/chosen": -427.32696533203125, "logps/rejected": -632.281982421875, "loss": 0.2241, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2708687484264374, "rewards/margins": 0.25986698269844055, "rewards/rejected": -0.5307357311248779, "step": 910 }, { "epoch": 0.22, "learning_rate": 4.7813008320352475e-06, "logits/chosen": -2.3153481483459473, "logits/rejected": -2.0093064308166504, "logps/chosen": -452.0673828125, "logps/rejected": -569.5745849609375, "loss": 0.2583, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2830807566642761, "rewards/margins": 0.14870959520339966, "rewards/rejected": -0.431790292263031, "step": 920 }, { "epoch": 0.22, "learning_rate": 4.772654144986364e-06, "logits/chosen": -2.1607768535614014, "logits/rejected": -1.5249392986297607, "logps/chosen": -493.0628967285156, "logps/rejected": -692.3101806640625, "loss": 0.2709, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.35409092903137207, "rewards/margins": 0.2260843962430954, "rewards/rejected": -0.5801753401756287, "step": 930 }, { "epoch": 0.23, "learning_rate": 4.763847955244749e-06, "logits/chosen": -1.9021730422973633, "logits/rejected": -1.4885327816009521, "logps/chosen": -548.5184936523438, "logps/rejected": -677.77099609375, "loss": 0.2867, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3523848056793213, "rewards/margins": 0.19301694631576538, "rewards/rejected": -0.5454016923904419, "step": 940 }, { "epoch": 0.23, "learning_rate": 4.75488288085912e-06, "logits/chosen": -2.131861925125122, "logits/rejected": -1.4264814853668213, "logps/chosen": -516.8963623046875, "logps/rejected": -701.9620971679688, "loss": 0.27, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3451305627822876, "rewards/margins": 0.26578670740127563, "rewards/rejected": -0.6109172105789185, "step": 950 }, { "epoch": 0.23, "learning_rate": 4.7457595510292615e-06, "logits/chosen": -1.8315563201904297, "logits/rejected": -1.1477875709533691, "logps/chosen": -574.5037841796875, "logps/rejected": -733.3638305664062, "loss": 0.2175, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4093077778816223, "rewards/margins": 0.20782968401908875, "rewards/rejected": -0.6171373724937439, "step": 960 }, { "epoch": 0.23, "learning_rate": 4.736478606061876e-06, "logits/chosen": -1.8318229913711548, "logits/rejected": -1.091698408126831, "logps/chosen": -604.476318359375, "logps/rejected": -824.9821166992188, "loss": 0.2405, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4510875344276428, "rewards/margins": 0.2847138047218323, "rewards/rejected": -0.7358013391494751, "step": 970 }, { "epoch": 0.24, "learning_rate": 4.727040697325634e-06, "logits/chosen": -2.106877565383911, "logits/rejected": -1.4905835390090942, "logps/chosen": -394.05206298828125, "logps/rejected": -628.0339965820312, "loss": 0.2363, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.25630083680152893, "rewards/margins": 0.2429000437259674, "rewards/rejected": -0.4992009103298187, "step": 980 }, { "epoch": 0.24, "learning_rate": 4.717446487205466e-06, "logits/chosen": -1.7781366109848022, "logits/rejected": -1.100829005241394, "logps/chosen": -392.24334716796875, "logps/rejected": -559.0985107421875, "loss": 0.2331, "rewards/accuracies": 0.875, "rewards/chosen": -0.25553885102272034, "rewards/margins": 0.20305395126342773, "rewards/rejected": -0.45859280228614807, "step": 990 }, { "epoch": 0.24, "learning_rate": 4.707696649056073e-06, "logits/chosen": -2.1286673545837402, "logits/rejected": -1.3238550424575806, "logps/chosen": -508.052734375, "logps/rejected": -696.025390625, "loss": 0.2248, "rewards/accuracies": 0.875, "rewards/chosen": -0.33855944871902466, "rewards/margins": 0.2581236660480499, "rewards/rejected": -0.596683144569397, "step": 1000 }, { "epoch": 0.24, "eval_logits/chosen": -2.008842706680298, "eval_logits/rejected": -1.830832600593567, "eval_logps/chosen": -541.15478515625, "eval_logps/rejected": -572.58056640625, "eval_loss": 0.11222018301486969, "eval_rewards/accuracies": 0.47604790329933167, "eval_rewards/chosen": -0.27527713775634766, "eval_rewards/margins": 0.04632725566625595, "eval_rewards/rejected": -0.3216043710708618, "eval_runtime": 1203.4302, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 1000 }, { "epoch": 0.24, "learning_rate": 4.6977918671546635e-06, "logits/chosen": -1.8748924732208252, "logits/rejected": -1.3912321329116821, "logps/chosen": -472.98175048828125, "logps/rejected": -626.2139892578125, "loss": 0.2718, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3531573712825775, "rewards/margins": 0.183078795671463, "rewards/rejected": -0.5362361669540405, "step": 1010 }, { "epoch": 0.24, "learning_rate": 4.687732836652935e-06, "logits/chosen": -2.1505730152130127, "logits/rejected": -1.4997731447219849, "logps/chosen": -545.2147827148438, "logps/rejected": -727.5930786132812, "loss": 0.2263, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.35972410440444946, "rewards/margins": 0.23597171902656555, "rewards/rejected": -0.5956958532333374, "step": 1020 }, { "epoch": 0.25, "learning_rate": 4.67752026352828e-06, "logits/chosen": -1.992722749710083, "logits/rejected": -1.3906102180480957, "logps/chosen": -472.16400146484375, "logps/rejected": -638.1741333007812, "loss": 0.2518, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31594809889793396, "rewards/margins": 0.2172807902097702, "rewards/rejected": -0.5332289338111877, "step": 1030 }, { "epoch": 0.25, "learning_rate": 4.667154864534245e-06, "logits/chosen": -2.091357707977295, "logits/rejected": -1.4316502809524536, "logps/chosen": -327.59185791015625, "logps/rejected": -419.2935485839844, "loss": 0.2616, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.12283909320831299, "rewards/margins": 0.16584691405296326, "rewards/rejected": -0.28868600726127625, "step": 1040 }, { "epoch": 0.25, "learning_rate": 4.65663736715022e-06, "logits/chosen": -2.312605381011963, "logits/rejected": -1.6226316690444946, "logps/chosen": -396.3221130371094, "logps/rejected": -608.3341674804688, "loss": 0.2609, "rewards/accuracies": 0.75, "rewards/chosen": -0.240009143948555, "rewards/margins": 0.261523574590683, "rewards/rejected": -0.5015326738357544, "step": 1050 }, { "epoch": 0.25, "learning_rate": 4.645968509530381e-06, "logits/chosen": -2.176739454269409, "logits/rejected": -1.7795130014419556, "logps/chosen": -432.09710693359375, "logps/rejected": -569.2291259765625, "loss": 0.2374, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.28482887148857117, "rewards/margins": 0.15277311205863953, "rewards/rejected": -0.43760204315185547, "step": 1060 }, { "epoch": 0.26, "learning_rate": 4.635149040451891e-06, "logits/chosen": -2.2085583209991455, "logits/rejected": -1.4051626920700073, "logps/chosen": -459.7369079589844, "logps/rejected": -706.0966186523438, "loss": 0.2175, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3029608726501465, "rewards/margins": 0.2853223979473114, "rewards/rejected": -0.5882833003997803, "step": 1070 }, { "epoch": 0.26, "learning_rate": 4.624179719262342e-06, "logits/chosen": -2.0442612171173096, "logits/rejected": -1.4930709600448608, "logps/chosen": -600.1920776367188, "logps/rejected": -770.61474609375, "loss": 0.2345, "rewards/accuracies": 0.875, "rewards/chosen": -0.4393043518066406, "rewards/margins": 0.21135537326335907, "rewards/rejected": -0.6506597399711609, "step": 1080 }, { "epoch": 0.26, "learning_rate": 4.6130613158264605e-06, "logits/chosen": -1.9211866855621338, "logits/rejected": -1.5160692930221558, "logps/chosen": -563.6661376953125, "logps/rejected": -651.0433959960938, "loss": 0.2346, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.39791566133499146, "rewards/margins": 0.15383829176425934, "rewards/rejected": -0.5517539381980896, "step": 1090 }, { "epoch": 0.26, "learning_rate": 4.601794610472083e-06, "logits/chosen": -2.1883292198181152, "logits/rejected": -1.439866065979004, "logps/chosen": -410.6734924316406, "logps/rejected": -619.9073486328125, "loss": 0.2345, "rewards/accuracies": 0.875, "rewards/chosen": -0.25752681493759155, "rewards/margins": 0.2435484379529953, "rewards/rejected": -0.5010752081871033, "step": 1100 }, { "epoch": 0.26, "eval_logits/chosen": -2.040632724761963, "eval_logits/rejected": -1.862848162651062, "eval_logps/chosen": -525.3046264648438, "eval_logps/rejected": -548.6309814453125, "eval_loss": 0.12486293911933899, "eval_rewards/accuracies": 0.458083838224411, "eval_rewards/chosen": -0.25942695140838623, "eval_rewards/margins": 0.038227882236242294, "eval_rewards/rejected": -0.29765480756759644, "eval_runtime": 1203.3782, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 1100 }, { "epoch": 0.27, "learning_rate": 4.590380393935383e-06, "logits/chosen": -2.2218379974365234, "logits/rejected": -1.6870031356811523, "logps/chosen": -420.0345764160156, "logps/rejected": -609.2630004882812, "loss": 0.2559, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.26971399784088135, "rewards/margins": 0.2136654406785965, "rewards/rejected": -0.48337942361831665, "step": 1110 }, { "epoch": 0.27, "learning_rate": 4.578819467305375e-06, "logits/chosen": -2.0684561729431152, "logits/rejected": -1.4911776781082153, "logps/chosen": -431.186279296875, "logps/rejected": -609.454345703125, "loss": 0.2241, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.31041502952575684, "rewards/margins": 0.2138279676437378, "rewards/rejected": -0.5242429971694946, "step": 1120 }, { "epoch": 0.27, "learning_rate": 4.567112641967697e-06, "logits/chosen": -2.0992836952209473, "logits/rejected": -1.482446312904358, "logps/chosen": -453.166748046875, "logps/rejected": -623.111572265625, "loss": 0.2381, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.285548597574234, "rewards/margins": 0.2099190205335617, "rewards/rejected": -0.4954676628112793, "step": 1130 }, { "epoch": 0.27, "learning_rate": 4.555260739547657e-06, "logits/chosen": -1.9118674993515015, "logits/rejected": -1.366361141204834, "logps/chosen": -547.6295166015625, "logps/rejected": -728.0347900390625, "loss": 0.2383, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.42481884360313416, "rewards/margins": 0.2053198367357254, "rewards/rejected": -0.6301386952400208, "step": 1140 }, { "epoch": 0.28, "learning_rate": 4.543264591852572e-06, "logits/chosen": -2.1311652660369873, "logits/rejected": -1.6140754222869873, "logps/chosen": -462.1819763183594, "logps/rejected": -635.8753662109375, "loss": 0.2499, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3019821047782898, "rewards/margins": 0.20516860485076904, "rewards/rejected": -0.5071507692337036, "step": 1150 }, { "epoch": 0.28, "learning_rate": 4.531125040813392e-06, "logits/chosen": -2.1703248023986816, "logits/rejected": -1.3149917125701904, "logps/chosen": -461.96142578125, "logps/rejected": -657.17333984375, "loss": 0.251, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.29148387908935547, "rewards/margins": 0.2733469605445862, "rewards/rejected": -0.5648308992385864, "step": 1160 }, { "epoch": 0.28, "learning_rate": 4.518842938425606e-06, "logits/chosen": -2.1028647422790527, "logits/rejected": -1.3059847354888916, "logps/chosen": -435.107177734375, "logps/rejected": -605.2284545898438, "loss": 0.2235, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2709727883338928, "rewards/margins": 0.24675357341766357, "rewards/rejected": -0.5177263021469116, "step": 1170 }, { "epoch": 0.28, "learning_rate": 4.506419146689445e-06, "logits/chosen": -2.143951892852783, "logits/rejected": -1.8283379077911377, "logps/chosen": -457.28424072265625, "logps/rejected": -572.7716674804688, "loss": 0.2802, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3253043293952942, "rewards/margins": 0.1499744951725006, "rewards/rejected": -0.4752787947654724, "step": 1180 }, { "epoch": 0.29, "learning_rate": 4.493854537549393e-06, "logits/chosen": -2.354525089263916, "logits/rejected": -1.8402080535888672, "logps/chosen": -418.96832275390625, "logps/rejected": -549.19091796875, "loss": 0.276, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2467678338289261, "rewards/margins": 0.17465372383594513, "rewards/rejected": -0.42142146825790405, "step": 1190 }, { "epoch": 0.29, "learning_rate": 4.4811499928329775e-06, "logits/chosen": -2.2344844341278076, "logits/rejected": -1.6635558605194092, "logps/chosen": -495.52685546875, "logps/rejected": -704.2921142578125, "loss": 0.2, "rewards/accuracies": 0.875, "rewards/chosen": -0.34860464930534363, "rewards/margins": 0.24101896584033966, "rewards/rejected": -0.5896236300468445, "step": 1200 }, { "epoch": 0.29, "eval_logits/chosen": -2.0176565647125244, "eval_logits/rejected": -1.838198184967041, "eval_logps/chosen": -645.4562377929688, "eval_logps/rejected": -675.9450073242188, "eval_loss": 0.12122058868408203, "eval_rewards/accuracies": 0.492514967918396, "eval_rewards/chosen": -0.3795784115791321, "eval_rewards/margins": 0.04539034515619278, "eval_rewards/rejected": -0.42496877908706665, "eval_runtime": 1203.4652, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 1200 }, { "epoch": 0.29, "learning_rate": 4.468306404188887e-06, "logits/chosen": -2.237093448638916, "logits/rejected": -1.4834849834442139, "logps/chosen": -452.543701171875, "logps/rejected": -645.3001708984375, "loss": 0.2177, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.30300500988960266, "rewards/margins": 0.2524145245552063, "rewards/rejected": -0.5554195642471313, "step": 1210 }, { "epoch": 0.29, "learning_rate": 4.455324673024396e-06, "logits/chosen": -2.057074546813965, "logits/rejected": -1.2809579372406006, "logps/chosen": -424.4122009277344, "logps/rejected": -615.1519775390625, "loss": 0.2296, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2470451295375824, "rewards/margins": 0.2809050381183624, "rewards/rejected": -0.5279501676559448, "step": 1220 }, { "epoch": 0.3, "learning_rate": 4.442205710442095e-06, "logits/chosen": -2.1770660877227783, "logits/rejected": -1.5463249683380127, "logps/chosen": -479.94293212890625, "logps/rejected": -671.468505859375, "loss": 0.2335, "rewards/accuracies": 0.875, "rewards/chosen": -0.34120839834213257, "rewards/margins": 0.21707534790039062, "rewards/rejected": -0.5582836866378784, "step": 1230 }, { "epoch": 0.3, "learning_rate": 4.428950437175944e-06, "logits/chosen": -2.4161217212677, "logits/rejected": -1.8605661392211914, "logps/chosen": -385.0937805175781, "logps/rejected": -548.4673461914062, "loss": 0.2351, "rewards/accuracies": 0.75, "rewards/chosen": -0.23536495864391327, "rewards/margins": 0.21707212924957275, "rewards/rejected": -0.4524371027946472, "step": 1240 }, { "epoch": 0.3, "learning_rate": 4.415559783526661e-06, "logits/chosen": -2.1271445751190186, "logits/rejected": -1.640547513961792, "logps/chosen": -457.23358154296875, "logps/rejected": -636.016845703125, "loss": 0.2535, "rewards/accuracies": 0.75, "rewards/chosen": -0.30234211683273315, "rewards/margins": 0.23468852043151855, "rewards/rejected": -0.5370305776596069, "step": 1250 }, { "epoch": 0.3, "learning_rate": 4.402034689296425e-06, "logits/chosen": -2.2080228328704834, "logits/rejected": -1.6849820613861084, "logps/chosen": -509.2059631347656, "logps/rejected": -640.7454833984375, "loss": 0.2411, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.34007102251052856, "rewards/margins": 0.18306025862693787, "rewards/rejected": -0.5231313109397888, "step": 1260 }, { "epoch": 0.3, "learning_rate": 4.388376103722914e-06, "logits/chosen": -2.217073917388916, "logits/rejected": -1.4917023181915283, "logps/chosen": -409.48211669921875, "logps/rejected": -595.3389282226562, "loss": 0.2212, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.26184767484664917, "rewards/margins": 0.21404054760932922, "rewards/rejected": -0.4758882522583008, "step": 1270 }, { "epoch": 0.31, "learning_rate": 4.374584985412692e-06, "logits/chosen": -2.0558717250823975, "logits/rejected": -1.4697771072387695, "logps/chosen": -477.6874084472656, "logps/rejected": -670.9654541015625, "loss": 0.2266, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3099061846733093, "rewards/margins": 0.2663113474845886, "rewards/rejected": -0.576217532157898, "step": 1280 }, { "epoch": 0.31, "learning_rate": 4.360662302273926e-06, "logits/chosen": -2.5113179683685303, "logits/rejected": -1.8599239587783813, "logps/chosen": -390.2957458496094, "logps/rejected": -549.9722900390625, "loss": 0.2255, "rewards/accuracies": 0.75, "rewards/chosen": -0.23061151802539825, "rewards/margins": 0.2004891186952591, "rewards/rejected": -0.43110060691833496, "step": 1290 }, { "epoch": 0.31, "learning_rate": 4.3466090314484526e-06, "logits/chosen": -2.3822035789489746, "logits/rejected": -1.3982654809951782, "logps/chosen": -370.07586669921875, "logps/rejected": -603.9423828125, "loss": 0.2246, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.21468210220336914, "rewards/margins": 0.2874305248260498, "rewards/rejected": -0.5021125674247742, "step": 1300 }, { "epoch": 0.31, "eval_logits/chosen": -2.144918918609619, "eval_logits/rejected": -1.958416223526001, "eval_logps/chosen": -520.653076171875, "eval_logps/rejected": -553.978271484375, "eval_loss": 0.11018311232328415, "eval_rewards/accuracies": 0.485029935836792, "eval_rewards/chosen": -0.2547752559185028, "eval_rewards/margins": 0.04822676628828049, "eval_rewards/rejected": -0.3030020594596863, "eval_runtime": 1203.4406, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 1300 }, { "epoch": 0.31, "learning_rate": 4.332426159243206e-06, "logits/chosen": -2.204087495803833, "logits/rejected": -1.716626524925232, "logps/chosen": -454.99200439453125, "logps/rejected": -582.8176879882812, "loss": 0.2187, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3013390898704529, "rewards/margins": 0.18349790573120117, "rewards/rejected": -0.48483699560165405, "step": 1310 }, { "epoch": 0.32, "learning_rate": 4.318114681060989e-06, "logits/chosen": -2.1799073219299316, "logits/rejected": -1.6815240383148193, "logps/chosen": -403.85955810546875, "logps/rejected": -545.7196044921875, "loss": 0.2185, "rewards/accuracies": 0.75, "rewards/chosen": -0.26794302463531494, "rewards/margins": 0.17821064591407776, "rewards/rejected": -0.4461537003517151, "step": 1320 }, { "epoch": 0.32, "learning_rate": 4.303675601330618e-06, "logits/chosen": -2.35904860496521, "logits/rejected": -1.7340424060821533, "logps/chosen": -383.64532470703125, "logps/rejected": -585.9424438476562, "loss": 0.2445, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21578499674797058, "rewards/margins": 0.25628381967544556, "rewards/rejected": -0.47206878662109375, "step": 1330 }, { "epoch": 0.32, "learning_rate": 4.28910993343642e-06, "logits/chosen": -2.1612188816070557, "logits/rejected": -1.4686048030853271, "logps/chosen": -376.5054016113281, "logps/rejected": -623.7740478515625, "loss": 0.2465, "rewards/accuracies": 0.875, "rewards/chosen": -0.23754379153251648, "rewards/margins": 0.27792996168136597, "rewards/rejected": -0.5154737234115601, "step": 1340 }, { "epoch": 0.32, "learning_rate": 4.274418699647117e-06, "logits/chosen": -2.2266831398010254, "logits/rejected": -1.4922927618026733, "logps/chosen": -471.74273681640625, "logps/rejected": -711.8084716796875, "loss": 0.2449, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.30801472067832947, "rewards/margins": 0.2989717125892639, "rewards/rejected": -0.606986403465271, "step": 1350 }, { "epoch": 0.33, "learning_rate": 4.2596029310440826e-06, "logits/chosen": -2.2162411212921143, "logits/rejected": -1.713690996170044, "logps/chosen": -454.17340087890625, "logps/rejected": -648.7305908203125, "loss": 0.2377, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.278207391500473, "rewards/margins": 0.22771382331848145, "rewards/rejected": -0.5059212446212769, "step": 1360 }, { "epoch": 0.33, "learning_rate": 4.244663667448965e-06, "logits/chosen": -2.2029411792755127, "logits/rejected": -1.5216740369796753, "logps/chosen": -455.4468688964844, "logps/rejected": -685.3499755859375, "loss": 0.2351, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3026086688041687, "rewards/margins": 0.2744491994380951, "rewards/rejected": -0.577057957649231, "step": 1370 }, { "epoch": 0.33, "learning_rate": 4.229601957350722e-06, "logits/chosen": -2.325174331665039, "logits/rejected": -1.640735387802124, "logps/chosen": -398.3515319824219, "logps/rejected": -568.1483764648438, "loss": 0.2189, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23919232189655304, "rewards/margins": 0.23529152572155, "rewards/rejected": -0.4744838774204254, "step": 1380 }, { "epoch": 0.33, "learning_rate": 4.214418857832025e-06, "logits/chosen": -2.2072818279266357, "logits/rejected": -1.7126047611236572, "logps/chosen": -541.3663330078125, "logps/rejected": -677.165771484375, "loss": 0.2065, "rewards/accuracies": 0.75, "rewards/chosen": -0.3992317318916321, "rewards/margins": 0.18512602150440216, "rewards/rejected": -0.584357738494873, "step": 1390 }, { "epoch": 0.34, "learning_rate": 4.1991154344950755e-06, "logits/chosen": -2.420837163925171, "logits/rejected": -1.8687019348144531, "logps/chosen": -392.09844970703125, "logps/rejected": -547.9636840820312, "loss": 0.2481, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1913362443447113, "rewards/margins": 0.2113974541425705, "rewards/rejected": -0.402733713388443, "step": 1400 }, { "epoch": 0.34, "eval_logits/chosen": -2.0707738399505615, "eval_logits/rejected": -1.8876737356185913, "eval_logps/chosen": -564.654541015625, "eval_logps/rejected": -605.4993896484375, "eval_loss": 0.1082334965467453, "eval_rewards/accuracies": 0.49550896883010864, "eval_rewards/chosen": -0.298776775598526, "eval_rewards/margins": 0.055746398866176605, "eval_rewards/rejected": -0.3545231819152832, "eval_runtime": 1202.4636, "eval_samples_per_second": 1.663, "eval_steps_per_second": 0.278, "step": 1400 }, { "epoch": 0.34, "learning_rate": 4.183692761386813e-06, "logits/chosen": -2.3195748329162598, "logits/rejected": -1.7311146259307861, "logps/chosen": -505.4513244628906, "logps/rejected": -691.0968017578125, "loss": 0.2696, "rewards/accuracies": 0.875, "rewards/chosen": -0.3387676179409027, "rewards/margins": 0.24428458511829376, "rewards/rejected": -0.5830522179603577, "step": 1410 }, { "epoch": 0.34, "learning_rate": 4.168151920923536e-06, "logits/chosen": -2.1461219787597656, "logits/rejected": -1.2803102731704712, "logps/chosen": -452.47113037109375, "logps/rejected": -682.32568359375, "loss": 0.2239, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2477176934480667, "rewards/margins": 0.3097091615200043, "rewards/rejected": -0.5574268698692322, "step": 1420 }, { "epoch": 0.34, "learning_rate": 4.152494003814939e-06, "logits/chosen": -2.3391871452331543, "logits/rejected": -1.7408854961395264, "logps/chosen": -363.1476745605469, "logps/rejected": -525.1312255859375, "loss": 0.2504, "rewards/accuracies": 0.75, "rewards/chosen": -0.21484942734241486, "rewards/margins": 0.19908122718334198, "rewards/rejected": -0.41393065452575684, "step": 1430 }, { "epoch": 0.35, "learning_rate": 4.136720108987552e-06, "logits/chosen": -2.3591384887695312, "logits/rejected": -1.6594280004501343, "logps/chosen": -440.5811462402344, "logps/rejected": -658.6076049804688, "loss": 0.1996, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2752758860588074, "rewards/margins": 0.2533265948295593, "rewards/rejected": -0.5286025404930115, "step": 1440 }, { "epoch": 0.35, "learning_rate": 4.1208313435076255e-06, "logits/chosen": -2.3529205322265625, "logits/rejected": -1.6829198598861694, "logps/chosen": -462.7308654785156, "logps/rejected": -635.0047607421875, "loss": 0.229, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3021255135536194, "rewards/margins": 0.22096781432628632, "rewards/rejected": -0.5230933427810669, "step": 1450 }, { "epoch": 0.35, "learning_rate": 4.104828822503427e-06, "logits/chosen": -2.3641936779022217, "logits/rejected": -1.9770774841308594, "logps/chosen": -430.566650390625, "logps/rejected": -516.67724609375, "loss": 0.2942, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2665793001651764, "rewards/margins": 0.13289934396743774, "rewards/rejected": -0.39947864413261414, "step": 1460 }, { "epoch": 0.35, "learning_rate": 4.0887136690869774e-06, "logits/chosen": -2.1829793453216553, "logits/rejected": -1.2559645175933838, "logps/chosen": -402.70965576171875, "logps/rejected": -655.2979125976562, "loss": 0.2322, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22634737193584442, "rewards/margins": 0.3205374479293823, "rewards/rejected": -0.5468848347663879, "step": 1470 }, { "epoch": 0.36, "learning_rate": 4.072487014275228e-06, "logits/chosen": -2.1143088340759277, "logits/rejected": -1.6525228023529053, "logps/chosen": -526.2901611328125, "logps/rejected": -701.7463989257812, "loss": 0.2394, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.345375120639801, "rewards/margins": 0.21472780406475067, "rewards/rejected": -0.5601029396057129, "step": 1480 }, { "epoch": 0.36, "learning_rate": 4.056149996910683e-06, "logits/chosen": -1.8528966903686523, "logits/rejected": -1.3513877391815186, "logps/chosen": -693.648193359375, "logps/rejected": -803.7005615234375, "loss": 0.2581, "rewards/accuracies": 0.75, "rewards/chosen": -0.5191287994384766, "rewards/margins": 0.177033931016922, "rewards/rejected": -0.696162760257721, "step": 1490 }, { "epoch": 0.36, "learning_rate": 4.039703763581472e-06, "logits/chosen": -2.2459537982940674, "logits/rejected": -1.5481600761413574, "logps/chosen": -461.68853759765625, "logps/rejected": -584.2391967773438, "loss": 0.232, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.27624958753585815, "rewards/margins": 0.19465519487857819, "rewards/rejected": -0.4709048271179199, "step": 1500 }, { "epoch": 0.36, "eval_logits/chosen": -2.1255552768707275, "eval_logits/rejected": -1.940369725227356, "eval_logps/chosen": -508.01702880859375, "eval_logps/rejected": -541.716064453125, "eval_loss": 0.10533170402050018, "eval_rewards/accuracies": 0.4910179674625397, "eval_rewards/chosen": -0.2421393096446991, "eval_rewards/margins": 0.04860049486160278, "eval_rewards/rejected": -0.2907397747039795, "eval_runtime": 1202.3225, "eval_samples_per_second": 1.663, "eval_steps_per_second": 0.278, "step": 1500 }, { "epoch": 0.36, "learning_rate": 4.023149468540871e-06, "logits/chosen": -2.2070467472076416, "logits/rejected": -1.1731164455413818, "logps/chosen": -386.8218688964844, "logps/rejected": -641.1959228515625, "loss": 0.2474, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21106207370758057, "rewards/margins": 0.3297719359397888, "rewards/rejected": -0.5408340692520142, "step": 1510 }, { "epoch": 0.36, "learning_rate": 4.006488273626307e-06, "logits/chosen": -2.190619945526123, "logits/rejected": -1.777308702468872, "logps/chosen": -369.71697998046875, "logps/rejected": -509.35748291015625, "loss": 0.2341, "rewards/accuracies": 0.625, "rewards/chosen": -0.22588412463665009, "rewards/margins": 0.17034974694252014, "rewards/rejected": -0.39623385667800903, "step": 1520 }, { "epoch": 0.37, "learning_rate": 3.989721348177801e-06, "logits/chosen": -2.242610216140747, "logits/rejected": -1.4042037725448608, "logps/chosen": -430.89312744140625, "logps/rejected": -654.9481201171875, "loss": 0.2262, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2636120319366455, "rewards/margins": 0.2909775674343109, "rewards/rejected": -0.5545896291732788, "step": 1530 }, { "epoch": 0.37, "learning_rate": 3.972849868955913e-06, "logits/chosen": -2.2347402572631836, "logits/rejected": -1.4619821310043335, "logps/chosen": -459.16583251953125, "logps/rejected": -660.1212158203125, "loss": 0.2117, "rewards/accuracies": 0.875, "rewards/chosen": -0.29292288422584534, "rewards/margins": 0.2698090672492981, "rewards/rejected": -0.5627318620681763, "step": 1540 }, { "epoch": 0.37, "learning_rate": 3.955875020059141e-06, "logits/chosen": -2.192470073699951, "logits/rejected": -1.4760370254516602, "logps/chosen": -418.90972900390625, "logps/rejected": -597.2352294921875, "loss": 0.1984, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.26831185817718506, "rewards/margins": 0.23686759173870087, "rewards/rejected": -0.5051795244216919, "step": 1550 }, { "epoch": 0.37, "learning_rate": 3.938797992840828e-06, "logits/chosen": -2.309406280517578, "logits/rejected": -1.611665964126587, "logps/chosen": -544.3067016601562, "logps/rejected": -743.3720703125, "loss": 0.2453, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3659195601940155, "rewards/margins": 0.24159732460975647, "rewards/rejected": -0.607516884803772, "step": 1560 }, { "epoch": 0.38, "learning_rate": 3.92161998582554e-06, "logits/chosen": -2.2581753730773926, "logits/rejected": -1.5438921451568604, "logps/chosen": -463.71295166015625, "logps/rejected": -718.9074096679688, "loss": 0.2336, "rewards/accuracies": 0.75, "rewards/chosen": -0.31643715500831604, "rewards/margins": 0.2882859408855438, "rewards/rejected": -0.6047230958938599, "step": 1570 }, { "epoch": 0.38, "learning_rate": 3.904342204624955e-06, "logits/chosen": -2.0873873233795166, "logits/rejected": -1.5464417934417725, "logps/chosen": -484.0960388183594, "logps/rejected": -615.3694458007812, "loss": 0.2085, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3140603005886078, "rewards/margins": 0.18569275736808777, "rewards/rejected": -0.49975305795669556, "step": 1580 }, { "epoch": 0.38, "learning_rate": 3.886965861853243e-06, "logits/chosen": -2.237922191619873, "logits/rejected": -1.5272413492202759, "logps/chosen": -436.9114685058594, "logps/rejected": -638.0137939453125, "loss": 0.2133, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.25133323669433594, "rewards/margins": 0.261993944644928, "rewards/rejected": -0.5133271217346191, "step": 1590 }, { "epoch": 0.38, "learning_rate": 3.869492177041971e-06, "logits/chosen": -2.049579620361328, "logits/rejected": -1.4861265420913696, "logps/chosen": -471.28240966796875, "logps/rejected": -640.3895263671875, "loss": 0.2351, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.30936118960380554, "rewards/margins": 0.22528687119483948, "rewards/rejected": -0.534648060798645, "step": 1600 }, { "epoch": 0.38, "eval_logits/chosen": -2.0290181636810303, "eval_logits/rejected": -1.8506064414978027, "eval_logps/chosen": -604.1563720703125, "eval_logps/rejected": -637.3509521484375, "eval_loss": 0.10981600731611252, "eval_rewards/accuracies": 0.477544903755188, "eval_rewards/chosen": -0.338278591632843, "eval_rewards/margins": 0.04809616506099701, "eval_rewards/rejected": -0.3863748013973236, "eval_runtime": 1202.2009, "eval_samples_per_second": 1.664, "eval_steps_per_second": 0.278, "step": 1600 }, { "epoch": 0.39, "learning_rate": 3.8519223765544985e-06, "logits/chosen": -2.2203073501586914, "logits/rejected": -1.753635048866272, "logps/chosen": -478.2240295410156, "logps/rejected": -650.3573608398438, "loss": 0.2385, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.30317071080207825, "rewards/margins": 0.21921324729919434, "rewards/rejected": -0.5223838686943054, "step": 1610 }, { "epoch": 0.39, "learning_rate": 3.8342576934999184e-06, "logits/chosen": -2.1206583976745605, "logits/rejected": -1.5278724431991577, "logps/chosen": -521.4976806640625, "logps/rejected": -674.3228759765625, "loss": 0.2031, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.353380024433136, "rewards/margins": 0.21709546446800232, "rewards/rejected": -0.5704754590988159, "step": 1620 }, { "epoch": 0.39, "learning_rate": 3.816499367646508e-06, "logits/chosen": -2.0388755798339844, "logits/rejected": -1.663521409034729, "logps/chosen": -514.2132568359375, "logps/rejected": -665.8958740234375, "loss": 0.2487, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3622397482395172, "rewards/margins": 0.18139563500881195, "rewards/rejected": -0.543635368347168, "step": 1630 }, { "epoch": 0.39, "learning_rate": 3.7986486453347183e-06, "logits/chosen": -2.166191339492798, "logits/rejected": -1.5982885360717773, "logps/chosen": -548.0076293945312, "logps/rejected": -730.3982543945312, "loss": 0.257, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.38546380400657654, "rewards/margins": 0.24203220009803772, "rewards/rejected": -0.6274961233139038, "step": 1640 }, { "epoch": 0.4, "learning_rate": 3.7807067793897006e-06, "logits/chosen": -2.118896961212158, "logits/rejected": -1.4653342962265015, "logps/chosen": -552.0101318359375, "logps/rejected": -733.3226928710938, "loss": 0.2293, "rewards/accuracies": 0.75, "rewards/chosen": -0.3991249203681946, "rewards/margins": 0.2145986557006836, "rewards/rejected": -0.6137235760688782, "step": 1650 }, { "epoch": 0.4, "learning_rate": 3.7626750290333824e-06, "logits/chosen": -2.1274633407592773, "logits/rejected": -1.3947694301605225, "logps/chosen": -469.06414794921875, "logps/rejected": -688.154541015625, "loss": 0.2615, "rewards/accuracies": 0.875, "rewards/chosen": -0.3271122872829437, "rewards/margins": 0.2564288377761841, "rewards/rejected": -0.5835410952568054, "step": 1660 }, { "epoch": 0.4, "learning_rate": 3.7445546597960882e-06, "logits/chosen": -2.152128219604492, "logits/rejected": -1.558203101158142, "logps/chosen": -394.2665710449219, "logps/rejected": -589.2421875, "loss": 0.1963, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2612588405609131, "rewards/margins": 0.22633543610572815, "rewards/rejected": -0.48759427666664124, "step": 1670 }, { "epoch": 0.4, "learning_rate": 3.726346943427719e-06, "logits/chosen": -2.235344171524048, "logits/rejected": -1.6670506000518799, "logps/chosen": -430.75799560546875, "logps/rejected": -613.2647705078125, "loss": 0.2219, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2798583507537842, "rewards/margins": 0.23533377051353455, "rewards/rejected": -0.5151920914649963, "step": 1680 }, { "epoch": 0.41, "learning_rate": 3.7080531578085e-06, "logits/chosen": -2.2296547889709473, "logits/rejected": -1.511041283607483, "logps/chosen": -444.85894775390625, "logps/rejected": -703.8099365234375, "loss": 0.2271, "rewards/accuracies": 0.75, "rewards/chosen": -0.32802051305770874, "rewards/margins": 0.2928990125656128, "rewards/rejected": -0.6209195256233215, "step": 1690 }, { "epoch": 0.41, "learning_rate": 3.6896745868592924e-06, "logits/chosen": -2.29763126373291, "logits/rejected": -1.5639420747756958, "logps/chosen": -464.4435119628906, "logps/rejected": -664.3502197265625, "loss": 0.2622, "rewards/accuracies": 0.875, "rewards/chosen": -0.26803210377693176, "rewards/margins": 0.2635959982872009, "rewards/rejected": -0.5316281318664551, "step": 1700 }, { "epoch": 0.41, "eval_logits/chosen": -2.1015992164611816, "eval_logits/rejected": -1.9196619987487793, "eval_logps/chosen": -527.2567749023438, "eval_logps/rejected": -563.0452270507812, "eval_loss": 0.119623564183712, "eval_rewards/accuracies": 0.48203593492507935, "eval_rewards/chosen": -0.26137909293174744, "eval_rewards/margins": 0.05068989098072052, "eval_rewards/rejected": -0.31206896901130676, "eval_runtime": 1202.449, "eval_samples_per_second": 1.663, "eval_steps_per_second": 0.278, "step": 1700 }, { "epoch": 0.41, "learning_rate": 3.6712125204514836e-06, "logits/chosen": -2.15364146232605, "logits/rejected": -1.6057710647583008, "logps/chosen": -467.53009033203125, "logps/rejected": -611.0509033203125, "loss": 0.257, "rewards/accuracies": 0.75, "rewards/chosen": -0.3211197853088379, "rewards/margins": 0.19124528765678406, "rewards/rejected": -0.5123651027679443, "step": 1710 }, { "epoch": 0.41, "learning_rate": 3.65266825431646e-06, "logits/chosen": -2.3185622692108154, "logits/rejected": -1.8268556594848633, "logps/chosen": -447.40435791015625, "logps/rejected": -584.9998779296875, "loss": 0.2175, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3207271695137024, "rewards/margins": 0.14955464005470276, "rewards/rejected": -0.47028183937072754, "step": 1720 }, { "epoch": 0.42, "learning_rate": 3.6340430899546656e-06, "logits/chosen": -1.7856781482696533, "logits/rejected": -1.204011082649231, "logps/chosen": -490.888916015625, "logps/rejected": -679.3291015625, "loss": 0.2225, "rewards/accuracies": 0.875, "rewards/chosen": -0.36003556847572327, "rewards/margins": 0.22519242763519287, "rewards/rejected": -0.5852279663085938, "step": 1730 }, { "epoch": 0.42, "learning_rate": 3.615338334544265e-06, "logits/chosen": -2.080984592437744, "logits/rejected": -1.3211934566497803, "logps/chosen": -477.18035888671875, "logps/rejected": -715.7023315429688, "loss": 0.2142, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3356272578239441, "rewards/margins": 0.27829962968826294, "rewards/rejected": -0.6139269471168518, "step": 1740 }, { "epoch": 0.42, "learning_rate": 3.5965553008493924e-06, "logits/chosen": -1.9550186395645142, "logits/rejected": -1.2599399089813232, "logps/chosen": -501.34527587890625, "logps/rejected": -723.1912231445312, "loss": 0.2127, "rewards/accuracies": 0.875, "rewards/chosen": -0.32816281914711, "rewards/margins": 0.3019830286502838, "rewards/rejected": -0.6301458477973938, "step": 1750 }, { "epoch": 0.42, "learning_rate": 3.577695307128024e-06, "logits/chosen": -1.952284574508667, "logits/rejected": -1.473092794418335, "logps/chosen": -553.9656982421875, "logps/rejected": -672.5125732421875, "loss": 0.2285, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.41058188676834106, "rewards/margins": 0.16641955077648163, "rewards/rejected": -0.5770013928413391, "step": 1760 }, { "epoch": 0.42, "learning_rate": 3.558759677039455e-06, "logits/chosen": -2.179708957672119, "logits/rejected": -1.5432889461517334, "logps/chosen": -416.77923583984375, "logps/rejected": -605.5023193359375, "loss": 0.2168, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24098536372184753, "rewards/margins": 0.2360987663269043, "rewards/rejected": -0.4770841598510742, "step": 1770 }, { "epoch": 0.43, "learning_rate": 3.539749739551401e-06, "logits/chosen": -2.0971553325653076, "logits/rejected": -1.4411263465881348, "logps/chosen": -528.1760864257812, "logps/rejected": -726.6964721679688, "loss": 0.2717, "rewards/accuracies": 0.875, "rewards/chosen": -0.352076917886734, "rewards/margins": 0.24887827038764954, "rewards/rejected": -0.6009551882743835, "step": 1780 }, { "epoch": 0.43, "learning_rate": 3.520666828846726e-06, "logits/chosen": -2.1314852237701416, "logits/rejected": -1.5494054555892944, "logps/chosen": -475.7056579589844, "logps/rejected": -679.2175903320312, "loss": 0.2234, "rewards/accuracies": 0.875, "rewards/chosen": -0.3036540150642395, "rewards/margins": 0.2536951005458832, "rewards/rejected": -0.5573492050170898, "step": 1790 }, { "epoch": 0.43, "learning_rate": 3.501512284229807e-06, "logits/chosen": -2.1376233100891113, "logits/rejected": -1.6488583087921143, "logps/chosen": -414.2095642089844, "logps/rejected": -621.1632080078125, "loss": 0.2043, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2777335047721863, "rewards/margins": 0.238857239484787, "rewards/rejected": -0.5165907740592957, "step": 1800 }, { "epoch": 0.43, "eval_logits/chosen": -2.098015308380127, "eval_logits/rejected": -1.917683482170105, "eval_logps/chosen": -545.7018432617188, "eval_logps/rejected": -576.196533203125, "eval_loss": 0.12571881711483002, "eval_rewards/accuracies": 0.48203593492507935, "eval_rewards/chosen": -0.27982407808303833, "eval_rewards/margins": 0.045396242290735245, "eval_rewards/rejected": -0.3252203166484833, "eval_runtime": 1202.6328, "eval_samples_per_second": 1.663, "eval_steps_per_second": 0.278, "step": 1800 }, { "epoch": 0.43, "learning_rate": 3.482287450032536e-06, "logits/chosen": -2.29888653755188, "logits/rejected": -1.7066341638565063, "logps/chosen": -397.7171325683594, "logps/rejected": -592.3546142578125, "loss": 0.249, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20270375907421112, "rewards/margins": 0.26644089818000793, "rewards/rejected": -0.46914467215538025, "step": 1810 }, { "epoch": 0.44, "learning_rate": 3.462993675519968e-06, "logits/chosen": -2.1094446182250977, "logits/rejected": -1.447059154510498, "logps/chosen": -389.1714172363281, "logps/rejected": -662.8653564453125, "loss": 0.2142, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23571112751960754, "rewards/margins": 0.30633097887039185, "rewards/rejected": -0.542042076587677, "step": 1820 }, { "epoch": 0.44, "learning_rate": 3.443632314795627e-06, "logits/chosen": -2.2550530433654785, "logits/rejected": -1.4738640785217285, "logps/chosen": -464.8247985839844, "logps/rejected": -663.5584716796875, "loss": 0.2365, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2962021231651306, "rewards/margins": 0.26197540760040283, "rewards/rejected": -0.5581775307655334, "step": 1830 }, { "epoch": 0.44, "learning_rate": 3.4242047267064714e-06, "logits/chosen": -2.0004706382751465, "logits/rejected": -1.398463249206543, "logps/chosen": -626.9364013671875, "logps/rejected": -828.2486572265625, "loss": 0.2493, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.47840484976768494, "rewards/margins": 0.24040815234184265, "rewards/rejected": -0.7188130617141724, "step": 1840 }, { "epoch": 0.44, "learning_rate": 3.4047122747475227e-06, "logits/chosen": -2.1171586513519287, "logits/rejected": -1.447742223739624, "logps/chosen": -405.7727966308594, "logps/rejected": -596.1717529296875, "loss": 0.2181, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2449554204940796, "rewards/margins": 0.2641601264476776, "rewards/rejected": -0.5091155171394348, "step": 1850 }, { "epoch": 0.45, "learning_rate": 3.385156326966173e-06, "logits/chosen": -1.993351697921753, "logits/rejected": -1.5818934440612793, "logps/chosen": -483.9803771972656, "logps/rejected": -639.0924072265625, "loss": 0.247, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.33118367195129395, "rewards/margins": 0.1799928843975067, "rewards/rejected": -0.5111765265464783, "step": 1860 }, { "epoch": 0.45, "learning_rate": 3.365538255866169e-06, "logits/chosen": -2.0671772956848145, "logits/rejected": -1.5636236667633057, "logps/chosen": -548.2286987304688, "logps/rejected": -702.2981567382812, "loss": 0.2362, "rewards/accuracies": 0.75, "rewards/chosen": -0.40322232246398926, "rewards/margins": 0.18652090430259705, "rewards/rejected": -0.5897432565689087, "step": 1870 }, { "epoch": 0.45, "learning_rate": 3.3458594383112868e-06, "logits/chosen": -2.0276670455932617, "logits/rejected": -1.4879323244094849, "logps/chosen": -527.5305786132812, "logps/rejected": -729.074951171875, "loss": 0.2068, "rewards/accuracies": 0.75, "rewards/chosen": -0.3747082054615021, "rewards/margins": 0.2514965236186981, "rewards/rejected": -0.6262047290802002, "step": 1880 }, { "epoch": 0.45, "learning_rate": 3.3261212554286977e-06, "logits/chosen": -2.072234869003296, "logits/rejected": -1.5208971500396729, "logps/chosen": -509.4759826660156, "logps/rejected": -656.38037109375, "loss": 0.2277, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.32120758295059204, "rewards/margins": 0.228814035654068, "rewards/rejected": -0.5500216484069824, "step": 1890 }, { "epoch": 0.46, "learning_rate": 3.306325092512034e-06, "logits/chosen": -2.171290874481201, "logits/rejected": -1.3472914695739746, "logps/chosen": -516.923828125, "logps/rejected": -744.3807373046875, "loss": 0.2205, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.32172220945358276, "rewards/margins": 0.312275767326355, "rewards/rejected": -0.6339979767799377, "step": 1900 }, { "epoch": 0.46, "eval_logits/chosen": -1.9971847534179688, "eval_logits/rejected": -1.8197708129882812, "eval_logps/chosen": -669.595703125, "eval_logps/rejected": -713.9169921875, "eval_loss": 0.11544374376535416, "eval_rewards/accuracies": 0.485029935836792, "eval_rewards/chosen": -0.40371790528297424, "eval_rewards/margins": 0.05922284349799156, "eval_rewards/rejected": -0.4629407823085785, "eval_runtime": 1202.9191, "eval_samples_per_second": 1.663, "eval_steps_per_second": 0.278, "step": 1900 }, { "epoch": 0.46, "learning_rate": 3.2864723389241697e-06, "logits/chosen": -2.122145175933838, "logits/rejected": -1.2872363328933716, "logps/chosen": -564.5249633789062, "logps/rejected": -791.4066162109375, "loss": 0.2396, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.41658464074134827, "rewards/margins": 0.26830723881721497, "rewards/rejected": -0.6848918795585632, "step": 1910 }, { "epoch": 0.46, "learning_rate": 3.2665643879997054e-06, "logits/chosen": -2.0699048042297363, "logits/rejected": -1.4691760540008545, "logps/chosen": -435.06304931640625, "logps/rejected": -597.763427734375, "loss": 0.2548, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.286428302526474, "rewards/margins": 0.19408531486988068, "rewards/rejected": -0.48051366209983826, "step": 1920 }, { "epoch": 0.46, "learning_rate": 3.2466026369471804e-06, "logits/chosen": -2.298539400100708, "logits/rejected": -1.5294179916381836, "logps/chosen": -473.37994384765625, "logps/rejected": -645.1686401367188, "loss": 0.2231, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.28003352880477905, "rewards/margins": 0.25798726081848145, "rewards/rejected": -0.5380207300186157, "step": 1930 }, { "epoch": 0.47, "learning_rate": 3.226588486751012e-06, "logits/chosen": -2.1958389282226562, "logits/rejected": -1.351405143737793, "logps/chosen": -437.44085693359375, "logps/rejected": -692.4970703125, "loss": 0.1735, "rewards/accuracies": 0.875, "rewards/chosen": -0.26957300305366516, "rewards/margins": 0.3142974078655243, "rewards/rejected": -0.5838704109191895, "step": 1940 }, { "epoch": 0.47, "learning_rate": 3.2065233420731717e-06, "logits/chosen": -2.202249050140381, "logits/rejected": -1.5769132375717163, "logps/chosen": -542.3634033203125, "logps/rejected": -699.9461669921875, "loss": 0.2285, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.38574230670928955, "rewards/margins": 0.22279763221740723, "rewards/rejected": -0.6085399389266968, "step": 1950 }, { "epoch": 0.47, "learning_rate": 3.186408611154597e-06, "logits/chosen": -2.100429058074951, "logits/rejected": -1.3115769624710083, "logps/chosen": -516.0811157226562, "logps/rejected": -691.1436767578125, "loss": 0.2435, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3459722399711609, "rewards/margins": 0.24837620556354523, "rewards/rejected": -0.5943484306335449, "step": 1960 }, { "epoch": 0.47, "learning_rate": 3.1662457057163603e-06, "logits/chosen": -2.1250691413879395, "logits/rejected": -1.5894689559936523, "logps/chosen": -404.347412109375, "logps/rejected": -560.5941772460938, "loss": 0.2335, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2800315022468567, "rewards/margins": 0.20710071921348572, "rewards/rejected": -0.4871322214603424, "step": 1970 }, { "epoch": 0.48, "learning_rate": 3.1460360408605866e-06, "logits/chosen": -2.1719813346862793, "logits/rejected": -1.4831385612487793, "logps/chosen": -455.1966857910156, "logps/rejected": -667.8966064453125, "loss": 0.2104, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2794944941997528, "rewards/margins": 0.2690897583961487, "rewards/rejected": -0.5485842823982239, "step": 1980 }, { "epoch": 0.48, "learning_rate": 3.1257810349711388e-06, "logits/chosen": -2.128711700439453, "logits/rejected": -1.4837182760238647, "logps/chosen": -530.9110107421875, "logps/rejected": -727.338134765625, "loss": 0.252, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3870256841182709, "rewards/margins": 0.24742953479290009, "rewards/rejected": -0.6344552636146545, "step": 1990 }, { "epoch": 0.48, "learning_rate": 3.1054821096140675e-06, "logits/chosen": -2.0311193466186523, "logits/rejected": -1.371429443359375, "logps/chosen": -579.6988525390625, "logps/rejected": -779.2301025390625, "loss": 0.2156, "rewards/accuracies": 0.875, "rewards/chosen": -0.4279225468635559, "rewards/margins": 0.23268994688987732, "rewards/rejected": -0.6606124639511108, "step": 2000 }, { "epoch": 0.48, "eval_logits/chosen": -2.104422092437744, "eval_logits/rejected": -1.923403024673462, "eval_logps/chosen": -538.5911254882812, "eval_logps/rejected": -567.0794067382812, "eval_loss": 0.11033174395561218, "eval_rewards/accuracies": 0.4865269362926483, "eval_rewards/chosen": -0.27271345257759094, "eval_rewards/margins": 0.04338974133133888, "eval_rewards/rejected": -0.3161032199859619, "eval_runtime": 1203.5781, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 2000 }, { "epoch": 0.48, "learning_rate": 3.085140689437846e-06, "logits/chosen": -2.144815683364868, "logits/rejected": -1.8596773147583008, "logps/chosen": -468.91534423828125, "logps/rejected": -585.1773681640625, "loss": 0.2692, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.31402096152305603, "rewards/margins": 0.16459986567497253, "rewards/rejected": -0.47862082719802856, "step": 2010 }, { "epoch": 0.48, "learning_rate": 3.0647582020733773e-06, "logits/chosen": -1.9965989589691162, "logits/rejected": -1.4881664514541626, "logps/chosen": -561.114501953125, "logps/rejected": -704.7186279296875, "loss": 0.2248, "rewards/accuracies": 0.75, "rewards/chosen": -0.4185667932033539, "rewards/margins": 0.19495680928230286, "rewards/rejected": -0.613523542881012, "step": 2020 }, { "epoch": 0.49, "learning_rate": 3.0443360780338034e-06, "logits/chosen": -1.8410367965698242, "logits/rejected": -1.1502970457077026, "logps/chosen": -596.3934936523438, "logps/rejected": -874.7312622070312, "loss": 0.2096, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.45028114318847656, "rewards/margins": 0.327549010515213, "rewards/rejected": -0.777830183506012, "step": 2030 }, { "epoch": 0.49, "learning_rate": 3.0238757506141013e-06, "logits/chosen": -2.287742853164673, "logits/rejected": -1.325746774673462, "logps/chosen": -506.9778747558594, "logps/rejected": -808.0347900390625, "loss": 0.2029, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3414602279663086, "rewards/margins": 0.37787026166915894, "rewards/rejected": -0.7193304896354675, "step": 2040 }, { "epoch": 0.49, "learning_rate": 3.0033786557904982e-06, "logits/chosen": -2.1395773887634277, "logits/rejected": -1.5288134813308716, "logps/chosen": -586.067138671875, "logps/rejected": -800.8207397460938, "loss": 0.2611, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4134158194065094, "rewards/margins": 0.2782798111438751, "rewards/rejected": -0.6916956305503845, "step": 2050 }, { "epoch": 0.49, "learning_rate": 2.982846232119679e-06, "logits/chosen": -1.829058051109314, "logits/rejected": -1.2867153882980347, "logps/chosen": -622.7283935546875, "logps/rejected": -833.6815185546875, "loss": 0.2542, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.47451552748680115, "rewards/margins": 0.2557712197303772, "rewards/rejected": -0.7302867770195007, "step": 2060 }, { "epoch": 0.5, "learning_rate": 2.9622799206378306e-06, "logits/chosen": -2.011798143386841, "logits/rejected": -1.393971562385559, "logps/chosen": -692.2308959960938, "logps/rejected": -878.6043701171875, "loss": 0.2454, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.549125611782074, "rewards/margins": 0.20759177207946777, "rewards/rejected": -0.756717324256897, "step": 2070 }, { "epoch": 0.5, "learning_rate": 2.9416811647595052e-06, "logits/chosen": -2.0739920139312744, "logits/rejected": -1.546463966369629, "logps/chosen": -567.7322387695312, "logps/rejected": -760.2849731445312, "loss": 0.2241, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4037742614746094, "rewards/margins": 0.24794352054595947, "rewards/rejected": -0.6517177820205688, "step": 2080 }, { "epoch": 0.5, "learning_rate": 2.9210514101763116e-06, "logits/chosen": -2.0362982749938965, "logits/rejected": -1.5337706804275513, "logps/chosen": -603.1917114257812, "logps/rejected": -792.9419555664062, "loss": 0.223, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4241882264614105, "rewards/margins": 0.22013404965400696, "rewards/rejected": -0.6443222761154175, "step": 2090 }, { "epoch": 0.5, "learning_rate": 2.900392104755455e-06, "logits/chosen": -2.0222811698913574, "logits/rejected": -1.466583490371704, "logps/chosen": -611.0118408203125, "logps/rejected": -779.8033447265625, "loss": 0.2308, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.44711288809776306, "rewards/margins": 0.2303667813539505, "rewards/rejected": -0.67747962474823, "step": 2100 }, { "epoch": 0.5, "eval_logits/chosen": -1.9761273860931396, "eval_logits/rejected": -1.8012945652008057, "eval_logps/chosen": -698.0287475585938, "eval_logps/rejected": -736.1897583007812, "eval_loss": 0.1162559762597084, "eval_rewards/accuracies": 0.492514967918396, "eval_rewards/chosen": -0.43215104937553406, "eval_rewards/margins": 0.05306249111890793, "eval_rewards/rejected": -0.4852134883403778, "eval_runtime": 1203.465, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 2100 }, { "epoch": 0.51, "learning_rate": 2.879704698438121e-06, "logits/chosen": -2.0037026405334473, "logits/rejected": -1.3713741302490234, "logps/chosen": -604.12841796875, "logps/rejected": -768.1104125976562, "loss": 0.2095, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.45974642038345337, "rewards/margins": 0.220332071185112, "rewards/rejected": -0.680078387260437, "step": 2110 }, { "epoch": 0.51, "learning_rate": 2.8589906431377133e-06, "logits/chosen": -2.245237350463867, "logits/rejected": -1.8536914587020874, "logps/chosen": -458.50018310546875, "logps/rejected": -570.5384521484375, "loss": 0.2409, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.29385268688201904, "rewards/margins": 0.1265999972820282, "rewards/rejected": -0.42045268416404724, "step": 2120 }, { "epoch": 0.51, "learning_rate": 2.8382513926379508e-06, "logits/chosen": -2.123015880584717, "logits/rejected": -1.348111867904663, "logps/chosen": -533.6074829101562, "logps/rejected": -767.3739624023438, "loss": 0.2249, "rewards/accuracies": 0.875, "rewards/chosen": -0.34552595019340515, "rewards/margins": 0.29653745889663696, "rewards/rejected": -0.6420633792877197, "step": 2130 }, { "epoch": 0.51, "learning_rate": 2.817488402490841e-06, "logits/chosen": -2.2186074256896973, "logits/rejected": -1.4078962802886963, "logps/chosen": -534.7769775390625, "logps/rejected": -796.1058349609375, "loss": 0.229, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3885762095451355, "rewards/margins": 0.3023179769515991, "rewards/rejected": -0.6908941268920898, "step": 2140 }, { "epoch": 0.52, "learning_rate": 2.796703129914519e-06, "logits/chosen": -2.02451491355896, "logits/rejected": -1.3592201471328735, "logps/chosen": -571.4125366210938, "logps/rejected": -741.8164672851562, "loss": 0.2641, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.40158170461654663, "rewards/margins": 0.2435813844203949, "rewards/rejected": -0.6451630592346191, "step": 2150 }, { "epoch": 0.52, "learning_rate": 2.7758970336909795e-06, "logits/chosen": -2.0419907569885254, "logits/rejected": -1.4874043464660645, "logps/chosen": -496.23626708984375, "logps/rejected": -659.5692138671875, "loss": 0.2282, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3442228436470032, "rewards/margins": 0.2103544920682907, "rewards/rejected": -0.5545774102210999, "step": 2160 }, { "epoch": 0.52, "learning_rate": 2.755071574063692e-06, "logits/chosen": -2.2657744884490967, "logits/rejected": -1.713344931602478, "logps/chosen": -447.258056640625, "logps/rejected": -591.7661743164062, "loss": 0.2489, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2957819104194641, "rewards/margins": 0.1900225132703781, "rewards/rejected": -0.48580440878868103, "step": 2170 }, { "epoch": 0.52, "learning_rate": 2.7342282126351145e-06, "logits/chosen": -2.0341286659240723, "logits/rejected": -1.6015617847442627, "logps/chosen": -555.8262939453125, "logps/rejected": -717.1621704101562, "loss": 0.2187, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3921821117401123, "rewards/margins": 0.1928720772266388, "rewards/rejected": -0.5850542187690735, "step": 2180 }, { "epoch": 0.53, "learning_rate": 2.713368412264118e-06, "logits/chosen": -2.043358564376831, "logits/rejected": -1.469124674797058, "logps/chosen": -615.0474853515625, "logps/rejected": -768.4633178710938, "loss": 0.2347, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.44691038131713867, "rewards/margins": 0.20122608542442322, "rewards/rejected": -0.6481364965438843, "step": 2190 }, { "epoch": 0.53, "learning_rate": 2.6924936369633126e-06, "logits/chosen": -1.9981714487075806, "logits/rejected": -1.3957901000976562, "logps/chosen": -534.0799560546875, "logps/rejected": -720.1726684570312, "loss": 0.2204, "rewards/accuracies": 0.875, "rewards/chosen": -0.4002350866794586, "rewards/margins": 0.23594383895397186, "rewards/rejected": -0.6361789107322693, "step": 2200 }, { "epoch": 0.53, "eval_logits/chosen": -2.0259740352630615, "eval_logits/rejected": -1.8486952781677246, "eval_logps/chosen": -588.3228759765625, "eval_logps/rejected": -622.175048828125, "eval_loss": 0.10833055526018143, "eval_rewards/accuracies": 0.4940119683742523, "eval_rewards/chosen": -0.3224451243877411, "eval_rewards/margins": 0.04875374585390091, "eval_rewards/rejected": -0.3711988925933838, "eval_runtime": 1203.5217, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 2200 }, { "epoch": 0.53, "learning_rate": 2.671605351796302e-06, "logits/chosen": -2.2026238441467285, "logits/rejected": -1.744830846786499, "logps/chosen": -476.657958984375, "logps/rejected": -595.4264526367188, "loss": 0.2512, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.32139354944229126, "rewards/margins": 0.1794227957725525, "rewards/rejected": -0.500816285610199, "step": 2210 }, { "epoch": 0.53, "learning_rate": 2.6507050227748595e-06, "logits/chosen": -1.9843857288360596, "logits/rejected": -1.1882731914520264, "logps/chosen": -538.7609252929688, "logps/rejected": -776.5012817382812, "loss": 0.2313, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3686228394508362, "rewards/margins": 0.31085333228111267, "rewards/rejected": -0.679476261138916, "step": 2220 }, { "epoch": 0.54, "learning_rate": 2.629794116756035e-06, "logits/chosen": -2.1628077030181885, "logits/rejected": -1.483097791671753, "logps/chosen": -536.7071533203125, "logps/rejected": -720.0963134765625, "loss": 0.2406, "rewards/accuracies": 0.75, "rewards/chosen": -0.3600391745567322, "rewards/margins": 0.23469308018684387, "rewards/rejected": -0.5947321653366089, "step": 2230 }, { "epoch": 0.54, "learning_rate": 2.60887410133921e-06, "logits/chosen": -2.172809362411499, "logits/rejected": -1.5343067646026611, "logps/chosen": -491.45379638671875, "logps/rejected": -679.3123779296875, "loss": 0.2316, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3373010754585266, "rewards/margins": 0.23081345856189728, "rewards/rejected": -0.5681145787239075, "step": 2240 }, { "epoch": 0.54, "learning_rate": 2.5879464447630947e-06, "logits/chosen": -2.159168243408203, "logits/rejected": -1.6364202499389648, "logps/chosen": -518.8533935546875, "logps/rejected": -680.24560546875, "loss": 0.2428, "rewards/accuracies": 0.875, "rewards/chosen": -0.37567752599716187, "rewards/margins": 0.20008471608161926, "rewards/rejected": -0.5757622718811035, "step": 2250 }, { "epoch": 0.54, "learning_rate": 2.5670126158026843e-06, "logits/chosen": -2.027329683303833, "logits/rejected": -1.305175542831421, "logps/chosen": -484.75830078125, "logps/rejected": -736.8638305664062, "loss": 0.2291, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.35542964935302734, "rewards/margins": 0.2777010202407837, "rewards/rejected": -0.6331306099891663, "step": 2260 }, { "epoch": 0.54, "learning_rate": 2.546074083666169e-06, "logits/chosen": -2.138233184814453, "logits/rejected": -1.4262261390686035, "logps/chosen": -570.4146728515625, "logps/rejected": -761.60498046875, "loss": 0.2277, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.39604076743125916, "rewards/margins": 0.2491084784269333, "rewards/rejected": -0.6451492309570312, "step": 2270 }, { "epoch": 0.55, "learning_rate": 2.525132317891827e-06, "logits/chosen": -2.155468463897705, "logits/rejected": -1.4776188135147095, "logps/chosen": -633.605224609375, "logps/rejected": -833.9893798828125, "loss": 0.22, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.4600352644920349, "rewards/margins": 0.23945260047912598, "rewards/rejected": -0.6994878649711609, "step": 2280 }, { "epoch": 0.55, "learning_rate": 2.5041887882448845e-06, "logits/chosen": -1.838815689086914, "logits/rejected": -1.2401320934295654, "logps/chosen": -630.2888793945312, "logps/rejected": -802.6695556640625, "loss": 0.2388, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4658653736114502, "rewards/margins": 0.23190948367118835, "rewards/rejected": -0.6977748274803162, "step": 2290 }, { "epoch": 0.55, "learning_rate": 2.4832449646143605e-06, "logits/chosen": -2.143864154815674, "logits/rejected": -1.4324805736541748, "logps/chosen": -600.2898559570312, "logps/rejected": -811.3610229492188, "loss": 0.2303, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4388144016265869, "rewards/margins": 0.2606216371059418, "rewards/rejected": -0.6994358897209167, "step": 2300 }, { "epoch": 0.55, "eval_logits/chosen": -2.0472917556762695, "eval_logits/rejected": -1.867949366569519, "eval_logps/chosen": -577.5366821289062, "eval_logps/rejected": -617.70751953125, "eval_loss": 0.11917373538017273, "eval_rewards/accuracies": 0.4940119683742523, "eval_rewards/chosen": -0.3116588890552521, "eval_rewards/margins": 0.05507243424654007, "eval_rewards/rejected": -0.36673131585121155, "eval_runtime": 1203.2664, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 2300 }, { "epoch": 0.55, "learning_rate": 2.4623023169099074e-06, "logits/chosen": -2.0397801399230957, "logits/rejected": -1.31986665725708, "logps/chosen": -485.8924255371094, "logps/rejected": -719.5282592773438, "loss": 0.2622, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.32573679089546204, "rewards/margins": 0.2711804211139679, "rewards/rejected": -0.5969171524047852, "step": 2310 }, { "epoch": 0.56, "learning_rate": 2.441362314958649e-06, "logits/chosen": -2.3322081565856934, "logits/rejected": -1.835338830947876, "logps/chosen": -445.72808837890625, "logps/rejected": -583.0438842773438, "loss": 0.2527, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.30502650141716003, "rewards/margins": 0.17221921682357788, "rewards/rejected": -0.4772457182407379, "step": 2320 }, { "epoch": 0.56, "learning_rate": 2.4204264284020182e-06, "logits/chosen": -2.121436595916748, "logits/rejected": -1.3262660503387451, "logps/chosen": -469.47296142578125, "logps/rejected": -640.6263427734375, "loss": 0.2107, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2932582497596741, "rewards/margins": 0.2390308678150177, "rewards/rejected": -0.5322891473770142, "step": 2330 }, { "epoch": 0.56, "learning_rate": 2.3994961265926166e-06, "logits/chosen": -2.095651388168335, "logits/rejected": -1.281914472579956, "logps/chosen": -548.6361083984375, "logps/rejected": -733.3841552734375, "loss": 0.218, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3627166450023651, "rewards/margins": 0.27520424127578735, "rewards/rejected": -0.6379209160804749, "step": 2340 }, { "epoch": 0.56, "learning_rate": 2.378572878491091e-06, "logits/chosen": -2.3607330322265625, "logits/rejected": -1.5038191080093384, "logps/chosen": -469.8932189941406, "logps/rejected": -725.5943603515625, "loss": 0.1968, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.29796329140663147, "rewards/margins": 0.32396772503852844, "rewards/rejected": -0.6219310164451599, "step": 2350 }, { "epoch": 0.57, "learning_rate": 2.3576581525630297e-06, "logits/chosen": -2.0950441360473633, "logits/rejected": -1.7722364664077759, "logps/chosen": -586.0914306640625, "logps/rejected": -699.4112548828125, "loss": 0.2439, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.43629246950149536, "rewards/margins": 0.1433209925889969, "rewards/rejected": -0.5796133875846863, "step": 2360 }, { "epoch": 0.57, "learning_rate": 2.3367534166759105e-06, "logits/chosen": -2.288886070251465, "logits/rejected": -1.8359441757202148, "logps/chosen": -522.1571655273438, "logps/rejected": -687.4425659179688, "loss": 0.2629, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.35911834239959717, "rewards/margins": 0.20178253948688507, "rewards/rejected": -0.5609009265899658, "step": 2370 }, { "epoch": 0.57, "learning_rate": 2.315860137996074e-06, "logits/chosen": -1.9418423175811768, "logits/rejected": -1.3743575811386108, "logps/chosen": -544.7579956054688, "logps/rejected": -704.5997314453125, "loss": 0.2211, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3829517960548401, "rewards/margins": 0.22514621913433075, "rewards/rejected": -0.608098030090332, "step": 2380 }, { "epoch": 0.57, "learning_rate": 2.2949797828857527e-06, "logits/chosen": -2.2519288063049316, "logits/rejected": -1.5188825130462646, "logps/chosen": -485.19256591796875, "logps/rejected": -668.4830322265625, "loss": 0.2276, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2801663875579834, "rewards/margins": 0.23724499344825745, "rewards/rejected": -0.5174113512039185, "step": 2390 }, { "epoch": 0.58, "learning_rate": 2.274113816800161e-06, "logits/chosen": -2.1723268032073975, "logits/rejected": -1.6469682455062866, "logps/chosen": -480.55322265625, "logps/rejected": -658.7047729492188, "loss": 0.231, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3439343571662903, "rewards/margins": 0.2000315934419632, "rewards/rejected": -0.5439659357070923, "step": 2400 }, { "epoch": 0.58, "eval_logits/chosen": -1.992563247680664, "eval_logits/rejected": -1.8167330026626587, "eval_logps/chosen": -613.4935302734375, "eval_logps/rejected": -651.760009765625, "eval_loss": 0.10684346407651901, "eval_rewards/accuracies": 0.5, "eval_rewards/chosen": -0.3476158082485199, "eval_rewards/margins": 0.053168028593063354, "eval_rewards/rejected": -0.40078383684158325, "eval_runtime": 1203.3736, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 2400 }, { "epoch": 0.58, "learning_rate": 2.2532637041846423e-06, "logits/chosen": -1.9646238088607788, "logits/rejected": -1.4029309749603271, "logps/chosen": -590.4119262695312, "logps/rejected": -777.0964965820312, "loss": 0.2505, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.43447965383529663, "rewards/margins": 0.22341768443584442, "rewards/rejected": -0.6578973531723022, "step": 2410 }, { "epoch": 0.58, "learning_rate": 2.232430908371885e-06, "logits/chosen": -2.113560199737549, "logits/rejected": -1.3618537187576294, "logps/chosen": -590.8735961914062, "logps/rejected": -779.5650024414062, "loss": 0.2459, "rewards/accuracies": 0.875, "rewards/chosen": -0.40445756912231445, "rewards/margins": 0.2651630938053131, "rewards/rejected": -0.6696206331253052, "step": 2420 }, { "epoch": 0.58, "learning_rate": 2.2116168914792293e-06, "logits/chosen": -2.2550454139709473, "logits/rejected": -1.4135196208953857, "logps/chosen": -425.3912658691406, "logps/rejected": -675.1080322265625, "loss": 0.2176, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.28882336616516113, "rewards/margins": 0.289298951625824, "rewards/rejected": -0.5781222581863403, "step": 2430 }, { "epoch": 0.59, "learning_rate": 2.190823114306045e-06, "logits/chosen": -2.298129081726074, "logits/rejected": -1.6922838687896729, "logps/chosen": -488.34033203125, "logps/rejected": -662.1011352539062, "loss": 0.2424, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.3121868371963501, "rewards/margins": 0.21380400657653809, "rewards/rejected": -0.5259908437728882, "step": 2440 }, { "epoch": 0.59, "learning_rate": 2.1700510362312053e-06, "logits/chosen": -1.9786720275878906, "logits/rejected": -1.2434707880020142, "logps/chosen": -560.7887573242188, "logps/rejected": -735.9052734375, "loss": 0.2313, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4106983542442322, "rewards/margins": 0.2269977629184723, "rewards/rejected": -0.6376960873603821, "step": 2450 }, { "epoch": 0.59, "learning_rate": 2.1493021151106704e-06, "logits/chosen": -2.1541171073913574, "logits/rejected": -1.4196815490722656, "logps/chosen": -438.66827392578125, "logps/rejected": -661.3670043945312, "loss": 0.2196, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.28850236535072327, "rewards/margins": 0.2897711992263794, "rewards/rejected": -0.5782734751701355, "step": 2460 }, { "epoch": 0.59, "learning_rate": 2.1285778071751638e-06, "logits/chosen": -2.178353786468506, "logits/rejected": -1.47813880443573, "logps/chosen": -523.3999633789062, "logps/rejected": -745.962158203125, "loss": 0.239, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.36805760860443115, "rewards/margins": 0.28246694803237915, "rewards/rejected": -0.6505244970321655, "step": 2470 }, { "epoch": 0.6, "learning_rate": 2.10787956692797e-06, "logits/chosen": -2.181842803955078, "logits/rejected": -1.5035537481307983, "logps/chosen": -505.61724853515625, "logps/rejected": -733.2935791015625, "loss": 0.2193, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.34529489278793335, "rewards/margins": 0.2919084429740906, "rewards/rejected": -0.6372033357620239, "step": 2480 }, { "epoch": 0.6, "learning_rate": 2.0872088470428553e-06, "logits/chosen": -2.2804157733917236, "logits/rejected": -1.5763556957244873, "logps/chosen": -385.0133056640625, "logps/rejected": -602.9778442382812, "loss": 0.2402, "rewards/accuracies": 0.75, "rewards/chosen": -0.2299300879240036, "rewards/margins": 0.2738630175590515, "rewards/rejected": -0.5037931203842163, "step": 2490 }, { "epoch": 0.6, "learning_rate": 2.0665670982621107e-06, "logits/chosen": -2.2260727882385254, "logits/rejected": -1.584256887435913, "logps/chosen": -503.12640380859375, "logps/rejected": -692.163330078125, "loss": 0.2252, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3701305389404297, "rewards/margins": 0.22943897545337677, "rewards/rejected": -0.5995694994926453, "step": 2500 }, { "epoch": 0.6, "eval_logits/chosen": -2.097222089767456, "eval_logits/rejected": -1.9123655557632446, "eval_logps/chosen": -622.7223510742188, "eval_logps/rejected": -666.3873291015625, "eval_loss": 0.12397874146699905, "eval_rewards/accuracies": 0.4940119683742523, "eval_rewards/chosen": -0.35684460401535034, "eval_rewards/margins": 0.058566465973854065, "eval_rewards/rejected": -0.4154110252857208, "eval_runtime": 1203.2418, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 2500 }, { "epoch": 0.6, "learning_rate": 2.045955769294737e-06, "logits/chosen": -2.146933078765869, "logits/rejected": -1.4604142904281616, "logps/chosen": -526.0533447265625, "logps/rejected": -720.2157592773438, "loss": 0.2537, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3862465023994446, "rewards/margins": 0.22982840240001678, "rewards/rejected": -0.6160749197006226, "step": 2510 }, { "epoch": 0.6, "learning_rate": 2.0253763067147657e-06, "logits/chosen": -2.266871213912964, "logits/rejected": -1.7144603729248047, "logps/chosen": -473.1556701660156, "logps/rejected": -665.5570068359375, "loss": 0.2142, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3031342625617981, "rewards/margins": 0.24628221988677979, "rewards/rejected": -0.5494165420532227, "step": 2520 }, { "epoch": 0.61, "learning_rate": 2.0048301548597365e-06, "logits/chosen": -2.0515618324279785, "logits/rejected": -1.3304736614227295, "logps/chosen": -511.51837158203125, "logps/rejected": -686.8341064453125, "loss": 0.2447, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35747718811035156, "rewards/margins": 0.2265380322933197, "rewards/rejected": -0.5840152502059937, "step": 2530 }, { "epoch": 0.61, "learning_rate": 1.9843187557293286e-06, "logits/chosen": -2.468869686126709, "logits/rejected": -1.820254921913147, "logps/chosen": -468.31011962890625, "logps/rejected": -655.8160400390625, "loss": 0.2291, "rewards/accuracies": 0.75, "rewards/chosen": -0.29281821846961975, "rewards/margins": 0.24303650856018066, "rewards/rejected": -0.5358547568321228, "step": 2540 }, { "epoch": 0.61, "learning_rate": 1.9638435488841543e-06, "logits/chosen": -2.059474468231201, "logits/rejected": -1.2953211069107056, "logps/chosen": -421.70916748046875, "logps/rejected": -729.7376708984375, "loss": 0.1764, "rewards/accuracies": 0.875, "rewards/chosen": -0.2625841498374939, "rewards/margins": 0.3549993634223938, "rewards/rejected": -0.6175835728645325, "step": 2550 }, { "epoch": 0.61, "learning_rate": 1.9434059713447264e-06, "logits/chosen": -2.273080587387085, "logits/rejected": -1.4256597757339478, "logps/chosen": -501.80694580078125, "logps/rejected": -754.3250122070312, "loss": 0.2446, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3341330587863922, "rewards/margins": 0.32210057973861694, "rewards/rejected": -0.656233549118042, "step": 2560 }, { "epoch": 0.62, "learning_rate": 1.9230074574906043e-06, "logits/chosen": -2.2289280891418457, "logits/rejected": -1.7485036849975586, "logps/chosen": -469.74774169921875, "logps/rejected": -611.9984130859375, "loss": 0.2449, "rewards/accuracies": 0.625, "rewards/chosen": -0.3387991487979889, "rewards/margins": 0.1680956333875656, "rewards/rejected": -0.5068947672843933, "step": 2570 }, { "epoch": 0.62, "learning_rate": 1.9026494389597239e-06, "logits/chosen": -2.2467925548553467, "logits/rejected": -1.6694444417953491, "logps/chosen": -523.0413818359375, "logps/rejected": -689.0504150390625, "loss": 0.2504, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.32973501086235046, "rewards/margins": 0.22147789597511292, "rewards/rejected": -0.5512129068374634, "step": 2580 }, { "epoch": 0.62, "learning_rate": 1.8823333445479175e-06, "logits/chosen": -2.2301502227783203, "logits/rejected": -1.644044280052185, "logps/chosen": -525.6343994140625, "logps/rejected": -763.82666015625, "loss": 0.2074, "rewards/accuracies": 0.875, "rewards/chosen": -0.36197489500045776, "rewards/margins": 0.2621820569038391, "rewards/rejected": -0.6241569519042969, "step": 2590 }, { "epoch": 0.62, "learning_rate": 1.8620606001086423e-06, "logits/chosen": -2.244600772857666, "logits/rejected": -1.6469579935073853, "logps/chosen": -500.04168701171875, "logps/rejected": -700.35791015625, "loss": 0.2445, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3519020974636078, "rewards/margins": 0.23894628882408142, "rewards/rejected": -0.590848445892334, "step": 2600 }, { "epoch": 0.62, "eval_logits/chosen": -2.1073102951049805, "eval_logits/rejected": -1.923039436340332, "eval_logps/chosen": -608.5199584960938, "eval_logps/rejected": -651.2365112304688, "eval_loss": 0.12397192418575287, "eval_rewards/accuracies": 0.480538934469223, "eval_rewards/chosen": -0.342642217874527, "eval_rewards/margins": 0.057618096470832825, "eval_rewards/rejected": -0.4002602994441986, "eval_runtime": 1203.4556, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.278, "step": 2600 }, { "epoch": 0.63, "learning_rate": 1.8418326284528997e-06, "logits/chosen": -2.167713165283203, "logits/rejected": -1.4876689910888672, "logps/chosen": -523.5201416015625, "logps/rejected": -767.9014892578125, "loss": 0.207, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3801141679286957, "rewards/margins": 0.2987547814846039, "rewards/rejected": -0.6788689494132996, "step": 2610 }, { "epoch": 0.63, "learning_rate": 1.8216508492493887e-06, "logits/chosen": -2.2397613525390625, "logits/rejected": -1.7202036380767822, "logps/chosen": -559.1814575195312, "logps/rejected": -769.8832397460938, "loss": 0.2207, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4272993206977844, "rewards/margins": 0.21038667857646942, "rewards/rejected": -0.6376858949661255, "step": 2620 }, { "epoch": 0.63, "learning_rate": 1.8015166789248606e-06, "logits/chosen": -2.0814664363861084, "logits/rejected": -1.4224138259887695, "logps/chosen": -587.5399780273438, "logps/rejected": -741.4795532226562, "loss": 0.2535, "rewards/accuracies": 0.75, "rewards/chosen": -0.44020071625709534, "rewards/margins": 0.20819997787475586, "rewards/rejected": -0.6484007239341736, "step": 2630 }, { "epoch": 0.63, "learning_rate": 1.7814315305647095e-06, "logits/chosen": -2.138298749923706, "logits/rejected": -1.49704909324646, "logps/chosen": -484.08917236328125, "logps/rejected": -690.213134765625, "loss": 0.2113, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.34651824831962585, "rewards/margins": 0.24669913947582245, "rewards/rejected": -0.5932173132896423, "step": 2640 }, { "epoch": 0.64, "learning_rate": 1.7613968138138027e-06, "logits/chosen": -2.221468925476074, "logits/rejected": -1.5751087665557861, "logps/chosen": -588.3160400390625, "logps/rejected": -752.5386962890625, "loss": 0.2017, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.41586509346961975, "rewards/margins": 0.23006346821784973, "rewards/rejected": -0.6459285616874695, "step": 2650 }, { "epoch": 0.64, "learning_rate": 1.7414139347775423e-06, "logits/chosen": -2.351968288421631, "logits/rejected": -1.938773512840271, "logps/chosen": -538.7922973632812, "logps/rejected": -658.6979370117188, "loss": 0.2423, "rewards/accuracies": 0.625, "rewards/chosen": -0.387543261051178, "rewards/margins": 0.1547742336988449, "rewards/rejected": -0.5423175096511841, "step": 2660 }, { "epoch": 0.64, "learning_rate": 1.7214842959231796e-06, "logits/chosen": -2.395155668258667, "logits/rejected": -1.9260895252227783, "logps/chosen": -508.83380126953125, "logps/rejected": -612.7691650390625, "loss": 0.2719, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3546169698238373, "rewards/margins": 0.14962822198867798, "rewards/rejected": -0.5042451620101929, "step": 2670 }, { "epoch": 0.64, "learning_rate": 1.7016092959813892e-06, "logits/chosen": -2.2317426204681396, "logits/rejected": -1.5688402652740479, "logps/chosen": -560.9075317382812, "logps/rejected": -803.4161376953125, "loss": 0.2271, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.40735405683517456, "rewards/margins": 0.28638577461242676, "rewards/rejected": -0.6937398910522461, "step": 2680 }, { "epoch": 0.65, "learning_rate": 1.681790329848097e-06, "logits/chosen": -2.4424562454223633, "logits/rejected": -1.8636490106582642, "logps/chosen": -467.88848876953125, "logps/rejected": -653.8547973632812, "loss": 0.2154, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.27331727743148804, "rewards/margins": 0.21809041500091553, "rewards/rejected": -0.49140772223472595, "step": 2690 }, { "epoch": 0.65, "learning_rate": 1.6620287884865831e-06, "logits/chosen": -2.032238245010376, "logits/rejected": -1.261397123336792, "logps/chosen": -556.3517456054688, "logps/rejected": -758.9189453125, "loss": 0.2212, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.42433324456214905, "rewards/margins": 0.24921254813671112, "rewards/rejected": -0.6735457181930542, "step": 2700 }, { "epoch": 0.65, "eval_logits/chosen": -2.085954189300537, "eval_logits/rejected": -1.9049383401870728, "eval_logps/chosen": -555.2967529296875, "eval_logps/rejected": -587.150634765625, "eval_loss": 0.11031323671340942, "eval_rewards/accuracies": 0.492514967918396, "eval_rewards/chosen": -0.2894190549850464, "eval_rewards/margins": 0.04675537720322609, "eval_rewards/rejected": -0.33617445826530457, "eval_runtime": 1204.6921, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.277, "step": 2700 }, { "epoch": 0.65, "learning_rate": 1.6423260588298608e-06, "logits/chosen": -2.0640082359313965, "logits/rejected": -1.4335803985595703, "logps/chosen": -534.9646606445312, "logps/rejected": -709.1345825195312, "loss": 0.2333, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3720775246620178, "rewards/margins": 0.23830974102020264, "rewards/rejected": -0.6103872060775757, "step": 2710 }, { "epoch": 0.65, "learning_rate": 1.6226835236833356e-06, "logits/chosen": -2.219705104827881, "logits/rejected": -1.5398896932601929, "logps/chosen": -591.5511474609375, "logps/rejected": -778.790771484375, "loss": 0.2068, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.4227065145969391, "rewards/margins": 0.23486466705799103, "rewards/rejected": -0.6575711369514465, "step": 2720 }, { "epoch": 0.66, "learning_rate": 1.6031025616277512e-06, "logits/chosen": -2.3268961906433105, "logits/rejected": -1.9088338613510132, "logps/chosen": -519.0732421875, "logps/rejected": -698.407958984375, "loss": 0.2408, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.34299373626708984, "rewards/margins": 0.2001771628856659, "rewards/rejected": -0.5431708097457886, "step": 2730 }, { "epoch": 0.66, "learning_rate": 1.5835845469224447e-06, "logits/chosen": -2.0732569694519043, "logits/rejected": -1.3370907306671143, "logps/chosen": -519.775390625, "logps/rejected": -787.9893188476562, "loss": 0.203, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.350313276052475, "rewards/margins": 0.3310011029243469, "rewards/rejected": -0.6813143491744995, "step": 2740 }, { "epoch": 0.66, "learning_rate": 1.5641308494088903e-06, "logits/chosen": -2.190338373184204, "logits/rejected": -1.5155017375946045, "logps/chosen": -537.736328125, "logps/rejected": -741.5314331054688, "loss": 0.2423, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3821929395198822, "rewards/margins": 0.24444735050201416, "rewards/rejected": -0.6266402006149292, "step": 2750 }, { "epoch": 0.66, "learning_rate": 1.5447428344145565e-06, "logits/chosen": -2.1321663856506348, "logits/rejected": -1.3660682439804077, "logps/chosen": -523.1742553710938, "logps/rejected": -753.5155639648438, "loss": 0.2281, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.36487728357315063, "rewards/margins": 0.26777541637420654, "rewards/rejected": -0.6326526403427124, "step": 2760 }, { "epoch": 0.66, "learning_rate": 1.5254218626570927e-06, "logits/chosen": -2.0647172927856445, "logits/rejected": -1.7285734415054321, "logps/chosen": -488.0934143066406, "logps/rejected": -607.048828125, "loss": 0.247, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3441561162471771, "rewards/margins": 0.1596425622701645, "rewards/rejected": -0.5037986040115356, "step": 2770 }, { "epoch": 0.67, "learning_rate": 1.5061692901488161e-06, "logits/chosen": -2.2383837699890137, "logits/rejected": -1.8211209774017334, "logps/chosen": -564.5126953125, "logps/rejected": -712.2330932617188, "loss": 0.2191, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.39523395895957947, "rewards/margins": 0.17936298251152039, "rewards/rejected": -0.5745970010757446, "step": 2780 }, { "epoch": 0.67, "learning_rate": 1.486986468101555e-06, "logits/chosen": -1.9777355194091797, "logits/rejected": -1.433154821395874, "logps/chosen": -520.0540161132812, "logps/rejected": -722.2971801757812, "loss": 0.2369, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.37635913491249084, "rewards/margins": 0.23324568569660187, "rewards/rejected": -0.6096048355102539, "step": 2790 }, { "epoch": 0.67, "learning_rate": 1.467874742831808e-06, "logits/chosen": -2.08467698097229, "logits/rejected": -1.395107626914978, "logps/chosen": -427.42974853515625, "logps/rejected": -643.0478515625, "loss": 0.2301, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.25038906931877136, "rewards/margins": 0.2833944261074066, "rewards/rejected": -0.5337835550308228, "step": 2800 }, { "epoch": 0.67, "eval_logits/chosen": -2.0837836265563965, "eval_logits/rejected": -1.9023804664611816, "eval_logps/chosen": -541.2313232421875, "eval_logps/rejected": -578.7744750976562, "eval_loss": 0.1072813868522644, "eval_rewards/accuracies": 0.5104790329933167, "eval_rewards/chosen": -0.2753536105155945, "eval_rewards/margins": 0.05244464799761772, "eval_rewards/rejected": -0.3277982473373413, "eval_runtime": 1203.9543, "eval_samples_per_second": 1.661, "eval_steps_per_second": 0.277, "step": 2800 }, { "epoch": 0.67, "learning_rate": 1.4488354556662553e-06, "logits/chosen": -2.135097026824951, "logits/rejected": -1.5127490758895874, "logps/chosen": -471.5577697753906, "logps/rejected": -698.9330444335938, "loss": 0.2424, "rewards/accuracies": 0.75, "rewards/chosen": -0.28911831974983215, "rewards/margins": 0.29640114307403564, "rewards/rejected": -0.5855194330215454, "step": 2810 }, { "epoch": 0.68, "learning_rate": 1.4298699428476236e-06, "logits/chosen": -2.169896364212036, "logits/rejected": -1.6068958044052124, "logps/chosen": -445.7212829589844, "logps/rejected": -605.3258056640625, "loss": 0.2497, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31002697348594666, "rewards/margins": 0.17650462687015533, "rewards/rejected": -0.48653164505958557, "step": 2820 }, { "epoch": 0.68, "learning_rate": 1.4109795354409045e-06, "logits/chosen": -2.185542583465576, "logits/rejected": -1.547067403793335, "logps/chosen": -495.6243591308594, "logps/rejected": -689.5414428710938, "loss": 0.2164, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.35141193866729736, "rewards/margins": 0.24584810435771942, "rewards/rejected": -0.597260057926178, "step": 2830 }, { "epoch": 0.68, "learning_rate": 1.3921655592399256e-06, "logits/chosen": -2.1768155097961426, "logits/rejected": -1.667258858680725, "logps/chosen": -442.35772705078125, "logps/rejected": -603.5693359375, "loss": 0.2421, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.30999085307121277, "rewards/margins": 0.19103391468524933, "rewards/rejected": -0.5010247826576233, "step": 2840 }, { "epoch": 0.68, "learning_rate": 1.373429334674317e-06, "logits/chosen": -2.0179648399353027, "logits/rejected": -1.5763208866119385, "logps/chosen": -370.852294921875, "logps/rejected": -556.5926513671875, "loss": 0.2306, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2577959895133972, "rewards/margins": 0.20393744111061096, "rewards/rejected": -0.4617334306240082, "step": 2850 }, { "epoch": 0.69, "learning_rate": 1.3547721767168273e-06, "logits/chosen": -2.316718101501465, "logits/rejected": -1.516549825668335, "logps/chosen": -496.86676025390625, "logps/rejected": -732.0982666015625, "loss": 0.2151, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.31045517325401306, "rewards/margins": 0.30066969990730286, "rewards/rejected": -0.6111248731613159, "step": 2860 }, { "epoch": 0.69, "learning_rate": 1.3361953947910394e-06, "logits/chosen": -2.0438685417175293, "logits/rejected": -1.4607021808624268, "logps/chosen": -555.384765625, "logps/rejected": -781.2374877929688, "loss": 0.227, "rewards/accuracies": 0.875, "rewards/chosen": -0.40037497878074646, "rewards/margins": 0.270369291305542, "rewards/rejected": -0.6707442998886108, "step": 2870 }, { "epoch": 0.69, "learning_rate": 1.3177002926794685e-06, "logits/chosen": -2.167976140975952, "logits/rejected": -1.6518261432647705, "logps/chosen": -583.111572265625, "logps/rejected": -716.2067260742188, "loss": 0.2431, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4048164486885071, "rewards/margins": 0.18419420719146729, "rewards/rejected": -0.5890105962753296, "step": 2880 }, { "epoch": 0.69, "learning_rate": 1.2992881684320627e-06, "logits/chosen": -2.114382266998291, "logits/rejected": -1.4376449584960938, "logps/chosen": -555.6717529296875, "logps/rejected": -797.0183715820312, "loss": 0.2355, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4007048010826111, "rewards/margins": 0.28534895181655884, "rewards/rejected": -0.6860538125038147, "step": 2890 }, { "epoch": 0.7, "learning_rate": 1.280960314275092e-06, "logits/chosen": -2.203066110610962, "logits/rejected": -1.4496381282806396, "logps/chosen": -504.5201110839844, "logps/rejected": -760.7296142578125, "loss": 0.2099, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3362181484699249, "rewards/margins": 0.30968424677848816, "rewards/rejected": -0.6459023952484131, "step": 2900 }, { "epoch": 0.7, "eval_logits/chosen": -2.1013636589050293, "eval_logits/rejected": -1.9181909561157227, "eval_logps/chosen": -576.685791015625, "eval_logps/rejected": -616.715576171875, "eval_loss": 0.11909682303667068, "eval_rewards/accuracies": 0.5014970302581787, "eval_rewards/chosen": -0.3108081519603729, "eval_rewards/margins": 0.05493125692009926, "eval_rewards/rejected": -0.3657393455505371, "eval_runtime": 1202.8609, "eval_samples_per_second": 1.663, "eval_steps_per_second": 0.278, "step": 2900 }, { "epoch": 0.7, "learning_rate": 1.2627180165204671e-06, "logits/chosen": -2.2129924297332764, "logits/rejected": -1.4643549919128418, "logps/chosen": -480.690673828125, "logps/rejected": -725.8658447265625, "loss": 0.2223, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3262184262275696, "rewards/margins": 0.3106166422367096, "rewards/rejected": -0.6368350982666016, "step": 2910 }, { "epoch": 0.7, "learning_rate": 1.2445625554754526e-06, "logits/chosen": -2.2265453338623047, "logits/rejected": -1.5895025730133057, "logps/chosen": -527.1163330078125, "logps/rejected": -731.4031372070312, "loss": 0.227, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3665752410888672, "rewards/margins": 0.24773302674293518, "rewards/rejected": -0.61430823802948, "step": 2920 }, { "epoch": 0.7, "learning_rate": 1.2264952053528145e-06, "logits/chosen": -2.2679755687713623, "logits/rejected": -1.6115745306015015, "logps/chosen": -495.69696044921875, "logps/rejected": -677.9437255859375, "loss": 0.2363, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3489711880683899, "rewards/margins": 0.21225669980049133, "rewards/rejected": -0.5612279176712036, "step": 2930 }, { "epoch": 0.71, "learning_rate": 1.208517234181391e-06, "logits/chosen": -2.2844974994659424, "logits/rejected": -1.523255705833435, "logps/chosen": -519.15625, "logps/rejected": -752.979736328125, "loss": 0.2113, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3315885066986084, "rewards/margins": 0.2873605191707611, "rewards/rejected": -0.6189489960670471, "step": 2940 }, { "epoch": 0.71, "learning_rate": 1.190629903717097e-06, "logits/chosen": -1.9890199899673462, "logits/rejected": -1.3253666162490845, "logps/chosen": -531.1495971679688, "logps/rejected": -724.848876953125, "loss": 0.2021, "rewards/accuracies": 0.75, "rewards/chosen": -0.391356498003006, "rewards/margins": 0.2454235553741455, "rewards/rejected": -0.6367800831794739, "step": 2950 }, { "epoch": 0.71, "learning_rate": 1.172834469354373e-06, "logits/chosen": -2.2386484146118164, "logits/rejected": -1.7754074335098267, "logps/chosen": -448.0233459472656, "logps/rejected": -691.5328369140625, "loss": 0.2039, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31916800141334534, "rewards/margins": 0.22315283119678497, "rewards/rejected": -0.5423208475112915, "step": 2960 }, { "epoch": 0.71, "learning_rate": 1.1551321800380722e-06, "logits/chosen": -2.254110813140869, "logits/rejected": -1.618407964706421, "logps/chosen": -445.1405334472656, "logps/rejected": -648.235595703125, "loss": 0.2361, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.26483047008514404, "rewards/margins": 0.26764997839927673, "rewards/rejected": -0.5324803590774536, "step": 2970 }, { "epoch": 0.72, "learning_rate": 1.1375242781758077e-06, "logits/chosen": -1.9792537689208984, "logits/rejected": -1.2487452030181885, "logps/chosen": -573.9744873046875, "logps/rejected": -785.95068359375, "loss": 0.2005, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4200894832611084, "rewards/margins": 0.2546847462654114, "rewards/rejected": -0.674774169921875, "step": 2980 }, { "epoch": 0.72, "learning_rate": 1.1200119995507572e-06, "logits/chosen": -2.355534076690674, "logits/rejected": -1.7117884159088135, "logps/chosen": -517.779296875, "logps/rejected": -704.0262451171875, "loss": 0.2282, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3368731737136841, "rewards/margins": 0.23025667667388916, "rewards/rejected": -0.5671298503875732, "step": 2990 }, { "epoch": 0.72, "learning_rate": 1.1025965732349318e-06, "logits/chosen": -2.0606141090393066, "logits/rejected": -1.4610722064971924, "logps/chosen": -554.5364379882812, "logps/rejected": -741.3343505859375, "loss": 0.2072, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4323582649230957, "rewards/margins": 0.21536937355995178, "rewards/rejected": -0.6477276086807251, "step": 3000 }, { "epoch": 0.72, "eval_logits/chosen": -2.1089577674865723, "eval_logits/rejected": -1.9257806539535522, "eval_logps/chosen": -572.10986328125, "eval_logps/rejected": -607.2318725585938, "eval_loss": 0.11199278384447098, "eval_rewards/accuracies": 0.4910179674625397, "eval_rewards/chosen": -0.30623218417167664, "eval_rewards/margins": 0.05002351850271225, "eval_rewards/rejected": -0.3562556803226471, "eval_runtime": 1202.2566, "eval_samples_per_second": 1.664, "eval_steps_per_second": 0.278, "step": 3000 }, { "epoch": 0.72, "learning_rate": 1.085279221502909e-06, "logits/chosen": -2.168842315673828, "logits/rejected": -1.4940745830535889, "logps/chosen": -509.2674865722656, "logps/rejected": -726.7362060546875, "loss": 0.2193, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.33928245306015015, "rewards/margins": 0.2726161777973175, "rewards/rejected": -0.6118985414505005, "step": 3010 }, { "epoch": 0.72, "learning_rate": 1.0680611597460607e-06, "logits/chosen": -2.1892337799072266, "logits/rejected": -1.9174913167953491, "logps/chosen": -542.6739501953125, "logps/rejected": -655.5626220703125, "loss": 0.2446, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.38044658303260803, "rewards/margins": 0.15411126613616943, "rewards/rejected": -0.5345577597618103, "step": 3020 }, { "epoch": 0.73, "learning_rate": 1.0509435963872422e-06, "logits/chosen": -2.2147586345672607, "logits/rejected": -1.5097887516021729, "logps/chosen": -470.73681640625, "logps/rejected": -676.3612670898438, "loss": 0.2477, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3258078396320343, "rewards/margins": 0.24642141163349152, "rewards/rejected": -0.572229266166687, "step": 3030 }, { "epoch": 0.73, "learning_rate": 1.0339277327959863e-06, "logits/chosen": -1.9413217306137085, "logits/rejected": -1.3799703121185303, "logps/chosen": -562.2501220703125, "logps/rejected": -722.2896728515625, "loss": 0.2081, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4545202851295471, "rewards/margins": 0.18854863941669464, "rewards/rejected": -0.6430689096450806, "step": 3040 }, { "epoch": 0.73, "learning_rate": 1.0170147632041858e-06, "logits/chosen": -2.1207759380340576, "logits/rejected": -1.5410287380218506, "logps/chosen": -547.0601196289062, "logps/rejected": -727.9929809570312, "loss": 0.2318, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3909928500652313, "rewards/margins": 0.24240347743034363, "rewards/rejected": -0.6333963871002197, "step": 3050 }, { "epoch": 0.73, "learning_rate": 1.0002058746222807e-06, "logits/chosen": -2.0327346324920654, "logits/rejected": -1.2125627994537354, "logps/chosen": -560.7425537109375, "logps/rejected": -803.0782470703125, "loss": 0.2224, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.40220609307289124, "rewards/margins": 0.3016859292984009, "rewards/rejected": -0.7038920521736145, "step": 3060 }, { "epoch": 0.74, "learning_rate": 9.83502246755942e-07, "logits/chosen": -2.1720359325408936, "logits/rejected": -1.4709327220916748, "logps/chosen": -447.08538818359375, "logps/rejected": -707.2015991210938, "loss": 0.1958, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.28289416432380676, "rewards/margins": 0.32097429037094116, "rewards/rejected": -0.6038684844970703, "step": 3070 }, { "epoch": 0.74, "learning_rate": 9.669050519232875e-07, "logits/chosen": -2.3601675033569336, "logits/rejected": -1.676936388015747, "logps/chosen": -427.8089904785156, "logps/rejected": -644.7459716796875, "loss": 0.2071, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.250078022480011, "rewards/margins": 0.27098745107650757, "rewards/rejected": -0.5210654139518738, "step": 3080 }, { "epoch": 0.74, "learning_rate": 9.504154549725944e-07, "logits/chosen": -2.0875611305236816, "logits/rejected": -1.4849574565887451, "logps/chosen": -541.3798828125, "logps/rejected": -734.3297119140625, "loss": 0.2092, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.38182270526885986, "rewards/margins": 0.2472592145204544, "rewards/rejected": -0.6290819048881531, "step": 3090 }, { "epoch": 0.74, "learning_rate": 9.340346132005507e-07, "logits/chosen": -1.9655243158340454, "logits/rejected": -1.34130859375, "logps/chosen": -536.2704467773438, "logps/rejected": -768.4984130859375, "loss": 0.2186, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.39620083570480347, "rewards/margins": 0.2569933235645294, "rewards/rejected": -0.6531941890716553, "step": 3100 }, { "epoch": 0.74, "eval_logits/chosen": -2.0849273204803467, "eval_logits/rejected": -1.90314781665802, "eval_logps/chosen": -561.9234008789062, "eval_logps/rejected": -598.4004516601562, "eval_loss": 0.11545602977275848, "eval_rewards/accuracies": 0.4985029995441437, "eval_rewards/chosen": -0.29604560136795044, "eval_rewards/margins": 0.05137856304645538, "eval_rewards/rejected": -0.347424179315567, "eval_runtime": 1202.5152, "eval_samples_per_second": 1.663, "eval_steps_per_second": 0.278, "step": 3100 }, { "epoch": 0.75, "learning_rate": 9.177636762710321e-07, "logits/chosen": -2.1303648948669434, "logits/rejected": -1.4139230251312256, "logps/chosen": -493.11444091796875, "logps/rejected": -746.3267822265625, "loss": 0.2109, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3294147849082947, "rewards/margins": 0.29774171113967896, "rewards/rejected": -0.6271565556526184, "step": 3110 }, { "epoch": 0.75, "learning_rate": 9.01603786134413e-07, "logits/chosen": -2.0604751110076904, "logits/rejected": -1.474351167678833, "logps/chosen": -540.5977172851562, "logps/rejected": -729.1321411132812, "loss": 0.2184, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.35828250646591187, "rewards/margins": 0.2698938250541687, "rewards/rejected": -0.6281762719154358, "step": 3120 }, { "epoch": 0.75, "learning_rate": 8.855560769474237e-07, "logits/chosen": -2.1283202171325684, "logits/rejected": -1.408500075340271, "logps/chosen": -458.2832946777344, "logps/rejected": -671.4112548828125, "loss": 0.2389, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.28560301661491394, "rewards/margins": 0.29178422689437866, "rewards/rejected": -0.577387273311615, "step": 3130 }, { "epoch": 0.75, "learning_rate": 8.696216749935471e-07, "logits/chosen": -2.222780704498291, "logits/rejected": -1.5516656637191772, "logps/chosen": -499.7232360839844, "logps/rejected": -702.0225830078125, "loss": 0.2071, "rewards/accuracies": 0.875, "rewards/chosen": -0.3520824909210205, "rewards/margins": 0.24775567650794983, "rewards/rejected": -0.599838137626648, "step": 3140 }, { "epoch": 0.76, "learning_rate": 8.538016986039751e-07, "logits/chosen": -2.051525354385376, "logits/rejected": -1.5327043533325195, "logps/chosen": -434.45391845703125, "logps/rejected": -608.8433227539062, "loss": 0.2498, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2737278342247009, "rewards/margins": 0.21330229938030243, "rewards/rejected": -0.48703011870384216, "step": 3150 }, { "epoch": 0.76, "learning_rate": 8.380972580791191e-07, "logits/chosen": -2.1379358768463135, "logits/rejected": -1.6785972118377686, "logps/chosen": -459.676025390625, "logps/rejected": -624.14697265625, "loss": 0.2379, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.27942705154418945, "rewards/margins": 0.2311849296092987, "rewards/rejected": -0.5106119513511658, "step": 3160 }, { "epoch": 0.76, "learning_rate": 8.22509455610688e-07, "logits/chosen": -2.2192459106445312, "logits/rejected": -1.5867321491241455, "logps/chosen": -571.0604858398438, "logps/rejected": -757.7674560546875, "loss": 0.1938, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.40131741762161255, "rewards/margins": 0.24261796474456787, "rewards/rejected": -0.6439354419708252, "step": 3170 }, { "epoch": 0.76, "learning_rate": 8.070393852043251e-07, "logits/chosen": -2.323155641555786, "logits/rejected": -1.6226155757904053, "logps/chosen": -484.06878662109375, "logps/rejected": -705.3519287109375, "loss": 0.2214, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.29587116837501526, "rewards/margins": 0.2754477560520172, "rewards/rejected": -0.5713188648223877, "step": 3180 }, { "epoch": 0.77, "learning_rate": 7.916881326028387e-07, "logits/chosen": -2.050631046295166, "logits/rejected": -1.518162727355957, "logps/chosen": -600.6834716796875, "logps/rejected": -812.4183959960938, "loss": 0.2344, "rewards/accuracies": 0.875, "rewards/chosen": -0.44463053345680237, "rewards/margins": 0.2456853687763214, "rewards/rejected": -0.6903159022331238, "step": 3190 }, { "epoch": 0.77, "learning_rate": 7.7645677520999e-07, "logits/chosen": -2.213151454925537, "logits/rejected": -1.6413898468017578, "logps/chosen": -510.93341064453125, "logps/rejected": -684.2901611328125, "loss": 0.2743, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.36973968148231506, "rewards/margins": 0.20788827538490295, "rewards/rejected": -0.5776280164718628, "step": 3200 }, { "epoch": 0.77, "eval_logits/chosen": -2.117027521133423, "eval_logits/rejected": -1.933222770690918, "eval_logps/chosen": -547.4085693359375, "eval_logps/rejected": -582.3980102539062, "eval_loss": 0.11210732907056808, "eval_rewards/accuracies": 0.49550896883010864, "eval_rewards/chosen": -0.2815307676792145, "eval_rewards/margins": 0.049891021102666855, "eval_rewards/rejected": -0.33142179250717163, "eval_runtime": 1202.2814, "eval_samples_per_second": 1.664, "eval_steps_per_second": 0.278, "step": 3200 }, { "epoch": 0.77, "learning_rate": 7.613463820148831e-07, "logits/chosen": -2.287684917449951, "logits/rejected": -1.7129566669464111, "logps/chosen": -466.8036193847656, "logps/rejected": -705.1732788085938, "loss": 0.1976, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.30113768577575684, "rewards/margins": 0.2584455907344818, "rewards/rejected": -0.559583306312561, "step": 3210 }, { "epoch": 0.77, "learning_rate": 7.46358013516938e-07, "logits/chosen": -2.136887311935425, "logits/rejected": -1.4578837156295776, "logps/chosen": -446.27020263671875, "logps/rejected": -646.9642944335938, "loss": 0.2276, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.31002265214920044, "rewards/margins": 0.23059165477752686, "rewards/rejected": -0.5406142473220825, "step": 3220 }, { "epoch": 0.78, "learning_rate": 7.314927216514617e-07, "logits/chosen": -2.23048734664917, "logits/rejected": -1.5183980464935303, "logps/chosen": -458.1307678222656, "logps/rejected": -665.5648803710938, "loss": 0.2502, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2901155352592468, "rewards/margins": 0.2509034276008606, "rewards/rejected": -0.5410189628601074, "step": 3230 }, { "epoch": 0.78, "learning_rate": 7.167515497158179e-07, "logits/chosen": -2.1396336555480957, "logits/rejected": -1.470117211341858, "logps/chosen": -493.9241638183594, "logps/rejected": -694.4556274414062, "loss": 0.2212, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3393593430519104, "rewards/margins": 0.24507682025432587, "rewards/rejected": -0.5844362378120422, "step": 3240 }, { "epoch": 0.78, "learning_rate": 7.021355322962103e-07, "logits/chosen": -2.27089262008667, "logits/rejected": -1.488008737564087, "logps/chosen": -520.2621459960938, "logps/rejected": -743.5213623046875, "loss": 0.2338, "rewards/accuracies": 0.875, "rewards/chosen": -0.3471354842185974, "rewards/margins": 0.2685074210166931, "rewards/rejected": -0.6156429052352905, "step": 3250 }, { "epoch": 0.78, "learning_rate": 6.876456951950614e-07, "logits/chosen": -2.048699140548706, "logits/rejected": -1.5960794687271118, "logps/chosen": -489.568115234375, "logps/rejected": -647.7818603515625, "loss": 0.2228, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3318161070346832, "rewards/margins": 0.20408396422863007, "rewards/rejected": -0.5359001159667969, "step": 3260 }, { "epoch": 0.78, "learning_rate": 6.732830553590305e-07, "logits/chosen": -2.3154354095458984, "logits/rejected": -1.6182634830474854, "logps/chosen": -431.6551818847656, "logps/rejected": -626.2493286132812, "loss": 0.2486, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.26363369822502136, "rewards/margins": 0.23036842048168182, "rewards/rejected": -0.49400216341018677, "step": 3270 }, { "epoch": 0.79, "learning_rate": 6.590486208076319e-07, "logits/chosen": -2.2397027015686035, "logits/rejected": -1.525174856185913, "logps/chosen": -513.7566528320312, "logps/rejected": -732.8204956054688, "loss": 0.206, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.32936355471611023, "rewards/margins": 0.2873368561267853, "rewards/rejected": -0.6167004704475403, "step": 3280 }, { "epoch": 0.79, "learning_rate": 6.449433905624916e-07, "logits/chosen": -2.2996716499328613, "logits/rejected": -1.7234981060028076, "logps/chosen": -405.33392333984375, "logps/rejected": -642.2993774414062, "loss": 0.2065, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2398863285779953, "rewards/margins": 0.25551775097846985, "rewards/rejected": -0.49540409445762634, "step": 3290 }, { "epoch": 0.79, "learning_rate": 6.309683545772327e-07, "logits/chosen": -2.089585781097412, "logits/rejected": -1.4566513299942017, "logps/chosen": -605.0691528320312, "logps/rejected": -772.1795654296875, "loss": 0.1989, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4322146475315094, "rewards/margins": 0.24194130301475525, "rewards/rejected": -0.6741558313369751, "step": 3300 }, { "epoch": 0.79, "eval_logits/chosen": -2.078937530517578, "eval_logits/rejected": -1.8977138996124268, "eval_logps/chosen": -589.4213256835938, "eval_logps/rejected": -625.3888549804688, "eval_loss": 0.11159414052963257, "eval_rewards/accuracies": 0.485029935836792, "eval_rewards/chosen": -0.3235435485839844, "eval_rewards/margins": 0.05086908116936684, "eval_rewards/rejected": -0.3744126558303833, "eval_runtime": 1201.7379, "eval_samples_per_second": 1.664, "eval_steps_per_second": 0.278, "step": 3300 }, { "epoch": 0.79, "learning_rate": 6.171244936679985e-07, "logits/chosen": -2.156498670578003, "logits/rejected": -1.5686888694763184, "logps/chosen": -517.328125, "logps/rejected": -720.8557739257812, "loss": 0.2222, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.38057941198349, "rewards/margins": 0.23988094925880432, "rewards/rejected": -0.6204602718353271, "step": 3310 }, { "epoch": 0.8, "learning_rate": 6.03412779444612e-07, "logits/chosen": -2.139711380004883, "logits/rejected": -1.6575161218643188, "logps/chosen": -492.5684509277344, "logps/rejected": -700.1224365234375, "loss": 0.2216, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.363933801651001, "rewards/margins": 0.22296495735645294, "rewards/rejected": -0.5868988037109375, "step": 3320 }, { "epoch": 0.8, "learning_rate": 5.898341742423866e-07, "logits/chosen": -2.1976096630096436, "logits/rejected": -1.7457921504974365, "logps/chosen": -467.56964111328125, "logps/rejected": -719.9373779296875, "loss": 0.2083, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3288330137729645, "rewards/margins": 0.26724451780319214, "rewards/rejected": -0.596077561378479, "step": 3330 }, { "epoch": 0.8, "learning_rate": 5.763896310545893e-07, "logits/chosen": -2.279165744781494, "logits/rejected": -1.6992404460906982, "logps/chosen": -525.0632934570312, "logps/rejected": -693.7684326171875, "loss": 0.2525, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.36443620920181274, "rewards/margins": 0.21246829628944397, "rewards/rejected": -0.5769044160842896, "step": 3340 }, { "epoch": 0.8, "learning_rate": 5.630800934655481e-07, "logits/chosen": -2.1953513622283936, "logits/rejected": -1.6583993434906006, "logps/chosen": -567.8196411132812, "logps/rejected": -701.3932495117188, "loss": 0.2321, "rewards/accuracies": 0.875, "rewards/chosen": -0.3820284903049469, "rewards/margins": 0.20547595620155334, "rewards/rejected": -0.587504506111145, "step": 3350 }, { "epoch": 0.81, "learning_rate": 5.499064955844383e-07, "logits/chosen": -2.342818260192871, "logits/rejected": -1.7481062412261963, "logps/chosen": -467.240234375, "logps/rejected": -653.8292846679688, "loss": 0.2016, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3041478991508484, "rewards/margins": 0.24987390637397766, "rewards/rejected": -0.5540217757225037, "step": 3360 }, { "epoch": 0.81, "learning_rate": 5.368697619797159e-07, "logits/chosen": -2.2441914081573486, "logits/rejected": -1.7231801748275757, "logps/chosen": -487.1238708496094, "logps/rejected": -679.908447265625, "loss": 0.2269, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3233507573604584, "rewards/margins": 0.22874152660369873, "rewards/rejected": -0.5520923137664795, "step": 3370 }, { "epoch": 0.81, "learning_rate": 5.239708076142311e-07, "logits/chosen": -2.1569647789001465, "logits/rejected": -1.3708707094192505, "logps/chosen": -475.98870849609375, "logps/rejected": -776.7669677734375, "loss": 0.2122, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2993699908256531, "rewards/margins": 0.37619748711586, "rewards/rejected": -0.6755674481391907, "step": 3380 }, { "epoch": 0.81, "learning_rate": 5.112105377810128e-07, "logits/chosen": -2.194058418273926, "logits/rejected": -1.4502426385879517, "logps/chosen": -484.3235778808594, "logps/rejected": -692.0008544921875, "loss": 0.2367, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.33010968565940857, "rewards/margins": 0.2725631594657898, "rewards/rejected": -0.6026727557182312, "step": 3390 }, { "epoch": 0.82, "learning_rate": 4.985898480397322e-07, "logits/chosen": -2.257282018661499, "logits/rejected": -1.683765172958374, "logps/chosen": -534.498291015625, "logps/rejected": -733.8802490234375, "loss": 0.2258, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3430347442626953, "rewards/margins": 0.24402375519275665, "rewards/rejected": -0.587058424949646, "step": 3400 }, { "epoch": 0.82, "eval_logits/chosen": -2.098897695541382, "eval_logits/rejected": -1.9164294004440308, "eval_logps/chosen": -574.9765625, "eval_logps/rejected": -611.2417602539062, "eval_loss": 0.10930529981851578, "eval_rewards/accuracies": 0.49700599908828735, "eval_rewards/chosen": -0.30909889936447144, "eval_rewards/margins": 0.05116667598485947, "eval_rewards/rejected": -0.3602655529975891, "eval_runtime": 1202.474, "eval_samples_per_second": 1.663, "eval_steps_per_second": 0.278, "step": 3400 }, { "epoch": 0.82, "learning_rate": 4.861096241538483e-07, "logits/chosen": -2.0668435096740723, "logits/rejected": -1.627023696899414, "logps/chosen": -481.35699462890625, "logps/rejected": -688.6731567382812, "loss": 0.203, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3404432535171509, "rewards/margins": 0.2262330949306488, "rewards/rejected": -0.5666762590408325, "step": 3410 }, { "epoch": 0.82, "learning_rate": 4.7377074202844514e-07, "logits/chosen": -2.268639087677002, "logits/rejected": -1.8624111413955688, "logps/chosen": -501.8617248535156, "logps/rejected": -634.2926635742188, "loss": 0.217, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3503084182739258, "rewards/margins": 0.1614145189523697, "rewards/rejected": -0.5117229223251343, "step": 3420 }, { "epoch": 0.82, "learning_rate": 4.615740676487507e-07, "logits/chosen": -2.0803284645080566, "logits/rejected": -1.4268542528152466, "logps/chosen": -481.5899353027344, "logps/rejected": -727.3535766601562, "loss": 0.2247, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.31610196828842163, "rewards/margins": 0.2857650816440582, "rewards/rejected": -0.6018670797348022, "step": 3430 }, { "epoch": 0.83, "learning_rate": 4.495204570193687e-07, "logits/chosen": -2.074976682662964, "logits/rejected": -1.3842707872390747, "logps/chosen": -552.0552368164062, "logps/rejected": -750.6548461914062, "loss": 0.2333, "rewards/accuracies": 0.875, "rewards/chosen": -0.3645619750022888, "rewards/margins": 0.2746102809906006, "rewards/rejected": -0.6391721963882446, "step": 3440 }, { "epoch": 0.83, "learning_rate": 4.376107561041937e-07, "logits/chosen": -2.095445394515991, "logits/rejected": -1.43033766746521, "logps/chosen": -583.4439086914062, "logps/rejected": -766.9414672851562, "loss": 0.257, "rewards/accuracies": 0.875, "rewards/chosen": -0.41500750184059143, "rewards/margins": 0.24710014462471008, "rewards/rejected": -0.6621075868606567, "step": 3450 }, { "epoch": 0.83, "learning_rate": 4.258458007670413e-07, "logits/chosen": -2.1980960369110107, "logits/rejected": -1.6657772064208984, "logps/chosen": -488.93927001953125, "logps/rejected": -709.1919555664062, "loss": 0.2025, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3362073302268982, "rewards/margins": 0.2494477480649948, "rewards/rejected": -0.5856550335884094, "step": 3460 }, { "epoch": 0.83, "learning_rate": 4.1422641671298336e-07, "logits/chosen": -2.3565773963928223, "logits/rejected": -1.5782469511032104, "logps/chosen": -432.61676025390625, "logps/rejected": -706.4254150390625, "loss": 0.2104, "rewards/accuracies": 0.875, "rewards/chosen": -0.2704422175884247, "rewards/margins": 0.32848039269447327, "rewards/rejected": -0.598922610282898, "step": 3470 }, { "epoch": 0.84, "learning_rate": 4.0275341943040057e-07, "logits/chosen": -2.3177318572998047, "logits/rejected": -1.3838385343551636, "logps/chosen": -469.2691955566406, "logps/rejected": -692.705078125, "loss": 0.2241, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.28749537467956543, "rewards/margins": 0.2984176278114319, "rewards/rejected": -0.5859130620956421, "step": 3480 }, { "epoch": 0.84, "learning_rate": 3.9142761413374336e-07, "logits/chosen": -2.1981492042541504, "logits/rejected": -1.5857261419296265, "logps/chosen": -563.069580078125, "logps/rejected": -761.644775390625, "loss": 0.2396, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3896663784980774, "rewards/margins": 0.2428751438856125, "rewards/rejected": -0.6325414776802063, "step": 3490 }, { "epoch": 0.84, "learning_rate": 3.802497957070225e-07, "logits/chosen": -2.1995058059692383, "logits/rejected": -1.543595552444458, "logps/chosen": -507.3514709472656, "logps/rejected": -675.6525268554688, "loss": 0.2524, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.34191256761550903, "rewards/margins": 0.23432913422584534, "rewards/rejected": -0.576241672039032, "step": 3500 }, { "epoch": 0.84, "eval_logits/chosen": -2.0956172943115234, "eval_logits/rejected": -1.91299307346344, "eval_logps/chosen": -604.2028198242188, "eval_logps/rejected": -640.6893310546875, "eval_loss": 0.11420483887195587, "eval_rewards/accuracies": 0.4910179674625397, "eval_rewards/chosen": -0.33832505345344543, "eval_rewards/margins": 0.05138807371258736, "eval_rewards/rejected": -0.3897131383419037, "eval_runtime": 1202.4811, "eval_samples_per_second": 1.663, "eval_steps_per_second": 0.278, "step": 3500 }, { "epoch": 0.84, "learning_rate": 3.6922074864802095e-07, "logits/chosen": -2.1560494899749756, "logits/rejected": -1.4765256643295288, "logps/chosen": -531.6047973632812, "logps/rejected": -799.2362060546875, "loss": 0.221, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.38027533888816833, "rewards/margins": 0.31235387921333313, "rewards/rejected": -0.6926292181015015, "step": 3510 }, { "epoch": 0.84, "learning_rate": 3.5834124701323414e-07, "logits/chosen": -2.3160910606384277, "logits/rejected": -1.5684566497802734, "logps/chosen": -469.61871337890625, "logps/rejected": -694.918701171875, "loss": 0.2166, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2824546694755554, "rewards/margins": 0.30628883838653564, "rewards/rejected": -0.5887435674667358, "step": 3520 }, { "epoch": 0.85, "learning_rate": 3.476120543635469e-07, "logits/chosen": -2.173966646194458, "logits/rejected": -1.6922099590301514, "logps/chosen": -524.9368286132812, "logps/rejected": -688.4470825195312, "loss": 0.2437, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3909275531768799, "rewards/margins": 0.20910099148750305, "rewards/rejected": -0.6000285148620605, "step": 3530 }, { "epoch": 0.85, "learning_rate": 3.370339237106385e-07, "logits/chosen": -2.2078137397766113, "logits/rejected": -1.5486336946487427, "logps/chosen": -605.7921752929688, "logps/rejected": -812.9849853515625, "loss": 0.2676, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.43324851989746094, "rewards/margins": 0.2597372233867645, "rewards/rejected": -0.6929857134819031, "step": 3540 }, { "epoch": 0.85, "learning_rate": 3.2660759746414055e-07, "logits/chosen": -2.3300044536590576, "logits/rejected": -1.6363261938095093, "logps/chosen": -503.43389892578125, "logps/rejected": -697.4727783203125, "loss": 0.21, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3338503837585449, "rewards/margins": 0.26159441471099854, "rewards/rejected": -0.595444917678833, "step": 3550 }, { "epoch": 0.85, "learning_rate": 3.1633380737952663e-07, "logits/chosen": -2.2085890769958496, "logits/rejected": -1.6105495691299438, "logps/chosen": -536.9605102539062, "logps/rejected": -736.1573486328125, "loss": 0.2153, "rewards/accuracies": 0.75, "rewards/chosen": -0.4149557948112488, "rewards/margins": 0.2275308072566986, "rewards/rejected": -0.6424866914749146, "step": 3560 }, { "epoch": 0.86, "learning_rate": 3.0621327450675806e-07, "logits/chosen": -2.1140153408050537, "logits/rejected": -1.419506311416626, "logps/chosen": -476.4149475097656, "logps/rejected": -692.6353759765625, "loss": 0.2369, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.32543691992759705, "rewards/margins": 0.2517564594745636, "rewards/rejected": -0.5771933794021606, "step": 3570 }, { "epoch": 0.86, "learning_rate": 2.96246709139677e-07, "logits/chosen": -2.3951430320739746, "logits/rejected": -1.572355031967163, "logps/chosen": -429.2137145996094, "logps/rejected": -726.4138793945312, "loss": 0.1967, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.26176077127456665, "rewards/margins": 0.37150201201438904, "rewards/rejected": -0.6332628130912781, "step": 3580 }, { "epoch": 0.86, "learning_rate": 2.8643481076615717e-07, "logits/chosen": -2.0544791221618652, "logits/rejected": -1.4575806856155396, "logps/chosen": -499.528564453125, "logps/rejected": -739.0894775390625, "loss": 0.2194, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3649117648601532, "rewards/margins": 0.27804499864578247, "rewards/rejected": -0.6429567337036133, "step": 3590 }, { "epoch": 0.86, "learning_rate": 2.767782680190073e-07, "logits/chosen": -2.2115986347198486, "logits/rejected": -1.6992902755737305, "logps/chosen": -477.15130615234375, "logps/rejected": -667.1221313476562, "loss": 0.2202, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32506081461906433, "rewards/margins": 0.2292177677154541, "rewards/rejected": -0.554278552532196, "step": 3600 }, { "epoch": 0.86, "eval_logits/chosen": -2.097348690032959, "eval_logits/rejected": -1.9145889282226562, "eval_logps/chosen": -607.1243896484375, "eval_logps/rejected": -643.49365234375, "eval_loss": 0.1172671988606453, "eval_rewards/accuracies": 0.48353293538093567, "eval_rewards/chosen": -0.34124666452407837, "eval_rewards/margins": 0.05127076432108879, "eval_rewards/rejected": -0.39251741766929626, "eval_runtime": 1201.9673, "eval_samples_per_second": 1.664, "eval_steps_per_second": 0.278, "step": 3600 }, { "epoch": 0.87, "learning_rate": 2.6727775862764703e-07, "logits/chosen": -2.1986818313598633, "logits/rejected": -1.5473062992095947, "logps/chosen": -509.9576110839844, "logps/rejected": -716.515380859375, "loss": 0.2166, "rewards/accuracies": 0.75, "rewards/chosen": -0.382008820772171, "rewards/margins": 0.26671355962753296, "rewards/rejected": -0.6487222909927368, "step": 3610 }, { "epoch": 0.87, "learning_rate": 2.579339493705355e-07, "logits/chosen": -2.0473005771636963, "logits/rejected": -1.4649312496185303, "logps/chosen": -526.8579711914062, "logps/rejected": -757.1593017578125, "loss": 0.238, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3886314630508423, "rewards/margins": 0.2629753351211548, "rewards/rejected": -0.6516067385673523, "step": 3620 }, { "epoch": 0.87, "learning_rate": 2.48747496028377e-07, "logits/chosen": -2.400381326675415, "logits/rejected": -1.842514991760254, "logps/chosen": -526.4846801757812, "logps/rejected": -663.5477905273438, "loss": 0.2114, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3170974850654602, "rewards/margins": 0.2216567099094391, "rewards/rejected": -0.5387541651725769, "step": 3630 }, { "epoch": 0.87, "learning_rate": 2.397190433380964e-07, "logits/chosen": -2.186983585357666, "logits/rejected": -1.466896653175354, "logps/chosen": -533.7484130859375, "logps/rejected": -750.9659423828125, "loss": 0.2031, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.36392658948898315, "rewards/margins": 0.2693334221839905, "rewards/rejected": -0.6332601308822632, "step": 3640 }, { "epoch": 0.88, "learning_rate": 2.3084922494758965e-07, "logits/chosen": -2.3395190238952637, "logits/rejected": -1.672703742980957, "logps/chosen": -514.8880615234375, "logps/rejected": -701.5098876953125, "loss": 0.2171, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.32516831159591675, "rewards/margins": 0.25780266523361206, "rewards/rejected": -0.5829709768295288, "step": 3650 }, { "epoch": 0.88, "learning_rate": 2.2213866337125022e-07, "logits/chosen": -2.353017568588257, "logits/rejected": -1.6513875722885132, "logps/chosen": -455.20831298828125, "logps/rejected": -725.7654418945312, "loss": 0.1905, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.27765128016471863, "rewards/margins": 0.31067362427711487, "rewards/rejected": -0.5883249044418335, "step": 3660 }, { "epoch": 0.88, "learning_rate": 2.1358796994628005e-07, "logits/chosen": -2.3297479152679443, "logits/rejected": -1.7508220672607422, "logps/chosen": -478.0596618652344, "logps/rejected": -708.4781494140625, "loss": 0.2241, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3622106909751892, "rewards/margins": 0.24578292667865753, "rewards/rejected": -0.6079936027526855, "step": 3670 }, { "epoch": 0.88, "learning_rate": 2.0519774478978404e-07, "logits/chosen": -2.2244420051574707, "logits/rejected": -1.7041347026824951, "logps/chosen": -453.597412109375, "logps/rejected": -672.0855712890625, "loss": 0.2102, "rewards/accuracies": 0.75, "rewards/chosen": -0.2906479239463806, "rewards/margins": 0.2701519727706909, "rewards/rejected": -0.5607999563217163, "step": 3680 }, { "epoch": 0.89, "learning_rate": 1.9696857675665122e-07, "logits/chosen": -2.357430934906006, "logits/rejected": -1.7562310695648193, "logps/chosen": -517.4642333984375, "logps/rejected": -683.9576416015625, "loss": 0.215, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3092634081840515, "rewards/margins": 0.24541537463665009, "rewards/rejected": -0.5546787977218628, "step": 3690 }, { "epoch": 0.89, "learning_rate": 1.8890104339822913e-07, "logits/chosen": -2.3692615032196045, "logits/rejected": -1.9229549169540405, "logps/chosen": -407.3956604003906, "logps/rejected": -604.9410400390625, "loss": 0.2365, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23363754153251648, "rewards/margins": 0.2574554681777954, "rewards/rejected": -0.49109306931495667, "step": 3700 }, { "epoch": 0.89, "eval_logits/chosen": -2.111711263656616, "eval_logits/rejected": -1.9279160499572754, "eval_logps/chosen": -593.2113647460938, "eval_logps/rejected": -629.6786499023438, "eval_loss": 0.11781512200832367, "eval_rewards/accuracies": 0.485029935836792, "eval_rewards/chosen": -0.32733359932899475, "eval_rewards/margins": 0.051368873566389084, "eval_rewards/rejected": -0.3787024915218353, "eval_runtime": 1201.7001, "eval_samples_per_second": 1.664, "eval_steps_per_second": 0.278, "step": 3700 }, { "epoch": 0.89, "learning_rate": 1.809957109217833e-07, "logits/chosen": -2.1375186443328857, "logits/rejected": -1.3925069570541382, "logps/chosen": -534.6188354492188, "logps/rejected": -745.802001953125, "loss": 0.2201, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.363347589969635, "rewards/margins": 0.26264601945877075, "rewards/rejected": -0.6259936094284058, "step": 3710 }, { "epoch": 0.89, "learning_rate": 1.7325313415076705e-07, "logits/chosen": -2.1828553676605225, "logits/rejected": -1.5356895923614502, "logps/chosen": -600.4208984375, "logps/rejected": -829.5792846679688, "loss": 0.2363, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4620317816734314, "rewards/margins": 0.2596507966518402, "rewards/rejected": -0.7216825485229492, "step": 3720 }, { "epoch": 0.9, "learning_rate": 1.6567385648587563e-07, "logits/chosen": -2.342515230178833, "logits/rejected": -1.5815179347991943, "logps/chosen": -465.18426513671875, "logps/rejected": -719.2786254882812, "loss": 0.2563, "rewards/accuracies": 0.875, "rewards/chosen": -0.3026709258556366, "rewards/margins": 0.3173164129257202, "rewards/rejected": -0.6199873685836792, "step": 3730 }, { "epoch": 0.9, "learning_rate": 1.5825840986691155e-07, "logits/chosen": -2.254272937774658, "logits/rejected": -1.479819893836975, "logps/chosen": -474.2950134277344, "logps/rejected": -734.5546264648438, "loss": 0.2375, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3106682598590851, "rewards/margins": 0.32218748331069946, "rewards/rejected": -0.6328557133674622, "step": 3740 }, { "epoch": 0.9, "learning_rate": 1.5100731473544932e-07, "logits/chosen": -2.3709492683410645, "logits/rejected": -1.5354996919631958, "logps/chosen": -428.4142150878906, "logps/rejected": -707.5547485351562, "loss": 0.2148, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2521052658557892, "rewards/margins": 0.3602182865142822, "rewards/rejected": -0.6123236417770386, "step": 3750 }, { "epoch": 0.9, "learning_rate": 1.439210799983126e-07, "logits/chosen": -2.1984145641326904, "logits/rejected": -1.509284257888794, "logps/chosen": -500.31396484375, "logps/rejected": -694.1808471679688, "loss": 0.2365, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.35744860768318176, "rewards/margins": 0.23036059737205505, "rewards/rejected": -0.5878092646598816, "step": 3760 }, { "epoch": 0.9, "learning_rate": 1.3700020299185156e-07, "logits/chosen": -2.264267921447754, "logits/rejected": -1.6434733867645264, "logps/chosen": -407.46783447265625, "logps/rejected": -639.7389526367188, "loss": 0.1937, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2735540270805359, "rewards/margins": 0.2768193185329437, "rewards/rejected": -0.5503733158111572, "step": 3770 }, { "epoch": 0.91, "learning_rate": 1.3024516944704495e-07, "logits/chosen": -2.247830390930176, "logits/rejected": -1.6828949451446533, "logps/chosen": -474.3807067871094, "logps/rejected": -660.2564086914062, "loss": 0.2216, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.275582879781723, "rewards/margins": 0.2697158455848694, "rewards/rejected": -0.54529869556427, "step": 3780 }, { "epoch": 0.91, "learning_rate": 1.2365645345540383e-07, "logits/chosen": -2.219400405883789, "logits/rejected": -1.4605425596237183, "logps/chosen": -505.559326171875, "logps/rejected": -701.7130126953125, "loss": 0.2029, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.34596461057662964, "rewards/margins": 0.2494499683380127, "rewards/rejected": -0.5954145789146423, "step": 3790 }, { "epoch": 0.91, "learning_rate": 1.172345174357023e-07, "logits/chosen": -2.0350794792175293, "logits/rejected": -1.59932541847229, "logps/chosen": -508.6976623535156, "logps/rejected": -721.3234252929688, "loss": 0.1894, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.32896578311920166, "rewards/margins": 0.2509525418281555, "rewards/rejected": -0.5799182653427124, "step": 3800 }, { "epoch": 0.91, "eval_logits/chosen": -2.1088204383850098, "eval_logits/rejected": -1.9252161979675293, "eval_logps/chosen": -584.3236694335938, "eval_logps/rejected": -620.3304443359375, "eval_loss": 0.11522623896598816, "eval_rewards/accuracies": 0.492514967918396, "eval_rewards/chosen": -0.31844595074653625, "eval_rewards/margins": 0.050908301025629044, "eval_rewards/rejected": -0.3693542182445526, "eval_runtime": 1201.6558, "eval_samples_per_second": 1.664, "eval_steps_per_second": 0.278, "step": 3800 }, { "epoch": 0.91, "learning_rate": 1.1097981210152042e-07, "logits/chosen": -2.33854603767395, "logits/rejected": -1.6289736032485962, "logps/chosen": -513.0220947265625, "logps/rejected": -756.1377563476562, "loss": 0.2139, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.34179630875587463, "rewards/margins": 0.2884097695350647, "rewards/rejected": -0.6302061080932617, "step": 3810 }, { "epoch": 0.92, "learning_rate": 1.0489277642961481e-07, "logits/chosen": -2.048307180404663, "logits/rejected": -1.4036309719085693, "logps/chosen": -529.2738647460938, "logps/rejected": -765.36083984375, "loss": 0.186, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.35568323731422424, "rewards/margins": 0.30473262071609497, "rewards/rejected": -0.6604158878326416, "step": 3820 }, { "epoch": 0.92, "learning_rate": 9.897383762910606e-08, "logits/chosen": -1.9754745960235596, "logits/rejected": -1.5431668758392334, "logps/chosen": -553.9776000976562, "logps/rejected": -741.6851806640625, "loss": 0.2304, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3955530524253845, "rewards/margins": 0.206370547413826, "rewards/rejected": -0.6019236445426941, "step": 3830 }, { "epoch": 0.92, "learning_rate": 9.322341111149852e-08, "logits/chosen": -2.027393341064453, "logits/rejected": -1.5670268535614014, "logps/chosen": -468.5950622558594, "logps/rejected": -621.8480834960938, "loss": 0.2437, "rewards/accuracies": 0.75, "rewards/chosen": -0.32971760630607605, "rewards/margins": 0.20072904229164124, "rewards/rejected": -0.5304466485977173, "step": 3840 }, { "epoch": 0.92, "learning_rate": 8.764190046152421e-08, "logits/chosen": -2.19490909576416, "logits/rejected": -1.496145486831665, "logps/chosen": -535.2597045898438, "logps/rejected": -781.6893310546875, "loss": 0.2316, "rewards/accuracies": 0.875, "rewards/chosen": -0.37169763445854187, "rewards/margins": 0.2971428632736206, "rewards/rejected": -0.6688405275344849, "step": 3850 }, { "epoch": 0.93, "learning_rate": 8.22296974088177e-08, "logits/chosen": -1.967779517173767, "logits/rejected": -1.4400413036346436, "logps/chosen": -442.20361328125, "logps/rejected": -644.3177490234375, "loss": 0.2315, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2571173310279846, "rewards/margins": 0.2540501058101654, "rewards/rejected": -0.5111674666404724, "step": 3860 }, { "epoch": 0.93, "learning_rate": 7.698718180042392e-08, "logits/chosen": -2.1146020889282227, "logits/rejected": -1.5487241744995117, "logps/chosen": -518.4695434570312, "logps/rejected": -713.0475463867188, "loss": 0.1937, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3744608163833618, "rewards/margins": 0.24362783133983612, "rewards/rejected": -0.6180886626243591, "step": 3870 }, { "epoch": 0.93, "learning_rate": 7.19147215741381e-08, "logits/chosen": -2.159484624862671, "logits/rejected": -1.5101068019866943, "logps/chosen": -535.372314453125, "logps/rejected": -722.1314697265625, "loss": 0.2525, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.39585381746292114, "rewards/margins": 0.22491466999053955, "rewards/rejected": -0.6207684278488159, "step": 3880 }, { "epoch": 0.93, "learning_rate": 6.701267273268392e-08, "logits/chosen": -2.262892484664917, "logits/rejected": -1.6548888683319092, "logps/chosen": -534.9225463867188, "logps/rejected": -699.1444091796875, "loss": 0.2128, "rewards/accuracies": 0.75, "rewards/chosen": -0.37802669405937195, "rewards/margins": 0.23024694621562958, "rewards/rejected": -0.6082736849784851, "step": 3890 }, { "epoch": 0.94, "learning_rate": 6.228137931872713e-08, "logits/chosen": -2.2181999683380127, "logits/rejected": -1.6821672916412354, "logps/chosen": -486.3285217285156, "logps/rejected": -643.837158203125, "loss": 0.2372, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3677838444709778, "rewards/margins": 0.19924761354923248, "rewards/rejected": -0.5670315027236938, "step": 3900 }, { "epoch": 0.94, "eval_logits/chosen": -2.1021220684051514, "eval_logits/rejected": -1.9193756580352783, "eval_logps/chosen": -581.354248046875, "eval_logps/rejected": -616.7926025390625, "eval_loss": 0.11302248388528824, "eval_rewards/accuracies": 0.4940119683742523, "eval_rewards/chosen": -0.3154764771461487, "eval_rewards/margins": 0.05033989995718002, "eval_rewards/rejected": -0.3658163845539093, "eval_runtime": 1202.5868, "eval_samples_per_second": 1.663, "eval_steps_per_second": 0.278, "step": 3900 }, { "epoch": 0.94, "learning_rate": 5.772117339072902e-08, "logits/chosen": -2.4557697772979736, "logits/rejected": -1.5533807277679443, "logps/chosen": -464.8731994628906, "logps/rejected": -771.2939453125, "loss": 0.2117, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.28584685921669006, "rewards/margins": 0.35117307305336, "rewards/rejected": -0.6370199918746948, "step": 3910 }, { "epoch": 0.94, "learning_rate": 5.333237499964283e-08, "logits/chosen": -2.152449369430542, "logits/rejected": -1.7136890888214111, "logps/chosen": -507.4432067871094, "logps/rejected": -695.372314453125, "loss": 0.1965, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.328453004360199, "rewards/margins": 0.2325679361820221, "rewards/rejected": -0.5610209107398987, "step": 3920 }, { "epoch": 0.94, "learning_rate": 4.911529216645089e-08, "logits/chosen": -2.002575635910034, "logits/rejected": -1.488743543624878, "logps/chosen": -451.83343505859375, "logps/rejected": -666.2559814453125, "loss": 0.2141, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.26483482122421265, "rewards/margins": 0.2841261625289917, "rewards/rejected": -0.5489609837532043, "step": 3930 }, { "epoch": 0.95, "learning_rate": 4.5070220860545244e-08, "logits/chosen": -2.195952892303467, "logits/rejected": -1.6254193782806396, "logps/chosen": -536.245361328125, "logps/rejected": -695.6385498046875, "loss": 0.2393, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.33327215909957886, "rewards/margins": 0.23736166954040527, "rewards/rejected": -0.5706338286399841, "step": 3940 }, { "epoch": 0.95, "learning_rate": 4.119744497895817e-08, "logits/chosen": -2.2160677909851074, "logits/rejected": -1.7492036819458008, "logps/chosen": -455.21820068359375, "logps/rejected": -629.6312866210938, "loss": 0.2423, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.29098254442214966, "rewards/margins": 0.2242966890335083, "rewards/rejected": -0.515279233455658, "step": 3950 }, { "epoch": 0.95, "learning_rate": 3.749723632643476e-08, "logits/chosen": -2.2475035190582275, "logits/rejected": -1.6090848445892334, "logps/chosen": -500.4864807128906, "logps/rejected": -743.6297607421875, "loss": 0.2251, "rewards/accuracies": 0.875, "rewards/chosen": -0.3396914303302765, "rewards/margins": 0.2848801016807556, "rewards/rejected": -0.6245715022087097, "step": 3960 }, { "epoch": 0.95, "learning_rate": 3.396985459635821e-08, "logits/chosen": -2.3554368019104004, "logits/rejected": -1.4616578817367554, "logps/chosen": -490.79425048828125, "logps/rejected": -738.3787841796875, "loss": 0.2224, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.29442912340164185, "rewards/margins": 0.3050374388694763, "rewards/rejected": -0.5994665622711182, "step": 3970 }, { "epoch": 0.96, "learning_rate": 3.061554735252325e-08, "logits/chosen": -2.465724468231201, "logits/rejected": -1.8613615036010742, "logps/chosen": -499.9761657714844, "logps/rejected": -648.8406982421875, "loss": 0.2125, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3114131689071655, "rewards/margins": 0.2070644199848175, "rewards/rejected": -0.5184775590896606, "step": 3980 }, { "epoch": 0.96, "learning_rate": 2.7434550011761763e-08, "logits/chosen": -2.517122268676758, "logits/rejected": -1.78408682346344, "logps/chosen": -432.67120361328125, "logps/rejected": -686.5255737304688, "loss": 0.2275, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.25613075494766235, "rewards/margins": 0.2904641330242157, "rewards/rejected": -0.5465949177742004, "step": 3990 }, { "epoch": 0.96, "learning_rate": 2.4427085827418706e-08, "logits/chosen": -2.1960973739624023, "logits/rejected": -1.632516622543335, "logps/chosen": -503.66571044921875, "logps/rejected": -686.78857421875, "loss": 0.2029, "rewards/accuracies": 0.75, "rewards/chosen": -0.3624489903450012, "rewards/margins": 0.21902470290660858, "rewards/rejected": -0.581473708152771, "step": 4000 }, { "epoch": 0.96, "eval_logits/chosen": -2.0964488983154297, "eval_logits/rejected": -1.914116382598877, "eval_logps/chosen": -586.688720703125, "eval_logps/rejected": -622.4911499023438, "eval_loss": 0.1132635846734047, "eval_rewards/accuracies": 0.492514967918396, "eval_rewards/chosen": -0.3208109736442566, "eval_rewards/margins": 0.0507039800286293, "eval_rewards/rejected": -0.3715149760246277, "eval_runtime": 1202.8185, "eval_samples_per_second": 1.663, "eval_steps_per_second": 0.278, "step": 4000 }, { "epoch": 0.96, "learning_rate": 2.1593365873685544e-08, "logits/chosen": -2.2445883750915527, "logits/rejected": -1.711534857749939, "logps/chosen": -456.6461486816406, "logps/rejected": -650.14990234375, "loss": 0.251, "rewards/accuracies": 0.75, "rewards/chosen": -0.3230751156806946, "rewards/margins": 0.22738023102283478, "rewards/rejected": -0.5504552721977234, "step": 4010 }, { "epoch": 0.96, "learning_rate": 1.893358903078568e-08, "logits/chosen": -2.204037666320801, "logits/rejected": -1.5954258441925049, "logps/chosen": -509.4784240722656, "logps/rejected": -722.2180786132812, "loss": 0.2219, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.364745557308197, "rewards/margins": 0.24251048266887665, "rewards/rejected": -0.6072560548782349, "step": 4020 }, { "epoch": 0.97, "learning_rate": 1.644794197101507e-08, "logits/chosen": -2.2876009941101074, "logits/rejected": -1.839961290359497, "logps/chosen": -475.39697265625, "logps/rejected": -646.7371826171875, "loss": 0.2365, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2945319712162018, "rewards/margins": 0.22561600804328918, "rewards/rejected": -0.520147979259491, "step": 4030 }, { "epoch": 0.97, "learning_rate": 1.413659914564297e-08, "logits/chosen": -2.1976215839385986, "logits/rejected": -1.7245981693267822, "logps/chosen": -497.1715393066406, "logps/rejected": -684.0360717773438, "loss": 0.2323, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3244447410106659, "rewards/margins": 0.2322753667831421, "rewards/rejected": -0.5567201375961304, "step": 4040 }, { "epoch": 0.97, "learning_rate": 1.1999722772666478e-08, "logits/chosen": -2.1289570331573486, "logits/rejected": -1.252652645111084, "logps/chosen": -540.336669921875, "logps/rejected": -710.6973266601562, "loss": 0.2831, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3626651465892792, "rewards/margins": 0.24450966715812683, "rewards/rejected": -0.6071747541427612, "step": 4050 }, { "epoch": 0.97, "learning_rate": 1.0037462825427113e-08, "logits/chosen": -2.0594303607940674, "logits/rejected": -1.6286876201629639, "logps/chosen": -545.4386596679688, "logps/rejected": -671.5677490234375, "loss": 0.2303, "rewards/accuracies": 0.625, "rewards/chosen": -0.4014796316623688, "rewards/margins": 0.1813354790210724, "rewards/rejected": -0.5828150510787964, "step": 4060 }, { "epoch": 0.98, "learning_rate": 8.249957022084254e-09, "logits/chosen": -2.0236423015594482, "logits/rejected": -1.4071561098098755, "logps/chosen": -579.674560546875, "logps/rejected": -771.0028076171875, "loss": 0.2263, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.42344412207603455, "rewards/margins": 0.23946556448936462, "rewards/rejected": -0.6629096865653992, "step": 4070 }, { "epoch": 0.98, "learning_rate": 6.6373308159495275e-09, "logits/chosen": -2.327468156814575, "logits/rejected": -1.5423156023025513, "logps/chosen": -451.54351806640625, "logps/rejected": -700.2867431640625, "loss": 0.2078, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2926388680934906, "rewards/margins": 0.3081340789794922, "rewards/rejected": -0.6007729172706604, "step": 4080 }, { "epoch": 0.98, "learning_rate": 5.19969738668219e-09, "logits/chosen": -2.1366591453552246, "logits/rejected": -1.5162761211395264, "logps/chosen": -510.18487548828125, "logps/rejected": -715.0636596679688, "loss": 0.2391, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.34880882501602173, "rewards/margins": 0.25075677037239075, "rewards/rejected": -0.5995656251907349, "step": 4090 }, { "epoch": 0.98, "learning_rate": 3.937157632346311e-09, "logits/chosen": -2.089216947555542, "logits/rejected": -1.5874278545379639, "logps/chosen": -532.5524291992188, "logps/rejected": -717.9437255859375, "loss": 0.2438, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3899141252040863, "rewards/margins": 0.20226602256298065, "rewards/rejected": -0.5921801328659058, "step": 4100 }, { "epoch": 0.98, "eval_logits/chosen": -2.0965089797973633, "eval_logits/rejected": -1.9139881134033203, "eval_logps/chosen": -585.755126953125, "eval_logps/rejected": -621.66357421875, "eval_loss": 0.11288020014762878, "eval_rewards/accuracies": 0.4940119683742523, "eval_rewards/chosen": -0.3198772966861725, "eval_rewards/margins": 0.05081005394458771, "eval_rewards/rejected": -0.3706873655319214, "eval_runtime": 1202.5148, "eval_samples_per_second": 1.663, "eval_steps_per_second": 0.278, "step": 4100 }, { "epoch": 0.99, "learning_rate": 2.849800162328664e-09, "logits/chosen": -2.3101534843444824, "logits/rejected": -1.9180978536605835, "logps/chosen": -450.175537109375, "logps/rejected": -637.3862915039062, "loss": 0.2367, "rewards/accuracies": 0.75, "rewards/chosen": -0.2935420870780945, "rewards/margins": 0.2234950065612793, "rewards/rejected": -0.517037034034729, "step": 4110 }, { "epoch": 0.99, "learning_rate": 1.9377012911203642e-09, "logits/chosen": -2.1814098358154297, "logits/rejected": -1.5700258016586304, "logps/chosen": -518.3272705078125, "logps/rejected": -699.9993896484375, "loss": 0.2338, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.39073213934898376, "rewards/margins": 0.21291008591651917, "rewards/rejected": -0.6036421656608582, "step": 4120 }, { "epoch": 0.99, "learning_rate": 1.2009250329608757e-09, "logits/chosen": -2.1806552410125732, "logits/rejected": -1.5287284851074219, "logps/chosen": -555.316650390625, "logps/rejected": -793.3533935546875, "loss": 0.208, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3881283402442932, "rewards/margins": 0.3002643585205078, "rewards/rejected": -0.6883927583694458, "step": 4130 }, { "epoch": 0.99, "learning_rate": 6.395230973443856e-10, "logits/chosen": -2.172107696533203, "logits/rejected": -1.6020357608795166, "logps/chosen": -563.0657348632812, "logps/rejected": -758.2376708984375, "loss": 0.2285, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4259403645992279, "rewards/margins": 0.23714318871498108, "rewards/rejected": -0.6630834937095642, "step": 4140 }, { "epoch": 1.0, "learning_rate": 2.5353488539187066e-10, "logits/chosen": -2.037019968032837, "logits/rejected": -1.4118521213531494, "logps/chosen": -557.1146240234375, "logps/rejected": -745.1408081054688, "loss": 0.2374, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.40070056915283203, "rewards/margins": 0.23871159553527832, "rewards/rejected": -0.6394121050834656, "step": 4150 }, { "epoch": 1.0, "learning_rate": 4.298748708470024e-11, "logits/chosen": -2.233302593231201, "logits/rejected": -1.6489896774291992, "logps/chosen": -450.9864807128906, "logps/rejected": -668.4241333007812, "loss": 0.2261, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.28009340167045593, "rewards/margins": 0.27558884024620056, "rewards/rejected": -0.5556822419166565, "step": 4160 }, { "epoch": 1.0, "step": 4167, "total_flos": 0.0, "train_loss": 0.23936548975694372, "train_runtime": 88980.8578, "train_samples_per_second": 0.562, "train_steps_per_second": 0.047 } ], "logging_steps": 10, "max_steps": 4167, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }