|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9817045961624274, |
|
"eval_steps": 100, |
|
"global_step": 2200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004462293618920125, |
|
"grad_norm": 10.592765808105469, |
|
"learning_rate": 4.9776885319053994e-05, |
|
"logits/chosen": -0.4821785092353821, |
|
"logits/rejected": -0.515357494354248, |
|
"logps/chosen": -46.793216705322266, |
|
"logps/rejected": -45.11051940917969, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.13182350993156433, |
|
"rewards/margins": 0.14049032330513, |
|
"rewards/rejected": -0.00866681057959795, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00892458723784025, |
|
"grad_norm": 5.534114360809326, |
|
"learning_rate": 4.9553770638107986e-05, |
|
"logits/chosen": -0.40027499198913574, |
|
"logits/rejected": -0.3886343836784363, |
|
"logps/chosen": -39.815574645996094, |
|
"logps/rejected": -29.108535766601562, |
|
"loss": 0.635, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.009116433560848236, |
|
"rewards/margins": -0.14123764634132385, |
|
"rewards/rejected": 0.1321212351322174, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.013386880856760375, |
|
"grad_norm": 6.359144687652588, |
|
"learning_rate": 4.933065595716198e-05, |
|
"logits/chosen": -0.5110928416252136, |
|
"logits/rejected": -0.49840885400772095, |
|
"logps/chosen": -44.8924446105957, |
|
"logps/rejected": -51.18391036987305, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.22143979370594025, |
|
"rewards/margins": 0.4565449357032776, |
|
"rewards/rejected": -0.23510512709617615, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0178491744756805, |
|
"grad_norm": 12.116751670837402, |
|
"learning_rate": 4.9107541276215976e-05, |
|
"logits/chosen": -0.40832486748695374, |
|
"logits/rejected": -0.415947824716568, |
|
"logps/chosen": -46.321205139160156, |
|
"logps/rejected": -40.64304733276367, |
|
"loss": 0.7652, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4836673140525818, |
|
"rewards/margins": -0.2773968279361725, |
|
"rewards/rejected": -0.20627036690711975, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.022311468094600623, |
|
"grad_norm": 4.754805088043213, |
|
"learning_rate": 4.8884426595269975e-05, |
|
"logits/chosen": -0.46318650245666504, |
|
"logits/rejected": -0.5075193643569946, |
|
"logps/chosen": -49.582366943359375, |
|
"logps/rejected": -64.06822204589844, |
|
"loss": 0.5654, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2501673996448517, |
|
"rewards/margins": 0.5193334817886353, |
|
"rewards/rejected": -0.7695007920265198, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02677376171352075, |
|
"grad_norm": 3.5485920906066895, |
|
"learning_rate": 4.8661311914323966e-05, |
|
"logits/chosen": -0.45250263810157776, |
|
"logits/rejected": -0.48430103063583374, |
|
"logps/chosen": -43.37895965576172, |
|
"logps/rejected": -61.72364044189453, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.48389190435409546, |
|
"rewards/margins": 1.6048848628997803, |
|
"rewards/rejected": -1.1209927797317505, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.031236055332440876, |
|
"grad_norm": 13.985785484313965, |
|
"learning_rate": 4.843819723337796e-05, |
|
"logits/chosen": -0.5342388153076172, |
|
"logits/rejected": -0.5270048975944519, |
|
"logps/chosen": -58.14774703979492, |
|
"logps/rejected": -59.576927185058594, |
|
"loss": 0.7304, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.08286930620670319, |
|
"rewards/margins": 1.0662521123886108, |
|
"rewards/rejected": -0.9833827018737793, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.035698348951361, |
|
"grad_norm": 12.143803596496582, |
|
"learning_rate": 4.821508255243195e-05, |
|
"logits/chosen": -0.3441067934036255, |
|
"logits/rejected": -0.3296460509300232, |
|
"logps/chosen": -43.2880973815918, |
|
"logps/rejected": -49.62990951538086, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0368906632065773, |
|
"rewards/margins": 0.8531161546707153, |
|
"rewards/rejected": -0.816225528717041, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.040160642570281124, |
|
"grad_norm": 7.444468021392822, |
|
"learning_rate": 4.799196787148594e-05, |
|
"logits/chosen": -0.45429477095603943, |
|
"logits/rejected": -0.4618884027004242, |
|
"logps/chosen": -38.82330322265625, |
|
"logps/rejected": -63.32975387573242, |
|
"loss": 0.6228, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.21749186515808105, |
|
"rewards/margins": -0.18056556582450867, |
|
"rewards/rejected": -0.03692631423473358, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04462293618920125, |
|
"grad_norm": 5.038405895233154, |
|
"learning_rate": 4.776885319053994e-05, |
|
"logits/chosen": -0.5670369863510132, |
|
"logits/rejected": -0.5326591730117798, |
|
"logps/chosen": -34.404144287109375, |
|
"logps/rejected": -19.423460006713867, |
|
"loss": 0.6942, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.22313335537910461, |
|
"rewards/margins": -0.1140323057770729, |
|
"rewards/rejected": -0.10910103470087051, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04462293618920125, |
|
"eval_logits/chosen": -0.31733304262161255, |
|
"eval_logits/rejected": -0.32061833143234253, |
|
"eval_logps/chosen": -46.686729431152344, |
|
"eval_logps/rejected": -52.44818878173828, |
|
"eval_loss": 0.6370808482170105, |
|
"eval_rewards/accuracies": 0.6397058963775635, |
|
"eval_rewards/chosen": -0.0028795618563890457, |
|
"eval_rewards/margins": 0.35795658826828003, |
|
"eval_rewards/rejected": -0.3608362078666687, |
|
"eval_runtime": 105.155, |
|
"eval_samples_per_second": 10.309, |
|
"eval_steps_per_second": 0.647, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.049085229808121376, |
|
"grad_norm": 5.659931659698486, |
|
"learning_rate": 4.754573850959393e-05, |
|
"logits/chosen": -0.42005282640457153, |
|
"logits/rejected": -0.4412313997745514, |
|
"logps/chosen": -44.89687728881836, |
|
"logps/rejected": -70.47555541992188, |
|
"loss": 0.5626, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.039139073342084885, |
|
"rewards/margins": 0.7593569755554199, |
|
"rewards/rejected": -0.7984960675239563, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0535475234270415, |
|
"grad_norm": 4.756991386413574, |
|
"learning_rate": 4.732262382864793e-05, |
|
"logits/chosen": -0.4370805323123932, |
|
"logits/rejected": -0.4195021688938141, |
|
"logps/chosen": -58.568077087402344, |
|
"logps/rejected": -36.193973541259766, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.06512323021888733, |
|
"rewards/margins": 0.077705517411232, |
|
"rewards/rejected": -0.14282873272895813, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05800981704596162, |
|
"grad_norm": 4.192239761352539, |
|
"learning_rate": 4.709950914770192e-05, |
|
"logits/chosen": -0.4242766499519348, |
|
"logits/rejected": -0.41500869393348694, |
|
"logps/chosen": -56.26778030395508, |
|
"logps/rejected": -56.050201416015625, |
|
"loss": 0.6312, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.10200215876102448, |
|
"rewards/margins": 0.6572259664535522, |
|
"rewards/rejected": -0.5552238821983337, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.06247211066488175, |
|
"grad_norm": 4.155123233795166, |
|
"learning_rate": 4.6876394466755914e-05, |
|
"logits/chosen": -0.3079138696193695, |
|
"logits/rejected": -0.3212467432022095, |
|
"logps/chosen": -54.36994552612305, |
|
"logps/rejected": -69.90930938720703, |
|
"loss": 0.5763, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2159431278705597, |
|
"rewards/margins": 0.9896115064620972, |
|
"rewards/rejected": -1.205554723739624, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06693440428380187, |
|
"grad_norm": 6.09385871887207, |
|
"learning_rate": 4.6653279785809906e-05, |
|
"logits/chosen": -0.39594393968582153, |
|
"logits/rejected": -0.41531887650489807, |
|
"logps/chosen": -45.87028884887695, |
|
"logps/rejected": -64.75723266601562, |
|
"loss": 0.5112, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1658216267824173, |
|
"rewards/margins": 1.210204839706421, |
|
"rewards/rejected": -1.3760265111923218, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.071396697902722, |
|
"grad_norm": 5.659728527069092, |
|
"learning_rate": 4.6430165104863904e-05, |
|
"logits/chosen": -0.36691778898239136, |
|
"logits/rejected": -0.40529337525367737, |
|
"logps/chosen": -23.407129287719727, |
|
"logps/rejected": -60.879005432128906, |
|
"loss": 0.6318, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16499443352222443, |
|
"rewards/margins": 1.2766371965408325, |
|
"rewards/rejected": -1.441631555557251, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07585899152164212, |
|
"grad_norm": 6.760382652282715, |
|
"learning_rate": 4.6207050423917896e-05, |
|
"logits/chosen": -0.4060201048851013, |
|
"logits/rejected": -0.42930418252944946, |
|
"logps/chosen": -68.62867736816406, |
|
"logps/rejected": -78.13455963134766, |
|
"loss": 0.5302, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.21399441361427307, |
|
"rewards/margins": 1.01082444190979, |
|
"rewards/rejected": -1.2248189449310303, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.08032128514056225, |
|
"grad_norm": 6.949064254760742, |
|
"learning_rate": 4.598393574297189e-05, |
|
"logits/chosen": -0.3692146837711334, |
|
"logits/rejected": -0.37857574224472046, |
|
"logps/chosen": -37.41041564941406, |
|
"logps/rejected": -60.207984924316406, |
|
"loss": 0.6207, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16386118531227112, |
|
"rewards/margins": 0.9659687876701355, |
|
"rewards/rejected": -1.129830002784729, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.08478357875948238, |
|
"grad_norm": 9.083560943603516, |
|
"learning_rate": 4.576082106202588e-05, |
|
"logits/chosen": -0.5585399866104126, |
|
"logits/rejected": -0.5583776831626892, |
|
"logps/chosen": -66.86927795410156, |
|
"logps/rejected": -68.13627624511719, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3321790099143982, |
|
"rewards/margins": 0.661481499671936, |
|
"rewards/rejected": -0.993660569190979, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0892458723784025, |
|
"grad_norm": 8.672013282775879, |
|
"learning_rate": 4.553770638107988e-05, |
|
"logits/chosen": -0.26764315366744995, |
|
"logits/rejected": -0.269832044839859, |
|
"logps/chosen": -32.92001724243164, |
|
"logps/rejected": -53.53894805908203, |
|
"loss": 0.5254, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.017507025972008705, |
|
"rewards/margins": 1.3314012289047241, |
|
"rewards/rejected": -1.3138940334320068, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0892458723784025, |
|
"eval_logits/chosen": -0.27716612815856934, |
|
"eval_logits/rejected": -0.2809113562107086, |
|
"eval_logps/chosen": -49.147117614746094, |
|
"eval_logps/rejected": -56.569976806640625, |
|
"eval_loss": 0.5994076728820801, |
|
"eval_rewards/accuracies": 0.7132353186607361, |
|
"eval_rewards/chosen": -0.24891817569732666, |
|
"eval_rewards/margins": 0.524096667766571, |
|
"eval_rewards/rejected": -0.7730148434638977, |
|
"eval_runtime": 101.3224, |
|
"eval_samples_per_second": 10.699, |
|
"eval_steps_per_second": 0.671, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09370816599732262, |
|
"grad_norm": 6.387267112731934, |
|
"learning_rate": 4.531459170013387e-05, |
|
"logits/chosen": -0.4771585464477539, |
|
"logits/rejected": -0.47311025857925415, |
|
"logps/chosen": -73.28004455566406, |
|
"logps/rejected": -71.69839477539062, |
|
"loss": 0.5896, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3659582734107971, |
|
"rewards/margins": 1.025471568107605, |
|
"rewards/rejected": -1.3914297819137573, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.09817045961624275, |
|
"grad_norm": 6.133056640625, |
|
"learning_rate": 4.509147701918787e-05, |
|
"logits/chosen": -0.4101637005805969, |
|
"logits/rejected": -0.4236904978752136, |
|
"logps/chosen": -38.38233947753906, |
|
"logps/rejected": -49.3653564453125, |
|
"loss": 0.5989, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.02271045371890068, |
|
"rewards/margins": 0.8071755170822144, |
|
"rewards/rejected": -0.8298860788345337, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.10263275323516287, |
|
"grad_norm": 7.030124187469482, |
|
"learning_rate": 4.486836233824186e-05, |
|
"logits/chosen": -0.403881311416626, |
|
"logits/rejected": -0.3970370292663574, |
|
"logps/chosen": -37.82977294921875, |
|
"logps/rejected": -39.60215759277344, |
|
"loss": 0.5102, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18033410608768463, |
|
"rewards/margins": 0.6896679997444153, |
|
"rewards/rejected": -0.8700020909309387, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.107095046854083, |
|
"grad_norm": 7.851283073425293, |
|
"learning_rate": 4.464524765729585e-05, |
|
"logits/chosen": -0.4536186754703522, |
|
"logits/rejected": -0.45034199953079224, |
|
"logps/chosen": -46.97504806518555, |
|
"logps/rejected": -47.199581146240234, |
|
"loss": 0.5826, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3460264503955841, |
|
"rewards/margins": 0.6552676558494568, |
|
"rewards/rejected": -1.0012940168380737, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.11155734047300313, |
|
"grad_norm": 7.0389862060546875, |
|
"learning_rate": 4.4422132976349844e-05, |
|
"logits/chosen": -0.39026567339897156, |
|
"logits/rejected": -0.4114016890525818, |
|
"logps/chosen": -57.63338088989258, |
|
"logps/rejected": -57.96870040893555, |
|
"loss": 0.5666, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7554053068161011, |
|
"rewards/margins": 0.5336441993713379, |
|
"rewards/rejected": -1.289049506187439, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.11601963409192324, |
|
"grad_norm": 10.85291576385498, |
|
"learning_rate": 4.4199018295403835e-05, |
|
"logits/chosen": -0.3878644108772278, |
|
"logits/rejected": -0.38308557868003845, |
|
"logps/chosen": -48.117191314697266, |
|
"logps/rejected": -76.66590881347656, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5809325575828552, |
|
"rewards/margins": 0.676474392414093, |
|
"rewards/rejected": -1.2574069499969482, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.12048192771084337, |
|
"grad_norm": 7.656342506408691, |
|
"learning_rate": 4.3975903614457834e-05, |
|
"logits/chosen": -0.34853556752204895, |
|
"logits/rejected": -0.3886043429374695, |
|
"logps/chosen": -31.3344783782959, |
|
"logps/rejected": -61.63756561279297, |
|
"loss": 0.5698, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.14684630930423737, |
|
"rewards/margins": 1.2698640823364258, |
|
"rewards/rejected": -1.4167104959487915, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.1249442213297635, |
|
"grad_norm": 8.007035255432129, |
|
"learning_rate": 4.3752788933511826e-05, |
|
"logits/chosen": -0.4611396789550781, |
|
"logits/rejected": -0.4779717028141022, |
|
"logps/chosen": -61.92280197143555, |
|
"logps/rejected": -71.4999771118164, |
|
"loss": 0.6151, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3773617744445801, |
|
"rewards/margins": 0.9715533256530762, |
|
"rewards/rejected": -1.3489152193069458, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.12940651494868363, |
|
"grad_norm": 6.168390274047852, |
|
"learning_rate": 4.3529674252565824e-05, |
|
"logits/chosen": -0.34872573614120483, |
|
"logits/rejected": -0.3857693672180176, |
|
"logps/chosen": -53.93714141845703, |
|
"logps/rejected": -82.67308044433594, |
|
"loss": 0.4354, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5559147596359253, |
|
"rewards/margins": 0.5804899334907532, |
|
"rewards/rejected": -1.1364047527313232, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.13386880856760375, |
|
"grad_norm": 8.348642349243164, |
|
"learning_rate": 4.3306559571619816e-05, |
|
"logits/chosen": -0.3294086158275604, |
|
"logits/rejected": -0.3615977466106415, |
|
"logps/chosen": -34.571632385253906, |
|
"logps/rejected": -66.2335205078125, |
|
"loss": 0.5747, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.424107164144516, |
|
"rewards/margins": 0.6470038890838623, |
|
"rewards/rejected": -1.0711110830307007, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13386880856760375, |
|
"eval_logits/chosen": -0.25365474820137024, |
|
"eval_logits/rejected": -0.25904685258865356, |
|
"eval_logps/chosen": -51.732627868652344, |
|
"eval_logps/rejected": -60.69346618652344, |
|
"eval_loss": 0.5827349424362183, |
|
"eval_rewards/accuracies": 0.7132353186607361, |
|
"eval_rewards/chosen": -0.5074694752693176, |
|
"eval_rewards/margins": 0.6778943538665771, |
|
"eval_rewards/rejected": -1.1853638887405396, |
|
"eval_runtime": 101.1924, |
|
"eval_samples_per_second": 10.712, |
|
"eval_steps_per_second": 0.672, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13833110218652386, |
|
"grad_norm": 10.979486465454102, |
|
"learning_rate": 4.308344489067381e-05, |
|
"logits/chosen": -0.3864378035068512, |
|
"logits/rejected": -0.41810816526412964, |
|
"logps/chosen": -48.907447814941406, |
|
"logps/rejected": -63.63328170776367, |
|
"loss": 0.5603, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6910004019737244, |
|
"rewards/margins": 0.5998687744140625, |
|
"rewards/rejected": -1.290869116783142, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.142793395805444, |
|
"grad_norm": 11.107022285461426, |
|
"learning_rate": 4.28603302097278e-05, |
|
"logits/chosen": -0.3289085328578949, |
|
"logits/rejected": -0.38175052404403687, |
|
"logps/chosen": -55.011627197265625, |
|
"logps/rejected": -82.5877914428711, |
|
"loss": 0.5913, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3773338794708252, |
|
"rewards/margins": 1.1075068712234497, |
|
"rewards/rejected": -1.4848406314849854, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.14725568942436412, |
|
"grad_norm": 10.214667320251465, |
|
"learning_rate": 4.263721552878179e-05, |
|
"logits/chosen": -0.28453752398490906, |
|
"logits/rejected": -0.29568082094192505, |
|
"logps/chosen": -45.49639129638672, |
|
"logps/rejected": -52.34321212768555, |
|
"loss": 0.5589, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.32706964015960693, |
|
"rewards/margins": 1.053871512413025, |
|
"rewards/rejected": -1.3809412717819214, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.15171798304328424, |
|
"grad_norm": 16.683269500732422, |
|
"learning_rate": 4.241410084783579e-05, |
|
"logits/chosen": -0.360818475484848, |
|
"logits/rejected": -0.36484581232070923, |
|
"logps/chosen": -52.61760711669922, |
|
"logps/rejected": -56.44896697998047, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.690588116645813, |
|
"rewards/margins": 0.7141519784927368, |
|
"rewards/rejected": -1.4047400951385498, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.15618027666220438, |
|
"grad_norm": 12.096985816955566, |
|
"learning_rate": 4.219098616688978e-05, |
|
"logits/chosen": -0.539945662021637, |
|
"logits/rejected": -0.5587334036827087, |
|
"logps/chosen": -48.64118194580078, |
|
"logps/rejected": -50.03197479248047, |
|
"loss": 0.528, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.584144651889801, |
|
"rewards/margins": 0.21761731803417206, |
|
"rewards/rejected": -0.8017619848251343, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1606425702811245, |
|
"grad_norm": 11.363245010375977, |
|
"learning_rate": 4.196787148594378e-05, |
|
"logits/chosen": -0.43976059556007385, |
|
"logits/rejected": -0.47161954641342163, |
|
"logps/chosen": -40.839881896972656, |
|
"logps/rejected": -71.27018737792969, |
|
"loss": 0.5518, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.009871432557702065, |
|
"rewards/margins": 0.642917275428772, |
|
"rewards/rejected": -0.6330458521842957, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1651048639000446, |
|
"grad_norm": 8.395732879638672, |
|
"learning_rate": 4.174475680499777e-05, |
|
"logits/chosen": -0.41130638122558594, |
|
"logits/rejected": -0.4276394248008728, |
|
"logps/chosen": -54.29048538208008, |
|
"logps/rejected": -53.052513122558594, |
|
"loss": 0.5429, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4554901719093323, |
|
"rewards/margins": 0.7232891321182251, |
|
"rewards/rejected": -1.1787793636322021, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.16956715751896476, |
|
"grad_norm": 8.227060317993164, |
|
"learning_rate": 4.1521642124051764e-05, |
|
"logits/chosen": -0.5096425414085388, |
|
"logits/rejected": -0.5043798089027405, |
|
"logps/chosen": -60.0747184753418, |
|
"logps/rejected": -81.67964172363281, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6249334216117859, |
|
"rewards/margins": 1.0047292709350586, |
|
"rewards/rejected": -1.6296625137329102, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.17402945113788487, |
|
"grad_norm": 7.17982816696167, |
|
"learning_rate": 4.1298527443105755e-05, |
|
"logits/chosen": -0.3979923725128174, |
|
"logits/rejected": -0.44463175535202026, |
|
"logps/chosen": -21.290367126464844, |
|
"logps/rejected": -63.85686111450195, |
|
"loss": 0.4961, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.40098923444747925, |
|
"rewards/margins": 1.2615143060684204, |
|
"rewards/rejected": -1.6625036001205444, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.178491744756805, |
|
"grad_norm": 5.973713397979736, |
|
"learning_rate": 4.1075412762159754e-05, |
|
"logits/chosen": -0.33949995040893555, |
|
"logits/rejected": -0.3710685670375824, |
|
"logps/chosen": -49.4135856628418, |
|
"logps/rejected": -59.9512825012207, |
|
"loss": 0.4733, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.759450376033783, |
|
"rewards/margins": 0.44380512833595276, |
|
"rewards/rejected": -1.2032554149627686, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.178491744756805, |
|
"eval_logits/chosen": -0.2872345745563507, |
|
"eval_logits/rejected": -0.2932293117046356, |
|
"eval_logps/chosen": -53.17053985595703, |
|
"eval_logps/rejected": -62.56633377075195, |
|
"eval_loss": 0.5854782462120056, |
|
"eval_rewards/accuracies": 0.7058823704719543, |
|
"eval_rewards/chosen": -0.6512607336044312, |
|
"eval_rewards/margins": 0.7213900089263916, |
|
"eval_rewards/rejected": -1.3726506233215332, |
|
"eval_runtime": 101.203, |
|
"eval_samples_per_second": 10.711, |
|
"eval_steps_per_second": 0.672, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.18295403837572513, |
|
"grad_norm": 12.613880157470703, |
|
"learning_rate": 4.0852298081213746e-05, |
|
"logits/chosen": -0.4235966205596924, |
|
"logits/rejected": -0.4264090955257416, |
|
"logps/chosen": -48.44654083251953, |
|
"logps/rejected": -80.52620697021484, |
|
"loss": 0.5368, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6325960755348206, |
|
"rewards/margins": 1.662226915359497, |
|
"rewards/rejected": -2.294822931289673, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.18741633199464525, |
|
"grad_norm": 9.811868667602539, |
|
"learning_rate": 4.062918340026774e-05, |
|
"logits/chosen": -0.37342894077301025, |
|
"logits/rejected": -0.3943329155445099, |
|
"logps/chosen": -55.858421325683594, |
|
"logps/rejected": -66.1947021484375, |
|
"loss": 0.5168, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5095465779304504, |
|
"rewards/margins": 1.825829267501831, |
|
"rewards/rejected": -2.3353757858276367, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.19187862561356536, |
|
"grad_norm": 13.996188163757324, |
|
"learning_rate": 4.040606871932173e-05, |
|
"logits/chosen": -0.4081090986728668, |
|
"logits/rejected": -0.41200193762779236, |
|
"logps/chosen": -29.08770751953125, |
|
"logps/rejected": -36.86586380004883, |
|
"loss": 0.6097, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.41352134943008423, |
|
"rewards/margins": 0.9602751731872559, |
|
"rewards/rejected": -1.3737965822219849, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.1963409192324855, |
|
"grad_norm": 8.436747550964355, |
|
"learning_rate": 4.018295403837573e-05, |
|
"logits/chosen": -0.42156997323036194, |
|
"logits/rejected": -0.4419097304344177, |
|
"logps/chosen": -57.04413604736328, |
|
"logps/rejected": -73.21409606933594, |
|
"loss": 0.5928, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7313698530197144, |
|
"rewards/margins": 0.5576564073562622, |
|
"rewards/rejected": -1.2890262603759766, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.20080321285140562, |
|
"grad_norm": 11.02582836151123, |
|
"learning_rate": 3.995983935742972e-05, |
|
"logits/chosen": -0.4285162091255188, |
|
"logits/rejected": -0.41831493377685547, |
|
"logps/chosen": -55.75530242919922, |
|
"logps/rejected": -63.482810974121094, |
|
"loss": 0.586, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5613804459571838, |
|
"rewards/margins": 1.0267794132232666, |
|
"rewards/rejected": -1.5881597995758057, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.20526550647032574, |
|
"grad_norm": 13.220766067504883, |
|
"learning_rate": 3.973672467648372e-05, |
|
"logits/chosen": -0.5481917262077332, |
|
"logits/rejected": -0.5449990034103394, |
|
"logps/chosen": -73.10223388671875, |
|
"logps/rejected": -73.17869567871094, |
|
"loss": 0.5522, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.32689306139945984, |
|
"rewards/margins": 0.9151934385299683, |
|
"rewards/rejected": -1.24208664894104, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.20972780008924588, |
|
"grad_norm": 7.844621658325195, |
|
"learning_rate": 3.951360999553771e-05, |
|
"logits/chosen": -0.5205482244491577, |
|
"logits/rejected": -0.519945502281189, |
|
"logps/chosen": -56.165924072265625, |
|
"logps/rejected": -71.98924255371094, |
|
"loss": 0.4719, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.23725154995918274, |
|
"rewards/margins": 1.7018741369247437, |
|
"rewards/rejected": -1.939125657081604, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.214190093708166, |
|
"grad_norm": 13.160161018371582, |
|
"learning_rate": 3.92904953145917e-05, |
|
"logits/chosen": -0.43102946877479553, |
|
"logits/rejected": -0.47075390815734863, |
|
"logps/chosen": -41.25495529174805, |
|
"logps/rejected": -62.530914306640625, |
|
"loss": 0.5991, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2759312391281128, |
|
"rewards/margins": 1.306725025177002, |
|
"rewards/rejected": -1.5826562643051147, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2186523873270861, |
|
"grad_norm": 12.124773979187012, |
|
"learning_rate": 3.906738063364569e-05, |
|
"logits/chosen": -0.5562575459480286, |
|
"logits/rejected": -0.5301380753517151, |
|
"logps/chosen": -61.703941345214844, |
|
"logps/rejected": -61.715789794921875, |
|
"loss": 0.5195, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.032676100730896, |
|
"rewards/margins": 0.3819430470466614, |
|
"rewards/rejected": -1.4146192073822021, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.22311468094600626, |
|
"grad_norm": 6.453996658325195, |
|
"learning_rate": 3.8844265952699685e-05, |
|
"logits/chosen": -0.4614725708961487, |
|
"logits/rejected": -0.48735547065734863, |
|
"logps/chosen": -51.5485954284668, |
|
"logps/rejected": -76.01066589355469, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7149358987808228, |
|
"rewards/margins": 1.4906076192855835, |
|
"rewards/rejected": -2.2055435180664062, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22311468094600626, |
|
"eval_logits/chosen": -0.3360340893268585, |
|
"eval_logits/rejected": -0.3425233066082001, |
|
"eval_logps/chosen": -53.04523468017578, |
|
"eval_logps/rejected": -61.96220016479492, |
|
"eval_loss": 0.555486798286438, |
|
"eval_rewards/accuracies": 0.6985294222831726, |
|
"eval_rewards/chosen": -0.6387300491333008, |
|
"eval_rewards/margins": 0.6735072135925293, |
|
"eval_rewards/rejected": -1.31223726272583, |
|
"eval_runtime": 101.1438, |
|
"eval_samples_per_second": 10.717, |
|
"eval_steps_per_second": 0.672, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22757697456492637, |
|
"grad_norm": 8.892796516418457, |
|
"learning_rate": 3.8621151271753684e-05, |
|
"logits/chosen": -0.5508376955986023, |
|
"logits/rejected": -0.5385161638259888, |
|
"logps/chosen": -75.99156951904297, |
|
"logps/rejected": -70.40966033935547, |
|
"loss": 0.4421, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7829670906066895, |
|
"rewards/margins": 1.0163862705230713, |
|
"rewards/rejected": -1.7993533611297607, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2320392681838465, |
|
"grad_norm": 9.191746711730957, |
|
"learning_rate": 3.839803659080768e-05, |
|
"logits/chosen": -0.4840109944343567, |
|
"logits/rejected": -0.5072845220565796, |
|
"logps/chosen": -64.5361328125, |
|
"logps/rejected": -85.254638671875, |
|
"loss": 0.487, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.066715955734253, |
|
"rewards/margins": 0.5964549779891968, |
|
"rewards/rejected": -1.6631710529327393, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.23650156180276663, |
|
"grad_norm": 6.455129146575928, |
|
"learning_rate": 3.8174921909861674e-05, |
|
"logits/chosen": -0.4131253659725189, |
|
"logits/rejected": -0.46761250495910645, |
|
"logps/chosen": -28.723114013671875, |
|
"logps/rejected": -71.45252990722656, |
|
"loss": 0.4784, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6233731508255005, |
|
"rewards/margins": 1.5422556400299072, |
|
"rewards/rejected": -2.1656289100646973, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.24096385542168675, |
|
"grad_norm": 5.072336673736572, |
|
"learning_rate": 3.7951807228915666e-05, |
|
"logits/chosen": -0.4270264208316803, |
|
"logits/rejected": -0.4727645814418793, |
|
"logps/chosen": -28.046024322509766, |
|
"logps/rejected": -67.43064880371094, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2529679238796234, |
|
"rewards/margins": 1.8341560363769531, |
|
"rewards/rejected": -2.0871243476867676, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.24542614904060686, |
|
"grad_norm": 5.309718608856201, |
|
"learning_rate": 3.772869254796966e-05, |
|
"logits/chosen": -0.48323965072631836, |
|
"logits/rejected": -0.486124187707901, |
|
"logps/chosen": -47.4906005859375, |
|
"logps/rejected": -54.931488037109375, |
|
"loss": 0.4811, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.081662893295288, |
|
"rewards/margins": 0.21773222088813782, |
|
"rewards/rejected": -1.2993950843811035, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.249888442659527, |
|
"grad_norm": 6.606435298919678, |
|
"learning_rate": 3.750557786702365e-05, |
|
"logits/chosen": -0.3396775722503662, |
|
"logits/rejected": -0.3402368426322937, |
|
"logps/chosen": -47.58402633666992, |
|
"logps/rejected": -62.18511962890625, |
|
"loss": 0.5876, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4098769724369049, |
|
"rewards/margins": 2.0012354850769043, |
|
"rewards/rejected": -2.4111123085021973, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.2543507362784471, |
|
"grad_norm": 11.281230926513672, |
|
"learning_rate": 3.728246318607764e-05, |
|
"logits/chosen": -0.4171501696109772, |
|
"logits/rejected": -0.4306756854057312, |
|
"logps/chosen": -65.50303649902344, |
|
"logps/rejected": -80.46224212646484, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.593769371509552, |
|
"rewards/margins": 0.9695860743522644, |
|
"rewards/rejected": -1.5633554458618164, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.25881302989736726, |
|
"grad_norm": 7.0754170417785645, |
|
"learning_rate": 3.705934850513164e-05, |
|
"logits/chosen": -0.3426072299480438, |
|
"logits/rejected": -0.4008513391017914, |
|
"logps/chosen": -39.230133056640625, |
|
"logps/rejected": -71.08201599121094, |
|
"loss": 0.5734, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7468615770339966, |
|
"rewards/margins": 0.4554690420627594, |
|
"rewards/rejected": -1.2023305892944336, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.26327532351628735, |
|
"grad_norm": 5.632711410522461, |
|
"learning_rate": 3.683623382418563e-05, |
|
"logits/chosen": -0.42842036485671997, |
|
"logits/rejected": -0.42812585830688477, |
|
"logps/chosen": -73.30018615722656, |
|
"logps/rejected": -76.4922103881836, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7800514101982117, |
|
"rewards/margins": 0.5425807237625122, |
|
"rewards/rejected": -1.3226321935653687, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.2677376171352075, |
|
"grad_norm": 4.532802104949951, |
|
"learning_rate": 3.661311914323963e-05, |
|
"logits/chosen": -0.35508742928504944, |
|
"logits/rejected": -0.3886106610298157, |
|
"logps/chosen": -41.14291763305664, |
|
"logps/rejected": -61.5432243347168, |
|
"loss": 0.5426, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3588089346885681, |
|
"rewards/margins": 0.7083269953727722, |
|
"rewards/rejected": -1.0671359300613403, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2677376171352075, |
|
"eval_logits/chosen": -0.2772347927093506, |
|
"eval_logits/rejected": -0.28446057438850403, |
|
"eval_logps/chosen": -52.01206970214844, |
|
"eval_logps/rejected": -61.566307067871094, |
|
"eval_loss": 0.534620463848114, |
|
"eval_rewards/accuracies": 0.7573529481887817, |
|
"eval_rewards/chosen": -0.5354136824607849, |
|
"eval_rewards/margins": 0.7372342944145203, |
|
"eval_rewards/rejected": -1.2726480960845947, |
|
"eval_runtime": 101.358, |
|
"eval_samples_per_second": 10.695, |
|
"eval_steps_per_second": 0.671, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.27219991075412764, |
|
"grad_norm": 12.182924270629883, |
|
"learning_rate": 3.639000446229362e-05, |
|
"logits/chosen": -0.5002850294113159, |
|
"logits/rejected": -0.5081164836883545, |
|
"logps/chosen": -60.66400909423828, |
|
"logps/rejected": -62.3776969909668, |
|
"loss": 0.4941, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7016831636428833, |
|
"rewards/margins": 0.8640923500061035, |
|
"rewards/rejected": -1.5657755136489868, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.2766622043730477, |
|
"grad_norm": 14.608983039855957, |
|
"learning_rate": 3.616688978134761e-05, |
|
"logits/chosen": -0.42569294571876526, |
|
"logits/rejected": -0.393016517162323, |
|
"logps/chosen": -95.41868591308594, |
|
"logps/rejected": -88.76508331298828, |
|
"loss": 0.5272, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1945841312408447, |
|
"rewards/margins": -0.19830770790576935, |
|
"rewards/rejected": -0.9962764978408813, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.28112449799196787, |
|
"grad_norm": 7.271474838256836, |
|
"learning_rate": 3.5943775100401605e-05, |
|
"logits/chosen": -0.34523439407348633, |
|
"logits/rejected": -0.3723691403865814, |
|
"logps/chosen": -47.205299377441406, |
|
"logps/rejected": -68.35771942138672, |
|
"loss": 0.4806, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.43685656785964966, |
|
"rewards/margins": 0.6329293251037598, |
|
"rewards/rejected": -1.0697859525680542, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.285586791610888, |
|
"grad_norm": 14.443384170532227, |
|
"learning_rate": 3.5720660419455603e-05, |
|
"logits/chosen": -0.5274410247802734, |
|
"logits/rejected": -0.5353649854660034, |
|
"logps/chosen": -62.40537643432617, |
|
"logps/rejected": -86.25618743896484, |
|
"loss": 0.5722, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2340843677520752, |
|
"rewards/margins": 0.2194378823041916, |
|
"rewards/rejected": -1.4535222053527832, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.2900490852298081, |
|
"grad_norm": 18.477384567260742, |
|
"learning_rate": 3.5497545738509595e-05, |
|
"logits/chosen": -0.5444958209991455, |
|
"logits/rejected": -0.5462334752082825, |
|
"logps/chosen": -50.28334426879883, |
|
"logps/rejected": -59.960235595703125, |
|
"loss": 0.5674, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0729570388793945, |
|
"rewards/margins": 0.45481768250465393, |
|
"rewards/rejected": -1.5277748107910156, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.29451137884872824, |
|
"grad_norm": 7.401510715484619, |
|
"learning_rate": 3.527443105756359e-05, |
|
"logits/chosen": -0.5343846082687378, |
|
"logits/rejected": -0.5842040181159973, |
|
"logps/chosen": -42.856529235839844, |
|
"logps/rejected": -75.27730560302734, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3604152798652649, |
|
"rewards/margins": 1.0930335521697998, |
|
"rewards/rejected": -1.45344877243042, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.2989736724676484, |
|
"grad_norm": 9.736113548278809, |
|
"learning_rate": 3.5051316376617586e-05, |
|
"logits/chosen": -0.4966914653778076, |
|
"logits/rejected": -0.46418899297714233, |
|
"logps/chosen": -76.83399963378906, |
|
"logps/rejected": -70.08952331542969, |
|
"loss": 0.5534, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.0979061126708984, |
|
"rewards/margins": 0.7910483479499817, |
|
"rewards/rejected": -1.888954520225525, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3034359660865685, |
|
"grad_norm": 12.46096420288086, |
|
"learning_rate": 3.482820169567158e-05, |
|
"logits/chosen": -0.37888795137405396, |
|
"logits/rejected": -0.4011334478855133, |
|
"logps/chosen": -39.91533279418945, |
|
"logps/rejected": -67.97415161132812, |
|
"loss": 0.4128, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.341644287109375, |
|
"rewards/margins": 2.0945029258728027, |
|
"rewards/rejected": -2.4361472129821777, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.3078982597054886, |
|
"grad_norm": 7.685484409332275, |
|
"learning_rate": 3.460508701472557e-05, |
|
"logits/chosen": -0.4292709231376648, |
|
"logits/rejected": -0.44719892740249634, |
|
"logps/chosen": -32.39400863647461, |
|
"logps/rejected": -57.27886199951172, |
|
"loss": 0.4466, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5088361501693726, |
|
"rewards/margins": 1.422799825668335, |
|
"rewards/rejected": -1.931635856628418, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.31236055332440876, |
|
"grad_norm": 13.410787582397461, |
|
"learning_rate": 3.438197233377957e-05, |
|
"logits/chosen": -0.5349194407463074, |
|
"logits/rejected": -0.5286005735397339, |
|
"logps/chosen": -68.42704772949219, |
|
"logps/rejected": -77.38883972167969, |
|
"loss": 0.513, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1387966871261597, |
|
"rewards/margins": 0.6625735759735107, |
|
"rewards/rejected": -1.8013702630996704, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.31236055332440876, |
|
"eval_logits/chosen": -0.30144786834716797, |
|
"eval_logits/rejected": -0.3091001808643341, |
|
"eval_logps/chosen": -55.06523895263672, |
|
"eval_logps/rejected": -66.52020263671875, |
|
"eval_loss": 0.5352330803871155, |
|
"eval_rewards/accuracies": 0.7426470518112183, |
|
"eval_rewards/chosen": -0.8407304286956787, |
|
"eval_rewards/margins": 0.927306592464447, |
|
"eval_rewards/rejected": -1.768036961555481, |
|
"eval_runtime": 101.2663, |
|
"eval_samples_per_second": 10.704, |
|
"eval_steps_per_second": 0.671, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.31682284694332885, |
|
"grad_norm": 15.995697975158691, |
|
"learning_rate": 3.415885765283356e-05, |
|
"logits/chosen": -0.401008278131485, |
|
"logits/rejected": -0.39111563563346863, |
|
"logps/chosen": -48.752140045166016, |
|
"logps/rejected": -50.47780990600586, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1747238636016846, |
|
"rewards/margins": 0.26606908440589905, |
|
"rewards/rejected": -1.4407927989959717, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.321285140562249, |
|
"grad_norm": 18.01109504699707, |
|
"learning_rate": 3.393574297188755e-05, |
|
"logits/chosen": -0.6200358867645264, |
|
"logits/rejected": -0.6256454586982727, |
|
"logps/chosen": -57.28871536254883, |
|
"logps/rejected": -65.64725494384766, |
|
"loss": 0.6522, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.732961654663086, |
|
"rewards/margins": -0.5568166971206665, |
|
"rewards/rejected": -1.1761447191238403, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.32574743418116914, |
|
"grad_norm": 9.055215835571289, |
|
"learning_rate": 3.371262829094154e-05, |
|
"logits/chosen": -0.5750800371170044, |
|
"logits/rejected": -0.5928658246994019, |
|
"logps/chosen": -34.40730667114258, |
|
"logps/rejected": -47.20003128051758, |
|
"loss": 0.5097, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8418025970458984, |
|
"rewards/margins": 1.1105152368545532, |
|
"rewards/rejected": -1.9523175954818726, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3302097278000892, |
|
"grad_norm": 7.461859226226807, |
|
"learning_rate": 3.3489513609995535e-05, |
|
"logits/chosen": -0.41441091895103455, |
|
"logits/rejected": -0.42659908533096313, |
|
"logps/chosen": -26.145000457763672, |
|
"logps/rejected": -57.18937301635742, |
|
"loss": 0.4744, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.14630934596061707, |
|
"rewards/margins": 1.9195754528045654, |
|
"rewards/rejected": -2.065885066986084, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.33467202141900937, |
|
"grad_norm": 5.896705150604248, |
|
"learning_rate": 3.326639892904953e-05, |
|
"logits/chosen": -0.5246270298957825, |
|
"logits/rejected": -0.5468592643737793, |
|
"logps/chosen": -58.91956329345703, |
|
"logps/rejected": -78.82437896728516, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.41132456064224243, |
|
"rewards/margins": 1.6576427221298218, |
|
"rewards/rejected": -2.068967342376709, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3391343150379295, |
|
"grad_norm": 6.602281093597412, |
|
"learning_rate": 3.304328424810353e-05, |
|
"logits/chosen": -0.40789738297462463, |
|
"logits/rejected": -0.4226173460483551, |
|
"logps/chosen": -35.644351959228516, |
|
"logps/rejected": -61.506996154785156, |
|
"loss": 0.4409, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5834091305732727, |
|
"rewards/margins": 1.6069660186767578, |
|
"rewards/rejected": -2.1903750896453857, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.3435966086568496, |
|
"grad_norm": 14.179399490356445, |
|
"learning_rate": 3.2820169567157523e-05, |
|
"logits/chosen": -0.48065876960754395, |
|
"logits/rejected": -0.48874765634536743, |
|
"logps/chosen": -75.91587829589844, |
|
"logps/rejected": -90.74102020263672, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.3592981100082397, |
|
"rewards/margins": 0.06780596822500229, |
|
"rewards/rejected": -1.4271042346954346, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.34805890227576974, |
|
"grad_norm": 8.853806495666504, |
|
"learning_rate": 3.2597054886211515e-05, |
|
"logits/chosen": -0.5286054015159607, |
|
"logits/rejected": -0.53807133436203, |
|
"logps/chosen": -50.976131439208984, |
|
"logps/rejected": -78.4500503540039, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.102555513381958, |
|
"rewards/margins": 0.7141586542129517, |
|
"rewards/rejected": -1.8167140483856201, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.3525211958946899, |
|
"grad_norm": 9.936935424804688, |
|
"learning_rate": 3.237394020526551e-05, |
|
"logits/chosen": -0.5033255815505981, |
|
"logits/rejected": -0.47105175256729126, |
|
"logps/chosen": -64.8266372680664, |
|
"logps/rejected": -50.24152755737305, |
|
"loss": 0.6029, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.2452679872512817, |
|
"rewards/margins": 0.23730802536010742, |
|
"rewards/rejected": -1.4825758934020996, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.35698348951361, |
|
"grad_norm": 10.322833061218262, |
|
"learning_rate": 3.21508255243195e-05, |
|
"logits/chosen": -0.34730347990989685, |
|
"logits/rejected": -0.3808386027812958, |
|
"logps/chosen": -44.28337478637695, |
|
"logps/rejected": -66.12834167480469, |
|
"loss": 0.5076, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9084919691085815, |
|
"rewards/margins": 0.5049213171005249, |
|
"rewards/rejected": -1.4134135246276855, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.35698348951361, |
|
"eval_logits/chosen": -0.2633431851863861, |
|
"eval_logits/rejected": -0.2696159780025482, |
|
"eval_logps/chosen": -53.41120147705078, |
|
"eval_logps/rejected": -65.52088165283203, |
|
"eval_loss": 0.5359630584716797, |
|
"eval_rewards/accuracies": 0.779411792755127, |
|
"eval_rewards/chosen": -0.6753266453742981, |
|
"eval_rewards/margins": 0.9927788972854614, |
|
"eval_rewards/rejected": -1.6681054830551147, |
|
"eval_runtime": 101.2668, |
|
"eval_samples_per_second": 10.704, |
|
"eval_steps_per_second": 0.671, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3614457831325301, |
|
"grad_norm": 5.2997355461120605, |
|
"learning_rate": 3.192771084337349e-05, |
|
"logits/chosen": -0.4527266025543213, |
|
"logits/rejected": -0.46011242270469666, |
|
"logps/chosen": -62.06636428833008, |
|
"logps/rejected": -70.8133773803711, |
|
"loss": 0.4549, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.4379562437534332, |
|
"rewards/margins": 1.3011623620986938, |
|
"rewards/rejected": -1.7391185760498047, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.36590807675145026, |
|
"grad_norm": 17.103435516357422, |
|
"learning_rate": 3.170459616242749e-05, |
|
"logits/chosen": -0.46886172890663147, |
|
"logits/rejected": -0.43263405561447144, |
|
"logps/chosen": -82.98887634277344, |
|
"logps/rejected": -72.94896697998047, |
|
"loss": 0.4592, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.4050307273864746, |
|
"rewards/margins": 0.36944881081581116, |
|
"rewards/rejected": -1.7744795083999634, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.37037037037037035, |
|
"grad_norm": 11.236923217773438, |
|
"learning_rate": 3.148148148148148e-05, |
|
"logits/chosen": -0.4804226756095886, |
|
"logits/rejected": -0.5012927651405334, |
|
"logps/chosen": -49.03837966918945, |
|
"logps/rejected": -81.67446899414062, |
|
"loss": 0.397, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8686717748641968, |
|
"rewards/margins": 1.75730299949646, |
|
"rewards/rejected": -2.625974655151367, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.3748326639892905, |
|
"grad_norm": 12.821537017822266, |
|
"learning_rate": 3.125836680053548e-05, |
|
"logits/chosen": -0.3475271165370941, |
|
"logits/rejected": -0.36012762784957886, |
|
"logps/chosen": -70.02849578857422, |
|
"logps/rejected": -93.74113464355469, |
|
"loss": 0.5321, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0148234367370605, |
|
"rewards/margins": 1.655390739440918, |
|
"rewards/rejected": -2.6702141761779785, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.37929495760821064, |
|
"grad_norm": 10.132525444030762, |
|
"learning_rate": 3.103525211958947e-05, |
|
"logits/chosen": -0.4582541584968567, |
|
"logits/rejected": -0.4778170585632324, |
|
"logps/chosen": -56.45001983642578, |
|
"logps/rejected": -77.17431640625, |
|
"loss": 0.5667, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2128407955169678, |
|
"rewards/margins": 0.84297114610672, |
|
"rewards/rejected": -2.055811882019043, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.3837572512271307, |
|
"grad_norm": 5.735864639282227, |
|
"learning_rate": 3.081213743864346e-05, |
|
"logits/chosen": -0.41625410318374634, |
|
"logits/rejected": -0.414503276348114, |
|
"logps/chosen": -68.24815368652344, |
|
"logps/rejected": -78.7939453125, |
|
"loss": 0.5694, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2397136688232422, |
|
"rewards/margins": 1.1576507091522217, |
|
"rewards/rejected": -2.397364377975464, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.38821954484605087, |
|
"grad_norm": 10.431081771850586, |
|
"learning_rate": 3.0589022757697455e-05, |
|
"logits/chosen": -0.37043625116348267, |
|
"logits/rejected": -0.36505812406539917, |
|
"logps/chosen": -61.54585647583008, |
|
"logps/rejected": -54.09624481201172, |
|
"loss": 0.5812, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1678658723831177, |
|
"rewards/margins": 0.3360505700111389, |
|
"rewards/rejected": -1.5039165019989014, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.392681838464971, |
|
"grad_norm": 4.431809425354004, |
|
"learning_rate": 3.0365908076751453e-05, |
|
"logits/chosen": -0.39441370964050293, |
|
"logits/rejected": -0.40531492233276367, |
|
"logps/chosen": -48.60410690307617, |
|
"logps/rejected": -65.05528259277344, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.39353805780410767, |
|
"rewards/margins": 1.5742074251174927, |
|
"rewards/rejected": -1.9677455425262451, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.3971441320838911, |
|
"grad_norm": 4.3731231689453125, |
|
"learning_rate": 3.0142793395805445e-05, |
|
"logits/chosen": -0.3797403573989868, |
|
"logits/rejected": -0.46006473898887634, |
|
"logps/chosen": -47.00241470336914, |
|
"logps/rejected": -96.08380126953125, |
|
"loss": 0.5376, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9989534616470337, |
|
"rewards/margins": 1.4857182502746582, |
|
"rewards/rejected": -2.4846718311309814, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.40160642570281124, |
|
"grad_norm": 7.302632808685303, |
|
"learning_rate": 2.991967871485944e-05, |
|
"logits/chosen": -0.264810174703598, |
|
"logits/rejected": -0.32226264476776123, |
|
"logps/chosen": -35.36387634277344, |
|
"logps/rejected": -106.31819152832031, |
|
"loss": 0.5817, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6298936009407043, |
|
"rewards/margins": 1.4022096395492554, |
|
"rewards/rejected": -2.0321033000946045, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.40160642570281124, |
|
"eval_logits/chosen": -0.2647411823272705, |
|
"eval_logits/rejected": -0.27174243330955505, |
|
"eval_logps/chosen": -53.786895751953125, |
|
"eval_logps/rejected": -64.93712615966797, |
|
"eval_loss": 0.5274691581726074, |
|
"eval_rewards/accuracies": 0.7573529481887817, |
|
"eval_rewards/chosen": -0.7128964066505432, |
|
"eval_rewards/margins": 0.8968335390090942, |
|
"eval_rewards/rejected": -1.6097300052642822, |
|
"eval_runtime": 101.1144, |
|
"eval_samples_per_second": 10.721, |
|
"eval_steps_per_second": 0.673, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.4060687193217314, |
|
"grad_norm": 8.521402359008789, |
|
"learning_rate": 2.9696564033913432e-05, |
|
"logits/chosen": -0.45753902196884155, |
|
"logits/rejected": -0.5024253726005554, |
|
"logps/chosen": -61.389381408691406, |
|
"logps/rejected": -82.64546966552734, |
|
"loss": 0.5569, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.58506178855896, |
|
"rewards/margins": 1.6228090524673462, |
|
"rewards/rejected": -2.2078709602355957, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.4105310129406515, |
|
"grad_norm": 15.225913047790527, |
|
"learning_rate": 2.9473449352967423e-05, |
|
"logits/chosen": -0.36992186307907104, |
|
"logits/rejected": -0.3966086208820343, |
|
"logps/chosen": -32.59536361694336, |
|
"logps/rejected": -55.531272888183594, |
|
"loss": 0.5051, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7083104252815247, |
|
"rewards/margins": 1.4818216562271118, |
|
"rewards/rejected": -2.190131902694702, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.4149933065595716, |
|
"grad_norm": 8.55628776550293, |
|
"learning_rate": 2.925033467202142e-05, |
|
"logits/chosen": -0.4315904974937439, |
|
"logits/rejected": -0.44425058364868164, |
|
"logps/chosen": -41.5259895324707, |
|
"logps/rejected": -46.93579864501953, |
|
"loss": 0.4783, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1068578958511353, |
|
"rewards/margins": 0.3185254633426666, |
|
"rewards/rejected": -1.425383448600769, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.41945560017849176, |
|
"grad_norm": 17.669008255004883, |
|
"learning_rate": 2.9027219991075417e-05, |
|
"logits/chosen": -0.41975274682044983, |
|
"logits/rejected": -0.4231061339378357, |
|
"logps/chosen": -68.89994812011719, |
|
"logps/rejected": -73.39204406738281, |
|
"loss": 0.5293, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9215507507324219, |
|
"rewards/margins": 0.9238599538803101, |
|
"rewards/rejected": -1.845410704612732, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.42391789379741185, |
|
"grad_norm": 6.391268253326416, |
|
"learning_rate": 2.880410531012941e-05, |
|
"logits/chosen": -0.4267478883266449, |
|
"logits/rejected": -0.48769569396972656, |
|
"logps/chosen": -30.958057403564453, |
|
"logps/rejected": -66.41792297363281, |
|
"loss": 0.5188, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3045811951160431, |
|
"rewards/margins": 2.2374026775360107, |
|
"rewards/rejected": -2.5419838428497314, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.428380187416332, |
|
"grad_norm": 4.784140110015869, |
|
"learning_rate": 2.85809906291834e-05, |
|
"logits/chosen": -0.4728068709373474, |
|
"logits/rejected": -0.47991928458213806, |
|
"logps/chosen": -54.8348274230957, |
|
"logps/rejected": -75.6138687133789, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3464925289154053, |
|
"rewards/margins": 0.40916529297828674, |
|
"rewards/rejected": -1.7556579113006592, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.43284248103525214, |
|
"grad_norm": 5.4959025382995605, |
|
"learning_rate": 2.8357875948237396e-05, |
|
"logits/chosen": -0.5516588687896729, |
|
"logits/rejected": -0.5653491020202637, |
|
"logps/chosen": -80.06575012207031, |
|
"logps/rejected": -54.55800247192383, |
|
"loss": 0.4543, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9705550074577332, |
|
"rewards/margins": 0.5480602979660034, |
|
"rewards/rejected": -1.5186152458190918, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.4373047746541722, |
|
"grad_norm": 11.408933639526367, |
|
"learning_rate": 2.8134761267291388e-05, |
|
"logits/chosen": -0.42612046003341675, |
|
"logits/rejected": -0.3984217941761017, |
|
"logps/chosen": -70.78621673583984, |
|
"logps/rejected": -66.38645935058594, |
|
"loss": 0.5169, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9449518322944641, |
|
"rewards/margins": 1.3869943618774414, |
|
"rewards/rejected": -2.3319461345672607, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.44176706827309237, |
|
"grad_norm": 8.765157699584961, |
|
"learning_rate": 2.791164658634538e-05, |
|
"logits/chosen": -0.5121334791183472, |
|
"logits/rejected": -0.5097476840019226, |
|
"logps/chosen": -61.830230712890625, |
|
"logps/rejected": -70.93310546875, |
|
"loss": 0.4597, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5881780982017517, |
|
"rewards/margins": 1.4265491962432861, |
|
"rewards/rejected": -2.0147273540496826, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.4462293618920125, |
|
"grad_norm": 5.806690692901611, |
|
"learning_rate": 2.7688531905399378e-05, |
|
"logits/chosen": -0.45526012778282166, |
|
"logits/rejected": -0.492563396692276, |
|
"logps/chosen": -46.581809997558594, |
|
"logps/rejected": -69.85079193115234, |
|
"loss": 0.6474, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2136809825897217, |
|
"rewards/margins": 0.9724341630935669, |
|
"rewards/rejected": -2.186115264892578, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4462293618920125, |
|
"eval_logits/chosen": -0.32657647132873535, |
|
"eval_logits/rejected": -0.33385756611824036, |
|
"eval_logps/chosen": -53.78933334350586, |
|
"eval_logps/rejected": -65.79786682128906, |
|
"eval_loss": 0.5189034342765808, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -0.7131394147872925, |
|
"eval_rewards/margins": 0.9826644659042358, |
|
"eval_rewards/rejected": -1.6958038806915283, |
|
"eval_runtime": 101.0344, |
|
"eval_samples_per_second": 10.729, |
|
"eval_steps_per_second": 0.673, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4506916555109326, |
|
"grad_norm": 8.036296844482422, |
|
"learning_rate": 2.7465417224453373e-05, |
|
"logits/chosen": -0.43507981300354004, |
|
"logits/rejected": -0.4114597737789154, |
|
"logps/chosen": -47.215457916259766, |
|
"logps/rejected": -59.1058464050293, |
|
"loss": 0.4654, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.658048152923584, |
|
"rewards/margins": 1.7121719121932983, |
|
"rewards/rejected": -2.3702197074890137, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.45515394912985274, |
|
"grad_norm": 8.876015663146973, |
|
"learning_rate": 2.7242302543507365e-05, |
|
"logits/chosen": -0.4568140506744385, |
|
"logits/rejected": -0.4774126410484314, |
|
"logps/chosen": -42.18695831298828, |
|
"logps/rejected": -77.10087585449219, |
|
"loss": 0.5748, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.0223681926727295, |
|
"rewards/margins": 0.8598814010620117, |
|
"rewards/rejected": -1.8822495937347412, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.4596162427487729, |
|
"grad_norm": 11.355658531188965, |
|
"learning_rate": 2.7019187862561356e-05, |
|
"logits/chosen": -0.44892439246177673, |
|
"logits/rejected": -0.47990983724594116, |
|
"logps/chosen": -43.116966247558594, |
|
"logps/rejected": -78.34208679199219, |
|
"loss": 0.5904, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2549419403076172, |
|
"rewards/margins": 1.9508358240127563, |
|
"rewards/rejected": -3.205777645111084, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.464078536367693, |
|
"grad_norm": 11.093362808227539, |
|
"learning_rate": 2.6796073181615348e-05, |
|
"logits/chosen": -0.48284226655960083, |
|
"logits/rejected": -0.4833238124847412, |
|
"logps/chosen": -52.57035446166992, |
|
"logps/rejected": -79.04975891113281, |
|
"loss": 0.5036, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7964859008789062, |
|
"rewards/margins": 1.3674949407577515, |
|
"rewards/rejected": -2.1639809608459473, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.4685408299866131, |
|
"grad_norm": 7.181427955627441, |
|
"learning_rate": 2.6572958500669343e-05, |
|
"logits/chosen": -0.4957679808139801, |
|
"logits/rejected": -0.5112098455429077, |
|
"logps/chosen": -53.201026916503906, |
|
"logps/rejected": -69.32044219970703, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7264264225959778, |
|
"rewards/margins": 1.1275393962860107, |
|
"rewards/rejected": -1.8539657592773438, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.47300312360553326, |
|
"grad_norm": 13.365934371948242, |
|
"learning_rate": 2.6349843819723342e-05, |
|
"logits/chosen": -0.5008978247642517, |
|
"logits/rejected": -0.528388261795044, |
|
"logps/chosen": -49.908477783203125, |
|
"logps/rejected": -71.87808227539062, |
|
"loss": 0.6248, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6799066662788391, |
|
"rewards/margins": 1.3960431814193726, |
|
"rewards/rejected": -2.0759501457214355, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.47746541722445335, |
|
"grad_norm": 12.133490562438965, |
|
"learning_rate": 2.6126729138777334e-05, |
|
"logits/chosen": -0.46771472692489624, |
|
"logits/rejected": -0.4607790410518646, |
|
"logps/chosen": -30.067209243774414, |
|
"logps/rejected": -54.92804718017578, |
|
"loss": 0.5414, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5407927632331848, |
|
"rewards/margins": 0.9840974807739258, |
|
"rewards/rejected": -1.5248900651931763, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.4819277108433735, |
|
"grad_norm": 13.021413803100586, |
|
"learning_rate": 2.5903614457831325e-05, |
|
"logits/chosen": -0.47129884362220764, |
|
"logits/rejected": -0.47959232330322266, |
|
"logps/chosen": -53.81621170043945, |
|
"logps/rejected": -71.24736022949219, |
|
"loss": 0.4385, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6359355449676514, |
|
"rewards/margins": 1.6977494955062866, |
|
"rewards/rejected": -2.3336853981018066, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.48639000446229363, |
|
"grad_norm": 9.804703712463379, |
|
"learning_rate": 2.568049977688532e-05, |
|
"logits/chosen": -0.4634290635585785, |
|
"logits/rejected": -0.5325796604156494, |
|
"logps/chosen": -19.7753849029541, |
|
"logps/rejected": -75.23787689208984, |
|
"loss": 0.4505, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20430362224578857, |
|
"rewards/margins": 1.7501170635223389, |
|
"rewards/rejected": -1.9544206857681274, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.4908522980812137, |
|
"grad_norm": 7.781200885772705, |
|
"learning_rate": 2.5457385095939312e-05, |
|
"logits/chosen": -0.40000230073928833, |
|
"logits/rejected": -0.4445677399635315, |
|
"logps/chosen": -37.69085693359375, |
|
"logps/rejected": -60.876617431640625, |
|
"loss": 0.5452, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6515712141990662, |
|
"rewards/margins": 1.1187314987182617, |
|
"rewards/rejected": -1.7703025341033936, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.4908522980812137, |
|
"eval_logits/chosen": -0.31743428111076355, |
|
"eval_logits/rejected": -0.3242875635623932, |
|
"eval_logps/chosen": -52.111961364746094, |
|
"eval_logps/rejected": -63.963722229003906, |
|
"eval_loss": 0.5114731788635254, |
|
"eval_rewards/accuracies": 0.7941176295280457, |
|
"eval_rewards/chosen": -0.5454027056694031, |
|
"eval_rewards/margins": 0.9669870734214783, |
|
"eval_rewards/rejected": -1.5123897790908813, |
|
"eval_runtime": 101.4382, |
|
"eval_samples_per_second": 10.686, |
|
"eval_steps_per_second": 0.67, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.49531459170013387, |
|
"grad_norm": 8.43966007232666, |
|
"learning_rate": 2.5234270414993304e-05, |
|
"logits/chosen": -0.512862503528595, |
|
"logits/rejected": -0.5506672859191895, |
|
"logps/chosen": -44.659854888916016, |
|
"logps/rejected": -75.06166076660156, |
|
"loss": 0.5458, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.4413532614707947, |
|
"rewards/margins": 1.5806982517242432, |
|
"rewards/rejected": -2.0220513343811035, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.499776885319054, |
|
"grad_norm": 9.60834789276123, |
|
"learning_rate": 2.5011155734047303e-05, |
|
"logits/chosen": -0.45566287636756897, |
|
"logits/rejected": -0.4595232605934143, |
|
"logps/chosen": -37.148109436035156, |
|
"logps/rejected": -70.86797332763672, |
|
"loss": 0.5933, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6128483414649963, |
|
"rewards/margins": 1.4094016551971436, |
|
"rewards/rejected": -2.022250175476074, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5042391789379741, |
|
"grad_norm": 6.633546829223633, |
|
"learning_rate": 2.4788041053101298e-05, |
|
"logits/chosen": -0.3673175573348999, |
|
"logits/rejected": -0.37458890676498413, |
|
"logps/chosen": -44.811927795410156, |
|
"logps/rejected": -67.8986587524414, |
|
"loss": 0.4252, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.4975345730781555, |
|
"rewards/margins": 1.2830320596694946, |
|
"rewards/rejected": -1.7805664539337158, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5087014725568942, |
|
"grad_norm": 10.727168083190918, |
|
"learning_rate": 2.456492637215529e-05, |
|
"logits/chosen": -0.5026527643203735, |
|
"logits/rejected": -0.5698090195655823, |
|
"logps/chosen": -32.542945861816406, |
|
"logps/rejected": -68.20999145507812, |
|
"loss": 0.5309, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8789398074150085, |
|
"rewards/margins": 1.0748862028121948, |
|
"rewards/rejected": -1.9538259506225586, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.5131637661758144, |
|
"grad_norm": 8.839791297912598, |
|
"learning_rate": 2.434181169120928e-05, |
|
"logits/chosen": -0.47114723920822144, |
|
"logits/rejected": -0.4503035545349121, |
|
"logps/chosen": -50.48181915283203, |
|
"logps/rejected": -53.96525955200195, |
|
"loss": 0.3485, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4421239495277405, |
|
"rewards/margins": 1.921203374862671, |
|
"rewards/rejected": -2.3633270263671875, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.5176260597947345, |
|
"grad_norm": 13.914494514465332, |
|
"learning_rate": 2.4118697010263276e-05, |
|
"logits/chosen": -0.46128734946250916, |
|
"logits/rejected": -0.4048306941986084, |
|
"logps/chosen": -48.669891357421875, |
|
"logps/rejected": -66.31475830078125, |
|
"loss": 0.525, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.649463951587677, |
|
"rewards/margins": 1.0556119680404663, |
|
"rewards/rejected": -1.7050758600234985, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.5220883534136547, |
|
"grad_norm": 7.4474568367004395, |
|
"learning_rate": 2.389558232931727e-05, |
|
"logits/chosen": -0.5582461953163147, |
|
"logits/rejected": -0.5327040553092957, |
|
"logps/chosen": -55.52189254760742, |
|
"logps/rejected": -67.39286804199219, |
|
"loss": 0.4917, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7929929494857788, |
|
"rewards/margins": 0.9378184080123901, |
|
"rewards/rejected": -1.7308114767074585, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.5265506470325747, |
|
"grad_norm": 19.10196876525879, |
|
"learning_rate": 2.3672467648371263e-05, |
|
"logits/chosen": -0.392108291387558, |
|
"logits/rejected": -0.42606526613235474, |
|
"logps/chosen": -49.433712005615234, |
|
"logps/rejected": -88.58631896972656, |
|
"loss": 0.4521, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5232197046279907, |
|
"rewards/margins": 2.0684046745300293, |
|
"rewards/rejected": -2.5916244983673096, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.5310129406514948, |
|
"grad_norm": 11.901342391967773, |
|
"learning_rate": 2.344935296742526e-05, |
|
"logits/chosen": -0.4675375819206238, |
|
"logits/rejected": -0.5168163180351257, |
|
"logps/chosen": -50.06731414794922, |
|
"logps/rejected": -101.67756652832031, |
|
"loss": 0.6322, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.018130898475647, |
|
"rewards/margins": 1.2320268154144287, |
|
"rewards/rejected": -2.250157594680786, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.535475234270415, |
|
"grad_norm": 7.169270038604736, |
|
"learning_rate": 2.322623828647925e-05, |
|
"logits/chosen": -0.4502222537994385, |
|
"logits/rejected": -0.43182867765426636, |
|
"logps/chosen": -39.58936309814453, |
|
"logps/rejected": -48.83460235595703, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.4606359004974365, |
|
"rewards/margins": 1.3261363506317139, |
|
"rewards/rejected": -1.7867721319198608, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.535475234270415, |
|
"eval_logits/chosen": -0.3180222809314728, |
|
"eval_logits/rejected": -0.3257814645767212, |
|
"eval_logps/chosen": -53.41277313232422, |
|
"eval_logps/rejected": -65.79794311523438, |
|
"eval_loss": 0.5051827430725098, |
|
"eval_rewards/accuracies": 0.7647058963775635, |
|
"eval_rewards/chosen": -0.6754838228225708, |
|
"eval_rewards/margins": 1.0203275680541992, |
|
"eval_rewards/rejected": -1.69581139087677, |
|
"eval_runtime": 101.3349, |
|
"eval_samples_per_second": 10.697, |
|
"eval_steps_per_second": 0.671, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5399375278893351, |
|
"grad_norm": 10.257837295532227, |
|
"learning_rate": 2.3003123605533245e-05, |
|
"logits/chosen": -0.5042774677276611, |
|
"logits/rejected": -0.5268147587776184, |
|
"logps/chosen": -62.96257781982422, |
|
"logps/rejected": -58.47686004638672, |
|
"loss": 0.4691, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.4827579855918884, |
|
"rewards/margins": 1.2310010194778442, |
|
"rewards/rejected": -1.7137588262557983, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.5443998215082553, |
|
"grad_norm": 5.952612400054932, |
|
"learning_rate": 2.278000892458724e-05, |
|
"logits/chosen": -0.40703314542770386, |
|
"logits/rejected": -0.4681945741176605, |
|
"logps/chosen": -39.01232147216797, |
|
"logps/rejected": -82.01300048828125, |
|
"loss": 0.3745, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.49716702103614807, |
|
"rewards/margins": 2.3570475578308105, |
|
"rewards/rejected": -2.854214906692505, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.5488621151271754, |
|
"grad_norm": 8.427573204040527, |
|
"learning_rate": 2.2556894243641232e-05, |
|
"logits/chosen": -0.459498792886734, |
|
"logits/rejected": -0.43668580055236816, |
|
"logps/chosen": -64.96345520019531, |
|
"logps/rejected": -68.22449493408203, |
|
"loss": 0.4485, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6714249849319458, |
|
"rewards/margins": 1.7123854160308838, |
|
"rewards/rejected": -2.383810520172119, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.5533244087460955, |
|
"grad_norm": 9.57485580444336, |
|
"learning_rate": 2.2333779562695224e-05, |
|
"logits/chosen": -0.3903302848339081, |
|
"logits/rejected": -0.41302937269210815, |
|
"logps/chosen": -52.199951171875, |
|
"logps/rejected": -73.79011535644531, |
|
"loss": 0.486, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2116793692111969, |
|
"rewards/margins": 1.9253880977630615, |
|
"rewards/rejected": -2.1370673179626465, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.5577867023650156, |
|
"grad_norm": 5.944408893585205, |
|
"learning_rate": 2.2110664881749223e-05, |
|
"logits/chosen": -0.40285030007362366, |
|
"logits/rejected": -0.4791896939277649, |
|
"logps/chosen": -28.524505615234375, |
|
"logps/rejected": -59.82304000854492, |
|
"loss": 0.3981, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7982343435287476, |
|
"rewards/margins": 1.1634286642074585, |
|
"rewards/rejected": -1.961663007736206, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.5622489959839357, |
|
"grad_norm": 5.317239284515381, |
|
"learning_rate": 2.1887550200803214e-05, |
|
"logits/chosen": -0.41990455985069275, |
|
"logits/rejected": -0.4091187119483948, |
|
"logps/chosen": -73.32225036621094, |
|
"logps/rejected": -65.48411560058594, |
|
"loss": 0.4727, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1173427104949951, |
|
"rewards/margins": 0.9003829956054688, |
|
"rewards/rejected": -2.017725706100464, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.5667112896028559, |
|
"grad_norm": 27.485258102416992, |
|
"learning_rate": 2.1664435519857206e-05, |
|
"logits/chosen": -0.3818276524543762, |
|
"logits/rejected": -0.4024294912815094, |
|
"logps/chosen": -50.28858184814453, |
|
"logps/rejected": -66.37843322753906, |
|
"loss": 0.5137, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0053184032440186, |
|
"rewards/margins": 0.9303516149520874, |
|
"rewards/rejected": -1.9356701374053955, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.571173583221776, |
|
"grad_norm": 10.918617248535156, |
|
"learning_rate": 2.14413208389112e-05, |
|
"logits/chosen": -0.3870011568069458, |
|
"logits/rejected": -0.39095568656921387, |
|
"logps/chosen": -76.80699157714844, |
|
"logps/rejected": -92.99617004394531, |
|
"loss": 0.4318, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.6950416564941406, |
|
"rewards/margins": 1.082456350326538, |
|
"rewards/rejected": -2.7774977684020996, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.5756358768406962, |
|
"grad_norm": 9.784821510314941, |
|
"learning_rate": 2.1218206157965196e-05, |
|
"logits/chosen": -0.5406726002693176, |
|
"logits/rejected": -0.5538305044174194, |
|
"logps/chosen": -66.9935302734375, |
|
"logps/rejected": -65.8385238647461, |
|
"loss": 0.4524, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.548122763633728, |
|
"rewards/margins": 1.1862457990646362, |
|
"rewards/rejected": -2.734368324279785, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.5800981704596162, |
|
"grad_norm": 6.906895160675049, |
|
"learning_rate": 2.0995091477019188e-05, |
|
"logits/chosen": -0.5440912246704102, |
|
"logits/rejected": -0.5402802228927612, |
|
"logps/chosen": -58.34566116333008, |
|
"logps/rejected": -70.64501190185547, |
|
"loss": 0.482, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.991224467754364, |
|
"rewards/margins": 0.940337061882019, |
|
"rewards/rejected": -1.9315614700317383, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5800981704596162, |
|
"eval_logits/chosen": -0.33946266770362854, |
|
"eval_logits/rejected": -0.3464009463787079, |
|
"eval_logps/chosen": -55.18332290649414, |
|
"eval_logps/rejected": -69.80598449707031, |
|
"eval_loss": 0.5237538814544678, |
|
"eval_rewards/accuracies": 0.7573529481887817, |
|
"eval_rewards/chosen": -0.8525390625, |
|
"eval_rewards/margins": 1.2440773248672485, |
|
"eval_rewards/rejected": -2.096616506576538, |
|
"eval_runtime": 101.2246, |
|
"eval_samples_per_second": 10.709, |
|
"eval_steps_per_second": 0.672, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5845604640785363, |
|
"grad_norm": 10.362833976745605, |
|
"learning_rate": 2.0771976796073183e-05, |
|
"logits/chosen": -0.42652565240859985, |
|
"logits/rejected": -0.44766539335250854, |
|
"logps/chosen": -35.81039047241211, |
|
"logps/rejected": -58.48369598388672, |
|
"loss": 0.4345, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9444332122802734, |
|
"rewards/margins": 1.8897850513458252, |
|
"rewards/rejected": -2.8342182636260986, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.5890227576974565, |
|
"grad_norm": 13.620052337646484, |
|
"learning_rate": 2.0548862115127175e-05, |
|
"logits/chosen": -0.4428171217441559, |
|
"logits/rejected": -0.4566754698753357, |
|
"logps/chosen": -57.314903259277344, |
|
"logps/rejected": -84.61968994140625, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0011004209518433, |
|
"rewards/margins": 1.7249071598052979, |
|
"rewards/rejected": -2.7260074615478516, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.5934850513163766, |
|
"grad_norm": 5.6110687255859375, |
|
"learning_rate": 2.032574743418117e-05, |
|
"logits/chosen": -0.49851423501968384, |
|
"logits/rejected": -0.5178089141845703, |
|
"logps/chosen": -56.25935745239258, |
|
"logps/rejected": -101.2408447265625, |
|
"loss": 0.6307, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.514937400817871, |
|
"rewards/margins": 1.5338618755340576, |
|
"rewards/rejected": -3.0487992763519287, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.5979473449352968, |
|
"grad_norm": 11.155418395996094, |
|
"learning_rate": 2.0102632753235165e-05, |
|
"logits/chosen": -0.43120676279067993, |
|
"logits/rejected": -0.4328102171421051, |
|
"logps/chosen": -32.240638732910156, |
|
"logps/rejected": -44.41697311401367, |
|
"loss": 0.579, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7402340769767761, |
|
"rewards/margins": 1.476976990699768, |
|
"rewards/rejected": -2.2172112464904785, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.6024096385542169, |
|
"grad_norm": 9.595579147338867, |
|
"learning_rate": 1.9879518072289157e-05, |
|
"logits/chosen": -0.49842292070388794, |
|
"logits/rejected": -0.5113445520401001, |
|
"logps/chosen": -60.954429626464844, |
|
"logps/rejected": -57.362586975097656, |
|
"loss": 0.5619, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3111724853515625, |
|
"rewards/margins": 1.888265609741211, |
|
"rewards/rejected": -2.1994378566741943, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.606871932173137, |
|
"grad_norm": 9.583802223205566, |
|
"learning_rate": 1.9656403391343152e-05, |
|
"logits/chosen": -0.4840572774410248, |
|
"logits/rejected": -0.46289128065109253, |
|
"logps/chosen": -59.59627151489258, |
|
"logps/rejected": -54.098060607910156, |
|
"loss": 0.4554, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6472072601318359, |
|
"rewards/margins": 1.8097079992294312, |
|
"rewards/rejected": -2.4569153785705566, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.6113342257920571, |
|
"grad_norm": 12.826497077941895, |
|
"learning_rate": 1.9433288710397147e-05, |
|
"logits/chosen": -0.4850468039512634, |
|
"logits/rejected": -0.4863508641719818, |
|
"logps/chosen": -52.7967529296875, |
|
"logps/rejected": -79.17362213134766, |
|
"loss": 0.4646, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0112146139144897, |
|
"rewards/margins": 0.7297881841659546, |
|
"rewards/rejected": -1.7410027980804443, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.6157965194109772, |
|
"grad_norm": 11.039398193359375, |
|
"learning_rate": 1.921017402945114e-05, |
|
"logits/chosen": -0.427783340215683, |
|
"logits/rejected": -0.4737025201320648, |
|
"logps/chosen": -26.334285736083984, |
|
"logps/rejected": -75.00768280029297, |
|
"loss": 0.4722, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7227652668952942, |
|
"rewards/margins": 1.7406288385391235, |
|
"rewards/rejected": -2.4633939266204834, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.6202588130298974, |
|
"grad_norm": 4.866807460784912, |
|
"learning_rate": 1.898705934850513e-05, |
|
"logits/chosen": -0.306061327457428, |
|
"logits/rejected": -0.29123881459236145, |
|
"logps/chosen": -49.8400993347168, |
|
"logps/rejected": -55.355064392089844, |
|
"loss": 0.5445, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9609847068786621, |
|
"rewards/margins": 1.2047231197357178, |
|
"rewards/rejected": -2.16570782661438, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.6247211066488175, |
|
"grad_norm": 10.117777824401855, |
|
"learning_rate": 1.8763944667559126e-05, |
|
"logits/chosen": -0.47783708572387695, |
|
"logits/rejected": -0.5060421228408813, |
|
"logps/chosen": -33.2414436340332, |
|
"logps/rejected": -68.35478210449219, |
|
"loss": 0.4177, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5439062714576721, |
|
"rewards/margins": 2.1169350147247314, |
|
"rewards/rejected": -2.6608409881591797, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6247211066488175, |
|
"eval_logits/chosen": -0.32495564222335815, |
|
"eval_logits/rejected": -0.33121562004089355, |
|
"eval_logps/chosen": -55.766353607177734, |
|
"eval_logps/rejected": -69.83653259277344, |
|
"eval_loss": 0.5076476335525513, |
|
"eval_rewards/accuracies": 0.7720588445663452, |
|
"eval_rewards/chosen": -0.9108418226242065, |
|
"eval_rewards/margins": 1.188828468322754, |
|
"eval_rewards/rejected": -2.09967041015625, |
|
"eval_runtime": 108.2891, |
|
"eval_samples_per_second": 10.01, |
|
"eval_steps_per_second": 0.628, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6291834002677377, |
|
"grad_norm": 13.984685897827148, |
|
"learning_rate": 1.854082998661312e-05, |
|
"logits/chosen": -0.35908347368240356, |
|
"logits/rejected": -0.38989967107772827, |
|
"logps/chosen": -68.23403930664062, |
|
"logps/rejected": -110.07171630859375, |
|
"loss": 0.5729, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.378349781036377, |
|
"rewards/margins": 1.4953136444091797, |
|
"rewards/rejected": -2.8736634254455566, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.6336456938866577, |
|
"grad_norm": 14.599750518798828, |
|
"learning_rate": 1.8317715305667113e-05, |
|
"logits/chosen": -0.44348686933517456, |
|
"logits/rejected": -0.41905340552330017, |
|
"logps/chosen": -74.93011474609375, |
|
"logps/rejected": -82.81726837158203, |
|
"loss": 0.5022, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1212565898895264, |
|
"rewards/margins": 1.7725646495819092, |
|
"rewards/rejected": -2.8938212394714355, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.6381079875055778, |
|
"grad_norm": 6.304447174072266, |
|
"learning_rate": 1.8094600624721108e-05, |
|
"logits/chosen": -0.43102559447288513, |
|
"logits/rejected": -0.4420749247074127, |
|
"logps/chosen": -61.5362548828125, |
|
"logps/rejected": -79.70188903808594, |
|
"loss": 0.4582, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3591984510421753, |
|
"rewards/margins": 1.5787193775177002, |
|
"rewards/rejected": -2.937918186187744, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.642570281124498, |
|
"grad_norm": 7.736660480499268, |
|
"learning_rate": 1.78714859437751e-05, |
|
"logits/chosen": -0.4689575135707855, |
|
"logits/rejected": -0.5129413604736328, |
|
"logps/chosen": -46.32543182373047, |
|
"logps/rejected": -73.49056243896484, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6536145210266113, |
|
"rewards/margins": 1.313718557357788, |
|
"rewards/rejected": -1.967333197593689, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.6470325747434181, |
|
"grad_norm": 11.176409721374512, |
|
"learning_rate": 1.7648371262829095e-05, |
|
"logits/chosen": -0.393160343170166, |
|
"logits/rejected": -0.4170041084289551, |
|
"logps/chosen": -41.45922088623047, |
|
"logps/rejected": -59.269920349121094, |
|
"loss": 0.5298, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2571731209754944, |
|
"rewards/margins": 2.5432677268981934, |
|
"rewards/rejected": -2.800440788269043, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.6514948683623383, |
|
"grad_norm": 5.22855806350708, |
|
"learning_rate": 1.742525658188309e-05, |
|
"logits/chosen": -0.36421069502830505, |
|
"logits/rejected": -0.4037911295890808, |
|
"logps/chosen": -20.40701675415039, |
|
"logps/rejected": -53.22682571411133, |
|
"loss": 0.4089, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3262079060077667, |
|
"rewards/margins": 1.5061506032943726, |
|
"rewards/rejected": -1.8323585987091064, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.6559571619812584, |
|
"grad_norm": 7.177018165588379, |
|
"learning_rate": 1.7202141900937082e-05, |
|
"logits/chosen": -0.4719008803367615, |
|
"logits/rejected": -0.48431381583213806, |
|
"logps/chosen": -42.980186462402344, |
|
"logps/rejected": -67.3738021850586, |
|
"loss": 0.4464, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6576524972915649, |
|
"rewards/margins": 1.067777156829834, |
|
"rewards/rejected": -1.7254295349121094, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.6604194556001785, |
|
"grad_norm": 16.66737937927246, |
|
"learning_rate": 1.6979027219991077e-05, |
|
"logits/chosen": -0.5019597411155701, |
|
"logits/rejected": -0.4994475841522217, |
|
"logps/chosen": -88.7463607788086, |
|
"logps/rejected": -76.72743225097656, |
|
"loss": 0.5714, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.399727463722229, |
|
"rewards/margins": -0.017042243853211403, |
|
"rewards/rejected": -1.3826851844787598, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.6648817492190986, |
|
"grad_norm": 7.705491065979004, |
|
"learning_rate": 1.6755912539045072e-05, |
|
"logits/chosen": -0.44774121046066284, |
|
"logits/rejected": -0.4976162314414978, |
|
"logps/chosen": -36.44142532348633, |
|
"logps/rejected": -71.34239196777344, |
|
"loss": 0.4034, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5971248149871826, |
|
"rewards/margins": 1.3644919395446777, |
|
"rewards/rejected": -1.9616167545318604, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.6693440428380187, |
|
"grad_norm": 5.330473899841309, |
|
"learning_rate": 1.6532797858099064e-05, |
|
"logits/chosen": -0.48206400871276855, |
|
"logits/rejected": -0.4945335388183594, |
|
"logps/chosen": -59.6390495300293, |
|
"logps/rejected": -89.34614562988281, |
|
"loss": 0.4499, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9486683011054993, |
|
"rewards/margins": 2.047510862350464, |
|
"rewards/rejected": -2.9961795806884766, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6693440428380187, |
|
"eval_logits/chosen": -0.3083733916282654, |
|
"eval_logits/rejected": -0.31396305561065674, |
|
"eval_logps/chosen": -55.00502014160156, |
|
"eval_logps/rejected": -69.16178131103516, |
|
"eval_loss": 0.509951651096344, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -0.8347085118293762, |
|
"eval_rewards/margins": 1.1974865198135376, |
|
"eval_rewards/rejected": -2.0321948528289795, |
|
"eval_runtime": 108.0234, |
|
"eval_samples_per_second": 10.035, |
|
"eval_steps_per_second": 0.629, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6738063364569389, |
|
"grad_norm": 8.810395240783691, |
|
"learning_rate": 1.6309683177153056e-05, |
|
"logits/chosen": -0.33375945687294006, |
|
"logits/rejected": -0.3630695939064026, |
|
"logps/chosen": -67.69178771972656, |
|
"logps/rejected": -86.74795532226562, |
|
"loss": 0.5146, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.0225179195404053, |
|
"rewards/margins": 1.4887641668319702, |
|
"rewards/rejected": -2.511282444000244, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.678268630075859, |
|
"grad_norm": 6.438061237335205, |
|
"learning_rate": 1.608656849620705e-05, |
|
"logits/chosen": -0.44403213262557983, |
|
"logits/rejected": -0.43231791257858276, |
|
"logps/chosen": -53.98673629760742, |
|
"logps/rejected": -64.71061706542969, |
|
"loss": 0.551, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0513066053390503, |
|
"rewards/margins": 0.6497117280960083, |
|
"rewards/rejected": -1.7010183334350586, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.6827309236947792, |
|
"grad_norm": 14.077432632446289, |
|
"learning_rate": 1.5863453815261046e-05, |
|
"logits/chosen": -0.4353730082511902, |
|
"logits/rejected": -0.4359283447265625, |
|
"logps/chosen": -35.019020080566406, |
|
"logps/rejected": -60.0317268371582, |
|
"loss": 0.4176, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.26359862089157104, |
|
"rewards/margins": 2.748635768890381, |
|
"rewards/rejected": -3.0122342109680176, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.6871932173136992, |
|
"grad_norm": 8.738804817199707, |
|
"learning_rate": 1.5640339134315038e-05, |
|
"logits/chosen": -0.5284406542778015, |
|
"logits/rejected": -0.5210434198379517, |
|
"logps/chosen": -63.6600227355957, |
|
"logps/rejected": -92.09748840332031, |
|
"loss": 0.4168, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3289673328399658, |
|
"rewards/margins": 2.0158588886260986, |
|
"rewards/rejected": -3.3448264598846436, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.6916555109326193, |
|
"grad_norm": 11.36034107208252, |
|
"learning_rate": 1.5417224453369033e-05, |
|
"logits/chosen": -0.4030238687992096, |
|
"logits/rejected": -0.43137067556381226, |
|
"logps/chosen": -57.97930908203125, |
|
"logps/rejected": -76.85355377197266, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9804684519767761, |
|
"rewards/margins": 1.1237452030181885, |
|
"rewards/rejected": -2.1042134761810303, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.6961178045515395, |
|
"grad_norm": 6.310385704040527, |
|
"learning_rate": 1.5194109772423026e-05, |
|
"logits/chosen": -0.5055387020111084, |
|
"logits/rejected": -0.498251736164093, |
|
"logps/chosen": -53.9688835144043, |
|
"logps/rejected": -57.785980224609375, |
|
"loss": 0.3666, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5929397344589233, |
|
"rewards/margins": 1.0913288593292236, |
|
"rewards/rejected": -1.684268593788147, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.7005800981704596, |
|
"grad_norm": 7.37251615524292, |
|
"learning_rate": 1.497099509147702e-05, |
|
"logits/chosen": -0.4770750105381012, |
|
"logits/rejected": -0.49025219678878784, |
|
"logps/chosen": -41.87021255493164, |
|
"logps/rejected": -91.51953887939453, |
|
"loss": 0.4124, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.648074746131897, |
|
"rewards/margins": 2.891958713531494, |
|
"rewards/rejected": -3.5400328636169434, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.7050423917893798, |
|
"grad_norm": 12.839133262634277, |
|
"learning_rate": 1.4747880410531015e-05, |
|
"logits/chosen": -0.40690451860427856, |
|
"logits/rejected": -0.3717205822467804, |
|
"logps/chosen": -72.51580810546875, |
|
"logps/rejected": -53.998374938964844, |
|
"loss": 0.5735, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -1.4899680614471436, |
|
"rewards/margins": -0.1300063580274582, |
|
"rewards/rejected": -1.359961986541748, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.7095046854082999, |
|
"grad_norm": 11.395818710327148, |
|
"learning_rate": 1.4524765729585008e-05, |
|
"logits/chosen": -0.43929824233055115, |
|
"logits/rejected": -0.4690605700016022, |
|
"logps/chosen": -56.8370361328125, |
|
"logps/rejected": -97.08080291748047, |
|
"loss": 0.5895, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3506587743759155, |
|
"rewards/margins": 0.9556113481521606, |
|
"rewards/rejected": -2.306270122528076, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.71396697902722, |
|
"grad_norm": 4.949778079986572, |
|
"learning_rate": 1.4301651048639e-05, |
|
"logits/chosen": -0.513823926448822, |
|
"logits/rejected": -0.48624104261398315, |
|
"logps/chosen": -59.283348083496094, |
|
"logps/rejected": -48.48424530029297, |
|
"loss": 0.4755, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8699353933334351, |
|
"rewards/margins": 1.5018436908721924, |
|
"rewards/rejected": -2.371778964996338, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.71396697902722, |
|
"eval_logits/chosen": -0.2982633113861084, |
|
"eval_logits/rejected": -0.30315345525741577, |
|
"eval_logps/chosen": -54.16980743408203, |
|
"eval_logps/rejected": -67.76078033447266, |
|
"eval_loss": 0.5016461610794067, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -0.7511871457099915, |
|
"eval_rewards/margins": 1.1409085988998413, |
|
"eval_rewards/rejected": -1.892095685005188, |
|
"eval_runtime": 101.2549, |
|
"eval_samples_per_second": 10.706, |
|
"eval_steps_per_second": 0.672, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.7184292726461401, |
|
"grad_norm": 3.346531867980957, |
|
"learning_rate": 1.4078536367692995e-05, |
|
"logits/chosen": -0.4629895091056824, |
|
"logits/rejected": -0.46206387877464294, |
|
"logps/chosen": -75.60456085205078, |
|
"logps/rejected": -65.10679626464844, |
|
"loss": 0.4886, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.201537847518921, |
|
"rewards/margins": 0.4333462715148926, |
|
"rewards/rejected": -1.6348838806152344, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.7228915662650602, |
|
"grad_norm": 9.76317310333252, |
|
"learning_rate": 1.3855421686746989e-05, |
|
"logits/chosen": -0.43066325783729553, |
|
"logits/rejected": -0.4698410630226135, |
|
"logps/chosen": -49.887386322021484, |
|
"logps/rejected": -86.30489349365234, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6148198246955872, |
|
"rewards/margins": 2.13301420211792, |
|
"rewards/rejected": -2.7478346824645996, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.7273538598839804, |
|
"grad_norm": 11.436201095581055, |
|
"learning_rate": 1.3632307005800982e-05, |
|
"logits/chosen": -0.4333977699279785, |
|
"logits/rejected": -0.407217800617218, |
|
"logps/chosen": -75.92472076416016, |
|
"logps/rejected": -73.89183044433594, |
|
"loss": 0.4769, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9417136311531067, |
|
"rewards/margins": 1.3147486448287964, |
|
"rewards/rejected": -2.256462574005127, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.7318161535029005, |
|
"grad_norm": 7.414572238922119, |
|
"learning_rate": 1.3409192324854977e-05, |
|
"logits/chosen": -0.41667813062667847, |
|
"logits/rejected": -0.3875000774860382, |
|
"logps/chosen": -62.658782958984375, |
|
"logps/rejected": -81.83895874023438, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7160552144050598, |
|
"rewards/margins": 1.0887064933776855, |
|
"rewards/rejected": -1.8047618865966797, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.7362784471218207, |
|
"grad_norm": 4.594730854034424, |
|
"learning_rate": 1.318607764390897e-05, |
|
"logits/chosen": -0.4519456923007965, |
|
"logits/rejected": -0.420981228351593, |
|
"logps/chosen": -63.16857147216797, |
|
"logps/rejected": -40.614803314208984, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8438780903816223, |
|
"rewards/margins": 0.9280462265014648, |
|
"rewards/rejected": -1.771924376487732, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.7407407407407407, |
|
"grad_norm": 6.27785587310791, |
|
"learning_rate": 1.2962962962962962e-05, |
|
"logits/chosen": -0.45475682616233826, |
|
"logits/rejected": -0.4671143889427185, |
|
"logps/chosen": -59.521018981933594, |
|
"logps/rejected": -93.85238647460938, |
|
"loss": 0.5317, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6071975827217102, |
|
"rewards/margins": 1.8668525218963623, |
|
"rewards/rejected": -2.4740498065948486, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.7452030343596608, |
|
"grad_norm": 14.149540901184082, |
|
"learning_rate": 1.273984828201696e-05, |
|
"logits/chosen": -0.33088964223861694, |
|
"logits/rejected": -0.3637358844280243, |
|
"logps/chosen": -35.719932556152344, |
|
"logps/rejected": -68.94010162353516, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4173128604888916, |
|
"rewards/margins": 1.1251620054244995, |
|
"rewards/rejected": -1.5424749851226807, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.749665327978581, |
|
"grad_norm": 3.945425510406494, |
|
"learning_rate": 1.2516733601070951e-05, |
|
"logits/chosen": -0.4443919062614441, |
|
"logits/rejected": -0.4429320693016052, |
|
"logps/chosen": -52.670204162597656, |
|
"logps/rejected": -63.29277801513672, |
|
"loss": 0.3682, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23792748153209686, |
|
"rewards/margins": 2.1122870445251465, |
|
"rewards/rejected": -2.3502144813537598, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.7541276215975011, |
|
"grad_norm": 9.136353492736816, |
|
"learning_rate": 1.2293618920124946e-05, |
|
"logits/chosen": -0.4495305120944977, |
|
"logits/rejected": -0.4505879878997803, |
|
"logps/chosen": -62.21101760864258, |
|
"logps/rejected": -66.43666076660156, |
|
"loss": 0.6143, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.524583101272583, |
|
"rewards/margins": 0.3498340845108032, |
|
"rewards/rejected": -1.8744173049926758, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.7585899152164213, |
|
"grad_norm": 4.758078575134277, |
|
"learning_rate": 1.2070504239178938e-05, |
|
"logits/chosen": -0.3549007475376129, |
|
"logits/rejected": -0.34870830178260803, |
|
"logps/chosen": -63.522850036621094, |
|
"logps/rejected": -69.67027282714844, |
|
"loss": 0.5187, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1696350574493408, |
|
"rewards/margins": 0.38375288248062134, |
|
"rewards/rejected": -1.5533878803253174, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.7585899152164213, |
|
"eval_logits/chosen": -0.2880391478538513, |
|
"eval_logits/rejected": -0.29347360134124756, |
|
"eval_logps/chosen": -53.26831817626953, |
|
"eval_logps/rejected": -66.06800079345703, |
|
"eval_loss": 0.4919765293598175, |
|
"eval_rewards/accuracies": 0.7573529481887817, |
|
"eval_rewards/chosen": -0.6610381603240967, |
|
"eval_rewards/margins": 1.0617787837982178, |
|
"eval_rewards/rejected": -1.7228171825408936, |
|
"eval_runtime": 101.032, |
|
"eval_samples_per_second": 10.729, |
|
"eval_steps_per_second": 0.673, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.7630522088353414, |
|
"grad_norm": 8.118391036987305, |
|
"learning_rate": 1.1847389558232933e-05, |
|
"logits/chosen": -0.5301041007041931, |
|
"logits/rejected": -0.5422204732894897, |
|
"logps/chosen": -61.74125289916992, |
|
"logps/rejected": -71.42862701416016, |
|
"loss": 0.4465, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6467247605323792, |
|
"rewards/margins": 1.4055416584014893, |
|
"rewards/rejected": -2.0522665977478027, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.7675145024542614, |
|
"grad_norm": 8.22636890411377, |
|
"learning_rate": 1.1624274877286927e-05, |
|
"logits/chosen": -0.35849496722221375, |
|
"logits/rejected": -0.4118039011955261, |
|
"logps/chosen": -35.86994934082031, |
|
"logps/rejected": -86.98027038574219, |
|
"loss": 0.4831, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9291173219680786, |
|
"rewards/margins": 1.6408910751342773, |
|
"rewards/rejected": -2.5700087547302246, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.7719767960731816, |
|
"grad_norm": 10.276652336120605, |
|
"learning_rate": 1.140116019634092e-05, |
|
"logits/chosen": -0.4604182839393616, |
|
"logits/rejected": -0.4417789578437805, |
|
"logps/chosen": -64.12181091308594, |
|
"logps/rejected": -62.79909133911133, |
|
"loss": 0.4848, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3580498695373535, |
|
"rewards/margins": 1.1392812728881836, |
|
"rewards/rejected": -1.497330904006958, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.7764390896921017, |
|
"grad_norm": 19.242168426513672, |
|
"learning_rate": 1.1178045515394913e-05, |
|
"logits/chosen": -0.3768201470375061, |
|
"logits/rejected": -0.3910086750984192, |
|
"logps/chosen": -51.35431671142578, |
|
"logps/rejected": -96.27626037597656, |
|
"loss": 0.4913, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.834559440612793, |
|
"rewards/margins": 1.2117122411727905, |
|
"rewards/rejected": -2.046271800994873, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.7809013833110219, |
|
"grad_norm": 7.228452682495117, |
|
"learning_rate": 1.0954930834448909e-05, |
|
"logits/chosen": -0.4177151620388031, |
|
"logits/rejected": -0.4117390215396881, |
|
"logps/chosen": -80.69796752929688, |
|
"logps/rejected": -72.65681457519531, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5733884572982788, |
|
"rewards/margins": 0.2874446511268616, |
|
"rewards/rejected": -1.8608331680297852, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.785363676929942, |
|
"grad_norm": 7.547688007354736, |
|
"learning_rate": 1.07318161535029e-05, |
|
"logits/chosen": -0.24763599038124084, |
|
"logits/rejected": -0.28708669543266296, |
|
"logps/chosen": -48.46918487548828, |
|
"logps/rejected": -92.23126220703125, |
|
"loss": 0.3591, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.004040002822876, |
|
"rewards/margins": 2.1987831592559814, |
|
"rewards/rejected": -3.2028231620788574, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.7898259705488622, |
|
"grad_norm": 9.068070411682129, |
|
"learning_rate": 1.0508701472556895e-05, |
|
"logits/chosen": -0.3363795280456543, |
|
"logits/rejected": -0.3373815417289734, |
|
"logps/chosen": -66.52767181396484, |
|
"logps/rejected": -74.45672607421875, |
|
"loss": 0.5006, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.2857935428619385, |
|
"rewards/margins": 1.5400397777557373, |
|
"rewards/rejected": -2.8258330821990967, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.7942882641677822, |
|
"grad_norm": 12.077897071838379, |
|
"learning_rate": 1.0285586791610889e-05, |
|
"logits/chosen": -0.47275543212890625, |
|
"logits/rejected": -0.486142635345459, |
|
"logps/chosen": -33.20726013183594, |
|
"logps/rejected": -57.34539794921875, |
|
"loss": 0.591, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.4416440427303314, |
|
"rewards/margins": 1.5108617544174194, |
|
"rewards/rejected": -1.9525058269500732, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.7987505577867023, |
|
"grad_norm": 9.646073341369629, |
|
"learning_rate": 1.0062472110664882e-05, |
|
"logits/chosen": -0.3709737956523895, |
|
"logits/rejected": -0.37242692708969116, |
|
"logps/chosen": -28.837970733642578, |
|
"logps/rejected": -42.87725067138672, |
|
"loss": 0.4277, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5846964120864868, |
|
"rewards/margins": 1.4075520038604736, |
|
"rewards/rejected": -1.992248773574829, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.8032128514056225, |
|
"grad_norm": 7.524696350097656, |
|
"learning_rate": 9.839357429718876e-06, |
|
"logits/chosen": -0.3760637938976288, |
|
"logits/rejected": -0.39673715829849243, |
|
"logps/chosen": -55.75551223754883, |
|
"logps/rejected": -91.19023132324219, |
|
"loss": 0.3968, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2662776708602905, |
|
"rewards/margins": 0.8104913830757141, |
|
"rewards/rejected": -2.0767691135406494, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.8032128514056225, |
|
"eval_logits/chosen": -0.29305195808410645, |
|
"eval_logits/rejected": -0.29807421565055847, |
|
"eval_logps/chosen": -54.66646957397461, |
|
"eval_logps/rejected": -68.70016479492188, |
|
"eval_loss": 0.5018442869186401, |
|
"eval_rewards/accuracies": 0.7426470518112183, |
|
"eval_rewards/chosen": -0.8008536100387573, |
|
"eval_rewards/margins": 1.185180425643921, |
|
"eval_rewards/rejected": -1.9860339164733887, |
|
"eval_runtime": 101.4323, |
|
"eval_samples_per_second": 10.687, |
|
"eval_steps_per_second": 0.67, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.8076751450245426, |
|
"grad_norm": 16.806318283081055, |
|
"learning_rate": 9.616242748772871e-06, |
|
"logits/chosen": -0.37702614068984985, |
|
"logits/rejected": -0.42263609170913696, |
|
"logps/chosen": -30.743494033813477, |
|
"logps/rejected": -69.86903381347656, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.16401787102222443, |
|
"rewards/margins": 2.1475062370300293, |
|
"rewards/rejected": -2.311523914337158, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.8121374386434628, |
|
"grad_norm": 15.107300758361816, |
|
"learning_rate": 9.393128067826863e-06, |
|
"logits/chosen": -0.5003937482833862, |
|
"logits/rejected": -0.5252431035041809, |
|
"logps/chosen": -69.12486267089844, |
|
"logps/rejected": -87.68672180175781, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.476993203163147, |
|
"rewards/margins": 0.5065998435020447, |
|
"rewards/rejected": -1.983593225479126, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.8165997322623829, |
|
"grad_norm": 13.862650871276855, |
|
"learning_rate": 9.170013386880858e-06, |
|
"logits/chosen": -0.3862130343914032, |
|
"logits/rejected": -0.40607863664627075, |
|
"logps/chosen": -33.24978256225586, |
|
"logps/rejected": -61.93735885620117, |
|
"loss": 0.4278, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3195345401763916, |
|
"rewards/margins": 2.819258689880371, |
|
"rewards/rejected": -3.1387929916381836, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.821062025881303, |
|
"grad_norm": 7.556354999542236, |
|
"learning_rate": 8.946898705934851e-06, |
|
"logits/chosen": -0.413855642080307, |
|
"logits/rejected": -0.4082458019256592, |
|
"logps/chosen": -66.56385803222656, |
|
"logps/rejected": -77.54808044433594, |
|
"loss": 0.6053, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.5070929527282715, |
|
"rewards/margins": 1.2685377597808838, |
|
"rewards/rejected": -2.7756309509277344, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.8255243195002231, |
|
"grad_norm": 12.322093963623047, |
|
"learning_rate": 8.723784024988845e-06, |
|
"logits/chosen": -0.5252609252929688, |
|
"logits/rejected": -0.5314427614212036, |
|
"logps/chosen": -60.695709228515625, |
|
"logps/rejected": -102.69441223144531, |
|
"loss": 0.5461, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7939317226409912, |
|
"rewards/margins": 0.7528426051139832, |
|
"rewards/rejected": -1.5467742681503296, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.8299866131191432, |
|
"grad_norm": 15.452473640441895, |
|
"learning_rate": 8.500669344042838e-06, |
|
"logits/chosen": -0.4900451600551605, |
|
"logits/rejected": -0.49664202332496643, |
|
"logps/chosen": -49.01878356933594, |
|
"logps/rejected": -67.43014526367188, |
|
"loss": 0.55, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6613854169845581, |
|
"rewards/margins": 2.0071310997009277, |
|
"rewards/rejected": -2.6685163974761963, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.8344489067380634, |
|
"grad_norm": 12.514020919799805, |
|
"learning_rate": 8.277554663096832e-06, |
|
"logits/chosen": -0.5205062031745911, |
|
"logits/rejected": -0.4845796227455139, |
|
"logps/chosen": -56.92864990234375, |
|
"logps/rejected": -40.073753356933594, |
|
"loss": 0.4362, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.832059383392334, |
|
"rewards/margins": 0.7743573188781738, |
|
"rewards/rejected": -1.6064167022705078, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.8389112003569835, |
|
"grad_norm": 4.63672399520874, |
|
"learning_rate": 8.054439982150825e-06, |
|
"logits/chosen": -0.5098186731338501, |
|
"logits/rejected": -0.503180742263794, |
|
"logps/chosen": -63.7332878112793, |
|
"logps/rejected": -56.689796447753906, |
|
"loss": 0.5534, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6310215592384338, |
|
"rewards/margins": 0.4131811261177063, |
|
"rewards/rejected": -1.0442026853561401, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.8433734939759037, |
|
"grad_norm": 9.505380630493164, |
|
"learning_rate": 7.83132530120482e-06, |
|
"logits/chosen": -0.38880008459091187, |
|
"logits/rejected": -0.38642123341560364, |
|
"logps/chosen": -57.18964385986328, |
|
"logps/rejected": -63.263214111328125, |
|
"loss": 0.4638, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.522090196609497, |
|
"rewards/margins": 1.053093671798706, |
|
"rewards/rejected": -2.575183629989624, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.8478357875948237, |
|
"grad_norm": 7.378079414367676, |
|
"learning_rate": 7.608210620258813e-06, |
|
"logits/chosen": -0.3517548143863678, |
|
"logits/rejected": -0.3867552876472473, |
|
"logps/chosen": -50.64849090576172, |
|
"logps/rejected": -84.17240905761719, |
|
"loss": 0.6039, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1465795040130615, |
|
"rewards/margins": 1.0569746494293213, |
|
"rewards/rejected": -2.2035536766052246, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.8478357875948237, |
|
"eval_logits/chosen": -0.30358660221099854, |
|
"eval_logits/rejected": -0.3094210922718048, |
|
"eval_logps/chosen": -54.082584381103516, |
|
"eval_logps/rejected": -67.53509521484375, |
|
"eval_loss": 0.4930057227611542, |
|
"eval_rewards/accuracies": 0.7647058963775635, |
|
"eval_rewards/chosen": -0.7424651980400085, |
|
"eval_rewards/margins": 1.127060890197754, |
|
"eval_rewards/rejected": -1.8695260286331177, |
|
"eval_runtime": 101.2398, |
|
"eval_samples_per_second": 10.707, |
|
"eval_steps_per_second": 0.672, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.8522980812137438, |
|
"grad_norm": 9.315518379211426, |
|
"learning_rate": 7.385095939312807e-06, |
|
"logits/chosen": -0.42414218187332153, |
|
"logits/rejected": -0.46152201294898987, |
|
"logps/chosen": -42.91413116455078, |
|
"logps/rejected": -71.15705871582031, |
|
"loss": 0.4844, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9550193548202515, |
|
"rewards/margins": 1.5954539775848389, |
|
"rewards/rejected": -2.55047345161438, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.856760374832664, |
|
"grad_norm": 10.241185188293457, |
|
"learning_rate": 7.1619812583668015e-06, |
|
"logits/chosen": -0.36176812648773193, |
|
"logits/rejected": -0.3657335638999939, |
|
"logps/chosen": -50.08300018310547, |
|
"logps/rejected": -69.12751007080078, |
|
"loss": 0.4815, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1663748025894165, |
|
"rewards/margins": 0.40770405530929565, |
|
"rewards/rejected": -1.574078917503357, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.8612226684515841, |
|
"grad_norm": 7.457269668579102, |
|
"learning_rate": 6.938866577420794e-06, |
|
"logits/chosen": -0.45793724060058594, |
|
"logits/rejected": -0.45489731431007385, |
|
"logps/chosen": -87.29850769042969, |
|
"logps/rejected": -81.79814147949219, |
|
"loss": 0.599, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4621846675872803, |
|
"rewards/margins": 0.8464537858963013, |
|
"rewards/rejected": -2.308638334274292, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.8656849620705043, |
|
"grad_norm": 9.775691032409668, |
|
"learning_rate": 6.715751896474788e-06, |
|
"logits/chosen": -0.37481555342674255, |
|
"logits/rejected": -0.40004855394363403, |
|
"logps/chosen": -47.608821868896484, |
|
"logps/rejected": -74.66232299804688, |
|
"loss": 0.4812, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7277095913887024, |
|
"rewards/margins": 1.2722523212432861, |
|
"rewards/rejected": -1.9999618530273438, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.8701472556894244, |
|
"grad_norm": 6.072810649871826, |
|
"learning_rate": 6.492637215528783e-06, |
|
"logits/chosen": -0.4388096332550049, |
|
"logits/rejected": -0.4612492620944977, |
|
"logps/chosen": -33.26784896850586, |
|
"logps/rejected": -62.914772033691406, |
|
"loss": 0.4558, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2671688199043274, |
|
"rewards/margins": 1.4179699420928955, |
|
"rewards/rejected": -1.6851387023925781, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.8746095493083444, |
|
"grad_norm": 13.426563262939453, |
|
"learning_rate": 6.269522534582775e-06, |
|
"logits/chosen": -0.3368372917175293, |
|
"logits/rejected": -0.3831294775009155, |
|
"logps/chosen": -23.782691955566406, |
|
"logps/rejected": -72.92296600341797, |
|
"loss": 0.4753, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7565244436264038, |
|
"rewards/margins": 1.568673014640808, |
|
"rewards/rejected": -2.325197458267212, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.8790718429272646, |
|
"grad_norm": 9.663064002990723, |
|
"learning_rate": 6.0464078536367696e-06, |
|
"logits/chosen": -0.47802871465682983, |
|
"logits/rejected": -0.4481546878814697, |
|
"logps/chosen": -74.21025848388672, |
|
"logps/rejected": -57.58526611328125, |
|
"loss": 0.5249, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.378021240234375, |
|
"rewards/margins": -0.19112446904182434, |
|
"rewards/rejected": -1.186896562576294, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.8835341365461847, |
|
"grad_norm": 8.557766914367676, |
|
"learning_rate": 5.823293172690764e-06, |
|
"logits/chosen": -0.38979801535606384, |
|
"logits/rejected": -0.40961775183677673, |
|
"logps/chosen": -53.91607666015625, |
|
"logps/rejected": -77.83052062988281, |
|
"loss": 0.5678, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.4018312692642212, |
|
"rewards/margins": 0.9410960078239441, |
|
"rewards/rejected": -2.3429272174835205, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.8879964301651049, |
|
"grad_norm": 10.93035888671875, |
|
"learning_rate": 5.600178491744757e-06, |
|
"logits/chosen": -0.42609018087387085, |
|
"logits/rejected": -0.434058278799057, |
|
"logps/chosen": -77.43383026123047, |
|
"logps/rejected": -81.38287353515625, |
|
"loss": 0.6048, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.5246906280517578, |
|
"rewards/margins": 1.1381233930587769, |
|
"rewards/rejected": -2.662813901901245, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.892458723784025, |
|
"grad_norm": 10.252880096435547, |
|
"learning_rate": 5.377063810798751e-06, |
|
"logits/chosen": -0.39261507987976074, |
|
"logits/rejected": -0.43658408522605896, |
|
"logps/chosen": -43.862735748291016, |
|
"logps/rejected": -89.9842529296875, |
|
"loss": 0.4396, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8263942003250122, |
|
"rewards/margins": 1.6156145334243774, |
|
"rewards/rejected": -2.4420089721679688, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.892458723784025, |
|
"eval_logits/chosen": -0.3016761839389801, |
|
"eval_logits/rejected": -0.3078857362270355, |
|
"eval_logps/chosen": -54.635650634765625, |
|
"eval_logps/rejected": -67.65421295166016, |
|
"eval_loss": 0.48789989948272705, |
|
"eval_rewards/accuracies": 0.7426470518112183, |
|
"eval_rewards/chosen": -0.797771692276001, |
|
"eval_rewards/margins": 1.083666443824768, |
|
"eval_rewards/rejected": -1.8814382553100586, |
|
"eval_runtime": 101.3879, |
|
"eval_samples_per_second": 10.692, |
|
"eval_steps_per_second": 0.671, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8969210174029452, |
|
"grad_norm": 8.110907554626465, |
|
"learning_rate": 5.153949129852745e-06, |
|
"logits/chosen": -0.336694598197937, |
|
"logits/rejected": -0.39082592725753784, |
|
"logps/chosen": -35.137779235839844, |
|
"logps/rejected": -91.90613555908203, |
|
"loss": 0.5838, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5796006917953491, |
|
"rewards/margins": 2.7877049446105957, |
|
"rewards/rejected": -3.3673057556152344, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.9013833110218652, |
|
"grad_norm": 12.234983444213867, |
|
"learning_rate": 4.9308344489067385e-06, |
|
"logits/chosen": -0.4599209427833557, |
|
"logits/rejected": -0.48277148604393005, |
|
"logps/chosen": -16.474313735961914, |
|
"logps/rejected": -40.7441520690918, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1433514654636383, |
|
"rewards/margins": 0.9705824851989746, |
|
"rewards/rejected": -1.1139341592788696, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.9058456046407853, |
|
"grad_norm": 6.006363868713379, |
|
"learning_rate": 4.707719767960732e-06, |
|
"logits/chosen": -0.5185050964355469, |
|
"logits/rejected": -0.5282645225524902, |
|
"logps/chosen": -63.518226623535156, |
|
"logps/rejected": -78.12336730957031, |
|
"loss": 0.4656, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1981505155563354, |
|
"rewards/margins": 0.7462947368621826, |
|
"rewards/rejected": -1.9444453716278076, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.9103078982597055, |
|
"grad_norm": 13.10690689086914, |
|
"learning_rate": 4.484605087014726e-06, |
|
"logits/chosen": -0.318155437707901, |
|
"logits/rejected": -0.3284832239151001, |
|
"logps/chosen": -50.32611846923828, |
|
"logps/rejected": -86.18081665039062, |
|
"loss": 0.4933, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7798346281051636, |
|
"rewards/margins": 1.6652348041534424, |
|
"rewards/rejected": -2.4450693130493164, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.9147701918786256, |
|
"grad_norm": 5.4444122314453125, |
|
"learning_rate": 4.26149040606872e-06, |
|
"logits/chosen": -0.6425202488899231, |
|
"logits/rejected": -0.6391957998275757, |
|
"logps/chosen": -46.43994140625, |
|
"logps/rejected": -53.859893798828125, |
|
"loss": 0.4816, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5537580847740173, |
|
"rewards/margins": 0.553223729133606, |
|
"rewards/rejected": -1.1069817543029785, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.9192324854975458, |
|
"grad_norm": 9.501949310302734, |
|
"learning_rate": 4.038375725122713e-06, |
|
"logits/chosen": -0.4042991101741791, |
|
"logits/rejected": -0.4195373058319092, |
|
"logps/chosen": -51.28417205810547, |
|
"logps/rejected": -87.4020767211914, |
|
"loss": 0.521, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2516388893127441, |
|
"rewards/margins": 1.711236596107483, |
|
"rewards/rejected": -2.9628753662109375, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.9236947791164659, |
|
"grad_norm": 10.44618034362793, |
|
"learning_rate": 3.8152610441767074e-06, |
|
"logits/chosen": -0.4849855899810791, |
|
"logits/rejected": -0.4761204719543457, |
|
"logps/chosen": -68.31929016113281, |
|
"logps/rejected": -76.27852630615234, |
|
"loss": 0.5101, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5858898758888245, |
|
"rewards/margins": 1.2441669702529907, |
|
"rewards/rejected": -1.8300567865371704, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.928157072735386, |
|
"grad_norm": 6.363342761993408, |
|
"learning_rate": 3.592146363230701e-06, |
|
"logits/chosen": -0.34438687562942505, |
|
"logits/rejected": -0.3649711310863495, |
|
"logps/chosen": -46.63995361328125, |
|
"logps/rejected": -74.31135559082031, |
|
"loss": 0.4117, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7127798795700073, |
|
"rewards/margins": 2.289309024810791, |
|
"rewards/rejected": -3.002089023590088, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.9326193663543061, |
|
"grad_norm": 13.498308181762695, |
|
"learning_rate": 3.3690316822846943e-06, |
|
"logits/chosen": -0.508689820766449, |
|
"logits/rejected": -0.4503362774848938, |
|
"logps/chosen": -46.135929107666016, |
|
"logps/rejected": -34.64082717895508, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5625860095024109, |
|
"rewards/margins": 1.3244779109954834, |
|
"rewards/rejected": -1.8870636224746704, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.9370816599732262, |
|
"grad_norm": 10.539041519165039, |
|
"learning_rate": 3.1459170013386886e-06, |
|
"logits/chosen": -0.5147204995155334, |
|
"logits/rejected": -0.5219804048538208, |
|
"logps/chosen": -66.9437026977539, |
|
"logps/rejected": -99.84492492675781, |
|
"loss": 0.5079, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7651069760322571, |
|
"rewards/margins": 0.6679459810256958, |
|
"rewards/rejected": -1.433052897453308, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.9370816599732262, |
|
"eval_logits/chosen": -0.3009275496006012, |
|
"eval_logits/rejected": -0.30699196457862854, |
|
"eval_logps/chosen": -54.10767364501953, |
|
"eval_logps/rejected": -67.41754150390625, |
|
"eval_loss": 0.4908609390258789, |
|
"eval_rewards/accuracies": 0.7867646813392639, |
|
"eval_rewards/chosen": -0.7449739575386047, |
|
"eval_rewards/margins": 1.1127972602844238, |
|
"eval_rewards/rejected": -1.8577712774276733, |
|
"eval_runtime": 107.4936, |
|
"eval_samples_per_second": 10.084, |
|
"eval_steps_per_second": 0.633, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.9415439535921464, |
|
"grad_norm": 10.287480354309082, |
|
"learning_rate": 2.922802320392682e-06, |
|
"logits/chosen": -0.4798213839530945, |
|
"logits/rejected": -0.4891911447048187, |
|
"logps/chosen": -46.30778121948242, |
|
"logps/rejected": -77.4688491821289, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1262010335922241, |
|
"rewards/margins": 1.3376787900924683, |
|
"rewards/rejected": -2.4638798236846924, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.9460062472110665, |
|
"grad_norm": 5.710153579711914, |
|
"learning_rate": 2.699687639446676e-06, |
|
"logits/chosen": -0.4680832326412201, |
|
"logits/rejected": -0.513514518737793, |
|
"logps/chosen": -23.656709671020508, |
|
"logps/rejected": -81.7193603515625, |
|
"loss": 0.4147, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08419324457645416, |
|
"rewards/margins": 2.462780475616455, |
|
"rewards/rejected": -2.546973705291748, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.9504685408299867, |
|
"grad_norm": 8.636849403381348, |
|
"learning_rate": 2.4765729585006694e-06, |
|
"logits/chosen": -0.5273581743240356, |
|
"logits/rejected": -0.5214824676513672, |
|
"logps/chosen": -59.94758224487305, |
|
"logps/rejected": -74.18995666503906, |
|
"loss": 0.4853, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1900839805603027, |
|
"rewards/margins": 1.1294690370559692, |
|
"rewards/rejected": -2.3195528984069824, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.9549308344489067, |
|
"grad_norm": 9.620250701904297, |
|
"learning_rate": 2.2534582775546633e-06, |
|
"logits/chosen": -0.3756674826145172, |
|
"logits/rejected": -0.3716146647930145, |
|
"logps/chosen": -39.12682342529297, |
|
"logps/rejected": -54.693084716796875, |
|
"loss": 0.49, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.43729519844055176, |
|
"rewards/margins": 1.4970862865447998, |
|
"rewards/rejected": -1.9343814849853516, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.9593931280678268, |
|
"grad_norm": 10.2340726852417, |
|
"learning_rate": 2.030343596608657e-06, |
|
"logits/chosen": -0.35947781801223755, |
|
"logits/rejected": -0.3904605507850647, |
|
"logps/chosen": -49.44614791870117, |
|
"logps/rejected": -90.57923889160156, |
|
"loss": 0.4216, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3010484576225281, |
|
"rewards/margins": 2.3448996543884277, |
|
"rewards/rejected": -2.6459481716156006, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.963855421686747, |
|
"grad_norm": 10.500276565551758, |
|
"learning_rate": 1.8072289156626506e-06, |
|
"logits/chosen": -0.4205097258090973, |
|
"logits/rejected": -0.41137799620628357, |
|
"logps/chosen": -59.52259063720703, |
|
"logps/rejected": -76.6214828491211, |
|
"loss": 0.3888, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.5943686962127686, |
|
"rewards/margins": 0.9938135147094727, |
|
"rewards/rejected": -2.588181972503662, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.9683177153056671, |
|
"grad_norm": 4.686072826385498, |
|
"learning_rate": 1.5841142347166445e-06, |
|
"logits/chosen": -0.48913031816482544, |
|
"logits/rejected": -0.48259633779525757, |
|
"logps/chosen": -49.0291748046875, |
|
"logps/rejected": -72.66325378417969, |
|
"loss": 0.4859, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9799516797065735, |
|
"rewards/margins": 1.6757595539093018, |
|
"rewards/rejected": -2.6557109355926514, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.9727800089245873, |
|
"grad_norm": 13.50939655303955, |
|
"learning_rate": 1.3609995537706381e-06, |
|
"logits/chosen": -0.3935714066028595, |
|
"logits/rejected": -0.3884749412536621, |
|
"logps/chosen": -65.09027862548828, |
|
"logps/rejected": -67.38182067871094, |
|
"loss": 0.6266, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.304712176322937, |
|
"rewards/margins": 0.5648558139801025, |
|
"rewards/rejected": -1.86956787109375, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.9772423025435074, |
|
"grad_norm": 11.413427352905273, |
|
"learning_rate": 1.1378848728246318e-06, |
|
"logits/chosen": -0.3162749409675598, |
|
"logits/rejected": -0.3454035520553589, |
|
"logps/chosen": -67.44561767578125, |
|
"logps/rejected": -83.52703094482422, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.085411548614502, |
|
"rewards/margins": 0.794529139995575, |
|
"rewards/rejected": -1.8799407482147217, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.9817045961624274, |
|
"grad_norm": 9.88737678527832, |
|
"learning_rate": 9.147701918786257e-07, |
|
"logits/chosen": -0.49362993240356445, |
|
"logits/rejected": -0.5343436598777771, |
|
"logps/chosen": -60.54063034057617, |
|
"logps/rejected": -91.24678802490234, |
|
"loss": 0.5196, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8494547009468079, |
|
"rewards/margins": 1.7675020694732666, |
|
"rewards/rejected": -2.6169567108154297, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.9817045961624274, |
|
"eval_logits/chosen": -0.29997923970222473, |
|
"eval_logits/rejected": -0.3060593605041504, |
|
"eval_logps/chosen": -54.62287902832031, |
|
"eval_logps/rejected": -67.98063659667969, |
|
"eval_loss": 0.4901544153690338, |
|
"eval_rewards/accuracies": 0.7573529481887817, |
|
"eval_rewards/chosen": -0.7964945435523987, |
|
"eval_rewards/margins": 1.1175856590270996, |
|
"eval_rewards/rejected": -1.914080262184143, |
|
"eval_runtime": 101.3687, |
|
"eval_samples_per_second": 10.694, |
|
"eval_steps_per_second": 0.671, |
|
"step": 2200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2241, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |