|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 3821, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 1.3054830287206268e-08, |
|
"logits/chosen": -2.377302885055542, |
|
"logits/rejected": -2.2193117141723633, |
|
"logps/chosen": -290.4185485839844, |
|
"logps/rejected": -374.6501770019531, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.40625, |
|
"learning_rate": 1.3054830287206266e-07, |
|
"logits/chosen": -2.25045108795166, |
|
"logits/rejected": -2.052776575088501, |
|
"logps/chosen": -279.61688232421875, |
|
"logps/rejected": -245.4197540283203, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.000771976076066494, |
|
"rewards/margins": 0.00010288292105542496, |
|
"rewards/rejected": 0.0006690931040793657, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.484375, |
|
"learning_rate": 2.610966057441253e-07, |
|
"logits/chosen": -2.2451391220092773, |
|
"logits/rejected": -1.944021224975586, |
|
"logps/chosen": -305.45184326171875, |
|
"logps/rejected": -237.7191619873047, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.004307927563786507, |
|
"rewards/margins": 0.0011060098186135292, |
|
"rewards/rejected": 0.003201917978003621, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.3125, |
|
"learning_rate": 3.9164490861618804e-07, |
|
"logits/chosen": -2.2053542137145996, |
|
"logits/rejected": -2.136805772781372, |
|
"logps/chosen": -251.1873016357422, |
|
"logps/rejected": -251.39126586914062, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.012356054969131947, |
|
"rewards/margins": 0.0023143726866692305, |
|
"rewards/rejected": 0.010041682049632072, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.9453125, |
|
"learning_rate": 5.221932114882506e-07, |
|
"logits/chosen": -2.062053918838501, |
|
"logits/rejected": -2.0244908332824707, |
|
"logps/chosen": -216.23828125, |
|
"logps/rejected": -221.68917846679688, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.019059285521507263, |
|
"rewards/margins": 0.0032902732491493225, |
|
"rewards/rejected": 0.01576901227235794, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.078125, |
|
"learning_rate": 6.527415143603135e-07, |
|
"logits/chosen": -2.1121723651885986, |
|
"logits/rejected": -2.1005072593688965, |
|
"logps/chosen": -266.8817443847656, |
|
"logps/rejected": -234.3415069580078, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.030057832598686218, |
|
"rewards/margins": 0.005467818584293127, |
|
"rewards/rejected": 0.024590013548731804, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.125, |
|
"learning_rate": 7.832898172323761e-07, |
|
"logits/chosen": -2.0995335578918457, |
|
"logits/rejected": -1.9425058364868164, |
|
"logps/chosen": -252.32351684570312, |
|
"logps/rejected": -226.69961547851562, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.03176448494195938, |
|
"rewards/margins": 0.006372343748807907, |
|
"rewards/rejected": 0.025392139330506325, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.03125, |
|
"learning_rate": 9.138381201044387e-07, |
|
"logits/chosen": -2.2442469596862793, |
|
"logits/rejected": -2.036492347717285, |
|
"logps/chosen": -272.0433044433594, |
|
"logps/rejected": -246.6951446533203, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.04112860932946205, |
|
"rewards/margins": 0.010742614977061749, |
|
"rewards/rejected": 0.030385995283722878, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.390625, |
|
"learning_rate": 1.0443864229765013e-06, |
|
"logits/chosen": -2.153740882873535, |
|
"logits/rejected": -1.977267861366272, |
|
"logps/chosen": -257.5650329589844, |
|
"logps/rejected": -246.85354614257812, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.038635507225990295, |
|
"rewards/margins": 0.012301743030548096, |
|
"rewards/rejected": 0.0263337641954422, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.1875, |
|
"learning_rate": 1.1749347258485642e-06, |
|
"logits/chosen": -2.136314868927002, |
|
"logits/rejected": -2.000256061553955, |
|
"logps/chosen": -250.14096069335938, |
|
"logps/rejected": -234.5118408203125, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.04195228964090347, |
|
"rewards/margins": 0.017196740955114365, |
|
"rewards/rejected": 0.02475554868578911, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.125, |
|
"learning_rate": 1.305483028720627e-06, |
|
"logits/chosen": -2.179086208343506, |
|
"logits/rejected": -2.068403482437134, |
|
"logps/chosen": -246.95883178710938, |
|
"logps/rejected": -230.7919921875, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.04810682684183121, |
|
"rewards/margins": 0.023308780044317245, |
|
"rewards/rejected": 0.024798044934868813, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_logits/chosen": -2.095933198928833, |
|
"eval_logits/rejected": -1.9564727544784546, |
|
"eval_logps/chosen": -259.64715576171875, |
|
"eval_logps/rejected": -241.9028778076172, |
|
"eval_loss": 0.6821568012237549, |
|
"eval_rewards/accuracies": 0.6545000076293945, |
|
"eval_rewards/chosen": 0.05004846677184105, |
|
"eval_rewards/margins": 0.02299799770116806, |
|
"eval_rewards/rejected": 0.02705046720802784, |
|
"eval_runtime": 381.806, |
|
"eval_samples_per_second": 5.238, |
|
"eval_steps_per_second": 0.655, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.3125, |
|
"learning_rate": 1.4360313315926894e-06, |
|
"logits/chosen": -2.1454405784606934, |
|
"logits/rejected": -2.0017640590667725, |
|
"logps/chosen": -284.425537109375, |
|
"logps/rejected": -238.8695526123047, |
|
"loss": 0.6795, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.049382902681827545, |
|
"rewards/margins": 0.02859182097017765, |
|
"rewards/rejected": 0.020791077986359596, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 1.5665796344647521e-06, |
|
"logits/chosen": -2.1937575340270996, |
|
"logits/rejected": -2.054399013519287, |
|
"logps/chosen": -287.4629821777344, |
|
"logps/rejected": -271.8957824707031, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.05581967160105705, |
|
"rewards/margins": 0.042316947132349014, |
|
"rewards/rejected": 0.013502727262675762, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.765625, |
|
"learning_rate": 1.6971279373368146e-06, |
|
"logits/chosen": -2.208482265472412, |
|
"logits/rejected": -2.118875503540039, |
|
"logps/chosen": -250.0573272705078, |
|
"logps/rejected": -252.57418823242188, |
|
"loss": 0.6698, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.050946980714797974, |
|
"rewards/margins": 0.049403756856918335, |
|
"rewards/rejected": 0.0015432273503392935, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.484375, |
|
"learning_rate": 1.8276762402088774e-06, |
|
"logits/chosen": -2.2458949089050293, |
|
"logits/rejected": -1.911431074142456, |
|
"logps/chosen": -270.4693298339844, |
|
"logps/rejected": -226.22677612304688, |
|
"loss": 0.6685, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.04268602281808853, |
|
"rewards/margins": 0.05290870741009712, |
|
"rewards/rejected": -0.010222683660686016, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.640625, |
|
"learning_rate": 1.9582245430809403e-06, |
|
"logits/chosen": -2.2650039196014404, |
|
"logits/rejected": -2.039114475250244, |
|
"logps/chosen": -280.2913818359375, |
|
"logps/rejected": -242.7501983642578, |
|
"loss": 0.6678, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.03719528391957283, |
|
"rewards/margins": 0.05549495667219162, |
|
"rewards/rejected": -0.01829967275261879, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.671875, |
|
"learning_rate": 2.0887728459530026e-06, |
|
"logits/chosen": -2.1557822227478027, |
|
"logits/rejected": -2.0535261631011963, |
|
"logps/chosen": -256.06103515625, |
|
"logps/rejected": -261.87261962890625, |
|
"loss": 0.6687, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.007628369145095348, |
|
"rewards/margins": 0.05603449419140816, |
|
"rewards/rejected": -0.04840613156557083, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.875, |
|
"learning_rate": 2.2193211488250653e-06, |
|
"logits/chosen": -2.125109910964966, |
|
"logits/rejected": -1.9704573154449463, |
|
"logps/chosen": -220.9778594970703, |
|
"logps/rejected": -228.26919555664062, |
|
"loss": 0.671, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.0038716509006917477, |
|
"rewards/margins": 0.05044783279299736, |
|
"rewards/rejected": -0.05431948974728584, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.53125, |
|
"learning_rate": 2.3498694516971284e-06, |
|
"logits/chosen": -2.1243832111358643, |
|
"logits/rejected": -1.9889084100723267, |
|
"logps/chosen": -258.29095458984375, |
|
"logps/rejected": -251.7142333984375, |
|
"loss": 0.6638, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.025864282622933388, |
|
"rewards/margins": 0.06769417971372604, |
|
"rewards/rejected": -0.09355846792459488, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.015625, |
|
"learning_rate": 2.4804177545691907e-06, |
|
"logits/chosen": -2.2455646991729736, |
|
"logits/rejected": -2.0299086570739746, |
|
"logps/chosen": -272.17633056640625, |
|
"logps/rejected": -253.8187255859375, |
|
"loss": 0.6499, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.020921263843774796, |
|
"rewards/margins": 0.09995204210281372, |
|
"rewards/rejected": -0.12087330967187881, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.546875, |
|
"learning_rate": 2.610966057441254e-06, |
|
"logits/chosen": -2.1975388526916504, |
|
"logits/rejected": -1.9570707082748413, |
|
"logps/chosen": -264.46234130859375, |
|
"logps/rejected": -235.4163818359375, |
|
"loss": 0.6548, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.14753015339374542, |
|
"rewards/margins": 0.09057016670703888, |
|
"rewards/rejected": -0.23810029029846191, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": -2.0694758892059326, |
|
"eval_logits/rejected": -1.9328563213348389, |
|
"eval_logps/chosen": -279.5373229980469, |
|
"eval_logps/rejected": -269.7627868652344, |
|
"eval_loss": 0.6499924063682556, |
|
"eval_rewards/accuracies": 0.6779999732971191, |
|
"eval_rewards/chosen": -0.1488528698682785, |
|
"eval_rewards/margins": 0.10269534587860107, |
|
"eval_rewards/rejected": -0.2515482008457184, |
|
"eval_runtime": 382.022, |
|
"eval_samples_per_second": 5.235, |
|
"eval_steps_per_second": 0.654, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.140625, |
|
"learning_rate": 2.741514360313316e-06, |
|
"logits/chosen": -2.198995590209961, |
|
"logits/rejected": -1.9819616079330444, |
|
"logps/chosen": -271.3312072753906, |
|
"logps/rejected": -252.93746948242188, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.09111092239618301, |
|
"rewards/margins": 0.1327463835477829, |
|
"rewards/rejected": -0.2238573133945465, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 3.1875, |
|
"learning_rate": 2.872062663185379e-06, |
|
"logits/chosen": -2.097423553466797, |
|
"logits/rejected": -1.9822295904159546, |
|
"logps/chosen": -259.9545593261719, |
|
"logps/rejected": -246.3585662841797, |
|
"loss": 0.6394, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.05938801169395447, |
|
"rewards/margins": 0.12806808948516846, |
|
"rewards/rejected": -0.18745610117912292, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.40625, |
|
"learning_rate": 3.0026109660574416e-06, |
|
"logits/chosen": -2.2377123832702637, |
|
"logits/rejected": -2.050795078277588, |
|
"logps/chosen": -315.82159423828125, |
|
"logps/rejected": -288.96539306640625, |
|
"loss": 0.6629, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.09652389585971832, |
|
"rewards/margins": 0.08648413419723511, |
|
"rewards/rejected": -0.18300803005695343, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 3.21875, |
|
"learning_rate": 3.1331592689295043e-06, |
|
"logits/chosen": -2.1486618518829346, |
|
"logits/rejected": -1.961085319519043, |
|
"logps/chosen": -312.89373779296875, |
|
"logps/rejected": -312.0883483886719, |
|
"loss": 0.6388, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.1659378707408905, |
|
"rewards/margins": 0.1430220901966095, |
|
"rewards/rejected": -0.3089599311351776, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.15625, |
|
"learning_rate": 3.263707571801567e-06, |
|
"logits/chosen": -2.112567186355591, |
|
"logits/rejected": -2.012845039367676, |
|
"logps/chosen": -277.0249938964844, |
|
"logps/rejected": -268.689208984375, |
|
"loss": 0.6263, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.1744508147239685, |
|
"rewards/margins": 0.17131540179252625, |
|
"rewards/rejected": -0.34576624631881714, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.53125, |
|
"learning_rate": 3.3942558746736293e-06, |
|
"logits/chosen": -2.1583478450775146, |
|
"logits/rejected": -1.9551265239715576, |
|
"logps/chosen": -310.0099792480469, |
|
"logps/rejected": -299.52789306640625, |
|
"loss": 0.6515, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.37605080008506775, |
|
"rewards/margins": 0.11539731919765472, |
|
"rewards/rejected": -0.49144816398620605, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 3.296875, |
|
"learning_rate": 3.524804177545692e-06, |
|
"logits/chosen": -2.0597262382507324, |
|
"logits/rejected": -1.9347015619277954, |
|
"logps/chosen": -287.3021545410156, |
|
"logps/rejected": -277.96014404296875, |
|
"loss": 0.6083, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.26627668738365173, |
|
"rewards/margins": 0.22069358825683594, |
|
"rewards/rejected": -0.48697033524513245, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.25, |
|
"learning_rate": 3.6553524804177547e-06, |
|
"logits/chosen": -2.125945568084717, |
|
"logits/rejected": -1.954007863998413, |
|
"logps/chosen": -298.900390625, |
|
"logps/rejected": -293.0090637207031, |
|
"loss": 0.6386, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.39951270818710327, |
|
"rewards/margins": 0.1558128446340561, |
|
"rewards/rejected": -0.5553255081176758, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.796875, |
|
"learning_rate": 3.7859007832898174e-06, |
|
"logits/chosen": -2.0477206707000732, |
|
"logits/rejected": -1.9491031169891357, |
|
"logps/chosen": -324.5054626464844, |
|
"logps/rejected": -319.0287780761719, |
|
"loss": 0.6271, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.38871732354164124, |
|
"rewards/margins": 0.19628065824508667, |
|
"rewards/rejected": -0.5849979519844055, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.96875, |
|
"learning_rate": 3.9164490861618806e-06, |
|
"logits/chosen": -2.0910630226135254, |
|
"logits/rejected": -1.888196587562561, |
|
"logps/chosen": -272.47198486328125, |
|
"logps/rejected": -281.57830810546875, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.28343814611434937, |
|
"rewards/margins": 0.22831246256828308, |
|
"rewards/rejected": -0.5117505788803101, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_logits/chosen": -2.011384963989258, |
|
"eval_logits/rejected": -1.8770692348480225, |
|
"eval_logps/chosen": -294.2168884277344, |
|
"eval_logps/rejected": -294.5921325683594, |
|
"eval_loss": 0.6213397979736328, |
|
"eval_rewards/accuracies": 0.6809999942779541, |
|
"eval_rewards/chosen": -0.29564887285232544, |
|
"eval_rewards/margins": 0.20419315993785858, |
|
"eval_rewards/rejected": -0.4998420178890228, |
|
"eval_runtime": 381.8433, |
|
"eval_samples_per_second": 5.238, |
|
"eval_steps_per_second": 0.655, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.03125, |
|
"learning_rate": 4.046997389033943e-06, |
|
"logits/chosen": -2.2418582439422607, |
|
"logits/rejected": -2.04129695892334, |
|
"logps/chosen": -316.5093994140625, |
|
"logps/rejected": -291.79010009765625, |
|
"loss": 0.5836, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.27603110671043396, |
|
"rewards/margins": 0.2892019748687744, |
|
"rewards/rejected": -0.565233051776886, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.40625, |
|
"learning_rate": 4.177545691906005e-06, |
|
"logits/chosen": -2.1178698539733887, |
|
"logits/rejected": -1.9309499263763428, |
|
"logps/chosen": -298.84527587890625, |
|
"logps/rejected": -299.9272155761719, |
|
"loss": 0.6369, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.40581315755844116, |
|
"rewards/margins": 0.1810055673122406, |
|
"rewards/rejected": -0.5868188142776489, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.4375, |
|
"learning_rate": 4.308093994778068e-06, |
|
"logits/chosen": -2.046699047088623, |
|
"logits/rejected": -1.9039798974990845, |
|
"logps/chosen": -296.7830505371094, |
|
"logps/rejected": -293.9065246582031, |
|
"loss": 0.6198, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3718874454498291, |
|
"rewards/margins": 0.21303264796733856, |
|
"rewards/rejected": -0.5849201083183289, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.375, |
|
"learning_rate": 4.4386422976501306e-06, |
|
"logits/chosen": -2.1172854900360107, |
|
"logits/rejected": -2.0036845207214355, |
|
"logps/chosen": -316.01226806640625, |
|
"logps/rejected": -323.5932922363281, |
|
"loss": 0.5946, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.26326456665992737, |
|
"rewards/margins": 0.28980451822280884, |
|
"rewards/rejected": -0.5530691146850586, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 4.569190600522193e-06, |
|
"logits/chosen": -2.042684555053711, |
|
"logits/rejected": -1.8946377038955688, |
|
"logps/chosen": -352.21502685546875, |
|
"logps/rejected": -358.153564453125, |
|
"loss": 0.6413, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6295667886734009, |
|
"rewards/margins": 0.17427758872509003, |
|
"rewards/rejected": -0.8038444519042969, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.015625, |
|
"learning_rate": 4.699738903394257e-06, |
|
"logits/chosen": -2.011836528778076, |
|
"logits/rejected": -1.9665615558624268, |
|
"logps/chosen": -317.6282958984375, |
|
"logps/rejected": -318.0123291015625, |
|
"loss": 0.6161, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7664434909820557, |
|
"rewards/margins": 0.21231558918952942, |
|
"rewards/rejected": -0.9787591099739075, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.53125, |
|
"learning_rate": 4.8302872062663196e-06, |
|
"logits/chosen": -2.1028566360473633, |
|
"logits/rejected": -1.9274108409881592, |
|
"logps/chosen": -356.88507080078125, |
|
"logps/rejected": -335.1341857910156, |
|
"loss": 0.6264, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.7998887300491333, |
|
"rewards/margins": 0.22070667147636414, |
|
"rewards/rejected": -1.0205953121185303, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.59375, |
|
"learning_rate": 4.9608355091383814e-06, |
|
"logits/chosen": -2.069827079772949, |
|
"logits/rejected": -1.8606586456298828, |
|
"logps/chosen": -364.96856689453125, |
|
"logps/rejected": -353.82769775390625, |
|
"loss": 0.6264, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6691190004348755, |
|
"rewards/margins": 0.2223375141620636, |
|
"rewards/rejected": -0.8914563059806824, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.78125, |
|
"learning_rate": 4.9999488562447675e-06, |
|
"logits/chosen": -2.088129997253418, |
|
"logits/rejected": -1.971571683883667, |
|
"logps/chosen": -316.87994384765625, |
|
"logps/rejected": -327.4869079589844, |
|
"loss": 0.5863, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3583374619483948, |
|
"rewards/margins": 0.3061427175998688, |
|
"rewards/rejected": -0.6644802093505859, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 5.125, |
|
"learning_rate": 4.999698361256577e-06, |
|
"logits/chosen": -2.119563341140747, |
|
"logits/rejected": -1.8813574314117432, |
|
"logps/chosen": -296.64593505859375, |
|
"logps/rejected": -276.7133483886719, |
|
"loss": 0.6237, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.31169393658638, |
|
"rewards/margins": 0.207248717546463, |
|
"rewards/rejected": -0.5189425945281982, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -1.9655628204345703, |
|
"eval_logits/rejected": -1.836700201034546, |
|
"eval_logps/chosen": -310.03485107421875, |
|
"eval_logps/rejected": -318.6169738769531, |
|
"eval_loss": 0.6038790345191956, |
|
"eval_rewards/accuracies": 0.6934999823570251, |
|
"eval_rewards/chosen": -0.45382827520370483, |
|
"eval_rewards/margins": 0.2862620949745178, |
|
"eval_rewards/rejected": -0.7400903105735779, |
|
"eval_runtime": 382.0228, |
|
"eval_samples_per_second": 5.235, |
|
"eval_steps_per_second": 0.654, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.0625, |
|
"learning_rate": 4.999239142174581e-06, |
|
"logits/chosen": -1.988959550857544, |
|
"logits/rejected": -1.9292503595352173, |
|
"logps/chosen": -292.4900817871094, |
|
"logps/rejected": -307.29473876953125, |
|
"loss": 0.6499, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.5016773343086243, |
|
"rewards/margins": 0.16585329174995422, |
|
"rewards/rejected": -0.6675306558609009, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 5.375, |
|
"learning_rate": 4.99857123734344e-06, |
|
"logits/chosen": -2.0150246620178223, |
|
"logits/rejected": -1.8929126262664795, |
|
"logps/chosen": -260.4281921386719, |
|
"logps/rejected": -280.3924865722656, |
|
"loss": 0.5908, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3163732588291168, |
|
"rewards/margins": 0.29044631123542786, |
|
"rewards/rejected": -0.6068195104598999, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.75, |
|
"learning_rate": 4.997694702533016e-06, |
|
"logits/chosen": -2.0086240768432617, |
|
"logits/rejected": -1.9487006664276123, |
|
"logps/chosen": -308.3887634277344, |
|
"logps/rejected": -317.20904541015625, |
|
"loss": 0.5817, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3084072172641754, |
|
"rewards/margins": 0.3242705166339874, |
|
"rewards/rejected": -0.6326777338981628, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 7.90625, |
|
"learning_rate": 4.996609610933713e-06, |
|
"logits/chosen": -2.112046718597412, |
|
"logits/rejected": -2.027024984359741, |
|
"logps/chosen": -303.4664306640625, |
|
"logps/rejected": -303.01220703125, |
|
"loss": 0.6025, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3131754994392395, |
|
"rewards/margins": 0.2790473401546478, |
|
"rewards/rejected": -0.5922229290008545, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 5.8125, |
|
"learning_rate": 4.995316053150366e-06, |
|
"logits/chosen": -1.9543377161026, |
|
"logits/rejected": -1.8296692371368408, |
|
"logps/chosen": -309.422119140625, |
|
"logps/rejected": -325.46173095703125, |
|
"loss": 0.5577, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.4158857762813568, |
|
"rewards/margins": 0.3905051648616791, |
|
"rewards/rejected": -0.8063910603523254, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 8.375, |
|
"learning_rate": 4.9938141371946815e-06, |
|
"logits/chosen": -1.9097979068756104, |
|
"logits/rejected": -1.8239259719848633, |
|
"logps/chosen": -370.8164978027344, |
|
"logps/rejected": -396.86004638671875, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0571677684783936, |
|
"rewards/margins": 0.4056069254875183, |
|
"rewards/rejected": -1.4627748727798462, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 5.40625, |
|
"learning_rate": 4.992103988476206e-06, |
|
"logits/chosen": -1.9127140045166016, |
|
"logits/rejected": -1.7631990909576416, |
|
"logps/chosen": -352.392822265625, |
|
"logps/rejected": -381.87896728515625, |
|
"loss": 0.5803, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0857564210891724, |
|
"rewards/margins": 0.4201774597167969, |
|
"rewards/rejected": -1.5059337615966797, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 5.0625, |
|
"learning_rate": 4.990185749791866e-06, |
|
"logits/chosen": -1.892653226852417, |
|
"logits/rejected": -1.7571289539337158, |
|
"logps/chosen": -333.1285095214844, |
|
"logps/rejected": -386.10107421875, |
|
"loss": 0.5413, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7624952793121338, |
|
"rewards/margins": 0.5117734670639038, |
|
"rewards/rejected": -1.2742688655853271, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 7.0, |
|
"learning_rate": 4.9880595813140395e-06, |
|
"logits/chosen": -1.8925682306289673, |
|
"logits/rejected": -1.7469890117645264, |
|
"logps/chosen": -369.3451232910156, |
|
"logps/rejected": -387.9554443359375, |
|
"loss": 0.5514, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8445364832878113, |
|
"rewards/margins": 0.4895913600921631, |
|
"rewards/rejected": -1.3341277837753296, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 4.985725660577184e-06, |
|
"logits/chosen": -1.8205528259277344, |
|
"logits/rejected": -1.6672782897949219, |
|
"logps/chosen": -371.17864990234375, |
|
"logps/rejected": -382.154296875, |
|
"loss": 0.5534, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9741867780685425, |
|
"rewards/margins": 0.522149920463562, |
|
"rewards/rejected": -1.4963366985321045, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_logits/chosen": -1.671244502067566, |
|
"eval_logits/rejected": -1.5403351783752441, |
|
"eval_logps/chosen": -356.194580078125, |
|
"eval_logps/rejected": -383.8828430175781, |
|
"eval_loss": 0.5691964626312256, |
|
"eval_rewards/accuracies": 0.7049999833106995, |
|
"eval_rewards/chosen": -0.9154260158538818, |
|
"eval_rewards/margins": 0.4773229658603668, |
|
"eval_rewards/rejected": -1.3927491903305054, |
|
"eval_runtime": 382.3757, |
|
"eval_samples_per_second": 5.23, |
|
"eval_steps_per_second": 0.654, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 4.983184182463009e-06, |
|
"logits/chosen": -1.7440261840820312, |
|
"logits/rejected": -1.6317085027694702, |
|
"logps/chosen": -373.0206604003906, |
|
"logps/rejected": -391.50970458984375, |
|
"loss": 0.5646, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.947595477104187, |
|
"rewards/margins": 0.555194079875946, |
|
"rewards/rejected": -1.5027896165847778, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 7.65625, |
|
"learning_rate": 4.980435359184203e-06, |
|
"logits/chosen": -1.7637799978256226, |
|
"logits/rejected": -1.7051684856414795, |
|
"logps/chosen": -361.0028991699219, |
|
"logps/rejected": -383.77392578125, |
|
"loss": 0.6028, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.8794111013412476, |
|
"rewards/margins": 0.3896932005882263, |
|
"rewards/rejected": -1.2691043615341187, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 5.3125, |
|
"learning_rate": 4.9774794202667236e-06, |
|
"logits/chosen": -1.7085822820663452, |
|
"logits/rejected": -1.6667120456695557, |
|
"logps/chosen": -398.4223327636719, |
|
"logps/rejected": -447.1837463378906, |
|
"loss": 0.5797, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.373207688331604, |
|
"rewards/margins": 0.4029502272605896, |
|
"rewards/rejected": -1.776158094406128, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 4.974316612530615e-06, |
|
"logits/chosen": -1.6480659246444702, |
|
"logits/rejected": -1.4872467517852783, |
|
"logps/chosen": -413.641845703125, |
|
"logps/rejected": -420.10565185546875, |
|
"loss": 0.5292, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.3515903949737549, |
|
"rewards/margins": 0.5323625206947327, |
|
"rewards/rejected": -1.8839528560638428, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 9.375, |
|
"learning_rate": 4.970947200069416e-06, |
|
"logits/chosen": -1.6254298686981201, |
|
"logits/rejected": -1.5536671876907349, |
|
"logps/chosen": -402.1681213378906, |
|
"logps/rejected": -431.54510498046875, |
|
"loss": 0.5995, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2365509271621704, |
|
"rewards/margins": 0.4915947914123535, |
|
"rewards/rejected": -1.7281455993652344, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 5.90625, |
|
"learning_rate": 4.967371464228096e-06, |
|
"logits/chosen": -1.788649559020996, |
|
"logits/rejected": -1.6893421411514282, |
|
"logps/chosen": -362.63739013671875, |
|
"logps/rejected": -421.24505615234375, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0513819456100464, |
|
"rewards/margins": 0.5819835066795349, |
|
"rewards/rejected": -1.6333656311035156, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 4.963589703579569e-06, |
|
"logits/chosen": -1.7911745309829712, |
|
"logits/rejected": -1.6469875574111938, |
|
"logps/chosen": -439.2314453125, |
|
"logps/rejected": -465.60174560546875, |
|
"loss": 0.5809, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.391915202140808, |
|
"rewards/margins": 0.61050945520401, |
|
"rewards/rejected": -2.002424716949463, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 8.375, |
|
"learning_rate": 4.9596022338997615e-06, |
|
"logits/chosen": -1.7446343898773193, |
|
"logits/rejected": -1.5205295085906982, |
|
"logps/chosen": -424.37664794921875, |
|
"logps/rejected": -455.3761291503906, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.2658993005752563, |
|
"rewards/margins": 0.7207783460617065, |
|
"rewards/rejected": -1.9866775274276733, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 5.9375, |
|
"learning_rate": 4.955409388141243e-06, |
|
"logits/chosen": -1.5974572896957397, |
|
"logits/rejected": -1.4778482913970947, |
|
"logps/chosen": -365.91943359375, |
|
"logps/rejected": -388.0648498535156, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0861928462982178, |
|
"rewards/margins": 0.45802631974220276, |
|
"rewards/rejected": -1.5442192554473877, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 5.5625, |
|
"learning_rate": 4.951011516405429e-06, |
|
"logits/chosen": -1.682959794998169, |
|
"logits/rejected": -1.6160876750946045, |
|
"logps/chosen": -331.21978759765625, |
|
"logps/rejected": -367.4974060058594, |
|
"loss": 0.5613, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8046302795410156, |
|
"rewards/margins": 0.5121658444404602, |
|
"rewards/rejected": -1.316796064376831, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": -1.5049409866333008, |
|
"eval_logits/rejected": -1.3701001405715942, |
|
"eval_logps/chosen": -345.8829650878906, |
|
"eval_logps/rejected": -376.7896423339844, |
|
"eval_loss": 0.5658991932868958, |
|
"eval_rewards/accuracies": 0.7024999856948853, |
|
"eval_rewards/chosen": -0.8123093843460083, |
|
"eval_rewards/margins": 0.5095077753067017, |
|
"eval_rewards/rejected": -1.32181715965271, |
|
"eval_runtime": 382.004, |
|
"eval_samples_per_second": 5.236, |
|
"eval_steps_per_second": 0.654, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 5.375, |
|
"learning_rate": 4.946408985913344e-06, |
|
"logits/chosen": -1.578046202659607, |
|
"logits/rejected": -1.4836609363555908, |
|
"logps/chosen": -328.2045593261719, |
|
"logps/rejected": -375.481201171875, |
|
"loss": 0.5276, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8234087228775024, |
|
"rewards/margins": 0.6500986218452454, |
|
"rewards/rejected": -1.473507285118103, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 11.875, |
|
"learning_rate": 4.941602180974958e-06, |
|
"logits/chosen": -1.5045579671859741, |
|
"logits/rejected": -1.2604496479034424, |
|
"logps/chosen": -402.4884338378906, |
|
"logps/rejected": -422.79736328125, |
|
"loss": 0.5241, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.146611213684082, |
|
"rewards/margins": 0.7896040678024292, |
|
"rewards/rejected": -1.9362151622772217, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 10.5, |
|
"learning_rate": 4.936591502957101e-06, |
|
"logits/chosen": -1.372164249420166, |
|
"logits/rejected": -1.2230699062347412, |
|
"logps/chosen": -414.8818359375, |
|
"logps/rejected": -487.482421875, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.667520523071289, |
|
"rewards/margins": 0.8288544416427612, |
|
"rewards/rejected": -2.4963748455047607, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 4.931377370249946e-06, |
|
"logits/chosen": -1.3338875770568848, |
|
"logits/rejected": -1.1355304718017578, |
|
"logps/chosen": -483.4081115722656, |
|
"logps/rejected": -526.1396484375, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.180846691131592, |
|
"rewards/margins": 0.6847688555717468, |
|
"rewards/rejected": -2.8656158447265625, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 15.6875, |
|
"learning_rate": 4.925960218232073e-06, |
|
"logits/chosen": -1.3147588968276978, |
|
"logits/rejected": -1.1933101415634155, |
|
"logps/chosen": -446.49346923828125, |
|
"logps/rejected": -517.9827270507812, |
|
"loss": 0.5392, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.9119131565093994, |
|
"rewards/margins": 0.8099091649055481, |
|
"rewards/rejected": -2.721822500228882, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 8.875, |
|
"learning_rate": 4.920340499234116e-06, |
|
"logits/chosen": -1.3101979494094849, |
|
"logits/rejected": -1.1101386547088623, |
|
"logps/chosen": -426.4873046875, |
|
"logps/rejected": -446.02801513671875, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.6048357486724854, |
|
"rewards/margins": 0.5474850535392761, |
|
"rewards/rejected": -2.152320623397827, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 4.914518682500995e-06, |
|
"logits/chosen": -1.4778305292129517, |
|
"logits/rejected": -1.3038583993911743, |
|
"logps/chosen": -432.8035583496094, |
|
"logps/rejected": -459.92864990234375, |
|
"loss": 0.5359, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.4926092624664307, |
|
"rewards/margins": 0.6784954071044922, |
|
"rewards/rejected": -2.171104907989502, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 5.40625, |
|
"learning_rate": 4.9084952541527315e-06, |
|
"logits/chosen": -1.3521184921264648, |
|
"logits/rejected": -1.1778732538223267, |
|
"logps/chosen": -430.7608947753906, |
|
"logps/rejected": -458.68072509765625, |
|
"loss": 0.5029, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5742666721343994, |
|
"rewards/margins": 0.7503162622451782, |
|
"rewards/rejected": -2.3245832920074463, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 7.6875, |
|
"learning_rate": 4.902270717143858e-06, |
|
"logits/chosen": -1.3213449716567993, |
|
"logits/rejected": -1.228070855140686, |
|
"logps/chosen": -417.1580505371094, |
|
"logps/rejected": -537.0321044921875, |
|
"loss": 0.4381, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8092586994171143, |
|
"rewards/margins": 1.0819432735443115, |
|
"rewards/rejected": -2.8912017345428467, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 6.5, |
|
"learning_rate": 4.895845591221427e-06, |
|
"logits/chosen": -1.2542212009429932, |
|
"logits/rejected": -1.1810188293457031, |
|
"logps/chosen": -466.4949645996094, |
|
"logps/rejected": -549.9205932617188, |
|
"loss": 0.5139, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.131845474243164, |
|
"rewards/margins": 0.8790606260299683, |
|
"rewards/rejected": -3.0109057426452637, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_logits/chosen": -1.0174403190612793, |
|
"eval_logits/rejected": -0.8923892974853516, |
|
"eval_logps/chosen": -528.3277587890625, |
|
"eval_logps/rejected": -591.3086547851562, |
|
"eval_loss": 0.5571516156196594, |
|
"eval_rewards/accuracies": 0.7145000100135803, |
|
"eval_rewards/chosen": -2.6367568969726562, |
|
"eval_rewards/margins": 0.8302499055862427, |
|
"eval_rewards/rejected": -3.4670066833496094, |
|
"eval_runtime": 382.0721, |
|
"eval_samples_per_second": 5.235, |
|
"eval_steps_per_second": 0.654, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 10.5625, |
|
"learning_rate": 4.8892204128816e-06, |
|
"logits/chosen": -1.1841003894805908, |
|
"logits/rejected": -1.0792133808135986, |
|
"logps/chosen": -517.9019775390625, |
|
"logps/rejected": -578.2611083984375, |
|
"loss": 0.5501, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.5033812522888184, |
|
"rewards/margins": 0.7395020127296448, |
|
"rewards/rejected": -3.2428832054138184, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 8.875, |
|
"learning_rate": 4.882395735324864e-06, |
|
"logits/chosen": -1.1759226322174072, |
|
"logits/rejected": -1.0294206142425537, |
|
"logps/chosen": -477.3987731933594, |
|
"logps/rejected": -544.5623779296875, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.1131680011749268, |
|
"rewards/margins": 0.8433287739753723, |
|
"rewards/rejected": -2.9564967155456543, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 4.87537212840983e-06, |
|
"logits/chosen": -1.1399719715118408, |
|
"logits/rejected": -1.0124037265777588, |
|
"logps/chosen": -500.2403259277344, |
|
"logps/rejected": -533.0379028320312, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.3398513793945312, |
|
"rewards/margins": 0.6334503293037415, |
|
"rewards/rejected": -2.973301887512207, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 12.375, |
|
"learning_rate": 4.8681501786056545e-06, |
|
"logits/chosen": -1.0892612934112549, |
|
"logits/rejected": -0.941753089427948, |
|
"logps/chosen": -450.81402587890625, |
|
"logps/rejected": -503.46636962890625, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.2259793281555176, |
|
"rewards/margins": 0.8490058183670044, |
|
"rewards/rejected": -3.0749852657318115, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 24.0, |
|
"learning_rate": 4.860730488943068e-06, |
|
"logits/chosen": -1.0790389776229858, |
|
"logits/rejected": -1.0216121673583984, |
|
"logps/chosen": -440.62109375, |
|
"logps/rejected": -540.6531372070312, |
|
"loss": 0.4802, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.060957431793213, |
|
"rewards/margins": 1.019281029701233, |
|
"rewards/rejected": -3.0802388191223145, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 7.0, |
|
"learning_rate": 4.853113678964022e-06, |
|
"logits/chosen": -1.1443126201629639, |
|
"logits/rejected": -1.065063238143921, |
|
"logps/chosen": -448.5615234375, |
|
"logps/rejected": -542.3307495117188, |
|
"loss": 0.505, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.76167893409729, |
|
"rewards/margins": 1.016688346862793, |
|
"rewards/rejected": -2.778367280960083, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 5.90625, |
|
"learning_rate": 4.845300384669958e-06, |
|
"logits/chosen": -1.23788583278656, |
|
"logits/rejected": -1.1094398498535156, |
|
"logps/chosen": -407.1124267578125, |
|
"logps/rejected": -459.88226318359375, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5390856266021729, |
|
"rewards/margins": 0.7560388445854187, |
|
"rewards/rejected": -2.2951245307922363, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 16.625, |
|
"learning_rate": 4.837291258468701e-06, |
|
"logits/chosen": -1.3532726764678955, |
|
"logits/rejected": -1.2090624570846558, |
|
"logps/chosen": -449.90447998046875, |
|
"logps/rejected": -503.38067626953125, |
|
"loss": 0.5803, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.645581603050232, |
|
"rewards/margins": 0.783669114112854, |
|
"rewards/rejected": -2.429250955581665, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 7.59375, |
|
"learning_rate": 4.829086969119984e-06, |
|
"logits/chosen": -1.2730779647827148, |
|
"logits/rejected": -1.2738616466522217, |
|
"logps/chosen": -398.4493103027344, |
|
"logps/rejected": -460.91387939453125, |
|
"loss": 0.5907, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.4772454500198364, |
|
"rewards/margins": 0.6072799563407898, |
|
"rewards/rejected": -2.0845253467559814, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 4.820688201679605e-06, |
|
"logits/chosen": -1.559012770652771, |
|
"logits/rejected": -1.2587218284606934, |
|
"logps/chosen": -388.8677673339844, |
|
"logps/rejected": -389.87957763671875, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2623231410980225, |
|
"rewards/margins": 0.6337946057319641, |
|
"rewards/rejected": -1.8961181640625, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -1.246036410331726, |
|
"eval_logits/rejected": -1.1140612363815308, |
|
"eval_logps/chosen": -413.7338562011719, |
|
"eval_logps/rejected": -463.30914306640625, |
|
"eval_loss": 0.5373813509941101, |
|
"eval_rewards/accuracies": 0.7160000205039978, |
|
"eval_rewards/chosen": -1.4908183813095093, |
|
"eval_rewards/margins": 0.6961935758590698, |
|
"eval_rewards/rejected": -2.187012195587158, |
|
"eval_runtime": 382.1333, |
|
"eval_samples_per_second": 5.234, |
|
"eval_steps_per_second": 0.654, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 9.0625, |
|
"learning_rate": 4.8120956574422315e-06, |
|
"logits/chosen": -1.407278060913086, |
|
"logits/rejected": -1.3845430612564087, |
|
"logps/chosen": -428.33648681640625, |
|
"logps/rejected": -478.8470764160156, |
|
"loss": 0.6069, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.544116735458374, |
|
"rewards/margins": 0.5756716132164001, |
|
"rewards/rejected": -2.119788646697998, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 7.625, |
|
"learning_rate": 4.803310053882831e-06, |
|
"logits/chosen": -1.4305765628814697, |
|
"logits/rejected": -1.4192079305648804, |
|
"logps/chosen": -346.76165771484375, |
|
"logps/rejected": -416.07073974609375, |
|
"loss": 0.5573, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1743587255477905, |
|
"rewards/margins": 0.5695887804031372, |
|
"rewards/rejected": -1.7439473867416382, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 4.794332124596775e-06, |
|
"logits/chosen": -1.4643322229385376, |
|
"logits/rejected": -1.3541513681411743, |
|
"logps/chosen": -378.71685791015625, |
|
"logps/rejected": -430.7264709472656, |
|
"loss": 0.5747, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.1027584075927734, |
|
"rewards/margins": 0.5569159984588623, |
|
"rewards/rejected": -1.6596744060516357, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 4.785162619238575e-06, |
|
"logits/chosen": -1.3610130548477173, |
|
"logits/rejected": -1.2018978595733643, |
|
"logps/chosen": -377.59130859375, |
|
"logps/rejected": -424.17108154296875, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.2374261617660522, |
|
"rewards/margins": 0.7390316128730774, |
|
"rewards/rejected": -1.9764575958251953, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 7.25, |
|
"learning_rate": 4.775802303459288e-06, |
|
"logits/chosen": -1.230850100517273, |
|
"logits/rejected": -1.153451919555664, |
|
"logps/chosen": -397.7276611328125, |
|
"logps/rejected": -469.70037841796875, |
|
"loss": 0.5533, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.4996331930160522, |
|
"rewards/margins": 0.7669634819030762, |
|
"rewards/rejected": -2.266597032546997, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.8125, |
|
"learning_rate": 4.766251958842589e-06, |
|
"logits/chosen": -1.196821689605713, |
|
"logits/rejected": -1.0929956436157227, |
|
"logps/chosen": -442.42779541015625, |
|
"logps/rejected": -496.02508544921875, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.6992677450180054, |
|
"rewards/margins": 0.640595018863678, |
|
"rewards/rejected": -2.339862585067749, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 5.96875, |
|
"learning_rate": 4.7565123828395066e-06, |
|
"logits/chosen": -1.1287126541137695, |
|
"logits/rejected": -1.0260584354400635, |
|
"logps/chosen": -434.9798278808594, |
|
"logps/rejected": -504.6143493652344, |
|
"loss": 0.5191, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.790509819984436, |
|
"rewards/margins": 0.7024968266487122, |
|
"rewards/rejected": -2.493006467819214, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 4.746584388701831e-06, |
|
"logits/chosen": -1.1179661750793457, |
|
"logits/rejected": -1.0696125030517578, |
|
"logps/chosen": -474.17364501953125, |
|
"logps/rejected": -547.4193115234375, |
|
"loss": 0.4941, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.1093432903289795, |
|
"rewards/margins": 0.8745004534721375, |
|
"rewards/rejected": -2.9838438034057617, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 11.0, |
|
"learning_rate": 4.736468805414218e-06, |
|
"logits/chosen": -1.0214884281158447, |
|
"logits/rejected": -0.9855283498764038, |
|
"logps/chosen": -477.1600646972656, |
|
"logps/rejected": -576.8958740234375, |
|
"loss": 0.5755, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.297248125076294, |
|
"rewards/margins": 0.8587217330932617, |
|
"rewards/rejected": -3.1559698581695557, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 14.9375, |
|
"learning_rate": 4.7261664776249595e-06, |
|
"logits/chosen": -0.8845041394233704, |
|
"logits/rejected": -0.7875598073005676, |
|
"logps/chosen": -482.1604919433594, |
|
"logps/rejected": -565.8832397460938, |
|
"loss": 0.5211, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.532474994659424, |
|
"rewards/margins": 0.9273085594177246, |
|
"rewards/rejected": -3.4597840309143066, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_logits/chosen": -0.9341001510620117, |
|
"eval_logits/rejected": -0.8115790486335754, |
|
"eval_logps/chosen": -518.949462890625, |
|
"eval_logps/rejected": -584.0806274414062, |
|
"eval_loss": 0.5331768989562988, |
|
"eval_rewards/accuracies": 0.7179999947547913, |
|
"eval_rewards/chosen": -2.5429742336273193, |
|
"eval_rewards/margins": 0.8517529368400574, |
|
"eval_rewards/rejected": -3.3947272300720215, |
|
"eval_runtime": 382.1611, |
|
"eval_samples_per_second": 5.233, |
|
"eval_steps_per_second": 0.654, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 4.715678265575463e-06, |
|
"logits/chosen": -1.1323182582855225, |
|
"logits/rejected": -0.9318205118179321, |
|
"logps/chosen": -521.3104248046875, |
|
"logps/rejected": -533.2903442382812, |
|
"loss": 0.5686, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.3703832626342773, |
|
"rewards/margins": 0.6751216650009155, |
|
"rewards/rejected": -3.0455050468444824, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 8.625, |
|
"learning_rate": 4.705005045028415e-06, |
|
"logits/chosen": -1.0868864059448242, |
|
"logits/rejected": -0.9571698904037476, |
|
"logps/chosen": -469.189208984375, |
|
"logps/rejected": -530.5699462890625, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.0605311393737793, |
|
"rewards/margins": 0.7877290844917297, |
|
"rewards/rejected": -2.8482604026794434, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 4.694147707194659e-06, |
|
"logits/chosen": -1.1987128257751465, |
|
"logits/rejected": -1.1085574626922607, |
|
"logps/chosen": -479.1398010253906, |
|
"logps/rejected": -532.23828125, |
|
"loss": 0.5295, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.0726170539855957, |
|
"rewards/margins": 0.7290612459182739, |
|
"rewards/rejected": -2.80167818069458, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 4.683107158658782e-06, |
|
"logits/chosen": -1.1448571681976318, |
|
"logits/rejected": -1.0365805625915527, |
|
"logps/chosen": -478.0250549316406, |
|
"logps/rejected": -530.4112548828125, |
|
"loss": 0.5083, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.8778432607650757, |
|
"rewards/margins": 0.811355471611023, |
|
"rewards/rejected": -2.6891987323760986, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 9.0625, |
|
"learning_rate": 4.671884321303407e-06, |
|
"logits/chosen": -1.2020542621612549, |
|
"logits/rejected": -1.0928010940551758, |
|
"logps/chosen": -440.04864501953125, |
|
"logps/rejected": -496.198486328125, |
|
"loss": 0.5249, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.9372259378433228, |
|
"rewards/margins": 0.7049869298934937, |
|
"rewards/rejected": -2.6422126293182373, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 6.875, |
|
"learning_rate": 4.660480132232224e-06, |
|
"logits/chosen": -1.2815606594085693, |
|
"logits/rejected": -1.1846911907196045, |
|
"logps/chosen": -445.06915283203125, |
|
"logps/rejected": -479.39093017578125, |
|
"loss": 0.5773, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.7325608730316162, |
|
"rewards/margins": 0.5843728184700012, |
|
"rewards/rejected": -2.3169338703155518, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 4.6488955436917414e-06, |
|
"logits/chosen": -1.3540565967559814, |
|
"logits/rejected": -1.1343624591827393, |
|
"logps/chosen": -444.31640625, |
|
"logps/rejected": -482.2098083496094, |
|
"loss": 0.5099, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.6433531045913696, |
|
"rewards/margins": 0.8446812629699707, |
|
"rewards/rejected": -2.48803448677063, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 5.75, |
|
"learning_rate": 4.6371315229917644e-06, |
|
"logits/chosen": -1.3197797536849976, |
|
"logits/rejected": -1.1996195316314697, |
|
"logps/chosen": -457.05712890625, |
|
"logps/rejected": -514.72802734375, |
|
"loss": 0.5217, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7364044189453125, |
|
"rewards/margins": 0.780579149723053, |
|
"rewards/rejected": -2.5169835090637207, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 13.6875, |
|
"learning_rate": 4.625189052424638e-06, |
|
"logits/chosen": -1.2102200984954834, |
|
"logits/rejected": -1.0647470951080322, |
|
"logps/chosen": -436.97991943359375, |
|
"logps/rejected": -520.3751220703125, |
|
"loss": 0.4535, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.9787667989730835, |
|
"rewards/margins": 1.061232328414917, |
|
"rewards/rejected": -3.039999008178711, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 8.25, |
|
"learning_rate": 4.613069129183218e-06, |
|
"logits/chosen": -1.240464687347412, |
|
"logits/rejected": -1.0879384279251099, |
|
"logps/chosen": -531.1487426757812, |
|
"logps/rejected": -574.3619384765625, |
|
"loss": 0.5553, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.2774546146392822, |
|
"rewards/margins": 0.7940423488616943, |
|
"rewards/rejected": -3.0714969635009766, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -0.981342613697052, |
|
"eval_logits/rejected": -0.8557386994361877, |
|
"eval_logps/chosen": -482.09930419921875, |
|
"eval_logps/rejected": -548.8490600585938, |
|
"eval_loss": 0.5178083777427673, |
|
"eval_rewards/accuracies": 0.7315000295639038, |
|
"eval_rewards/chosen": -2.1744725704193115, |
|
"eval_rewards/margins": 0.8679391145706177, |
|
"eval_rewards/rejected": -3.0424115657806396, |
|
"eval_runtime": 382.1372, |
|
"eval_samples_per_second": 5.234, |
|
"eval_steps_per_second": 0.654, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 8.0, |
|
"learning_rate": 4.600772765277607e-06, |
|
"logits/chosen": -1.0305756330490112, |
|
"logits/rejected": -0.9370132684707642, |
|
"logps/chosen": -448.99493408203125, |
|
"logps/rejected": -530.3275146484375, |
|
"loss": 0.4913, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.154376983642578, |
|
"rewards/margins": 0.8647212982177734, |
|
"rewards/rejected": -3.0190985202789307, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 16.75, |
|
"learning_rate": 4.588300987450652e-06, |
|
"logits/chosen": -1.0989015102386475, |
|
"logits/rejected": -0.9851810336112976, |
|
"logps/chosen": -443.59423828125, |
|
"logps/rejected": -486.5970764160156, |
|
"loss": 0.5542, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8985025882720947, |
|
"rewards/margins": 0.7655047178268433, |
|
"rewards/rejected": -2.6640071868896484, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 5.6875, |
|
"learning_rate": 4.5756548370922136e-06, |
|
"logits/chosen": -1.0507217645645142, |
|
"logits/rejected": -0.9594799280166626, |
|
"logps/chosen": -405.2181091308594, |
|
"logps/rejected": -487.1499938964844, |
|
"loss": 0.4835, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.6451423168182373, |
|
"rewards/margins": 0.9089698791503906, |
|
"rewards/rejected": -2.554112434387207, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 13.5625, |
|
"learning_rate": 4.562835370152206e-06, |
|
"logits/chosen": -1.0573441982269287, |
|
"logits/rejected": -0.8775628209114075, |
|
"logps/chosen": -527.5038452148438, |
|
"logps/rejected": -620.2794189453125, |
|
"loss": 0.4742, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.2627432346343994, |
|
"rewards/margins": 1.2387964725494385, |
|
"rewards/rejected": -3.501539707183838, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 4.54984365705243e-06, |
|
"logits/chosen": -0.9812475442886353, |
|
"logits/rejected": -0.8811472654342651, |
|
"logps/chosen": -502.1786193847656, |
|
"logps/rejected": -618.7202758789062, |
|
"loss": 0.4971, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.337085485458374, |
|
"rewards/margins": 1.2312263250350952, |
|
"rewards/rejected": -3.5683116912841797, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 9.0, |
|
"learning_rate": 4.536680782597191e-06, |
|
"logits/chosen": -0.9585447311401367, |
|
"logits/rejected": -0.8763798475265503, |
|
"logps/chosen": -443.18878173828125, |
|
"logps/rejected": -523.16015625, |
|
"loss": 0.6028, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.0716359615325928, |
|
"rewards/margins": 0.855958104133606, |
|
"rewards/rejected": -2.9275941848754883, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 15.4375, |
|
"learning_rate": 4.523347845882718e-06, |
|
"logits/chosen": -1.122159481048584, |
|
"logits/rejected": -0.9293369054794312, |
|
"logps/chosen": -494.13037109375, |
|
"logps/rejected": -562.1329345703125, |
|
"loss": 0.4613, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.0596017837524414, |
|
"rewards/margins": 1.1728570461273193, |
|
"rewards/rejected": -3.2324588298797607, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 8.125, |
|
"learning_rate": 4.50984596020539e-06, |
|
"logits/chosen": -0.8647342920303345, |
|
"logits/rejected": -0.826617419719696, |
|
"logps/chosen": -561.8629760742188, |
|
"logps/rejected": -615.0023193359375, |
|
"loss": 0.5557, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.8171119689941406, |
|
"rewards/margins": 0.8539352416992188, |
|
"rewards/rejected": -3.6710472106933594, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 9.0, |
|
"learning_rate": 4.4961762529687745e-06, |
|
"logits/chosen": -1.0336081981658936, |
|
"logits/rejected": -0.9252422451972961, |
|
"logps/chosen": -563.8508911132812, |
|
"logps/rejected": -638.390869140625, |
|
"loss": 0.4855, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.0344927310943604, |
|
"rewards/margins": 0.9103133082389832, |
|
"rewards/rejected": -3.944805860519409, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 6.9375, |
|
"learning_rate": 4.482339865589492e-06, |
|
"logits/chosen": -1.0671048164367676, |
|
"logits/rejected": -0.9094209671020508, |
|
"logps/chosen": -568.4443359375, |
|
"logps/rejected": -596.6480712890625, |
|
"loss": 0.5994, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -3.021576404571533, |
|
"rewards/margins": 0.7217450141906738, |
|
"rewards/rejected": -3.743321180343628, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": -0.8895747661590576, |
|
"eval_logits/rejected": -0.7614892721176147, |
|
"eval_logps/chosen": -514.6676635742188, |
|
"eval_logps/rejected": -577.3698120117188, |
|
"eval_loss": 0.520658552646637, |
|
"eval_rewards/accuracies": 0.7300000190734863, |
|
"eval_rewards/chosen": -2.5001566410064697, |
|
"eval_rewards/margins": 0.8274616599082947, |
|
"eval_rewards/rejected": -3.3276185989379883, |
|
"eval_runtime": 382.1502, |
|
"eval_samples_per_second": 5.234, |
|
"eval_steps_per_second": 0.654, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 6.625, |
|
"learning_rate": 4.468337953401909e-06, |
|
"logits/chosen": -1.1065692901611328, |
|
"logits/rejected": -1.0572447776794434, |
|
"logps/chosen": -495.5409240722656, |
|
"logps/rejected": -552.65966796875, |
|
"loss": 0.5707, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.2518980503082275, |
|
"rewards/margins": 0.61982262134552, |
|
"rewards/rejected": -2.871720790863037, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 8.875, |
|
"learning_rate": 4.45417168556166e-06, |
|
"logits/chosen": -1.0463123321533203, |
|
"logits/rejected": -0.9469770193099976, |
|
"logps/chosen": -435.6727600097656, |
|
"logps/rejected": -518.3145751953125, |
|
"loss": 0.5007, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9452159404754639, |
|
"rewards/margins": 0.8327676057815552, |
|
"rewards/rejected": -2.7779834270477295, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 4.439842244948036e-06, |
|
"logits/chosen": -1.0293817520141602, |
|
"logits/rejected": -0.8690570592880249, |
|
"logps/chosen": -486.1783142089844, |
|
"logps/rejected": -559.431396484375, |
|
"loss": 0.5565, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2511630058288574, |
|
"rewards/margins": 0.7881690263748169, |
|
"rewards/rejected": -3.0393319129943848, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 14.5, |
|
"learning_rate": 4.425350828065204e-06, |
|
"logits/chosen": -1.0534614324569702, |
|
"logits/rejected": -0.8575074076652527, |
|
"logps/chosen": -497.90167236328125, |
|
"logps/rejected": -537.9634399414062, |
|
"loss": 0.4913, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.1381561756134033, |
|
"rewards/margins": 0.8793197870254517, |
|
"rewards/rejected": -3.0174758434295654, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 4.410698644942303e-06, |
|
"logits/chosen": -1.0756770372390747, |
|
"logits/rejected": -0.9290148615837097, |
|
"logps/chosen": -489.197265625, |
|
"logps/rejected": -558.8743286132812, |
|
"loss": 0.4893, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1688458919525146, |
|
"rewards/margins": 0.9360774755477905, |
|
"rewards/rejected": -3.1049234867095947, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 12.25, |
|
"learning_rate": 4.395886919032406e-06, |
|
"logits/chosen": -0.9989307522773743, |
|
"logits/rejected": -0.8515041470527649, |
|
"logps/chosen": -480.94183349609375, |
|
"logps/rejected": -542.0136108398438, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.1710543632507324, |
|
"rewards/margins": 0.8757139444351196, |
|
"rewards/rejected": -3.0467686653137207, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 8.625, |
|
"learning_rate": 4.380916887110366e-06, |
|
"logits/chosen": -1.1318533420562744, |
|
"logits/rejected": -0.9459112286567688, |
|
"logps/chosen": -481.12335205078125, |
|
"logps/rejected": -544.0623779296875, |
|
"loss": 0.5083, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.2195496559143066, |
|
"rewards/margins": 1.032907247543335, |
|
"rewards/rejected": -3.2524571418762207, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 4.365789799169539e-06, |
|
"logits/chosen": -0.9683933258056641, |
|
"logits/rejected": -1.0098755359649658, |
|
"logps/chosen": -474.65283203125, |
|
"logps/rejected": -566.4153442382812, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.280418872833252, |
|
"rewards/margins": 0.8640033006668091, |
|
"rewards/rejected": -3.1444220542907715, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 11.9375, |
|
"learning_rate": 4.350506918317416e-06, |
|
"logits/chosen": -1.1871801614761353, |
|
"logits/rejected": -1.0333930253982544, |
|
"logps/chosen": -443.02716064453125, |
|
"logps/rejected": -521.8514404296875, |
|
"loss": 0.5037, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.9543129205703735, |
|
"rewards/margins": 0.8601529002189636, |
|
"rewards/rejected": -2.8144659996032715, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 9.0, |
|
"learning_rate": 4.335069520670149e-06, |
|
"logits/chosen": -0.9967072606086731, |
|
"logits/rejected": -0.9244716763496399, |
|
"logps/chosen": -455.01959228515625, |
|
"logps/rejected": -528.6710205078125, |
|
"loss": 0.5976, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.2530674934387207, |
|
"rewards/margins": 0.6545962691307068, |
|
"rewards/rejected": -2.907663583755493, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -0.9595763087272644, |
|
"eval_logits/rejected": -0.8350398540496826, |
|
"eval_logps/chosen": -482.9834289550781, |
|
"eval_logps/rejected": -543.660400390625, |
|
"eval_loss": 0.5098230838775635, |
|
"eval_rewards/accuracies": 0.7365000247955322, |
|
"eval_rewards/chosen": -2.183314323425293, |
|
"eval_rewards/margins": 0.8072100281715393, |
|
"eval_rewards/rejected": -2.9905245304107666, |
|
"eval_runtime": 382.4857, |
|
"eval_samples_per_second": 5.229, |
|
"eval_steps_per_second": 0.654, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 6.40625, |
|
"learning_rate": 4.319478895246e-06, |
|
"logits/chosen": -1.070488691329956, |
|
"logits/rejected": -0.886951744556427, |
|
"logps/chosen": -466.0955505371094, |
|
"logps/rejected": -520.3566284179688, |
|
"loss": 0.4951, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.189800977706909, |
|
"rewards/margins": 0.7895106077194214, |
|
"rewards/rejected": -2.979311466217041, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 11.0, |
|
"learning_rate": 4.303736343857704e-06, |
|
"logits/chosen": -1.0415198802947998, |
|
"logits/rejected": -0.9387828707695007, |
|
"logps/chosen": -499.1920471191406, |
|
"logps/rejected": -617.3883666992188, |
|
"loss": 0.4881, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.420851230621338, |
|
"rewards/margins": 1.062877893447876, |
|
"rewards/rejected": -3.483729124069214, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 10.0, |
|
"learning_rate": 4.287843181003772e-06, |
|
"logits/chosen": -1.0625154972076416, |
|
"logits/rejected": -0.9172189831733704, |
|
"logps/chosen": -579.9913330078125, |
|
"logps/rejected": -610.0975341796875, |
|
"loss": 0.5905, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.8613951206207275, |
|
"rewards/margins": 0.7642954587936401, |
|
"rewards/rejected": -3.6256909370422363, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 7.59375, |
|
"learning_rate": 4.27180073375873e-06, |
|
"logits/chosen": -1.1162028312683105, |
|
"logits/rejected": -0.9976137280464172, |
|
"logps/chosen": -525.2400512695312, |
|
"logps/rejected": -569.8626708984375, |
|
"loss": 0.5269, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.3787271976470947, |
|
"rewards/margins": 0.8617948293685913, |
|
"rewards/rejected": -3.2405219078063965, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 5.625, |
|
"learning_rate": 4.255610341662304e-06, |
|
"logits/chosen": -1.144928216934204, |
|
"logits/rejected": -0.9519325494766235, |
|
"logps/chosen": -472.40087890625, |
|
"logps/rejected": -529.2858276367188, |
|
"loss": 0.5525, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.171128511428833, |
|
"rewards/margins": 0.767959475517273, |
|
"rewards/rejected": -2.9390883445739746, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 8.625, |
|
"learning_rate": 4.2392733566075764e-06, |
|
"logits/chosen": -1.11684250831604, |
|
"logits/rejected": -0.9831358194351196, |
|
"logps/chosen": -500.71484375, |
|
"logps/rejected": -542.6422119140625, |
|
"loss": 0.5654, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.447084903717041, |
|
"rewards/margins": 0.5746163129806519, |
|
"rewards/rejected": -3.0217010974884033, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 7.65625, |
|
"learning_rate": 4.2227911427280975e-06, |
|
"logits/chosen": -1.0659453868865967, |
|
"logits/rejected": -0.899361252784729, |
|
"logps/chosen": -475.46148681640625, |
|
"logps/rejected": -525.0037841796875, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.251559257507324, |
|
"rewards/margins": 0.823780357837677, |
|
"rewards/rejected": -3.0753397941589355, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 11.4375, |
|
"learning_rate": 4.206165076283983e-06, |
|
"logits/chosen": -1.096620798110962, |
|
"logits/rejected": -0.9550498127937317, |
|
"logps/chosen": -487.46136474609375, |
|
"logps/rejected": -576.1992797851562, |
|
"loss": 0.461, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.4152817726135254, |
|
"rewards/margins": 1.0981849431991577, |
|
"rewards/rejected": -3.5134663581848145, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 10.6875, |
|
"learning_rate": 4.189396545546995e-06, |
|
"logits/chosen": -1.0538244247436523, |
|
"logits/rejected": -0.9361982345581055, |
|
"logps/chosen": -522.2523193359375, |
|
"logps/rejected": -610.1349487304688, |
|
"loss": 0.5054, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.730778217315674, |
|
"rewards/margins": 1.0780103206634521, |
|
"rewards/rejected": -3.808788776397705, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 13.125, |
|
"learning_rate": 4.172486950684627e-06, |
|
"logits/chosen": -1.0185925960540771, |
|
"logits/rejected": -0.9584161639213562, |
|
"logps/chosen": -538.3131103515625, |
|
"logps/rejected": -635.578369140625, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.846707820892334, |
|
"rewards/margins": 1.0040740966796875, |
|
"rewards/rejected": -3.8507816791534424, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_logits/chosen": -0.825871467590332, |
|
"eval_logits/rejected": -0.7071986198425293, |
|
"eval_logps/chosen": -574.3861694335938, |
|
"eval_logps/rejected": -660.885009765625, |
|
"eval_loss": 0.5165807008743286, |
|
"eval_rewards/accuracies": 0.7350000143051147, |
|
"eval_rewards/chosen": -3.097341775894165, |
|
"eval_rewards/margins": 1.0654287338256836, |
|
"eval_rewards/rejected": -4.162771224975586, |
|
"eval_runtime": 382.0912, |
|
"eval_samples_per_second": 5.234, |
|
"eval_steps_per_second": 0.654, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 11.625, |
|
"learning_rate": 4.155437703643182e-06, |
|
"logits/chosen": -1.0443698167800903, |
|
"logits/rejected": -0.8676601648330688, |
|
"logps/chosen": -536.4607543945312, |
|
"logps/rejected": -606.3543701171875, |
|
"loss": 0.5075, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.8971712589263916, |
|
"rewards/margins": 0.9897411465644836, |
|
"rewards/rejected": -3.8869121074676514, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 11.375, |
|
"learning_rate": 4.138250228029882e-06, |
|
"logits/chosen": -1.000579595565796, |
|
"logits/rejected": -0.9191876649856567, |
|
"logps/chosen": -538.9154052734375, |
|
"logps/rejected": -649.7552490234375, |
|
"loss": 0.4767, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.8198482990264893, |
|
"rewards/margins": 1.0736055374145508, |
|
"rewards/rejected": -3.893454074859619, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 7.6875, |
|
"learning_rate": 4.120925958993994e-06, |
|
"logits/chosen": -0.9208280444145203, |
|
"logits/rejected": -0.8555585741996765, |
|
"logps/chosen": -512.56787109375, |
|
"logps/rejected": -604.376220703125, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.786665439605713, |
|
"rewards/margins": 0.9612969160079956, |
|
"rewards/rejected": -3.747962474822998, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 14.0, |
|
"learning_rate": 4.103466343106999e-06, |
|
"logits/chosen": -1.1172326803207397, |
|
"logits/rejected": -0.9976350665092468, |
|
"logps/chosen": -514.8595581054688, |
|
"logps/rejected": -575.3850708007812, |
|
"loss": 0.5422, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.4547386169433594, |
|
"rewards/margins": 0.8639480471611023, |
|
"rewards/rejected": -3.3186867237091064, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 10.125, |
|
"learning_rate": 4.085872838241797e-06, |
|
"logits/chosen": -1.0706989765167236, |
|
"logits/rejected": -0.9391083717346191, |
|
"logps/chosen": -489.779296875, |
|
"logps/rejected": -538.4210815429688, |
|
"loss": 0.5948, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.274151563644409, |
|
"rewards/margins": 0.6873086094856262, |
|
"rewards/rejected": -2.9614596366882324, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 11.125, |
|
"learning_rate": 4.06814691345098e-06, |
|
"logits/chosen": -1.0508559942245483, |
|
"logits/rejected": -0.9001902341842651, |
|
"logps/chosen": -451.5694274902344, |
|
"logps/rejected": -517.9208984375, |
|
"loss": 0.4809, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.9602162837982178, |
|
"rewards/margins": 0.884141743183136, |
|
"rewards/rejected": -2.844357967376709, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 14.125, |
|
"learning_rate": 4.050290048844171e-06, |
|
"logits/chosen": -1.129167914390564, |
|
"logits/rejected": -1.0560190677642822, |
|
"logps/chosen": -474.2417907714844, |
|
"logps/rejected": -552.0899047851562, |
|
"loss": 0.5423, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.056283473968506, |
|
"rewards/margins": 0.8298514485359192, |
|
"rewards/rejected": -2.886134624481201, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 9.5, |
|
"learning_rate": 4.032303735464422e-06, |
|
"logits/chosen": -1.1856621503829956, |
|
"logits/rejected": -0.9643325805664062, |
|
"logps/chosen": -502.15814208984375, |
|
"logps/rejected": -594.064208984375, |
|
"loss": 0.452, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.301772356033325, |
|
"rewards/margins": 1.1499149799346924, |
|
"rewards/rejected": -3.4516875743865967, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 11.6875, |
|
"learning_rate": 4.014189475163727e-06, |
|
"logits/chosen": -0.96733558177948, |
|
"logits/rejected": -0.853344738483429, |
|
"logps/chosen": -489.39990234375, |
|
"logps/rejected": -597.2086181640625, |
|
"loss": 0.4757, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.3474299907684326, |
|
"rewards/margins": 1.1593117713928223, |
|
"rewards/rejected": -3.506741762161255, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 12.75, |
|
"learning_rate": 3.995948780477605e-06, |
|
"logits/chosen": -1.1000730991363525, |
|
"logits/rejected": -0.9693312644958496, |
|
"logps/chosen": -477.19549560546875, |
|
"logps/rejected": -542.30615234375, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.0844216346740723, |
|
"rewards/margins": 0.8978837132453918, |
|
"rewards/rejected": -2.9823052883148193, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": -0.9127845168113708, |
|
"eval_logits/rejected": -0.7864713668823242, |
|
"eval_logps/chosen": -474.74249267578125, |
|
"eval_logps/rejected": -551.2366943359375, |
|
"eval_loss": 0.5107593536376953, |
|
"eval_rewards/accuracies": 0.7350000143051147, |
|
"eval_rewards/chosen": -2.100904941558838, |
|
"eval_rewards/margins": 0.9653825163841248, |
|
"eval_rewards/rejected": -3.0662872791290283, |
|
"eval_runtime": 381.6083, |
|
"eval_samples_per_second": 5.241, |
|
"eval_steps_per_second": 0.655, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 10.25, |
|
"learning_rate": 3.977583174498816e-06, |
|
"logits/chosen": -1.017508864402771, |
|
"logits/rejected": -0.8959487676620483, |
|
"logps/chosen": -488.11810302734375, |
|
"logps/rejected": -602.2122802734375, |
|
"loss": 0.3715, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.244345188140869, |
|
"rewards/margins": 1.360781192779541, |
|
"rewards/rejected": -3.6051268577575684, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 12.125, |
|
"learning_rate": 3.959094190750172e-06, |
|
"logits/chosen": -1.0074245929718018, |
|
"logits/rejected": -0.868901252746582, |
|
"logps/chosen": -552.512939453125, |
|
"logps/rejected": -637.4674072265625, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.6735260486602783, |
|
"rewards/margins": 1.1185749769210815, |
|
"rewards/rejected": -3.7921009063720703, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 11.6875, |
|
"learning_rate": 3.9404833730564975e-06, |
|
"logits/chosen": -0.8478316068649292, |
|
"logits/rejected": -0.7511281967163086, |
|
"logps/chosen": -535.4224853515625, |
|
"logps/rejected": -637.5137329101562, |
|
"loss": 0.494, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.823219060897827, |
|
"rewards/margins": 1.1367390155792236, |
|
"rewards/rejected": -3.9599578380584717, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 17.125, |
|
"learning_rate": 3.921752275415712e-06, |
|
"logits/chosen": -0.9650063514709473, |
|
"logits/rejected": -0.8631266355514526, |
|
"logps/chosen": -534.4532470703125, |
|
"logps/rejected": -645.3438720703125, |
|
"loss": 0.4351, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.8391730785369873, |
|
"rewards/margins": 1.3146858215332031, |
|
"rewards/rejected": -4.1538591384887695, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 3.902902461869079e-06, |
|
"logits/chosen": -0.9252153635025024, |
|
"logits/rejected": -0.7948675751686096, |
|
"logps/chosen": -540.6839599609375, |
|
"logps/rejected": -642.1290283203125, |
|
"loss": 0.5532, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.0117030143737793, |
|
"rewards/margins": 1.17899751663208, |
|
"rewards/rejected": -4.190700531005859, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 13.875, |
|
"learning_rate": 3.883935506370605e-06, |
|
"logits/chosen": -0.9731215238571167, |
|
"logits/rejected": -0.8713979721069336, |
|
"logps/chosen": -526.899658203125, |
|
"logps/rejected": -591.6453857421875, |
|
"loss": 0.5396, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.736586570739746, |
|
"rewards/margins": 0.9257469177246094, |
|
"rewards/rejected": -3.6623332500457764, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 5.0625, |
|
"learning_rate": 3.864852992655617e-06, |
|
"logits/chosen": -1.115800380706787, |
|
"logits/rejected": -1.0172771215438843, |
|
"logps/chosen": -478.37420654296875, |
|
"logps/rejected": -573.0581665039062, |
|
"loss": 0.4365, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.2973954677581787, |
|
"rewards/margins": 1.069636344909668, |
|
"rewards/rejected": -3.3670318126678467, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 7.0625, |
|
"learning_rate": 3.845656514108516e-06, |
|
"logits/chosen": -1.0454566478729248, |
|
"logits/rejected": -0.8997499346733093, |
|
"logps/chosen": -511.357177734375, |
|
"logps/rejected": -557.3446655273438, |
|
"loss": 0.4913, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.522265672683716, |
|
"rewards/margins": 1.0096194744110107, |
|
"rewards/rejected": -3.5318856239318848, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 8.125, |
|
"learning_rate": 3.826347673629738e-06, |
|
"logits/chosen": -1.0593020915985107, |
|
"logits/rejected": -0.8929145932197571, |
|
"logps/chosen": -473.79302978515625, |
|
"logps/rejected": -565.4286499023438, |
|
"loss": 0.4657, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.2048957347869873, |
|
"rewards/margins": 1.1790317296981812, |
|
"rewards/rejected": -3.3839271068573, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 12.0625, |
|
"learning_rate": 3.8069280835019062e-06, |
|
"logits/chosen": -1.116262674331665, |
|
"logits/rejected": -0.9613265991210938, |
|
"logps/chosen": -477.24810791015625, |
|
"logps/rejected": -587.962646484375, |
|
"loss": 0.4593, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1293580532073975, |
|
"rewards/margins": 1.2989779710769653, |
|
"rewards/rejected": -3.4283363819122314, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": -1.0210601091384888, |
|
"eval_logits/rejected": -0.8902665972709656, |
|
"eval_logps/chosen": -496.3184509277344, |
|
"eval_logps/rejected": -587.1505737304688, |
|
"eval_loss": 0.5173963308334351, |
|
"eval_rewards/accuracies": 0.7304999828338623, |
|
"eval_rewards/chosen": -2.316664218902588, |
|
"eval_rewards/margins": 1.1087615489959717, |
|
"eval_rewards/rejected": -3.4254260063171387, |
|
"eval_runtime": 382.2649, |
|
"eval_samples_per_second": 5.232, |
|
"eval_steps_per_second": 0.654, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 13.5, |
|
"learning_rate": 3.7873993652552077e-06, |
|
"logits/chosen": -1.0803442001342773, |
|
"logits/rejected": -0.9917434453964233, |
|
"logps/chosen": -461.2118225097656, |
|
"logps/rejected": -549.1537475585938, |
|
"loss": 0.593, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.274977922439575, |
|
"rewards/margins": 0.9378048777580261, |
|
"rewards/rejected": -3.212782621383667, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 3.7677631495319953e-06, |
|
"logits/chosen": -1.2474887371063232, |
|
"logits/rejected": -1.145392656326294, |
|
"logps/chosen": -428.1084899902344, |
|
"logps/rejected": -485.67694091796875, |
|
"loss": 0.5245, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.6442362070083618, |
|
"rewards/margins": 0.7559275031089783, |
|
"rewards/rejected": -2.4001636505126953, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 6.75, |
|
"learning_rate": 3.748021075950633e-06, |
|
"logits/chosen": -1.3161629438400269, |
|
"logits/rejected": -1.232714295387268, |
|
"logps/chosen": -440.6031188964844, |
|
"logps/rejected": -481.67926025390625, |
|
"loss": 0.5983, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6595981121063232, |
|
"rewards/margins": 0.5171489119529724, |
|
"rewards/rejected": -2.1767468452453613, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 10.625, |
|
"learning_rate": 3.7281747929685824e-06, |
|
"logits/chosen": -1.132124662399292, |
|
"logits/rejected": -1.0095793008804321, |
|
"logps/chosen": -423.98553466796875, |
|
"logps/rejected": -478.41015625, |
|
"loss": 0.5368, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.8633050918579102, |
|
"rewards/margins": 0.7011392712593079, |
|
"rewards/rejected": -2.5644445419311523, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 8.625, |
|
"learning_rate": 3.7082259577447604e-06, |
|
"logits/chosen": -1.2295887470245361, |
|
"logits/rejected": -1.1187238693237305, |
|
"logps/chosen": -489.0294494628906, |
|
"logps/rejected": -551.4732666015625, |
|
"loss": 0.4858, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.1783862113952637, |
|
"rewards/margins": 0.8242964744567871, |
|
"rewards/rejected": -3.002682685852051, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 10.0, |
|
"learning_rate": 3.6881762360011688e-06, |
|
"logits/chosen": -1.241201639175415, |
|
"logits/rejected": -1.0382106304168701, |
|
"logps/chosen": -548.8870849609375, |
|
"logps/rejected": -611.2633666992188, |
|
"loss": 0.4939, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.6739068031311035, |
|
"rewards/margins": 0.9938074350357056, |
|
"rewards/rejected": -3.6677143573760986, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 3.668027301883802e-06, |
|
"logits/chosen": -1.154157280921936, |
|
"logits/rejected": -1.0291301012039185, |
|
"logps/chosen": -542.0028076171875, |
|
"logps/rejected": -634.2523803710938, |
|
"loss": 0.5002, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.9064033031463623, |
|
"rewards/margins": 1.071606993675232, |
|
"rewards/rejected": -3.9780097007751465, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 5.46875, |
|
"learning_rate": 3.64778083782286e-06, |
|
"logits/chosen": -1.0966026782989502, |
|
"logits/rejected": -1.084398627281189, |
|
"logps/chosen": -548.9720458984375, |
|
"logps/rejected": -668.5007934570312, |
|
"loss": 0.5301, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.963653087615967, |
|
"rewards/margins": 0.9051497578620911, |
|
"rewards/rejected": -3.868802547454834, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 3.627438534392268e-06, |
|
"logits/chosen": -1.2072285413742065, |
|
"logits/rejected": -1.1841914653778076, |
|
"logps/chosen": -524.2724609375, |
|
"logps/rejected": -635.7026977539062, |
|
"loss": 0.483, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.862274169921875, |
|
"rewards/margins": 1.047090768814087, |
|
"rewards/rejected": -3.909365177154541, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 7.21875, |
|
"learning_rate": 3.607002090168506e-06, |
|
"logits/chosen": -1.0932730436325073, |
|
"logits/rejected": -1.0192008018493652, |
|
"logps/chosen": -579.1436157226562, |
|
"logps/rejected": -652.6798095703125, |
|
"loss": 0.5545, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -3.1483500003814697, |
|
"rewards/margins": 0.9495010375976562, |
|
"rewards/rejected": -4.097850799560547, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -1.0082374811172485, |
|
"eval_logits/rejected": -0.8800999522209167, |
|
"eval_logps/chosen": -564.0355224609375, |
|
"eval_logps/rejected": -652.812255859375, |
|
"eval_loss": 0.5032184720039368, |
|
"eval_rewards/accuracies": 0.7369999885559082, |
|
"eval_rewards/chosen": -2.99383544921875, |
|
"eval_rewards/margins": 1.088207483291626, |
|
"eval_rewards/rejected": -4.082043170928955, |
|
"eval_runtime": 381.8998, |
|
"eval_samples_per_second": 5.237, |
|
"eval_steps_per_second": 0.655, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 6.71875, |
|
"learning_rate": 3.586473211588787e-06, |
|
"logits/chosen": -1.1385810375213623, |
|
"logits/rejected": -1.0679770708084106, |
|
"logps/chosen": -523.4324340820312, |
|
"logps/rejected": -647.1407470703125, |
|
"loss": 0.4495, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.787372350692749, |
|
"rewards/margins": 1.170562744140625, |
|
"rewards/rejected": -3.957934856414795, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 13.0, |
|
"learning_rate": 3.5658536128085623e-06, |
|
"logits/chosen": -1.1914455890655518, |
|
"logits/rejected": -1.0186755657196045, |
|
"logps/chosen": -572.4912719726562, |
|
"logps/rejected": -637.8251953125, |
|
"loss": 0.5878, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -3.0957980155944824, |
|
"rewards/margins": 0.9488485455513, |
|
"rewards/rejected": -4.044646263122559, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 3.545145015558399e-06, |
|
"logits/chosen": -0.9681538343429565, |
|
"logits/rejected": -0.9621971249580383, |
|
"logps/chosen": -520.1128540039062, |
|
"logps/rejected": -614.5860595703125, |
|
"loss": 0.5109, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.8847546577453613, |
|
"rewards/margins": 1.0869688987731934, |
|
"rewards/rejected": -3.971724271774292, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 5.46875, |
|
"learning_rate": 3.5243491490002056e-06, |
|
"logits/chosen": -1.09974205493927, |
|
"logits/rejected": -1.019108533859253, |
|
"logps/chosen": -545.1671142578125, |
|
"logps/rejected": -630.2543334960938, |
|
"loss": 0.5719, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.9147398471832275, |
|
"rewards/margins": 0.9028825759887695, |
|
"rewards/rejected": -3.817622423171997, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 3.503467749582857e-06, |
|
"logits/chosen": -1.1649540662765503, |
|
"logits/rejected": -0.9812711477279663, |
|
"logps/chosen": -496.32757568359375, |
|
"logps/rejected": -530.1451416015625, |
|
"loss": 0.5901, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.4510443210601807, |
|
"rewards/margins": 0.6782389879226685, |
|
"rewards/rejected": -3.1292831897735596, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 11.0, |
|
"learning_rate": 3.4825025608971947e-06, |
|
"logits/chosen": -1.0830554962158203, |
|
"logits/rejected": -1.0159814357757568, |
|
"logps/chosen": -442.962646484375, |
|
"logps/rejected": -521.5462646484375, |
|
"loss": 0.5191, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.2101898193359375, |
|
"rewards/margins": 0.7478699684143066, |
|
"rewards/rejected": -2.958059549331665, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 7.40625, |
|
"learning_rate": 3.4614553335304407e-06, |
|
"logits/chosen": -1.1321473121643066, |
|
"logits/rejected": -0.9186077117919922, |
|
"logps/chosen": -502.3970642089844, |
|
"logps/rejected": -575.6217041015625, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.3348631858825684, |
|
"rewards/margins": 1.0501439571380615, |
|
"rewards/rejected": -3.385007381439209, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 9.625, |
|
"learning_rate": 3.4403278249200222e-06, |
|
"logits/chosen": -1.1406095027923584, |
|
"logits/rejected": -0.9287969470024109, |
|
"logps/chosen": -519.1994018554688, |
|
"logps/rejected": -603.8717041015625, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.365922689437866, |
|
"rewards/margins": 1.2659895420074463, |
|
"rewards/rejected": -3.6319122314453125, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 16.5, |
|
"learning_rate": 3.4191217992068293e-06, |
|
"logits/chosen": -1.1879878044128418, |
|
"logits/rejected": -0.9813734292984009, |
|
"logps/chosen": -539.6956176757812, |
|
"logps/rejected": -599.0775146484375, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.6155307292938232, |
|
"rewards/margins": 1.0494682788848877, |
|
"rewards/rejected": -3.664999008178711, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 3.3978390270879056e-06, |
|
"logits/chosen": -1.0190632343292236, |
|
"logits/rejected": -0.9378607869148254, |
|
"logps/chosen": -550.7818603515625, |
|
"logps/rejected": -662.2818603515625, |
|
"loss": 0.5425, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.4076619148254395, |
|
"rewards/margins": 1.0471140146255493, |
|
"rewards/rejected": -4.454775810241699, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_logits/chosen": -0.9685720205307007, |
|
"eval_logits/rejected": -0.8382174968719482, |
|
"eval_logps/chosen": -599.6095581054688, |
|
"eval_logps/rejected": -685.2186889648438, |
|
"eval_loss": 0.49963250756263733, |
|
"eval_rewards/accuracies": 0.7404999732971191, |
|
"eval_rewards/chosen": -3.349576234817505, |
|
"eval_rewards/margins": 1.0565321445465088, |
|
"eval_rewards/rejected": -4.406107425689697, |
|
"eval_runtime": 382.4342, |
|
"eval_samples_per_second": 5.23, |
|
"eval_steps_per_second": 0.654, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 11.75, |
|
"learning_rate": 3.3764812856685995e-06, |
|
"logits/chosen": -1.0968348979949951, |
|
"logits/rejected": -1.0862301588058472, |
|
"logps/chosen": -530.6864013671875, |
|
"logps/rejected": -640.4039916992188, |
|
"loss": 0.518, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.0613017082214355, |
|
"rewards/margins": 0.9621230959892273, |
|
"rewards/rejected": -4.0234246253967285, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 8.0, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"logits/chosen": -1.2413816452026367, |
|
"logits/rejected": -1.089429259300232, |
|
"logps/chosen": -535.4332275390625, |
|
"logps/rejected": -622.2586059570312, |
|
"loss": 0.4864, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.66583251953125, |
|
"rewards/margins": 1.01954185962677, |
|
"rewards/rejected": -3.6853744983673096, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 3.3335480345008907e-06, |
|
"logits/chosen": -1.112958312034607, |
|
"logits/rejected": -1.0259140729904175, |
|
"logps/chosen": -486.234375, |
|
"logps/rejected": -564.1868896484375, |
|
"loss": 0.4673, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.260854721069336, |
|
"rewards/margins": 1.0263946056365967, |
|
"rewards/rejected": -3.2872490882873535, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 3.3119761096666055e-06, |
|
"logits/chosen": -1.1713676452636719, |
|
"logits/rejected": -1.0070645809173584, |
|
"logps/chosen": -514.056396484375, |
|
"logps/rejected": -565.324951171875, |
|
"loss": 0.5375, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.390371084213257, |
|
"rewards/margins": 0.8160451054573059, |
|
"rewards/rejected": -3.206415891647339, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 3.290336385060832e-06, |
|
"logits/chosen": -1.3080298900604248, |
|
"logits/rejected": -1.114485502243042, |
|
"logps/chosen": -513.6076049804688, |
|
"logps/rejected": -580.9697265625, |
|
"loss": 0.5403, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.6475276947021484, |
|
"rewards/margins": 0.8753725290298462, |
|
"rewards/rejected": -3.522900104522705, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 10.75, |
|
"learning_rate": 3.268630667594348e-06, |
|
"logits/chosen": -1.1190599203109741, |
|
"logits/rejected": -1.0877625942230225, |
|
"logps/chosen": -520.4367065429688, |
|
"logps/rejected": -593.3540649414062, |
|
"loss": 0.51, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.6478748321533203, |
|
"rewards/margins": 0.9716035723686218, |
|
"rewards/rejected": -3.619478225708008, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 3.2468607696883147e-06, |
|
"logits/chosen": -1.1805906295776367, |
|
"logits/rejected": -1.1239099502563477, |
|
"logps/chosen": -522.7432861328125, |
|
"logps/rejected": -629.3782958984375, |
|
"loss": 0.4844, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.695678949356079, |
|
"rewards/margins": 1.022963285446167, |
|
"rewards/rejected": -3.718641996383667, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 7.0625, |
|
"learning_rate": 3.225028509122944e-06, |
|
"logits/chosen": -1.2425084114074707, |
|
"logits/rejected": -1.1278479099273682, |
|
"logps/chosen": -481.4998474121094, |
|
"logps/rejected": -560.8279418945312, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.449826717376709, |
|
"rewards/margins": 0.9064075350761414, |
|
"rewards/rejected": -3.356234073638916, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 13.9375, |
|
"learning_rate": 3.2031357088857083e-06, |
|
"logits/chosen": -1.2350413799285889, |
|
"logits/rejected": -1.1462427377700806, |
|
"logps/chosen": -549.2757568359375, |
|
"logps/rejected": -646.181640625, |
|
"loss": 0.5022, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.7407171726226807, |
|
"rewards/margins": 1.003739595413208, |
|
"rewards/rejected": -3.7444565296173096, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 14.625, |
|
"learning_rate": 3.181184197019127e-06, |
|
"logits/chosen": -0.9863433837890625, |
|
"logits/rejected": -0.8817607164382935, |
|
"logps/chosen": -533.1535034179688, |
|
"logps/rejected": -698.6467895507812, |
|
"loss": 0.4825, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.029388189315796, |
|
"rewards/margins": 1.3928486108779907, |
|
"rewards/rejected": -4.422236442565918, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": -1.0005792379379272, |
|
"eval_logits/rejected": -0.8737620115280151, |
|
"eval_logps/chosen": -569.109130859375, |
|
"eval_logps/rejected": -657.4884033203125, |
|
"eval_loss": 0.503667414188385, |
|
"eval_rewards/accuracies": 0.7379999756813049, |
|
"eval_rewards/chosen": -3.0445713996887207, |
|
"eval_rewards/margins": 1.0842331647872925, |
|
"eval_rewards/rejected": -4.1288042068481445, |
|
"eval_runtime": 382.2565, |
|
"eval_samples_per_second": 5.232, |
|
"eval_steps_per_second": 0.654, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 14.3125, |
|
"learning_rate": 3.159175806468126e-06, |
|
"logits/chosen": -1.0082833766937256, |
|
"logits/rejected": -0.8253539800643921, |
|
"logps/chosen": -556.5079956054688, |
|
"logps/rejected": -636.0127563476562, |
|
"loss": 0.5015, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.07832407951355, |
|
"rewards/margins": 1.0969042778015137, |
|
"rewards/rejected": -4.175228595733643, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 11.1875, |
|
"learning_rate": 3.1371123749269804e-06, |
|
"logits/chosen": -1.1307703256607056, |
|
"logits/rejected": -1.0529394149780273, |
|
"logps/chosen": -595.5393676757812, |
|
"logps/rejected": -662.37158203125, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.1214325428009033, |
|
"rewards/margins": 0.8287679553031921, |
|
"rewards/rejected": -3.950200319290161, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 3.114995744685877e-06, |
|
"logits/chosen": -1.07692551612854, |
|
"logits/rejected": -1.0323340892791748, |
|
"logps/chosen": -533.2166748046875, |
|
"logps/rejected": -612.94140625, |
|
"loss": 0.5153, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.8589041233062744, |
|
"rewards/margins": 0.9276583790779114, |
|
"rewards/rejected": -3.786562442779541, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 6.40625, |
|
"learning_rate": 3.0928277624770743e-06, |
|
"logits/chosen": -1.2703588008880615, |
|
"logits/rejected": -1.0852762460708618, |
|
"logps/chosen": -551.0806274414062, |
|
"logps/rejected": -643.0982666015625, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.663365125656128, |
|
"rewards/margins": 1.2043039798736572, |
|
"rewards/rejected": -3.8676695823669434, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 6.8125, |
|
"learning_rate": 3.070610279320708e-06, |
|
"logits/chosen": -1.248780608177185, |
|
"logits/rejected": -1.084285020828247, |
|
"logps/chosen": -551.0938110351562, |
|
"logps/rejected": -643.5797729492188, |
|
"loss": 0.4411, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.640122652053833, |
|
"rewards/margins": 1.165264368057251, |
|
"rewards/rejected": -3.805387020111084, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 6.09375, |
|
"learning_rate": 3.0483451503702264e-06, |
|
"logits/chosen": -1.1745688915252686, |
|
"logits/rejected": -1.0959160327911377, |
|
"logps/chosen": -581.6795654296875, |
|
"logps/rejected": -661.7645263671875, |
|
"loss": 0.5518, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.9558444023132324, |
|
"rewards/margins": 1.0012142658233643, |
|
"rewards/rejected": -3.9570584297180176, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 11.875, |
|
"learning_rate": 3.0260342347574916e-06, |
|
"logits/chosen": -1.1434388160705566, |
|
"logits/rejected": -0.9975016713142395, |
|
"logps/chosen": -543.2282104492188, |
|
"logps/rejected": -666.7279052734375, |
|
"loss": 0.4206, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.7089195251464844, |
|
"rewards/margins": 1.425309419631958, |
|
"rewards/rejected": -4.134228706359863, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 11.0, |
|
"learning_rate": 3.0036793954375358e-06, |
|
"logits/chosen": -1.0967297554016113, |
|
"logits/rejected": -0.9473203420639038, |
|
"logps/chosen": -603.4558715820312, |
|
"logps/rejected": -692.9251708984375, |
|
"loss": 0.4466, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.3335928916931152, |
|
"rewards/margins": 1.3170349597930908, |
|
"rewards/rejected": -4.650628089904785, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 13.0, |
|
"learning_rate": 2.981282499033009e-06, |
|
"logits/chosen": -1.0985617637634277, |
|
"logits/rejected": -0.9863265156745911, |
|
"logps/chosen": -607.0682373046875, |
|
"logps/rejected": -701.697509765625, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.307284116744995, |
|
"rewards/margins": 1.200660228729248, |
|
"rewards/rejected": -4.507944583892822, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 2.9588454156783163e-06, |
|
"logits/chosen": -1.1454726457595825, |
|
"logits/rejected": -0.9831218719482422, |
|
"logps/chosen": -579.2799682617188, |
|
"logps/rejected": -706.1749877929688, |
|
"loss": 0.4455, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.979241371154785, |
|
"rewards/margins": 1.4865919351577759, |
|
"rewards/rejected": -4.4658331871032715, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_logits/chosen": -1.0213502645492554, |
|
"eval_logits/rejected": -0.891007125377655, |
|
"eval_logps/chosen": -566.8839721679688, |
|
"eval_logps/rejected": -659.4305419921875, |
|
"eval_loss": 0.49620321393013, |
|
"eval_rewards/accuracies": 0.7419999837875366, |
|
"eval_rewards/chosen": -3.0223195552825928, |
|
"eval_rewards/margins": 1.1259068250656128, |
|
"eval_rewards/rejected": -4.148226737976074, |
|
"eval_runtime": 382.1041, |
|
"eval_samples_per_second": 5.234, |
|
"eval_steps_per_second": 0.654, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 10.8125, |
|
"learning_rate": 2.9363700188634597e-06, |
|
"logits/chosen": -1.1352207660675049, |
|
"logits/rejected": -1.0086506605148315, |
|
"logps/chosen": -588.1229858398438, |
|
"logps/rejected": -648.9054565429688, |
|
"loss": 0.5063, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.230529308319092, |
|
"rewards/margins": 0.9782280921936035, |
|
"rewards/rejected": -4.208757400512695, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 13.375, |
|
"learning_rate": 2.9138581852776053e-06, |
|
"logits/chosen": -1.1499899625778198, |
|
"logits/rejected": -1.0288715362548828, |
|
"logps/chosen": -581.2144775390625, |
|
"logps/rejected": -680.3140869140625, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.2150332927703857, |
|
"rewards/margins": 1.1205800771713257, |
|
"rewards/rejected": -4.335613250732422, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 2.8913117946523805e-06, |
|
"logits/chosen": -1.1651884317398071, |
|
"logits/rejected": -0.9733787775039673, |
|
"logps/chosen": -579.3433227539062, |
|
"logps/rejected": -649.0181884765625, |
|
"loss": 0.4634, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.129295825958252, |
|
"rewards/margins": 1.077726125717163, |
|
"rewards/rejected": -4.207022190093994, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 11.375, |
|
"learning_rate": 2.8687327296049126e-06, |
|
"logits/chosen": -1.163464069366455, |
|
"logits/rejected": -1.0617696046829224, |
|
"logps/chosen": -556.2322998046875, |
|
"logps/rejected": -651.5863037109375, |
|
"loss": 0.5142, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.971095561981201, |
|
"rewards/margins": 1.0506844520568848, |
|
"rewards/rejected": -4.021780014038086, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 12.8125, |
|
"learning_rate": 2.8461228754806376e-06, |
|
"logits/chosen": -1.185319185256958, |
|
"logits/rejected": -1.0036907196044922, |
|
"logps/chosen": -566.9384155273438, |
|
"logps/rejected": -628.1956787109375, |
|
"loss": 0.5404, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.911479949951172, |
|
"rewards/margins": 0.8705935478210449, |
|
"rewards/rejected": -3.782073497772217, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 7.09375, |
|
"learning_rate": 2.823484120195865e-06, |
|
"logits/chosen": -1.3058470487594604, |
|
"logits/rejected": -1.113465666770935, |
|
"logps/chosen": -529.6067504882812, |
|
"logps/rejected": -606.2987060546875, |
|
"loss": 0.4364, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.5179548263549805, |
|
"rewards/margins": 1.1106722354888916, |
|
"rewards/rejected": -3.628627061843872, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 8.75, |
|
"learning_rate": 2.8008183540801486e-06, |
|
"logits/chosen": -1.12172269821167, |
|
"logits/rejected": -0.968579888343811, |
|
"logps/chosen": -553.111083984375, |
|
"logps/rejected": -600.1488037109375, |
|
"loss": 0.5074, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.7947652339935303, |
|
"rewards/margins": 0.9243541955947876, |
|
"rewards/rejected": -3.7191195487976074, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 10.75, |
|
"learning_rate": 2.7781274697184353e-06, |
|
"logits/chosen": -0.9661678075790405, |
|
"logits/rejected": -0.9819488525390625, |
|
"logps/chosen": -551.6143798828125, |
|
"logps/rejected": -679.9763793945312, |
|
"loss": 0.5141, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.2225677967071533, |
|
"rewards/margins": 1.0803557634353638, |
|
"rewards/rejected": -4.30292272567749, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 7.625, |
|
"learning_rate": 2.7554133617930397e-06, |
|
"logits/chosen": -1.0553234815597534, |
|
"logits/rejected": -0.9197478294372559, |
|
"logps/chosen": -592.0967407226562, |
|
"logps/rejected": -687.3663940429688, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.442605495452881, |
|
"rewards/margins": 1.1034131050109863, |
|
"rewards/rejected": -4.546019077301025, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 11.375, |
|
"learning_rate": 2.7326779269254363e-06, |
|
"logits/chosen": -1.1949965953826904, |
|
"logits/rejected": -1.0267183780670166, |
|
"logps/chosen": -653.2984619140625, |
|
"logps/rejected": -709.1905517578125, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.6396350860595703, |
|
"rewards/margins": 1.1184080839157104, |
|
"rewards/rejected": -4.75804328918457, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -0.9427788257598877, |
|
"eval_logits/rejected": -0.8139032125473022, |
|
"eval_logps/chosen": -624.5250244140625, |
|
"eval_logps/rejected": -711.0853271484375, |
|
"eval_loss": 0.49741417169570923, |
|
"eval_rewards/accuracies": 0.746999979019165, |
|
"eval_rewards/chosen": -3.5987296104431152, |
|
"eval_rewards/margins": 1.0660440921783447, |
|
"eval_rewards/rejected": -4.664773941040039, |
|
"eval_runtime": 382.3502, |
|
"eval_samples_per_second": 5.231, |
|
"eval_steps_per_second": 0.654, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 10.5625, |
|
"learning_rate": 2.7099230635178954e-06, |
|
"logits/chosen": -1.0279147624969482, |
|
"logits/rejected": -0.9855324625968933, |
|
"logps/chosen": -615.8596801757812, |
|
"logps/rejected": -704.7830200195312, |
|
"loss": 0.5276, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.510098934173584, |
|
"rewards/margins": 0.954069972038269, |
|
"rewards/rejected": -4.464169025421143, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 9.625, |
|
"learning_rate": 2.6871506715949608e-06, |
|
"logits/chosen": -1.177202582359314, |
|
"logits/rejected": -1.0146461725234985, |
|
"logps/chosen": -568.2487182617188, |
|
"logps/rejected": -659.0941162109375, |
|
"loss": 0.4583, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.152796745300293, |
|
"rewards/margins": 1.0889527797698975, |
|
"rewards/rejected": -4.2417497634887695, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 13.6875, |
|
"learning_rate": 2.6643626526448063e-06, |
|
"logits/chosen": -1.2432745695114136, |
|
"logits/rejected": -1.0716017484664917, |
|
"logps/chosen": -619.502685546875, |
|
"logps/rejected": -699.7628173828125, |
|
"loss": 0.4576, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.2460086345672607, |
|
"rewards/margins": 1.2264302968978882, |
|
"rewards/rejected": -4.472439289093018, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 9.875, |
|
"learning_rate": 2.6415609094604562e-06, |
|
"logits/chosen": -1.0596590042114258, |
|
"logits/rejected": -1.0028278827667236, |
|
"logps/chosen": -631.6947021484375, |
|
"logps/rejected": -728.5841674804688, |
|
"loss": 0.4471, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.6112823486328125, |
|
"rewards/margins": 1.1590890884399414, |
|
"rewards/rejected": -4.770371437072754, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 11.375, |
|
"learning_rate": 2.618747345980904e-06, |
|
"logits/chosen": -1.067651629447937, |
|
"logits/rejected": -0.8701795339584351, |
|
"logps/chosen": -667.7681274414062, |
|
"logps/rejected": -718.9295654296875, |
|
"loss": 0.5561, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -4.1937079429626465, |
|
"rewards/margins": 1.016485333442688, |
|
"rewards/rejected": -5.210193634033203, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 6.125, |
|
"learning_rate": 2.595923867132136e-06, |
|
"logits/chosen": -1.1067336797714233, |
|
"logits/rejected": -0.9798781275749207, |
|
"logps/chosen": -685.84228515625, |
|
"logps/rejected": -784.4832763671875, |
|
"loss": 0.4938, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -4.049218654632568, |
|
"rewards/margins": 1.2331972122192383, |
|
"rewards/rejected": -5.282416343688965, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 2.5730923786680672e-06, |
|
"logits/chosen": -1.017889380455017, |
|
"logits/rejected": -1.0066477060317993, |
|
"logps/chosen": -639.3632202148438, |
|
"logps/rejected": -738.4698486328125, |
|
"loss": 0.5372, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -3.8783206939697266, |
|
"rewards/margins": 0.9146150350570679, |
|
"rewards/rejected": -4.792935848236084, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 2.5502547870114137e-06, |
|
"logits/chosen": -1.1123883724212646, |
|
"logits/rejected": -0.9572793245315552, |
|
"logps/chosen": -607.7706909179688, |
|
"logps/rejected": -670.916015625, |
|
"loss": 0.5255, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.5239059925079346, |
|
"rewards/margins": 0.9338981509208679, |
|
"rewards/rejected": -4.457803726196289, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 13.375, |
|
"learning_rate": 2.527412999094507e-06, |
|
"logits/chosen": -1.118983507156372, |
|
"logits/rejected": -0.9597452282905579, |
|
"logps/chosen": -620.9295043945312, |
|
"logps/rejected": -721.0320434570312, |
|
"loss": 0.4802, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.2717292308807373, |
|
"rewards/margins": 1.1265954971313477, |
|
"rewards/rejected": -4.398324489593506, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 11.75, |
|
"learning_rate": 2.504568922200064e-06, |
|
"logits/chosen": -1.075067400932312, |
|
"logits/rejected": -0.937818706035614, |
|
"logps/chosen": -547.7574462890625, |
|
"logps/rejected": -641.327392578125, |
|
"loss": 0.5079, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.0641894340515137, |
|
"rewards/margins": 1.0973466634750366, |
|
"rewards/rejected": -4.161535739898682, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_logits/chosen": -1.0030875205993652, |
|
"eval_logits/rejected": -0.8739129900932312, |
|
"eval_logps/chosen": -582.1657104492188, |
|
"eval_logps/rejected": -667.5426025390625, |
|
"eval_loss": 0.4922982156276703, |
|
"eval_rewards/accuracies": 0.7519999742507935, |
|
"eval_rewards/chosen": -3.1751370429992676, |
|
"eval_rewards/margins": 1.0542099475860596, |
|
"eval_rewards/rejected": -4.229346752166748, |
|
"eval_runtime": 382.3169, |
|
"eval_samples_per_second": 5.231, |
|
"eval_steps_per_second": 0.654, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 2.4817244638019333e-06, |
|
"logits/chosen": -1.137091875076294, |
|
"logits/rejected": -0.9877273440361023, |
|
"logps/chosen": -593.8831787109375, |
|
"logps/rejected": -648.8990478515625, |
|
"loss": 0.5394, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.1807122230529785, |
|
"rewards/margins": 0.9622472524642944, |
|
"rewards/rejected": -4.1429595947265625, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 14.1875, |
|
"learning_rate": 2.4588815314058155e-06, |
|
"logits/chosen": -1.117033839225769, |
|
"logits/rejected": -1.0428097248077393, |
|
"logps/chosen": -536.7808227539062, |
|
"logps/rejected": -599.55908203125, |
|
"loss": 0.4755, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.912168025970459, |
|
"rewards/margins": 0.9705360531806946, |
|
"rewards/rejected": -3.882704257965088, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 9.0625, |
|
"learning_rate": 2.4360420323899922e-06, |
|
"logits/chosen": -1.1962370872497559, |
|
"logits/rejected": -1.0757726430892944, |
|
"logps/chosen": -545.7897338867188, |
|
"logps/rejected": -594.7244873046875, |
|
"loss": 0.5644, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.6997714042663574, |
|
"rewards/margins": 0.8151930570602417, |
|
"rewards/rejected": -3.5149643421173096, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 6.75, |
|
"learning_rate": 2.4132078738460585e-06, |
|
"logits/chosen": -1.2405675649642944, |
|
"logits/rejected": -1.0946118831634521, |
|
"logps/chosen": -528.01611328125, |
|
"logps/rejected": -594.1393432617188, |
|
"loss": 0.4643, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.6354479789733887, |
|
"rewards/margins": 1.046671748161316, |
|
"rewards/rejected": -3.682119846343994, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 12.5625, |
|
"learning_rate": 2.3903809624196826e-06, |
|
"logits/chosen": -1.1746861934661865, |
|
"logits/rejected": -1.0529396533966064, |
|
"logps/chosen": -520.6478271484375, |
|
"logps/rejected": -572.0309448242188, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.8694403171539307, |
|
"rewards/margins": 0.8386090397834778, |
|
"rewards/rejected": -3.7080490589141846, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 11.25, |
|
"learning_rate": 2.3675632041513978e-06, |
|
"logits/chosen": -1.2890937328338623, |
|
"logits/rejected": -1.0460366010665894, |
|
"logps/chosen": -595.07275390625, |
|
"logps/rejected": -639.810791015625, |
|
"loss": 0.4788, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.0806915760040283, |
|
"rewards/margins": 1.094292402267456, |
|
"rewards/rejected": -4.174983978271484, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 11.5, |
|
"learning_rate": 2.3447565043174533e-06, |
|
"logits/chosen": -1.1292383670806885, |
|
"logits/rejected": -0.9545844793319702, |
|
"logps/chosen": -596.5003662109375, |
|
"logps/rejected": -650.0792236328125, |
|
"loss": 0.5136, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.3818931579589844, |
|
"rewards/margins": 0.922932505607605, |
|
"rewards/rejected": -4.304825782775879, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 12.0, |
|
"learning_rate": 2.321962767270724e-06, |
|
"logits/chosen": -1.158575415611267, |
|
"logits/rejected": -1.0298246145248413, |
|
"logps/chosen": -583.9124755859375, |
|
"logps/rejected": -629.5396118164062, |
|
"loss": 0.5615, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -3.3395965099334717, |
|
"rewards/margins": 0.8280007243156433, |
|
"rewards/rejected": -4.16759729385376, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 8.75, |
|
"learning_rate": 2.299183896281692e-06, |
|
"logits/chosen": -1.088763952255249, |
|
"logits/rejected": -0.9791523218154907, |
|
"logps/chosen": -556.0525512695312, |
|
"logps/rejected": -641.457763671875, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.0112056732177734, |
|
"rewards/margins": 0.8770611882209778, |
|
"rewards/rejected": -3.8882670402526855, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 7.25, |
|
"learning_rate": 2.2764217933795297e-06, |
|
"logits/chosen": -1.2351996898651123, |
|
"logits/rejected": -1.1065688133239746, |
|
"logps/chosen": -519.6819458007812, |
|
"logps/rejected": -608.1278686523438, |
|
"loss": 0.477, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.5824990272521973, |
|
"rewards/margins": 1.0897197723388672, |
|
"rewards/rejected": -3.6722190380096436, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -1.0880188941955566, |
|
"eval_logits/rejected": -0.9566530585289001, |
|
"eval_logps/chosen": -525.9181518554688, |
|
"eval_logps/rejected": -601.7401733398438, |
|
"eval_loss": 0.48973530530929565, |
|
"eval_rewards/accuracies": 0.7409999966621399, |
|
"eval_rewards/chosen": -2.612661123275757, |
|
"eval_rewards/margins": 0.9586613774299622, |
|
"eval_rewards/rejected": -3.571322441101074, |
|
"eval_runtime": 382.0537, |
|
"eval_samples_per_second": 5.235, |
|
"eval_steps_per_second": 0.654, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 2.2536783591932786e-06, |
|
"logits/chosen": -1.2977464199066162, |
|
"logits/rejected": -1.1296590566635132, |
|
"logps/chosen": -553.06103515625, |
|
"logps/rejected": -621.307861328125, |
|
"loss": 0.5291, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.7755794525146484, |
|
"rewards/margins": 0.8637927174568176, |
|
"rewards/rejected": -3.6393723487854004, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 7.84375, |
|
"learning_rate": 2.230955492793149e-06, |
|
"logits/chosen": -1.0942963361740112, |
|
"logits/rejected": -1.042419195175171, |
|
"logps/chosen": -573.537841796875, |
|
"logps/rejected": -642.611572265625, |
|
"loss": 0.5884, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.997450351715088, |
|
"rewards/margins": 0.8198318481445312, |
|
"rewards/rejected": -3.8172824382781982, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 2.208255091531947e-06, |
|
"logits/chosen": -1.1044989824295044, |
|
"logits/rejected": -1.0208889245986938, |
|
"logps/chosen": -553.853515625, |
|
"logps/rejected": -632.1079711914062, |
|
"loss": 0.4853, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.7685611248016357, |
|
"rewards/margins": 1.132253646850586, |
|
"rewards/rejected": -3.9008147716522217, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 11.75, |
|
"learning_rate": 2.1855790508866435e-06, |
|
"logits/chosen": -1.1996960639953613, |
|
"logits/rejected": -1.0961394309997559, |
|
"logps/chosen": -557.0603637695312, |
|
"logps/rejected": -641.5968017578125, |
|
"loss": 0.5037, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.600390672683716, |
|
"rewards/margins": 1.021994948387146, |
|
"rewards/rejected": -3.6223855018615723, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 6.28125, |
|
"learning_rate": 2.162929264300107e-06, |
|
"logits/chosen": -1.2133983373641968, |
|
"logits/rejected": -1.109574556350708, |
|
"logps/chosen": -511.7315979003906, |
|
"logps/rejected": -615.6173095703125, |
|
"loss": 0.416, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.4276764392852783, |
|
"rewards/margins": 1.2624719142913818, |
|
"rewards/rejected": -3.690148115158081, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 2.1403076230230006e-06, |
|
"logits/chosen": -1.1181437969207764, |
|
"logits/rejected": -0.9982963800430298, |
|
"logps/chosen": -565.5302124023438, |
|
"logps/rejected": -622.5106811523438, |
|
"loss": 0.5759, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.9434773921966553, |
|
"rewards/margins": 0.8478938341140747, |
|
"rewards/rejected": -3.7913711071014404, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 7.59375, |
|
"learning_rate": 2.11771601595586e-06, |
|
"logits/chosen": -1.2033512592315674, |
|
"logits/rejected": -1.0716886520385742, |
|
"logps/chosen": -557.2864379882812, |
|
"logps/rejected": -603.1704711914062, |
|
"loss": 0.5099, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.752382755279541, |
|
"rewards/margins": 0.9814404249191284, |
|
"rewards/rejected": -3.73382306098938, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 12.8125, |
|
"learning_rate": 2.0951563294913737e-06, |
|
"logits/chosen": -1.177409052848816, |
|
"logits/rejected": -0.9869596362113953, |
|
"logps/chosen": -525.6967163085938, |
|
"logps/rejected": -594.2974853515625, |
|
"loss": 0.4644, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.641634941101074, |
|
"rewards/margins": 0.9909149408340454, |
|
"rewards/rejected": -3.63254976272583, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 2.0726304473568693e-06, |
|
"logits/chosen": -1.1395372152328491, |
|
"logits/rejected": -1.0176304578781128, |
|
"logps/chosen": -522.652099609375, |
|
"logps/rejected": -593.3766479492188, |
|
"loss": 0.4738, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.6371326446533203, |
|
"rewards/margins": 1.0305713415145874, |
|
"rewards/rejected": -3.667703628540039, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 2.050140250457023e-06, |
|
"logits/chosen": -1.2590233087539673, |
|
"logits/rejected": -1.052428960800171, |
|
"logps/chosen": -578.8065185546875, |
|
"logps/rejected": -654.0260009765625, |
|
"loss": 0.4829, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.0124454498291016, |
|
"rewards/margins": 1.0927618741989136, |
|
"rewards/rejected": -4.1052069664001465, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_logits/chosen": -1.0313422679901123, |
|
"eval_logits/rejected": -0.9032019972801208, |
|
"eval_logps/chosen": -559.955810546875, |
|
"eval_logps/rejected": -654.1510620117188, |
|
"eval_loss": 0.4887396991252899, |
|
"eval_rewards/accuracies": 0.7484999895095825, |
|
"eval_rewards/chosen": -2.953037738800049, |
|
"eval_rewards/margins": 1.1423934698104858, |
|
"eval_rewards/rejected": -4.095431804656982, |
|
"eval_runtime": 381.9442, |
|
"eval_samples_per_second": 5.236, |
|
"eval_steps_per_second": 0.655, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 14.625, |
|
"learning_rate": 2.0276876167168042e-06, |
|
"logits/chosen": -1.0072084665298462, |
|
"logits/rejected": -0.9061794281005859, |
|
"logps/chosen": -509.2284240722656, |
|
"logps/rejected": -580.1068725585938, |
|
"loss": 0.5548, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.8472893238067627, |
|
"rewards/margins": 1.0346016883850098, |
|
"rewards/rejected": -3.8818912506103516, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 8.25, |
|
"learning_rate": 2.0052744209246682e-06, |
|
"logits/chosen": -1.1624600887298584, |
|
"logits/rejected": -1.04361891746521, |
|
"logps/chosen": -552.9761962890625, |
|
"logps/rejected": -621.9478759765625, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.9763803482055664, |
|
"rewards/margins": 1.018448829650879, |
|
"rewards/rejected": -3.9948291778564453, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 10.75, |
|
"learning_rate": 1.9829025345760127e-06, |
|
"logits/chosen": -1.1844617128372192, |
|
"logits/rejected": -1.1262612342834473, |
|
"logps/chosen": -559.8540649414062, |
|
"logps/rejected": -640.3355712890625, |
|
"loss": 0.549, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.8085100650787354, |
|
"rewards/margins": 0.865519642829895, |
|
"rewards/rejected": -3.67402982711792, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 1.9605738257169115e-06, |
|
"logits/chosen": -1.1309086084365845, |
|
"logits/rejected": -0.9911936521530151, |
|
"logps/chosen": -502.54608154296875, |
|
"logps/rejected": -611.60693359375, |
|
"loss": 0.4877, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.6956772804260254, |
|
"rewards/margins": 1.1704528331756592, |
|
"rewards/rejected": -3.8661301136016846, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 1.9382901587881275e-06, |
|
"logits/chosen": -1.196989893913269, |
|
"logits/rejected": -1.0731130838394165, |
|
"logps/chosen": -527.642578125, |
|
"logps/rejected": -616.3968505859375, |
|
"loss": 0.4233, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.6888630390167236, |
|
"rewards/margins": 1.2105457782745361, |
|
"rewards/rejected": -3.8994088172912598, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 11.9375, |
|
"learning_rate": 1.916053394469437e-06, |
|
"logits/chosen": -1.2187442779541016, |
|
"logits/rejected": -1.0278013944625854, |
|
"logps/chosen": -555.1328125, |
|
"logps/rejected": -650.1771240234375, |
|
"loss": 0.5309, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.9184062480926514, |
|
"rewards/margins": 1.0958768129348755, |
|
"rewards/rejected": -4.014283180236816, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 1.8938653895242604e-06, |
|
"logits/chosen": -1.173482894897461, |
|
"logits/rejected": -0.9950237274169922, |
|
"logps/chosen": -563.7232666015625, |
|
"logps/rejected": -654.51611328125, |
|
"loss": 0.4349, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.9941768646240234, |
|
"rewards/margins": 1.1962960958480835, |
|
"rewards/rejected": -4.1904730796813965, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 1.8717279966446267e-06, |
|
"logits/chosen": -1.0182400941848755, |
|
"logits/rejected": -0.9381190538406372, |
|
"logps/chosen": -567.86376953125, |
|
"logps/rejected": -672.0901489257812, |
|
"loss": 0.4496, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.1928865909576416, |
|
"rewards/margins": 1.1378134489059448, |
|
"rewards/rejected": -4.330699920654297, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 6.90625, |
|
"learning_rate": 1.8496430642964698e-06, |
|
"logits/chosen": -1.0953130722045898, |
|
"logits/rejected": -0.9763644337654114, |
|
"logps/chosen": -591.7195434570312, |
|
"logps/rejected": -673.8305053710938, |
|
"loss": 0.4954, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.1930747032165527, |
|
"rewards/margins": 1.0575921535491943, |
|
"rewards/rejected": -4.250667095184326, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 7.96875, |
|
"learning_rate": 1.827612436565286e-06, |
|
"logits/chosen": -1.093685507774353, |
|
"logits/rejected": -0.9428181648254395, |
|
"logps/chosen": -569.9864501953125, |
|
"logps/rejected": -664.4702758789062, |
|
"loss": 0.4752, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.0702195167541504, |
|
"rewards/margins": 1.1502970457077026, |
|
"rewards/rejected": -4.220516681671143, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -0.9764781594276428, |
|
"eval_logits/rejected": -0.849520742893219, |
|
"eval_logps/chosen": -579.4506225585938, |
|
"eval_logps/rejected": -672.75830078125, |
|
"eval_loss": 0.49094268679618835, |
|
"eval_rewards/accuracies": 0.7444999814033508, |
|
"eval_rewards/chosen": -3.147986888885498, |
|
"eval_rewards/margins": 1.1335173845291138, |
|
"eval_rewards/rejected": -4.281503677368164, |
|
"eval_runtime": 382.2569, |
|
"eval_samples_per_second": 5.232, |
|
"eval_steps_per_second": 0.654, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 18.625, |
|
"learning_rate": 1.8056379530021492e-06, |
|
"logits/chosen": -1.1393061876296997, |
|
"logits/rejected": -1.0437672138214111, |
|
"logps/chosen": -565.1177978515625, |
|
"logps/rejected": -631.9932861328125, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.2091946601867676, |
|
"rewards/margins": 0.9168522953987122, |
|
"rewards/rejected": -4.126046180725098, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 10.375, |
|
"learning_rate": 1.7837214484701154e-06, |
|
"logits/chosen": -1.182935118675232, |
|
"logits/rejected": -1.0437054634094238, |
|
"logps/chosen": -523.6812133789062, |
|
"logps/rejected": -616.8724975585938, |
|
"loss": 0.4678, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.713310480117798, |
|
"rewards/margins": 1.1654255390167236, |
|
"rewards/rejected": -3.8787360191345215, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 14.125, |
|
"learning_rate": 1.7618647529910043e-06, |
|
"logits/chosen": -1.1824162006378174, |
|
"logits/rejected": -1.051477313041687, |
|
"logps/chosen": -526.3547973632812, |
|
"logps/rejected": -624.6488647460938, |
|
"loss": 0.4987, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.6958136558532715, |
|
"rewards/margins": 1.1019628047943115, |
|
"rewards/rejected": -3.797776460647583, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 9.375, |
|
"learning_rate": 1.7400696915925996e-06, |
|
"logits/chosen": -1.1761425733566284, |
|
"logits/rejected": -0.9889799952507019, |
|
"logps/chosen": -560.6347045898438, |
|
"logps/rejected": -604.340576171875, |
|
"loss": 0.5198, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.900243043899536, |
|
"rewards/margins": 1.019816279411316, |
|
"rewards/rejected": -3.9200592041015625, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 1.718338084156254e-06, |
|
"logits/chosen": -1.1455858945846558, |
|
"logits/rejected": -0.9903894662857056, |
|
"logps/chosen": -568.4344482421875, |
|
"logps/rejected": -638.8942260742188, |
|
"loss": 0.4578, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.8164334297180176, |
|
"rewards/margins": 1.0884320735931396, |
|
"rewards/rejected": -3.9048657417297363, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 9.625, |
|
"learning_rate": 1.6966717452649372e-06, |
|
"logits/chosen": -1.2747197151184082, |
|
"logits/rejected": -1.101963758468628, |
|
"logps/chosen": -554.3800659179688, |
|
"logps/rejected": -616.3612060546875, |
|
"loss": 0.4412, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.774376392364502, |
|
"rewards/margins": 1.1384481191635132, |
|
"rewards/rejected": -3.9128241539001465, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 1.6750724840517103e-06, |
|
"logits/chosen": -1.2133910655975342, |
|
"logits/rejected": -1.1471474170684814, |
|
"logps/chosen": -530.1273193359375, |
|
"logps/rejected": -630.1476440429688, |
|
"loss": 0.5062, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.7732110023498535, |
|
"rewards/margins": 0.9591614007949829, |
|
"rewards/rejected": -3.7323715686798096, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 11.875, |
|
"learning_rate": 1.6535421040486686e-06, |
|
"logits/chosen": -1.0105046033859253, |
|
"logits/rejected": -0.9159660339355469, |
|
"logps/chosen": -560.3009643554688, |
|
"logps/rejected": -653.0996704101562, |
|
"loss": 0.4182, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.069490909576416, |
|
"rewards/margins": 1.270485520362854, |
|
"rewards/rejected": -4.3399763107299805, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 13.4375, |
|
"learning_rate": 1.6320824030363458e-06, |
|
"logits/chosen": -1.0919368267059326, |
|
"logits/rejected": -1.0423280000686646, |
|
"logps/chosen": -547.108154296875, |
|
"logps/rejected": -651.2943725585938, |
|
"loss": 0.4663, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.1227710247039795, |
|
"rewards/margins": 1.1962798833847046, |
|
"rewards/rejected": -4.3190507888793945, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 14.625, |
|
"learning_rate": 1.6106951728936028e-06, |
|
"logits/chosen": -1.1967922449111938, |
|
"logits/rejected": -1.0710703134536743, |
|
"logps/chosen": -573.5470581054688, |
|
"logps/rejected": -666.3677978515625, |
|
"loss": 0.5249, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -3.163433313369751, |
|
"rewards/margins": 1.0070708990097046, |
|
"rewards/rejected": -4.170504093170166, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": -1.0434505939483643, |
|
"eval_logits/rejected": -0.9135813117027283, |
|
"eval_logps/chosen": -574.00927734375, |
|
"eval_logps/rejected": -664.8961791992188, |
|
"eval_loss": 0.4891022741794586, |
|
"eval_rewards/accuracies": 0.7444999814033508, |
|
"eval_rewards/chosen": -3.0935721397399902, |
|
"eval_rewards/margins": 1.1093100309371948, |
|
"eval_rewards/rejected": -4.202882289886475, |
|
"eval_runtime": 382.3246, |
|
"eval_samples_per_second": 5.231, |
|
"eval_steps_per_second": 0.654, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 1.5893821994479996e-06, |
|
"logits/chosen": -1.1978858709335327, |
|
"logits/rejected": -1.0786705017089844, |
|
"logps/chosen": -573.3375244140625, |
|
"logps/rejected": -648.0001831054688, |
|
"loss": 0.4737, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.959294080734253, |
|
"rewards/margins": 1.132147192955017, |
|
"rewards/rejected": -4.0914411544799805, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 7.875, |
|
"learning_rate": 1.5681452623266868e-06, |
|
"logits/chosen": -1.1913158893585205, |
|
"logits/rejected": -0.9305517077445984, |
|
"logps/chosen": -603.19873046875, |
|
"logps/rejected": -671.5530395507812, |
|
"loss": 0.4638, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.1415326595306396, |
|
"rewards/margins": 1.2662583589553833, |
|
"rewards/rejected": -4.4077911376953125, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 6.15625, |
|
"learning_rate": 1.5469861348078014e-06, |
|
"logits/chosen": -1.1753239631652832, |
|
"logits/rejected": -1.0243064165115356, |
|
"logps/chosen": -557.4925537109375, |
|
"logps/rejected": -671.5239868164062, |
|
"loss": 0.4264, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.1072232723236084, |
|
"rewards/margins": 1.246586561203003, |
|
"rewards/rejected": -4.353809833526611, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 1.5259065836724035e-06, |
|
"logits/chosen": -1.0654633045196533, |
|
"logits/rejected": -0.9947797656059265, |
|
"logps/chosen": -555.5715942382812, |
|
"logps/rejected": -674.6041259765625, |
|
"loss": 0.428, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.1076245307922363, |
|
"rewards/margins": 1.2515560388565063, |
|
"rewards/rejected": -4.359180927276611, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 17.5, |
|
"learning_rate": 1.5049083690569456e-06, |
|
"logits/chosen": -1.117201328277588, |
|
"logits/rejected": -1.024710774421692, |
|
"logps/chosen": -542.8455200195312, |
|
"logps/rejected": -661.6935424804688, |
|
"loss": 0.4846, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.0537521839141846, |
|
"rewards/margins": 1.2326675653457642, |
|
"rewards/rejected": -4.286419868469238, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 13.75, |
|
"learning_rate": 1.4839932443063057e-06, |
|
"logits/chosen": -1.1161174774169922, |
|
"logits/rejected": -0.9579364061355591, |
|
"logps/chosen": -589.6568603515625, |
|
"logps/rejected": -655.3709716796875, |
|
"loss": 0.4618, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.976780414581299, |
|
"rewards/margins": 1.220205307006836, |
|
"rewards/rejected": -4.196985721588135, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 18.625, |
|
"learning_rate": 1.4631629558273803e-06, |
|
"logits/chosen": -1.1335794925689697, |
|
"logits/rejected": -1.004740595817566, |
|
"logps/chosen": -549.504150390625, |
|
"logps/rejected": -625.6862182617188, |
|
"loss": 0.631, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -3.0777907371520996, |
|
"rewards/margins": 0.8784114122390747, |
|
"rewards/rejected": -3.9562020301818848, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 6.03125, |
|
"learning_rate": 1.4424192429432657e-06, |
|
"logits/chosen": -1.2103271484375, |
|
"logits/rejected": -1.1048699617385864, |
|
"logps/chosen": -521.5680541992188, |
|
"logps/rejected": -641.9281616210938, |
|
"loss": 0.4666, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.6569151878356934, |
|
"rewards/margins": 1.1702333688735962, |
|
"rewards/rejected": -3.8271484375, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 13.0, |
|
"learning_rate": 1.421763837748016e-06, |
|
"logits/chosen": -1.1741114854812622, |
|
"logits/rejected": -1.0814844369888306, |
|
"logps/chosen": -523.6945190429688, |
|
"logps/rejected": -640.1383056640625, |
|
"loss": 0.4441, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.7306861877441406, |
|
"rewards/margins": 1.2494643926620483, |
|
"rewards/rejected": -3.9801506996154785, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 10.75, |
|
"learning_rate": 1.401198464962021e-06, |
|
"logits/chosen": -1.2068405151367188, |
|
"logits/rejected": -1.0479636192321777, |
|
"logps/chosen": -556.838623046875, |
|
"logps/rejected": -625.3237915039062, |
|
"loss": 0.4596, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.882551908493042, |
|
"rewards/margins": 1.0710302591323853, |
|
"rewards/rejected": -3.953582286834717, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": -1.0548917055130005, |
|
"eval_logits/rejected": -0.9263830184936523, |
|
"eval_logps/chosen": -559.5697631835938, |
|
"eval_logps/rejected": -654.4569702148438, |
|
"eval_loss": 0.493943989276886, |
|
"eval_rewards/accuracies": 0.7400000095367432, |
|
"eval_rewards/chosen": -2.9491782188415527, |
|
"eval_rewards/margins": 1.149312973022461, |
|
"eval_rewards/rejected": -4.098491191864014, |
|
"eval_runtime": 381.8434, |
|
"eval_samples_per_second": 5.238, |
|
"eval_steps_per_second": 0.655, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 8.5, |
|
"learning_rate": 1.3807248417879896e-06, |
|
"logits/chosen": -1.2618989944458008, |
|
"logits/rejected": -1.1420848369598389, |
|
"logps/chosen": -562.00146484375, |
|
"logps/rejected": -670.0994873046875, |
|
"loss": 0.4435, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.889355182647705, |
|
"rewards/margins": 1.2824347019195557, |
|
"rewards/rejected": -4.17179012298584, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 29.125, |
|
"learning_rate": 1.3603446777675665e-06, |
|
"logits/chosen": -1.0890090465545654, |
|
"logits/rejected": -0.966164767742157, |
|
"logps/chosen": -583.3985595703125, |
|
"logps/rejected": -678.4222412109375, |
|
"loss": 0.5331, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.2020103931427, |
|
"rewards/margins": 1.1710442304611206, |
|
"rewards/rejected": -4.373054504394531, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 6.84375, |
|
"learning_rate": 1.3400596746385817e-06, |
|
"logits/chosen": -1.2348748445510864, |
|
"logits/rejected": -1.083888053894043, |
|
"logps/chosen": -578.0357666015625, |
|
"logps/rejected": -659.4061279296875, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.088522434234619, |
|
"rewards/margins": 1.0845201015472412, |
|
"rewards/rejected": -4.173042297363281, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 1.3198715261929587e-06, |
|
"logits/chosen": -1.1974236965179443, |
|
"logits/rejected": -1.0507824420928955, |
|
"logps/chosen": -558.0233764648438, |
|
"logps/rejected": -667.1177978515625, |
|
"loss": 0.4239, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.2156710624694824, |
|
"rewards/margins": 1.2272260189056396, |
|
"rewards/rejected": -4.442896842956543, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 7.34375, |
|
"learning_rate": 1.2997819181352823e-06, |
|
"logits/chosen": -1.2283174991607666, |
|
"logits/rejected": -1.0654624700546265, |
|
"logps/chosen": -604.8272705078125, |
|
"logps/rejected": -724.4739379882812, |
|
"loss": 0.4118, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.079939365386963, |
|
"rewards/margins": 1.4414037466049194, |
|
"rewards/rejected": -4.521343231201172, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 23.625, |
|
"learning_rate": 1.2797925279420454e-06, |
|
"logits/chosen": -1.1807067394256592, |
|
"logits/rejected": -1.0574986934661865, |
|
"logps/chosen": -610.4517822265625, |
|
"logps/rejected": -721.9064331054688, |
|
"loss": 0.489, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.385681629180908, |
|
"rewards/margins": 1.2347917556762695, |
|
"rewards/rejected": -4.620473384857178, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 12.5, |
|
"learning_rate": 1.2599050247215764e-06, |
|
"logits/chosen": -1.129962682723999, |
|
"logits/rejected": -1.0201483964920044, |
|
"logps/chosen": -585.4744262695312, |
|
"logps/rejected": -686.8712158203125, |
|
"loss": 0.4794, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.2754268646240234, |
|
"rewards/margins": 1.2443504333496094, |
|
"rewards/rejected": -4.519776821136475, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 12.25, |
|
"learning_rate": 1.2401210690746705e-06, |
|
"logits/chosen": -1.155137300491333, |
|
"logits/rejected": -1.012924313545227, |
|
"logps/chosen": -587.5916748046875, |
|
"logps/rejected": -667.8207397460938, |
|
"loss": 0.5131, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.2043495178222656, |
|
"rewards/margins": 1.134204626083374, |
|
"rewards/rejected": -4.338554859161377, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 12.4375, |
|
"learning_rate": 1.2204423129559306e-06, |
|
"logits/chosen": -1.1951662302017212, |
|
"logits/rejected": -1.140353798866272, |
|
"logps/chosen": -567.091552734375, |
|
"logps/rejected": -681.1925048828125, |
|
"loss": 0.4925, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.0157859325408936, |
|
"rewards/margins": 1.1822477579116821, |
|
"rewards/rejected": -4.198033332824707, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 15.0625, |
|
"learning_rate": 1.20087039953583e-06, |
|
"logits/chosen": -1.2230998277664185, |
|
"logits/rejected": -1.1086806058883667, |
|
"logps/chosen": -558.0277099609375, |
|
"logps/rejected": -655.5286865234375, |
|
"loss": 0.5152, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.9380927085876465, |
|
"rewards/margins": 1.2388523817062378, |
|
"rewards/rejected": -4.176945209503174, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_logits/chosen": -1.052660346031189, |
|
"eval_logits/rejected": -0.9249356985092163, |
|
"eval_logps/chosen": -566.6193237304688, |
|
"eval_logps/rejected": -660.3236083984375, |
|
"eval_loss": 0.49224671721458435, |
|
"eval_rewards/accuracies": 0.7440000176429749, |
|
"eval_rewards/chosen": -3.0196733474731445, |
|
"eval_rewards/margins": 1.1374843120574951, |
|
"eval_rewards/rejected": -4.1571574211120605, |
|
"eval_runtime": 382.3055, |
|
"eval_samples_per_second": 5.231, |
|
"eval_steps_per_second": 0.654, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 10.625, |
|
"learning_rate": 1.181406963063507e-06, |
|
"logits/chosen": -1.1344083547592163, |
|
"logits/rejected": -1.0651142597198486, |
|
"logps/chosen": -557.28125, |
|
"logps/rejected": -663.6448974609375, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.9143428802490234, |
|
"rewards/margins": 1.0695984363555908, |
|
"rewards/rejected": -3.9839415550231934, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 6.84375, |
|
"learning_rate": 1.1620536287303052e-06, |
|
"logits/chosen": -1.2466278076171875, |
|
"logits/rejected": -1.1265995502471924, |
|
"logps/chosen": -571.1409301757812, |
|
"logps/rejected": -636.3128662109375, |
|
"loss": 0.5366, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.799872875213623, |
|
"rewards/margins": 0.9532085657119751, |
|
"rewards/rejected": -3.7530815601348877, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 1.1428120125340717e-06, |
|
"logits/chosen": -1.1743571758270264, |
|
"logits/rejected": -1.024549126625061, |
|
"logps/chosen": -524.5095825195312, |
|
"logps/rejected": -638.3724365234375, |
|
"loss": 0.3937, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.7037060260772705, |
|
"rewards/margins": 1.5533134937286377, |
|
"rewards/rejected": -4.257019519805908, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 1.123683721144223e-06, |
|
"logits/chosen": -1.186992883682251, |
|
"logits/rejected": -1.0803272724151611, |
|
"logps/chosen": -567.0985107421875, |
|
"logps/rejected": -677.031005859375, |
|
"loss": 0.4245, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.834251880645752, |
|
"rewards/margins": 1.422716498374939, |
|
"rewards/rejected": -4.256968021392822, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 6.96875, |
|
"learning_rate": 1.1046703517675848e-06, |
|
"logits/chosen": -1.1976065635681152, |
|
"logits/rejected": -1.1182498931884766, |
|
"logps/chosen": -537.647216796875, |
|
"logps/rejected": -647.6975708007812, |
|
"loss": 0.5195, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.8253092765808105, |
|
"rewards/margins": 1.0392690896987915, |
|
"rewards/rejected": -3.8645782470703125, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 1.085773492015028e-06, |
|
"logits/chosen": -1.1978458166122437, |
|
"logits/rejected": -1.0323292016983032, |
|
"logps/chosen": -516.9109497070312, |
|
"logps/rejected": -612.7794189453125, |
|
"loss": 0.4273, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.7053141593933105, |
|
"rewards/margins": 1.3017933368682861, |
|
"rewards/rejected": -4.007107257843018, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 18.5, |
|
"learning_rate": 1.0669947197689034e-06, |
|
"logits/chosen": -1.15623140335083, |
|
"logits/rejected": -1.0121409893035889, |
|
"logps/chosen": -561.9942626953125, |
|
"logps/rejected": -639.5707397460938, |
|
"loss": 0.5067, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.893584728240967, |
|
"rewards/margins": 1.0627275705337524, |
|
"rewards/rejected": -3.956312656402588, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 1.048335603051291e-06, |
|
"logits/chosen": -1.1546833515167236, |
|
"logits/rejected": -1.0220603942871094, |
|
"logps/chosen": -599.4776611328125, |
|
"logps/rejected": -710.4974975585938, |
|
"loss": 0.4331, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.1167383193969727, |
|
"rewards/margins": 1.39237380027771, |
|
"rewards/rejected": -4.5091118812561035, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 9.75, |
|
"learning_rate": 1.0297976998930665e-06, |
|
"logits/chosen": -1.1516591310501099, |
|
"logits/rejected": -1.0285645723342896, |
|
"logps/chosen": -560.0816650390625, |
|
"logps/rejected": -675.4591064453125, |
|
"loss": 0.4367, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.9946718215942383, |
|
"rewards/margins": 1.4317247867584229, |
|
"rewards/rejected": -4.42639684677124, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 1.0113825582038078e-06, |
|
"logits/chosen": -1.1821314096450806, |
|
"logits/rejected": -1.0650185346603394, |
|
"logps/chosen": -576.8660278320312, |
|
"logps/rejected": -679.5147705078125, |
|
"loss": 0.4518, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.115962266921997, |
|
"rewards/margins": 1.1937239170074463, |
|
"rewards/rejected": -4.309685707092285, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -1.053481936454773, |
|
"eval_logits/rejected": -0.9260234236717224, |
|
"eval_logps/chosen": -571.3138427734375, |
|
"eval_logps/rejected": -668.0293579101562, |
|
"eval_loss": 0.49084553122520447, |
|
"eval_rewards/accuracies": 0.7415000200271606, |
|
"eval_rewards/chosen": -3.066617965698242, |
|
"eval_rewards/margins": 1.1675963401794434, |
|
"eval_rewards/rejected": -4.2342143058776855, |
|
"eval_runtime": 382.1708, |
|
"eval_samples_per_second": 5.233, |
|
"eval_steps_per_second": 0.654, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 10.625, |
|
"learning_rate": 9.930917156425477e-07, |
|
"logits/chosen": -1.1559561491012573, |
|
"logits/rejected": -1.0568530559539795, |
|
"logps/chosen": -582.1268310546875, |
|
"logps/rejected": -691.96875, |
|
"loss": 0.5368, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.210031509399414, |
|
"rewards/margins": 1.1272989511489868, |
|
"rewards/rejected": -4.337330341339111, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 15.25, |
|
"learning_rate": 9.749266994893756e-07, |
|
"logits/chosen": -1.0973955392837524, |
|
"logits/rejected": -0.9485132098197937, |
|
"logps/chosen": -550.6517333984375, |
|
"logps/rejected": -629.6903686523438, |
|
"loss": 0.5621, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.0995354652404785, |
|
"rewards/margins": 0.9246597290039062, |
|
"rewards/rejected": -4.024195671081543, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 15.3125, |
|
"learning_rate": 9.56889026517913e-07, |
|
"logits/chosen": -1.1514110565185547, |
|
"logits/rejected": -1.0361002683639526, |
|
"logps/chosen": -582.6224365234375, |
|
"logps/rejected": -664.3800659179688, |
|
"loss": 0.5019, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.2128403186798096, |
|
"rewards/margins": 1.0774794816970825, |
|
"rewards/rejected": -4.290319442749023, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 7.03125, |
|
"learning_rate": 9.389802028686617e-07, |
|
"logits/chosen": -1.2338387966156006, |
|
"logits/rejected": -1.1366431713104248, |
|
"logps/chosen": -566.8738403320312, |
|
"logps/rejected": -616.0252685546875, |
|
"loss": 0.5826, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.0610568523406982, |
|
"rewards/margins": 0.8211328387260437, |
|
"rewards/rejected": -3.882189989089966, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 9.75, |
|
"learning_rate": 9.212017239232427e-07, |
|
"logits/chosen": -1.1542332172393799, |
|
"logits/rejected": -1.017268180847168, |
|
"logps/chosen": -568.286376953125, |
|
"logps/rejected": -668.4588623046875, |
|
"loss": 0.4741, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.9533185958862305, |
|
"rewards/margins": 1.2311924695968628, |
|
"rewards/rejected": -4.184511184692383, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 9.03555074179533e-07, |
|
"logits/chosen": -1.1374441385269165, |
|
"logits/rejected": -1.1105449199676514, |
|
"logps/chosen": -544.0662231445312, |
|
"logps/rejected": -676.7945556640625, |
|
"loss": 0.446, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.872863292694092, |
|
"rewards/margins": 1.2689627408981323, |
|
"rewards/rejected": -4.1418256759643555, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 14.5625, |
|
"learning_rate": 8.860417271277067e-07, |
|
"logits/chosen": -1.263672947883606, |
|
"logits/rejected": -1.2044599056243896, |
|
"logps/chosen": -563.6286010742188, |
|
"logps/rejected": -651.6553955078125, |
|
"loss": 0.4788, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.9439358711242676, |
|
"rewards/margins": 0.9601505398750305, |
|
"rewards/rejected": -3.9040865898132324, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 8.75, |
|
"learning_rate": 8.686631451272029e-07, |
|
"logits/chosen": -1.2087829113006592, |
|
"logits/rejected": -1.0665159225463867, |
|
"logps/chosen": -564.14892578125, |
|
"logps/rejected": -660.1915893554688, |
|
"loss": 0.4861, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.1072099208831787, |
|
"rewards/margins": 1.2149550914764404, |
|
"rewards/rejected": -4.322165489196777, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 8.625, |
|
"learning_rate": 8.514207792846168e-07, |
|
"logits/chosen": -1.2422146797180176, |
|
"logits/rejected": -1.1245746612548828, |
|
"logps/chosen": -556.6324462890625, |
|
"logps/rejected": -642.3776245117188, |
|
"loss": 0.4902, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.0577263832092285, |
|
"rewards/margins": 1.1418030261993408, |
|
"rewards/rejected": -4.19952917098999, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 8.343160693325356e-07, |
|
"logits/chosen": -1.1230237483978271, |
|
"logits/rejected": -1.0151801109313965, |
|
"logps/chosen": -566.5771484375, |
|
"logps/rejected": -679.12646484375, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.1102497577667236, |
|
"rewards/margins": 1.1686756610870361, |
|
"rewards/rejected": -4.278925895690918, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_logits/chosen": -1.059489130973816, |
|
"eval_logits/rejected": -0.9320334792137146, |
|
"eval_logps/chosen": -574.426025390625, |
|
"eval_logps/rejected": -668.4285278320312, |
|
"eval_loss": 0.4876534342765808, |
|
"eval_rewards/accuracies": 0.7465000152587891, |
|
"eval_rewards/chosen": -3.0977394580841064, |
|
"eval_rewards/margins": 1.1404662132263184, |
|
"eval_rewards/rejected": -4.238205432891846, |
|
"eval_runtime": 382.316, |
|
"eval_samples_per_second": 5.231, |
|
"eval_steps_per_second": 0.654, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 8.125, |
|
"learning_rate": 8.173504435093174e-07, |
|
"logits/chosen": -1.1287494897842407, |
|
"logits/rejected": -0.955623984336853, |
|
"logps/chosen": -547.8873291015625, |
|
"logps/rejected": -640.971923828125, |
|
"loss": 0.477, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.0631890296936035, |
|
"rewards/margins": 1.2520211935043335, |
|
"rewards/rejected": -4.315210342407227, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 6.5625, |
|
"learning_rate": 8.00525318439836e-07, |
|
"logits/chosen": -1.158349871635437, |
|
"logits/rejected": -1.0400350093841553, |
|
"logps/chosen": -583.4833374023438, |
|
"logps/rejected": -674.5729370117188, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.0665407180786133, |
|
"rewards/margins": 0.9871135950088501, |
|
"rewards/rejected": -4.053654193878174, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 7.6875, |
|
"learning_rate": 7.838420990171927e-07, |
|
"logits/chosen": -1.2469195127487183, |
|
"logits/rejected": -1.0984286069869995, |
|
"logps/chosen": -567.165283203125, |
|
"logps/rejected": -650.6731567382812, |
|
"loss": 0.5017, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.977949380874634, |
|
"rewards/margins": 1.090990424156189, |
|
"rewards/rejected": -4.068940162658691, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 7.5625, |
|
"learning_rate": 7.673021782854084e-07, |
|
"logits/chosen": -1.1217727661132812, |
|
"logits/rejected": -0.9839452505111694, |
|
"logps/chosen": -561.6543579101562, |
|
"logps/rejected": -643.6695556640625, |
|
"loss": 0.4898, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.009657382965088, |
|
"rewards/margins": 1.2389792203903198, |
|
"rewards/rejected": -4.248636722564697, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 9.9375, |
|
"learning_rate": 7.509069373231039e-07, |
|
"logits/chosen": -1.129913568496704, |
|
"logits/rejected": -1.0110609531402588, |
|
"logps/chosen": -554.6318969726562, |
|
"logps/rejected": -622.6085205078125, |
|
"loss": 0.5441, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.0194828510284424, |
|
"rewards/margins": 0.9275726079940796, |
|
"rewards/rejected": -3.9470553398132324, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 7.71875, |
|
"learning_rate": 7.346577451281822e-07, |
|
"logits/chosen": -1.1370588541030884, |
|
"logits/rejected": -1.0633890628814697, |
|
"logps/chosen": -551.51123046875, |
|
"logps/rejected": -660.9559936523438, |
|
"loss": 0.4596, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.892915725708008, |
|
"rewards/margins": 1.3355481624603271, |
|
"rewards/rejected": -4.228463649749756, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 18.625, |
|
"learning_rate": 7.185559585035138e-07, |
|
"logits/chosen": -1.1904377937316895, |
|
"logits/rejected": -1.0318008661270142, |
|
"logps/chosen": -591.028564453125, |
|
"logps/rejected": -693.4492797851562, |
|
"loss": 0.4733, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -3.0825228691101074, |
|
"rewards/margins": 1.1828874349594116, |
|
"rewards/rejected": -4.26540994644165, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 7.78125, |
|
"learning_rate": 7.026029219436504e-07, |
|
"logits/chosen": -1.2153565883636475, |
|
"logits/rejected": -1.0524095296859741, |
|
"logps/chosen": -546.4449462890625, |
|
"logps/rejected": -655.5341186523438, |
|
"loss": 0.4637, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.9415435791015625, |
|
"rewards/margins": 1.2188594341278076, |
|
"rewards/rejected": -4.160403251647949, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 9.5, |
|
"learning_rate": 6.867999675225523e-07, |
|
"logits/chosen": -1.2460225820541382, |
|
"logits/rejected": -1.1109936237335205, |
|
"logps/chosen": -518.8594970703125, |
|
"logps/rejected": -621.4867553710938, |
|
"loss": 0.4754, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.8794169425964355, |
|
"rewards/margins": 1.1684167385101318, |
|
"rewards/rejected": -4.047833442687988, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 10.375, |
|
"learning_rate": 6.711484147823663e-07, |
|
"logits/chosen": -1.1477627754211426, |
|
"logits/rejected": -1.0689526796340942, |
|
"logps/chosen": -520.4979858398438, |
|
"logps/rejected": -650.7647094726562, |
|
"loss": 0.4592, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.862730026245117, |
|
"rewards/margins": 1.2541263103485107, |
|
"rewards/rejected": -4.116856575012207, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -1.0787907838821411, |
|
"eval_logits/rejected": -0.9509702324867249, |
|
"eval_logps/chosen": -563.9876708984375, |
|
"eval_logps/rejected": -655.9471435546875, |
|
"eval_loss": 0.48733198642730713, |
|
"eval_rewards/accuracies": 0.7459999918937683, |
|
"eval_rewards/chosen": -2.993356466293335, |
|
"eval_rewards/margins": 1.1200352907180786, |
|
"eval_rewards/rejected": -4.113391399383545, |
|
"eval_runtime": 382.8007, |
|
"eval_samples_per_second": 5.225, |
|
"eval_steps_per_second": 0.653, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 11.25, |
|
"learning_rate": 6.556495706232413e-07, |
|
"logits/chosen": -1.1598658561706543, |
|
"logits/rejected": -1.0877033472061157, |
|
"logps/chosen": -578.8084106445312, |
|
"logps/rejected": -665.4705200195312, |
|
"loss": 0.5453, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.1011300086975098, |
|
"rewards/margins": 1.050903081893921, |
|
"rewards/rejected": -4.152032852172852, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 6.403047291942057e-07, |
|
"logits/chosen": -1.0840625762939453, |
|
"logits/rejected": -0.9331427812576294, |
|
"logps/chosen": -521.8424682617188, |
|
"logps/rejected": -612.9337768554688, |
|
"loss": 0.495, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.95574688911438, |
|
"rewards/margins": 1.1547616720199585, |
|
"rewards/rejected": -4.110508441925049, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 12.375, |
|
"learning_rate": 6.251151717851023e-07, |
|
"logits/chosen": -1.1582403182983398, |
|
"logits/rejected": -1.0655838251113892, |
|
"logps/chosen": -526.1175537109375, |
|
"logps/rejected": -627.6626586914062, |
|
"loss": 0.4861, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.9271697998046875, |
|
"rewards/margins": 1.1482912302017212, |
|
"rewards/rejected": -4.075460910797119, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 6.25, |
|
"learning_rate": 6.100821667196041e-07, |
|
"logits/chosen": -1.323209524154663, |
|
"logits/rejected": -1.0637619495391846, |
|
"logps/chosen": -561.310791015625, |
|
"logps/rejected": -609.7546997070312, |
|
"loss": 0.4726, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.856724500656128, |
|
"rewards/margins": 1.1393463611602783, |
|
"rewards/rejected": -3.9960708618164062, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 55.5, |
|
"learning_rate": 5.952069692493062e-07, |
|
"logits/chosen": -1.1378008127212524, |
|
"logits/rejected": -1.033092737197876, |
|
"logps/chosen": -511.969482421875, |
|
"logps/rejected": -648.4796752929688, |
|
"loss": 0.4149, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.8365421295166016, |
|
"rewards/margins": 1.3306509256362915, |
|
"rewards/rejected": -4.1671929359436035, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 10.5625, |
|
"learning_rate": 5.80490821448918e-07, |
|
"logits/chosen": -1.1030110120773315, |
|
"logits/rejected": -1.0928280353546143, |
|
"logps/chosen": -549.79052734375, |
|
"logps/rejected": -727.48876953125, |
|
"loss": 0.4284, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.8726837635040283, |
|
"rewards/margins": 1.3525440692901611, |
|
"rewards/rejected": -4.225228309631348, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 9.0625, |
|
"learning_rate": 5.659349521125459e-07, |
|
"logits/chosen": -1.2849022150039673, |
|
"logits/rejected": -1.2295571565628052, |
|
"logps/chosen": -560.9410400390625, |
|
"logps/rejected": -645.2173461914062, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.7952258586883545, |
|
"rewards/margins": 1.048758864402771, |
|
"rewards/rejected": -3.843984603881836, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 6.90625, |
|
"learning_rate": 5.5154057665109e-07, |
|
"logits/chosen": -1.2467188835144043, |
|
"logits/rejected": -1.0997190475463867, |
|
"logps/chosen": -557.9779052734375, |
|
"logps/rejected": -661.7819213867188, |
|
"loss": 0.4889, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.9708826541900635, |
|
"rewards/margins": 1.3023018836975098, |
|
"rewards/rejected": -4.273184776306152, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 8.25, |
|
"learning_rate": 5.373088969907586e-07, |
|
"logits/chosen": -1.2789522409439087, |
|
"logits/rejected": -1.0984174013137817, |
|
"logps/chosen": -573.76123046875, |
|
"logps/rejected": -637.1810302734375, |
|
"loss": 0.4581, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.969475269317627, |
|
"rewards/margins": 1.136474370956421, |
|
"rewards/rejected": -4.105949878692627, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 8.625, |
|
"learning_rate": 5.23241101472709e-07, |
|
"logits/chosen": -1.1879446506500244, |
|
"logits/rejected": -1.0638211965560913, |
|
"logps/chosen": -563.8876342773438, |
|
"logps/rejected": -645.8259887695312, |
|
"loss": 0.4905, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.8758597373962402, |
|
"rewards/margins": 1.0394397974014282, |
|
"rewards/rejected": -3.9152991771698, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_logits/chosen": -1.0740700960159302, |
|
"eval_logits/rejected": -0.9464629888534546, |
|
"eval_logps/chosen": -562.904296875, |
|
"eval_logps/rejected": -656.5853271484375, |
|
"eval_loss": 0.48781928420066833, |
|
"eval_rewards/accuracies": 0.7429999709129333, |
|
"eval_rewards/chosen": -2.982522964477539, |
|
"eval_rewards/margins": 1.1372504234313965, |
|
"eval_rewards/rejected": -4.1197733879089355, |
|
"eval_runtime": 382.0441, |
|
"eval_samples_per_second": 5.235, |
|
"eval_steps_per_second": 0.654, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 5.09338364753818e-07, |
|
"logits/chosen": -1.2681617736816406, |
|
"logits/rejected": -1.0949214696884155, |
|
"logps/chosen": -578.9161376953125, |
|
"logps/rejected": -673.3041381835938, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.9349396228790283, |
|
"rewards/margins": 1.1018182039260864, |
|
"rewards/rejected": -4.036757469177246, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 10.125, |
|
"learning_rate": 4.956018477086005e-07, |
|
"logits/chosen": -1.2264713048934937, |
|
"logits/rejected": -1.0714534521102905, |
|
"logps/chosen": -574.7757568359375, |
|
"logps/rejected": -661.6316528320312, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.056429624557495, |
|
"rewards/margins": 1.1420024633407593, |
|
"rewards/rejected": -4.198431968688965, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 11.625, |
|
"learning_rate": 4.820326973322764e-07, |
|
"logits/chosen": -1.1282936334609985, |
|
"logits/rejected": -1.0485918521881104, |
|
"logps/chosen": -566.1331787109375, |
|
"logps/rejected": -665.1694946289062, |
|
"loss": 0.5658, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.1516964435577393, |
|
"rewards/margins": 1.0504977703094482, |
|
"rewards/rejected": -4.202193737030029, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 10.25, |
|
"learning_rate": 4.686320466449981e-07, |
|
"logits/chosen": -1.1074498891830444, |
|
"logits/rejected": -0.9338695406913757, |
|
"logps/chosen": -530.6743774414062, |
|
"logps/rejected": -670.0709838867188, |
|
"loss": 0.4495, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.914252281188965, |
|
"rewards/margins": 1.4155068397521973, |
|
"rewards/rejected": -4.329759120941162, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 4.554010145972418e-07, |
|
"logits/chosen": -1.2932242155075073, |
|
"logits/rejected": -1.10805344581604, |
|
"logps/chosen": -569.38818359375, |
|
"logps/rejected": -671.8726806640625, |
|
"loss": 0.551, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.0557217597961426, |
|
"rewards/margins": 1.1381008625030518, |
|
"rewards/rejected": -4.193822860717773, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 8.75, |
|
"learning_rate": 4.4234070597637455e-07, |
|
"logits/chosen": -1.1201348304748535, |
|
"logits/rejected": -1.0320645570755005, |
|
"logps/chosen": -575.7613525390625, |
|
"logps/rejected": -669.4164428710938, |
|
"loss": 0.5272, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.0022788047790527, |
|
"rewards/margins": 1.0558512210845947, |
|
"rewards/rejected": -4.058130264282227, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 6.5625, |
|
"learning_rate": 4.2945221131440783e-07, |
|
"logits/chosen": -1.114639401435852, |
|
"logits/rejected": -0.9161049127578735, |
|
"logps/chosen": -552.2017211914062, |
|
"logps/rejected": -653.031005859375, |
|
"loss": 0.4203, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.864920139312744, |
|
"rewards/margins": 1.3247652053833008, |
|
"rewards/rejected": -4.189684867858887, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 9.25, |
|
"learning_rate": 4.167366067969381e-07, |
|
"logits/chosen": -1.216722846031189, |
|
"logits/rejected": -1.144590139389038, |
|
"logps/chosen": -516.3905639648438, |
|
"logps/rejected": -639.3621826171875, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.9348020553588867, |
|
"rewards/margins": 0.9914267659187317, |
|
"rewards/rejected": -3.9262290000915527, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 6.125, |
|
"learning_rate": 4.041949541732826e-07, |
|
"logits/chosen": -1.1988582611083984, |
|
"logits/rejected": -1.1241180896759033, |
|
"logps/chosen": -567.2083740234375, |
|
"logps/rejected": -659.522216796875, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.033240795135498, |
|
"rewards/margins": 1.0658702850341797, |
|
"rewards/rejected": -4.0991106033325195, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 3.9182830066782614e-07, |
|
"logits/chosen": -1.1077312231063843, |
|
"logits/rejected": -1.0953607559204102, |
|
"logps/chosen": -557.6238403320312, |
|
"logps/rejected": -689.794921875, |
|
"loss": 0.485, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.9987998008728027, |
|
"rewards/margins": 1.1976327896118164, |
|
"rewards/rejected": -4.196433067321777, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -1.0807329416275024, |
|
"eval_logits/rejected": -0.9531368613243103, |
|
"eval_logps/chosen": -559.239990234375, |
|
"eval_logps/rejected": -652.1516723632812, |
|
"eval_loss": 0.4873969852924347, |
|
"eval_rewards/accuracies": 0.7455000281333923, |
|
"eval_rewards/chosen": -2.9458799362182617, |
|
"eval_rewards/margins": 1.1295573711395264, |
|
"eval_rewards/rejected": -4.075437068939209, |
|
"eval_runtime": 381.6886, |
|
"eval_samples_per_second": 5.24, |
|
"eval_steps_per_second": 0.655, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 3.796376788925771e-07, |
|
"logits/chosen": -1.1163936853408813, |
|
"logits/rejected": -1.0554332733154297, |
|
"logps/chosen": -541.477294921875, |
|
"logps/rejected": -619.0269165039062, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.800494909286499, |
|
"rewards/margins": 1.0160177946090698, |
|
"rewards/rejected": -3.8165130615234375, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 7.625, |
|
"learning_rate": 3.676241067609465e-07, |
|
"logits/chosen": -1.2064073085784912, |
|
"logits/rejected": -1.0841269493103027, |
|
"logps/chosen": -582.91259765625, |
|
"logps/rejected": -648.9725952148438, |
|
"loss": 0.5138, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.892125129699707, |
|
"rewards/margins": 1.091489315032959, |
|
"rewards/rejected": -3.983614444732666, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 15.1875, |
|
"learning_rate": 3.5578858740274976e-07, |
|
"logits/chosen": -1.123425841331482, |
|
"logits/rejected": -1.0302746295928955, |
|
"logps/chosen": -566.611328125, |
|
"logps/rejected": -648.7924194335938, |
|
"loss": 0.5326, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.048774003982544, |
|
"rewards/margins": 0.9477185010910034, |
|
"rewards/rejected": -3.996492385864258, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 11.5625, |
|
"learning_rate": 3.44132109080447e-07, |
|
"logits/chosen": -1.3182079792022705, |
|
"logits/rejected": -1.1424782276153564, |
|
"logps/chosen": -549.4573364257812, |
|
"logps/rejected": -634.7244873046875, |
|
"loss": 0.4425, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.8423376083374023, |
|
"rewards/margins": 1.2275350093841553, |
|
"rewards/rejected": -4.069872856140137, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 12.125, |
|
"learning_rate": 3.3265564510662344e-07, |
|
"logits/chosen": -1.2581889629364014, |
|
"logits/rejected": -1.1089788675308228, |
|
"logps/chosen": -572.9723510742188, |
|
"logps/rejected": -676.4666137695312, |
|
"loss": 0.4207, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.796302318572998, |
|
"rewards/margins": 1.2862600088119507, |
|
"rewards/rejected": -4.082562446594238, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 14.3125, |
|
"learning_rate": 3.213601537627195e-07, |
|
"logits/chosen": -1.1619012355804443, |
|
"logits/rejected": -1.0473229885101318, |
|
"logps/chosen": -574.4371948242188, |
|
"logps/rejected": -662.361083984375, |
|
"loss": 0.5456, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -3.1731839179992676, |
|
"rewards/margins": 1.0502443313598633, |
|
"rewards/rejected": -4.223428249359131, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 12.0, |
|
"learning_rate": 3.1024657821901063e-07, |
|
"logits/chosen": -1.2181814908981323, |
|
"logits/rejected": -1.1487758159637451, |
|
"logps/chosen": -531.4067993164062, |
|
"logps/rejected": -627.6771240234375, |
|
"loss": 0.5005, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.8366494178771973, |
|
"rewards/margins": 1.1211111545562744, |
|
"rewards/rejected": -3.9577605724334717, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 14.25, |
|
"learning_rate": 2.9931584645585654e-07, |
|
"logits/chosen": -1.147289514541626, |
|
"logits/rejected": -1.1335127353668213, |
|
"logps/chosen": -557.3380737304688, |
|
"logps/rejected": -666.0869140625, |
|
"loss": 0.5042, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.828235626220703, |
|
"rewards/margins": 1.0581908226013184, |
|
"rewards/rejected": -3.8864264488220215, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 6.96875, |
|
"learning_rate": 2.885688711862136e-07, |
|
"logits/chosen": -1.1895829439163208, |
|
"logits/rejected": -1.1866552829742432, |
|
"logps/chosen": -561.8271484375, |
|
"logps/rejected": -686.0377197265625, |
|
"loss": 0.51, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.007755756378174, |
|
"rewards/margins": 1.261817216873169, |
|
"rewards/rejected": -4.269573211669922, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 7.6875, |
|
"learning_rate": 2.7800654977942486e-07, |
|
"logits/chosen": -1.1794744729995728, |
|
"logits/rejected": -1.0672075748443604, |
|
"logps/chosen": -547.8685302734375, |
|
"logps/rejected": -650.7493286132812, |
|
"loss": 0.5157, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.9051201343536377, |
|
"rewards/margins": 1.0670777559280396, |
|
"rewards/rejected": -3.972198009490967, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_logits/chosen": -1.0755009651184082, |
|
"eval_logits/rejected": -0.9480787515640259, |
|
"eval_logps/chosen": -560.1488647460938, |
|
"eval_logps/rejected": -652.9912109375, |
|
"eval_loss": 0.4874354600906372, |
|
"eval_rewards/accuracies": 0.7444999814033508, |
|
"eval_rewards/chosen": -2.9549689292907715, |
|
"eval_rewards/margins": 1.128864049911499, |
|
"eval_rewards/rejected": -4.083832740783691, |
|
"eval_runtime": 383.0008, |
|
"eval_samples_per_second": 5.222, |
|
"eval_steps_per_second": 0.653, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 12.75, |
|
"learning_rate": 2.6762976418628797e-07, |
|
"logits/chosen": -1.1829874515533447, |
|
"logits/rejected": -1.0443121194839478, |
|
"logps/chosen": -508.328857421875, |
|
"logps/rejected": -573.6398315429688, |
|
"loss": 0.5093, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.785529851913452, |
|
"rewards/margins": 1.0893114805221558, |
|
"rewards/rejected": -3.8748409748077393, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 9.375, |
|
"learning_rate": 2.5743938086541354e-07, |
|
"logits/chosen": -1.1776726245880127, |
|
"logits/rejected": -1.0596325397491455, |
|
"logps/chosen": -558.5306396484375, |
|
"logps/rejected": -649.6300048828125, |
|
"loss": 0.4969, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.942483425140381, |
|
"rewards/margins": 1.159317135810852, |
|
"rewards/rejected": -4.101800441741943, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 13.0625, |
|
"learning_rate": 2.4743625071087574e-07, |
|
"logits/chosen": -1.3345047235488892, |
|
"logits/rejected": -1.1562585830688477, |
|
"logps/chosen": -557.7296142578125, |
|
"logps/rejected": -661.87109375, |
|
"loss": 0.4702, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.8015923500061035, |
|
"rewards/margins": 1.3364170789718628, |
|
"rewards/rejected": -4.138009548187256, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 11.875, |
|
"learning_rate": 2.3762120898116498e-07, |
|
"logits/chosen": -1.1994738578796387, |
|
"logits/rejected": -1.097899079322815, |
|
"logps/chosen": -579.8328857421875, |
|
"logps/rejected": -674.6861572265625, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.1103997230529785, |
|
"rewards/margins": 1.0284258127212524, |
|
"rewards/rejected": -4.138825416564941, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 8.0, |
|
"learning_rate": 2.2799507522944048e-07, |
|
"logits/chosen": -1.1523630619049072, |
|
"logits/rejected": -1.0521692037582397, |
|
"logps/chosen": -551.5980224609375, |
|
"logps/rejected": -671.2841186523438, |
|
"loss": 0.4455, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.8580057621002197, |
|
"rewards/margins": 1.2792617082595825, |
|
"rewards/rejected": -4.137267112731934, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 2.1855865323510056e-07, |
|
"logits/chosen": -1.2028191089630127, |
|
"logits/rejected": -1.0033330917358398, |
|
"logps/chosen": -563.6111450195312, |
|
"logps/rejected": -704.59228515625, |
|
"loss": 0.4213, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.9093270301818848, |
|
"rewards/margins": 1.4688284397125244, |
|
"rewards/rejected": -4.378155708312988, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 2.0931273093666575e-07, |
|
"logits/chosen": -1.1482703685760498, |
|
"logits/rejected": -1.0027369260787964, |
|
"logps/chosen": -540.7926635742188, |
|
"logps/rejected": -644.9227294921875, |
|
"loss": 0.439, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.006060838699341, |
|
"rewards/margins": 1.2438604831695557, |
|
"rewards/rejected": -4.2499213218688965, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 13.625, |
|
"learning_rate": 2.002580803659873e-07, |
|
"logits/chosen": -1.1630356311798096, |
|
"logits/rejected": -1.0312206745147705, |
|
"logps/chosen": -559.203125, |
|
"logps/rejected": -652.8720092773438, |
|
"loss": 0.4651, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.046480417251587, |
|
"rewards/margins": 1.1182465553283691, |
|
"rewards/rejected": -4.164727210998535, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 6.71875, |
|
"learning_rate": 1.913954575837826e-07, |
|
"logits/chosen": -1.2169429063796997, |
|
"logits/rejected": -0.9856022596359253, |
|
"logps/chosen": -575.2197875976562, |
|
"logps/rejected": -634.4151000976562, |
|
"loss": 0.4808, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.0310537815093994, |
|
"rewards/margins": 1.0889911651611328, |
|
"rewards/rejected": -4.120044708251953, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 1.827256026165028e-07, |
|
"logits/chosen": -1.2307440042495728, |
|
"logits/rejected": -1.0502979755401611, |
|
"logps/chosen": -592.2626342773438, |
|
"logps/rejected": -664.5699462890625, |
|
"loss": 0.4474, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.771921396255493, |
|
"rewards/margins": 1.2936856746673584, |
|
"rewards/rejected": -4.065607070922852, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": -1.077279806137085, |
|
"eval_logits/rejected": -0.9499141573905945, |
|
"eval_logps/chosen": -561.6380615234375, |
|
"eval_logps/rejected": -654.8016967773438, |
|
"eval_loss": 0.4871050715446472, |
|
"eval_rewards/accuracies": 0.7434999942779541, |
|
"eval_rewards/chosen": -2.969860553741455, |
|
"eval_rewards/margins": 1.1320772171020508, |
|
"eval_rewards/rejected": -4.101937770843506, |
|
"eval_runtime": 382.1089, |
|
"eval_samples_per_second": 5.234, |
|
"eval_steps_per_second": 0.654, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 11.5625, |
|
"learning_rate": 1.7424923939454274e-07, |
|
"logits/chosen": -1.174843430519104, |
|
"logits/rejected": -1.0021690130233765, |
|
"logps/chosen": -579.2442626953125, |
|
"logps/rejected": -661.9432373046875, |
|
"loss": 0.4255, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.9789488315582275, |
|
"rewards/margins": 1.2606755495071411, |
|
"rewards/rejected": -4.239624500274658, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 16.25, |
|
"learning_rate": 1.6596707569179304e-07, |
|
"logits/chosen": -1.2912896871566772, |
|
"logits/rejected": -1.1392004489898682, |
|
"logps/chosen": -576.8416748046875, |
|
"logps/rejected": -653.64501953125, |
|
"loss": 0.4901, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.956543207168579, |
|
"rewards/margins": 1.119319200515747, |
|
"rewards/rejected": -4.075861930847168, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 9.625, |
|
"learning_rate": 1.578798030665385e-07, |
|
"logits/chosen": -1.2196199893951416, |
|
"logits/rejected": -1.0388673543930054, |
|
"logps/chosen": -565.8033447265625, |
|
"logps/rejected": -686.4707641601562, |
|
"loss": 0.4313, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.9110517501831055, |
|
"rewards/margins": 1.3773367404937744, |
|
"rewards/rejected": -4.288388729095459, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 8.25, |
|
"learning_rate": 1.499880968037165e-07, |
|
"logits/chosen": -1.1975353956222534, |
|
"logits/rejected": -1.0588737726211548, |
|
"logps/chosen": -544.4766845703125, |
|
"logps/rejected": -618.7376098632812, |
|
"loss": 0.513, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.888631820678711, |
|
"rewards/margins": 1.1052820682525635, |
|
"rewards/rejected": -3.9939143657684326, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 14.5625, |
|
"learning_rate": 1.4229261585852805e-07, |
|
"logits/chosen": -1.230802297592163, |
|
"logits/rejected": -1.1439770460128784, |
|
"logps/chosen": -553.6980590820312, |
|
"logps/rejected": -644.7520751953125, |
|
"loss": 0.4489, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.8526644706726074, |
|
"rewards/margins": 1.1686475276947021, |
|
"rewards/rejected": -4.0213117599487305, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 10.6875, |
|
"learning_rate": 1.3479400280141886e-07, |
|
"logits/chosen": -1.1431211233139038, |
|
"logits/rejected": -1.1035680770874023, |
|
"logps/chosen": -544.6209106445312, |
|
"logps/rejected": -662.7022705078125, |
|
"loss": 0.4784, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.9836173057556152, |
|
"rewards/margins": 1.2009574174880981, |
|
"rewards/rejected": -4.184575080871582, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 9.625, |
|
"learning_rate": 1.2749288376442044e-07, |
|
"logits/chosen": -1.2415331602096558, |
|
"logits/rejected": -1.0389716625213623, |
|
"logps/chosen": -587.2742309570312, |
|
"logps/rejected": -644.300048828125, |
|
"loss": 0.4742, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.9073996543884277, |
|
"rewards/margins": 1.1453628540039062, |
|
"rewards/rejected": -4.052762508392334, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 8.875, |
|
"learning_rate": 1.203898683888713e-07, |
|
"logits/chosen": -1.2313424348831177, |
|
"logits/rejected": -1.1037070751190186, |
|
"logps/chosen": -548.36962890625, |
|
"logps/rejected": -643.1497192382812, |
|
"loss": 0.5615, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -3.043722152709961, |
|
"rewards/margins": 0.9787699580192566, |
|
"rewards/rejected": -4.022491931915283, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 1.1348554977451132e-07, |
|
"logits/chosen": -1.2611653804779053, |
|
"logits/rejected": -1.1225281953811646, |
|
"logps/chosen": -574.4703369140625, |
|
"logps/rejected": -650.3907470703125, |
|
"loss": 0.495, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.924811840057373, |
|
"rewards/margins": 1.0881900787353516, |
|
"rewards/rejected": -4.013001918792725, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 6.875, |
|
"learning_rate": 1.0678050442995802e-07, |
|
"logits/chosen": -1.2225737571716309, |
|
"logits/rejected": -1.0173273086547852, |
|
"logps/chosen": -580.7540283203125, |
|
"logps/rejected": -643.2467651367188, |
|
"loss": 0.5379, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.9996070861816406, |
|
"rewards/margins": 1.0754629373550415, |
|
"rewards/rejected": -4.075070381164551, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_logits/chosen": -1.074249267578125, |
|
"eval_logits/rejected": -0.9468256831169128, |
|
"eval_logps/chosen": -561.2808227539062, |
|
"eval_logps/rejected": -654.5006103515625, |
|
"eval_loss": 0.48737701773643494, |
|
"eval_rewards/accuracies": 0.7429999709129333, |
|
"eval_rewards/chosen": -2.9662883281707764, |
|
"eval_rewards/margins": 1.1326382160186768, |
|
"eval_rewards/rejected": -4.098926544189453, |
|
"eval_runtime": 382.1229, |
|
"eval_samples_per_second": 5.234, |
|
"eval_steps_per_second": 0.654, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 1.0027529222456755e-07, |
|
"logits/chosen": -1.1973202228546143, |
|
"logits/rejected": -1.0237270593643188, |
|
"logps/chosen": -544.4231567382812, |
|
"logps/rejected": -646.541015625, |
|
"loss": 0.4368, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.902569532394409, |
|
"rewards/margins": 1.1967476606369019, |
|
"rewards/rejected": -4.0993170738220215, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 9.397045634168766e-08, |
|
"logits/chosen": -1.227426290512085, |
|
"logits/rejected": -1.1496341228485107, |
|
"logps/chosen": -555.9089965820312, |
|
"logps/rejected": -687.0352783203125, |
|
"loss": 0.4491, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.873260974884033, |
|
"rewards/margins": 1.3088066577911377, |
|
"rewards/rejected": -4.182066917419434, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 8.78665232332998e-08, |
|
"logits/chosen": -1.1654760837554932, |
|
"logits/rejected": -1.0858592987060547, |
|
"logps/chosen": -537.4627685546875, |
|
"logps/rejected": -640.0810546875, |
|
"loss": 0.489, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -3.0675268173217773, |
|
"rewards/margins": 1.0338232517242432, |
|
"rewards/rejected": -4.101349830627441, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 8.196400257606208e-08, |
|
"logits/chosen": -1.2670751810073853, |
|
"logits/rejected": -1.104811191558838, |
|
"logps/chosen": -576.2312622070312, |
|
"logps/rejected": -708.0988159179688, |
|
"loss": 0.4292, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.9314615726470947, |
|
"rewards/margins": 1.371382236480713, |
|
"rewards/rejected": -4.3028435707092285, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 9.5, |
|
"learning_rate": 7.626338722875076e-08, |
|
"logits/chosen": -1.1996467113494873, |
|
"logits/rejected": -1.1349631547927856, |
|
"logps/chosen": -546.021240234375, |
|
"logps/rejected": -657.2860107421875, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.9192748069763184, |
|
"rewards/margins": 1.102920651435852, |
|
"rewards/rejected": -4.022195816040039, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 5.84375, |
|
"learning_rate": 7.076515319110688e-08, |
|
"logits/chosen": -1.2043834924697876, |
|
"logits/rejected": -1.0872790813446045, |
|
"logps/chosen": -546.2125854492188, |
|
"logps/rejected": -628.4691162109375, |
|
"loss": 0.5091, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.899247646331787, |
|
"rewards/margins": 1.2382572889328003, |
|
"rewards/rejected": -4.1375041007995605, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 6.54697595640899e-08, |
|
"logits/chosen": -1.2246639728546143, |
|
"logits/rejected": -1.1050646305084229, |
|
"logps/chosen": -588.5670166015625, |
|
"logps/rejected": -679.2962646484375, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.9634485244750977, |
|
"rewards/margins": 1.1589770317077637, |
|
"rewards/rejected": -4.1224260330200195, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 9.125, |
|
"learning_rate": 6.037764851154426e-08, |
|
"logits/chosen": -1.2126811742782593, |
|
"logits/rejected": -1.1511167287826538, |
|
"logps/chosen": -555.2306518554688, |
|
"logps/rejected": -671.8084716796875, |
|
"loss": 0.5096, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.911715030670166, |
|
"rewards/margins": 1.1182584762573242, |
|
"rewards/rejected": -4.029973030090332, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 7.1875, |
|
"learning_rate": 5.548924522327748e-08, |
|
"logits/chosen": -1.1890180110931396, |
|
"logits/rejected": -1.0672190189361572, |
|
"logps/chosen": -549.8150634765625, |
|
"logps/rejected": -647.8394775390625, |
|
"loss": 0.4832, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.880577564239502, |
|
"rewards/margins": 1.1271222829818726, |
|
"rewards/rejected": -4.007699489593506, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 5.0804957879556915e-08, |
|
"logits/chosen": -1.109243392944336, |
|
"logits/rejected": -1.0201053619384766, |
|
"logps/chosen": -514.1246337890625, |
|
"logps/rejected": -630.8916625976562, |
|
"loss": 0.464, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.8855831623077393, |
|
"rewards/margins": 1.122081995010376, |
|
"rewards/rejected": -4.007665157318115, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": -1.0748145580291748, |
|
"eval_logits/rejected": -0.9474833607673645, |
|
"eval_logps/chosen": -561.028564453125, |
|
"eval_logps/rejected": -654.279052734375, |
|
"eval_loss": 0.48736903071403503, |
|
"eval_rewards/accuracies": 0.7425000071525574, |
|
"eval_rewards/chosen": -2.9637651443481445, |
|
"eval_rewards/margins": 1.1329458951950073, |
|
"eval_rewards/rejected": -4.096711158752441, |
|
"eval_runtime": 382.7111, |
|
"eval_samples_per_second": 5.226, |
|
"eval_steps_per_second": 0.653, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 9.125, |
|
"learning_rate": 4.632517761702815e-08, |
|
"logits/chosen": -1.1433720588684082, |
|
"logits/rejected": -1.0008645057678223, |
|
"logps/chosen": -530.3574829101562, |
|
"logps/rejected": -652.87255859375, |
|
"loss": 0.4428, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.9491429328918457, |
|
"rewards/margins": 1.3483526706695557, |
|
"rewards/rejected": -4.2974958419799805, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 11.125, |
|
"learning_rate": 4.205027849605359e-08, |
|
"logits/chosen": -1.1681492328643799, |
|
"logits/rejected": -1.0669422149658203, |
|
"logps/chosen": -553.4034423828125, |
|
"logps/rejected": -626.2314453125, |
|
"loss": 0.5421, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.0548007488250732, |
|
"rewards/margins": 1.0290553569793701, |
|
"rewards/rejected": -4.083855628967285, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 9.9375, |
|
"learning_rate": 3.798061746947995e-08, |
|
"logits/chosen": -1.2855480909347534, |
|
"logits/rejected": -1.1476643085479736, |
|
"logps/chosen": -555.6473999023438, |
|
"logps/rejected": -633.9293823242188, |
|
"loss": 0.4785, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.9149746894836426, |
|
"rewards/margins": 1.1746852397918701, |
|
"rewards/rejected": -4.089660167694092, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 10.25, |
|
"learning_rate": 3.411653435283158e-08, |
|
"logits/chosen": -1.1988470554351807, |
|
"logits/rejected": -0.9911161661148071, |
|
"logps/chosen": -560.5934448242188, |
|
"logps/rejected": -617.925048828125, |
|
"loss": 0.4611, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.873368740081787, |
|
"rewards/margins": 1.1307556629180908, |
|
"rewards/rejected": -4.004124641418457, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 9.5, |
|
"learning_rate": 3.04583517959367e-08, |
|
"logits/chosen": -1.2440365552902222, |
|
"logits/rejected": -1.0937076807022095, |
|
"logps/chosen": -528.578125, |
|
"logps/rejected": -617.3880004882812, |
|
"loss": 0.448, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.7646141052246094, |
|
"rewards/margins": 1.2126356363296509, |
|
"rewards/rejected": -3.9772496223449707, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 10.0, |
|
"learning_rate": 2.7006375255985984e-08, |
|
"logits/chosen": -1.1879501342773438, |
|
"logits/rejected": -1.1580005884170532, |
|
"logps/chosen": -571.6791381835938, |
|
"logps/rejected": -661.9193725585938, |
|
"loss": 0.5788, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -3.06877064704895, |
|
"rewards/margins": 0.8969556093215942, |
|
"rewards/rejected": -3.965726375579834, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 11.625, |
|
"learning_rate": 2.3760892972027328e-08, |
|
"logits/chosen": -1.303144931793213, |
|
"logits/rejected": -1.1418662071228027, |
|
"logps/chosen": -583.8892822265625, |
|
"logps/rejected": -663.1383666992188, |
|
"loss": 0.5326, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.113860845565796, |
|
"rewards/margins": 1.1326040029525757, |
|
"rewards/rejected": -4.246464729309082, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 13.4375, |
|
"learning_rate": 2.072217594089765e-08, |
|
"logits/chosen": -1.156292200088501, |
|
"logits/rejected": -1.146905541419983, |
|
"logps/chosen": -559.3345336914062, |
|
"logps/rejected": -672.4237060546875, |
|
"loss": 0.4237, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.9779343605041504, |
|
"rewards/margins": 1.253351092338562, |
|
"rewards/rejected": -4.231285572052002, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 8.9375, |
|
"learning_rate": 1.789047789459375e-08, |
|
"logits/chosen": -1.266901969909668, |
|
"logits/rejected": -1.072322964668274, |
|
"logps/chosen": -611.783203125, |
|
"logps/rejected": -680.0989379882812, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.9480648040771484, |
|
"rewards/margins": 1.1735531091690063, |
|
"rewards/rejected": -4.121617794036865, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 8.9375, |
|
"learning_rate": 1.5266035279088708e-08, |
|
"logits/chosen": -1.1054164171218872, |
|
"logits/rejected": -0.985053539276123, |
|
"logps/chosen": -610.8778076171875, |
|
"logps/rejected": -699.9169921875, |
|
"loss": 0.4729, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.1426022052764893, |
|
"rewards/margins": 1.1523752212524414, |
|
"rewards/rejected": -4.29497766494751, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_logits/chosen": -1.0769954919815063, |
|
"eval_logits/rejected": -0.9495205879211426, |
|
"eval_logps/chosen": -561.3129272460938, |
|
"eval_logps/rejected": -654.6014404296875, |
|
"eval_loss": 0.48729926347732544, |
|
"eval_rewards/accuracies": 0.7444999814033508, |
|
"eval_rewards/chosen": -2.966609239578247, |
|
"eval_rewards/margins": 1.1333256959915161, |
|
"eval_rewards/rejected": -4.099935054779053, |
|
"eval_runtime": 382.1, |
|
"eval_samples_per_second": 5.234, |
|
"eval_steps_per_second": 0.654, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 11.75, |
|
"learning_rate": 1.2849067234584623e-08, |
|
"logits/chosen": -1.0827583074569702, |
|
"logits/rejected": -1.0175631046295166, |
|
"logps/chosen": -534.8372192382812, |
|
"logps/rejected": -647.8695678710938, |
|
"loss": 0.4762, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.9847655296325684, |
|
"rewards/margins": 1.2061764001846313, |
|
"rewards/rejected": -4.190942287445068, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 11.1875, |
|
"learning_rate": 1.0639775577218625e-08, |
|
"logits/chosen": -1.0798698663711548, |
|
"logits/rejected": -0.9149328470230103, |
|
"logps/chosen": -549.2965087890625, |
|
"logps/rejected": -631.1814575195312, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.001096248626709, |
|
"rewards/margins": 1.178213119506836, |
|
"rewards/rejected": -4.179308891296387, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 9.5, |
|
"learning_rate": 8.638344782207486e-09, |
|
"logits/chosen": -1.1081641912460327, |
|
"logits/rejected": -1.0127241611480713, |
|
"logps/chosen": -530.3636474609375, |
|
"logps/rejected": -619.5350341796875, |
|
"loss": 0.4791, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.8507590293884277, |
|
"rewards/margins": 1.1195757389068604, |
|
"rewards/rejected": -3.97033429145813, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 10.0, |
|
"learning_rate": 6.84494196844715e-09, |
|
"logits/chosen": -1.16922128200531, |
|
"logits/rejected": -1.0506504774093628, |
|
"logps/chosen": -563.3178100585938, |
|
"logps/rejected": -685.6429443359375, |
|
"loss": 0.4573, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.912113666534424, |
|
"rewards/margins": 1.3391534090042114, |
|
"rewards/rejected": -4.251267433166504, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 5.259716884556121e-09, |
|
"logits/chosen": -1.2230274677276611, |
|
"logits/rejected": -1.0869773626327515, |
|
"logps/chosen": -557.9898681640625, |
|
"logps/rejected": -660.3572998046875, |
|
"loss": 0.4564, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.9352307319641113, |
|
"rewards/margins": 1.1718149185180664, |
|
"rewards/rejected": -4.107045650482178, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 9.75, |
|
"learning_rate": 3.882801896372967e-09, |
|
"logits/chosen": -1.2255470752716064, |
|
"logits/rejected": -1.1375856399536133, |
|
"logps/chosen": -556.98193359375, |
|
"logps/rejected": -639.6429443359375, |
|
"loss": 0.4908, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.91620135307312, |
|
"rewards/margins": 1.1449532508850098, |
|
"rewards/rejected": -4.061154842376709, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 12.25, |
|
"learning_rate": 2.7143119759026614e-09, |
|
"logits/chosen": -1.242653727531433, |
|
"logits/rejected": -1.0747482776641846, |
|
"logps/chosen": -574.4716796875, |
|
"logps/rejected": -665.8096313476562, |
|
"loss": 0.4263, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.901991605758667, |
|
"rewards/margins": 1.1683391332626343, |
|
"rewards/rejected": -4.070330619812012, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 1.754344691717591e-09, |
|
"logits/chosen": -1.1282501220703125, |
|
"logits/rejected": -1.0916457176208496, |
|
"logps/chosen": -552.8446655273438, |
|
"logps/rejected": -669.7666015625, |
|
"loss": 0.5197, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.030531406402588, |
|
"rewards/margins": 0.9483699798583984, |
|
"rewards/rejected": -3.9789013862609863, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 13.125, |
|
"learning_rate": 1.0029802008096335e-09, |
|
"logits/chosen": -1.1534841060638428, |
|
"logits/rejected": -0.994836151599884, |
|
"logps/chosen": -570.4867553710938, |
|
"logps/rejected": -668.6637573242188, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.9619479179382324, |
|
"rewards/margins": 1.2098205089569092, |
|
"rewards/rejected": -4.171768665313721, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 8.5, |
|
"learning_rate": 4.602812418974534e-10, |
|
"logits/chosen": -1.2624783515930176, |
|
"logits/rejected": -1.1238892078399658, |
|
"logps/chosen": -582.1685180664062, |
|
"logps/rejected": -673.0120239257812, |
|
"loss": 0.5017, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.003277540206909, |
|
"rewards/margins": 1.1538227796554565, |
|
"rewards/rejected": -4.157099723815918, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": -1.0723599195480347, |
|
"eval_logits/rejected": -0.9449748396873474, |
|
"eval_logps/chosen": -561.3216552734375, |
|
"eval_logps/rejected": -654.607177734375, |
|
"eval_loss": 0.48731154203414917, |
|
"eval_rewards/accuracies": 0.7444999814033508, |
|
"eval_rewards/chosen": -2.966696262359619, |
|
"eval_rewards/margins": 1.133296012878418, |
|
"eval_rewards/rejected": -4.099992275238037, |
|
"eval_runtime": 382.0182, |
|
"eval_samples_per_second": 5.235, |
|
"eval_steps_per_second": 0.654, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 9.875, |
|
"learning_rate": 1.2629313018819312e-10, |
|
"logits/chosen": -1.171769142150879, |
|
"logits/rejected": -1.0495896339416504, |
|
"logps/chosen": -542.8326416015625, |
|
"logps/rejected": -627.7073974609375, |
|
"loss": 0.5191, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.9036014080047607, |
|
"rewards/margins": 1.005274772644043, |
|
"rewards/rejected": -3.9088759422302246, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 18.25, |
|
"learning_rate": 1.0437535929996855e-12, |
|
"logits/chosen": -1.1617281436920166, |
|
"logits/rejected": -0.9952475428581238, |
|
"logps/chosen": -585.9136962890625, |
|
"logps/rejected": -680.009521484375, |
|
"loss": 0.4659, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.991471767425537, |
|
"rewards/margins": 1.3875491619110107, |
|
"rewards/rejected": -4.379020690917969, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3821, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5021860111574015, |
|
"train_runtime": 41123.41, |
|
"train_samples_per_second": 1.487, |
|
"train_steps_per_second": 0.093 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3821, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|