|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.998027613412229, |
|
"eval_steps": 50000, |
|
"global_step": 1824, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00821827744904668, |
|
"grad_norm": 48.55606780690502, |
|
"learning_rate": 1.358695652173913e-08, |
|
"logits/chosen": 26.16689682006836, |
|
"logits/rejected": 25.511425018310547, |
|
"logps/chosen": -189.36741638183594, |
|
"logps/rejected": -78.73792266845703, |
|
"loss": 1.79, |
|
"rewards/accuracies": 0.2800000011920929, |
|
"rewards/chosen": -0.006983796134591103, |
|
"rewards/margins": 3.662884410005063e-05, |
|
"rewards/rejected": -0.007020425051450729, |
|
"sft_loss": 0.661233127117157, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01643655489809336, |
|
"grad_norm": 50.84809399854242, |
|
"learning_rate": 2.717391304347826e-08, |
|
"logits/chosen": 25.634292602539062, |
|
"logits/rejected": 25.165508270263672, |
|
"logps/chosen": -175.30511474609375, |
|
"logps/rejected": -79.45011901855469, |
|
"loss": 1.7672, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": -0.01493214163929224, |
|
"rewards/margins": 0.032123688608407974, |
|
"rewards/rejected": -0.04705582931637764, |
|
"sft_loss": 0.6432023644447327, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02465483234714004, |
|
"grad_norm": 41.52221293409133, |
|
"learning_rate": 4.076086956521739e-08, |
|
"logits/chosen": 25.897306442260742, |
|
"logits/rejected": 25.234777450561523, |
|
"logps/chosen": -204.5565643310547, |
|
"logps/rejected": -85.37405395507812, |
|
"loss": 1.6603, |
|
"rewards/accuracies": 0.8799999952316284, |
|
"rewards/chosen": -0.028912657871842384, |
|
"rewards/margins": 0.18977542221546173, |
|
"rewards/rejected": -0.21868810057640076, |
|
"sft_loss": 0.7592554688453674, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03287310979618672, |
|
"grad_norm": 30.024466917447533, |
|
"learning_rate": 5.434782608695652e-08, |
|
"logits/chosen": 26.472496032714844, |
|
"logits/rejected": 26.013669967651367, |
|
"logps/chosen": -178.9062042236328, |
|
"logps/rejected": -87.18224334716797, |
|
"loss": 1.5519, |
|
"rewards/accuracies": 0.9300000071525574, |
|
"rewards/chosen": -0.11060313880443573, |
|
"rewards/margins": 0.3851660490036011, |
|
"rewards/rejected": -0.495769202709198, |
|
"sft_loss": 0.6785654425621033, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.041091387245233396, |
|
"grad_norm": 23.574332052101575, |
|
"learning_rate": 6.793478260869565e-08, |
|
"logits/chosen": 26.571308135986328, |
|
"logits/rejected": 26.069765090942383, |
|
"logps/chosen": -204.71995544433594, |
|
"logps/rejected": -95.25181579589844, |
|
"loss": 1.4535, |
|
"rewards/accuracies": 0.9300000071525574, |
|
"rewards/chosen": -0.2731512486934662, |
|
"rewards/margins": 0.7024775743484497, |
|
"rewards/rejected": -0.9756287336349487, |
|
"sft_loss": 0.6605415344238281, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.04930966469428008, |
|
"grad_norm": 18.127113157576492, |
|
"learning_rate": 8.152173913043478e-08, |
|
"logits/chosen": 26.70085906982422, |
|
"logits/rejected": 26.199695587158203, |
|
"logps/chosen": -189.0041961669922, |
|
"logps/rejected": -95.67135620117188, |
|
"loss": 1.3598, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -0.4376958906650543, |
|
"rewards/margins": 0.9910183548927307, |
|
"rewards/rejected": -1.4287142753601074, |
|
"sft_loss": 0.6798427700996399, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05752794214332676, |
|
"grad_norm": 16.856249874916603, |
|
"learning_rate": 9.510869565217392e-08, |
|
"logits/chosen": 27.086894989013672, |
|
"logits/rejected": 26.779054641723633, |
|
"logps/chosen": -202.5185546875, |
|
"logps/rejected": -98.5663070678711, |
|
"loss": 1.2944, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -0.5852899551391602, |
|
"rewards/margins": 1.2753018140792847, |
|
"rewards/rejected": -1.8605915307998657, |
|
"sft_loss": 0.6831802129745483, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.06574621959237344, |
|
"grad_norm": 15.222314216803584, |
|
"learning_rate": 1.0869565217391303e-07, |
|
"logits/chosen": 26.470937728881836, |
|
"logits/rejected": 26.266651153564453, |
|
"logps/chosen": -185.2868194580078, |
|
"logps/rejected": -96.5091781616211, |
|
"loss": 1.2027, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -0.6554566025733948, |
|
"rewards/margins": 1.4152508974075317, |
|
"rewards/rejected": -2.0707075595855713, |
|
"sft_loss": 0.6970738768577576, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07396449704142012, |
|
"grad_norm": 14.365159397400335, |
|
"learning_rate": 1.2228260869565216e-07, |
|
"logits/chosen": 25.881906509399414, |
|
"logits/rejected": 25.525175094604492, |
|
"logps/chosen": -202.46238708496094, |
|
"logps/rejected": -108.43726348876953, |
|
"loss": 1.1328, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.787525475025177, |
|
"rewards/margins": 1.8143333196640015, |
|
"rewards/rejected": -2.6018588542938232, |
|
"sft_loss": 0.6782786846160889, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.08218277449046679, |
|
"grad_norm": 13.924602084521048, |
|
"learning_rate": 1.358695652173913e-07, |
|
"logits/chosen": 24.610755920410156, |
|
"logits/rejected": 24.408979415893555, |
|
"logps/chosen": -206.8500213623047, |
|
"logps/rejected": -113.42557525634766, |
|
"loss": 1.0599, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -0.9198395609855652, |
|
"rewards/margins": 1.9545520544052124, |
|
"rewards/rejected": -2.874391555786133, |
|
"sft_loss": 0.7132790088653564, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09040105193951348, |
|
"grad_norm": 11.972485852637668, |
|
"learning_rate": 1.4945652173913042e-07, |
|
"logits/chosen": 23.996862411499023, |
|
"logits/rejected": 24.392988204956055, |
|
"logps/chosen": -176.3905487060547, |
|
"logps/rejected": -110.62020874023438, |
|
"loss": 1.0223, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -0.8744373321533203, |
|
"rewards/margins": 2.172375440597534, |
|
"rewards/rejected": -3.0468130111694336, |
|
"sft_loss": 0.7045189738273621, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.09861932938856016, |
|
"grad_norm": 13.242028156676367, |
|
"learning_rate": 1.6304347826086955e-07, |
|
"logits/chosen": 23.04694366455078, |
|
"logits/rejected": 23.079355239868164, |
|
"logps/chosen": -186.1154327392578, |
|
"logps/rejected": -107.23130798339844, |
|
"loss": 1.0046, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -1.0562888383865356, |
|
"rewards/margins": 2.0806047916412354, |
|
"rewards/rejected": -3.1368932723999023, |
|
"sft_loss": 0.6290792226791382, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.10683760683760683, |
|
"grad_norm": 11.030176141313747, |
|
"learning_rate": 1.766304347826087e-07, |
|
"logits/chosen": 21.996606826782227, |
|
"logits/rejected": 22.384113311767578, |
|
"logps/chosen": -199.38589477539062, |
|
"logps/rejected": -116.91275024414062, |
|
"loss": 0.9338, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.1412394046783447, |
|
"rewards/margins": 2.474609613418579, |
|
"rewards/rejected": -3.615849018096924, |
|
"sft_loss": 0.697711706161499, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.11505588428665352, |
|
"grad_norm": 11.76117705302215, |
|
"learning_rate": 1.9021739130434784e-07, |
|
"logits/chosen": 22.534835815429688, |
|
"logits/rejected": 23.107168197631836, |
|
"logps/chosen": -216.9481964111328, |
|
"logps/rejected": -129.04183959960938, |
|
"loss": 0.8671, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -1.3997070789337158, |
|
"rewards/margins": 2.9236786365509033, |
|
"rewards/rejected": -4.323385715484619, |
|
"sft_loss": 0.728801965713501, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1232741617357002, |
|
"grad_norm": 32.386219318167385, |
|
"learning_rate": 2.0380434782608694e-07, |
|
"logits/chosen": 20.90481948852539, |
|
"logits/rejected": 21.215843200683594, |
|
"logps/chosen": -247.61224365234375, |
|
"logps/rejected": -138.62893676757812, |
|
"loss": 0.8076, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -1.5252928733825684, |
|
"rewards/margins": 3.3426883220672607, |
|
"rewards/rejected": -4.86798095703125, |
|
"sft_loss": 0.7596563696861267, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.13149243918474687, |
|
"grad_norm": 11.700521911598706, |
|
"learning_rate": 2.1739130434782607e-07, |
|
"logits/chosen": 20.761672973632812, |
|
"logits/rejected": 20.871828079223633, |
|
"logps/chosen": -236.5396728515625, |
|
"logps/rejected": -138.31297302246094, |
|
"loss": 0.842, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -1.9364999532699585, |
|
"rewards/margins": 3.281285047531128, |
|
"rewards/rejected": -5.217784881591797, |
|
"sft_loss": 0.7300873398780823, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.13971071663379356, |
|
"grad_norm": 11.895414317868761, |
|
"learning_rate": 2.309782608695652e-07, |
|
"logits/chosen": 21.150850296020508, |
|
"logits/rejected": 21.817951202392578, |
|
"logps/chosen": -223.0463104248047, |
|
"logps/rejected": -139.8596954345703, |
|
"loss": 0.7489, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -2.007277250289917, |
|
"rewards/margins": 3.5759541988372803, |
|
"rewards/rejected": -5.5832319259643555, |
|
"sft_loss": 0.7483465075492859, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.14792899408284024, |
|
"grad_norm": 11.018586570679572, |
|
"learning_rate": 2.445652173913043e-07, |
|
"logits/chosen": 22.40447998046875, |
|
"logits/rejected": 22.448156356811523, |
|
"logps/chosen": -201.39810180664062, |
|
"logps/rejected": -126.50525665283203, |
|
"loss": 0.8269, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -2.1027634143829346, |
|
"rewards/margins": 3.118117332458496, |
|
"rewards/rejected": -5.220880508422852, |
|
"sft_loss": 0.7317149639129639, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.15614727153188693, |
|
"grad_norm": 9.026135528071627, |
|
"learning_rate": 2.499981493451693e-07, |
|
"logits/chosen": 20.40322494506836, |
|
"logits/rejected": 20.44278907775879, |
|
"logps/chosen": -203.20326232910156, |
|
"logps/rejected": -124.00860595703125, |
|
"loss": 0.8771, |
|
"rewards/accuracies": 0.9100000262260437, |
|
"rewards/chosen": -1.6680656671524048, |
|
"rewards/margins": 3.2214581966400146, |
|
"rewards/rejected": -4.889523983001709, |
|
"sft_loss": 0.7273903489112854, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.16436554898093358, |
|
"grad_norm": 10.366938012622036, |
|
"learning_rate": 2.499868399863186e-07, |
|
"logits/chosen": 20.907590866088867, |
|
"logits/rejected": 21.92055892944336, |
|
"logps/chosen": -226.97225952148438, |
|
"logps/rejected": -144.5021514892578, |
|
"loss": 0.7676, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -2.1906163692474365, |
|
"rewards/margins": 3.6612253189086914, |
|
"rewards/rejected": -5.851841449737549, |
|
"sft_loss": 0.7680675983428955, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17258382642998027, |
|
"grad_norm": 9.779078878164054, |
|
"learning_rate": 2.4996525033926786e-07, |
|
"logits/chosen": 19.350120544433594, |
|
"logits/rejected": 19.718740463256836, |
|
"logps/chosen": -209.20166015625, |
|
"logps/rejected": -136.57321166992188, |
|
"loss": 0.7133, |
|
"rewards/accuracies": 0.9300000071525574, |
|
"rewards/chosen": -2.251823663711548, |
|
"rewards/margins": 3.696510076522827, |
|
"rewards/rejected": -5.948334217071533, |
|
"sft_loss": 0.7179654836654663, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.18080210387902695, |
|
"grad_norm": 8.45489237540799, |
|
"learning_rate": 2.499333821797864e-07, |
|
"logits/chosen": 20.7148380279541, |
|
"logits/rejected": 20.950342178344727, |
|
"logps/chosen": -197.59976196289062, |
|
"logps/rejected": -124.13175964355469, |
|
"loss": 0.7642, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -2.359647750854492, |
|
"rewards/margins": 3.3463170528411865, |
|
"rewards/rejected": -5.705965042114258, |
|
"sft_loss": 0.7615786790847778, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.18902038132807364, |
|
"grad_norm": 10.762078567025862, |
|
"learning_rate": 2.4989123812906105e-07, |
|
"logits/chosen": 19.379554748535156, |
|
"logits/rejected": 20.651145935058594, |
|
"logps/chosen": -219.8887176513672, |
|
"logps/rejected": -148.8833770751953, |
|
"loss": 0.7483, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.958165168762207, |
|
"rewards/margins": 3.9372713565826416, |
|
"rewards/rejected": -6.895437240600586, |
|
"sft_loss": 0.7731737494468689, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.19723865877712032, |
|
"grad_norm": 10.354433872987686, |
|
"learning_rate": 2.498388216534807e-07, |
|
"logits/chosen": 19.773361206054688, |
|
"logits/rejected": 21.142953872680664, |
|
"logps/chosen": -238.31101989746094, |
|
"logps/rejected": -152.0144500732422, |
|
"loss": 0.7063, |
|
"rewards/accuracies": 0.8799999952316284, |
|
"rewards/chosen": -2.7792108058929443, |
|
"rewards/margins": 4.163509845733643, |
|
"rewards/rejected": -6.942720413208008, |
|
"sft_loss": 0.7693167328834534, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.205456936226167, |
|
"grad_norm": 11.490346482929228, |
|
"learning_rate": 2.49776137064351e-07, |
|
"logits/chosen": 19.508024215698242, |
|
"logits/rejected": 19.62151527404785, |
|
"logps/chosen": -232.81178283691406, |
|
"logps/rejected": -151.69398498535156, |
|
"loss": 0.7148, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.6999313831329346, |
|
"rewards/margins": 3.9598686695098877, |
|
"rewards/rejected": -6.659799575805664, |
|
"sft_loss": 0.8186704516410828, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.21367521367521367, |
|
"grad_norm": 13.390026452837366, |
|
"learning_rate": 2.4970318951754e-07, |
|
"logits/chosen": 19.62987518310547, |
|
"logits/rejected": 20.120250701904297, |
|
"logps/chosen": -247.29205322265625, |
|
"logps/rejected": -159.60348510742188, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -2.8834011554718018, |
|
"rewards/margins": 4.369426727294922, |
|
"rewards/rejected": -7.252828598022461, |
|
"sft_loss": 0.7933542728424072, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.22189349112426035, |
|
"grad_norm": 20.479502968540558, |
|
"learning_rate": 2.496199850130537e-07, |
|
"logits/chosen": 18.90142059326172, |
|
"logits/rejected": 19.151918411254883, |
|
"logps/chosen": -231.70069885253906, |
|
"logps/rejected": -141.99693298339844, |
|
"loss": 0.7109, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -2.819154977798462, |
|
"rewards/margins": 3.806306838989258, |
|
"rewards/rejected": -6.625460624694824, |
|
"sft_loss": 0.7920200228691101, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.23011176857330704, |
|
"grad_norm": 16.190350556337812, |
|
"learning_rate": 2.4952653039454297e-07, |
|
"logits/chosen": 18.546707153320312, |
|
"logits/rejected": 18.616119384765625, |
|
"logps/chosen": -251.7685089111328, |
|
"logps/rejected": -160.7568817138672, |
|
"loss": 0.703, |
|
"rewards/accuracies": 0.9300000071525574, |
|
"rewards/chosen": -3.2368268966674805, |
|
"rewards/margins": 4.385184288024902, |
|
"rewards/rejected": -7.622011184692383, |
|
"sft_loss": 0.8116011023521423, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.23833004602235372, |
|
"grad_norm": 14.348906773180857, |
|
"learning_rate": 2.494228333487403e-07, |
|
"logits/chosen": 18.956235885620117, |
|
"logits/rejected": 19.919641494750977, |
|
"logps/chosen": -210.7549591064453, |
|
"logps/rejected": -144.51132202148438, |
|
"loss": 0.6182, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.001668691635132, |
|
"rewards/margins": 3.866687536239624, |
|
"rewards/rejected": -6.868356227874756, |
|
"sft_loss": 0.7950787544250488, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2465483234714004, |
|
"grad_norm": 11.009157695890236, |
|
"learning_rate": 2.4930890240482784e-07, |
|
"logits/chosen": 18.876365661621094, |
|
"logits/rejected": 19.30438804626465, |
|
"logps/chosen": -229.18504333496094, |
|
"logps/rejected": -150.90707397460938, |
|
"loss": 0.671, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.3249759674072266, |
|
"rewards/margins": 4.095080375671387, |
|
"rewards/rejected": -7.4200568199157715, |
|
"sft_loss": 0.7879451513290405, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.25476660092044706, |
|
"grad_norm": 11.264576367918604, |
|
"learning_rate": 2.491847469337356e-07, |
|
"logits/chosen": 18.14313316345215, |
|
"logits/rejected": 18.77975082397461, |
|
"logps/chosen": -219.8468780517578, |
|
"logps/rejected": -150.99098205566406, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -3.250223159790039, |
|
"rewards/margins": 4.443104267120361, |
|
"rewards/rejected": -7.6933274269104, |
|
"sft_loss": 0.8351505994796753, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.26298487836949375, |
|
"grad_norm": 17.15390685304222, |
|
"learning_rate": 2.4905037714737094e-07, |
|
"logits/chosen": 19.779348373413086, |
|
"logits/rejected": 19.593463897705078, |
|
"logps/chosen": -259.2501220703125, |
|
"logps/rejected": -162.26368713378906, |
|
"loss": 0.7398, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -3.7065176963806152, |
|
"rewards/margins": 4.470663070678711, |
|
"rewards/rejected": -8.177180290222168, |
|
"sft_loss": 0.8221470713615417, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.27120315581854043, |
|
"grad_norm": 10.266952014618042, |
|
"learning_rate": 2.489058040977784e-07, |
|
"logits/chosen": 19.731273651123047, |
|
"logits/rejected": 19.947425842285156, |
|
"logps/chosen": -222.83753967285156, |
|
"logps/rejected": -142.3966522216797, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.9937241077423096, |
|
"rewards/margins": 4.092346668243408, |
|
"rewards/rejected": -7.086071968078613, |
|
"sft_loss": 0.8631803393363953, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.2794214332675871, |
|
"grad_norm": 13.183734224346434, |
|
"learning_rate": 2.487510396762309e-07, |
|
"logits/chosen": 18.506755828857422, |
|
"logits/rejected": 19.725309371948242, |
|
"logps/chosen": -246.2398223876953, |
|
"logps/rejected": -171.14974975585938, |
|
"loss": 0.6512, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -3.316751480102539, |
|
"rewards/margins": 4.549408912658691, |
|
"rewards/rejected": -7.8661603927612305, |
|
"sft_loss": 0.9392525553703308, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2876397107166338, |
|
"grad_norm": 12.820383998338311, |
|
"learning_rate": 2.485860966122514e-07, |
|
"logits/chosen": 18.673315048217773, |
|
"logits/rejected": 19.47124671936035, |
|
"logps/chosen": -239.1477508544922, |
|
"logps/rejected": -168.49923706054688, |
|
"loss": 0.6218, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -3.3230719566345215, |
|
"rewards/margins": 4.699094295501709, |
|
"rewards/rejected": -8.022165298461914, |
|
"sft_loss": 0.8536433577537537, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.2958579881656805, |
|
"grad_norm": 10.336252791103886, |
|
"learning_rate": 2.484109884725661e-07, |
|
"logits/chosen": 17.68476104736328, |
|
"logits/rejected": 18.92132568359375, |
|
"logps/chosen": -248.71087646484375, |
|
"logps/rejected": -164.80517578125, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.7376978397369385, |
|
"rewards/margins": 4.469425678253174, |
|
"rewards/rejected": -8.207123756408691, |
|
"sft_loss": 0.7900984883308411, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.30407626561472717, |
|
"grad_norm": 9.07674205143479, |
|
"learning_rate": 2.4822572965998844e-07, |
|
"logits/chosen": 17.927953720092773, |
|
"logits/rejected": 18.744905471801758, |
|
"logps/chosen": -256.3652038574219, |
|
"logps/rejected": -169.36451721191406, |
|
"loss": 0.6008, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -3.603369951248169, |
|
"rewards/margins": 4.865907192230225, |
|
"rewards/rejected": -8.469277381896973, |
|
"sft_loss": 0.8645619750022888, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.31229454306377386, |
|
"grad_norm": 11.293965527732967, |
|
"learning_rate": 2.4803033541223455e-07, |
|
"logits/chosen": 19.39400863647461, |
|
"logits/rejected": 19.796106338500977, |
|
"logps/chosen": -245.06739807128906, |
|
"logps/rejected": -164.10296630859375, |
|
"loss": 0.6281, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -3.5845892429351807, |
|
"rewards/margins": 4.6414408683776855, |
|
"rewards/rejected": -8.226030349731445, |
|
"sft_loss": 0.8358697295188904, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.32051282051282054, |
|
"grad_norm": 11.390930360072153, |
|
"learning_rate": 2.478248218006699e-07, |
|
"logits/chosen": 17.902259826660156, |
|
"logits/rejected": 18.019027709960938, |
|
"logps/chosen": -265.0622253417969, |
|
"logps/rejected": -175.5810546875, |
|
"loss": 0.6158, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -3.9043285846710205, |
|
"rewards/margins": 5.115177154541016, |
|
"rewards/rejected": -9.019506454467773, |
|
"sft_loss": 0.8782904148101807, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.32873109796186717, |
|
"grad_norm": 52.895489458940915, |
|
"learning_rate": 2.476092057289873e-07, |
|
"logits/chosen": 17.241554260253906, |
|
"logits/rejected": 18.226573944091797, |
|
"logps/chosen": -249.59454345703125, |
|
"logps/rejected": -181.9971923828125, |
|
"loss": 0.6044, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -4.305534362792969, |
|
"rewards/margins": 5.0905351638793945, |
|
"rewards/rejected": -9.396068572998047, |
|
"sft_loss": 0.9349213242530823, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.33694937541091385, |
|
"grad_norm": 13.12464260474008, |
|
"learning_rate": 2.473835049318167e-07, |
|
"logits/chosen": 18.299766540527344, |
|
"logits/rejected": 19.57137107849121, |
|
"logps/chosen": -248.37832641601562, |
|
"logps/rejected": -171.3523406982422, |
|
"loss": 0.6532, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -4.206078052520752, |
|
"rewards/margins": 4.699835300445557, |
|
"rewards/rejected": -8.905913352966309, |
|
"sft_loss": 0.9326413869857788, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.34516765285996054, |
|
"grad_norm": 8.71116895518069, |
|
"learning_rate": 2.4714773797326657e-07, |
|
"logits/chosen": 18.58841896057129, |
|
"logits/rejected": 19.255895614624023, |
|
"logps/chosen": -247.08616638183594, |
|
"logps/rejected": -165.45547485351562, |
|
"loss": 0.6183, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -3.8454854488372803, |
|
"rewards/margins": 4.812742710113525, |
|
"rewards/rejected": -8.658228874206543, |
|
"sft_loss": 0.853776752948761, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.3533859303090072, |
|
"grad_norm": 17.852596870413777, |
|
"learning_rate": 2.4690192424539663e-07, |
|
"logits/chosen": 18.283300399780273, |
|
"logits/rejected": 19.169416427612305, |
|
"logps/chosen": -241.07122802734375, |
|
"logps/rejected": -173.18699645996094, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -4.187161445617676, |
|
"rewards/margins": 5.0552144050598145, |
|
"rewards/rejected": -9.242376327514648, |
|
"sft_loss": 0.8952550292015076, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.3616042077580539, |
|
"grad_norm": 12.136136465528743, |
|
"learning_rate": 2.466460839666233e-07, |
|
"logits/chosen": 17.772991180419922, |
|
"logits/rejected": 18.684547424316406, |
|
"logps/chosen": -255.16156005859375, |
|
"logps/rejected": -183.1548614501953, |
|
"loss": 0.562, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -4.293615341186523, |
|
"rewards/margins": 5.553874969482422, |
|
"rewards/rejected": -9.847491264343262, |
|
"sft_loss": 0.8942830562591553, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3698224852071006, |
|
"grad_norm": 13.249996024918259, |
|
"learning_rate": 2.463802381800563e-07, |
|
"logits/chosen": 17.9425106048584, |
|
"logits/rejected": 18.508359909057617, |
|
"logps/chosen": -260.12322998046875, |
|
"logps/rejected": -176.5136260986328, |
|
"loss": 0.6343, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -3.9985711574554443, |
|
"rewards/margins": 5.279909133911133, |
|
"rewards/rejected": -9.278480529785156, |
|
"sft_loss": 0.890729546546936, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.3780407626561473, |
|
"grad_norm": 13.483286780837357, |
|
"learning_rate": 2.461044087517682e-07, |
|
"logits/chosen": 19.322052001953125, |
|
"logits/rejected": 19.914690017700195, |
|
"logps/chosen": -267.1094970703125, |
|
"logps/rejected": -181.53118896484375, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -4.28004264831543, |
|
"rewards/margins": 5.2816243171691895, |
|
"rewards/rejected": -9.561667442321777, |
|
"sft_loss": 0.8358654975891113, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.38625904010519396, |
|
"grad_norm": 10.134479758320998, |
|
"learning_rate": 2.458186183689957e-07, |
|
"logits/chosen": 18.751750946044922, |
|
"logits/rejected": 18.550024032592773, |
|
"logps/chosen": -237.7452392578125, |
|
"logps/rejected": -155.38726806640625, |
|
"loss": 0.6427, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -3.9234371185302734, |
|
"rewards/margins": 4.515294075012207, |
|
"rewards/rejected": -8.438732147216797, |
|
"sft_loss": 0.9805070757865906, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.39447731755424065, |
|
"grad_norm": 13.771161444519256, |
|
"learning_rate": 2.4552289053827344e-07, |
|
"logits/chosen": 18.025060653686523, |
|
"logits/rejected": 18.463733673095703, |
|
"logps/chosen": -252.61175537109375, |
|
"logps/rejected": -171.77259826660156, |
|
"loss": 0.5599, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -4.3357720375061035, |
|
"rewards/margins": 5.04067325592041, |
|
"rewards/rejected": -9.376445770263672, |
|
"sft_loss": 0.7902787923812866, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.40269559500328733, |
|
"grad_norm": 9.886456705994728, |
|
"learning_rate": 2.4521724958350093e-07, |
|
"logits/chosen": 18.645158767700195, |
|
"logits/rejected": 19.603240966796875, |
|
"logps/chosen": -239.74526977539062, |
|
"logps/rejected": -162.94131469726562, |
|
"loss": 0.6344, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -4.351040840148926, |
|
"rewards/margins": 4.734447002410889, |
|
"rewards/rejected": -9.085487365722656, |
|
"sft_loss": 0.8848291635513306, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.410913872452334, |
|
"grad_norm": 16.574947299413026, |
|
"learning_rate": 2.449017206439417e-07, |
|
"logits/chosen": 18.770355224609375, |
|
"logits/rejected": 19.167869567871094, |
|
"logps/chosen": -257.2867431640625, |
|
"logps/rejected": -180.79721069335938, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -4.755511283874512, |
|
"rewards/margins": 5.377356052398682, |
|
"rewards/rejected": -10.132868766784668, |
|
"sft_loss": 0.9855692982673645, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.41913214990138065, |
|
"grad_norm": 15.729142249690554, |
|
"learning_rate": 2.445763296721554e-07, |
|
"logits/chosen": 18.016155242919922, |
|
"logits/rejected": 18.655664443969727, |
|
"logps/chosen": -243.2661590576172, |
|
"logps/rejected": -178.59429931640625, |
|
"loss": 0.6424, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -5.371219635009766, |
|
"rewards/margins": 5.091875076293945, |
|
"rewards/rejected": -10.463094711303711, |
|
"sft_loss": 1.0052944421768188, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.42735042735042733, |
|
"grad_norm": 14.846371154809418, |
|
"learning_rate": 2.4424110343186345e-07, |
|
"logits/chosen": 18.64227867126465, |
|
"logits/rejected": 19.062152862548828, |
|
"logps/chosen": -241.11070251464844, |
|
"logps/rejected": -167.0811767578125, |
|
"loss": 0.6183, |
|
"rewards/accuracies": 0.9300000071525574, |
|
"rewards/chosen": -3.9312877655029297, |
|
"rewards/margins": 4.8627119064331055, |
|
"rewards/rejected": -8.793999671936035, |
|
"sft_loss": 0.8778759837150574, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.435568704799474, |
|
"grad_norm": 16.788820590336183, |
|
"learning_rate": 2.4389606949574767e-07, |
|
"logits/chosen": 18.801990509033203, |
|
"logits/rejected": 20.348352432250977, |
|
"logps/chosen": -266.7105407714844, |
|
"logps/rejected": -190.86622619628906, |
|
"loss": 0.5961, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -4.232571601867676, |
|
"rewards/margins": 5.312459945678711, |
|
"rewards/rejected": -9.545029640197754, |
|
"sft_loss": 0.8269821405410767, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.4437869822485207, |
|
"grad_norm": 9.660029588751273, |
|
"learning_rate": 2.435412562431823e-07, |
|
"logits/chosen": 18.019432067871094, |
|
"logits/rejected": 18.232667922973633, |
|
"logps/chosen": -254.80136108398438, |
|
"logps/rejected": -172.0924835205078, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -4.478307723999023, |
|
"rewards/margins": 5.105349540710449, |
|
"rewards/rejected": -9.583656311035156, |
|
"sft_loss": 0.8911004662513733, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.4520052596975674, |
|
"grad_norm": 8.447767610497143, |
|
"learning_rate": 2.4317669285789964e-07, |
|
"logits/chosen": 18.408342361450195, |
|
"logits/rejected": 18.87084197998047, |
|
"logps/chosen": -296.8369445800781, |
|
"logps/rejected": -195.3644561767578, |
|
"loss": 0.5759, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -4.8854217529296875, |
|
"rewards/margins": 5.9024529457092285, |
|
"rewards/rejected": -10.787875175476074, |
|
"sft_loss": 0.8718220591545105, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.46022353714661407, |
|
"grad_norm": 14.077509009393875, |
|
"learning_rate": 2.428024093255901e-07, |
|
"logits/chosen": 17.676301956176758, |
|
"logits/rejected": 19.232654571533203, |
|
"logps/chosen": -261.8072509765625, |
|
"logps/rejected": -193.81626892089844, |
|
"loss": 0.6028, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -4.590798854827881, |
|
"rewards/margins": 5.75556755065918, |
|
"rewards/rejected": -10.346365928649902, |
|
"sft_loss": 0.8692941069602966, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.46844181459566075, |
|
"grad_norm": 12.255103077032402, |
|
"learning_rate": 2.424184364314352e-07, |
|
"logits/chosen": 19.874698638916016, |
|
"logits/rejected": 19.855077743530273, |
|
"logps/chosen": -263.8525085449219, |
|
"logps/rejected": -174.5958251953125, |
|
"loss": 0.5687, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.0808610916137695, |
|
"rewards/margins": 5.203913688659668, |
|
"rewards/rejected": -9.284773826599121, |
|
"sft_loss": 0.8956073522567749, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.47666009204470744, |
|
"grad_norm": 15.082062203409798, |
|
"learning_rate": 2.420248057575761e-07, |
|
"logits/chosen": 17.83322525024414, |
|
"logits/rejected": 17.633359909057617, |
|
"logps/chosen": -278.74298095703125, |
|
"logps/rejected": -181.1900634765625, |
|
"loss": 0.5783, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -4.548935890197754, |
|
"rewards/margins": 5.899779796600342, |
|
"rewards/rejected": -10.448714256286621, |
|
"sft_loss": 0.8952395915985107, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4848783694937541, |
|
"grad_norm": 11.834958728287821, |
|
"learning_rate": 2.416215496805156e-07, |
|
"logits/chosen": 18.121597290039062, |
|
"logits/rejected": 19.50238037109375, |
|
"logps/chosen": -252.4333038330078, |
|
"logps/rejected": -197.94659423828125, |
|
"loss": 0.5665, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -5.617161273956299, |
|
"rewards/margins": 5.908203125, |
|
"rewards/rejected": -11.52536392211914, |
|
"sft_loss": 0.9183645844459534, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.4930966469428008, |
|
"grad_norm": 13.662146621659161, |
|
"learning_rate": 2.412087013684552e-07, |
|
"logits/chosen": 16.815900802612305, |
|
"logits/rejected": 17.304187774658203, |
|
"logps/chosen": -276.7563781738281, |
|
"logps/rejected": -191.68553161621094, |
|
"loss": 0.6409, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -5.5067291259765625, |
|
"rewards/margins": 5.485719680786133, |
|
"rewards/rejected": -10.992449760437012, |
|
"sft_loss": 0.9233679175376892, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5013149243918474, |
|
"grad_norm": 12.176993675847571, |
|
"learning_rate": 2.407862947785669e-07, |
|
"logits/chosen": 18.833539962768555, |
|
"logits/rejected": 18.9912109375, |
|
"logps/chosen": -301.635498046875, |
|
"logps/rejected": -204.53671264648438, |
|
"loss": 0.487, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -5.389955997467041, |
|
"rewards/margins": 6.232929706573486, |
|
"rewards/rejected": -11.622885704040527, |
|
"sft_loss": 0.92539381980896, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.5095332018408941, |
|
"grad_norm": 8.075422505238562, |
|
"learning_rate": 2.403543646542003e-07, |
|
"logits/chosen": 18.5779972076416, |
|
"logits/rejected": 19.133594512939453, |
|
"logps/chosen": -267.43695068359375, |
|
"logps/rejected": -186.43345642089844, |
|
"loss": 0.6388, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -4.979398727416992, |
|
"rewards/margins": 5.5010504722595215, |
|
"rewards/rejected": -10.480450630187988, |
|
"sft_loss": 0.9564525485038757, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5177514792899408, |
|
"grad_norm": 8.97962168945258, |
|
"learning_rate": 2.39912946522025e-07, |
|
"logits/chosen": 19.53040313720703, |
|
"logits/rejected": 20.46470069885254, |
|
"logps/chosen": -244.89207458496094, |
|
"logps/rejected": -172.9203643798828, |
|
"loss": 0.5741, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -4.482312202453613, |
|
"rewards/margins": 5.123040676116943, |
|
"rewards/rejected": -9.605354309082031, |
|
"sft_loss": 0.9498026371002197, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.5259697567389875, |
|
"grad_norm": 12.054702965132526, |
|
"learning_rate": 2.3946207668910833e-07, |
|
"logits/chosen": 18.005373001098633, |
|
"logits/rejected": 18.470924377441406, |
|
"logps/chosen": -231.72732543945312, |
|
"logps/rejected": -168.2989044189453, |
|
"loss": 0.5869, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -4.519069671630859, |
|
"rewards/margins": 4.713679313659668, |
|
"rewards/rejected": -9.232749938964844, |
|
"sft_loss": 0.8408420085906982, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5341880341880342, |
|
"grad_norm": 25.950655473924865, |
|
"learning_rate": 2.390017922399292e-07, |
|
"logits/chosen": 18.79814910888672, |
|
"logits/rejected": 19.250444412231445, |
|
"logps/chosen": -247.69647216796875, |
|
"logps/rejected": -174.35218811035156, |
|
"loss": 0.6145, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.683900833129883, |
|
"rewards/margins": 5.248979568481445, |
|
"rewards/rejected": -9.932881355285645, |
|
"sft_loss": 0.9410896301269531, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.5424063116370809, |
|
"grad_norm": 10.907505413471052, |
|
"learning_rate": 2.385321310333276e-07, |
|
"logits/chosen": 17.780803680419922, |
|
"logits/rejected": 18.34245491027832, |
|
"logps/chosen": -248.3139190673828, |
|
"logps/rejected": -172.43350219726562, |
|
"loss": 0.6284, |
|
"rewards/accuracies": 0.9100000262260437, |
|
"rewards/chosen": -5.367508411407471, |
|
"rewards/margins": 4.742012977600098, |
|
"rewards/rejected": -10.109521865844727, |
|
"sft_loss": 0.9266583323478699, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5506245890861275, |
|
"grad_norm": 29.199966853282145, |
|
"learning_rate": 2.38053131699391e-07, |
|
"logits/chosen": 18.024690628051758, |
|
"logits/rejected": 18.614425659179688, |
|
"logps/chosen": -290.8337707519531, |
|
"logps/rejected": -203.31809997558594, |
|
"loss": 0.5688, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.330504894256592, |
|
"rewards/margins": 6.163724422454834, |
|
"rewards/rejected": -11.49422836303711, |
|
"sft_loss": 0.9595879316329956, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.5588428665351742, |
|
"grad_norm": 11.416242977585302, |
|
"learning_rate": 2.3756483363627694e-07, |
|
"logits/chosen": 17.60715103149414, |
|
"logits/rejected": 18.161012649536133, |
|
"logps/chosen": -250.91665649414062, |
|
"logps/rejected": -184.4646453857422, |
|
"loss": 0.5981, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -5.246757984161377, |
|
"rewards/margins": 5.438488006591797, |
|
"rewards/rejected": -10.685246467590332, |
|
"sft_loss": 0.9181762933731079, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5670611439842209, |
|
"grad_norm": 9.90321260332983, |
|
"learning_rate": 2.3706727700697226e-07, |
|
"logits/chosen": 17.566362380981445, |
|
"logits/rejected": 18.253488540649414, |
|
"logps/chosen": -284.3514404296875, |
|
"logps/rejected": -193.24594116210938, |
|
"loss": 0.5567, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -4.994836807250977, |
|
"rewards/margins": 5.735879421234131, |
|
"rewards/rejected": -10.73071575164795, |
|
"sft_loss": 1.0169059038162231, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.5752794214332676, |
|
"grad_norm": 15.546918377467371, |
|
"learning_rate": 2.3656050273598986e-07, |
|
"logits/chosen": 17.2511043548584, |
|
"logits/rejected": 18.237810134887695, |
|
"logps/chosen": -258.5328369140625, |
|
"logps/rejected": -191.0077362060547, |
|
"loss": 0.5363, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -5.097340106964111, |
|
"rewards/margins": 5.559810638427734, |
|
"rewards/rejected": -10.657149314880371, |
|
"sft_loss": 0.8693541884422302, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5834976988823143, |
|
"grad_norm": 10.563639895115125, |
|
"learning_rate": 2.3604455250600256e-07, |
|
"logits/chosen": 18.051647186279297, |
|
"logits/rejected": 18.685161590576172, |
|
"logps/chosen": -273.46368408203125, |
|
"logps/rejected": -202.36537170410156, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.360798358917236, |
|
"rewards/margins": 5.977966785430908, |
|
"rewards/rejected": -11.338766098022461, |
|
"sft_loss": 0.9063312411308289, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.591715976331361, |
|
"grad_norm": 11.326441657016302, |
|
"learning_rate": 2.3551946875441467e-07, |
|
"logits/chosen": 19.21741485595703, |
|
"logits/rejected": 19.171350479125977, |
|
"logps/chosen": -265.16619873046875, |
|
"logps/rejected": -185.63027954101562, |
|
"loss": 0.586, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.824009895324707, |
|
"rewards/margins": 5.92770528793335, |
|
"rewards/rejected": -10.751714706420898, |
|
"sft_loss": 0.967497706413269, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5999342537804077, |
|
"grad_norm": 16.154882276044376, |
|
"learning_rate": 2.3498529466987147e-07, |
|
"logits/chosen": 18.083656311035156, |
|
"logits/rejected": 19.166841506958008, |
|
"logps/chosen": -275.3788146972656, |
|
"logps/rejected": -196.90736389160156, |
|
"loss": 0.6121, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -5.075117111206055, |
|
"rewards/margins": 6.357577323913574, |
|
"rewards/rejected": -11.432694435119629, |
|
"sft_loss": 0.9689314961433411, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.6081525312294543, |
|
"grad_norm": 12.069410065037287, |
|
"learning_rate": 2.3444207418870688e-07, |
|
"logits/chosen": 17.682310104370117, |
|
"logits/rejected": 18.865554809570312, |
|
"logps/chosen": -277.48114013671875, |
|
"logps/rejected": -195.44508361816406, |
|
"loss": 0.5471, |
|
"rewards/accuracies": 0.9100000262260437, |
|
"rewards/chosen": -4.911283493041992, |
|
"rewards/margins": 5.8411865234375, |
|
"rewards/rejected": -10.75246810913086, |
|
"sft_loss": 0.8908612728118896, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.616370808678501, |
|
"grad_norm": 17.941774722560346, |
|
"learning_rate": 2.3388985199132962e-07, |
|
"logits/chosen": 17.635793685913086, |
|
"logits/rejected": 18.530078887939453, |
|
"logps/chosen": -265.6659240722656, |
|
"logps/rejected": -185.41099548339844, |
|
"loss": 0.5578, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.487802982330322, |
|
"rewards/margins": 5.8236083984375, |
|
"rewards/rejected": -10.311410903930664, |
|
"sft_loss": 0.8852910399436951, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.6245890861275477, |
|
"grad_norm": 16.222798143855407, |
|
"learning_rate": 2.3332867349854844e-07, |
|
"logits/chosen": 18.22924041748047, |
|
"logits/rejected": 19.445384979248047, |
|
"logps/chosen": -267.8589172363281, |
|
"logps/rejected": -200.61328125, |
|
"loss": 0.6283, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.089979648590088, |
|
"rewards/margins": 6.0606160163879395, |
|
"rewards/rejected": -11.150596618652344, |
|
"sft_loss": 0.85948646068573, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6328073635765944, |
|
"grad_norm": 58.78518201844404, |
|
"learning_rate": 2.3275858486783578e-07, |
|
"logits/chosen": 17.743967056274414, |
|
"logits/rejected": 19.073143005371094, |
|
"logps/chosen": -229.31361389160156, |
|
"logps/rejected": -178.3441162109375, |
|
"loss": 0.5824, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -4.969345569610596, |
|
"rewards/margins": 5.37393045425415, |
|
"rewards/rejected": -10.343276023864746, |
|
"sft_loss": 0.9465056657791138, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.6410256410256411, |
|
"grad_norm": 15.400545086822072, |
|
"learning_rate": 2.321796329895317e-07, |
|
"logits/chosen": 16.995241165161133, |
|
"logits/rejected": 18.397994995117188, |
|
"logps/chosen": -266.69647216796875, |
|
"logps/rejected": -193.65902709960938, |
|
"loss": 0.5813, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -5.233003616333008, |
|
"rewards/margins": 6.00741720199585, |
|
"rewards/rejected": -11.240421295166016, |
|
"sft_loss": 0.9756826758384705, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6492439184746877, |
|
"grad_norm": 11.604457345989609, |
|
"learning_rate": 2.3159186548298688e-07, |
|
"logits/chosen": 16.9737606048584, |
|
"logits/rejected": 18.478750228881836, |
|
"logps/chosen": -257.61419677734375, |
|
"logps/rejected": -194.60252380371094, |
|
"loss": 0.5278, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.19744873046875, |
|
"rewards/margins": 6.024503707885742, |
|
"rewards/rejected": -11.221953392028809, |
|
"sft_loss": 0.972574770450592, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.6574621959237343, |
|
"grad_norm": 14.695134059357779, |
|
"learning_rate": 2.3099533069264594e-07, |
|
"logits/chosen": 17.685321807861328, |
|
"logits/rejected": 18.130495071411133, |
|
"logps/chosen": -257.6887512207031, |
|
"logps/rejected": -180.2339324951172, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -5.080874919891357, |
|
"rewards/margins": 5.387575626373291, |
|
"rewards/rejected": -10.468450546264648, |
|
"sft_loss": 1.00028657913208, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.665680473372781, |
|
"grad_norm": 14.2588021174925, |
|
"learning_rate": 2.3039007768407098e-07, |
|
"logits/chosen": 17.992835998535156, |
|
"logits/rejected": 18.434703826904297, |
|
"logps/chosen": -278.3475341796875, |
|
"logps/rejected": -196.46011352539062, |
|
"loss": 0.581, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -5.298067092895508, |
|
"rewards/margins": 6.079626560211182, |
|
"rewards/rejected": -11.377694129943848, |
|
"sft_loss": 0.9695589542388916, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.6738987508218277, |
|
"grad_norm": 14.653004208659825, |
|
"learning_rate": 2.2977615623990603e-07, |
|
"logits/chosen": 18.65854263305664, |
|
"logits/rejected": 19.244489669799805, |
|
"logps/chosen": -263.1656188964844, |
|
"logps/rejected": -193.50169372558594, |
|
"loss": 0.555, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -5.245527267456055, |
|
"rewards/margins": 5.687096118927002, |
|
"rewards/rejected": -10.932621955871582, |
|
"sft_loss": 0.9538100957870483, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6821170282708744, |
|
"grad_norm": 16.632773914957095, |
|
"learning_rate": 2.2915361685578235e-07, |
|
"logits/chosen": 18.390525817871094, |
|
"logits/rejected": 19.31244468688965, |
|
"logps/chosen": -259.29205322265625, |
|
"logps/rejected": -189.3291015625, |
|
"loss": 0.5501, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -5.290169715881348, |
|
"rewards/margins": 5.542262077331543, |
|
"rewards/rejected": -10.83243179321289, |
|
"sft_loss": 0.9607923030853271, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.6903353057199211, |
|
"grad_norm": 14.010413486772263, |
|
"learning_rate": 2.2852251073616503e-07, |
|
"logits/chosen": 17.323869705200195, |
|
"logits/rejected": 18.94650650024414, |
|
"logps/chosen": -282.4395751953125, |
|
"logps/rejected": -215.9941864013672, |
|
"loss": 0.4948, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -5.772212505340576, |
|
"rewards/margins": 6.878769397735596, |
|
"rewards/rejected": -12.650981903076172, |
|
"sft_loss": 0.993140697479248, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6985535831689678, |
|
"grad_norm": 14.508340310090572, |
|
"learning_rate": 2.2788288979014132e-07, |
|
"logits/chosen": 18.25994300842285, |
|
"logits/rejected": 19.41350555419922, |
|
"logps/chosen": -279.428955078125, |
|
"logps/rejected": -197.93687438964844, |
|
"loss": 0.5473, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -5.4432454109191895, |
|
"rewards/margins": 5.909384250640869, |
|
"rewards/rejected": -11.352629661560059, |
|
"sft_loss": 0.9294517040252686, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.7067718606180144, |
|
"grad_norm": 15.828121421000128, |
|
"learning_rate": 2.2723480662715134e-07, |
|
"logits/chosen": 17.447628021240234, |
|
"logits/rejected": 18.819887161254883, |
|
"logps/chosen": -253.06153869628906, |
|
"logps/rejected": -190.72598266601562, |
|
"loss": 0.5712, |
|
"rewards/accuracies": 0.8899999856948853, |
|
"rewards/chosen": -5.495950698852539, |
|
"rewards/margins": 5.677833080291748, |
|
"rewards/rejected": -11.173783302307129, |
|
"sft_loss": 1.0165560245513916, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7149901380670611, |
|
"grad_norm": 21.070659832772854, |
|
"learning_rate": 2.2657831455266063e-07, |
|
"logits/chosen": 19.03611946105957, |
|
"logits/rejected": 19.757238388061523, |
|
"logps/chosen": -281.93084716796875, |
|
"logps/rejected": -194.18865966796875, |
|
"loss": 0.6137, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -5.303485870361328, |
|
"rewards/margins": 5.8611884117126465, |
|
"rewards/rejected": -11.164673805236816, |
|
"sft_loss": 1.0157676935195923, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.7232084155161078, |
|
"grad_norm": 10.044668338093802, |
|
"learning_rate": 2.2591346756377588e-07, |
|
"logits/chosen": 18.8349666595459, |
|
"logits/rejected": 19.587926864624023, |
|
"logps/chosen": -245.26052856445312, |
|
"logps/rejected": -174.76011657714844, |
|
"loss": 0.5325, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -4.77711820602417, |
|
"rewards/margins": 5.245749473571777, |
|
"rewards/rejected": -10.022867202758789, |
|
"sft_loss": 0.9105268120765686, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7314266929651545, |
|
"grad_norm": 13.114453854538773, |
|
"learning_rate": 2.252403203448034e-07, |
|
"logits/chosen": 19.10161781311035, |
|
"logits/rejected": 20.04970932006836, |
|
"logps/chosen": -325.4466552734375, |
|
"logps/rejected": -227.55043029785156, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -5.897343635559082, |
|
"rewards/margins": 6.912624359130859, |
|
"rewards/rejected": -12.809967994689941, |
|
"sft_loss": 0.9535994529724121, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.7396449704142012, |
|
"grad_norm": 12.5969825666755, |
|
"learning_rate": 2.2455892826275155e-07, |
|
"logits/chosen": 18.5415096282959, |
|
"logits/rejected": 19.55573844909668, |
|
"logps/chosen": -302.2394714355469, |
|
"logps/rejected": -217.98895263671875, |
|
"loss": 0.5556, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -6.171204090118408, |
|
"rewards/margins": 6.812131881713867, |
|
"rewards/rejected": -12.9833345413208, |
|
"sft_loss": 0.9671850800514221, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7478632478632479, |
|
"grad_norm": 11.483896112432117, |
|
"learning_rate": 2.2386934736277666e-07, |
|
"logits/chosen": 18.071735382080078, |
|
"logits/rejected": 19.025733947753906, |
|
"logps/chosen": -237.59962463378906, |
|
"logps/rejected": -185.32635498046875, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -5.7157416343688965, |
|
"rewards/margins": 5.618371963500977, |
|
"rewards/rejected": -11.334112167358398, |
|
"sft_loss": 0.9591123461723328, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.7560815253122946, |
|
"grad_norm": 13.120210730356671, |
|
"learning_rate": 2.2317163436357317e-07, |
|
"logits/chosen": 16.842187881469727, |
|
"logits/rejected": 18.437271118164062, |
|
"logps/chosen": -282.98541259765625, |
|
"logps/rejected": -213.07257080078125, |
|
"loss": 0.5363, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -5.7529802322387695, |
|
"rewards/margins": 6.836727142333984, |
|
"rewards/rejected": -12.589707374572754, |
|
"sft_loss": 0.9440767168998718, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7642998027613412, |
|
"grad_norm": 12.516354265498741, |
|
"learning_rate": 2.2246584665270855e-07, |
|
"logits/chosen": 18.161880493164062, |
|
"logits/rejected": 19.371177673339844, |
|
"logps/chosen": -298.9051513671875, |
|
"logps/rejected": -213.79953002929688, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -5.515788555145264, |
|
"rewards/margins": 6.570387363433838, |
|
"rewards/rejected": -12.086176872253418, |
|
"sft_loss": 0.9586593508720398, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.7725180802103879, |
|
"grad_norm": 11.048153129151439, |
|
"learning_rate": 2.2175204228190308e-07, |
|
"logits/chosen": 18.859655380249023, |
|
"logits/rejected": 20.116731643676758, |
|
"logps/chosen": -261.10186767578125, |
|
"logps/rejected": -194.5068817138672, |
|
"loss": 0.6008, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -5.460696220397949, |
|
"rewards/margins": 5.922670841217041, |
|
"rewards/rejected": -11.383367538452148, |
|
"sft_loss": 0.9851782321929932, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.7807363576594346, |
|
"grad_norm": 34.036831132798504, |
|
"learning_rate": 2.2103027996225512e-07, |
|
"logits/chosen": 17.431440353393555, |
|
"logits/rejected": 18.033245086669922, |
|
"logps/chosen": -278.5311584472656, |
|
"logps/rejected": -198.3171844482422, |
|
"loss": 0.5997, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -5.360807418823242, |
|
"rewards/margins": 6.381589412689209, |
|
"rewards/rejected": -11.74239730834961, |
|
"sft_loss": 1.0034022331237793, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.7889546351084813, |
|
"grad_norm": 14.859702493359293, |
|
"learning_rate": 2.2030061905941193e-07, |
|
"logits/chosen": 18.73612403869629, |
|
"logits/rejected": 18.83433723449707, |
|
"logps/chosen": -264.3339538574219, |
|
"logps/rejected": -190.15017700195312, |
|
"loss": 0.5072, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -5.439321517944336, |
|
"rewards/margins": 5.989686489105225, |
|
"rewards/rejected": -11.429006576538086, |
|
"sft_loss": 0.9705156087875366, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.797172912557528, |
|
"grad_norm": 10.75919165569494, |
|
"learning_rate": 2.1956311958868684e-07, |
|
"logits/chosen": 19.243186950683594, |
|
"logits/rejected": 19.267446517944336, |
|
"logps/chosen": -267.3321228027344, |
|
"logps/rejected": -196.00926208496094, |
|
"loss": 0.4832, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -5.953473091125488, |
|
"rewards/margins": 5.860842227935791, |
|
"rewards/rejected": -11.814314842224121, |
|
"sft_loss": 0.9466427564620972, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.8053911900065747, |
|
"grad_norm": 17.23206010012729, |
|
"learning_rate": 2.1881784221012307e-07, |
|
"logits/chosen": 17.544191360473633, |
|
"logits/rejected": 18.491127014160156, |
|
"logps/chosen": -250.6893768310547, |
|
"logps/rejected": -189.68630981445312, |
|
"loss": 0.5522, |
|
"rewards/accuracies": 0.9100000262260437, |
|
"rewards/chosen": -6.418759822845459, |
|
"rewards/margins": 5.677851676940918, |
|
"rewards/rejected": -12.096611022949219, |
|
"sft_loss": 1.0340924263000488, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8136094674556213, |
|
"grad_norm": 14.38672703795697, |
|
"learning_rate": 2.1806484822350417e-07, |
|
"logits/chosen": 17.07558250427246, |
|
"logits/rejected": 17.701539993286133, |
|
"logps/chosen": -301.8546142578125, |
|
"logps/rejected": -211.86402893066406, |
|
"loss": 0.511, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -6.485326766967773, |
|
"rewards/margins": 6.297828197479248, |
|
"rewards/rejected": -12.78315544128418, |
|
"sft_loss": 1.0085182189941406, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.821827744904668, |
|
"grad_norm": 11.220505543423183, |
|
"learning_rate": 2.1730419956331215e-07, |
|
"logits/chosen": 17.45648956298828, |
|
"logits/rejected": 18.378616333007812, |
|
"logps/chosen": -281.8039245605469, |
|
"logps/rejected": -211.0707550048828, |
|
"loss": 0.4967, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -5.824225425720215, |
|
"rewards/margins": 6.649372577667236, |
|
"rewards/rejected": -12.47359848022461, |
|
"sft_loss": 0.9624088406562805, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8300460223537146, |
|
"grad_norm": 19.974838378014, |
|
"learning_rate": 2.1653595879363335e-07, |
|
"logits/chosen": 18.410470962524414, |
|
"logits/rejected": 18.558494567871094, |
|
"logps/chosen": -267.88653564453125, |
|
"logps/rejected": -197.4770050048828, |
|
"loss": 0.5762, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -6.227014541625977, |
|
"rewards/margins": 6.174468517303467, |
|
"rewards/rejected": -12.401481628417969, |
|
"sft_loss": 0.9929137229919434, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.8382642998027613, |
|
"grad_norm": 20.289642932843638, |
|
"learning_rate": 2.1576018910301238e-07, |
|
"logits/chosen": 18.445819854736328, |
|
"logits/rejected": 18.456052780151367, |
|
"logps/chosen": -268.7127990722656, |
|
"logps/rejected": -191.65673828125, |
|
"loss": 0.5308, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.160595893859863, |
|
"rewards/margins": 5.674745559692383, |
|
"rewards/rejected": -11.835343360900879, |
|
"sft_loss": 0.9606292843818665, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.846482577251808, |
|
"grad_norm": 12.060714182430129, |
|
"learning_rate": 2.1497695429925497e-07, |
|
"logits/chosen": 17.933076858520508, |
|
"logits/rejected": 18.939220428466797, |
|
"logps/chosen": -267.7327575683594, |
|
"logps/rejected": -197.41754150390625, |
|
"loss": 0.5127, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -5.445801258087158, |
|
"rewards/margins": 6.1840291023254395, |
|
"rewards/rejected": -11.629830360412598, |
|
"sft_loss": 0.8621335029602051, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.8547008547008547, |
|
"grad_norm": 10.501846825508975, |
|
"learning_rate": 2.1418631880417954e-07, |
|
"logits/chosen": 17.952999114990234, |
|
"logits/rejected": 19.42998504638672, |
|
"logps/chosen": -270.5357360839844, |
|
"logps/rejected": -212.4191436767578, |
|
"loss": 0.5705, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -6.491232872009277, |
|
"rewards/margins": 6.157339096069336, |
|
"rewards/rejected": -12.648571968078613, |
|
"sft_loss": 1.0165194272994995, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8629191321499013, |
|
"grad_norm": 38.938347224135214, |
|
"learning_rate": 2.1338834764831843e-07, |
|
"logits/chosen": 18.03480339050293, |
|
"logits/rejected": 18.895524978637695, |
|
"logps/chosen": -288.3295593261719, |
|
"logps/rejected": -212.9174041748047, |
|
"loss": 0.5076, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -6.212762355804443, |
|
"rewards/margins": 6.556905746459961, |
|
"rewards/rejected": -12.769665718078613, |
|
"sft_loss": 1.0657466650009155, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.871137409598948, |
|
"grad_norm": 23.662606552485556, |
|
"learning_rate": 2.125831064655693e-07, |
|
"logits/chosen": 18.570951461791992, |
|
"logits/rejected": 19.01372528076172, |
|
"logps/chosen": -299.0896911621094, |
|
"logps/rejected": -218.2689666748047, |
|
"loss": 0.4869, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -6.196591377258301, |
|
"rewards/margins": 6.7210693359375, |
|
"rewards/rejected": -12.9176607131958, |
|
"sft_loss": 1.0185062885284424, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8793556870479947, |
|
"grad_norm": 19.788570154737137, |
|
"learning_rate": 2.1177066148779655e-07, |
|
"logits/chosen": 18.860197067260742, |
|
"logits/rejected": 19.767044067382812, |
|
"logps/chosen": -318.2361755371094, |
|
"logps/rejected": -226.54783630371094, |
|
"loss": 0.5328, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -6.095911979675293, |
|
"rewards/margins": 7.498478412628174, |
|
"rewards/rejected": -13.594389915466309, |
|
"sft_loss": 0.9245139360427856, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.8875739644970414, |
|
"grad_norm": 9.861201904757298, |
|
"learning_rate": 2.1095107953938348e-07, |
|
"logits/chosen": 18.201683044433594, |
|
"logits/rejected": 18.54186248779297, |
|
"logps/chosen": -252.76708984375, |
|
"logps/rejected": -189.79519653320312, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -6.304187774658203, |
|
"rewards/margins": 5.595078945159912, |
|
"rewards/rejected": -11.899266242980957, |
|
"sft_loss": 1.0021482706069946, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8957922419460881, |
|
"grad_norm": 12.854026542061266, |
|
"learning_rate": 2.1012442803173634e-07, |
|
"logits/chosen": 16.392040252685547, |
|
"logits/rejected": 18.43426513671875, |
|
"logps/chosen": -268.9873962402344, |
|
"logps/rejected": -213.36622619628906, |
|
"loss": 0.452, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -6.529672622680664, |
|
"rewards/margins": 6.670236110687256, |
|
"rewards/rejected": -13.199908256530762, |
|
"sft_loss": 1.0502568483352661, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.9040105193951348, |
|
"grad_norm": 14.317934082382363, |
|
"learning_rate": 2.0929077495773927e-07, |
|
"logits/chosen": 17.196094512939453, |
|
"logits/rejected": 18.512819290161133, |
|
"logps/chosen": -301.5859375, |
|
"logps/rejected": -215.9300994873047, |
|
"loss": 0.5177, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -6.289539813995361, |
|
"rewards/margins": 7.147468090057373, |
|
"rewards/rejected": -13.43700885772705, |
|
"sft_loss": 1.052231788635254, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9122287968441815, |
|
"grad_norm": 13.793660373919764, |
|
"learning_rate": 2.0845018888616212e-07, |
|
"logits/chosen": 17.761926651000977, |
|
"logits/rejected": 18.349868774414062, |
|
"logps/chosen": -275.8336486816406, |
|
"logps/rejected": -202.1535186767578, |
|
"loss": 0.4794, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -5.62368106842041, |
|
"rewards/margins": 6.281108856201172, |
|
"rewards/rejected": -11.904790878295898, |
|
"sft_loss": 0.9447892904281616, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.9204470742932281, |
|
"grad_norm": 13.501353742225147, |
|
"learning_rate": 2.0760273895602037e-07, |
|
"logits/chosen": 17.632814407348633, |
|
"logits/rejected": 17.65854263305664, |
|
"logps/chosen": -254.25704956054688, |
|
"logps/rejected": -177.63784790039062, |
|
"loss": 0.5335, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.114619255065918, |
|
"rewards/margins": 5.592235565185547, |
|
"rewards/rejected": -10.706855773925781, |
|
"sft_loss": 0.9995157718658447, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9286653517422748, |
|
"grad_norm": 19.535542998103256, |
|
"learning_rate": 2.0674849487088864e-07, |
|
"logits/chosen": 18.379846572875977, |
|
"logits/rejected": 19.475313186645508, |
|
"logps/chosen": -249.86785888671875, |
|
"logps/rejected": -187.93824768066406, |
|
"loss": 0.5958, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.827848434448242, |
|
"rewards/margins": 5.467617034912109, |
|
"rewards/rejected": -11.295466423034668, |
|
"sft_loss": 0.9322109222412109, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.9368836291913215, |
|
"grad_norm": 25.195757238729385, |
|
"learning_rate": 2.0588752689316723e-07, |
|
"logits/chosen": 18.46122169494629, |
|
"logits/rejected": 18.586881637573242, |
|
"logps/chosen": -286.5140075683594, |
|
"logps/rejected": -202.23248291015625, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.130897045135498, |
|
"rewards/margins": 6.1991753578186035, |
|
"rewards/rejected": -12.330072402954102, |
|
"sft_loss": 0.924500048160553, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.9451019066403682, |
|
"grad_norm": 14.694663908634908, |
|
"learning_rate": 2.0501990583830315e-07, |
|
"logits/chosen": 17.5371036529541, |
|
"logits/rejected": 18.469070434570312, |
|
"logps/chosen": -274.0564270019531, |
|
"logps/rejected": -211.01268005371094, |
|
"loss": 0.4981, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -6.664008617401123, |
|
"rewards/margins": 6.217647552490234, |
|
"rewards/rejected": -12.8816556930542, |
|
"sft_loss": 1.0239460468292236, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.9533201840894149, |
|
"grad_norm": 8.507356630817076, |
|
"learning_rate": 2.0414570306896536e-07, |
|
"logits/chosen": 17.411376953125, |
|
"logits/rejected": 18.47208023071289, |
|
"logps/chosen": -295.3019714355469, |
|
"logps/rejected": -213.13792419433594, |
|
"loss": 0.5512, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -6.6735124588012695, |
|
"rewards/margins": 6.6261305809021, |
|
"rewards/rejected": -13.299642562866211, |
|
"sft_loss": 1.529820442199707, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9615384615384616, |
|
"grad_norm": 25.681414018757476, |
|
"learning_rate": 2.0326499048917527e-07, |
|
"logits/chosen": 17.31963348388672, |
|
"logits/rejected": 18.280134201049805, |
|
"logps/chosen": -282.2524108886719, |
|
"logps/rejected": -218.47996520996094, |
|
"loss": 0.5755, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.157464981079102, |
|
"rewards/margins": 6.622015953063965, |
|
"rewards/rejected": -13.779480934143066, |
|
"sft_loss": 0.9510271549224854, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.9697567389875082, |
|
"grad_norm": 13.878204470039535, |
|
"learning_rate": 2.023778405383925e-07, |
|
"logits/chosen": 18.141050338745117, |
|
"logits/rejected": 18.204177856445312, |
|
"logps/chosen": -273.6821594238281, |
|
"logps/rejected": -200.89984130859375, |
|
"loss": 0.4418, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -5.963834762573242, |
|
"rewards/margins": 6.579600811004639, |
|
"rewards/rejected": -12.543435096740723, |
|
"sft_loss": 0.9940951466560364, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.9779750164365549, |
|
"grad_norm": 11.452199407752436, |
|
"learning_rate": 2.0148432618555651e-07, |
|
"logits/chosen": 18.627866744995117, |
|
"logits/rejected": 18.42972755432129, |
|
"logps/chosen": -258.9418029785156, |
|
"logps/rejected": -185.6231231689453, |
|
"loss": 0.5262, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -5.471505165100098, |
|
"rewards/margins": 5.772936820983887, |
|
"rewards/rejected": -11.244441032409668, |
|
"sft_loss": 0.9383735060691833, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.9861932938856016, |
|
"grad_norm": 11.942794396918284, |
|
"learning_rate": 2.005845209230851e-07, |
|
"logits/chosen": 18.03531265258789, |
|
"logits/rejected": 18.720346450805664, |
|
"logps/chosen": -292.6284484863281, |
|
"logps/rejected": -217.44017028808594, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -6.853020191192627, |
|
"rewards/margins": 6.340816497802734, |
|
"rewards/rejected": -13.193839073181152, |
|
"sft_loss": 1.0825438499450684, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9944115713346483, |
|
"grad_norm": 11.995957867465538, |
|
"learning_rate": 1.9967849876082937e-07, |
|
"logits/chosen": 16.612958908081055, |
|
"logits/rejected": 17.676807403564453, |
|
"logps/chosen": -290.99993896484375, |
|
"logps/rejected": -217.08941650390625, |
|
"loss": 0.5367, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.961750030517578, |
|
"rewards/margins": 6.5437798500061035, |
|
"rewards/rejected": -13.505529403686523, |
|
"sft_loss": 1.0639195442199707, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.0026298487836949, |
|
"grad_norm": 10.297644271924568, |
|
"learning_rate": 1.9876633421998652e-07, |
|
"logits/chosen": 17.37873649597168, |
|
"logits/rejected": 18.0369815826416, |
|
"logps/chosen": -277.8174133300781, |
|
"logps/rejected": -203.3291473388672, |
|
"loss": 0.4734, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -6.353253364562988, |
|
"rewards/margins": 6.258001804351807, |
|
"rewards/rejected": -12.611254692077637, |
|
"sft_loss": 0.9542250037193298, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.0108481262327416, |
|
"grad_norm": 11.471429971847657, |
|
"learning_rate": 1.9784810232697024e-07, |
|
"logits/chosen": 17.6014461517334, |
|
"logits/rejected": 18.502716064453125, |
|
"logps/chosen": -295.8468017578125, |
|
"logps/rejected": -225.82949829101562, |
|
"loss": 0.4473, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -6.305618762969971, |
|
"rewards/margins": 7.557163238525391, |
|
"rewards/rejected": -13.862781524658203, |
|
"sft_loss": 0.9756129384040833, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.0190664036817882, |
|
"grad_norm": 14.22423049629626, |
|
"learning_rate": 1.969238786072398e-07, |
|
"logits/chosen": 17.072832107543945, |
|
"logits/rejected": 17.857742309570312, |
|
"logps/chosen": -318.9200134277344, |
|
"logps/rejected": -236.0108184814453, |
|
"loss": 0.423, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.207548141479492, |
|
"rewards/margins": 7.39101505279541, |
|
"rewards/rejected": -14.598563194274902, |
|
"sft_loss": 0.9570875763893127, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.027284681130835, |
|
"grad_norm": 14.863752308544749, |
|
"learning_rate": 1.9599373907908803e-07, |
|
"logits/chosen": 18.62479591369629, |
|
"logits/rejected": 19.332067489624023, |
|
"logps/chosen": -311.5079650878906, |
|
"logps/rejected": -230.38861083984375, |
|
"loss": 0.4746, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -7.520875453948975, |
|
"rewards/margins": 7.087317943572998, |
|
"rewards/rejected": -14.608192443847656, |
|
"sft_loss": 1.0305228233337402, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.0355029585798816, |
|
"grad_norm": 11.389098298703924, |
|
"learning_rate": 1.9505776024738873e-07, |
|
"logits/chosen": 17.646556854248047, |
|
"logits/rejected": 18.52758026123047, |
|
"logps/chosen": -267.45611572265625, |
|
"logps/rejected": -202.84034729003906, |
|
"loss": 0.494, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -6.995048999786377, |
|
"rewards/margins": 5.844033241271973, |
|
"rewards/rejected": -12.839081764221191, |
|
"sft_loss": 1.0837846994400024, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.0437212360289283, |
|
"grad_norm": 17.383619355827555, |
|
"learning_rate": 1.9411601909730397e-07, |
|
"logits/chosen": 16.90384292602539, |
|
"logits/rejected": 17.69657325744629, |
|
"logps/chosen": -276.2812805175781, |
|
"logps/rejected": -210.5614471435547, |
|
"loss": 0.5568, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.192663669586182, |
|
"rewards/margins": 6.900697231292725, |
|
"rewards/rejected": -13.093358993530273, |
|
"sft_loss": 1.2382417917251587, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.051939513477975, |
|
"grad_norm": 15.094044445712935, |
|
"learning_rate": 1.9316859308795215e-07, |
|
"logits/chosen": 16.81202507019043, |
|
"logits/rejected": 18.695880889892578, |
|
"logps/chosen": -257.9354553222656, |
|
"logps/rejected": -203.78866577148438, |
|
"loss": 0.5268, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.167855262756348, |
|
"rewards/margins": 6.644321441650391, |
|
"rewards/rejected": -12.812177658081055, |
|
"sft_loss": 1.173020839691162, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.0601577909270217, |
|
"grad_norm": 15.863163074258626, |
|
"learning_rate": 1.9221556014603674e-07, |
|
"logits/chosen": 16.538555145263672, |
|
"logits/rejected": 18.44594955444336, |
|
"logps/chosen": -299.3294982910156, |
|
"logps/rejected": -236.79315185546875, |
|
"loss": 0.4933, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.601771354675293, |
|
"rewards/margins": 7.276884078979492, |
|
"rewards/rejected": -14.878654479980469, |
|
"sft_loss": 1.1147685050964355, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.0683760683760684, |
|
"grad_norm": 12.95009158653796, |
|
"learning_rate": 1.9125699865943696e-07, |
|
"logits/chosen": 17.819013595581055, |
|
"logits/rejected": 18.056425094604492, |
|
"logps/chosen": -280.44134521484375, |
|
"logps/rejected": -211.0347900390625, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -6.4677534103393555, |
|
"rewards/margins": 6.797198295593262, |
|
"rewards/rejected": -13.26495361328125, |
|
"sft_loss": 1.0369815826416016, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.076594345825115, |
|
"grad_norm": 9.53030890727526, |
|
"learning_rate": 1.9029298747076e-07, |
|
"logits/chosen": 18.56303596496582, |
|
"logits/rejected": 19.128713607788086, |
|
"logps/chosen": -301.52069091796875, |
|
"logps/rejected": -222.11752319335938, |
|
"loss": 0.4653, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.511043071746826, |
|
"rewards/margins": 7.3326520919799805, |
|
"rewards/rejected": -13.843696594238281, |
|
"sft_loss": 1.039981722831726, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.0848126232741617, |
|
"grad_norm": 12.49460951335956, |
|
"learning_rate": 1.893236058708565e-07, |
|
"logits/chosen": 17.331298828125, |
|
"logits/rejected": 18.1816463470459, |
|
"logps/chosen": -290.297607421875, |
|
"logps/rejected": -212.6442413330078, |
|
"loss": 0.4897, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -6.593270301818848, |
|
"rewards/margins": 6.5445356369018555, |
|
"rewards/rejected": -13.137805938720703, |
|
"sft_loss": 1.0305876731872559, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.0930309007232084, |
|
"grad_norm": 10.084660494140396, |
|
"learning_rate": 1.8834893359229839e-07, |
|
"logits/chosen": 17.249683380126953, |
|
"logits/rejected": 18.377492904663086, |
|
"logps/chosen": -317.7668151855469, |
|
"logps/rejected": -234.8712158203125, |
|
"loss": 0.4925, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -6.917696952819824, |
|
"rewards/margins": 7.316926956176758, |
|
"rewards/rejected": -14.234623908996582, |
|
"sft_loss": 1.0477817058563232, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.101249178172255, |
|
"grad_norm": 11.370135962731284, |
|
"learning_rate": 1.8736905080282117e-07, |
|
"logits/chosen": 17.393232345581055, |
|
"logits/rejected": 18.21647071838379, |
|
"logps/chosen": -291.6396789550781, |
|
"logps/rejected": -215.71307373046875, |
|
"loss": 0.5118, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -6.400353908538818, |
|
"rewards/margins": 6.503895282745361, |
|
"rewards/rejected": -12.904250144958496, |
|
"sft_loss": 1.0789752006530762, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.1094674556213018, |
|
"grad_norm": 14.128398069389478, |
|
"learning_rate": 1.8638403809872988e-07, |
|
"logits/chosen": 18.000486373901367, |
|
"logits/rejected": 19.02123260498047, |
|
"logps/chosen": -238.9346923828125, |
|
"logps/rejected": -187.83901977539062, |
|
"loss": 0.4881, |
|
"rewards/accuracies": 0.9100000262260437, |
|
"rewards/chosen": -5.991827011108398, |
|
"rewards/margins": 6.166553974151611, |
|
"rewards/rejected": -12.158380508422852, |
|
"sft_loss": 1.0633037090301514, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.1176857330703485, |
|
"grad_norm": 10.039232848979895, |
|
"learning_rate": 1.8539397649826993e-07, |
|
"logits/chosen": 17.416231155395508, |
|
"logits/rejected": 18.53554344177246, |
|
"logps/chosen": -271.6786193847656, |
|
"logps/rejected": -208.55459594726562, |
|
"loss": 0.4408, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -6.946457386016846, |
|
"rewards/margins": 6.493756294250488, |
|
"rewards/rejected": -13.440213203430176, |
|
"sft_loss": 1.0465832948684692, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.1259040105193951, |
|
"grad_norm": 17.7290983481912, |
|
"learning_rate": 1.8439894743496336e-07, |
|
"logits/chosen": 17.006452560424805, |
|
"logits/rejected": 17.804595947265625, |
|
"logps/chosen": -289.0384826660156, |
|
"logps/rejected": -228.98916625976562, |
|
"loss": 0.464, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.2547478675842285, |
|
"rewards/margins": 7.524634838104248, |
|
"rewards/rejected": -14.779382705688477, |
|
"sft_loss": 1.0623209476470947, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.1341222879684418, |
|
"grad_norm": 15.995020113178853, |
|
"learning_rate": 1.8339903275091085e-07, |
|
"logits/chosen": 17.363964080810547, |
|
"logits/rejected": 18.096250534057617, |
|
"logps/chosen": -313.4389343261719, |
|
"logps/rejected": -239.9541015625, |
|
"loss": 0.4292, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.249270439147949, |
|
"rewards/margins": 7.737963676452637, |
|
"rewards/rejected": -14.987234115600586, |
|
"sft_loss": 1.1172467470169067, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.1423405654174885, |
|
"grad_norm": 10.290356468777885, |
|
"learning_rate": 1.8239431469006e-07, |
|
"logits/chosen": 16.6265811920166, |
|
"logits/rejected": 18.333799362182617, |
|
"logps/chosen": -268.6365966796875, |
|
"logps/rejected": -221.0557098388672, |
|
"loss": 0.4627, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -6.95206356048584, |
|
"rewards/margins": 7.242475986480713, |
|
"rewards/rejected": -14.194538116455078, |
|
"sft_loss": 1.2080581188201904, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.1505588428665352, |
|
"grad_norm": 12.079608347733119, |
|
"learning_rate": 1.8138487589144093e-07, |
|
"logits/chosen": 16.631559371948242, |
|
"logits/rejected": 16.87362289428711, |
|
"logps/chosen": -273.40997314453125, |
|
"logps/rejected": -210.4160614013672, |
|
"loss": 0.5063, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -7.272107124328613, |
|
"rewards/margins": 6.501527786254883, |
|
"rewards/rejected": -13.77363395690918, |
|
"sft_loss": 1.0478310585021973, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1587771203155819, |
|
"grad_norm": 17.778097749378432, |
|
"learning_rate": 1.8037079938236894e-07, |
|
"logits/chosen": 17.234224319458008, |
|
"logits/rejected": 18.432863235473633, |
|
"logps/chosen": -281.38458251953125, |
|
"logps/rejected": -223.9882049560547, |
|
"loss": 0.4823, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.636561870574951, |
|
"rewards/margins": 7.072784423828125, |
|
"rewards/rejected": -14.709345817565918, |
|
"sft_loss": 0.9729472398757935, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.1669953977646286, |
|
"grad_norm": 13.760102505142987, |
|
"learning_rate": 1.793521685716154e-07, |
|
"logits/chosen": 17.158409118652344, |
|
"logits/rejected": 18.147829055786133, |
|
"logps/chosen": -339.1050720214844, |
|
"logps/rejected": -257.5541687011719, |
|
"loss": 0.4268, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.170562744140625, |
|
"rewards/margins": 8.136800765991211, |
|
"rewards/rejected": -16.307363510131836, |
|
"sft_loss": 1.087196946144104, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.1752136752136753, |
|
"grad_norm": 12.543576537196508, |
|
"learning_rate": 1.7832906724254747e-07, |
|
"logits/chosen": 16.710582733154297, |
|
"logits/rejected": 17.746997833251953, |
|
"logps/chosen": -279.0878601074219, |
|
"logps/rejected": -217.86927795410156, |
|
"loss": 0.4347, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -7.324019908905029, |
|
"rewards/margins": 6.887091636657715, |
|
"rewards/rejected": -14.211112022399902, |
|
"sft_loss": 1.0954669713974, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.183431952662722, |
|
"grad_norm": 14.156394204679035, |
|
"learning_rate": 1.7730157954623685e-07, |
|
"logits/chosen": 17.9290828704834, |
|
"logits/rejected": 17.706289291381836, |
|
"logps/chosen": -284.99176025390625, |
|
"logps/rejected": -210.2812957763672, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.146309852600098, |
|
"rewards/margins": 6.681734085083008, |
|
"rewards/rejected": -13.828044891357422, |
|
"sft_loss": 1.0680426359176636, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.1916502301117686, |
|
"grad_norm": 12.575179703681824, |
|
"learning_rate": 1.7626978999453794e-07, |
|
"logits/chosen": 17.4116268157959, |
|
"logits/rejected": 17.362062454223633, |
|
"logps/chosen": -319.6551818847656, |
|
"logps/rejected": -242.6376495361328, |
|
"loss": 0.3929, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -7.572165012359619, |
|
"rewards/margins": 7.830206871032715, |
|
"rewards/rejected": -15.402371406555176, |
|
"sft_loss": 1.0497316122055054, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.1998685075608153, |
|
"grad_norm": 9.969097695004054, |
|
"learning_rate": 1.7523378345313714e-07, |
|
"logits/chosen": 17.700010299682617, |
|
"logits/rejected": 18.3839168548584, |
|
"logps/chosen": -291.83917236328125, |
|
"logps/rejected": -215.37081909179688, |
|
"loss": 0.5242, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -7.1273322105407715, |
|
"rewards/margins": 6.290266036987305, |
|
"rewards/rejected": -13.417597770690918, |
|
"sft_loss": 1.382573127746582, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.208086785009862, |
|
"grad_norm": 17.17576749860381, |
|
"learning_rate": 1.741936451345722e-07, |
|
"logits/chosen": 18.578615188598633, |
|
"logits/rejected": 19.108678817749023, |
|
"logps/chosen": -271.18505859375, |
|
"logps/rejected": -205.25746154785156, |
|
"loss": 0.4562, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -6.272554397583008, |
|
"rewards/margins": 6.781675815582275, |
|
"rewards/rejected": -13.054230690002441, |
|
"sft_loss": 1.151402473449707, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.2163050624589087, |
|
"grad_norm": 17.314304500732653, |
|
"learning_rate": 1.731494605912235e-07, |
|
"logits/chosen": 17.34149932861328, |
|
"logits/rejected": 18.757190704345703, |
|
"logps/chosen": -262.0509948730469, |
|
"logps/rejected": -208.38226318359375, |
|
"loss": 0.4598, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -6.2556867599487305, |
|
"rewards/margins": 6.655214309692383, |
|
"rewards/rejected": -12.910900115966797, |
|
"sft_loss": 1.0516655445098877, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.2245233399079554, |
|
"grad_norm": 15.379389005940164, |
|
"learning_rate": 1.721013157082774e-07, |
|
"logits/chosen": 16.926176071166992, |
|
"logits/rejected": 18.068889617919922, |
|
"logps/chosen": -276.72833251953125, |
|
"logps/rejected": -224.33856201171875, |
|
"loss": 0.4921, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.5205397605896, |
|
"rewards/margins": 6.801075458526611, |
|
"rewards/rejected": -14.321615219116211, |
|
"sft_loss": 1.0424396991729736, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.232741617357002, |
|
"grad_norm": 16.009052812361457, |
|
"learning_rate": 1.7104929669666194e-07, |
|
"logits/chosen": 16.49311065673828, |
|
"logits/rejected": 17.206867218017578, |
|
"logps/chosen": -299.70855712890625, |
|
"logps/rejected": -234.7362060546875, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.55043888092041, |
|
"rewards/margins": 7.260469436645508, |
|
"rewards/rejected": -14.810908317565918, |
|
"sft_loss": 1.148091197013855, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.2409598948060487, |
|
"grad_norm": 12.479892072042215, |
|
"learning_rate": 1.69993490085956e-07, |
|
"logits/chosen": 16.645790100097656, |
|
"logits/rejected": 18.348690032958984, |
|
"logps/chosen": -289.54217529296875, |
|
"logps/rejected": -232.9552001953125, |
|
"loss": 0.4746, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.2228593826293945, |
|
"rewards/margins": 7.266669273376465, |
|
"rewards/rejected": -14.48952865600586, |
|
"sft_loss": 1.0830727815628052, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.2491781722550954, |
|
"grad_norm": 13.701336630947893, |
|
"learning_rate": 1.6893398271727222e-07, |
|
"logits/chosen": 17.36661148071289, |
|
"logits/rejected": 18.305465698242188, |
|
"logps/chosen": -300.6762390136719, |
|
"logps/rejected": -228.61175537109375, |
|
"loss": 0.4574, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -7.493809223175049, |
|
"rewards/margins": 7.260177135467529, |
|
"rewards/rejected": -14.753986358642578, |
|
"sft_loss": 1.016793966293335, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.2573964497041419, |
|
"grad_norm": 10.12301776047569, |
|
"learning_rate": 1.6787086173611407e-07, |
|
"logits/chosen": 17.593551635742188, |
|
"logits/rejected": 18.34381675720215, |
|
"logps/chosen": -280.0817565917969, |
|
"logps/rejected": -211.71542358398438, |
|
"loss": 0.4631, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.457971096038818, |
|
"rewards/margins": 6.6875996589660645, |
|
"rewards/rejected": -14.1455717086792, |
|
"sft_loss": 1.0228469371795654, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.2656147271531886, |
|
"grad_norm": 7.684067785358655, |
|
"learning_rate": 1.6680421458520813e-07, |
|
"logits/chosen": 18.189321517944336, |
|
"logits/rejected": 18.308818817138672, |
|
"logps/chosen": -280.6365966796875, |
|
"logps/rejected": -212.9956817626953, |
|
"loss": 0.4905, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -6.9928879737854, |
|
"rewards/margins": 6.62729024887085, |
|
"rewards/rejected": -13.62017822265625, |
|
"sft_loss": 1.4820358753204346, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.2738330046022353, |
|
"grad_norm": 12.91245370337745, |
|
"learning_rate": 1.6573412899731187e-07, |
|
"logits/chosen": 17.40738868713379, |
|
"logits/rejected": 18.874313354492188, |
|
"logps/chosen": -299.2168884277344, |
|
"logps/rejected": -221.5058135986328, |
|
"loss": 0.4091, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.799927234649658, |
|
"rewards/margins": 6.812719821929932, |
|
"rewards/rejected": -13.612646102905273, |
|
"sft_loss": 1.1041682958602905, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.282051282051282, |
|
"grad_norm": 10.002770129869452, |
|
"learning_rate": 1.646606929879975e-07, |
|
"logits/chosen": 18.40058135986328, |
|
"logits/rejected": 19.07294273376465, |
|
"logps/chosen": -323.3199157714844, |
|
"logps/rejected": -239.97935485839844, |
|
"loss": 0.4266, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.476480484008789, |
|
"rewards/margins": 8.036779403686523, |
|
"rewards/rejected": -15.513258934020996, |
|
"sft_loss": 1.0359128713607788, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.2902695595003286, |
|
"grad_norm": 13.874094233494837, |
|
"learning_rate": 1.6358399484841268e-07, |
|
"logits/chosen": 16.465330123901367, |
|
"logits/rejected": 17.001684188842773, |
|
"logps/chosen": -302.719482421875, |
|
"logps/rejected": -224.98745727539062, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.293752670288086, |
|
"rewards/margins": 7.167456150054932, |
|
"rewards/rejected": -14.46120834350586, |
|
"sft_loss": 1.1338067054748535, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.2984878369493753, |
|
"grad_norm": 16.794137790287348, |
|
"learning_rate": 1.625041231380184e-07, |
|
"logits/chosen": 16.809955596923828, |
|
"logits/rejected": 18.395627975463867, |
|
"logps/chosen": -310.674560546875, |
|
"logps/rejected": -239.32200622558594, |
|
"loss": 0.4581, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.000899791717529, |
|
"rewards/margins": 7.625972747802734, |
|
"rewards/rejected": -14.626873016357422, |
|
"sft_loss": 0.9849548935890198, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.306706114398422, |
|
"grad_norm": 12.439364730991043, |
|
"learning_rate": 1.6142116667730482e-07, |
|
"logits/chosen": 19.75507164001465, |
|
"logits/rejected": 20.32160758972168, |
|
"logps/chosen": -293.4500732421875, |
|
"logps/rejected": -214.4062042236328, |
|
"loss": 0.4713, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -6.081357955932617, |
|
"rewards/margins": 7.148606777191162, |
|
"rewards/rejected": -13.229966163635254, |
|
"sft_loss": 0.9287933111190796, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.3149243918474687, |
|
"grad_norm": 11.945683940407063, |
|
"learning_rate": 1.6033521454048597e-07, |
|
"logits/chosen": 18.249954223632812, |
|
"logits/rejected": 19.019634246826172, |
|
"logps/chosen": -271.8877258300781, |
|
"logps/rejected": -217.09132385253906, |
|
"loss": 0.4673, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -6.703191757202148, |
|
"rewards/margins": 7.068259239196777, |
|
"rewards/rejected": -13.77145004272461, |
|
"sft_loss": 1.0365476608276367, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.3231426692965154, |
|
"grad_norm": 10.191092591520466, |
|
"learning_rate": 1.5924635604817306e-07, |
|
"logits/chosen": 17.222694396972656, |
|
"logits/rejected": 18.468660354614258, |
|
"logps/chosen": -288.8092041015625, |
|
"logps/rejected": -236.29319763183594, |
|
"loss": 0.4065, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.580431938171387, |
|
"rewards/margins": 7.9504780769348145, |
|
"rewards/rejected": -15.530909538269043, |
|
"sft_loss": 1.162276268005371, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.331360946745562, |
|
"grad_norm": 9.751260919138856, |
|
"learning_rate": 1.5815468076002771e-07, |
|
"logits/chosen": 16.873342514038086, |
|
"logits/rejected": 18.183860778808594, |
|
"logps/chosen": -312.6845397949219, |
|
"logps/rejected": -240.49859619140625, |
|
"loss": 0.429, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.772741794586182, |
|
"rewards/margins": 8.080373764038086, |
|
"rewards/rejected": -15.853116035461426, |
|
"sft_loss": 0.9787502288818359, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.3395792241946087, |
|
"grad_norm": 13.966159704549986, |
|
"learning_rate": 1.5706027846739588e-07, |
|
"logits/chosen": 17.78404426574707, |
|
"logits/rejected": 18.716482162475586, |
|
"logps/chosen": -265.793701171875, |
|
"logps/rejected": -212.49057006835938, |
|
"loss": 0.4521, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -6.772706985473633, |
|
"rewards/margins": 6.92323112487793, |
|
"rewards/rejected": -13.695940017700195, |
|
"sft_loss": 1.0237793922424316, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.3477975016436554, |
|
"grad_norm": 32.697820524211366, |
|
"learning_rate": 1.5596323918592227e-07, |
|
"logits/chosen": 18.034412384033203, |
|
"logits/rejected": 18.671672821044922, |
|
"logps/chosen": -253.35609436035156, |
|
"logps/rejected": -206.98895263671875, |
|
"loss": 0.4833, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -7.246993541717529, |
|
"rewards/margins": 6.500965595245361, |
|
"rewards/rejected": -13.74795913696289, |
|
"sft_loss": 1.0642235279083252, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.356015779092702, |
|
"grad_norm": 12.398186085004639, |
|
"learning_rate": 1.5486365314814637e-07, |
|
"logits/chosen": 17.62421226501465, |
|
"logits/rejected": 18.33708953857422, |
|
"logps/chosen": -292.3586120605469, |
|
"logps/rejected": -230.61155700683594, |
|
"loss": 0.4084, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.588433742523193, |
|
"rewards/margins": 7.831187725067139, |
|
"rewards/rejected": -15.4196195602417, |
|
"sft_loss": 1.0407756567001343, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.3642340565417488, |
|
"grad_norm": 12.166605913363364, |
|
"learning_rate": 1.5376161079608088e-07, |
|
"logits/chosen": 17.150541305541992, |
|
"logits/rejected": 18.62920379638672, |
|
"logps/chosen": -296.70465087890625, |
|
"logps/rejected": -242.9381866455078, |
|
"loss": 0.46, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.590549945831299, |
|
"rewards/margins": 8.183311462402344, |
|
"rewards/rejected": -15.773859977722168, |
|
"sft_loss": 1.191388487815857, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.3724523339907955, |
|
"grad_norm": 10.880603493347238, |
|
"learning_rate": 1.5265720277377273e-07, |
|
"logits/chosen": 17.14630889892578, |
|
"logits/rejected": 19.08263397216797, |
|
"logps/chosen": -288.0076904296875, |
|
"logps/rejected": -237.15341186523438, |
|
"loss": 0.4435, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.4387054443359375, |
|
"rewards/margins": 7.707547664642334, |
|
"rewards/rejected": -15.146254539489746, |
|
"sft_loss": 1.0695911645889282, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.3806706114398422, |
|
"grad_norm": 50.18720477246092, |
|
"learning_rate": 1.5155051991984745e-07, |
|
"logits/chosen": 18.334110260009766, |
|
"logits/rejected": 18.69322967529297, |
|
"logps/chosen": -315.9974365234375, |
|
"logps/rejected": -228.48602294921875, |
|
"loss": 0.4849, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.224093437194824, |
|
"rewards/margins": 7.033995151519775, |
|
"rewards/rejected": -14.258088111877441, |
|
"sft_loss": 0.9990159869194031, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.3888888888888888, |
|
"grad_norm": 8.437783211213006, |
|
"learning_rate": 1.504416532600378e-07, |
|
"logits/chosen": 17.403743743896484, |
|
"logits/rejected": 18.235454559326172, |
|
"logps/chosen": -242.6099853515625, |
|
"logps/rejected": -199.91429138183594, |
|
"loss": 0.4367, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -6.768044948577881, |
|
"rewards/margins": 6.265518665313721, |
|
"rewards/rejected": -13.033564567565918, |
|
"sft_loss": 1.0013427734375, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.3971071663379355, |
|
"grad_norm": 14.969642809049821, |
|
"learning_rate": 1.4933069399969653e-07, |
|
"logits/chosen": 17.80324935913086, |
|
"logits/rejected": 18.639148712158203, |
|
"logps/chosen": -272.4168395996094, |
|
"logps/rejected": -217.99310302734375, |
|
"loss": 0.4617, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.3702874183654785, |
|
"rewards/margins": 6.988955020904541, |
|
"rewards/rejected": -14.359243392944336, |
|
"sft_loss": 1.1217681169509888, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.4053254437869822, |
|
"grad_norm": 14.289009158482923, |
|
"learning_rate": 1.4821773351629487e-07, |
|
"logits/chosen": 18.467451095581055, |
|
"logits/rejected": 19.347543716430664, |
|
"logps/chosen": -302.4975280761719, |
|
"logps/rejected": -243.9453125, |
|
"loss": 0.4132, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.117691040039062, |
|
"rewards/margins": 8.244772911071777, |
|
"rewards/rejected": -16.362462997436523, |
|
"sft_loss": 1.1255364418029785, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.413543721236029, |
|
"grad_norm": 10.706706272611981, |
|
"learning_rate": 1.4710286335190664e-07, |
|
"logits/chosen": 18.262802124023438, |
|
"logits/rejected": 18.210296630859375, |
|
"logps/chosen": -306.64691162109375, |
|
"logps/rejected": -234.53460693359375, |
|
"loss": 0.4363, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.4043498039245605, |
|
"rewards/margins": 7.886282920837402, |
|
"rewards/rejected": -15.290633201599121, |
|
"sft_loss": 1.080936074256897, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.4217619986850756, |
|
"grad_norm": 13.539503399960063, |
|
"learning_rate": 1.4598617520567863e-07, |
|
"logits/chosen": 18.688413619995117, |
|
"logits/rejected": 19.166378021240234, |
|
"logps/chosen": -295.90008544921875, |
|
"logps/rejected": -231.57505798339844, |
|
"loss": 0.4445, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.257371425628662, |
|
"rewards/margins": 7.788801193237305, |
|
"rewards/rejected": -15.046174049377441, |
|
"sft_loss": 1.04954195022583, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.4299802761341223, |
|
"grad_norm": 20.41588952283392, |
|
"learning_rate": 1.448677609262885e-07, |
|
"logits/chosen": 17.124914169311523, |
|
"logits/rejected": 18.068174362182617, |
|
"logps/chosen": -291.83245849609375, |
|
"logps/rejected": -229.2489776611328, |
|
"loss": 0.4916, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -7.751894474029541, |
|
"rewards/margins": 7.248422145843506, |
|
"rewards/rejected": -15.000316619873047, |
|
"sft_loss": 1.1058861017227173, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.438198553583169, |
|
"grad_norm": 10.416378982514427, |
|
"learning_rate": 1.4374771250438997e-07, |
|
"logits/chosen": 17.683748245239258, |
|
"logits/rejected": 18.105945587158203, |
|
"logps/chosen": -338.9434814453125, |
|
"logps/rejected": -252.90367126464844, |
|
"loss": 0.353, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.734278678894043, |
|
"rewards/margins": 8.11069107055664, |
|
"rewards/rejected": -16.844970703125, |
|
"sft_loss": 1.1128793954849243, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.4464168310322156, |
|
"grad_norm": 15.631489594193368, |
|
"learning_rate": 1.4262612206504653e-07, |
|
"logits/chosen": 19.22788429260254, |
|
"logits/rejected": 18.560340881347656, |
|
"logps/chosen": -288.2774658203125, |
|
"logps/rejected": -221.1851806640625, |
|
"loss": 0.4398, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.204787254333496, |
|
"rewards/margins": 6.889291763305664, |
|
"rewards/rejected": -15.094079971313477, |
|
"sft_loss": 1.0347801446914673, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.4546351084812623, |
|
"grad_norm": 22.470025016143673, |
|
"learning_rate": 1.4150308186015428e-07, |
|
"logits/chosen": 18.78541374206543, |
|
"logits/rejected": 19.072355270385742, |
|
"logps/chosen": -266.7073669433594, |
|
"logps/rejected": -214.3734130859375, |
|
"loss": 0.4864, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -7.12351131439209, |
|
"rewards/margins": 7.012777328491211, |
|
"rewards/rejected": -14.1362886428833, |
|
"sft_loss": 1.0819884538650513, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.462853385930309, |
|
"grad_norm": 11.047306179137715, |
|
"learning_rate": 1.4037868426085368e-07, |
|
"logits/chosen": 17.600828170776367, |
|
"logits/rejected": 17.870738983154297, |
|
"logps/chosen": -321.2472229003906, |
|
"logps/rejected": -237.96395874023438, |
|
"loss": 0.4823, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -7.274439811706543, |
|
"rewards/margins": 8.21683120727539, |
|
"rewards/rejected": -15.49127197265625, |
|
"sft_loss": 1.1358665227890015, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.4710716633793557, |
|
"grad_norm": 9.894309836137355, |
|
"learning_rate": 1.3925302174993233e-07, |
|
"logits/chosen": 16.768348693847656, |
|
"logits/rejected": 18.076475143432617, |
|
"logps/chosen": -295.2914123535156, |
|
"logps/rejected": -222.6123504638672, |
|
"loss": 0.4288, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -6.861530780792236, |
|
"rewards/margins": 7.223613262176514, |
|
"rewards/rejected": -14.085144996643066, |
|
"sft_loss": 0.9808722734451294, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.4792899408284024, |
|
"grad_norm": 15.122256978486702, |
|
"learning_rate": 1.3812618691421803e-07, |
|
"logits/chosen": 17.618257522583008, |
|
"logits/rejected": 18.547971725463867, |
|
"logps/chosen": -307.7926025390625, |
|
"logps/rejected": -228.6370849609375, |
|
"loss": 0.4755, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -6.855221748352051, |
|
"rewards/margins": 7.493732929229736, |
|
"rewards/rejected": -14.348955154418945, |
|
"sft_loss": 0.975628137588501, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.487508218277449, |
|
"grad_norm": 14.990640701163656, |
|
"learning_rate": 1.3699827243696336e-07, |
|
"logits/chosen": 17.19367027282715, |
|
"logits/rejected": 18.374305725097656, |
|
"logps/chosen": -286.5935363769531, |
|
"logps/rejected": -236.76593017578125, |
|
"loss": 0.4732, |
|
"rewards/accuracies": 0.9300000071525574, |
|
"rewards/chosen": -7.718534469604492, |
|
"rewards/margins": 7.860580921173096, |
|
"rewards/rejected": -15.57911491394043, |
|
"sft_loss": 1.1146594285964966, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.4957264957264957, |
|
"grad_norm": 10.50314444472379, |
|
"learning_rate": 1.3586937109022251e-07, |
|
"logits/chosen": 16.421382904052734, |
|
"logits/rejected": 17.77210235595703, |
|
"logps/chosen": -324.25927734375, |
|
"logps/rejected": -260.9275207519531, |
|
"loss": 0.4663, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.703363418579102, |
|
"rewards/margins": 8.462730407714844, |
|
"rewards/rejected": -17.166095733642578, |
|
"sft_loss": 1.0979522466659546, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.5039447731755424, |
|
"grad_norm": 16.690789592498312, |
|
"learning_rate": 1.347395757272207e-07, |
|
"logits/chosen": 19.563251495361328, |
|
"logits/rejected": 19.970426559448242, |
|
"logps/chosen": -271.6186218261719, |
|
"logps/rejected": -212.50277709960938, |
|
"loss": 0.4515, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.6580634117126465, |
|
"rewards/margins": 7.265621185302734, |
|
"rewards/rejected": -13.923684120178223, |
|
"sft_loss": 1.0007566213607788, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.5121630506245891, |
|
"grad_norm": 21.799881591539336, |
|
"learning_rate": 1.3360897927471668e-07, |
|
"logits/chosen": 18.252246856689453, |
|
"logits/rejected": 18.873050689697266, |
|
"logps/chosen": -278.3526611328125, |
|
"logps/rejected": -221.5440216064453, |
|
"loss": 0.4632, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -7.180948257446289, |
|
"rewards/margins": 7.29295539855957, |
|
"rewards/rejected": -14.473901748657227, |
|
"sft_loss": 1.0442688465118408, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.5203813280736358, |
|
"grad_norm": 10.712033452260947, |
|
"learning_rate": 1.3247767472535972e-07, |
|
"logits/chosen": 18.07443618774414, |
|
"logits/rejected": 19.142240524291992, |
|
"logps/chosen": -294.86700439453125, |
|
"logps/rejected": -238.5161895751953, |
|
"loss": 0.4686, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -7.611084461212158, |
|
"rewards/margins": 8.040576934814453, |
|
"rewards/rejected": -15.651662826538086, |
|
"sft_loss": 1.0576171875, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.5285996055226825, |
|
"grad_norm": 7.019511894014553, |
|
"learning_rate": 1.3134575513004073e-07, |
|
"logits/chosen": 18.114564895629883, |
|
"logits/rejected": 18.515487670898438, |
|
"logps/chosen": -303.06329345703125, |
|
"logps/rejected": -237.0087432861328, |
|
"loss": 0.3908, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.551575183868408, |
|
"rewards/margins": 7.9892473220825195, |
|
"rewards/rejected": -15.540822982788086, |
|
"sft_loss": 1.048262119293213, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.5368178829717292, |
|
"grad_norm": 14.009760349607332, |
|
"learning_rate": 1.3021331359023874e-07, |
|
"logits/chosen": 17.101354598999023, |
|
"logits/rejected": 18.246139526367188, |
|
"logps/chosen": -310.4385070800781, |
|
"logps/rejected": -244.6991424560547, |
|
"loss": 0.4262, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.569284439086914, |
|
"rewards/margins": 8.347086906433105, |
|
"rewards/rejected": -15.916370391845703, |
|
"sft_loss": 1.0606290102005005, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.5450361604207759, |
|
"grad_norm": 15.650861724973655, |
|
"learning_rate": 1.2908044325036312e-07, |
|
"logits/chosen": 17.97089195251465, |
|
"logits/rejected": 18.223573684692383, |
|
"logps/chosen": -296.1282958984375, |
|
"logps/rejected": -233.69146728515625, |
|
"loss": 0.4616, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -7.757159233093262, |
|
"rewards/margins": 7.639113903045654, |
|
"rewards/rejected": -15.396271705627441, |
|
"sft_loss": 1.138619065284729, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.5532544378698225, |
|
"grad_norm": 17.515447400155715, |
|
"learning_rate": 1.2794723729009255e-07, |
|
"logits/chosen": 16.958641052246094, |
|
"logits/rejected": 18.472318649291992, |
|
"logps/chosen": -298.9012756347656, |
|
"logps/rejected": -239.90469360351562, |
|
"loss": 0.4502, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.437976837158203, |
|
"rewards/margins": 8.138365745544434, |
|
"rewards/rejected": -15.576342582702637, |
|
"sft_loss": 1.0626742839813232, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.5614727153188692, |
|
"grad_norm": 45.641039135520685, |
|
"learning_rate": 1.2681378891671082e-07, |
|
"logits/chosen": 17.490928649902344, |
|
"logits/rejected": 17.976585388183594, |
|
"logps/chosen": -306.0874328613281, |
|
"logps/rejected": -237.03607177734375, |
|
"loss": 0.4737, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -7.880832672119141, |
|
"rewards/margins": 7.584968090057373, |
|
"rewards/rejected": -15.465802192687988, |
|
"sft_loss": 1.0900439023971558, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.569690992767916, |
|
"grad_norm": 19.898061737121086, |
|
"learning_rate": 1.2568019135744044e-07, |
|
"logits/chosen": 16.957841873168945, |
|
"logits/rejected": 17.985727310180664, |
|
"logps/chosen": -291.70135498046875, |
|
"logps/rejected": -229.38314819335938, |
|
"loss": 0.4349, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -7.644362926483154, |
|
"rewards/margins": 7.429901123046875, |
|
"rewards/rejected": -15.074263572692871, |
|
"sft_loss": 1.0944395065307617, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.5779092702169626, |
|
"grad_norm": 12.39680434949017, |
|
"learning_rate": 1.2454653785177445e-07, |
|
"logits/chosen": 17.493330001831055, |
|
"logits/rejected": 18.42995834350586, |
|
"logps/chosen": -278.9170837402344, |
|
"logps/rejected": -230.72608947753906, |
|
"loss": 0.4231, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -7.324814319610596, |
|
"rewards/margins": 7.593767166137695, |
|
"rewards/rejected": -14.918582916259766, |
|
"sft_loss": 1.0732117891311646, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.5861275476660093, |
|
"grad_norm": 21.306042868258853, |
|
"learning_rate": 1.2341292164380783e-07, |
|
"logits/chosen": 18.833568572998047, |
|
"logits/rejected": 18.869935989379883, |
|
"logps/chosen": -286.1907653808594, |
|
"logps/rejected": -224.49281311035156, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -7.71510124206543, |
|
"rewards/margins": 7.221285820007324, |
|
"rewards/rejected": -14.936385154724121, |
|
"sft_loss": 1.3040668964385986, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.594345825115056, |
|
"grad_norm": 14.69263028616145, |
|
"learning_rate": 1.222794359745675e-07, |
|
"logits/chosen": 16.27896499633789, |
|
"logits/rejected": 18.376323699951172, |
|
"logps/chosen": -300.5797424316406, |
|
"logps/rejected": -242.6514129638672, |
|
"loss": 0.4114, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.52255392074585, |
|
"rewards/margins": 8.169685363769531, |
|
"rewards/rejected": -15.692238807678223, |
|
"sft_loss": 1.0308858156204224, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.6025641025641026, |
|
"grad_norm": 13.802476438483277, |
|
"learning_rate": 1.2114617407434354e-07, |
|
"logits/chosen": 18.055139541625977, |
|
"logits/rejected": 19.250368118286133, |
|
"logps/chosen": -309.2381286621094, |
|
"logps/rejected": -245.81809997558594, |
|
"loss": 0.4326, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.345053672790527, |
|
"rewards/margins": 8.126486778259277, |
|
"rewards/rejected": -15.471541404724121, |
|
"sft_loss": 1.123140811920166, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.6107823800131493, |
|
"grad_norm": 10.423391619330996, |
|
"learning_rate": 1.2001322915502091e-07, |
|
"logits/chosen": 16.897199630737305, |
|
"logits/rejected": 18.748310089111328, |
|
"logps/chosen": -292.1817932128906, |
|
"logps/rejected": -235.8812255859375, |
|
"loss": 0.3942, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -7.173976898193359, |
|
"rewards/margins": 7.872208118438721, |
|
"rewards/rejected": -15.046185493469238, |
|
"sft_loss": 1.1811002492904663, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.619000657462196, |
|
"grad_norm": 26.973905524007105, |
|
"learning_rate": 1.1888069440241243e-07, |
|
"logits/chosen": 18.107698440551758, |
|
"logits/rejected": 19.736108779907227, |
|
"logps/chosen": -317.0016174316406, |
|
"logps/rejected": -252.54832458496094, |
|
"loss": 0.4222, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.532571792602539, |
|
"rewards/margins": 9.049071311950684, |
|
"rewards/rejected": -16.581642150878906, |
|
"sft_loss": 1.075319766998291, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.6272189349112427, |
|
"grad_norm": 16.255178289646476, |
|
"learning_rate": 1.1774866296859448e-07, |
|
"logits/chosen": 17.9573917388916, |
|
"logits/rejected": 19.03142738342285, |
|
"logps/chosen": -301.56561279296875, |
|
"logps/rejected": -243.9299774169922, |
|
"loss": 0.4749, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.284952163696289, |
|
"rewards/margins": 8.675047874450684, |
|
"rewards/rejected": -15.960000038146973, |
|
"sft_loss": 1.1328290700912476, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.6354372123602894, |
|
"grad_norm": 10.065426351498546, |
|
"learning_rate": 1.1661722796424478e-07, |
|
"logits/chosen": 17.292905807495117, |
|
"logits/rejected": 18.3796443939209, |
|
"logps/chosen": -309.9263000488281, |
|
"logps/rejected": -241.42181396484375, |
|
"loss": 0.4268, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -7.671374320983887, |
|
"rewards/margins": 8.211640357971191, |
|
"rewards/rejected": -15.883017539978027, |
|
"sft_loss": 1.0408843755722046, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.643655489809336, |
|
"grad_norm": 12.50718545323396, |
|
"learning_rate": 1.1548648245098432e-07, |
|
"logits/chosen": 17.582983016967773, |
|
"logits/rejected": 18.472742080688477, |
|
"logps/chosen": -319.5430908203125, |
|
"logps/rejected": -253.3585968017578, |
|
"loss": 0.4368, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.135196685791016, |
|
"rewards/margins": 8.56678295135498, |
|
"rewards/rejected": -16.701980590820312, |
|
"sft_loss": 1.121424674987793, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6518737672583828, |
|
"grad_norm": 9.456497156444888, |
|
"learning_rate": 1.1435651943372278e-07, |
|
"logits/chosen": 16.574844360351562, |
|
"logits/rejected": 17.709199905395508, |
|
"logps/chosen": -286.1977844238281, |
|
"logps/rejected": -229.33741760253906, |
|
"loss": 0.4208, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.042440414428711, |
|
"rewards/margins": 7.619970798492432, |
|
"rewards/rejected": -15.662409782409668, |
|
"sft_loss": 1.1242254972457886, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.6600920447074294, |
|
"grad_norm": 12.581807587635986, |
|
"learning_rate": 1.1322743185300865e-07, |
|
"logits/chosen": 17.700603485107422, |
|
"logits/rejected": 19.024187088012695, |
|
"logps/chosen": -296.780029296875, |
|
"logps/rejected": -233.88160705566406, |
|
"loss": 0.4889, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -7.796105861663818, |
|
"rewards/margins": 7.478055953979492, |
|
"rewards/rejected": -15.274161338806152, |
|
"sft_loss": 1.075081467628479, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.6683103221564761, |
|
"grad_norm": 14.09597654178517, |
|
"learning_rate": 1.1209931257738503e-07, |
|
"logits/chosen": 17.260271072387695, |
|
"logits/rejected": 18.022357940673828, |
|
"logps/chosen": -306.3436584472656, |
|
"logps/rejected": -227.7841339111328, |
|
"loss": 0.4487, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -6.75, |
|
"rewards/margins": 7.679973602294922, |
|
"rewards/rejected": -14.429974555969238, |
|
"sft_loss": 1.1023831367492676, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.6765285996055228, |
|
"grad_norm": 10.14530298124155, |
|
"learning_rate": 1.1097225439575096e-07, |
|
"logits/chosen": 16.790157318115234, |
|
"logits/rejected": 17.936586380004883, |
|
"logps/chosen": -274.2288818359375, |
|
"logps/rejected": -220.5703125, |
|
"loss": 0.4648, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.9578022956848145, |
|
"rewards/margins": 7.266170501708984, |
|
"rewards/rejected": -14.22397232055664, |
|
"sft_loss": 1.0298852920532227, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.6847468770545695, |
|
"grad_norm": 14.64734935061402, |
|
"learning_rate": 1.0984635000972946e-07, |
|
"logits/chosen": 16.42229461669922, |
|
"logits/rejected": 17.54804229736328, |
|
"logps/chosen": -277.86077880859375, |
|
"logps/rejected": -223.43917846679688, |
|
"loss": 0.5101, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.510883808135986, |
|
"rewards/margins": 7.296814441680908, |
|
"rewards/rejected": -14.807699203491211, |
|
"sft_loss": 1.089572548866272, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.6929651545036162, |
|
"grad_norm": 14.998745686830942, |
|
"learning_rate": 1.0872169202604284e-07, |
|
"logits/chosen": 17.45005226135254, |
|
"logits/rejected": 18.329872131347656, |
|
"logps/chosen": -335.4214782714844, |
|
"logps/rejected": -264.5696105957031, |
|
"loss": 0.4259, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.625652313232422, |
|
"rewards/margins": 8.4821138381958, |
|
"rewards/rejected": -17.107765197753906, |
|
"sft_loss": 1.1337147951126099, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.7011834319526629, |
|
"grad_norm": 15.126502195785678, |
|
"learning_rate": 1.0759837294889546e-07, |
|
"logits/chosen": 15.89870834350586, |
|
"logits/rejected": 17.66954803466797, |
|
"logps/chosen": -324.4315185546875, |
|
"logps/rejected": -251.8769073486328, |
|
"loss": 0.4365, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -7.891256332397461, |
|
"rewards/margins": 8.40850830078125, |
|
"rewards/rejected": -16.299766540527344, |
|
"sft_loss": 1.0551294088363647, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.7094017094017095, |
|
"grad_norm": 11.887438634341896, |
|
"learning_rate": 1.0647648517236547e-07, |
|
"logits/chosen": 17.808908462524414, |
|
"logits/rejected": 17.868276596069336, |
|
"logps/chosen": -318.5857849121094, |
|
"logps/rejected": -237.06268310546875, |
|
"loss": 0.4077, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.441680431365967, |
|
"rewards/margins": 7.951440811157227, |
|
"rewards/rejected": -15.393121719360352, |
|
"sft_loss": 1.0577045679092407, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.7176199868507562, |
|
"grad_norm": 13.592964221155555, |
|
"learning_rate": 1.0535612097280505e-07, |
|
"logits/chosen": 17.357389450073242, |
|
"logits/rejected": 18.236921310424805, |
|
"logps/chosen": -309.05316162109375, |
|
"logps/rejected": -234.39718627929688, |
|
"loss": 0.4578, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.402204513549805, |
|
"rewards/margins": 7.640995025634766, |
|
"rewards/rejected": -15.043200492858887, |
|
"sft_loss": 1.1290278434753418, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 1.725838264299803, |
|
"grad_norm": 17.516227986033588, |
|
"learning_rate": 1.042373725012508e-07, |
|
"logits/chosen": 15.968868255615234, |
|
"logits/rejected": 17.182361602783203, |
|
"logps/chosen": -277.1082763671875, |
|
"logps/rejected": -217.5791778564453, |
|
"loss": 0.4706, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.2921223640441895, |
|
"rewards/margins": 7.116176605224609, |
|
"rewards/rejected": -14.40829849243164, |
|
"sft_loss": 1.1019597053527832, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.7340565417488496, |
|
"grad_norm": 14.545988790543376, |
|
"learning_rate": 1.0312033177584409e-07, |
|
"logits/chosen": 18.982242584228516, |
|
"logits/rejected": 18.7514705657959, |
|
"logps/chosen": -293.9178466796875, |
|
"logps/rejected": -226.5133819580078, |
|
"loss": 0.3922, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.807718276977539, |
|
"rewards/margins": 7.406096935272217, |
|
"rewards/rejected": -15.213815689086914, |
|
"sft_loss": 1.0929393768310547, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 1.7422748191978963, |
|
"grad_norm": 10.680737229216966, |
|
"learning_rate": 1.0200509067426243e-07, |
|
"logits/chosen": 16.079814910888672, |
|
"logits/rejected": 17.51044273376465, |
|
"logps/chosen": -302.1490173339844, |
|
"logps/rejected": -233.8198699951172, |
|
"loss": 0.444, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.101183891296387, |
|
"rewards/margins": 7.351180553436279, |
|
"rewards/rejected": -15.452364921569824, |
|
"sft_loss": 1.2096168994903564, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.7504930966469427, |
|
"grad_norm": 9.891781648367795, |
|
"learning_rate": 1.0089174092616271e-07, |
|
"logits/chosen": 17.791248321533203, |
|
"logits/rejected": 18.2585506439209, |
|
"logps/chosen": -280.9420166015625, |
|
"logps/rejected": -224.9687957763672, |
|
"loss": 0.4607, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -7.537823677062988, |
|
"rewards/margins": 7.212753772735596, |
|
"rewards/rejected": -14.750576972961426, |
|
"sft_loss": 1.0387908220291138, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 1.7587113740959894, |
|
"grad_norm": 18.289134457763506, |
|
"learning_rate": 9.97803741056361e-08, |
|
"logits/chosen": 16.976699829101562, |
|
"logits/rejected": 17.30523109436035, |
|
"logps/chosen": -275.5840148925781, |
|
"logps/rejected": -215.13279724121094, |
|
"loss": 0.3879, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.284540176391602, |
|
"rewards/margins": 6.921156406402588, |
|
"rewards/rejected": -14.205697059631348, |
|
"sft_loss": 1.0973351001739502, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.7669296515450361, |
|
"grad_norm": 17.72039206697929, |
|
"learning_rate": 9.867108162367594e-08, |
|
"logits/chosen": 16.939437866210938, |
|
"logits/rejected": 18.218585968017578, |
|
"logps/chosen": -294.5352478027344, |
|
"logps/rejected": -230.98623657226562, |
|
"loss": 0.3974, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -7.325733661651611, |
|
"rewards/margins": 7.582549571990967, |
|
"rewards/rejected": -14.908282279968262, |
|
"sft_loss": 1.034481406211853, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.7751479289940828, |
|
"grad_norm": 13.466593004835952, |
|
"learning_rate": 9.756395472065947e-08, |
|
"logits/chosen": 17.363365173339844, |
|
"logits/rejected": 18.14643669128418, |
|
"logps/chosen": -275.0605163574219, |
|
"logps/rejected": -223.0447998046875, |
|
"loss": 0.4368, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -7.5301194190979, |
|
"rewards/margins": 7.630979537963867, |
|
"rewards/rejected": -15.161099433898926, |
|
"sft_loss": 1.191418170928955, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.7833662064431295, |
|
"grad_norm": 17.525060893448625, |
|
"learning_rate": 9.645908445884271e-08, |
|
"logits/chosen": 17.93121910095215, |
|
"logits/rejected": 19.609464645385742, |
|
"logps/chosen": -313.574951171875, |
|
"logps/rejected": -255.39015197753906, |
|
"loss": 0.392, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.089523315429688, |
|
"rewards/margins": 8.564504623413086, |
|
"rewards/rejected": -16.654027938842773, |
|
"sft_loss": 1.0859136581420898, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.7915844838921762, |
|
"grad_norm": 14.818652238656334, |
|
"learning_rate": 9.535656171487096e-08, |
|
"logits/chosen": 17.432899475097656, |
|
"logits/rejected": 18.06930160522461, |
|
"logps/chosen": -306.2559814453125, |
|
"logps/rejected": -247.05564880371094, |
|
"loss": 0.4113, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.375761985778809, |
|
"rewards/margins": 8.475983619689941, |
|
"rewards/rejected": -16.85174560546875, |
|
"sft_loss": 1.2146451473236084, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.7998027613412229, |
|
"grad_norm": 64.12698029544616, |
|
"learning_rate": 9.425647717230382e-08, |
|
"logits/chosen": 17.3497257232666, |
|
"logits/rejected": 18.322324752807617, |
|
"logps/chosen": -314.32830810546875, |
|
"logps/rejected": -253.83473205566406, |
|
"loss": 0.4062, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.878050804138184, |
|
"rewards/margins": 8.278247833251953, |
|
"rewards/rejected": -17.15629768371582, |
|
"sft_loss": 1.077860713005066, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 1.8080210387902695, |
|
"grad_norm": 13.052337358867197, |
|
"learning_rate": 9.315892131415642e-08, |
|
"logits/chosen": 16.90951919555664, |
|
"logits/rejected": 18.101472854614258, |
|
"logps/chosen": -344.9137878417969, |
|
"logps/rejected": -264.2882080078125, |
|
"loss": 0.3948, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.652148246765137, |
|
"rewards/margins": 9.170465469360352, |
|
"rewards/rejected": -17.822612762451172, |
|
"sft_loss": 1.2117801904678345, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.8162393162393162, |
|
"grad_norm": 11.613352799050077, |
|
"learning_rate": 9.206398441545729e-08, |
|
"logits/chosen": 17.647083282470703, |
|
"logits/rejected": 18.84397315979004, |
|
"logps/chosen": -312.7010498046875, |
|
"logps/rejected": -254.3484344482422, |
|
"loss": 0.3759, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.489236831665039, |
|
"rewards/margins": 8.119637489318848, |
|
"rewards/rejected": -16.608875274658203, |
|
"sft_loss": 1.01621675491333, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 1.824457593688363, |
|
"grad_norm": 11.15254994077485, |
|
"learning_rate": 9.097175653582299e-08, |
|
"logits/chosen": 17.26348114013672, |
|
"logits/rejected": 18.160728454589844, |
|
"logps/chosen": -284.86114501953125, |
|
"logps/rejected": -232.5272979736328, |
|
"loss": 0.41, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.840343475341797, |
|
"rewards/margins": 7.370659351348877, |
|
"rewards/rejected": -15.211003303527832, |
|
"sft_loss": 1.1511608362197876, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.8326758711374096, |
|
"grad_norm": 14.803907963552794, |
|
"learning_rate": 8.988232751205051e-08, |
|
"logits/chosen": 17.386255264282227, |
|
"logits/rejected": 17.55118751525879, |
|
"logps/chosen": -271.7340087890625, |
|
"logps/rejected": -208.06320190429688, |
|
"loss": 0.4401, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.613986968994141, |
|
"rewards/margins": 6.274531841278076, |
|
"rewards/rejected": -13.888518333435059, |
|
"sft_loss": 1.144532322883606, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.8408941485864563, |
|
"grad_norm": 14.423568520659874, |
|
"learning_rate": 8.879578695072846e-08, |
|
"logits/chosen": 17.274259567260742, |
|
"logits/rejected": 18.399911880493164, |
|
"logps/chosen": -289.1215515136719, |
|
"logps/rejected": -230.22369384765625, |
|
"loss": 0.4135, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.828088760375977, |
|
"rewards/margins": 7.673010349273682, |
|
"rewards/rejected": -15.5010986328125, |
|
"sft_loss": 1.1277306079864502, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.849112426035503, |
|
"grad_norm": 11.37404702454821, |
|
"learning_rate": 8.771222422086639e-08, |
|
"logits/chosen": 16.860265731811523, |
|
"logits/rejected": 17.736581802368164, |
|
"logps/chosen": -297.23956298828125, |
|
"logps/rejected": -233.06109619140625, |
|
"loss": 0.3998, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.860833168029785, |
|
"rewards/margins": 8.063416481018066, |
|
"rewards/rejected": -15.924250602722168, |
|
"sft_loss": 1.2870830297470093, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.8573307034845496, |
|
"grad_norm": 12.257681191538563, |
|
"learning_rate": 8.663172844654452e-08, |
|
"logits/chosen": 17.366941452026367, |
|
"logits/rejected": 17.93768882751465, |
|
"logps/chosen": -300.5145263671875, |
|
"logps/rejected": -230.68685913085938, |
|
"loss": 0.4455, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -7.420682430267334, |
|
"rewards/margins": 7.759568691253662, |
|
"rewards/rejected": -15.180251121520996, |
|
"sft_loss": 1.0831838846206665, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.8655489809335963, |
|
"grad_norm": 11.344131200773928, |
|
"learning_rate": 8.555438849958296e-08, |
|
"logits/chosen": 17.97229766845703, |
|
"logits/rejected": 18.921049118041992, |
|
"logps/chosen": -319.6356201171875, |
|
"logps/rejected": -246.49024963378906, |
|
"loss": 0.3864, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.522003650665283, |
|
"rewards/margins": 8.551565170288086, |
|
"rewards/rejected": -16.07356834411621, |
|
"sft_loss": 1.150990605354309, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 1.873767258382643, |
|
"grad_norm": 20.985079338983198, |
|
"learning_rate": 8.448029299223194e-08, |
|
"logits/chosen": 17.783571243286133, |
|
"logits/rejected": 18.174728393554688, |
|
"logps/chosen": -312.2618713378906, |
|
"logps/rejected": -233.99496459960938, |
|
"loss": 0.4933, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.624851226806641, |
|
"rewards/margins": 7.475332260131836, |
|
"rewards/rejected": -15.100183486938477, |
|
"sft_loss": 1.1498528718948364, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.8819855358316897, |
|
"grad_norm": 14.844798746234286, |
|
"learning_rate": 8.340953026988351e-08, |
|
"logits/chosen": 17.779254913330078, |
|
"logits/rejected": 19.071887969970703, |
|
"logps/chosen": -311.01190185546875, |
|
"logps/rejected": -248.10272216796875, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.804770469665527, |
|
"rewards/margins": 8.161953926086426, |
|
"rewards/rejected": -15.966724395751953, |
|
"sft_loss": 1.1634888648986816, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 1.8902038132807364, |
|
"grad_norm": 11.515222849514643, |
|
"learning_rate": 8.234218840380475e-08, |
|
"logits/chosen": 16.18383026123047, |
|
"logits/rejected": 17.827003479003906, |
|
"logps/chosen": -301.19659423828125, |
|
"logps/rejected": -245.50054931640625, |
|
"loss": 0.4341, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.15174388885498, |
|
"rewards/margins": 7.812210559844971, |
|
"rewards/rejected": -15.963953971862793, |
|
"sft_loss": 1.0311837196350098, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.898422090729783, |
|
"grad_norm": 14.564597779855657, |
|
"learning_rate": 8.127835518389417e-08, |
|
"logits/chosen": 16.831256866455078, |
|
"logits/rejected": 18.508529663085938, |
|
"logps/chosen": -311.1943054199219, |
|
"logps/rejected": -245.4080047607422, |
|
"loss": 0.4095, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.765483856201172, |
|
"rewards/margins": 8.15777587890625, |
|
"rewards/rejected": -15.923259735107422, |
|
"sft_loss": 1.114915132522583, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 1.9066403681788298, |
|
"grad_norm": 26.10926811927184, |
|
"learning_rate": 8.021811811146075e-08, |
|
"logits/chosen": 16.842208862304688, |
|
"logits/rejected": 17.959400177001953, |
|
"logps/chosen": -291.0676574707031, |
|
"logps/rejected": -237.74246215820312, |
|
"loss": 0.4551, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.678957939147949, |
|
"rewards/margins": 8.211709022521973, |
|
"rewards/rejected": -15.890668869018555, |
|
"sft_loss": 1.1757006645202637, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.9148586456278764, |
|
"grad_norm": 12.813401775007092, |
|
"learning_rate": 7.916156439202672e-08, |
|
"logits/chosen": 17.37171173095703, |
|
"logits/rejected": 18.593181610107422, |
|
"logps/chosen": -289.34759521484375, |
|
"logps/rejected": -234.8267059326172, |
|
"loss": 0.4289, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.662449836730957, |
|
"rewards/margins": 7.566576957702637, |
|
"rewards/rejected": -15.229025840759277, |
|
"sft_loss": 1.1354382038116455, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 1.9230769230769231, |
|
"grad_norm": 6.596137423450017, |
|
"learning_rate": 7.810878092815512e-08, |
|
"logits/chosen": 17.296720504760742, |
|
"logits/rejected": 17.11487579345703, |
|
"logps/chosen": -307.8653869628906, |
|
"logps/rejected": -237.65505981445312, |
|
"loss": 0.3663, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -7.926757335662842, |
|
"rewards/margins": 7.959318161010742, |
|
"rewards/rejected": -15.886076927185059, |
|
"sft_loss": 1.1921048164367676, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.9312952005259696, |
|
"grad_norm": 14.579022955412034, |
|
"learning_rate": 7.705985431230183e-08, |
|
"logits/chosen": 15.675207138061523, |
|
"logits/rejected": 16.91021156311035, |
|
"logps/chosen": -322.23992919921875, |
|
"logps/rejected": -266.904296875, |
|
"loss": 0.391, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.967777252197266, |
|
"rewards/margins": 8.5900297164917, |
|
"rewards/rejected": -17.557802200317383, |
|
"sft_loss": 1.228776454925537, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.9395134779750163, |
|
"grad_norm": 12.828599154800472, |
|
"learning_rate": 7.601487081969307e-08, |
|
"logits/chosen": 18.340225219726562, |
|
"logits/rejected": 19.142946243286133, |
|
"logps/chosen": -350.186279296875, |
|
"logps/rejected": -269.3705749511719, |
|
"loss": 0.3851, |
|
"rewards/accuracies": 0.9300000071525574, |
|
"rewards/chosen": -8.475415229797363, |
|
"rewards/margins": 9.2521390914917, |
|
"rewards/rejected": -17.727554321289062, |
|
"sft_loss": 1.1213669776916504, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.947731755424063, |
|
"grad_norm": 13.15248193805534, |
|
"learning_rate": 7.497391640122967e-08, |
|
"logits/chosen": 18.557586669921875, |
|
"logits/rejected": 19.259462356567383, |
|
"logps/chosen": -311.15838623046875, |
|
"logps/rejected": -252.96751403808594, |
|
"loss": 0.4041, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.537500381469727, |
|
"rewards/margins": 8.597896575927734, |
|
"rewards/rejected": -17.13539695739746, |
|
"sft_loss": 1.1180825233459473, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 1.9559500328731096, |
|
"grad_norm": 20.887376048027924, |
|
"learning_rate": 7.393707667641691e-08, |
|
"logits/chosen": 16.45261573791504, |
|
"logits/rejected": 17.498512268066406, |
|
"logps/chosen": -310.4942626953125, |
|
"logps/rejected": -250.18203735351562, |
|
"loss": 0.4276, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.239749908447266, |
|
"rewards/margins": 8.2033109664917, |
|
"rewards/rejected": -16.44305992126465, |
|
"sft_loss": 1.188431739807129, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.9641683103221563, |
|
"grad_norm": 32.140189305396625, |
|
"learning_rate": 7.290443692632281e-08, |
|
"logits/chosen": 19.094688415527344, |
|
"logits/rejected": 19.616283416748047, |
|
"logps/chosen": -291.1233825683594, |
|
"logps/rejected": -234.5458526611328, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -7.3053131103515625, |
|
"rewards/margins": 7.835725784301758, |
|
"rewards/rejected": -15.141037940979004, |
|
"sft_loss": 1.075373888015747, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 1.972386587771203, |
|
"grad_norm": 13.526795062615003, |
|
"learning_rate": 7.187608208656328e-08, |
|
"logits/chosen": 16.982704162597656, |
|
"logits/rejected": 17.547874450683594, |
|
"logps/chosen": -293.3042297363281, |
|
"logps/rejected": -233.2967987060547, |
|
"loss": 0.3964, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.24399185180664, |
|
"rewards/margins": 7.097829818725586, |
|
"rewards/rejected": -15.341819763183594, |
|
"sft_loss": 1.063591718673706, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.9806048652202497, |
|
"grad_norm": 12.330320612053741, |
|
"learning_rate": 7.085209674031618e-08, |
|
"logits/chosen": 18.508739471435547, |
|
"logits/rejected": 19.527912139892578, |
|
"logps/chosen": -318.8953857421875, |
|
"logps/rejected": -255.2642822265625, |
|
"loss": 0.3766, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.808796405792236, |
|
"rewards/margins": 8.834001541137695, |
|
"rewards/rejected": -16.642797470092773, |
|
"sft_loss": 1.0131335258483887, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 1.9888231426692964, |
|
"grad_norm": 19.628735128907458, |
|
"learning_rate": 6.983256511136442e-08, |
|
"logits/chosen": 17.349624633789062, |
|
"logits/rejected": 18.25617218017578, |
|
"logps/chosen": -315.596923828125, |
|
"logps/rejected": -252.95460510253906, |
|
"loss": 0.3878, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.501006126403809, |
|
"rewards/margins": 8.494573593139648, |
|
"rewards/rejected": -16.995580673217773, |
|
"sft_loss": 1.0632458925247192, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.997041420118343, |
|
"grad_norm": 14.674802699510677, |
|
"learning_rate": 6.881757105716831e-08, |
|
"logits/chosen": 17.45104217529297, |
|
"logits/rejected": 18.316680908203125, |
|
"logps/chosen": -330.3178405761719, |
|
"logps/rejected": -251.63551330566406, |
|
"loss": 0.4009, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.423145294189453, |
|
"rewards/margins": 8.154979705810547, |
|
"rewards/rejected": -16.578125, |
|
"sft_loss": 1.0945472717285156, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 2.0052596975673898, |
|
"grad_norm": 12.624994593347873, |
|
"learning_rate": 6.780719806196828e-08, |
|
"logits/chosen": 17.815471649169922, |
|
"logits/rejected": 19.435829162597656, |
|
"logps/chosen": -326.4144287109375, |
|
"logps/rejected": -260.8008117675781, |
|
"loss": 0.4449, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.356893539428711, |
|
"rewards/margins": 8.929654121398926, |
|
"rewards/rejected": -17.286548614501953, |
|
"sft_loss": 1.1082605123519897, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.0134779750164364, |
|
"grad_norm": 12.725913199026877, |
|
"learning_rate": 6.680152922991822e-08, |
|
"logits/chosen": 16.1939754486084, |
|
"logits/rejected": 17.380538940429688, |
|
"logps/chosen": -282.47589111328125, |
|
"logps/rejected": -237.52879333496094, |
|
"loss": 0.3868, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.493667602539062, |
|
"rewards/margins": 7.595485687255859, |
|
"rewards/rejected": -16.089153289794922, |
|
"sft_loss": 1.1127554178237915, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 2.021696252465483, |
|
"grad_norm": 16.704211079520014, |
|
"learning_rate": 6.580064727824994e-08, |
|
"logits/chosen": 17.634016036987305, |
|
"logits/rejected": 18.210420608520508, |
|
"logps/chosen": -294.94793701171875, |
|
"logps/rejected": -239.2569122314453, |
|
"loss": 0.4093, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -8.08222770690918, |
|
"rewards/margins": 7.951410293579102, |
|
"rewards/rejected": -16.03363609313965, |
|
"sft_loss": 1.0821824073791504, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.02991452991453, |
|
"grad_norm": 11.063546418547208, |
|
"learning_rate": 6.480463453046985e-08, |
|
"logits/chosen": 18.466581344604492, |
|
"logits/rejected": 18.895183563232422, |
|
"logps/chosen": -304.6612243652344, |
|
"logps/rejected": -241.4573974609375, |
|
"loss": 0.4202, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.562366485595703, |
|
"rewards/margins": 7.818039417266846, |
|
"rewards/rejected": -16.38040542602539, |
|
"sft_loss": 1.1190707683563232, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 2.0381328073635765, |
|
"grad_norm": 14.946770498466886, |
|
"learning_rate": 6.381357290958767e-08, |
|
"logits/chosen": 16.804920196533203, |
|
"logits/rejected": 17.867015838623047, |
|
"logps/chosen": -296.5435485839844, |
|
"logps/rejected": -246.7471466064453, |
|
"loss": 0.3722, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.007586479187012, |
|
"rewards/margins": 8.32363224029541, |
|
"rewards/rejected": -16.33121681213379, |
|
"sft_loss": 1.104773759841919, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.046351084812623, |
|
"grad_norm": 11.140569334845633, |
|
"learning_rate": 6.282754393137796e-08, |
|
"logits/chosen": 17.95855140686035, |
|
"logits/rejected": 18.640541076660156, |
|
"logps/chosen": -310.16778564453125, |
|
"logps/rejected": -239.66641235351562, |
|
"loss": 0.4065, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.086520195007324, |
|
"rewards/margins": 8.207837104797363, |
|
"rewards/rejected": -16.294357299804688, |
|
"sft_loss": 1.023207187652588, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 2.05456936226167, |
|
"grad_norm": 15.822685116826385, |
|
"learning_rate": 6.184662869767577e-08, |
|
"logits/chosen": 17.26742172241211, |
|
"logits/rejected": 17.335512161254883, |
|
"logps/chosen": -328.2395324707031, |
|
"logps/rejected": -263.0542297363281, |
|
"loss": 0.4175, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.798872947692871, |
|
"rewards/margins": 8.849559783935547, |
|
"rewards/rejected": -17.648433685302734, |
|
"sft_loss": 1.1304852962493896, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.0627876397107165, |
|
"grad_norm": 17.368549612926913, |
|
"learning_rate": 6.08709078897056e-08, |
|
"logits/chosen": 17.57396125793457, |
|
"logits/rejected": 17.95652198791504, |
|
"logps/chosen": -302.7294006347656, |
|
"logps/rejected": -251.41261291503906, |
|
"loss": 0.4021, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.686173439025879, |
|
"rewards/margins": 8.318988800048828, |
|
"rewards/rejected": -17.005163192749023, |
|
"sft_loss": 1.119976282119751, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 2.0710059171597632, |
|
"grad_norm": 13.875960320644882, |
|
"learning_rate": 5.990046176144551e-08, |
|
"logits/chosen": 16.934846878051758, |
|
"logits/rejected": 17.557884216308594, |
|
"logps/chosen": -274.9892578125, |
|
"logps/rejected": -239.31570434570312, |
|
"loss": 0.4283, |
|
"rewards/accuracies": 0.9100000262260437, |
|
"rewards/chosen": -8.879440307617188, |
|
"rewards/margins": 7.452706336975098, |
|
"rewards/rejected": -16.3321475982666, |
|
"sft_loss": 1.162746787071228, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.07922419460881, |
|
"grad_norm": 12.354544231223421, |
|
"learning_rate": 5.893537013302602e-08, |
|
"logits/chosen": 17.52082061767578, |
|
"logits/rejected": 18.2637939453125, |
|
"logps/chosen": -304.33441162109375, |
|
"logps/rejected": -243.52101135253906, |
|
"loss": 0.4253, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -7.936227321624756, |
|
"rewards/margins": 8.085640907287598, |
|
"rewards/rejected": -16.021867752075195, |
|
"sft_loss": 1.0547149181365967, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 2.0874424720578566, |
|
"grad_norm": 14.221340160175023, |
|
"learning_rate": 5.7975712384164795e-08, |
|
"logits/chosen": 17.841602325439453, |
|
"logits/rejected": 17.95541000366211, |
|
"logps/chosen": -295.451416015625, |
|
"logps/rejected": -230.86936950683594, |
|
"loss": 0.4009, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -7.948279857635498, |
|
"rewards/margins": 7.590776443481445, |
|
"rewards/rejected": -15.539057731628418, |
|
"sft_loss": 1.1430902481079102, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.0956607495069033, |
|
"grad_norm": 15.060512661462361, |
|
"learning_rate": 5.702156744763784e-08, |
|
"logits/chosen": 17.457277297973633, |
|
"logits/rejected": 18.601512908935547, |
|
"logps/chosen": -286.6520080566406, |
|
"logps/rejected": -236.4774627685547, |
|
"loss": 0.4211, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.859719276428223, |
|
"rewards/margins": 7.776011943817139, |
|
"rewards/rejected": -15.635732650756836, |
|
"sft_loss": 1.039507269859314, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 2.10387902695595, |
|
"grad_norm": 14.43891440512856, |
|
"learning_rate": 5.607301380278683e-08, |
|
"logits/chosen": 17.887542724609375, |
|
"logits/rejected": 18.098596572875977, |
|
"logps/chosen": -287.3581848144531, |
|
"logps/rejected": -228.4025421142578, |
|
"loss": 0.4356, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.526928901672363, |
|
"rewards/margins": 8.00684642791748, |
|
"rewards/rejected": -15.533775329589844, |
|
"sft_loss": 1.1267131567001343, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.1120973044049967, |
|
"grad_norm": 12.750181563192855, |
|
"learning_rate": 5.513012946906445e-08, |
|
"logits/chosen": 17.97955322265625, |
|
"logits/rejected": 18.05929183959961, |
|
"logps/chosen": -319.4637145996094, |
|
"logps/rejected": -245.5413818359375, |
|
"loss": 0.3884, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.154685020446777, |
|
"rewards/margins": 8.448837280273438, |
|
"rewards/rejected": -16.6035213470459, |
|
"sft_loss": 1.193272590637207, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 2.1203155818540433, |
|
"grad_norm": 9.936573876560704, |
|
"learning_rate": 5.419299199961708e-08, |
|
"logits/chosen": 17.2838077545166, |
|
"logits/rejected": 17.822799682617188, |
|
"logps/chosen": -337.51031494140625, |
|
"logps/rejected": -259.70428466796875, |
|
"loss": 0.3565, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -7.9683146476745605, |
|
"rewards/margins": 8.882369041442871, |
|
"rewards/rejected": -16.850685119628906, |
|
"sft_loss": 1.041199803352356, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.12853385930309, |
|
"grad_norm": 13.38745866462026, |
|
"learning_rate": 5.3261678474905785e-08, |
|
"logits/chosen": 18.08312225341797, |
|
"logits/rejected": 18.110692977905273, |
|
"logps/chosen": -324.0693359375, |
|
"logps/rejected": -256.90234375, |
|
"loss": 0.391, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.347922325134277, |
|
"rewards/margins": 8.93021011352539, |
|
"rewards/rejected": -17.27813148498535, |
|
"sft_loss": 1.1214524507522583, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 2.1367521367521367, |
|
"grad_norm": 12.52755859911023, |
|
"learning_rate": 5.2336265496366774e-08, |
|
"logits/chosen": 16.553739547729492, |
|
"logits/rejected": 18.280567169189453, |
|
"logps/chosen": -298.98480224609375, |
|
"logps/rejected": -247.38160705566406, |
|
"loss": 0.3604, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.771576881408691, |
|
"rewards/margins": 7.8479180335998535, |
|
"rewards/rejected": -16.619495391845703, |
|
"sft_loss": 1.1661113500595093, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.1449704142011834, |
|
"grad_norm": 15.615159328078256, |
|
"learning_rate": 5.141682918011055e-08, |
|
"logits/chosen": 17.72024917602539, |
|
"logits/rejected": 18.12508773803711, |
|
"logps/chosen": -311.2801818847656, |
|
"logps/rejected": -245.24436950683594, |
|
"loss": 0.4611, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.237349510192871, |
|
"rewards/margins": 8.371785163879395, |
|
"rewards/rejected": -16.609132766723633, |
|
"sft_loss": 1.1050708293914795, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 2.15318869165023, |
|
"grad_norm": 9.121410431305465, |
|
"learning_rate": 5.0503445150661306e-08, |
|
"logits/chosen": 17.203432083129883, |
|
"logits/rejected": 18.309484481811523, |
|
"logps/chosen": -282.98101806640625, |
|
"logps/rejected": -231.17942810058594, |
|
"loss": 0.3828, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.479619979858398, |
|
"rewards/margins": 7.617303371429443, |
|
"rewards/rejected": -16.096921920776367, |
|
"sft_loss": 1.110097050666809, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.1614069690992768, |
|
"grad_norm": 16.777024727194785, |
|
"learning_rate": 4.959618853473696e-08, |
|
"logits/chosen": 16.61244010925293, |
|
"logits/rejected": 17.989538192749023, |
|
"logps/chosen": -302.3112487792969, |
|
"logps/rejected": -246.63719177246094, |
|
"loss": 0.4158, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.352829933166504, |
|
"rewards/margins": 8.38165283203125, |
|
"rewards/rejected": -16.734481811523438, |
|
"sft_loss": 1.1263587474822998, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 2.1696252465483234, |
|
"grad_norm": 12.912024759458012, |
|
"learning_rate": 4.8695133955069564e-08, |
|
"logits/chosen": 15.624103546142578, |
|
"logits/rejected": 16.827468872070312, |
|
"logps/chosen": -306.7551574707031, |
|
"logps/rejected": -244.3481903076172, |
|
"loss": 0.429, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.34555435180664, |
|
"rewards/margins": 8.023102760314941, |
|
"rewards/rejected": -16.3686580657959, |
|
"sft_loss": 1.2823337316513062, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.17784352399737, |
|
"grad_norm": 13.478297039710323, |
|
"learning_rate": 4.780035552426787e-08, |
|
"logits/chosen": 16.33539581298828, |
|
"logits/rejected": 18.024782180786133, |
|
"logps/chosen": -325.2061767578125, |
|
"logps/rejected": -265.6727600097656, |
|
"loss": 0.4175, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.308411598205566, |
|
"rewards/margins": 9.635498046875, |
|
"rewards/rejected": -17.943910598754883, |
|
"sft_loss": 1.1859756708145142, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 2.186061801446417, |
|
"grad_norm": 13.817488574864614, |
|
"learning_rate": 4.691192683872129e-08, |
|
"logits/chosen": 16.309165954589844, |
|
"logits/rejected": 17.056123733520508, |
|
"logps/chosen": -319.3105163574219, |
|
"logps/rejected": -256.0588073730469, |
|
"loss": 0.382, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.325318336486816, |
|
"rewards/margins": 8.578317642211914, |
|
"rewards/rejected": -16.903636932373047, |
|
"sft_loss": 1.0492181777954102, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.1942800788954635, |
|
"grad_norm": 10.242449837573213, |
|
"learning_rate": 4.602992097254646e-08, |
|
"logits/chosen": 17.743621826171875, |
|
"logits/rejected": 19.387224197387695, |
|
"logps/chosen": -307.1810607910156, |
|
"logps/rejected": -254.61309814453125, |
|
"loss": 0.3948, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.201935768127441, |
|
"rewards/margins": 8.72970962524414, |
|
"rewards/rejected": -16.9316463470459, |
|
"sft_loss": 1.159468173980713, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 2.20249835634451, |
|
"grad_norm": 12.840091970424348, |
|
"learning_rate": 4.515441047157707e-08, |
|
"logits/chosen": 17.517444610595703, |
|
"logits/rejected": 18.110706329345703, |
|
"logps/chosen": -303.7611083984375, |
|
"logps/rejected": -246.00747680664062, |
|
"loss": 0.4279, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.031211853027344, |
|
"rewards/margins": 8.574084281921387, |
|
"rewards/rejected": -16.605297088623047, |
|
"sft_loss": 1.1109663248062134, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.210716633793557, |
|
"grad_norm": 11.358689713775057, |
|
"learning_rate": 4.428546734739666e-08, |
|
"logits/chosen": 17.79754066467285, |
|
"logits/rejected": 18.65445327758789, |
|
"logps/chosen": -310.1402587890625, |
|
"logps/rejected": -257.2119445800781, |
|
"loss": 0.3393, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.179025650024414, |
|
"rewards/margins": 9.184054374694824, |
|
"rewards/rejected": -17.363079071044922, |
|
"sft_loss": 1.040381669998169, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 2.2189349112426036, |
|
"grad_norm": 10.0246369651475, |
|
"learning_rate": 4.342316307141568e-08, |
|
"logits/chosen": 15.378368377685547, |
|
"logits/rejected": 17.601299285888672, |
|
"logps/chosen": -293.3377380371094, |
|
"logps/rejected": -252.94558715820312, |
|
"loss": 0.4169, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.73454761505127, |
|
"rewards/margins": 8.430728912353516, |
|
"rewards/rejected": -17.16527557373047, |
|
"sft_loss": 1.088813304901123, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.2271531886916502, |
|
"grad_norm": 8.66405912578809, |
|
"learning_rate": 4.256756856899299e-08, |
|
"logits/chosen": 16.15410041809082, |
|
"logits/rejected": 17.089345932006836, |
|
"logps/chosen": -293.54864501953125, |
|
"logps/rejected": -243.08554077148438, |
|
"loss": 0.3688, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.237372398376465, |
|
"rewards/margins": 8.213920593261719, |
|
"rewards/rejected": -16.4512939453125, |
|
"sft_loss": 1.1098147630691528, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 2.235371466140697, |
|
"grad_norm": 15.953137176215671, |
|
"learning_rate": 4.171875421360202e-08, |
|
"logits/chosen": 16.227901458740234, |
|
"logits/rejected": 16.872665405273438, |
|
"logps/chosen": -329.6645202636719, |
|
"logps/rejected": -257.57489013671875, |
|
"loss": 0.4039, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.39266300201416, |
|
"rewards/margins": 8.796185493469238, |
|
"rewards/rejected": -17.1888484954834, |
|
"sft_loss": 1.1166497468948364, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.2435897435897436, |
|
"grad_norm": 9.812512910956865, |
|
"learning_rate": 4.0876789821042606e-08, |
|
"logits/chosen": 16.98467445373535, |
|
"logits/rejected": 17.594194412231445, |
|
"logps/chosen": -308.341064453125, |
|
"logps/rejected": -250.00465393066406, |
|
"loss": 0.3941, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.45275592803955, |
|
"rewards/margins": 8.58054256439209, |
|
"rewards/rejected": -17.03329849243164, |
|
"sft_loss": 1.1679203510284424, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 2.2518080210387903, |
|
"grad_norm": 11.249560857734895, |
|
"learning_rate": 4.0041744643698585e-08, |
|
"logits/chosen": 17.271631240844727, |
|
"logits/rejected": 18.480789184570312, |
|
"logps/chosen": -323.33148193359375, |
|
"logps/rejected": -265.4918212890625, |
|
"loss": 0.4133, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.759995460510254, |
|
"rewards/margins": 9.08838176727295, |
|
"rewards/rejected": -17.848377227783203, |
|
"sft_loss": 1.1703903675079346, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.260026298487837, |
|
"grad_norm": 13.168234484012684, |
|
"learning_rate": 3.9213687364841514e-08, |
|
"logits/chosen": 17.725706100463867, |
|
"logits/rejected": 18.4434871673584, |
|
"logps/chosen": -265.5625305175781, |
|
"logps/rejected": -229.72801208496094, |
|
"loss": 0.3827, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -7.455626964569092, |
|
"rewards/margins": 8.284765243530273, |
|
"rewards/rejected": -15.740392684936523, |
|
"sft_loss": 1.061354160308838, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 2.2682445759368837, |
|
"grad_norm": 10.595007690116647, |
|
"learning_rate": 3.8392686092981716e-08, |
|
"logits/chosen": 16.218524932861328, |
|
"logits/rejected": 17.454858779907227, |
|
"logps/chosen": -330.2020263671875, |
|
"logps/rejected": -259.57513427734375, |
|
"loss": 0.3713, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.325506210327148, |
|
"rewards/margins": 8.986472129821777, |
|
"rewards/rejected": -17.311979293823242, |
|
"sft_loss": 1.1411256790161133, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.2764628533859304, |
|
"grad_norm": 16.544764732871236, |
|
"learning_rate": 3.757880835626601e-08, |
|
"logits/chosen": 19.006175994873047, |
|
"logits/rejected": 20.302326202392578, |
|
"logps/chosen": -322.05242919921875, |
|
"logps/rejected": -260.6827087402344, |
|
"loss": 0.3984, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.566563606262207, |
|
"rewards/margins": 8.706660270690918, |
|
"rewards/rejected": -17.273221969604492, |
|
"sft_loss": 1.01236891746521, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 2.284681130834977, |
|
"grad_norm": 9.92900744936661, |
|
"learning_rate": 3.677212109692364e-08, |
|
"logits/chosen": 16.336091995239258, |
|
"logits/rejected": 18.393173217773438, |
|
"logps/chosen": -296.00811767578125, |
|
"logps/rejected": -255.37149047851562, |
|
"loss": 0.4114, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.0043363571167, |
|
"rewards/margins": 9.217806816101074, |
|
"rewards/rejected": -17.222143173217773, |
|
"sft_loss": 1.1503466367721558, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.2928994082840237, |
|
"grad_norm": 9.386107838289549, |
|
"learning_rate": 3.597269066576017e-08, |
|
"logits/chosen": 17.042190551757812, |
|
"logits/rejected": 18.17107582092285, |
|
"logps/chosen": -300.5311584472656, |
|
"logps/rejected": -244.8414306640625, |
|
"loss": 0.3695, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.179112434387207, |
|
"rewards/margins": 8.550080299377441, |
|
"rewards/rejected": -16.72919273376465, |
|
"sft_loss": 1.1738831996917725, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 2.3011176857330704, |
|
"grad_norm": 13.817759213393538, |
|
"learning_rate": 3.518058281669996e-08, |
|
"logits/chosen": 17.452651977539062, |
|
"logits/rejected": 19.167875289916992, |
|
"logps/chosen": -325.5849914550781, |
|
"logps/rejected": -261.9805908203125, |
|
"loss": 0.405, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.316492080688477, |
|
"rewards/margins": 9.281232833862305, |
|
"rewards/rejected": -17.59772491455078, |
|
"sft_loss": 1.0759243965148926, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.309335963182117, |
|
"grad_norm": 14.27386340226445, |
|
"learning_rate": 3.439586270137797e-08, |
|
"logits/chosen": 16.01079750061035, |
|
"logits/rejected": 17.990955352783203, |
|
"logps/chosen": -317.35968017578125, |
|
"logps/rejected": -265.36737060546875, |
|
"loss": 0.382, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.888944625854492, |
|
"rewards/margins": 9.316179275512695, |
|
"rewards/rejected": -18.205123901367188, |
|
"sft_loss": 1.064568281173706, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 2.3175542406311638, |
|
"grad_norm": 17.736685407866446, |
|
"learning_rate": 3.3618594863780993e-08, |
|
"logits/chosen": 18.37812042236328, |
|
"logits/rejected": 19.024595260620117, |
|
"logps/chosen": -319.2788391113281, |
|
"logps/rejected": -255.89810180664062, |
|
"loss": 0.3468, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.851397514343262, |
|
"rewards/margins": 8.883750915527344, |
|
"rewards/rejected": -17.73514747619629, |
|
"sft_loss": 1.235966682434082, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.3257725180802105, |
|
"grad_norm": 22.487095580329445, |
|
"learning_rate": 3.2848843234938694e-08, |
|
"logits/chosen": 17.141220092773438, |
|
"logits/rejected": 17.714786529541016, |
|
"logps/chosen": -302.5834045410156, |
|
"logps/rejected": -254.14559936523438, |
|
"loss": 0.36, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.972297668457031, |
|
"rewards/margins": 8.797745704650879, |
|
"rewards/rejected": -17.770044326782227, |
|
"sft_loss": 1.1860109567642212, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 2.333990795529257, |
|
"grad_norm": 11.596948370393193, |
|
"learning_rate": 3.208667112766529e-08, |
|
"logits/chosen": 17.32436752319336, |
|
"logits/rejected": 18.515031814575195, |
|
"logps/chosen": -312.43267822265625, |
|
"logps/rejected": -266.10052490234375, |
|
"loss": 0.3933, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -9.664741516113281, |
|
"rewards/margins": 9.025125503540039, |
|
"rewards/rejected": -18.689865112304688, |
|
"sft_loss": 1.17525315284729, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.342209072978304, |
|
"grad_norm": 16.78732289470905, |
|
"learning_rate": 3.1332141231352194e-08, |
|
"logits/chosen": 17.367273330688477, |
|
"logits/rejected": 17.978761672973633, |
|
"logps/chosen": -325.341552734375, |
|
"logps/rejected": -261.8766784667969, |
|
"loss": 0.3954, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -9.092870712280273, |
|
"rewards/margins": 9.265833854675293, |
|
"rewards/rejected": -18.358705520629883, |
|
"sft_loss": 1.1345161199569702, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 2.3504273504273505, |
|
"grad_norm": 13.214123565552589, |
|
"learning_rate": 3.058531560681141e-08, |
|
"logits/chosen": 18.152240753173828, |
|
"logits/rejected": 19.055191040039062, |
|
"logps/chosen": -327.43487548828125, |
|
"logps/rejected": -266.76446533203125, |
|
"loss": 0.3363, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.500913619995117, |
|
"rewards/margins": 9.283426284790039, |
|
"rewards/rejected": -17.784339904785156, |
|
"sft_loss": 1.231545329093933, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.358645627876397, |
|
"grad_norm": 22.300498596470074, |
|
"learning_rate": 2.984625568117129e-08, |
|
"logits/chosen": 18.67966079711914, |
|
"logits/rejected": 19.73933982849121, |
|
"logps/chosen": -334.677734375, |
|
"logps/rejected": -265.227783203125, |
|
"loss": 0.4029, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.572039604187012, |
|
"rewards/margins": 8.896354675292969, |
|
"rewards/rejected": -17.468393325805664, |
|
"sft_loss": 1.1262859106063843, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 2.366863905325444, |
|
"grad_norm": 37.074159109819185, |
|
"learning_rate": 2.9115022242823862e-08, |
|
"logits/chosen": 17.512964248657227, |
|
"logits/rejected": 18.453014373779297, |
|
"logps/chosen": -326.0170593261719, |
|
"logps/rejected": -263.2306213378906, |
|
"loss": 0.3968, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.473074913024902, |
|
"rewards/margins": 9.212455749511719, |
|
"rewards/rejected": -17.685529708862305, |
|
"sft_loss": 1.092557430267334, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.3750821827744906, |
|
"grad_norm": 11.887196623999731, |
|
"learning_rate": 2.839167543642511e-08, |
|
"logits/chosen": 17.14059066772461, |
|
"logits/rejected": 18.407007217407227, |
|
"logps/chosen": -291.7596435546875, |
|
"logps/rejected": -250.99574279785156, |
|
"loss": 0.4211, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.36557388305664, |
|
"rewards/margins": 8.695883750915527, |
|
"rewards/rejected": -17.06145668029785, |
|
"sft_loss": 1.202438235282898, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 2.3833004602235373, |
|
"grad_norm": 17.524490621614987, |
|
"learning_rate": 2.7676274757947816e-08, |
|
"logits/chosen": 18.85689926147461, |
|
"logits/rejected": 19.545021057128906, |
|
"logps/chosen": -287.7202453613281, |
|
"logps/rejected": -244.56924438476562, |
|
"loss": 0.3838, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.284355163574219, |
|
"rewards/margins": 8.70119857788086, |
|
"rewards/rejected": -16.985553741455078, |
|
"sft_loss": 1.0111671686172485, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.391518737672584, |
|
"grad_norm": 9.1542241365719, |
|
"learning_rate": 2.696887904978819e-08, |
|
"logits/chosen": 18.2181453704834, |
|
"logits/rejected": 18.709545135498047, |
|
"logps/chosen": -280.198974609375, |
|
"logps/rejected": -226.15415954589844, |
|
"loss": 0.4051, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.791984558105469, |
|
"rewards/margins": 7.134130477905273, |
|
"rewards/rejected": -15.926115989685059, |
|
"sft_loss": 1.1695269346237183, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 2.3997370151216306, |
|
"grad_norm": 13.076192251177769, |
|
"learning_rate": 2.6269546495925886e-08, |
|
"logits/chosen": 16.172388076782227, |
|
"logits/rejected": 17.052417755126953, |
|
"logps/chosen": -287.6596984863281, |
|
"logps/rejected": -242.12660217285156, |
|
"loss": 0.4246, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.466004371643066, |
|
"rewards/margins": 8.168050765991211, |
|
"rewards/rejected": -16.634056091308594, |
|
"sft_loss": 1.1705952882766724, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.4079552925706773, |
|
"grad_norm": 14.024390303705356, |
|
"learning_rate": 2.5578334617138236e-08, |
|
"logits/chosen": 17.606464385986328, |
|
"logits/rejected": 18.12337303161621, |
|
"logps/chosen": -301.743408203125, |
|
"logps/rejected": -248.48464965820312, |
|
"loss": 0.3833, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.655086517333984, |
|
"rewards/margins": 8.575737953186035, |
|
"rewards/rejected": -17.230825424194336, |
|
"sft_loss": 1.08839750289917, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 2.416173570019724, |
|
"grad_norm": 17.812699456228195, |
|
"learning_rate": 2.489530026626932e-08, |
|
"logits/chosen": 17.72669219970703, |
|
"logits/rejected": 18.6758975982666, |
|
"logps/chosen": -306.7005310058594, |
|
"logps/rejected": -242.5311279296875, |
|
"loss": 0.379, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.553206443786621, |
|
"rewards/margins": 8.148569107055664, |
|
"rewards/rejected": -16.70177459716797, |
|
"sft_loss": 1.1815282106399536, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.4243918474687707, |
|
"grad_norm": 8.680672775515832, |
|
"learning_rate": 2.422049962355366e-08, |
|
"logits/chosen": 18.41983413696289, |
|
"logits/rejected": 19.47545623779297, |
|
"logps/chosen": -282.099609375, |
|
"logps/rejected": -238.36300659179688, |
|
"loss": 0.3486, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.316095352172852, |
|
"rewards/margins": 8.191699981689453, |
|
"rewards/rejected": -16.507797241210938, |
|
"sft_loss": 1.205697774887085, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 2.4326101249178174, |
|
"grad_norm": 16.775370793665815, |
|
"learning_rate": 2.3553988191995208e-08, |
|
"logits/chosen": 16.783174514770508, |
|
"logits/rejected": 18.405048370361328, |
|
"logps/chosen": -304.1385498046875, |
|
"logps/rejected": -256.7261047363281, |
|
"loss": 0.3744, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.7383451461792, |
|
"rewards/margins": 8.930658340454102, |
|
"rewards/rejected": -17.669002532958984, |
|
"sft_loss": 1.1059280633926392, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.440828402366864, |
|
"grad_norm": 10.437808650182905, |
|
"learning_rate": 2.2895820792802474e-08, |
|
"logits/chosen": 16.727697372436523, |
|
"logits/rejected": 17.59294891357422, |
|
"logps/chosen": -328.55389404296875, |
|
"logps/rejected": -269.7945251464844, |
|
"loss": 0.3695, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -9.062349319458008, |
|
"rewards/margins": 9.41024112701416, |
|
"rewards/rejected": -18.472591400146484, |
|
"sft_loss": 1.173682451248169, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 2.4490466798159107, |
|
"grad_norm": 20.73492010593765, |
|
"learning_rate": 2.2246051560879095e-08, |
|
"logits/chosen": 16.899852752685547, |
|
"logits/rejected": 17.82339096069336, |
|
"logps/chosen": -338.22186279296875, |
|
"logps/rejected": -279.0784912109375, |
|
"loss": 0.4179, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -9.162134170532227, |
|
"rewards/margins": 9.35285758972168, |
|
"rewards/rejected": -18.514989852905273, |
|
"sft_loss": 1.17171311378479, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.4572649572649574, |
|
"grad_norm": 12.636170820327301, |
|
"learning_rate": 2.160473394037149e-08, |
|
"logits/chosen": 17.118467330932617, |
|
"logits/rejected": 17.36690330505371, |
|
"logps/chosen": -335.8661804199219, |
|
"logps/rejected": -262.7174072265625, |
|
"loss": 0.4504, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.756584167480469, |
|
"rewards/margins": 8.854002952575684, |
|
"rewards/rejected": -17.610586166381836, |
|
"sft_loss": 1.1354836225509644, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 2.465483234714004, |
|
"grad_norm": 11.838207356236568, |
|
"learning_rate": 2.097192068027276e-08, |
|
"logits/chosen": 16.54058837890625, |
|
"logits/rejected": 17.930091857910156, |
|
"logps/chosen": -329.2217712402344, |
|
"logps/rejected": -270.410888671875, |
|
"loss": 0.3262, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.5565185546875, |
|
"rewards/margins": 9.7839937210083, |
|
"rewards/rejected": -18.340513229370117, |
|
"sft_loss": 1.0987026691436768, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.473701512163051, |
|
"grad_norm": 17.261726485061967, |
|
"learning_rate": 2.0347663830084182e-08, |
|
"logits/chosen": 16.857637405395508, |
|
"logits/rejected": 17.605924606323242, |
|
"logps/chosen": -278.7782287597656, |
|
"logps/rejected": -237.3050537109375, |
|
"loss": 0.3978, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.520981788635254, |
|
"rewards/margins": 7.912033557891846, |
|
"rewards/rejected": -16.433013916015625, |
|
"sft_loss": 1.1526176929473877, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 2.4819197896120975, |
|
"grad_norm": 19.01971806956554, |
|
"learning_rate": 1.9732014735534168e-08, |
|
"logits/chosen": 17.1612606048584, |
|
"logits/rejected": 17.63095474243164, |
|
"logps/chosen": -307.8269958496094, |
|
"logps/rejected": -234.46160888671875, |
|
"loss": 0.4156, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.234945297241211, |
|
"rewards/margins": 7.63665771484375, |
|
"rewards/rejected": -15.871603012084961, |
|
"sft_loss": 1.1170748472213745, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.490138067061144, |
|
"grad_norm": 10.87683842585221, |
|
"learning_rate": 1.9125024034354758e-08, |
|
"logits/chosen": 17.20734214782715, |
|
"logits/rejected": 17.946365356445312, |
|
"logps/chosen": -312.4763488769531, |
|
"logps/rejected": -246.2183837890625, |
|
"loss": 0.351, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.035243034362793, |
|
"rewards/margins": 8.52718734741211, |
|
"rewards/rejected": -16.56243133544922, |
|
"sft_loss": 1.071519374847412, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 2.498356344510191, |
|
"grad_norm": 10.244386828979161, |
|
"learning_rate": 1.85267416521169e-08, |
|
"logits/chosen": 17.724872589111328, |
|
"logits/rejected": 18.053852081298828, |
|
"logps/chosen": -320.68597412109375, |
|
"logps/rejected": -246.91893005371094, |
|
"loss": 0.3733, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.582959175109863, |
|
"rewards/margins": 8.986913681030273, |
|
"rewards/rejected": -16.56987190246582, |
|
"sft_loss": 1.0908424854278564, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.5065746219592375, |
|
"grad_norm": 12.813362766851835, |
|
"learning_rate": 1.793721679812389e-08, |
|
"logits/chosen": 18.601253509521484, |
|
"logits/rejected": 19.362607955932617, |
|
"logps/chosen": -288.14776611328125, |
|
"logps/rejected": -237.0640106201172, |
|
"loss": 0.3857, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.49190616607666, |
|
"rewards/margins": 7.810946464538574, |
|
"rewards/rejected": -16.302852630615234, |
|
"sft_loss": 1.091495394706726, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 2.5147928994082838, |
|
"grad_norm": 11.364641270765484, |
|
"learning_rate": 1.735649796136382e-08, |
|
"logits/chosen": 15.785613059997559, |
|
"logits/rejected": 17.070707321166992, |
|
"logps/chosen": -324.2843017578125, |
|
"logps/rejected": -258.7143859863281, |
|
"loss": 0.3883, |
|
"rewards/accuracies": 0.9300000071525574, |
|
"rewards/chosen": -8.855399131774902, |
|
"rewards/margins": 8.804574012756348, |
|
"rewards/rejected": -17.65997314453125, |
|
"sft_loss": 1.1961203813552856, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.523011176857331, |
|
"grad_norm": 15.007717453848354, |
|
"learning_rate": 1.678463290652142e-08, |
|
"logits/chosen": 17.604642868041992, |
|
"logits/rejected": 17.90863609313965, |
|
"logps/chosen": -312.69024658203125, |
|
"logps/rejected": -249.09962463378906, |
|
"loss": 0.3626, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.328804016113281, |
|
"rewards/margins": 8.493717193603516, |
|
"rewards/rejected": -16.822521209716797, |
|
"sft_loss": 1.157140851020813, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 2.531229454306377, |
|
"grad_norm": 8.792843348493232, |
|
"learning_rate": 1.6221668670049315e-08, |
|
"logits/chosen": 16.296873092651367, |
|
"logits/rejected": 17.479211807250977, |
|
"logps/chosen": -327.1073303222656, |
|
"logps/rejected": -271.3090515136719, |
|
"loss": 0.3481, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.877535820007324, |
|
"rewards/margins": 9.413492202758789, |
|
"rewards/rejected": -18.291027069091797, |
|
"sft_loss": 1.1908369064331055, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.5394477317554243, |
|
"grad_norm": 14.557660052303598, |
|
"learning_rate": 1.5667651556299178e-08, |
|
"logits/chosen": 16.44731903076172, |
|
"logits/rejected": 17.4537296295166, |
|
"logps/chosen": -306.1639709472656, |
|
"logps/rejected": -253.69247436523438, |
|
"loss": 0.3531, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.432329177856445, |
|
"rewards/margins": 8.949870109558105, |
|
"rewards/rejected": -17.382200241088867, |
|
"sft_loss": 1.114105463027954, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 2.5476660092044705, |
|
"grad_norm": 15.415629759090677, |
|
"learning_rate": 1.5122627133713262e-08, |
|
"logits/chosen": 15.742711067199707, |
|
"logits/rejected": 17.65005874633789, |
|
"logps/chosen": -317.73675537109375, |
|
"logps/rejected": -260.23907470703125, |
|
"loss": 0.3849, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.840112209320068, |
|
"rewards/margins": 9.513845443725586, |
|
"rewards/rejected": -17.35395622253418, |
|
"sft_loss": 1.292752981185913, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.5558842866535176, |
|
"grad_norm": 11.677314306376815, |
|
"learning_rate": 1.4586640231076226e-08, |
|
"logits/chosen": 17.83001708984375, |
|
"logits/rejected": 18.008840560913086, |
|
"logps/chosen": -290.8938293457031, |
|
"logps/rejected": -234.77801513671875, |
|
"loss": 0.3699, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -8.604043006896973, |
|
"rewards/margins": 7.615962028503418, |
|
"rewards/rejected": -16.22000503540039, |
|
"sft_loss": 1.1707122325897217, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 2.564102564102564, |
|
"grad_norm": 17.403632913196056, |
|
"learning_rate": 1.405973493382806e-08, |
|
"logits/chosen": 16.150592803955078, |
|
"logits/rejected": 17.557065963745117, |
|
"logps/chosen": -321.72802734375, |
|
"logps/rejected": -270.1099548339844, |
|
"loss": 0.3552, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -9.425326347351074, |
|
"rewards/margins": 9.209266662597656, |
|
"rewards/rejected": -18.634592056274414, |
|
"sft_loss": 1.0887880325317383, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.572320841551611, |
|
"grad_norm": 16.12695707285676, |
|
"learning_rate": 1.3541954580437941e-08, |
|
"logits/chosen": 18.370115280151367, |
|
"logits/rejected": 18.63874626159668, |
|
"logps/chosen": -321.462646484375, |
|
"logps/rejected": -259.6288757324219, |
|
"loss": 0.3254, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.813506126403809, |
|
"rewards/margins": 9.208869934082031, |
|
"rewards/rejected": -18.022377014160156, |
|
"sft_loss": 1.0541073083877563, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 2.5805391190006572, |
|
"grad_norm": 18.71581348868284, |
|
"learning_rate": 1.3033341758839592e-08, |
|
"logits/chosen": 16.9278621673584, |
|
"logits/rejected": 17.87784767150879, |
|
"logps/chosen": -333.1341552734375, |
|
"logps/rejected": -271.1338195800781, |
|
"loss": 0.4055, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -9.0064058303833, |
|
"rewards/margins": 9.4508638381958, |
|
"rewards/rejected": -18.4572696685791, |
|
"sft_loss": 1.1667834520339966, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.5887573964497044, |
|
"grad_norm": 11.817463136679503, |
|
"learning_rate": 1.2533938302928329e-08, |
|
"logits/chosen": 17.372867584228516, |
|
"logits/rejected": 18.298500061035156, |
|
"logps/chosen": -346.6560974121094, |
|
"logps/rejected": -274.773681640625, |
|
"loss": 0.3683, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.409444808959961, |
|
"rewards/margins": 9.886656761169434, |
|
"rewards/rejected": -18.296100616455078, |
|
"sft_loss": 1.183761477470398, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 2.5969756738987506, |
|
"grad_norm": 11.086694788731137, |
|
"learning_rate": 1.2043785289120409e-08, |
|
"logits/chosen": 16.920242309570312, |
|
"logits/rejected": 18.36749839782715, |
|
"logps/chosen": -333.09539794921875, |
|
"logps/rejected": -272.066162109375, |
|
"loss": 0.378, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.567721366882324, |
|
"rewards/margins": 9.71126937866211, |
|
"rewards/rejected": -18.278989791870117, |
|
"sft_loss": 1.226511001586914, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.6051939513477977, |
|
"grad_norm": 19.57395022687368, |
|
"learning_rate": 1.1562923032974125e-08, |
|
"logits/chosen": 17.482685089111328, |
|
"logits/rejected": 18.186784744262695, |
|
"logps/chosen": -336.7694396972656, |
|
"logps/rejected": -273.9622497558594, |
|
"loss": 0.3656, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.473450660705566, |
|
"rewards/margins": 9.806720733642578, |
|
"rewards/rejected": -18.280170440673828, |
|
"sft_loss": 1.0997947454452515, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 2.613412228796844, |
|
"grad_norm": 13.846460377871546, |
|
"learning_rate": 1.1091391085874161e-08, |
|
"logits/chosen": 17.66254425048828, |
|
"logits/rejected": 17.869403839111328, |
|
"logps/chosen": -355.09124755859375, |
|
"logps/rejected": -262.7408142089844, |
|
"loss": 0.3909, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.950118064880371, |
|
"rewards/margins": 8.9635009765625, |
|
"rewards/rejected": -17.913618087768555, |
|
"sft_loss": 1.2338536977767944, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.621630506245891, |
|
"grad_norm": 18.388530342654583, |
|
"learning_rate": 1.06292282317781e-08, |
|
"logits/chosen": 18.353347778320312, |
|
"logits/rejected": 19.111572265625, |
|
"logps/chosen": -293.8038024902344, |
|
"logps/rejected": -240.29061889648438, |
|
"loss": 0.3818, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.344436645507812, |
|
"rewards/margins": 8.241558074951172, |
|
"rewards/rejected": -16.58599281311035, |
|
"sft_loss": 1.1101101636886597, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 2.6298487836949374, |
|
"grad_norm": 48.6421585527008, |
|
"learning_rate": 1.017647248402674e-08, |
|
"logits/chosen": 17.27472686767578, |
|
"logits/rejected": 17.775699615478516, |
|
"logps/chosen": -338.6330871582031, |
|
"logps/rejected": -265.278564453125, |
|
"loss": 0.4384, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.897347450256348, |
|
"rewards/margins": 8.987475395202637, |
|
"rewards/rejected": -17.884824752807617, |
|
"sft_loss": 1.1422169208526611, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.6380670611439845, |
|
"grad_norm": 13.863026192467665, |
|
"learning_rate": 9.733161082217223e-09, |
|
"logits/chosen": 16.872806549072266, |
|
"logits/rejected": 17.572965621948242, |
|
"logps/chosen": -321.6798095703125, |
|
"logps/rejected": -258.7831115722656, |
|
"loss": 0.4032, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.822911262512207, |
|
"rewards/margins": 8.982833862304688, |
|
"rewards/rejected": -17.805744171142578, |
|
"sft_loss": 1.125891923904419, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 2.6462853385930307, |
|
"grad_norm": 15.402379291218823, |
|
"learning_rate": 9.299330489140125e-09, |
|
"logits/chosen": 17.64206314086914, |
|
"logits/rejected": 18.37377166748047, |
|
"logps/chosen": -285.738037109375, |
|
"logps/rejected": -240.1550750732422, |
|
"loss": 0.4197, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.605627059936523, |
|
"rewards/margins": 8.010725021362305, |
|
"rewards/rejected": -16.616352081298828, |
|
"sft_loss": 1.0786948204040527, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.654503616042078, |
|
"grad_norm": 11.137896212671778, |
|
"learning_rate": 8.87501638778039e-09, |
|
"logits/chosen": 16.587888717651367, |
|
"logits/rejected": 17.759031295776367, |
|
"logps/chosen": -309.4990539550781, |
|
"logps/rejected": -254.31495666503906, |
|
"loss": 0.4112, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.20348834991455, |
|
"rewards/margins": 8.457581520080566, |
|
"rewards/rejected": -17.66107177734375, |
|
"sft_loss": 1.10163414478302, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 2.662721893491124, |
|
"grad_norm": 10.357256991488983, |
|
"learning_rate": 8.460253678382296e-09, |
|
"logits/chosen": 17.529693603515625, |
|
"logits/rejected": 18.570171356201172, |
|
"logps/chosen": -337.939453125, |
|
"logps/rejected": -269.9917297363281, |
|
"loss": 0.3553, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.473341941833496, |
|
"rewards/margins": 9.756902694702148, |
|
"rewards/rejected": -18.230243682861328, |
|
"sft_loss": 1.0737409591674805, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.6709401709401708, |
|
"grad_norm": 12.585703695972256, |
|
"learning_rate": 8.055076475578918e-09, |
|
"logits/chosen": 17.500032424926758, |
|
"logits/rejected": 18.32237434387207, |
|
"logps/chosen": -326.6228942871094, |
|
"logps/rejected": -261.5873107910156, |
|
"loss": 0.3922, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.563287734985352, |
|
"rewards/margins": 9.018136024475098, |
|
"rewards/rejected": -17.581424713134766, |
|
"sft_loss": 1.1417536735534668, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 2.6791584483892175, |
|
"grad_norm": 12.800841299642682, |
|
"learning_rate": 7.659518105586238e-09, |
|
"logits/chosen": 16.294475555419922, |
|
"logits/rejected": 18.111600875854492, |
|
"logps/chosen": -335.0698547363281, |
|
"logps/rejected": -275.7948913574219, |
|
"loss": 0.3539, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.872127532958984, |
|
"rewards/margins": 9.738655090332031, |
|
"rewards/rejected": -18.610782623291016, |
|
"sft_loss": 1.171600341796875, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.687376725838264, |
|
"grad_norm": 11.437442046862925, |
|
"learning_rate": 7.273611103461836e-09, |
|
"logits/chosen": 17.347509384155273, |
|
"logits/rejected": 18.348569869995117, |
|
"logps/chosen": -303.4100341796875, |
|
"logps/rejected": -250.9491729736328, |
|
"loss": 0.3316, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.186358451843262, |
|
"rewards/margins": 8.977690696716309, |
|
"rewards/rejected": -17.164051055908203, |
|
"sft_loss": 1.1324518918991089, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 2.695595003287311, |
|
"grad_norm": 12.932581100678355, |
|
"learning_rate": 6.897387210429067e-09, |
|
"logits/chosen": 17.321182250976562, |
|
"logits/rejected": 18.35422134399414, |
|
"logps/chosen": -298.5028381347656, |
|
"logps/rejected": -246.92356872558594, |
|
"loss": 0.4056, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.715899467468262, |
|
"rewards/margins": 8.367709159851074, |
|
"rewards/rejected": -17.08361053466797, |
|
"sft_loss": 1.143718957901001, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.7038132807363575, |
|
"grad_norm": 14.723722025410018, |
|
"learning_rate": 6.530877371266175e-09, |
|
"logits/chosen": 16.489261627197266, |
|
"logits/rejected": 17.733213424682617, |
|
"logps/chosen": -305.61749267578125, |
|
"logps/rejected": -256.1786804199219, |
|
"loss": 0.3542, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.568675994873047, |
|
"rewards/margins": 9.070348739624023, |
|
"rewards/rejected": -17.639026641845703, |
|
"sft_loss": 1.176300048828125, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 2.712031558185404, |
|
"grad_norm": 22.923491412294727, |
|
"learning_rate": 6.1741117317611196e-09, |
|
"logits/chosen": 17.291810989379883, |
|
"logits/rejected": 18.644412994384766, |
|
"logps/chosen": -321.612060546875, |
|
"logps/rejected": -269.1338195800781, |
|
"loss": 0.4291, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -9.136231422424316, |
|
"rewards/margins": 9.433501243591309, |
|
"rewards/rejected": -18.569734573364258, |
|
"sft_loss": 1.2353969812393188, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.720249835634451, |
|
"grad_norm": 10.617946186080342, |
|
"learning_rate": 5.827119636232017e-09, |
|
"logits/chosen": 17.4252872467041, |
|
"logits/rejected": 18.208906173706055, |
|
"logps/chosen": -308.66943359375, |
|
"logps/rejected": -251.34764099121094, |
|
"loss": 0.4103, |
|
"rewards/accuracies": 0.9300000071525574, |
|
"rewards/chosen": -8.655915260314941, |
|
"rewards/margins": 8.82339096069336, |
|
"rewards/rejected": -17.479307174682617, |
|
"sft_loss": 1.2225102186203003, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 2.7284681130834976, |
|
"grad_norm": 9.888027224233095, |
|
"learning_rate": 5.489929625113549e-09, |
|
"logits/chosen": 16.691282272338867, |
|
"logits/rejected": 17.671295166015625, |
|
"logps/chosen": -328.8042297363281, |
|
"logps/rejected": -267.9706726074219, |
|
"loss": 0.4266, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.37188720703125, |
|
"rewards/margins": 9.643902778625488, |
|
"rewards/rejected": -18.015790939331055, |
|
"sft_loss": 1.2559726238250732, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.7366863905325443, |
|
"grad_norm": 20.805457290074077, |
|
"learning_rate": 5.1625694326095506e-09, |
|
"logits/chosen": 16.405752182006836, |
|
"logits/rejected": 17.14948081970215, |
|
"logps/chosen": -341.1684875488281, |
|
"logps/rejected": -271.09710693359375, |
|
"loss": 0.3332, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.78724193572998, |
|
"rewards/margins": 9.57591724395752, |
|
"rewards/rejected": -18.363157272338867, |
|
"sft_loss": 1.0471839904785156, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 2.744904667981591, |
|
"grad_norm": 13.812771677348046, |
|
"learning_rate": 4.845065984411742e-09, |
|
"logits/chosen": 16.383556365966797, |
|
"logits/rejected": 17.95462989807129, |
|
"logps/chosen": -331.20526123046875, |
|
"logps/rejected": -279.220458984375, |
|
"loss": 0.357, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -9.316568374633789, |
|
"rewards/margins": 9.552423477172852, |
|
"rewards/rejected": -18.86899185180664, |
|
"sft_loss": 1.163619875907898, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.7531229454306376, |
|
"grad_norm": 14.688896292238876, |
|
"learning_rate": 4.5374453954851035e-09, |
|
"logits/chosen": 18.362672805786133, |
|
"logits/rejected": 19.01654815673828, |
|
"logps/chosen": -307.0843200683594, |
|
"logps/rejected": -246.02671813964844, |
|
"loss": 0.3677, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.525540351867676, |
|
"rewards/margins": 8.622610092163086, |
|
"rewards/rejected": -17.148151397705078, |
|
"sft_loss": 1.1697852611541748, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 2.7613412228796843, |
|
"grad_norm": 15.151111907515142, |
|
"learning_rate": 4.239732967919976e-09, |
|
"logits/chosen": 18.35997772216797, |
|
"logits/rejected": 18.519113540649414, |
|
"logps/chosen": -283.6457214355469, |
|
"logps/rejected": -239.9178466796875, |
|
"loss": 0.3946, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.615645408630371, |
|
"rewards/margins": 8.21639633178711, |
|
"rewards/rejected": -16.832042694091797, |
|
"sft_loss": 1.185640573501587, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.769559500328731, |
|
"grad_norm": 8.993747704826987, |
|
"learning_rate": 3.951953188850762e-09, |
|
"logits/chosen": 15.838356018066406, |
|
"logits/rejected": 17.58329963684082, |
|
"logps/chosen": -300.3641662597656, |
|
"logps/rejected": -254.8916473388672, |
|
"loss": 0.3834, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.536845207214355, |
|
"rewards/margins": 8.757308006286621, |
|
"rewards/rejected": -17.294153213500977, |
|
"sft_loss": 1.1382744312286377, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 13.655366152597214, |
|
"learning_rate": 3.674129728442013e-09, |
|
"logits/chosen": 17.68130874633789, |
|
"logits/rejected": 19.080127716064453, |
|
"logps/chosen": -268.5239562988281, |
|
"logps/rejected": -229.50523376464844, |
|
"loss": 0.3877, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.217788696289062, |
|
"rewards/margins": 7.664586067199707, |
|
"rewards/rejected": -15.882373809814453, |
|
"sft_loss": 1.0555132627487183, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.7859960552268244, |
|
"grad_norm": 11.562075341982874, |
|
"learning_rate": 3.4062854379414694e-09, |
|
"logits/chosen": 17.3222599029541, |
|
"logits/rejected": 18.08160972595215, |
|
"logps/chosen": -306.2829284667969, |
|
"logps/rejected": -251.935546875, |
|
"loss": 0.3607, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.6878023147583, |
|
"rewards/margins": 8.51749038696289, |
|
"rewards/rejected": -17.205289840698242, |
|
"sft_loss": 1.1840558052062988, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 2.794214332675871, |
|
"grad_norm": 25.80729521542422, |
|
"learning_rate": 3.1484423478004563e-09, |
|
"logits/chosen": 17.99493408203125, |
|
"logits/rejected": 18.518619537353516, |
|
"logps/chosen": -289.563232421875, |
|
"logps/rejected": -243.09219360351562, |
|
"loss": 0.4297, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.428235054016113, |
|
"rewards/margins": 8.079200744628906, |
|
"rewards/rejected": -16.50743865966797, |
|
"sft_loss": 1.108068585395813, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.8024326101249177, |
|
"grad_norm": 13.892592196473423, |
|
"learning_rate": 2.9006216658619687e-09, |
|
"logits/chosen": 16.929012298583984, |
|
"logits/rejected": 17.672870635986328, |
|
"logps/chosen": -313.2660217285156, |
|
"logps/rejected": -257.04034423828125, |
|
"loss": 0.399, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.524652481079102, |
|
"rewards/margins": 9.001575469970703, |
|
"rewards/rejected": -17.526227951049805, |
|
"sft_loss": 1.1432716846466064, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 2.8106508875739644, |
|
"grad_norm": 9.790469642612795, |
|
"learning_rate": 2.6628437756162635e-09, |
|
"logits/chosen": 17.310102462768555, |
|
"logits/rejected": 18.04708480834961, |
|
"logps/chosen": -293.9396057128906, |
|
"logps/rejected": -240.7176971435547, |
|
"loss": 0.3473, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.278569221496582, |
|
"rewards/margins": 8.390737533569336, |
|
"rewards/rejected": -16.669307708740234, |
|
"sft_loss": 1.1700962781906128, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.818869165023011, |
|
"grad_norm": 9.444898259948333, |
|
"learning_rate": 2.435128234524228e-09, |
|
"logits/chosen": 17.586627960205078, |
|
"logits/rejected": 18.176280975341797, |
|
"logps/chosen": -299.7925109863281, |
|
"logps/rejected": -243.57485961914062, |
|
"loss": 0.4067, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -8.356633186340332, |
|
"rewards/margins": 8.317458152770996, |
|
"rewards/rejected": -16.674091339111328, |
|
"sft_loss": 1.1623938083648682, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 2.827087442472058, |
|
"grad_norm": 11.302990178309454, |
|
"learning_rate": 2.2174937724088877e-09, |
|
"logits/chosen": 17.02381134033203, |
|
"logits/rejected": 18.46286392211914, |
|
"logps/chosen": -314.8418273925781, |
|
"logps/rejected": -259.57745361328125, |
|
"loss": 0.4069, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -8.880843162536621, |
|
"rewards/margins": 8.848891258239746, |
|
"rewards/rejected": -17.729736328125, |
|
"sft_loss": 1.1079494953155518, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.8353057199211045, |
|
"grad_norm": 9.711126487613186, |
|
"learning_rate": 2.009958289914765e-09, |
|
"logits/chosen": 17.012800216674805, |
|
"logits/rejected": 18.349876403808594, |
|
"logps/chosen": -321.7917175292969, |
|
"logps/rejected": -270.1522216796875, |
|
"loss": 0.3451, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.694296836853027, |
|
"rewards/margins": 9.856916427612305, |
|
"rewards/rejected": -18.551212310791016, |
|
"sft_loss": 1.0486385822296143, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 2.843523997370151, |
|
"grad_norm": 22.398743525886992, |
|
"learning_rate": 1.8125388570355422e-09, |
|
"logits/chosen": 16.76806640625, |
|
"logits/rejected": 17.946535110473633, |
|
"logps/chosen": -312.1168212890625, |
|
"logps/rejected": -266.208984375, |
|
"loss": 0.3337, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.700928688049316, |
|
"rewards/margins": 9.464086532592773, |
|
"rewards/rejected": -18.165014266967773, |
|
"sft_loss": 1.162864327430725, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.851742274819198, |
|
"grad_norm": 14.549076580676688, |
|
"learning_rate": 1.6252517117101017e-09, |
|
"logits/chosen": 16.1746768951416, |
|
"logits/rejected": 17.028032302856445, |
|
"logps/chosen": -316.6230773925781, |
|
"logps/rejected": -258.9454345703125, |
|
"loss": 0.4137, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.804771423339844, |
|
"rewards/margins": 8.894736289978027, |
|
"rewards/rejected": -17.699508666992188, |
|
"sft_loss": 1.2625643014907837, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 2.8599605522682445, |
|
"grad_norm": 22.305121337267558, |
|
"learning_rate": 1.4481122584868582e-09, |
|
"logits/chosen": 16.654598236083984, |
|
"logits/rejected": 17.727828979492188, |
|
"logps/chosen": -327.3823547363281, |
|
"logps/rejected": -264.8335876464844, |
|
"loss": 0.4201, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.304695129394531, |
|
"rewards/margins": 8.695550918579102, |
|
"rewards/rejected": -18.000246047973633, |
|
"sft_loss": 1.1684330701828003, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.868178829717291, |
|
"grad_norm": 12.445565014042396, |
|
"learning_rate": 1.2811350672568138e-09, |
|
"logits/chosen": 16.678804397583008, |
|
"logits/rejected": 18.215984344482422, |
|
"logps/chosen": -340.2626953125, |
|
"logps/rejected": -277.87872314453125, |
|
"loss": 0.4267, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.802419662475586, |
|
"rewards/margins": 9.925808906555176, |
|
"rewards/rejected": -18.728229522705078, |
|
"sft_loss": 1.1311696767807007, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 2.876397107166338, |
|
"grad_norm": 16.390316522695066, |
|
"learning_rate": 1.1243338720550445e-09, |
|
"logits/chosen": 16.955345153808594, |
|
"logits/rejected": 18.02084732055664, |
|
"logps/chosen": -291.6322937011719, |
|
"logps/rejected": -249.6865234375, |
|
"loss": 0.4018, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.541548728942871, |
|
"rewards/margins": 8.891424179077148, |
|
"rewards/rejected": -17.432973861694336, |
|
"sft_loss": 1.122809648513794, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.8846153846153846, |
|
"grad_norm": 12.452008236969373, |
|
"learning_rate": 9.777215699311725e-10, |
|
"logits/chosen": 17.285600662231445, |
|
"logits/rejected": 18.065244674682617, |
|
"logps/chosen": -304.11834716796875, |
|
"logps/rejected": -252.4257049560547, |
|
"loss": 0.3855, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.604863166809082, |
|
"rewards/margins": 8.664654731750488, |
|
"rewards/rejected": -17.269519805908203, |
|
"sft_loss": 1.2225173711776733, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 2.8928336620644313, |
|
"grad_norm": 26.02343316648693, |
|
"learning_rate": 8.413102198885358e-10, |
|
"logits/chosen": 15.385034561157227, |
|
"logits/rejected": 16.86432456970215, |
|
"logps/chosen": -327.46160888671875, |
|
"logps/rejected": -264.8345031738281, |
|
"loss": 0.4478, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.194596290588379, |
|
"rewards/margins": 8.853148460388184, |
|
"rewards/rejected": -18.047740936279297, |
|
"sft_loss": 1.0643724203109741, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.901051939513478, |
|
"grad_norm": 9.970098814112205, |
|
"learning_rate": 7.151110418923134e-10, |
|
"logits/chosen": 18.434673309326172, |
|
"logits/rejected": 18.675090789794922, |
|
"logps/chosen": -302.91534423828125, |
|
"logps/rejected": -248.91583251953125, |
|
"loss": 0.3988, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -8.768433570861816, |
|
"rewards/margins": 8.766546249389648, |
|
"rewards/rejected": -17.53498077392578, |
|
"sft_loss": 1.1868294477462769, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 2.9092702169625246, |
|
"grad_norm": 18.45143826968204, |
|
"learning_rate": 5.991344159466672e-10, |
|
"logits/chosen": 16.24605941772461, |
|
"logits/rejected": 17.377365112304688, |
|
"logps/chosen": -318.8271789550781, |
|
"logps/rejected": -257.1405334472656, |
|
"loss": 0.352, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.925313949584961, |
|
"rewards/margins": 8.410311698913574, |
|
"rewards/rejected": -17.335628509521484, |
|
"sft_loss": 1.1228437423706055, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.9174884944115713, |
|
"grad_norm": 12.923252042791281, |
|
"learning_rate": 4.933898812409937e-10, |
|
"logits/chosen": 16.73847198486328, |
|
"logits/rejected": 17.230134963989258, |
|
"logps/chosen": -338.15118408203125, |
|
"logps/rejected": -271.0611267089844, |
|
"loss": 0.3936, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -9.159259796142578, |
|
"rewards/margins": 9.365001678466797, |
|
"rewards/rejected": -18.524259567260742, |
|
"sft_loss": 1.1974759101867676, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 2.925706771860618, |
|
"grad_norm": 18.59092085629164, |
|
"learning_rate": 3.978861353653301e-10, |
|
"logits/chosen": 17.0466251373291, |
|
"logits/rejected": 17.81385612487793, |
|
"logps/chosen": -301.74603271484375, |
|
"logps/rejected": -247.91571044921875, |
|
"loss": 0.4187, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.685689926147461, |
|
"rewards/margins": 8.343822479248047, |
|
"rewards/rejected": -17.02951431274414, |
|
"sft_loss": 1.086068034172058, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.9339250493096647, |
|
"grad_norm": 16.48047435187235, |
|
"learning_rate": 3.1263103359494005e-10, |
|
"logits/chosen": 17.160581588745117, |
|
"logits/rejected": 18.65143585205078, |
|
"logps/chosen": -300.696533203125, |
|
"logps/rejected": -245.1064453125, |
|
"loss": 0.3632, |
|
"rewards/accuracies": 0.9399999976158142, |
|
"rewards/chosen": -7.98746395111084, |
|
"rewards/margins": 8.9141206741333, |
|
"rewards/rejected": -16.90158462524414, |
|
"sft_loss": 1.4039214849472046, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 2.9421433267587114, |
|
"grad_norm": 14.541496183664128, |
|
"learning_rate": 2.3763158824419147e-10, |
|
"logits/chosen": 16.89483642578125, |
|
"logits/rejected": 17.82222557067871, |
|
"logps/chosen": -328.6429748535156, |
|
"logps/rejected": -268.05938720703125, |
|
"loss": 0.3455, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.467479705810547, |
|
"rewards/margins": 9.740607261657715, |
|
"rewards/rejected": -18.208087921142578, |
|
"sft_loss": 1.1161048412322998, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.950361604207758, |
|
"grad_norm": 14.008459290888235, |
|
"learning_rate": 1.728939680898517e-10, |
|
"logits/chosen": 16.10931396484375, |
|
"logits/rejected": 17.673229217529297, |
|
"logps/chosen": -308.1393737792969, |
|
"logps/rejected": -262.28009033203125, |
|
"loss": 0.367, |
|
"rewards/accuracies": 0.9900000095367432, |
|
"rewards/chosen": -9.119178771972656, |
|
"rewards/margins": 8.93433666229248, |
|
"rewards/rejected": -18.05351448059082, |
|
"sft_loss": 1.1963419914245605, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 2.9585798816568047, |
|
"grad_norm": 16.666553214725845, |
|
"learning_rate": 1.184234978636456e-10, |
|
"logits/chosen": 16.49167823791504, |
|
"logits/rejected": 17.332914352416992, |
|
"logps/chosen": -282.5769348144531, |
|
"logps/rejected": -248.25242614746094, |
|
"loss": 0.3921, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.752376556396484, |
|
"rewards/margins": 8.20280933380127, |
|
"rewards/rejected": -16.955184936523438, |
|
"sft_loss": 1.2729109525680542, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.9667981591058514, |
|
"grad_norm": 15.626709598718353, |
|
"learning_rate": 7.422465781431464e-11, |
|
"logits/chosen": 16.95427894592285, |
|
"logits/rejected": 17.818552017211914, |
|
"logps/chosen": -329.6918640136719, |
|
"logps/rejected": -266.5020446777344, |
|
"loss": 0.3843, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.854848861694336, |
|
"rewards/margins": 9.140162467956543, |
|
"rewards/rejected": -17.995010375976562, |
|
"sft_loss": 1.1496516466140747, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 2.975016436554898, |
|
"grad_norm": 12.600231440275685, |
|
"learning_rate": 4.030108333910598e-11, |
|
"logits/chosen": 17.70891571044922, |
|
"logits/rejected": 18.366714477539062, |
|
"logps/chosen": -295.2488708496094, |
|
"logps/rejected": -242.97634887695312, |
|
"loss": 0.3819, |
|
"rewards/accuracies": 0.9700000286102295, |
|
"rewards/chosen": -8.695423126220703, |
|
"rewards/margins": 8.07010269165039, |
|
"rewards/rejected": -16.765525817871094, |
|
"sft_loss": 1.1890416145324707, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.983234714003945, |
|
"grad_norm": 14.846673339349834, |
|
"learning_rate": 1.6655564684747713e-11, |
|
"logits/chosen": 17.073108673095703, |
|
"logits/rejected": 17.751785278320312, |
|
"logps/chosen": -334.0798034667969, |
|
"logps/rejected": -261.91644287109375, |
|
"loss": 0.372, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.156329154968262, |
|
"rewards/margins": 8.551715850830078, |
|
"rewards/rejected": -17.708045959472656, |
|
"sft_loss": 1.1268292665481567, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 2.9914529914529915, |
|
"grad_norm": 9.077728581968316, |
|
"learning_rate": 3.290046717979722e-12, |
|
"logits/chosen": 16.00580406188965, |
|
"logits/rejected": 16.491676330566406, |
|
"logps/chosen": -320.336181640625, |
|
"logps/rejected": -255.6234588623047, |
|
"loss": 0.3871, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -7.969948768615723, |
|
"rewards/margins": 9.396775245666504, |
|
"rewards/rejected": -17.366724014282227, |
|
"sft_loss": 1.16538667678833, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.998027613412229, |
|
"step": 1824, |
|
"total_flos": 287426369617920.0, |
|
"train_loss": 0.5032803327368017, |
|
"train_runtime": 76434.0426, |
|
"train_samples_per_second": 1.433, |
|
"train_steps_per_second": 0.024 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1824, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 287426369617920.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|