|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 2907, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7182130584192438e-09, |
|
"logits/chosen": -2.1327524185180664, |
|
"logits/rejected": -1.609220027923584, |
|
"logps/chosen": -91.77880859375, |
|
"logps/rejected": -81.204345703125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.718213058419244e-08, |
|
"logits/chosen": -2.3409039974212646, |
|
"logits/rejected": -2.2914419174194336, |
|
"logps/chosen": -218.2519989013672, |
|
"logps/rejected": -192.96514892578125, |
|
"loss": 0.6949, |
|
"rewards/accuracies": 0.6111111044883728, |
|
"rewards/chosen": 0.008321777917444706, |
|
"rewards/margins": 0.005371565464884043, |
|
"rewards/rejected": 0.0029502129182219505, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.436426116838488e-08, |
|
"logits/chosen": -2.259495735168457, |
|
"logits/rejected": -2.44956636428833, |
|
"logps/chosen": -274.8431091308594, |
|
"logps/rejected": -225.97793579101562, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.01440061442553997, |
|
"rewards/margins": 0.013972322456538677, |
|
"rewards/rejected": 0.00042829400626942515, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": -2.5081024169921875, |
|
"logits/rejected": -2.4530844688415527, |
|
"logps/chosen": -292.6970520019531, |
|
"logps/rejected": -215.64553833007812, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.008286448195576668, |
|
"rewards/margins": 0.008681153878569603, |
|
"rewards/rejected": -0.00039470643969252706, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.872852233676976e-08, |
|
"logits/chosen": -2.6037235260009766, |
|
"logits/rejected": -2.4282469749450684, |
|
"logps/chosen": -378.5215759277344, |
|
"logps/rejected": -226.55136108398438, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.004302692599594593, |
|
"rewards/margins": 0.008366527035832405, |
|
"rewards/rejected": -0.004063835833221674, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.59106529209622e-08, |
|
"logits/chosen": -2.3304362297058105, |
|
"logits/rejected": -2.30436372756958, |
|
"logps/chosen": -275.41754150390625, |
|
"logps/rejected": -209.0893096923828, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.01541445404291153, |
|
"rewards/margins": -0.023714235052466393, |
|
"rewards/rejected": 0.008299780078232288, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0309278350515462e-07, |
|
"logits/chosen": -2.408499240875244, |
|
"logits/rejected": -2.518784284591675, |
|
"logps/chosen": -298.5494689941406, |
|
"logps/rejected": -267.29962158203125, |
|
"loss": 0.6947, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.01625905930995941, |
|
"rewards/margins": -0.008457355201244354, |
|
"rewards/rejected": -0.007801705040037632, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.202749140893471e-07, |
|
"logits/chosen": -2.4770851135253906, |
|
"logits/rejected": -2.2523791790008545, |
|
"logps/chosen": -296.0211486816406, |
|
"logps/rejected": -145.6873321533203, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.010021962225437164, |
|
"rewards/margins": -0.006689900998026133, |
|
"rewards/rejected": -0.00333206239156425, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3745704467353952e-07, |
|
"logits/chosen": -2.4601337909698486, |
|
"logits/rejected": -2.488020181655884, |
|
"logps/chosen": -243.9931182861328, |
|
"logps/rejected": -210.5398406982422, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0015067579224705696, |
|
"rewards/margins": 0.0036226934753358364, |
|
"rewards/rejected": -0.002115936018526554, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5463917525773197e-07, |
|
"logits/chosen": -2.45121431350708, |
|
"logits/rejected": -2.3661465644836426, |
|
"logps/chosen": -214.99539184570312, |
|
"logps/rejected": -186.57984924316406, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.017697982490062714, |
|
"rewards/margins": 0.007963714189827442, |
|
"rewards/rejected": 0.009734268300235271, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.718213058419244e-07, |
|
"logits/chosen": -2.476562261581421, |
|
"logits/rejected": -2.409389019012451, |
|
"logps/chosen": -310.9223327636719, |
|
"logps/rejected": -162.9552764892578, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.007375421933829784, |
|
"rewards/margins": 0.01819022186100483, |
|
"rewards/rejected": -0.010814799927175045, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8900343642611682e-07, |
|
"logits/chosen": -2.426025867462158, |
|
"logits/rejected": -2.0674071311950684, |
|
"logps/chosen": -303.84112548828125, |
|
"logps/rejected": -193.99363708496094, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.01276397705078125, |
|
"rewards/margins": 0.010143814608454704, |
|
"rewards/rejected": 0.0026201631408184767, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0618556701030925e-07, |
|
"logits/chosen": -2.2423105239868164, |
|
"logits/rejected": -2.5096383094787598, |
|
"logps/chosen": -295.35986328125, |
|
"logps/rejected": -273.8446350097656, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.015526200644671917, |
|
"rewards/margins": -0.02725866064429283, |
|
"rewards/rejected": 0.011732463724911213, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2336769759450173e-07, |
|
"logits/chosen": -2.538625955581665, |
|
"logits/rejected": -2.2574923038482666, |
|
"logps/chosen": -289.69061279296875, |
|
"logps/rejected": -183.3281707763672, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.0016457748133689165, |
|
"rewards/margins": -0.00430142879486084, |
|
"rewards/rejected": 0.0026556537486612797, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.405498281786942e-07, |
|
"logits/chosen": -2.540769338607788, |
|
"logits/rejected": -2.2134933471679688, |
|
"logps/chosen": -221.8517303466797, |
|
"logps/rejected": -183.58389282226562, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.004911613650619984, |
|
"rewards/margins": 0.005466667469590902, |
|
"rewards/rejected": -0.0005550530040636659, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5773195876288655e-07, |
|
"logits/chosen": -2.1684834957122803, |
|
"logits/rejected": -2.418734073638916, |
|
"logps/chosen": -366.19830322265625, |
|
"logps/rejected": -190.3058319091797, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.025678252801299095, |
|
"rewards/margins": 0.029336413368582726, |
|
"rewards/rejected": -0.0036581619642674923, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7491408934707903e-07, |
|
"logits/chosen": -2.698945999145508, |
|
"logits/rejected": -2.6068835258483887, |
|
"logps/chosen": -441.375, |
|
"logps/rejected": -300.767822265625, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.030073052272200584, |
|
"rewards/margins": 0.047654617577791214, |
|
"rewards/rejected": -0.017581569030880928, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9209621993127146e-07, |
|
"logits/chosen": -2.3280515670776367, |
|
"logits/rejected": -2.320730686187744, |
|
"logps/chosen": -271.07452392578125, |
|
"logps/rejected": -224.6231231689453, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.02470027096569538, |
|
"rewards/margins": 0.01814831793308258, |
|
"rewards/rejected": 0.006551951169967651, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"logits/chosen": -2.5555853843688965, |
|
"logits/rejected": -2.6365621089935303, |
|
"logps/chosen": -271.2387390136719, |
|
"logps/rejected": -235.908935546875, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.03413509577512741, |
|
"rewards/margins": 0.046364832669496536, |
|
"rewards/rejected": -0.012229740619659424, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2646048109965636e-07, |
|
"logits/chosen": -2.7220354080200195, |
|
"logits/rejected": -2.5860915184020996, |
|
"logps/chosen": -302.74267578125, |
|
"logps/rejected": -208.666015625, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.01786138489842415, |
|
"rewards/margins": 0.0315425843000412, |
|
"rewards/rejected": -0.013681203126907349, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.436426116838488e-07, |
|
"logits/chosen": -2.4432010650634766, |
|
"logits/rejected": -2.346902370452881, |
|
"logps/chosen": -259.94525146484375, |
|
"logps/rejected": -188.97792053222656, |
|
"loss": 0.679, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.021963179111480713, |
|
"rewards/margins": 0.03701996058225632, |
|
"rewards/rejected": -0.015056787058711052, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.608247422680412e-07, |
|
"logits/chosen": -2.793883800506592, |
|
"logits/rejected": -2.4634127616882324, |
|
"logps/chosen": -378.5939025878906, |
|
"logps/rejected": -226.9193115234375, |
|
"loss": 0.6767, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.013226890936493874, |
|
"rewards/margins": 0.02839084342122078, |
|
"rewards/rejected": -0.015163958072662354, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7800687285223364e-07, |
|
"logits/chosen": -2.3424696922302246, |
|
"logits/rejected": -2.2594411373138428, |
|
"logps/chosen": -166.11044311523438, |
|
"logps/rejected": -209.3076934814453, |
|
"loss": 0.6758, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.006952253170311451, |
|
"rewards/margins": 0.02028687857091427, |
|
"rewards/rejected": -0.027239132672548294, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9518900343642607e-07, |
|
"logits/chosen": -2.5350894927978516, |
|
"logits/rejected": -2.4550373554229736, |
|
"logps/chosen": -335.16162109375, |
|
"logps/rejected": -251.5087890625, |
|
"loss": 0.674, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0555986650288105, |
|
"rewards/margins": 0.072585329413414, |
|
"rewards/rejected": -0.01698666624724865, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.123711340206185e-07, |
|
"logits/chosen": -2.4425277709960938, |
|
"logits/rejected": -2.4560999870300293, |
|
"logps/chosen": -364.9286193847656, |
|
"logps/rejected": -251.4180450439453, |
|
"loss": 0.6683, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.006999121513217688, |
|
"rewards/margins": 0.015280758030712605, |
|
"rewards/rejected": -0.00828163605183363, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2955326460481097e-07, |
|
"logits/chosen": -2.358187675476074, |
|
"logits/rejected": -2.3916361331939697, |
|
"logps/chosen": -278.9718322753906, |
|
"logps/rejected": -262.45458984375, |
|
"loss": 0.6684, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.025192927569150925, |
|
"rewards/margins": 0.08774158358573914, |
|
"rewards/rejected": -0.06254865229129791, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4673539518900345e-07, |
|
"logits/chosen": -2.117164134979248, |
|
"logits/rejected": -2.433020830154419, |
|
"logps/chosen": -203.03009033203125, |
|
"logps/rejected": -217.13827514648438, |
|
"loss": 0.6679, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.003108497243374586, |
|
"rewards/margins": 0.0338444858789444, |
|
"rewards/rejected": -0.030735988169908524, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.639175257731959e-07, |
|
"logits/chosen": -2.236057996749878, |
|
"logits/rejected": -2.4115004539489746, |
|
"logps/chosen": -236.7349395751953, |
|
"logps/rejected": -238.6702880859375, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0009882673621177673, |
|
"rewards/margins": 0.07193086296319962, |
|
"rewards/rejected": -0.07094259560108185, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.810996563573884e-07, |
|
"logits/chosen": -2.474492311477661, |
|
"logits/rejected": -2.3905348777770996, |
|
"logps/chosen": -330.3529357910156, |
|
"logps/rejected": -231.5391082763672, |
|
"loss": 0.6578, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.03507215529680252, |
|
"rewards/margins": 0.10007444769144058, |
|
"rewards/rejected": -0.06500230729579926, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.982817869415807e-07, |
|
"logits/chosen": -2.564732074737549, |
|
"logits/rejected": -2.3240487575531006, |
|
"logps/chosen": -277.9839172363281, |
|
"logps/rejected": -197.73281860351562, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.05555497482419014, |
|
"rewards/margins": 0.15978679060935974, |
|
"rewards/rejected": -0.1042318120598793, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.982798165137615e-07, |
|
"logits/chosen": -2.3241677284240723, |
|
"logits/rejected": -2.348520040512085, |
|
"logps/chosen": -209.7371063232422, |
|
"logps/rejected": -238.69595336914062, |
|
"loss": 0.6501, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.041191719472408295, |
|
"rewards/margins": 0.12445306777954102, |
|
"rewards/rejected": -0.08326133340597153, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.963685015290519e-07, |
|
"logits/chosen": -2.466608762741089, |
|
"logits/rejected": -2.338318347930908, |
|
"logps/chosen": -360.8646545410156, |
|
"logps/rejected": -246.0187530517578, |
|
"loss": 0.6505, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.04523809999227524, |
|
"rewards/margins": 0.1772933453321457, |
|
"rewards/rejected": -0.13205525279045105, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.944571865443424e-07, |
|
"logits/chosen": -2.318599224090576, |
|
"logits/rejected": -2.3406758308410645, |
|
"logps/chosen": -217.85122680664062, |
|
"logps/rejected": -143.0299072265625, |
|
"loss": 0.6362, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.05060691758990288, |
|
"rewards/margins": 0.1060304194688797, |
|
"rewards/rejected": -0.05542349815368652, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.92545871559633e-07, |
|
"logits/chosen": -2.477591037750244, |
|
"logits/rejected": -2.4527573585510254, |
|
"logps/chosen": -326.4366455078125, |
|
"logps/rejected": -284.9319763183594, |
|
"loss": 0.6424, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.0541137270629406, |
|
"rewards/margins": 0.10549300909042358, |
|
"rewards/rejected": -0.051379282027482986, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.906345565749235e-07, |
|
"logits/chosen": -2.402550220489502, |
|
"logits/rejected": -2.2470858097076416, |
|
"logps/chosen": -232.6081085205078, |
|
"logps/rejected": -135.73312377929688, |
|
"loss": 0.6441, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.018155479803681374, |
|
"rewards/margins": 0.245174378156662, |
|
"rewards/rejected": -0.22701887786388397, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.88723241590214e-07, |
|
"logits/chosen": -2.4398138523101807, |
|
"logits/rejected": -2.2956748008728027, |
|
"logps/chosen": -379.9845275878906, |
|
"logps/rejected": -224.34408569335938, |
|
"loss": 0.6282, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.09958186000585556, |
|
"rewards/margins": 0.3024463653564453, |
|
"rewards/rejected": -0.20286449790000916, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.868119266055046e-07, |
|
"logits/chosen": -2.2719640731811523, |
|
"logits/rejected": -2.3680129051208496, |
|
"logps/chosen": -213.4238739013672, |
|
"logps/rejected": -282.51495361328125, |
|
"loss": 0.6226, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.02809527888894081, |
|
"rewards/margins": 0.08343404531478882, |
|
"rewards/rejected": -0.11152933537960052, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.849006116207951e-07, |
|
"logits/chosen": -2.400339365005493, |
|
"logits/rejected": -2.400993824005127, |
|
"logps/chosen": -310.1695861816406, |
|
"logps/rejected": -258.10113525390625, |
|
"loss": 0.6198, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.06970132142305374, |
|
"rewards/margins": 0.2196888029575348, |
|
"rewards/rejected": -0.14998750388622284, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.829892966360856e-07, |
|
"logits/chosen": -2.42053484916687, |
|
"logits/rejected": -2.3133673667907715, |
|
"logps/chosen": -279.19329833984375, |
|
"logps/rejected": -253.25363159179688, |
|
"loss": 0.6356, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.14709529280662537, |
|
"rewards/margins": 0.32831987738609314, |
|
"rewards/rejected": -0.18122461438179016, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.810779816513762e-07, |
|
"logits/chosen": -2.3398146629333496, |
|
"logits/rejected": -2.497467041015625, |
|
"logps/chosen": -199.78628540039062, |
|
"logps/rejected": -176.40805053710938, |
|
"loss": 0.6268, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.019132133573293686, |
|
"rewards/margins": 0.16773070394992828, |
|
"rewards/rejected": -0.18686284124851227, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.791666666666667e-07, |
|
"logits/chosen": -2.5169873237609863, |
|
"logits/rejected": -2.568854808807373, |
|
"logps/chosen": -289.6071472167969, |
|
"logps/rejected": -184.77845764160156, |
|
"loss": 0.6193, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.08133763074874878, |
|
"rewards/margins": 0.25977957248687744, |
|
"rewards/rejected": -0.17844195663928986, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.772553516819572e-07, |
|
"logits/chosen": -2.1512584686279297, |
|
"logits/rejected": -2.186790943145752, |
|
"logps/chosen": -119.762451171875, |
|
"logps/rejected": -174.71890258789062, |
|
"loss": 0.6209, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.12059535086154938, |
|
"rewards/margins": 0.047021061182022095, |
|
"rewards/rejected": -0.16761639714241028, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.753440366972477e-07, |
|
"logits/chosen": -2.5430076122283936, |
|
"logits/rejected": -2.671963691711426, |
|
"logps/chosen": -280.49481201171875, |
|
"logps/rejected": -277.0762939453125, |
|
"loss": 0.6283, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.05606144666671753, |
|
"rewards/margins": 0.046925198286771774, |
|
"rewards/rejected": -0.10298663377761841, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7343272171253825e-07, |
|
"logits/chosen": -2.290693759918213, |
|
"logits/rejected": -2.2883286476135254, |
|
"logps/chosen": -248.0079345703125, |
|
"logps/rejected": -237.4206085205078, |
|
"loss": 0.6154, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.013114357367157936, |
|
"rewards/margins": 0.2169400155544281, |
|
"rewards/rejected": -0.2038256675004959, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.715214067278288e-07, |
|
"logits/chosen": -2.404177188873291, |
|
"logits/rejected": -2.43928861618042, |
|
"logps/chosen": -233.565185546875, |
|
"logps/rejected": -212.0560302734375, |
|
"loss": 0.6021, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.052248239517211914, |
|
"rewards/margins": 0.1868211030960083, |
|
"rewards/rejected": -0.23906934261322021, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.696100917431192e-07, |
|
"logits/chosen": -2.405780792236328, |
|
"logits/rejected": -2.449907064437866, |
|
"logps/chosen": -254.3538818359375, |
|
"logps/rejected": -255.2841796875, |
|
"loss": 0.6353, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.08873947709798813, |
|
"rewards/margins": 0.23247499763965607, |
|
"rewards/rejected": -0.14373552799224854, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.6769877675840974e-07, |
|
"logits/chosen": -2.3749642372131348, |
|
"logits/rejected": -2.4225552082061768, |
|
"logps/chosen": -226.59994506835938, |
|
"logps/rejected": -198.53065490722656, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.0910528227686882, |
|
"rewards/margins": 0.3178223967552185, |
|
"rewards/rejected": -0.2267695963382721, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6578746177370027e-07, |
|
"logits/chosen": -2.500232696533203, |
|
"logits/rejected": -2.5585453510284424, |
|
"logps/chosen": -290.7154541015625, |
|
"logps/rejected": -141.37062072753906, |
|
"loss": 0.598, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.04495004937052727, |
|
"rewards/margins": 0.2956233620643616, |
|
"rewards/rejected": -0.2506733536720276, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.638761467889908e-07, |
|
"logits/chosen": -2.41825532913208, |
|
"logits/rejected": -2.539612054824829, |
|
"logps/chosen": -201.26425170898438, |
|
"logps/rejected": -197.85702514648438, |
|
"loss": 0.6148, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.03925345838069916, |
|
"rewards/margins": 0.20073261857032776, |
|
"rewards/rejected": -0.2399860918521881, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6196483180428133e-07, |
|
"logits/chosen": -2.517301559448242, |
|
"logits/rejected": -2.471219539642334, |
|
"logps/chosen": -248.6940460205078, |
|
"logps/rejected": -149.48056030273438, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.005128766410052776, |
|
"rewards/margins": 0.24176044762134552, |
|
"rewards/rejected": -0.24688920378684998, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.600535168195718e-07, |
|
"logits/chosen": -2.421499729156494, |
|
"logits/rejected": -2.313544511795044, |
|
"logps/chosen": -198.4822540283203, |
|
"logps/rejected": -208.656982421875, |
|
"loss": 0.5988, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.08058812469244003, |
|
"rewards/margins": 0.1655338704586029, |
|
"rewards/rejected": -0.24612200260162354, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.5814220183486234e-07, |
|
"logits/chosen": -2.4364054203033447, |
|
"logits/rejected": -2.474491834640503, |
|
"logps/chosen": -357.8948059082031, |
|
"logps/rejected": -313.86407470703125, |
|
"loss": 0.5964, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09066192060709, |
|
"rewards/margins": 0.11807866394519806, |
|
"rewards/rejected": -0.20874059200286865, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.562308868501529e-07, |
|
"logits/chosen": -2.4329869747161865, |
|
"logits/rejected": -2.3959157466888428, |
|
"logps/chosen": -252.12997436523438, |
|
"logps/rejected": -272.59930419921875, |
|
"loss": 0.5992, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.09437114745378494, |
|
"rewards/margins": 0.3432508707046509, |
|
"rewards/rejected": -0.24887971580028534, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.543195718654434e-07, |
|
"logits/chosen": -2.5292797088623047, |
|
"logits/rejected": -2.472195625305176, |
|
"logps/chosen": -311.61810302734375, |
|
"logps/rejected": -204.62269592285156, |
|
"loss": 0.6052, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.022383611649274826, |
|
"rewards/margins": 0.4391644597053528, |
|
"rewards/rejected": -0.4615480303764343, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.5240825688073394e-07, |
|
"logits/chosen": -2.4026424884796143, |
|
"logits/rejected": -2.410029649734497, |
|
"logps/chosen": -254.65774536132812, |
|
"logps/rejected": -220.6438751220703, |
|
"loss": 0.6005, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.06496790796518326, |
|
"rewards/margins": 0.32214078307151794, |
|
"rewards/rejected": -0.2571728527545929, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.504969418960244e-07, |
|
"logits/chosen": -2.489189624786377, |
|
"logits/rejected": -2.509007215499878, |
|
"logps/chosen": -304.0901184082031, |
|
"logps/rejected": -276.55523681640625, |
|
"loss": 0.6, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.08526446670293808, |
|
"rewards/margins": 0.23875808715820312, |
|
"rewards/rejected": -0.15349361300468445, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4858562691131495e-07, |
|
"logits/chosen": -2.626237392425537, |
|
"logits/rejected": -2.4529411792755127, |
|
"logps/chosen": -250.4290008544922, |
|
"logps/rejected": -236.1934814453125, |
|
"loss": 0.5868, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.14224590361118317, |
|
"rewards/margins": 0.5076346397399902, |
|
"rewards/rejected": -0.3653886914253235, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.466743119266055e-07, |
|
"logits/chosen": -2.608710527420044, |
|
"logits/rejected": -2.4026219844818115, |
|
"logps/chosen": -322.64434814453125, |
|
"logps/rejected": -315.7530212402344, |
|
"loss": 0.6015, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.017860546708106995, |
|
"rewards/margins": 0.29392343759536743, |
|
"rewards/rejected": -0.27606287598609924, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.44762996941896e-07, |
|
"logits/chosen": -2.3832476139068604, |
|
"logits/rejected": -2.451291561126709, |
|
"logps/chosen": -298.6803894042969, |
|
"logps/rejected": -213.0013885498047, |
|
"loss": 0.5851, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.08139020204544067, |
|
"rewards/margins": 0.48270922899246216, |
|
"rewards/rejected": -0.4013189375400543, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4285168195718655e-07, |
|
"logits/chosen": -2.4148542881011963, |
|
"logits/rejected": -2.6071524620056152, |
|
"logps/chosen": -235.28567504882812, |
|
"logps/rejected": -229.70938110351562, |
|
"loss": 0.5857, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.05157552286982536, |
|
"rewards/margins": 0.3185363709926605, |
|
"rewards/rejected": -0.3701118528842926, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.40940366972477e-07, |
|
"logits/chosen": -2.6201112270355225, |
|
"logits/rejected": -2.5222606658935547, |
|
"logps/chosen": -187.01014709472656, |
|
"logps/rejected": -190.72042846679688, |
|
"loss": 0.591, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.02282007224857807, |
|
"rewards/margins": 0.2915310561656952, |
|
"rewards/rejected": -0.3143511116504669, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3902905198776756e-07, |
|
"logits/chosen": -2.701819896697998, |
|
"logits/rejected": -2.6200711727142334, |
|
"logps/chosen": -400.83453369140625, |
|
"logps/rejected": -312.381591796875, |
|
"loss": 0.5893, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.1581762284040451, |
|
"rewards/margins": 0.5981796979904175, |
|
"rewards/rejected": -0.4400033950805664, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.371177370030581e-07, |
|
"logits/chosen": -2.5501413345336914, |
|
"logits/rejected": -2.316926956176758, |
|
"logps/chosen": -223.07455444335938, |
|
"logps/rejected": -188.2316436767578, |
|
"loss": 0.5698, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.12886852025985718, |
|
"rewards/margins": 0.3416977822780609, |
|
"rewards/rejected": -0.47056636214256287, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.352064220183486e-07, |
|
"logits/chosen": -2.261867046356201, |
|
"logits/rejected": -2.159829616546631, |
|
"logps/chosen": -219.3220977783203, |
|
"logps/rejected": -205.33786010742188, |
|
"loss": 0.5838, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.04584250971674919, |
|
"rewards/margins": 0.3203400671482086, |
|
"rewards/rejected": -0.2744975686073303, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3329510703363915e-07, |
|
"logits/chosen": -2.319148063659668, |
|
"logits/rejected": -2.3123698234558105, |
|
"logps/chosen": -306.0313415527344, |
|
"logps/rejected": -234.1546630859375, |
|
"loss": 0.5699, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.04760587960481644, |
|
"rewards/margins": 0.6112427711486816, |
|
"rewards/rejected": -0.563636839389801, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.313837920489297e-07, |
|
"logits/chosen": -2.3105361461639404, |
|
"logits/rejected": -2.3982295989990234, |
|
"logps/chosen": -284.56072998046875, |
|
"logps/rejected": -315.3321838378906, |
|
"loss": 0.58, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0843208059668541, |
|
"rewards/margins": 0.43474799394607544, |
|
"rewards/rejected": -0.35042712092399597, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2947247706422016e-07, |
|
"logits/chosen": -2.4114909172058105, |
|
"logits/rejected": -2.4491775035858154, |
|
"logps/chosen": -219.2405548095703, |
|
"logps/rejected": -253.74575805664062, |
|
"loss": 0.5704, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.01176774688065052, |
|
"rewards/margins": 0.2667906582355499, |
|
"rewards/rejected": -0.2785584330558777, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.275611620795107e-07, |
|
"logits/chosen": -2.519127368927002, |
|
"logits/rejected": -2.356675863265991, |
|
"logps/chosen": -315.9088439941406, |
|
"logps/rejected": -185.24163818359375, |
|
"loss": 0.5741, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.10262980312108994, |
|
"rewards/margins": 0.5418930053710938, |
|
"rewards/rejected": -0.6445227861404419, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.2564984709480123e-07, |
|
"logits/chosen": -2.520284652709961, |
|
"logits/rejected": -2.5212578773498535, |
|
"logps/chosen": -339.563720703125, |
|
"logps/rejected": -229.5682373046875, |
|
"loss": 0.6126, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.15836027264595032, |
|
"rewards/margins": 0.2690756916999817, |
|
"rewards/rejected": -0.427435964345932, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.2373853211009176e-07, |
|
"logits/chosen": -2.461088180541992, |
|
"logits/rejected": -2.431992292404175, |
|
"logps/chosen": -314.1544494628906, |
|
"logps/rejected": -220.8463897705078, |
|
"loss": 0.6011, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0033186853397637606, |
|
"rewards/margins": 0.4757082462310791, |
|
"rewards/rejected": -0.4790269732475281, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2182721712538224e-07, |
|
"logits/chosen": -2.554452657699585, |
|
"logits/rejected": -2.2479560375213623, |
|
"logps/chosen": -234.7042236328125, |
|
"logps/rejected": -188.83786010742188, |
|
"loss": 0.5883, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0076250312849879265, |
|
"rewards/margins": 0.5563846230506897, |
|
"rewards/rejected": -0.5487595796585083, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.199159021406727e-07, |
|
"logits/chosen": -2.318946361541748, |
|
"logits/rejected": -2.453735828399658, |
|
"logps/chosen": -209.1856231689453, |
|
"logps/rejected": -227.36953735351562, |
|
"loss": 0.5861, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.13343460857868195, |
|
"rewards/margins": 0.22984579205513, |
|
"rewards/rejected": -0.36328038573265076, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1800458715596325e-07, |
|
"logits/chosen": -2.4781854152679443, |
|
"logits/rejected": -2.2781224250793457, |
|
"logps/chosen": -226.40469360351562, |
|
"logps/rejected": -289.3362121582031, |
|
"loss": 0.5966, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.2653411030769348, |
|
"rewards/margins": 0.09751178324222565, |
|
"rewards/rejected": -0.36285287141799927, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.160932721712538e-07, |
|
"logits/chosen": -2.4424407482147217, |
|
"logits/rejected": -2.4211478233337402, |
|
"logps/chosen": -273.59869384765625, |
|
"logps/rejected": -269.41717529296875, |
|
"loss": 0.5795, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.003456842852756381, |
|
"rewards/margins": 0.21872258186340332, |
|
"rewards/rejected": -0.21526578068733215, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.141819571865443e-07, |
|
"logits/chosen": -2.2280640602111816, |
|
"logits/rejected": -2.307762622833252, |
|
"logps/chosen": -247.0033721923828, |
|
"logps/rejected": -227.2200927734375, |
|
"loss": 0.5638, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06740056723356247, |
|
"rewards/margins": 0.43576961755752563, |
|
"rewards/rejected": -0.5031701326370239, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.1227064220183485e-07, |
|
"logits/chosen": -2.397576332092285, |
|
"logits/rejected": -2.354574680328369, |
|
"logps/chosen": -214.11892700195312, |
|
"logps/rejected": -215.72268676757812, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.17300334572792053, |
|
"rewards/margins": 0.3608870208263397, |
|
"rewards/rejected": -0.5338903665542603, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.103593272171253e-07, |
|
"logits/chosen": -2.33254337310791, |
|
"logits/rejected": -2.3918721675872803, |
|
"logps/chosen": -340.80072021484375, |
|
"logps/rejected": -347.1974792480469, |
|
"loss": 0.5778, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.130618155002594, |
|
"rewards/margins": 0.32134371995925903, |
|
"rewards/rejected": -0.4519619047641754, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.0844801223241586e-07, |
|
"logits/chosen": -2.3820478916168213, |
|
"logits/rejected": -2.4310269355773926, |
|
"logps/chosen": -228.812744140625, |
|
"logps/rejected": -197.6693115234375, |
|
"loss": 0.5567, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0885491669178009, |
|
"rewards/margins": 0.15732769668102264, |
|
"rewards/rejected": -0.24587683379650116, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.065366972477064e-07, |
|
"logits/chosen": -2.5541110038757324, |
|
"logits/rejected": -2.3786189556121826, |
|
"logps/chosen": -273.0157165527344, |
|
"logps/rejected": -203.12271118164062, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07696160674095154, |
|
"rewards/margins": 0.5536447763442993, |
|
"rewards/rejected": -0.6306063532829285, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.046253822629969e-07, |
|
"logits/chosen": -2.2037081718444824, |
|
"logits/rejected": -2.20261549949646, |
|
"logps/chosen": -158.65432739257812, |
|
"logps/rejected": -185.50302124023438, |
|
"loss": 0.5411, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.19409586489200592, |
|
"rewards/margins": 0.23793645203113556, |
|
"rewards/rejected": -0.4320322871208191, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0271406727828745e-07, |
|
"logits/chosen": -2.465301990509033, |
|
"logits/rejected": -2.5763792991638184, |
|
"logps/chosen": -271.5575256347656, |
|
"logps/rejected": -209.1376953125, |
|
"loss": 0.5711, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.005071232561022043, |
|
"rewards/margins": 0.3356703221797943, |
|
"rewards/rejected": -0.3407415747642517, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.00802752293578e-07, |
|
"logits/chosen": -2.3711938858032227, |
|
"logits/rejected": -2.4800353050231934, |
|
"logps/chosen": -240.88314819335938, |
|
"logps/rejected": -233.5401153564453, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.08367098867893219, |
|
"rewards/margins": 0.7126539945602417, |
|
"rewards/rejected": -0.7963249683380127, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9889143730886847e-07, |
|
"logits/chosen": -2.42783784866333, |
|
"logits/rejected": -2.4193737506866455, |
|
"logps/chosen": -281.68548583984375, |
|
"logps/rejected": -190.40451049804688, |
|
"loss": 0.5617, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.049332987517118454, |
|
"rewards/margins": 0.5959105491638184, |
|
"rewards/rejected": -0.5465775728225708, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.96980122324159e-07, |
|
"logits/chosen": -2.5133376121520996, |
|
"logits/rejected": -2.4929747581481934, |
|
"logps/chosen": -226.41513061523438, |
|
"logps/rejected": -218.5712890625, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.004038684070110321, |
|
"rewards/margins": 0.6471161842346191, |
|
"rewards/rejected": -0.643077552318573, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9506880733944953e-07, |
|
"logits/chosen": -2.366255760192871, |
|
"logits/rejected": -2.1581902503967285, |
|
"logps/chosen": -243.42214965820312, |
|
"logps/rejected": -193.34178161621094, |
|
"loss": 0.5987, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.29814183712005615, |
|
"rewards/margins": 0.10912153869867325, |
|
"rewards/rejected": -0.4072634279727936, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9315749235474006e-07, |
|
"logits/chosen": -2.4702069759368896, |
|
"logits/rejected": -2.2905399799346924, |
|
"logps/chosen": -202.70205688476562, |
|
"logps/rejected": -229.86154174804688, |
|
"loss": 0.5651, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.14235912263393402, |
|
"rewards/margins": 0.43106895685195923, |
|
"rewards/rejected": -0.5734280347824097, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.912461773700306e-07, |
|
"logits/chosen": -2.488694429397583, |
|
"logits/rejected": -2.2790422439575195, |
|
"logps/chosen": -393.3223876953125, |
|
"logps/rejected": -257.35986328125, |
|
"loss": 0.5666, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.01125104445964098, |
|
"rewards/margins": 0.7673934698104858, |
|
"rewards/rejected": -0.7561424374580383, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8933486238532107e-07, |
|
"logits/chosen": -2.2432663440704346, |
|
"logits/rejected": -2.2947564125061035, |
|
"logps/chosen": -359.70989990234375, |
|
"logps/rejected": -240.7715301513672, |
|
"loss": 0.5618, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.03136935830116272, |
|
"rewards/margins": 0.8124542236328125, |
|
"rewards/rejected": -0.8438236117362976, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.874235474006116e-07, |
|
"logits/chosen": -2.173022747039795, |
|
"logits/rejected": -2.1610169410705566, |
|
"logps/chosen": -201.29830932617188, |
|
"logps/rejected": -236.1139678955078, |
|
"loss": 0.5597, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15942071378231049, |
|
"rewards/margins": 0.318124383687973, |
|
"rewards/rejected": -0.4775451123714447, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8551223241590214e-07, |
|
"logits/chosen": -2.410968542098999, |
|
"logits/rejected": -2.1644670963287354, |
|
"logps/chosen": -223.5022735595703, |
|
"logps/rejected": -212.260986328125, |
|
"loss": 0.5794, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12731042504310608, |
|
"rewards/margins": 0.8949457406997681, |
|
"rewards/rejected": -1.0222561359405518, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.8360091743119267e-07, |
|
"logits/chosen": -2.505922794342041, |
|
"logits/rejected": -2.4096271991729736, |
|
"logps/chosen": -276.6337890625, |
|
"logps/rejected": -244.76632690429688, |
|
"loss": 0.5492, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.0883268266916275, |
|
"rewards/margins": 0.560738205909729, |
|
"rewards/rejected": -0.4724113345146179, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.816896024464832e-07, |
|
"logits/chosen": -2.4202027320861816, |
|
"logits/rejected": -2.3245034217834473, |
|
"logps/chosen": -171.32737731933594, |
|
"logps/rejected": -179.85043334960938, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.12893405556678772, |
|
"rewards/margins": 0.2722606360912323, |
|
"rewards/rejected": -0.4011947214603424, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.797782874617737e-07, |
|
"logits/chosen": -2.489109754562378, |
|
"logits/rejected": -2.5008530616760254, |
|
"logps/chosen": -306.02740478515625, |
|
"logps/rejected": -235.5460662841797, |
|
"loss": 0.5567, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11618290096521378, |
|
"rewards/margins": 0.5509032011032104, |
|
"rewards/rejected": -0.6670862436294556, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.778669724770642e-07, |
|
"logits/chosen": -2.308039426803589, |
|
"logits/rejected": -2.1885459423065186, |
|
"logps/chosen": -318.63189697265625, |
|
"logps/rejected": -247.6229248046875, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.18256303668022156, |
|
"rewards/margins": 0.7064282298088074, |
|
"rewards/rejected": -0.8889913558959961, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7595565749235474e-07, |
|
"logits/chosen": -2.522726535797119, |
|
"logits/rejected": -2.366619110107422, |
|
"logps/chosen": -268.7926025390625, |
|
"logps/rejected": -197.31088256835938, |
|
"loss": 0.5705, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.130192369222641, |
|
"rewards/margins": 0.7135027647018433, |
|
"rewards/rejected": -0.8436950445175171, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.740443425076452e-07, |
|
"logits/chosen": -2.3446176052093506, |
|
"logits/rejected": -2.3508477210998535, |
|
"logps/chosen": -267.4695739746094, |
|
"logps/rejected": -273.3948974609375, |
|
"loss": 0.57, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.022189509123563766, |
|
"rewards/margins": 0.3890397548675537, |
|
"rewards/rejected": -0.3668502867221832, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7213302752293575e-07, |
|
"logits/chosen": -2.5760252475738525, |
|
"logits/rejected": -2.461642026901245, |
|
"logps/chosen": -264.9154052734375, |
|
"logps/rejected": -198.0699005126953, |
|
"loss": 0.5491, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.11176663637161255, |
|
"rewards/margins": 0.19738611578941345, |
|
"rewards/rejected": -0.309152752161026, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.1374547481536865, |
|
"eval_logits/rejected": -1.9755022525787354, |
|
"eval_logps/chosen": -248.40455627441406, |
|
"eval_logps/rejected": -195.833251953125, |
|
"eval_loss": 0.5562577247619629, |
|
"eval_rewards/accuracies": 0.78125, |
|
"eval_rewards/chosen": -0.09624822437763214, |
|
"eval_rewards/margins": 0.6263414025306702, |
|
"eval_rewards/rejected": -0.7225896120071411, |
|
"eval_runtime": 49.7794, |
|
"eval_samples_per_second": 40.177, |
|
"eval_steps_per_second": 0.321, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.702217125382263e-07, |
|
"logits/chosen": -2.383577823638916, |
|
"logits/rejected": -2.2432830333709717, |
|
"logps/chosen": -269.8841247558594, |
|
"logps/rejected": -266.1859436035156, |
|
"loss": 0.5489, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.028007108718156815, |
|
"rewards/margins": 0.6903571486473083, |
|
"rewards/rejected": -0.662350058555603, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.6831039755351677e-07, |
|
"logits/chosen": -2.481004238128662, |
|
"logits/rejected": -2.3184635639190674, |
|
"logps/chosen": -239.337890625, |
|
"logps/rejected": -234.6234893798828, |
|
"loss": 0.5453, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.06412211060523987, |
|
"rewards/margins": 0.32830190658569336, |
|
"rewards/rejected": -0.3924240171909332, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.663990825688073e-07, |
|
"logits/chosen": -2.437516689300537, |
|
"logits/rejected": -2.390587091445923, |
|
"logps/chosen": -252.07528686523438, |
|
"logps/rejected": -195.83224487304688, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1875399798154831, |
|
"rewards/margins": 0.6106002330780029, |
|
"rewards/rejected": -0.7981401681900024, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6448776758409783e-07, |
|
"logits/chosen": -2.3859527111053467, |
|
"logits/rejected": -2.4744107723236084, |
|
"logps/chosen": -246.77200317382812, |
|
"logps/rejected": -183.67501831054688, |
|
"loss": 0.5812, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.18464355170726776, |
|
"rewards/margins": 0.5135353803634644, |
|
"rewards/rejected": -0.6981789469718933, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6257645259938836e-07, |
|
"logits/chosen": -2.596139669418335, |
|
"logits/rejected": -2.5789005756378174, |
|
"logps/chosen": -280.490966796875, |
|
"logps/rejected": -216.73489379882812, |
|
"loss": 0.5545, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.08334894478321075, |
|
"rewards/margins": 0.8474000692367554, |
|
"rewards/rejected": -0.7640511393547058, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.606651376146789e-07, |
|
"logits/chosen": -2.3291637897491455, |
|
"logits/rejected": -2.3714513778686523, |
|
"logps/chosen": -224.9653778076172, |
|
"logps/rejected": -184.2610321044922, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.2414972335100174, |
|
"rewards/margins": 0.3471061587333679, |
|
"rewards/rejected": -0.5886033773422241, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5875382262996937e-07, |
|
"logits/chosen": -2.275418758392334, |
|
"logits/rejected": -2.099208354949951, |
|
"logps/chosen": -197.90513610839844, |
|
"logps/rejected": -201.25003051757812, |
|
"loss": 0.5503, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.23300638794898987, |
|
"rewards/margins": 0.42851170897483826, |
|
"rewards/rejected": -0.6615180969238281, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.568425076452599e-07, |
|
"logits/chosen": -2.285912036895752, |
|
"logits/rejected": -2.2500832080841064, |
|
"logps/chosen": -213.13992309570312, |
|
"logps/rejected": -196.5510711669922, |
|
"loss": 0.551, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3874604105949402, |
|
"rewards/margins": 0.24448566138744354, |
|
"rewards/rejected": -0.6319460868835449, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.5493119266055044e-07, |
|
"logits/chosen": -2.257537364959717, |
|
"logits/rejected": -2.1868162155151367, |
|
"logps/chosen": -262.30859375, |
|
"logps/rejected": -174.8310546875, |
|
"loss": 0.5329, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.09565829485654831, |
|
"rewards/margins": 0.5677075982093811, |
|
"rewards/rejected": -0.6633658409118652, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5301987767584097e-07, |
|
"logits/chosen": -2.5957443714141846, |
|
"logits/rejected": -2.3026747703552246, |
|
"logps/chosen": -248.4452667236328, |
|
"logps/rejected": -291.78692626953125, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.01751767471432686, |
|
"rewards/margins": 1.0502655506134033, |
|
"rewards/rejected": -1.0327479839324951, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.511085626911315e-07, |
|
"logits/chosen": -2.495492458343506, |
|
"logits/rejected": -2.360352039337158, |
|
"logps/chosen": -379.7645568847656, |
|
"logps/rejected": -288.1813049316406, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.29672688245773315, |
|
"rewards/margins": 0.2822774350643158, |
|
"rewards/rejected": -0.5790044069290161, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.49197247706422e-07, |
|
"logits/chosen": -2.5327889919281006, |
|
"logits/rejected": -2.4286463260650635, |
|
"logps/chosen": -283.114501953125, |
|
"logps/rejected": -184.7639923095703, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.13121911883354187, |
|
"rewards/margins": 0.8478299975395203, |
|
"rewards/rejected": -0.716610848903656, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.472859327217125e-07, |
|
"logits/chosen": -2.3253655433654785, |
|
"logits/rejected": -2.4310760498046875, |
|
"logps/chosen": -363.16827392578125, |
|
"logps/rejected": -301.8953552246094, |
|
"loss": 0.5571, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.21858613193035126, |
|
"rewards/margins": 0.8502481579780579, |
|
"rewards/rejected": -0.631662130355835, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4537461773700304e-07, |
|
"logits/chosen": -2.407773494720459, |
|
"logits/rejected": -2.4023048877716064, |
|
"logps/chosen": -173.5346221923828, |
|
"logps/rejected": -123.51615905761719, |
|
"loss": 0.563, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0015801489353179932, |
|
"rewards/margins": 0.45527520775794983, |
|
"rewards/rejected": -0.4568553566932678, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.434633027522936e-07, |
|
"logits/chosen": -2.2176413536071777, |
|
"logits/rejected": -2.1938424110412598, |
|
"logps/chosen": -279.9801330566406, |
|
"logps/rejected": -209.1507568359375, |
|
"loss": 0.5494, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.01898578554391861, |
|
"rewards/margins": 0.47637319564819336, |
|
"rewards/rejected": -0.45738738775253296, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.415519877675841e-07, |
|
"logits/chosen": -2.5352399349212646, |
|
"logits/rejected": -2.4146199226379395, |
|
"logps/chosen": -213.72891235351562, |
|
"logps/rejected": -270.6646728515625, |
|
"loss": 0.5422, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.11530435085296631, |
|
"rewards/margins": 0.4574407935142517, |
|
"rewards/rejected": -0.5727452039718628, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.3964067278287464e-07, |
|
"logits/chosen": -2.37849760055542, |
|
"logits/rejected": -2.2588951587677, |
|
"logps/chosen": -306.17840576171875, |
|
"logps/rejected": -276.8265075683594, |
|
"loss": 0.5349, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.07973052561283112, |
|
"rewards/margins": 0.7714017033576965, |
|
"rewards/rejected": -0.6916711926460266, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.377293577981651e-07, |
|
"logits/chosen": -2.130993366241455, |
|
"logits/rejected": -2.4299304485321045, |
|
"logps/chosen": -229.61618041992188, |
|
"logps/rejected": -189.0056915283203, |
|
"loss": 0.508, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2313126027584076, |
|
"rewards/margins": 0.4152100682258606, |
|
"rewards/rejected": -0.6465227007865906, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.3581804281345565e-07, |
|
"logits/chosen": -2.485884428024292, |
|
"logits/rejected": -2.3729846477508545, |
|
"logps/chosen": -346.53497314453125, |
|
"logps/rejected": -233.9870147705078, |
|
"loss": 0.5674, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.24053406715393066, |
|
"rewards/margins": 0.43819236755371094, |
|
"rewards/rejected": -0.6787264943122864, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.339067278287462e-07, |
|
"logits/chosen": -2.3505210876464844, |
|
"logits/rejected": -2.153390645980835, |
|
"logps/chosen": -230.1878204345703, |
|
"logps/rejected": -220.8087158203125, |
|
"loss": 0.532, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.11666283756494522, |
|
"rewards/margins": 0.6117745637893677, |
|
"rewards/rejected": -0.7284374237060547, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.319954128440367e-07, |
|
"logits/chosen": -2.525207281112671, |
|
"logits/rejected": -2.4221832752227783, |
|
"logps/chosen": -354.8018493652344, |
|
"logps/rejected": -194.69482421875, |
|
"loss": 0.5507, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03960106521844864, |
|
"rewards/margins": 0.9505462646484375, |
|
"rewards/rejected": -0.9901474118232727, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.3008409785932725e-07, |
|
"logits/chosen": -2.316929340362549, |
|
"logits/rejected": -2.223125696182251, |
|
"logps/chosen": -189.4244384765625, |
|
"logps/rejected": -190.21742248535156, |
|
"loss": 0.5381, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4602813720703125, |
|
"rewards/margins": 0.2248728722333908, |
|
"rewards/rejected": -0.6851542592048645, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2817278287461773e-07, |
|
"logits/chosen": -2.5135419368743896, |
|
"logits/rejected": -2.398132801055908, |
|
"logps/chosen": -205.8257293701172, |
|
"logps/rejected": -299.1632385253906, |
|
"loss": 0.5313, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.21163006126880646, |
|
"rewards/margins": 0.4841296672821045, |
|
"rewards/rejected": -0.6957597732543945, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.262614678899082e-07, |
|
"logits/chosen": -2.211836338043213, |
|
"logits/rejected": -2.2863128185272217, |
|
"logps/chosen": -228.488525390625, |
|
"logps/rejected": -252.99923706054688, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0907645896077156, |
|
"rewards/margins": 0.7704148292541504, |
|
"rewards/rejected": -0.8611793518066406, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2435015290519874e-07, |
|
"logits/chosen": -2.300359010696411, |
|
"logits/rejected": -2.210038185119629, |
|
"logps/chosen": -193.10565185546875, |
|
"logps/rejected": -223.6150665283203, |
|
"loss": 0.5302, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.04345179349184036, |
|
"rewards/margins": 0.6841613054275513, |
|
"rewards/rejected": -0.727613091468811, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2243883792048927e-07, |
|
"logits/chosen": -2.5476744174957275, |
|
"logits/rejected": -2.430663585662842, |
|
"logps/chosen": -229.6248321533203, |
|
"logps/rejected": -247.6537322998047, |
|
"loss": 0.5573, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.20007988810539246, |
|
"rewards/margins": 0.406427800655365, |
|
"rewards/rejected": -0.6065077185630798, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.205275229357798e-07, |
|
"logits/chosen": -2.5259690284729004, |
|
"logits/rejected": -2.2766363620758057, |
|
"logps/chosen": -265.3452453613281, |
|
"logps/rejected": -221.7385711669922, |
|
"loss": 0.5653, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11124776303768158, |
|
"rewards/margins": 0.5815609693527222, |
|
"rewards/rejected": -0.6928088068962097, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.186162079510703e-07, |
|
"logits/chosen": -2.3755877017974854, |
|
"logits/rejected": -2.4818127155303955, |
|
"logps/chosen": -253.17007446289062, |
|
"logps/rejected": -265.7911071777344, |
|
"loss": 0.5397, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.14208722114562988, |
|
"rewards/margins": 0.624567985534668, |
|
"rewards/rejected": -0.7666550874710083, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.167048929663608e-07, |
|
"logits/chosen": -2.2667508125305176, |
|
"logits/rejected": -2.1175003051757812, |
|
"logps/chosen": -214.2106475830078, |
|
"logps/rejected": -216.2061767578125, |
|
"loss": 0.5402, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12058589607477188, |
|
"rewards/margins": 0.762450098991394, |
|
"rewards/rejected": -0.8830360174179077, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.1479357798165134e-07, |
|
"logits/chosen": -2.475630283355713, |
|
"logits/rejected": -2.5030765533447266, |
|
"logps/chosen": -229.7735595703125, |
|
"logps/rejected": -204.53561401367188, |
|
"loss": 0.545, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.023252641782164574, |
|
"rewards/margins": 0.7630415558815002, |
|
"rewards/rejected": -0.7397890090942383, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.128822629969419e-07, |
|
"logits/chosen": -2.7179012298583984, |
|
"logits/rejected": -2.6036970615386963, |
|
"logps/chosen": -276.84918212890625, |
|
"logps/rejected": -254.32498168945312, |
|
"loss": 0.5474, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.22881421446800232, |
|
"rewards/margins": 0.6891492009162903, |
|
"rewards/rejected": -0.9179633259773254, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.109709480122324e-07, |
|
"logits/chosen": -2.344045877456665, |
|
"logits/rejected": -2.5483055114746094, |
|
"logps/chosen": -286.34063720703125, |
|
"logps/rejected": -220.1922607421875, |
|
"loss": 0.5663, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.27246418595314026, |
|
"rewards/margins": 0.20061273872852325, |
|
"rewards/rejected": -0.4730769991874695, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0905963302752294e-07, |
|
"logits/chosen": -2.1131272315979004, |
|
"logits/rejected": -2.2049593925476074, |
|
"logps/chosen": -215.07150268554688, |
|
"logps/rejected": -210.6974639892578, |
|
"loss": 0.5606, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.24951574206352234, |
|
"rewards/margins": 0.28748106956481934, |
|
"rewards/rejected": -0.5369968414306641, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.071483180428134e-07, |
|
"logits/chosen": -2.578439950942993, |
|
"logits/rejected": -2.587803602218628, |
|
"logps/chosen": -395.8404846191406, |
|
"logps/rejected": -238.2346954345703, |
|
"loss": 0.5476, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.028068387880921364, |
|
"rewards/margins": 0.7790915966033936, |
|
"rewards/rejected": -0.7510231733322144, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0523700305810395e-07, |
|
"logits/chosen": -2.45281720161438, |
|
"logits/rejected": -2.3570828437805176, |
|
"logps/chosen": -279.39410400390625, |
|
"logps/rejected": -279.7311096191406, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1633271872997284, |
|
"rewards/margins": 0.6046531200408936, |
|
"rewards/rejected": -0.7679802775382996, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.033256880733945e-07, |
|
"logits/chosen": -2.2366137504577637, |
|
"logits/rejected": -2.1086764335632324, |
|
"logps/chosen": -169.0766143798828, |
|
"logps/rejected": -210.2600555419922, |
|
"loss": 0.5082, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2424238622188568, |
|
"rewards/margins": 0.16653873026371002, |
|
"rewards/rejected": -0.4089626371860504, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.01414373088685e-07, |
|
"logits/chosen": -2.2935876846313477, |
|
"logits/rejected": -2.4133498668670654, |
|
"logps/chosen": -272.6535949707031, |
|
"logps/rejected": -256.1263122558594, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.06631962209939957, |
|
"rewards/margins": 0.8952635526657104, |
|
"rewards/rejected": -0.8289439082145691, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.9950305810397555e-07, |
|
"logits/chosen": -2.4236862659454346, |
|
"logits/rejected": -2.5300862789154053, |
|
"logps/chosen": -264.49200439453125, |
|
"logps/rejected": -250.42605590820312, |
|
"loss": 0.5558, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.4723237156867981, |
|
"rewards/margins": 0.4657681882381439, |
|
"rewards/rejected": -0.9380919337272644, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.9759174311926603e-07, |
|
"logits/chosen": -2.388587236404419, |
|
"logits/rejected": -2.268752336502075, |
|
"logps/chosen": -311.207763671875, |
|
"logps/rejected": -154.0712127685547, |
|
"loss": 0.5471, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.054443489760160446, |
|
"rewards/margins": 0.8568767309188843, |
|
"rewards/rejected": -0.9113203883171082, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9568042813455656e-07, |
|
"logits/chosen": -2.134000301361084, |
|
"logits/rejected": -2.2762789726257324, |
|
"logps/chosen": -236.97683715820312, |
|
"logps/rejected": -211.66000366210938, |
|
"loss": 0.5532, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.23458454012870789, |
|
"rewards/margins": 0.4157086908817291, |
|
"rewards/rejected": -0.650293231010437, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.937691131498471e-07, |
|
"logits/chosen": -2.4096202850341797, |
|
"logits/rejected": -2.525867462158203, |
|
"logps/chosen": -311.18206787109375, |
|
"logps/rejected": -282.1830749511719, |
|
"loss": 0.5697, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.14300379157066345, |
|
"rewards/margins": 0.38485369086265564, |
|
"rewards/rejected": -0.5278575420379639, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.918577981651376e-07, |
|
"logits/chosen": -2.3166534900665283, |
|
"logits/rejected": -2.1045656204223633, |
|
"logps/chosen": -318.7552490234375, |
|
"logps/rejected": -169.04383850097656, |
|
"loss": 0.5451, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.16076600551605225, |
|
"rewards/margins": 0.43872982263565063, |
|
"rewards/rejected": -0.5994957685470581, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.8994648318042816e-07, |
|
"logits/chosen": -2.477952718734741, |
|
"logits/rejected": -2.3459110260009766, |
|
"logps/chosen": -246.3095245361328, |
|
"logps/rejected": -196.61399841308594, |
|
"loss": 0.5566, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.06157383322715759, |
|
"rewards/margins": 0.7134921550750732, |
|
"rewards/rejected": -0.6519182920455933, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8803516819571863e-07, |
|
"logits/chosen": -2.489950180053711, |
|
"logits/rejected": -2.50527286529541, |
|
"logps/chosen": -308.19573974609375, |
|
"logps/rejected": -222.0939178466797, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.28509682416915894, |
|
"rewards/margins": 0.6225865483283997, |
|
"rewards/rejected": -0.9076833724975586, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.8612385321100917e-07, |
|
"logits/chosen": -2.2142746448516846, |
|
"logits/rejected": -2.1834685802459717, |
|
"logps/chosen": -199.5335235595703, |
|
"logps/rejected": -232.9855499267578, |
|
"loss": 0.5256, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.20894494652748108, |
|
"rewards/margins": 0.24761509895324707, |
|
"rewards/rejected": -0.45656007528305054, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.842125382262997e-07, |
|
"logits/chosen": -2.416759490966797, |
|
"logits/rejected": -2.349916934967041, |
|
"logps/chosen": -277.214599609375, |
|
"logps/rejected": -271.69854736328125, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.003354682121425867, |
|
"rewards/margins": 0.93927001953125, |
|
"rewards/rejected": -0.9359153509140015, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8230122324159023e-07, |
|
"logits/chosen": -2.2614686489105225, |
|
"logits/rejected": -2.343787670135498, |
|
"logps/chosen": -205.03573608398438, |
|
"logps/rejected": -297.828369140625, |
|
"loss": 0.5376, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3863915205001831, |
|
"rewards/margins": 0.32738059759140015, |
|
"rewards/rejected": -0.7137721180915833, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.8038990825688076e-07, |
|
"logits/chosen": -2.5332446098327637, |
|
"logits/rejected": -2.485593795776367, |
|
"logps/chosen": -204.37960815429688, |
|
"logps/rejected": -191.53567504882812, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4227626323699951, |
|
"rewards/margins": 0.1967758685350418, |
|
"rewards/rejected": -0.619538426399231, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.784785932721712e-07, |
|
"logits/chosen": -2.4867165088653564, |
|
"logits/rejected": -2.367835521697998, |
|
"logps/chosen": -264.09783935546875, |
|
"logps/rejected": -226.6724395751953, |
|
"loss": 0.5612, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.03983074054121971, |
|
"rewards/margins": 1.0130517482757568, |
|
"rewards/rejected": -1.052882432937622, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.765672782874617e-07, |
|
"logits/chosen": -2.376364231109619, |
|
"logits/rejected": -2.221336841583252, |
|
"logps/chosen": -327.6852111816406, |
|
"logps/rejected": -228.00344848632812, |
|
"loss": 0.555, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.287401020526886, |
|
"rewards/margins": 0.42843931913375854, |
|
"rewards/rejected": -0.7158403396606445, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.7465596330275225e-07, |
|
"logits/chosen": -2.417738437652588, |
|
"logits/rejected": -2.3728537559509277, |
|
"logps/chosen": -222.4464111328125, |
|
"logps/rejected": -159.91334533691406, |
|
"loss": 0.5632, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.3034583330154419, |
|
"rewards/margins": 0.43387946486473083, |
|
"rewards/rejected": -0.7373377680778503, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.727446483180428e-07, |
|
"logits/chosen": -2.5264039039611816, |
|
"logits/rejected": -2.549905776977539, |
|
"logps/chosen": -311.6380615234375, |
|
"logps/rejected": -241.33175659179688, |
|
"loss": 0.5474, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.31216129660606384, |
|
"rewards/margins": 0.6543269157409668, |
|
"rewards/rejected": -0.966488242149353, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.708333333333333e-07, |
|
"logits/chosen": -2.4928135871887207, |
|
"logits/rejected": -2.5519299507141113, |
|
"logps/chosen": -278.2198181152344, |
|
"logps/rejected": -230.333984375, |
|
"loss": 0.5281, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3294491767883301, |
|
"rewards/margins": 0.6884486079216003, |
|
"rewards/rejected": -1.0178978443145752, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6892201834862385e-07, |
|
"logits/chosen": -2.3910984992980957, |
|
"logits/rejected": -2.5310654640197754, |
|
"logps/chosen": -318.42059326171875, |
|
"logps/rejected": -227.05142211914062, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03567828983068466, |
|
"rewards/margins": 1.0869849920272827, |
|
"rewards/rejected": -1.1226632595062256, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6701070336391433e-07, |
|
"logits/chosen": -2.120537042617798, |
|
"logits/rejected": -1.9794002771377563, |
|
"logps/chosen": -287.77618408203125, |
|
"logps/rejected": -180.72586059570312, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.17248104512691498, |
|
"rewards/margins": 0.49385708570480347, |
|
"rewards/rejected": -0.6663382053375244, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6509938837920486e-07, |
|
"logits/chosen": -2.598473310470581, |
|
"logits/rejected": -2.6752312183380127, |
|
"logps/chosen": -303.84912109375, |
|
"logps/rejected": -220.67562866210938, |
|
"loss": 0.5453, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2263271063566208, |
|
"rewards/margins": 0.805108904838562, |
|
"rewards/rejected": -1.0314362049102783, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.631880733944954e-07, |
|
"logits/chosen": -2.563636541366577, |
|
"logits/rejected": -2.4898366928100586, |
|
"logps/chosen": -317.47259521484375, |
|
"logps/rejected": -213.1184844970703, |
|
"loss": 0.5185, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07109083235263824, |
|
"rewards/margins": 0.6358667612075806, |
|
"rewards/rejected": -0.7069576382637024, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.612767584097859e-07, |
|
"logits/chosen": -2.4102084636688232, |
|
"logits/rejected": -2.329968214035034, |
|
"logps/chosen": -262.5071716308594, |
|
"logps/rejected": -234.21939086914062, |
|
"loss": 0.5491, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.028744656592607498, |
|
"rewards/margins": 0.5971611738204956, |
|
"rewards/rejected": -0.6259058713912964, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.5936544342507646e-07, |
|
"logits/chosen": -2.52365779876709, |
|
"logits/rejected": -2.2771174907684326, |
|
"logps/chosen": -292.43560791015625, |
|
"logps/rejected": -262.5408020019531, |
|
"loss": 0.5316, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.04525692015886307, |
|
"rewards/margins": 0.5967898368835449, |
|
"rewards/rejected": -0.5515329241752625, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5745412844036693e-07, |
|
"logits/chosen": -2.4652435779571533, |
|
"logits/rejected": -2.3399059772491455, |
|
"logps/chosen": -295.6546325683594, |
|
"logps/rejected": -239.66946411132812, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.045461829751729965, |
|
"rewards/margins": 0.6490108370780945, |
|
"rewards/rejected": -0.6944726705551147, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5554281345565747e-07, |
|
"logits/chosen": -2.425602674484253, |
|
"logits/rejected": -2.3701093196868896, |
|
"logps/chosen": -333.5090637207031, |
|
"logps/rejected": -249.1916961669922, |
|
"loss": 0.5225, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.054739952087402344, |
|
"rewards/margins": 1.113675832748413, |
|
"rewards/rejected": -1.0589358806610107, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.53631498470948e-07, |
|
"logits/chosen": -2.476578712463379, |
|
"logits/rejected": -2.3098702430725098, |
|
"logps/chosen": -277.14373779296875, |
|
"logps/rejected": -208.9362335205078, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.12385344505310059, |
|
"rewards/margins": 0.5760471820831299, |
|
"rewards/rejected": -0.6999006867408752, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5172018348623853e-07, |
|
"logits/chosen": -2.3377368450164795, |
|
"logits/rejected": -2.5816705226898193, |
|
"logps/chosen": -333.68310546875, |
|
"logps/rejected": -167.23974609375, |
|
"loss": 0.564, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.043125562369823456, |
|
"rewards/margins": 0.8181279897689819, |
|
"rewards/rejected": -0.7750024795532227, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.4980886850152906e-07, |
|
"logits/chosen": -2.3544070720672607, |
|
"logits/rejected": -2.522584915161133, |
|
"logps/chosen": -372.16339111328125, |
|
"logps/rejected": -314.8898620605469, |
|
"loss": 0.5526, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.10558883845806122, |
|
"rewards/margins": 1.0972599983215332, |
|
"rewards/rejected": -0.9916712045669556, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.478975535168196e-07, |
|
"logits/chosen": -2.4156651496887207, |
|
"logits/rejected": -2.321596622467041, |
|
"logps/chosen": -234.2825164794922, |
|
"logps/rejected": -187.53053283691406, |
|
"loss": 0.5361, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13712987303733826, |
|
"rewards/margins": 0.6930993795394897, |
|
"rewards/rejected": -0.8302291631698608, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.459862385321101e-07, |
|
"logits/chosen": -2.536264657974243, |
|
"logits/rejected": -2.373924732208252, |
|
"logps/chosen": -377.7065734863281, |
|
"logps/rejected": -278.75494384765625, |
|
"loss": 0.5685, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.011585181578993797, |
|
"rewards/margins": 0.7624789476394653, |
|
"rewards/rejected": -0.750893771648407, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.440749235474006e-07, |
|
"logits/chosen": -2.322601795196533, |
|
"logits/rejected": -2.2549948692321777, |
|
"logps/chosen": -292.2939147949219, |
|
"logps/rejected": -266.98040771484375, |
|
"loss": 0.535, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.1699330061674118, |
|
"rewards/margins": 0.8751620054244995, |
|
"rewards/rejected": -0.7052290439605713, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.421636085626911e-07, |
|
"logits/chosen": -2.3065285682678223, |
|
"logits/rejected": -2.3722896575927734, |
|
"logps/chosen": -338.43115234375, |
|
"logps/rejected": -282.4959411621094, |
|
"loss": 0.555, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.2029719352722168, |
|
"rewards/margins": 0.37522464990615845, |
|
"rewards/rejected": -0.57819664478302, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.402522935779816e-07, |
|
"logits/chosen": -2.526841163635254, |
|
"logits/rejected": -2.2794435024261475, |
|
"logps/chosen": -308.1125183105469, |
|
"logps/rejected": -253.7852020263672, |
|
"loss": 0.5306, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15976569056510925, |
|
"rewards/margins": 0.9058405160903931, |
|
"rewards/rejected": -1.0656063556671143, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3834097859327215e-07, |
|
"logits/chosen": -2.308931350708008, |
|
"logits/rejected": -2.195425033569336, |
|
"logps/chosen": -227.3255615234375, |
|
"logps/rejected": -257.3186950683594, |
|
"loss": 0.5538, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.19937238097190857, |
|
"rewards/margins": 0.6596201658248901, |
|
"rewards/rejected": -0.8589925765991211, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3642966360856268e-07, |
|
"logits/chosen": -2.505361795425415, |
|
"logits/rejected": -2.2748889923095703, |
|
"logps/chosen": -275.67230224609375, |
|
"logps/rejected": -213.0087890625, |
|
"loss": 0.502, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.023041725158691406, |
|
"rewards/margins": 0.6609295010566711, |
|
"rewards/rejected": -0.6839712262153625, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.345183486238532e-07, |
|
"logits/chosen": -2.4382357597351074, |
|
"logits/rejected": -2.337517499923706, |
|
"logps/chosen": -257.69146728515625, |
|
"logps/rejected": -238.8438262939453, |
|
"loss": 0.5522, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.17831094563007355, |
|
"rewards/margins": 0.3667464554309845, |
|
"rewards/rejected": -0.5450573563575745, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3260703363914372e-07, |
|
"logits/chosen": -2.54907488822937, |
|
"logits/rejected": -2.3391122817993164, |
|
"logps/chosen": -345.74346923828125, |
|
"logps/rejected": -252.90414428710938, |
|
"loss": 0.5239, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.07564310729503632, |
|
"rewards/margins": 0.590453028678894, |
|
"rewards/rejected": -0.6660962104797363, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3069571865443425e-07, |
|
"logits/chosen": -2.417306900024414, |
|
"logits/rejected": -2.506610155105591, |
|
"logps/chosen": -223.8877716064453, |
|
"logps/rejected": -169.25540161132812, |
|
"loss": 0.5207, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.12120304256677628, |
|
"rewards/margins": 0.6357380151748657, |
|
"rewards/rejected": -0.7569410800933838, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.2878440366972476e-07, |
|
"logits/chosen": -2.6201210021972656, |
|
"logits/rejected": -2.4392848014831543, |
|
"logps/chosen": -270.24615478515625, |
|
"logps/rejected": -343.59423828125, |
|
"loss": 0.533, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.28634923696517944, |
|
"rewards/margins": 0.4523712992668152, |
|
"rewards/rejected": -0.7387205958366394, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.268730886850153e-07, |
|
"logits/chosen": -2.069031000137329, |
|
"logits/rejected": -1.9285329580307007, |
|
"logps/chosen": -210.6043243408203, |
|
"logps/rejected": -177.20159912109375, |
|
"loss": 0.5444, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.03490941971540451, |
|
"rewards/margins": 0.9221685528755188, |
|
"rewards/rejected": -0.8872591853141785, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.249617737003058e-07, |
|
"logits/chosen": -2.2896201610565186, |
|
"logits/rejected": -2.356297254562378, |
|
"logps/chosen": -292.9852294921875, |
|
"logps/rejected": -277.8864440917969, |
|
"loss": 0.5467, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.21859049797058105, |
|
"rewards/margins": 0.5577865839004517, |
|
"rewards/rejected": -0.7763770818710327, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2305045871559633e-07, |
|
"logits/chosen": -2.3246283531188965, |
|
"logits/rejected": -2.1089653968811035, |
|
"logps/chosen": -273.15118408203125, |
|
"logps/rejected": -292.574951171875, |
|
"loss": 0.5207, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.37160351872444153, |
|
"rewards/margins": 0.06535868346691132, |
|
"rewards/rejected": -0.43696221709251404, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2113914373088686e-07, |
|
"logits/chosen": -2.4427847862243652, |
|
"logits/rejected": -2.560529947280884, |
|
"logps/chosen": -153.76699829101562, |
|
"logps/rejected": -248.28408813476562, |
|
"loss": 0.5375, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.2880316376686096, |
|
"rewards/margins": 0.30386772751808167, |
|
"rewards/rejected": -0.5918993949890137, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1922782874617736e-07, |
|
"logits/chosen": -2.6262247562408447, |
|
"logits/rejected": -2.416951894760132, |
|
"logps/chosen": -257.9002990722656, |
|
"logps/rejected": -275.48553466796875, |
|
"loss": 0.5315, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.2771896719932556, |
|
"rewards/margins": 0.2873901128768921, |
|
"rewards/rejected": -0.5645797848701477, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1731651376146787e-07, |
|
"logits/chosen": -2.1698267459869385, |
|
"logits/rejected": -2.3672404289245605, |
|
"logps/chosen": -256.00689697265625, |
|
"logps/rejected": -232.8870849609375, |
|
"loss": 0.5626, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.13111671805381775, |
|
"rewards/margins": 0.5583267211914062, |
|
"rewards/rejected": -0.6894434690475464, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.154051987767584e-07, |
|
"logits/chosen": -2.2943813800811768, |
|
"logits/rejected": -2.3611464500427246, |
|
"logps/chosen": -289.64422607421875, |
|
"logps/rejected": -286.1085205078125, |
|
"loss": 0.5383, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.17674392461776733, |
|
"rewards/margins": 0.36700400710105896, |
|
"rewards/rejected": -0.5437479615211487, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.134938837920489e-07, |
|
"logits/chosen": -2.2364659309387207, |
|
"logits/rejected": -2.2148382663726807, |
|
"logps/chosen": -202.55038452148438, |
|
"logps/rejected": -206.96762084960938, |
|
"loss": 0.5432, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.26416245102882385, |
|
"rewards/margins": 0.6946617960929871, |
|
"rewards/rejected": -0.9588242769241333, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1158256880733944e-07, |
|
"logits/chosen": -2.305535316467285, |
|
"logits/rejected": -2.4840779304504395, |
|
"logps/chosen": -338.34271240234375, |
|
"logps/rejected": -185.28201293945312, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.16436448693275452, |
|
"rewards/margins": 0.42132654786109924, |
|
"rewards/rejected": -0.5856910943984985, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0967125382262994e-07, |
|
"logits/chosen": -2.7777082920074463, |
|
"logits/rejected": -2.5519793033599854, |
|
"logps/chosen": -233.9488067626953, |
|
"logps/rejected": -237.7256622314453, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.48247796297073364, |
|
"rewards/margins": 0.04958399385213852, |
|
"rewards/rejected": -0.5320619344711304, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0775993883792048e-07, |
|
"logits/chosen": -2.5612552165985107, |
|
"logits/rejected": -2.420253276824951, |
|
"logps/chosen": -339.1038818359375, |
|
"logps/rejected": -225.16360473632812, |
|
"loss": 0.5151, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.03660213202238083, |
|
"rewards/margins": 1.2038971185684204, |
|
"rewards/rejected": -1.2404992580413818, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.05848623853211e-07, |
|
"logits/chosen": -2.586907386779785, |
|
"logits/rejected": -2.5101799964904785, |
|
"logps/chosen": -310.6505432128906, |
|
"logps/rejected": -244.5113983154297, |
|
"loss": 0.5221, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.411550909280777, |
|
"rewards/margins": 0.3417314291000366, |
|
"rewards/rejected": -0.7532823085784912, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0393730886850151e-07, |
|
"logits/chosen": -2.2709717750549316, |
|
"logits/rejected": -2.39669132232666, |
|
"logps/chosen": -226.12057495117188, |
|
"logps/rejected": -200.52218627929688, |
|
"loss": 0.5242, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.046432264149188995, |
|
"rewards/margins": 1.110090732574463, |
|
"rewards/rejected": -1.0636584758758545, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0202599388379205e-07, |
|
"logits/chosen": -2.3261749744415283, |
|
"logits/rejected": -2.5248923301696777, |
|
"logps/chosen": -222.13772583007812, |
|
"logps/rejected": -425.5648498535156, |
|
"loss": 0.5189, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3807342052459717, |
|
"rewards/margins": 0.5107225775718689, |
|
"rewards/rejected": -0.8914567232131958, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.0011467889908258e-07, |
|
"logits/chosen": -2.398069381713867, |
|
"logits/rejected": -2.2855401039123535, |
|
"logps/chosen": -314.5137939453125, |
|
"logps/rejected": -217.12588500976562, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.029106635600328445, |
|
"rewards/margins": 1.0325161218643188, |
|
"rewards/rejected": -1.003409504890442, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9820336391437308e-07, |
|
"logits/chosen": -2.290491819381714, |
|
"logits/rejected": -2.378493070602417, |
|
"logps/chosen": -286.273681640625, |
|
"logps/rejected": -229.33828735351562, |
|
"loss": 0.5646, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.08369754254817963, |
|
"rewards/margins": 0.795507550239563, |
|
"rewards/rejected": -0.879205048084259, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9629204892966362e-07, |
|
"logits/chosen": -2.601147413253784, |
|
"logits/rejected": -2.4275565147399902, |
|
"logps/chosen": -390.74151611328125, |
|
"logps/rejected": -276.0337829589844, |
|
"loss": 0.53, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.009929287247359753, |
|
"rewards/margins": 0.7174602746963501, |
|
"rewards/rejected": -0.7075310945510864, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.943807339449541e-07, |
|
"logits/chosen": -2.18162202835083, |
|
"logits/rejected": -1.9625205993652344, |
|
"logps/chosen": -205.7775421142578, |
|
"logps/rejected": -185.28477478027344, |
|
"loss": 0.5423, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2230360060930252, |
|
"rewards/margins": 0.8223746418952942, |
|
"rewards/rejected": -1.0454107522964478, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.9246941896024463e-07, |
|
"logits/chosen": -2.2929797172546387, |
|
"logits/rejected": -2.081751823425293, |
|
"logps/chosen": -176.2759246826172, |
|
"logps/rejected": -127.28376770019531, |
|
"loss": 0.5754, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16625508666038513, |
|
"rewards/margins": 0.5883089900016785, |
|
"rewards/rejected": -0.754564106464386, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9055810397553516e-07, |
|
"logits/chosen": -2.3370862007141113, |
|
"logits/rejected": -2.5263562202453613, |
|
"logps/chosen": -305.7495422363281, |
|
"logps/rejected": -238.85726928710938, |
|
"loss": 0.5508, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1391083151102066, |
|
"rewards/margins": 0.8672167062759399, |
|
"rewards/rejected": -1.0063250064849854, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8864678899082566e-07, |
|
"logits/chosen": -2.6837048530578613, |
|
"logits/rejected": -2.4654018878936768, |
|
"logps/chosen": -278.2798767089844, |
|
"logps/rejected": -200.07553100585938, |
|
"loss": 0.5311, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.21458300948143005, |
|
"rewards/margins": 0.6255909204483032, |
|
"rewards/rejected": -0.8401739001274109, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.867354740061162e-07, |
|
"logits/chosen": -2.3841135501861572, |
|
"logits/rejected": -2.4274752140045166, |
|
"logps/chosen": -241.267333984375, |
|
"logps/rejected": -294.39300537109375, |
|
"loss": 0.5454, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15124601125717163, |
|
"rewards/margins": 1.2222580909729004, |
|
"rewards/rejected": -1.3735041618347168, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -2.0971014499664307, |
|
"eval_logits/rejected": -1.931637167930603, |
|
"eval_logps/chosen": -248.69100952148438, |
|
"eval_logps/rejected": -198.20773315429688, |
|
"eval_loss": 0.531248927116394, |
|
"eval_rewards/accuracies": 0.796875, |
|
"eval_rewards/chosen": -0.12489113211631775, |
|
"eval_rewards/margins": 0.8351471424102783, |
|
"eval_rewards/rejected": -0.9600383639335632, |
|
"eval_runtime": 49.837, |
|
"eval_samples_per_second": 40.131, |
|
"eval_steps_per_second": 0.321, |
|
"step": 1938 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8482415902140673e-07, |
|
"logits/chosen": -2.248683452606201, |
|
"logits/rejected": -2.3425230979919434, |
|
"logps/chosen": -222.09530639648438, |
|
"logps/rejected": -244.0989227294922, |
|
"loss": 0.5393, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.25359851121902466, |
|
"rewards/margins": 0.6999626159667969, |
|
"rewards/rejected": -0.9535611271858215, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.8291284403669723e-07, |
|
"logits/chosen": -2.3472368717193604, |
|
"logits/rejected": -2.4190945625305176, |
|
"logps/chosen": -202.47897338867188, |
|
"logps/rejected": -223.9008331298828, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.06839686632156372, |
|
"rewards/margins": 0.7683246731758118, |
|
"rewards/rejected": -0.8367214202880859, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8100152905198777e-07, |
|
"logits/chosen": -2.1655468940734863, |
|
"logits/rejected": -2.06404185295105, |
|
"logps/chosen": -259.51666259765625, |
|
"logps/rejected": -317.1017150878906, |
|
"loss": 0.5335, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.24679192900657654, |
|
"rewards/margins": 1.218878984451294, |
|
"rewards/rejected": -0.9720870852470398, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7909021406727827e-07, |
|
"logits/chosen": -2.5240676403045654, |
|
"logits/rejected": -2.360107660293579, |
|
"logps/chosen": -175.0189971923828, |
|
"logps/rejected": -180.5626983642578, |
|
"loss": 0.5445, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4591894745826721, |
|
"rewards/margins": 0.5135048627853394, |
|
"rewards/rejected": -0.9726942777633667, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.771788990825688e-07, |
|
"logits/chosen": -2.3183510303497314, |
|
"logits/rejected": -2.134965658187866, |
|
"logps/chosen": -302.5520324707031, |
|
"logps/rejected": -210.1514434814453, |
|
"loss": 0.5426, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.04827744886279106, |
|
"rewards/margins": 1.1514790058135986, |
|
"rewards/rejected": -1.1032016277313232, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7526758409785934e-07, |
|
"logits/chosen": -2.512765884399414, |
|
"logits/rejected": -2.167898416519165, |
|
"logps/chosen": -355.231201171875, |
|
"logps/rejected": -272.0666809082031, |
|
"loss": 0.5404, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12298504263162613, |
|
"rewards/margins": 1.1620323657989502, |
|
"rewards/rejected": -1.2850173711776733, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7335626911314984e-07, |
|
"logits/chosen": -2.495887517929077, |
|
"logits/rejected": -2.2603235244750977, |
|
"logps/chosen": -236.11611938476562, |
|
"logps/rejected": -186.68002319335938, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.14387288689613342, |
|
"rewards/margins": 0.7451364398002625, |
|
"rewards/rejected": -0.8890093564987183, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7144495412844037e-07, |
|
"logits/chosen": -2.6951799392700195, |
|
"logits/rejected": -2.600787878036499, |
|
"logps/chosen": -388.97418212890625, |
|
"logps/rejected": -281.6431884765625, |
|
"loss": 0.5252, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.054711710661649704, |
|
"rewards/margins": 1.36897611618042, |
|
"rewards/rejected": -1.4236878156661987, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.6953363914373088e-07, |
|
"logits/chosen": -2.5666658878326416, |
|
"logits/rejected": -2.512883186340332, |
|
"logps/chosen": -324.7785949707031, |
|
"logps/rejected": -228.8123016357422, |
|
"loss": 0.5496, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09128103405237198, |
|
"rewards/margins": 1.144072413444519, |
|
"rewards/rejected": -1.2353535890579224, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6762232415902138e-07, |
|
"logits/chosen": -2.499027729034424, |
|
"logits/rejected": -2.298178195953369, |
|
"logps/chosen": -235.67837524414062, |
|
"logps/rejected": -202.81117248535156, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.24017953872680664, |
|
"rewards/margins": 0.8434773683547974, |
|
"rewards/rejected": -1.083656907081604, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6571100917431192e-07, |
|
"logits/chosen": -2.5607171058654785, |
|
"logits/rejected": -2.353574514389038, |
|
"logps/chosen": -240.97384643554688, |
|
"logps/rejected": -170.1258544921875, |
|
"loss": 0.5185, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.20230352878570557, |
|
"rewards/margins": 0.6808130741119385, |
|
"rewards/rejected": -0.883116602897644, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6379969418960242e-07, |
|
"logits/chosen": -2.5361034870147705, |
|
"logits/rejected": -2.513338565826416, |
|
"logps/chosen": -381.1001892089844, |
|
"logps/rejected": -299.3647155761719, |
|
"loss": 0.5277, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.016599655151367188, |
|
"rewards/margins": 0.782288134098053, |
|
"rewards/rejected": -0.7988878488540649, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6188837920489295e-07, |
|
"logits/chosen": -2.3150038719177246, |
|
"logits/rejected": -2.09513783454895, |
|
"logps/chosen": -262.87286376953125, |
|
"logps/rejected": -209.9288330078125, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.19605524837970734, |
|
"rewards/margins": 0.7449124455451965, |
|
"rewards/rejected": -0.9409675598144531, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5997706422018349e-07, |
|
"logits/chosen": -2.9330039024353027, |
|
"logits/rejected": -2.6582858562469482, |
|
"logps/chosen": -341.77850341796875, |
|
"logps/rejected": -298.8485412597656, |
|
"loss": 0.5397, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.009139073081314564, |
|
"rewards/margins": 0.28165051341056824, |
|
"rewards/rejected": -0.27251148223876953, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.58065749235474e-07, |
|
"logits/chosen": -2.320667266845703, |
|
"logits/rejected": -2.201702356338501, |
|
"logps/chosen": -286.5201416015625, |
|
"logps/rejected": -217.8232421875, |
|
"loss": 0.5418, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.11670849472284317, |
|
"rewards/margins": 0.8405148386955261, |
|
"rewards/rejected": -0.7238063216209412, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5615443425076452e-07, |
|
"logits/chosen": -2.363339900970459, |
|
"logits/rejected": -2.3478615283966064, |
|
"logps/chosen": -314.27325439453125, |
|
"logps/rejected": -253.44631958007812, |
|
"loss": 0.5247, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.10530459880828857, |
|
"rewards/margins": 1.0353710651397705, |
|
"rewards/rejected": -0.9300664067268372, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5424311926605506e-07, |
|
"logits/chosen": -2.2379679679870605, |
|
"logits/rejected": -2.2152559757232666, |
|
"logps/chosen": -170.92782592773438, |
|
"logps/rejected": -166.93072509765625, |
|
"loss": 0.5114, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.21574795246124268, |
|
"rewards/margins": 1.1248795986175537, |
|
"rewards/rejected": -1.3406277894973755, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5233180428134556e-07, |
|
"logits/chosen": -2.4279611110687256, |
|
"logits/rejected": -2.4034111499786377, |
|
"logps/chosen": -271.7957763671875, |
|
"logps/rejected": -260.822265625, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07596953958272934, |
|
"rewards/margins": 0.9050176739692688, |
|
"rewards/rejected": -0.9809872508049011, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.504204892966361e-07, |
|
"logits/chosen": -2.213738441467285, |
|
"logits/rejected": -2.2372987270355225, |
|
"logps/chosen": -295.6419677734375, |
|
"logps/rejected": -206.6561279296875, |
|
"loss": 0.5314, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.19369874894618988, |
|
"rewards/margins": 0.6303919553756714, |
|
"rewards/rejected": -0.8240906596183777, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.485091743119266e-07, |
|
"logits/chosen": -2.4710030555725098, |
|
"logits/rejected": -2.3606488704681396, |
|
"logps/chosen": -242.4974822998047, |
|
"logps/rejected": -218.1492156982422, |
|
"loss": 0.5367, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16626985371112823, |
|
"rewards/margins": 0.7947496771812439, |
|
"rewards/rejected": -0.9610195159912109, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.465978593272171e-07, |
|
"logits/chosen": -2.255868434906006, |
|
"logits/rejected": -2.3592605590820312, |
|
"logps/chosen": -228.03219604492188, |
|
"logps/rejected": -184.50611877441406, |
|
"loss": 0.5165, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3162696957588196, |
|
"rewards/margins": 0.7572251558303833, |
|
"rewards/rejected": -1.0734949111938477, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4468654434250764e-07, |
|
"logits/chosen": -2.2551803588867188, |
|
"logits/rejected": -2.189657688140869, |
|
"logps/chosen": -370.01104736328125, |
|
"logps/rejected": -314.4168395996094, |
|
"loss": 0.5523, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.027963241562247276, |
|
"rewards/margins": 0.7318282127380371, |
|
"rewards/rejected": -0.7038649320602417, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4277522935779814e-07, |
|
"logits/chosen": -2.4878127574920654, |
|
"logits/rejected": -2.42626953125, |
|
"logps/chosen": -275.42144775390625, |
|
"logps/rejected": -220.00137329101562, |
|
"loss": 0.5275, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.14348439872264862, |
|
"rewards/margins": 1.2011865377426147, |
|
"rewards/rejected": -1.0577021837234497, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4086391437308867e-07, |
|
"logits/chosen": -2.3342137336730957, |
|
"logits/rejected": -2.4700827598571777, |
|
"logps/chosen": -279.27032470703125, |
|
"logps/rejected": -260.35107421875, |
|
"loss": 0.5171, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21079519391059875, |
|
"rewards/margins": 0.983725368976593, |
|
"rewards/rejected": -1.1945207118988037, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.389525993883792e-07, |
|
"logits/chosen": -2.543966770172119, |
|
"logits/rejected": -2.178934097290039, |
|
"logps/chosen": -354.38348388671875, |
|
"logps/rejected": -278.5998840332031, |
|
"loss": 0.5621, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17348089814186096, |
|
"rewards/margins": 1.0600968599319458, |
|
"rewards/rejected": -1.2335779666900635, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.370412844036697e-07, |
|
"logits/chosen": -2.370507001876831, |
|
"logits/rejected": -2.4885435104370117, |
|
"logps/chosen": -282.58270263671875, |
|
"logps/rejected": -245.8155517578125, |
|
"loss": 0.5296, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.010824834927916527, |
|
"rewards/margins": 0.7261137962341309, |
|
"rewards/rejected": -0.7152889966964722, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3512996941896024e-07, |
|
"logits/chosen": -2.373438835144043, |
|
"logits/rejected": -2.3270111083984375, |
|
"logps/chosen": -255.15109252929688, |
|
"logps/rejected": -247.8589324951172, |
|
"loss": 0.5333, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.5083593130111694, |
|
"rewards/margins": 0.12943392992019653, |
|
"rewards/rejected": -0.6377933025360107, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3321865443425075e-07, |
|
"logits/chosen": -2.239222526550293, |
|
"logits/rejected": -2.1923537254333496, |
|
"logps/chosen": -299.39422607421875, |
|
"logps/rejected": -174.023681640625, |
|
"loss": 0.5336, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.11805693060159683, |
|
"rewards/margins": 1.0056555271148682, |
|
"rewards/rejected": -1.1237125396728516, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3130733944954128e-07, |
|
"logits/chosen": -2.110752820968628, |
|
"logits/rejected": -1.9507097005844116, |
|
"logps/chosen": -329.23724365234375, |
|
"logps/rejected": -225.7882080078125, |
|
"loss": 0.4955, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.32635951042175293, |
|
"rewards/margins": 0.7226699590682983, |
|
"rewards/rejected": -1.0490295886993408, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.293960244648318e-07, |
|
"logits/chosen": -2.5377070903778076, |
|
"logits/rejected": -2.426079750061035, |
|
"logps/chosen": -373.67645263671875, |
|
"logps/rejected": -226.9227294921875, |
|
"loss": 0.5225, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15080983936786652, |
|
"rewards/margins": 1.0611189603805542, |
|
"rewards/rejected": -1.2119289636611938, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2748470948012232e-07, |
|
"logits/chosen": -2.362394332885742, |
|
"logits/rejected": -2.197788715362549, |
|
"logps/chosen": -327.0963134765625, |
|
"logps/rejected": -272.4471130371094, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.272097110748291, |
|
"rewards/margins": 0.6045467257499695, |
|
"rewards/rejected": -0.8766437768936157, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.2557339449541285e-07, |
|
"logits/chosen": -2.2135844230651855, |
|
"logits/rejected": -2.296194076538086, |
|
"logps/chosen": -314.9928894042969, |
|
"logps/rejected": -249.1458282470703, |
|
"loss": 0.5293, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.059797704219818115, |
|
"rewards/margins": 1.026885747909546, |
|
"rewards/rejected": -1.0866836309432983, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2366207951070336e-07, |
|
"logits/chosen": -2.468040943145752, |
|
"logits/rejected": -2.395946741104126, |
|
"logps/chosen": -259.4168395996094, |
|
"logps/rejected": -228.13162231445312, |
|
"loss": 0.5639, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3209591507911682, |
|
"rewards/margins": 0.7097271680831909, |
|
"rewards/rejected": -1.0306862592697144, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.217507645259939e-07, |
|
"logits/chosen": -2.540865182876587, |
|
"logits/rejected": -2.305960178375244, |
|
"logps/chosen": -327.03668212890625, |
|
"logps/rejected": -316.6974792480469, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.10364139080047607, |
|
"rewards/margins": 0.5791498422622681, |
|
"rewards/rejected": -0.6827912330627441, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.198394495412844e-07, |
|
"logits/chosen": -2.5025086402893066, |
|
"logits/rejected": -2.4841408729553223, |
|
"logps/chosen": -346.7644958496094, |
|
"logps/rejected": -261.13916015625, |
|
"loss": 0.5339, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.09549558907747269, |
|
"rewards/margins": 0.9585358500480652, |
|
"rewards/rejected": -1.054031491279602, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1792813455657493e-07, |
|
"logits/chosen": -2.3317127227783203, |
|
"logits/rejected": -2.3077309131622314, |
|
"logps/chosen": -263.4445495605469, |
|
"logps/rejected": -303.0923156738281, |
|
"loss": 0.5097, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.48188915848731995, |
|
"rewards/margins": 0.5629307627677917, |
|
"rewards/rejected": -1.0448198318481445, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1601681957186543e-07, |
|
"logits/chosen": -2.5383365154266357, |
|
"logits/rejected": -2.3553810119628906, |
|
"logps/chosen": -303.4425964355469, |
|
"logps/rejected": -202.1547088623047, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.03705354407429695, |
|
"rewards/margins": 1.2515580654144287, |
|
"rewards/rejected": -1.2145044803619385, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1410550458715595e-07, |
|
"logits/chosen": -2.290337085723877, |
|
"logits/rejected": -2.136826753616333, |
|
"logps/chosen": -309.16717529296875, |
|
"logps/rejected": -254.88674926757812, |
|
"loss": 0.5478, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.010205010883510113, |
|
"rewards/margins": 0.6125646829605103, |
|
"rewards/rejected": -0.6023596525192261, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1219418960244648e-07, |
|
"logits/chosen": -2.3295750617980957, |
|
"logits/rejected": -2.385953426361084, |
|
"logps/chosen": -215.65237426757812, |
|
"logps/rejected": -212.346435546875, |
|
"loss": 0.518, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3336523175239563, |
|
"rewards/margins": 0.6867604851722717, |
|
"rewards/rejected": -1.0204129219055176, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.10282874617737e-07, |
|
"logits/chosen": -2.4023945331573486, |
|
"logits/rejected": -2.4153189659118652, |
|
"logps/chosen": -240.5900115966797, |
|
"logps/rejected": -228.1215362548828, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.16529302299022675, |
|
"rewards/margins": 0.8604542016983032, |
|
"rewards/rejected": -1.0257470607757568, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0837155963302752e-07, |
|
"logits/chosen": -2.2563438415527344, |
|
"logits/rejected": -2.125026226043701, |
|
"logps/chosen": -267.7855224609375, |
|
"logps/rejected": -219.22030639648438, |
|
"loss": 0.5213, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.08529398590326309, |
|
"rewards/margins": 0.9822967648506165, |
|
"rewards/rejected": -1.0675907135009766, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0646024464831804e-07, |
|
"logits/chosen": -2.2471909523010254, |
|
"logits/rejected": -2.319463014602661, |
|
"logps/chosen": -255.18911743164062, |
|
"logps/rejected": -244.0147247314453, |
|
"loss": 0.5512, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.24005265533924103, |
|
"rewards/margins": 0.49893826246261597, |
|
"rewards/rejected": -0.7389909029006958, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0454892966360856e-07, |
|
"logits/chosen": -2.348158597946167, |
|
"logits/rejected": -2.28670334815979, |
|
"logps/chosen": -271.4364013671875, |
|
"logps/rejected": -196.4866943359375, |
|
"loss": 0.5465, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.04201481118798256, |
|
"rewards/margins": 1.2664215564727783, |
|
"rewards/rejected": -1.3084365129470825, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0263761467889908e-07, |
|
"logits/chosen": -2.466719150543213, |
|
"logits/rejected": -2.4625251293182373, |
|
"logps/chosen": -284.7028503417969, |
|
"logps/rejected": -224.3902587890625, |
|
"loss": 0.5451, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.19498039782047272, |
|
"rewards/margins": 0.3427335023880005, |
|
"rewards/rejected": -0.537713885307312, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.007262996941896e-07, |
|
"logits/chosen": -2.122545003890991, |
|
"logits/rejected": -2.2347145080566406, |
|
"logps/chosen": -289.37249755859375, |
|
"logps/rejected": -206.8185577392578, |
|
"loss": 0.5535, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.25089073181152344, |
|
"rewards/margins": 0.7263849973678589, |
|
"rewards/rejected": -0.9772756695747375, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.881498470948011e-08, |
|
"logits/chosen": -2.279235363006592, |
|
"logits/rejected": -2.394592523574829, |
|
"logps/chosen": -239.59011840820312, |
|
"logps/rejected": -206.2091827392578, |
|
"loss": 0.5505, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.09624668210744858, |
|
"rewards/margins": 0.9607799649238586, |
|
"rewards/rejected": -1.0570266246795654, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.690366972477065e-08, |
|
"logits/chosen": -2.580003499984741, |
|
"logits/rejected": -2.5049805641174316, |
|
"logps/chosen": -193.94044494628906, |
|
"logps/rejected": -129.92938232421875, |
|
"loss": 0.5501, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.29306405782699585, |
|
"rewards/margins": 0.3221284747123718, |
|
"rewards/rejected": -0.6151925325393677, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.499235474006116e-08, |
|
"logits/chosen": -2.2965879440307617, |
|
"logits/rejected": -2.5327494144439697, |
|
"logps/chosen": -437.3583068847656, |
|
"logps/rejected": -228.34335327148438, |
|
"loss": 0.5281, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.08578985184431076, |
|
"rewards/margins": 0.9632189869880676, |
|
"rewards/rejected": -1.0490089654922485, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.308103975535168e-08, |
|
"logits/chosen": -2.5039114952087402, |
|
"logits/rejected": -2.533884048461914, |
|
"logps/chosen": -328.80194091796875, |
|
"logps/rejected": -245.8604736328125, |
|
"loss": 0.5309, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1082562655210495, |
|
"rewards/margins": 0.7982146143913269, |
|
"rewards/rejected": -0.9064709544181824, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.116972477064219e-08, |
|
"logits/chosen": -2.3511176109313965, |
|
"logits/rejected": -2.471139430999756, |
|
"logps/chosen": -351.3453063964844, |
|
"logps/rejected": -363.6473693847656, |
|
"loss": 0.5501, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.06536252051591873, |
|
"rewards/margins": 0.8679245710372925, |
|
"rewards/rejected": -0.9332871437072754, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.925840978593272e-08, |
|
"logits/chosen": -2.4342617988586426, |
|
"logits/rejected": -2.3651440143585205, |
|
"logps/chosen": -185.2665252685547, |
|
"logps/rejected": -184.4652099609375, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.01564156636595726, |
|
"rewards/margins": 0.836595892906189, |
|
"rewards/rejected": -0.820954442024231, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.734709480122324e-08, |
|
"logits/chosen": -2.180828809738159, |
|
"logits/rejected": -2.3217592239379883, |
|
"logps/chosen": -162.02694702148438, |
|
"logps/rejected": -214.3283233642578, |
|
"loss": 0.5294, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.3506285548210144, |
|
"rewards/margins": 0.32000571489334106, |
|
"rewards/rejected": -0.6706342697143555, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.543577981651376e-08, |
|
"logits/chosen": -2.336467742919922, |
|
"logits/rejected": -2.384082317352295, |
|
"logps/chosen": -436.14337158203125, |
|
"logps/rejected": -267.2066955566406, |
|
"loss": 0.5303, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.0021473728120326996, |
|
"rewards/margins": 1.047534465789795, |
|
"rewards/rejected": -1.0496817827224731, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.352446483180428e-08, |
|
"logits/chosen": -2.4472270011901855, |
|
"logits/rejected": -2.302725315093994, |
|
"logps/chosen": -265.781005859375, |
|
"logps/rejected": -185.17855834960938, |
|
"loss": 0.5353, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.04335325211286545, |
|
"rewards/margins": 0.785746157169342, |
|
"rewards/rejected": -0.8290994763374329, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.161314984709481e-08, |
|
"logits/chosen": -2.546318292617798, |
|
"logits/rejected": -2.435561180114746, |
|
"logps/chosen": -341.5940246582031, |
|
"logps/rejected": -224.41000366210938, |
|
"loss": 0.5387, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.08983229100704193, |
|
"rewards/margins": 1.180863380432129, |
|
"rewards/rejected": -1.2706955671310425, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.970183486238531e-08, |
|
"logits/chosen": -2.327275276184082, |
|
"logits/rejected": -2.229645252227783, |
|
"logps/chosen": -198.76148986816406, |
|
"logps/rejected": -200.72247314453125, |
|
"loss": 0.5137, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.06232021003961563, |
|
"rewards/margins": 0.635228157043457, |
|
"rewards/rejected": -0.6975484490394592, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.779051987767583e-08, |
|
"logits/chosen": -2.560883045196533, |
|
"logits/rejected": -2.422776699066162, |
|
"logps/chosen": -403.97991943359375, |
|
"logps/rejected": -216.1969451904297, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.12103588879108429, |
|
"rewards/margins": 1.1826056241989136, |
|
"rewards/rejected": -1.0615696907043457, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.587920489296635e-08, |
|
"logits/chosen": -2.3078255653381348, |
|
"logits/rejected": -2.3434040546417236, |
|
"logps/chosen": -200.40487670898438, |
|
"logps/rejected": -198.0846405029297, |
|
"loss": 0.5296, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.07855480164289474, |
|
"rewards/margins": 0.8178641200065613, |
|
"rewards/rejected": -0.8964190483093262, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.396788990825688e-08, |
|
"logits/chosen": -2.438007354736328, |
|
"logits/rejected": -2.413921594619751, |
|
"logps/chosen": -205.8711395263672, |
|
"logps/rejected": -196.82382202148438, |
|
"loss": 0.5469, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.14088700711727142, |
|
"rewards/margins": 0.9455137252807617, |
|
"rewards/rejected": -1.0864007472991943, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.20565749235474e-08, |
|
"logits/chosen": -2.546194553375244, |
|
"logits/rejected": -2.4628357887268066, |
|
"logps/chosen": -181.03305053710938, |
|
"logps/rejected": -232.1865234375, |
|
"loss": 0.5298, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.23946937918663025, |
|
"rewards/margins": 0.2353799045085907, |
|
"rewards/rejected": -0.47484928369522095, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.014525993883792e-08, |
|
"logits/chosen": -2.6189305782318115, |
|
"logits/rejected": -2.1566860675811768, |
|
"logps/chosen": -291.4128112792969, |
|
"logps/rejected": -229.62069702148438, |
|
"loss": 0.5305, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.10792186111211777, |
|
"rewards/margins": 0.8227887153625488, |
|
"rewards/rejected": -0.7148668169975281, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.823394495412843e-08, |
|
"logits/chosen": -2.3303613662719727, |
|
"logits/rejected": -2.243708610534668, |
|
"logps/chosen": -240.57321166992188, |
|
"logps/rejected": -237.77621459960938, |
|
"loss": 0.53, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18570654094219208, |
|
"rewards/margins": 0.8395042419433594, |
|
"rewards/rejected": -1.0252107381820679, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.632262996941895e-08, |
|
"logits/chosen": -2.184171438217163, |
|
"logits/rejected": -2.440727949142456, |
|
"logps/chosen": -293.82208251953125, |
|
"logps/rejected": -232.8399658203125, |
|
"loss": 0.5551, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.14512939751148224, |
|
"rewards/margins": 0.7270814180374146, |
|
"rewards/rejected": -0.8722109794616699, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.441131498470948e-08, |
|
"logits/chosen": -2.308842182159424, |
|
"logits/rejected": -2.357908010482788, |
|
"logps/chosen": -276.0211181640625, |
|
"logps/rejected": -183.45028686523438, |
|
"loss": 0.5318, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.06593456864356995, |
|
"rewards/margins": 1.2839319705963135, |
|
"rewards/rejected": -1.3498667478561401, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -2.4197561740875244, |
|
"logits/rejected": -2.3011603355407715, |
|
"logps/chosen": -332.51519775390625, |
|
"logps/rejected": -231.3236846923828, |
|
"loss": 0.5394, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.08808217942714691, |
|
"rewards/margins": 0.9846493005752563, |
|
"rewards/rejected": -0.896567165851593, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.058868501529052e-08, |
|
"logits/chosen": -2.518052577972412, |
|
"logits/rejected": -2.333383083343506, |
|
"logps/chosen": -253.10482788085938, |
|
"logps/rejected": -198.84860229492188, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3139076828956604, |
|
"rewards/margins": 0.5393510460853577, |
|
"rewards/rejected": -0.8532587289810181, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.8677370030581035e-08, |
|
"logits/chosen": -2.1512084007263184, |
|
"logits/rejected": -2.312035083770752, |
|
"logps/chosen": -288.53680419921875, |
|
"logps/rejected": -222.78762817382812, |
|
"loss": 0.5122, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.13815611600875854, |
|
"rewards/margins": 1.1774489879608154, |
|
"rewards/rejected": -1.0392926931381226, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.6766055045871554e-08, |
|
"logits/chosen": -2.2546794414520264, |
|
"logits/rejected": -2.3793716430664062, |
|
"logps/chosen": -343.5367736816406, |
|
"logps/rejected": -309.8175354003906, |
|
"loss": 0.5171, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.32955241203308105, |
|
"rewards/margins": 0.6453115940093994, |
|
"rewards/rejected": -0.9748640060424805, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.485474006116208e-08, |
|
"logits/chosen": -2.549440622329712, |
|
"logits/rejected": -2.3844194412231445, |
|
"logps/chosen": -258.51177978515625, |
|
"logps/rejected": -215.14132690429688, |
|
"loss": 0.5318, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.36421215534210205, |
|
"rewards/margins": 1.2265236377716064, |
|
"rewards/rejected": -0.8623113632202148, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.294342507645259e-08, |
|
"logits/chosen": -2.530996561050415, |
|
"logits/rejected": -2.3681914806365967, |
|
"logps/chosen": -335.9801330566406, |
|
"logps/rejected": -263.645263671875, |
|
"loss": 0.5112, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.1152672991156578, |
|
"rewards/margins": 0.8920395970344543, |
|
"rewards/rejected": -0.7767722606658936, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.1032110091743117e-08, |
|
"logits/chosen": -2.185145378112793, |
|
"logits/rejected": -2.3489866256713867, |
|
"logps/chosen": -260.3304138183594, |
|
"logps/rejected": -180.7406768798828, |
|
"loss": 0.5443, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4966302514076233, |
|
"rewards/margins": 0.4965400695800781, |
|
"rewards/rejected": -0.9931701421737671, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.9120795107033635e-08, |
|
"logits/chosen": -2.3882431983947754, |
|
"logits/rejected": -2.4234635829925537, |
|
"logps/chosen": -267.7854919433594, |
|
"logps/rejected": -259.3836669921875, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1018451452255249, |
|
"rewards/margins": 1.0978671312332153, |
|
"rewards/rejected": -1.1997122764587402, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.7209480122324154e-08, |
|
"logits/chosen": -2.515261650085449, |
|
"logits/rejected": -2.3157753944396973, |
|
"logps/chosen": -299.7487487792969, |
|
"logps/rejected": -317.60516357421875, |
|
"loss": 0.5231, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.024449225515127182, |
|
"rewards/margins": 1.3952572345733643, |
|
"rewards/rejected": -1.3708080053329468, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.529816513761467e-08, |
|
"logits/chosen": -2.3622994422912598, |
|
"logits/rejected": -2.125053644180298, |
|
"logps/chosen": -248.30117797851562, |
|
"logps/rejected": -275.47894287109375, |
|
"loss": 0.4947, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.29935550689697266, |
|
"rewards/margins": 0.4452272355556488, |
|
"rewards/rejected": -0.7445827722549438, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.33868501529052e-08, |
|
"logits/chosen": -2.584453821182251, |
|
"logits/rejected": -2.18440580368042, |
|
"logps/chosen": -242.4678955078125, |
|
"logps/rejected": -205.87344360351562, |
|
"loss": 0.541, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.28912895917892456, |
|
"rewards/margins": 0.5633198618888855, |
|
"rewards/rejected": -0.8524488210678101, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.147553516819572e-08, |
|
"logits/chosen": -2.475851058959961, |
|
"logits/rejected": -2.3260536193847656, |
|
"logps/chosen": -214.3905029296875, |
|
"logps/rejected": -229.3086700439453, |
|
"loss": 0.544, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2511672079563141, |
|
"rewards/margins": 0.7018988132476807, |
|
"rewards/rejected": -0.9530660510063171, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.9564220183486236e-08, |
|
"logits/chosen": -2.319629192352295, |
|
"logits/rejected": -2.3051748275756836, |
|
"logps/chosen": -242.98965454101562, |
|
"logps/rejected": -185.3482666015625, |
|
"loss": 0.5039, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.17325511574745178, |
|
"rewards/margins": 0.6108760833740234, |
|
"rewards/rejected": -0.7841311693191528, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.7652905198776755e-08, |
|
"logits/chosen": -2.633847713470459, |
|
"logits/rejected": -2.576089382171631, |
|
"logps/chosen": -300.53460693359375, |
|
"logps/rejected": -229.891357421875, |
|
"loss": 0.5421, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.30726686120033264, |
|
"rewards/margins": 0.44638171792030334, |
|
"rewards/rejected": -0.753648579120636, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.574159021406728e-08, |
|
"logits/chosen": -2.354677438735962, |
|
"logits/rejected": -2.4129960536956787, |
|
"logps/chosen": -258.06610107421875, |
|
"logps/rejected": -236.42153930664062, |
|
"loss": 0.549, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.44077181816101074, |
|
"rewards/margins": 0.5614089369773865, |
|
"rewards/rejected": -1.002180814743042, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.383027522935779e-08, |
|
"logits/chosen": -2.444614887237549, |
|
"logits/rejected": -2.2034354209899902, |
|
"logps/chosen": -259.88946533203125, |
|
"logps/rejected": -274.34490966796875, |
|
"loss": 0.5441, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.042647577822208405, |
|
"rewards/margins": 1.146150827407837, |
|
"rewards/rejected": -1.1035032272338867, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.191896024464832e-08, |
|
"logits/chosen": -2.2883150577545166, |
|
"logits/rejected": -2.3496975898742676, |
|
"logps/chosen": -292.2751770019531, |
|
"logps/rejected": -200.29855346679688, |
|
"loss": 0.5666, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.0850500836968422, |
|
"rewards/margins": 1.3057624101638794, |
|
"rewards/rejected": -1.2207123041152954, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.0007645259938836e-08, |
|
"logits/chosen": -2.456864356994629, |
|
"logits/rejected": -2.5583481788635254, |
|
"logps/chosen": -241.4561767578125, |
|
"logps/rejected": -275.2192077636719, |
|
"loss": 0.5165, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.22843988239765167, |
|
"rewards/margins": 0.4716885983943939, |
|
"rewards/rejected": -0.7001284956932068, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.809633027522936e-08, |
|
"logits/chosen": -2.2733702659606934, |
|
"logits/rejected": -2.3403806686401367, |
|
"logps/chosen": -196.03524780273438, |
|
"logps/rejected": -215.77767944335938, |
|
"loss": 0.5142, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.21540656685829163, |
|
"rewards/margins": 0.6115992665290833, |
|
"rewards/rejected": -0.8270059823989868, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.6185015290519877e-08, |
|
"logits/chosen": -2.3207640647888184, |
|
"logits/rejected": -2.3754372596740723, |
|
"logps/chosen": -177.1396026611328, |
|
"logps/rejected": -185.77505493164062, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.019303400069475174, |
|
"rewards/margins": 0.6052744388580322, |
|
"rewards/rejected": -0.5859710574150085, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.4273700305810396e-08, |
|
"logits/chosen": -2.4921469688415527, |
|
"logits/rejected": -2.460731029510498, |
|
"logps/chosen": -291.66229248046875, |
|
"logps/rejected": -241.90072631835938, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.022645365446805954, |
|
"rewards/margins": 1.2275021076202393, |
|
"rewards/rejected": -1.2048569917678833, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.2362385321100918e-08, |
|
"logits/chosen": -2.3739137649536133, |
|
"logits/rejected": -2.2125802040100098, |
|
"logps/chosen": -286.7039794921875, |
|
"logps/rejected": -244.8232421875, |
|
"loss": 0.5323, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.10285166651010513, |
|
"rewards/margins": 0.4872564375400543, |
|
"rewards/rejected": -0.5901080965995789, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.0451070336391437e-08, |
|
"logits/chosen": -1.8592112064361572, |
|
"logits/rejected": -1.7996069192886353, |
|
"logps/chosen": -194.89865112304688, |
|
"logps/rejected": -183.8830108642578, |
|
"loss": 0.5462, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.18555238842964172, |
|
"rewards/margins": 0.03723742812871933, |
|
"rewards/rejected": -0.22278980910778046, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.8539755351681956e-08, |
|
"logits/chosen": -2.2919793128967285, |
|
"logits/rejected": -2.5266997814178467, |
|
"logps/chosen": -407.080810546875, |
|
"logps/rejected": -314.6758117675781, |
|
"loss": 0.5106, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.05720346048474312, |
|
"rewards/margins": 0.6664168238639832, |
|
"rewards/rejected": -0.6092134714126587, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6628440366972478e-08, |
|
"logits/chosen": -2.0835564136505127, |
|
"logits/rejected": -2.263801097869873, |
|
"logps/chosen": -319.478271484375, |
|
"logps/rejected": -161.7843017578125, |
|
"loss": 0.5252, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.046999458223581314, |
|
"rewards/margins": 1.0421249866485596, |
|
"rewards/rejected": -1.0891244411468506, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4717125382262997e-08, |
|
"logits/chosen": -2.5174002647399902, |
|
"logits/rejected": -2.6593270301818848, |
|
"logps/chosen": -317.7415771484375, |
|
"logps/rejected": -299.45513916015625, |
|
"loss": 0.5221, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.22343751788139343, |
|
"rewards/margins": 0.47214531898498535, |
|
"rewards/rejected": -0.695582926273346, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2805810397553517e-08, |
|
"logits/chosen": -2.3480212688446045, |
|
"logits/rejected": -2.281808376312256, |
|
"logps/chosen": -368.12774658203125, |
|
"logps/rejected": -359.60064697265625, |
|
"loss": 0.5712, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.30808025598526, |
|
"rewards/margins": 0.5734547972679138, |
|
"rewards/rejected": -0.8815349340438843, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0894495412844038e-08, |
|
"logits/chosen": -2.2814998626708984, |
|
"logits/rejected": -2.3192315101623535, |
|
"logps/chosen": -251.41836547851562, |
|
"logps/rejected": -233.8671417236328, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1413925290107727, |
|
"rewards/margins": 0.6429818272590637, |
|
"rewards/rejected": -0.7843743562698364, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.983180428134555e-09, |
|
"logits/chosen": -2.0733284950256348, |
|
"logits/rejected": -2.226485252380371, |
|
"logps/chosen": -212.7808380126953, |
|
"logps/rejected": -141.1074676513672, |
|
"loss": 0.5101, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.39890027046203613, |
|
"rewards/margins": 0.5048045516014099, |
|
"rewards/rejected": -0.903704822063446, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.071865443425076e-09, |
|
"logits/chosen": -2.551570415496826, |
|
"logits/rejected": -2.3837685585021973, |
|
"logps/chosen": -274.71490478515625, |
|
"logps/rejected": -316.5244140625, |
|
"loss": 0.5186, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2874767482280731, |
|
"rewards/margins": 0.5797588229179382, |
|
"rewards/rejected": -0.867235541343689, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.1605504587155965e-09, |
|
"logits/chosen": -2.5565171241760254, |
|
"logits/rejected": -2.4579882621765137, |
|
"logps/chosen": -253.5585479736328, |
|
"logps/rejected": -178.70553588867188, |
|
"loss": 0.5493, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.20143918693065643, |
|
"rewards/margins": 0.6308044195175171, |
|
"rewards/rejected": -0.8322436213493347, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.249235474006116e-09, |
|
"logits/chosen": -2.354980707168579, |
|
"logits/rejected": -2.389235496520996, |
|
"logps/chosen": -283.6640319824219, |
|
"logps/rejected": -232.73165893554688, |
|
"loss": 0.532, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4438861906528473, |
|
"rewards/margins": 0.5923529267311096, |
|
"rewards/rejected": -1.0362392663955688, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.3379204892966359e-09, |
|
"logits/chosen": -2.1370351314544678, |
|
"logits/rejected": -2.2694876194000244, |
|
"logps/chosen": -225.5801239013672, |
|
"logps/rejected": -227.0513916015625, |
|
"loss": 0.5242, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.11508389562368393, |
|
"rewards/margins": 1.1479651927947998, |
|
"rewards/rejected": -1.032881259918213, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -2.085988998413086, |
|
"eval_logits/rejected": -1.9190013408660889, |
|
"eval_logps/chosen": -248.65191650390625, |
|
"eval_logps/rejected": -198.58494567871094, |
|
"eval_loss": 0.5269633531570435, |
|
"eval_rewards/accuracies": 0.78125, |
|
"eval_rewards/chosen": -0.12098389863967896, |
|
"eval_rewards/margins": 0.8767741918563843, |
|
"eval_rewards/rejected": -0.9977580308914185, |
|
"eval_runtime": 50.0854, |
|
"eval_samples_per_second": 39.932, |
|
"eval_steps_per_second": 0.319, |
|
"step": 2907 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2907, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5643668570057567, |
|
"train_runtime": 8096.9375, |
|
"train_samples_per_second": 22.959, |
|
"train_steps_per_second": 0.359 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2907, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|