|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 2428, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 2.0576131687242803e-08, |
|
"logits/chosen": 0.24564924836158752, |
|
"logits/rejected": 1.0062695741653442, |
|
"logps/chosen": -229.83255004882812, |
|
"logps/rejected": -164.65399169921875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 2.05761316872428e-07, |
|
"logits/chosen": -0.0490909218788147, |
|
"logits/rejected": 0.6121826171875, |
|
"logps/chosen": -238.83880615234375, |
|
"logps/rejected": -207.5596923828125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4027777910232544, |
|
"rewards/chosen": -0.00032901231315918267, |
|
"rewards/margins": 0.0006913852412253618, |
|
"rewards/margins_max": 0.002890574047341943, |
|
"rewards/margins_min": -0.0015078035648912191, |
|
"rewards/margins_std": 0.0031101228669285774, |
|
"rewards/rejected": -0.001020397525280714, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 4.11522633744856e-07, |
|
"logits/chosen": 0.05002685636281967, |
|
"logits/rejected": 0.6022137403488159, |
|
"logps/chosen": -255.0900115966797, |
|
"logps/rejected": -220.280517578125, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.00017908953304868191, |
|
"rewards/margins": 0.0004872865101788193, |
|
"rewards/margins_max": 0.0039043165743350983, |
|
"rewards/margins_min": -0.0029297438450157642, |
|
"rewards/margins_std": 0.004832410719245672, |
|
"rewards/rejected": -0.0003081969916820526, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 6.17283950617284e-07, |
|
"logits/chosen": 0.07209397852420807, |
|
"logits/rejected": 0.5803325176239014, |
|
"logps/chosen": -241.93930053710938, |
|
"logps/rejected": -229.0738067626953, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0010193719062954187, |
|
"rewards/margins": 0.001458501792512834, |
|
"rewards/margins_max": 0.0036475714296102524, |
|
"rewards/margins_min": -0.0007305679609999061, |
|
"rewards/margins_std": 0.0030958119314163923, |
|
"rewards/rejected": -0.0004391298571135849, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 8.23045267489712e-07, |
|
"logits/chosen": 0.08637161552906036, |
|
"logits/rejected": 0.6608158946037292, |
|
"logps/chosen": -272.7409973144531, |
|
"logps/rejected": -232.7211151123047, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.0017092287307605147, |
|
"rewards/margins": 0.002595087978988886, |
|
"rewards/margins_max": 0.0045972722582519054, |
|
"rewards/margins_min": 0.0005929030594415963, |
|
"rewards/margins_std": 0.0028315167874097824, |
|
"rewards/rejected": -0.0008858589571900666, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 1.02880658436214e-06, |
|
"logits/chosen": 0.039637185633182526, |
|
"logits/rejected": 0.42562946677207947, |
|
"logps/chosen": -248.4722137451172, |
|
"logps/rejected": -249.7132568359375, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.0025177341885864735, |
|
"rewards/margins": 0.004976312164217234, |
|
"rewards/margins_max": 0.008509628474712372, |
|
"rewards/margins_min": 0.0014429950388148427, |
|
"rewards/margins_std": 0.004996864590793848, |
|
"rewards/rejected": -0.002458578208461404, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 1.234567901234568e-06, |
|
"logits/chosen": 0.030338022857904434, |
|
"logits/rejected": 0.6016219854354858, |
|
"logps/chosen": -242.9213409423828, |
|
"logps/rejected": -205.34011840820312, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.002770364750176668, |
|
"rewards/margins": 0.006898685358464718, |
|
"rewards/margins_max": 0.01105786394327879, |
|
"rewards/margins_min": 0.002739507704973221, |
|
"rewards/margins_std": 0.005881965160369873, |
|
"rewards/rejected": -0.00412832060828805, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.43359375, |
|
"learning_rate": 1.440329218106996e-06, |
|
"logits/chosen": 0.12884962558746338, |
|
"logits/rejected": 0.6521704196929932, |
|
"logps/chosen": -233.1442108154297, |
|
"logps/rejected": -180.2538299560547, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.0031278387177735567, |
|
"rewards/margins": 0.008645228110253811, |
|
"rewards/margins_max": 0.012719206511974335, |
|
"rewards/margins_min": 0.004571248777210712, |
|
"rewards/margins_std": 0.005761477164924145, |
|
"rewards/rejected": -0.005517390090972185, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.44140625, |
|
"learning_rate": 1.646090534979424e-06, |
|
"logits/chosen": -0.02626526914536953, |
|
"logits/rejected": 0.4111458361148834, |
|
"logps/chosen": -235.10330200195312, |
|
"logps/rejected": -224.97488403320312, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.006755024194717407, |
|
"rewards/margins": 0.014125975780189037, |
|
"rewards/margins_max": 0.020800447091460228, |
|
"rewards/margins_min": 0.007451505865901709, |
|
"rewards/margins_std": 0.009439127519726753, |
|
"rewards/rejected": -0.007370952516794205, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"logits/chosen": 0.26549288630485535, |
|
"logits/rejected": 0.6299537420272827, |
|
"logps/chosen": -205.5663604736328, |
|
"logps/rejected": -195.4409637451172, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.00643587950617075, |
|
"rewards/margins": 0.016837503761053085, |
|
"rewards/margins_max": 0.02518610656261444, |
|
"rewards/margins_min": 0.008488905616104603, |
|
"rewards/margins_std": 0.011806704103946686, |
|
"rewards/rejected": -0.010401626117527485, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 2.05761316872428e-06, |
|
"logits/chosen": -0.02292916737496853, |
|
"logits/rejected": 0.4407041072845459, |
|
"logps/chosen": -237.1365509033203, |
|
"logps/rejected": -234.0122833251953, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.008963329717516899, |
|
"rewards/margins": 0.023605378344655037, |
|
"rewards/margins_max": 0.0348113588988781, |
|
"rewards/margins_min": 0.012399397790431976, |
|
"rewards/margins_std": 0.015847649425268173, |
|
"rewards/rejected": -0.014642049558460712, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.5, |
|
"learning_rate": 2.263374485596708e-06, |
|
"logits/chosen": 0.06019078567624092, |
|
"logits/rejected": 0.6456455588340759, |
|
"logps/chosen": -252.55941772460938, |
|
"logps/rejected": -202.68516540527344, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.01171482540667057, |
|
"rewards/margins": 0.0284560676664114, |
|
"rewards/margins_max": 0.040376029908657074, |
|
"rewards/margins_min": 0.016536109149456024, |
|
"rewards/margins_std": 0.016857367008924484, |
|
"rewards/rejected": -0.01674124039709568, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 2.469135802469136e-06, |
|
"logits/chosen": 0.03018159233033657, |
|
"logits/rejected": 0.5444492101669312, |
|
"logps/chosen": -230.00732421875, |
|
"logps/rejected": -204.04888916015625, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.010969040915369987, |
|
"rewards/margins": 0.03301534429192543, |
|
"rewards/margins_max": 0.045270394533872604, |
|
"rewards/margins_min": 0.020760290324687958, |
|
"rewards/margins_std": 0.017331259325146675, |
|
"rewards/rejected": -0.022046301513910294, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 2.674897119341564e-06, |
|
"logits/chosen": 0.1473396122455597, |
|
"logits/rejected": 0.6573908925056458, |
|
"logps/chosen": -263.9186096191406, |
|
"logps/rejected": -234.0851593017578, |
|
"loss": 0.6704, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.019587906077504158, |
|
"rewards/margins": 0.04693462699651718, |
|
"rewards/margins_max": 0.06604455411434174, |
|
"rewards/margins_min": 0.02782469615340233, |
|
"rewards/margins_std": 0.027025526389479637, |
|
"rewards/rejected": -0.027346724644303322, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 2.880658436213992e-06, |
|
"logits/chosen": 0.1025664433836937, |
|
"logits/rejected": 0.6043235063552856, |
|
"logps/chosen": -249.485595703125, |
|
"logps/rejected": -218.0284423828125, |
|
"loss": 0.6651, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.020640945062041283, |
|
"rewards/margins": 0.054881542921066284, |
|
"rewards/margins_max": 0.0779130607843399, |
|
"rewards/margins_min": 0.03185003623366356, |
|
"rewards/margins_std": 0.032571472227573395, |
|
"rewards/rejected": -0.03424059972167015, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 3.08641975308642e-06, |
|
"logits/chosen": 0.03741316497325897, |
|
"logits/rejected": 0.730408787727356, |
|
"logps/chosen": -271.1098327636719, |
|
"logps/rejected": -231.9659423828125, |
|
"loss": 0.658, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03794144093990326, |
|
"rewards/margins": 0.07607483863830566, |
|
"rewards/margins_max": 0.10968559980392456, |
|
"rewards/margins_min": 0.04246408864855766, |
|
"rewards/margins_std": 0.04753277823328972, |
|
"rewards/rejected": -0.038133405148983, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.44140625, |
|
"learning_rate": 3.292181069958848e-06, |
|
"logits/chosen": 0.027915984392166138, |
|
"logits/rejected": 0.5170690417289734, |
|
"logps/chosen": -227.6484375, |
|
"logps/rejected": -201.67355346679688, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.029438817873597145, |
|
"rewards/margins": 0.07748283445835114, |
|
"rewards/margins_max": 0.1150212287902832, |
|
"rewards/margins_min": 0.03994445875287056, |
|
"rewards/margins_std": 0.05308728292584419, |
|
"rewards/rejected": -0.04804402217268944, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 3.4979423868312762e-06, |
|
"logits/chosen": 0.06907240301370621, |
|
"logits/rejected": 0.5936463475227356, |
|
"logps/chosen": -229.10653686523438, |
|
"logps/rejected": -239.67593383789062, |
|
"loss": 0.6451, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.05052729696035385, |
|
"rewards/margins": 0.09715622663497925, |
|
"rewards/margins_max": 0.14001211524009705, |
|
"rewards/margins_min": 0.05430033057928085, |
|
"rewards/margins_std": 0.060607392340898514, |
|
"rewards/rejected": -0.046628933399915695, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"logits/chosen": 0.23458850383758545, |
|
"logits/rejected": 0.604918360710144, |
|
"logps/chosen": -207.04891967773438, |
|
"logps/rejected": -222.05416870117188, |
|
"loss": 0.6477, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.021370261907577515, |
|
"rewards/margins": 0.09243619441986084, |
|
"rewards/margins_max": 0.13056758046150208, |
|
"rewards/margins_min": 0.0543048158288002, |
|
"rewards/margins_std": 0.05392590910196304, |
|
"rewards/rejected": -0.07106593251228333, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.53515625, |
|
"learning_rate": 3.909465020576132e-06, |
|
"logits/chosen": 0.1551034152507782, |
|
"logits/rejected": 0.7508169412612915, |
|
"logps/chosen": -252.1875457763672, |
|
"logps/rejected": -227.7545166015625, |
|
"loss": 0.6276, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.03474985808134079, |
|
"rewards/margins": 0.13373127579689026, |
|
"rewards/margins_max": 0.19556060433387756, |
|
"rewards/margins_min": 0.07190193980932236, |
|
"rewards/margins_std": 0.0874398797750473, |
|
"rewards/rejected": -0.09898141771554947, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 4.11522633744856e-06, |
|
"logits/chosen": 0.06500478088855743, |
|
"logits/rejected": 0.7195091247558594, |
|
"logps/chosen": -267.84259033203125, |
|
"logps/rejected": -238.9481658935547, |
|
"loss": 0.6207, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.0433725044131279, |
|
"rewards/margins": 0.16162040829658508, |
|
"rewards/margins_max": 0.2354629933834076, |
|
"rewards/margins_min": 0.08777783066034317, |
|
"rewards/margins_std": 0.10442917048931122, |
|
"rewards/rejected": -0.11824791133403778, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 4.3209876543209875e-06, |
|
"logits/chosen": 0.07097109407186508, |
|
"logits/rejected": 0.5758925676345825, |
|
"logps/chosen": -244.7234649658203, |
|
"logps/rejected": -232.4202117919922, |
|
"loss": 0.6185, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.027079004794359207, |
|
"rewards/margins": 0.17639262974262238, |
|
"rewards/margins_max": 0.2551138401031494, |
|
"rewards/margins_min": 0.09767140448093414, |
|
"rewards/margins_std": 0.1113286241889, |
|
"rewards/rejected": -0.14931362867355347, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 4.526748971193416e-06, |
|
"logits/chosen": 0.10515166819095612, |
|
"logits/rejected": 0.6553866267204285, |
|
"logps/chosen": -236.6344451904297, |
|
"logps/rejected": -224.96749877929688, |
|
"loss": 0.6016, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.01706048846244812, |
|
"rewards/margins": 0.21246306598186493, |
|
"rewards/margins_max": 0.31518980860710144, |
|
"rewards/margins_min": 0.10973634570837021, |
|
"rewards/margins_std": 0.14527757465839386, |
|
"rewards/rejected": -0.1954026073217392, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.53125, |
|
"learning_rate": 4.732510288065844e-06, |
|
"logits/chosen": -0.010027505457401276, |
|
"logits/rejected": 0.5649107098579407, |
|
"logps/chosen": -292.68572998046875, |
|
"logps/rejected": -271.10955810546875, |
|
"loss": 0.5803, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.009528962895274162, |
|
"rewards/margins": 0.2713000476360321, |
|
"rewards/margins_max": 0.4035729765892029, |
|
"rewards/margins_min": 0.13902710378170013, |
|
"rewards/margins_std": 0.18706218898296356, |
|
"rewards/rejected": -0.2808290421962738, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.6015625, |
|
"learning_rate": 4.938271604938272e-06, |
|
"logits/chosen": 0.028728529810905457, |
|
"logits/rejected": 0.5883212685585022, |
|
"logps/chosen": -252.4741973876953, |
|
"logps/rejected": -263.96063232421875, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.026816654950380325, |
|
"rewards/margins": 0.36577218770980835, |
|
"rewards/margins_max": 0.5184392333030701, |
|
"rewards/margins_min": 0.21310511231422424, |
|
"rewards/margins_std": 0.2159038782119751, |
|
"rewards/rejected": -0.39258888363838196, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 4.999873380880316e-06, |
|
"logits/chosen": -0.04030367732048035, |
|
"logits/rejected": 0.5767666697502136, |
|
"logps/chosen": -280.7464904785156, |
|
"logps/rejected": -289.3246154785156, |
|
"loss": 0.5451, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.04451828449964523, |
|
"rewards/margins": 0.31411212682724, |
|
"rewards/margins_max": 0.4603235125541687, |
|
"rewards/margins_min": 0.16790074110031128, |
|
"rewards/margins_std": 0.20677416026592255, |
|
"rewards/rejected": -0.3586304783821106, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 4.999253236476256e-06, |
|
"logits/chosen": 0.11786775290966034, |
|
"logits/rejected": 0.7519556879997253, |
|
"logps/chosen": -285.5740966796875, |
|
"logps/rejected": -260.8897399902344, |
|
"loss": 0.5191, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.16191735863685608, |
|
"rewards/margins": 0.3822050988674164, |
|
"rewards/margins_max": 0.6438002586364746, |
|
"rewards/margins_min": 0.12060992419719696, |
|
"rewards/margins_std": 0.36995142698287964, |
|
"rewards/rejected": -0.5441225171089172, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.62109375, |
|
"learning_rate": 4.998116438252842e-06, |
|
"logits/chosen": -0.01648726500570774, |
|
"logits/rejected": 0.596198558807373, |
|
"logps/chosen": -308.7812194824219, |
|
"logps/rejected": -326.95343017578125, |
|
"loss": 0.4666, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.16989798843860626, |
|
"rewards/margins": 0.7350731492042542, |
|
"rewards/margins_max": 1.1802551746368408, |
|
"rewards/margins_min": 0.28989118337631226, |
|
"rewards/margins_std": 0.6295824646949768, |
|
"rewards/rejected": -0.9049711227416992, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 4.9964632212127305e-06, |
|
"logits/chosen": 0.0899326428771019, |
|
"logits/rejected": 0.6752752065658569, |
|
"logps/chosen": -290.4791564941406, |
|
"logps/rejected": -322.41815185546875, |
|
"loss": 0.4527, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.32237640023231506, |
|
"rewards/margins": 0.6386328935623169, |
|
"rewards/margins_max": 0.9289911985397339, |
|
"rewards/margins_min": 0.3482745587825775, |
|
"rewards/margins_std": 0.41062870621681213, |
|
"rewards/rejected": -0.9610093235969543, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 4.994293927114362e-06, |
|
"logits/chosen": 0.06901798397302628, |
|
"logits/rejected": 0.6215580105781555, |
|
"logps/chosen": -290.79632568359375, |
|
"logps/rejected": -373.4809265136719, |
|
"loss": 0.4322, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.4801758825778961, |
|
"rewards/margins": 1.097102403640747, |
|
"rewards/margins_max": 1.8094851970672607, |
|
"rewards/margins_min": 0.3847196698188782, |
|
"rewards/margins_std": 1.0074613094329834, |
|
"rewards/rejected": -1.5772783756256104, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.66796875, |
|
"learning_rate": 4.991609004401324e-06, |
|
"logits/chosen": -0.018922004848718643, |
|
"logits/rejected": 0.6193499565124512, |
|
"logps/chosen": -317.2272644042969, |
|
"logps/rejected": -400.44696044921875, |
|
"loss": 0.3836, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.622567355632782, |
|
"rewards/margins": 1.1482007503509521, |
|
"rewards/margins_max": 1.7601646184921265, |
|
"rewards/margins_min": 0.5362368226051331, |
|
"rewards/margins_std": 0.8654475212097168, |
|
"rewards/rejected": -1.770768165588379, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.4375, |
|
"learning_rate": 4.988409008109638e-06, |
|
"logits/chosen": 0.18614912033081055, |
|
"logits/rejected": 0.5903946161270142, |
|
"logps/chosen": -306.6070251464844, |
|
"logps/rejected": -419.8663024902344, |
|
"loss": 0.3599, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.8805279731750488, |
|
"rewards/margins": 1.354397177696228, |
|
"rewards/margins_max": 2.106048107147217, |
|
"rewards/margins_min": 0.6027460098266602, |
|
"rewards/margins_std": 1.062995195388794, |
|
"rewards/rejected": -2.2349250316619873, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 3.484375, |
|
"learning_rate": 4.984694599753024e-06, |
|
"logits/chosen": 0.04539443925023079, |
|
"logits/rejected": 0.5839862823486328, |
|
"logps/chosen": -364.6256408691406, |
|
"logps/rejected": -489.1861877441406, |
|
"loss": 0.3496, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.2731292247772217, |
|
"rewards/margins": 1.5762968063354492, |
|
"rewards/margins_max": 2.456343650817871, |
|
"rewards/margins_min": 0.696249783039093, |
|
"rewards/margins_std": 1.2445745468139648, |
|
"rewards/rejected": -2.849426031112671, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 4.980466547186149e-06, |
|
"logits/chosen": -0.06857666373252869, |
|
"logits/rejected": 0.6623315811157227, |
|
"logps/chosen": -401.6612243652344, |
|
"logps/rejected": -569.5274047851562, |
|
"loss": 0.2962, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.7086451053619385, |
|
"rewards/margins": 2.181455135345459, |
|
"rewards/margins_max": 3.330390453338623, |
|
"rewards/margins_min": 1.0325195789337158, |
|
"rewards/margins_std": 1.6248401403427124, |
|
"rewards/rejected": -3.8901000022888184, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 4.975725724445898e-06, |
|
"logits/chosen": 0.18517382442951202, |
|
"logits/rejected": 0.7153784036636353, |
|
"logps/chosen": -425.679443359375, |
|
"logps/rejected": -613.3704833984375, |
|
"loss": 0.3906, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.7429125308990479, |
|
"rewards/margins": 2.2590205669403076, |
|
"rewards/margins_max": 3.9183871746063232, |
|
"rewards/margins_min": 0.5996544361114502, |
|
"rewards/margins_std": 2.34669828414917, |
|
"rewards/rejected": -4.0019330978393555, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.7109375, |
|
"learning_rate": 4.9704731115706805e-06, |
|
"logits/chosen": 0.06402029097080231, |
|
"logits/rejected": 0.6806127429008484, |
|
"logps/chosen": -403.9203186035156, |
|
"logps/rejected": -743.2752075195312, |
|
"loss": 0.2744, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.463424801826477, |
|
"rewards/margins": 3.6999382972717285, |
|
"rewards/margins_max": 6.0486063957214355, |
|
"rewards/margins_min": 1.3512706756591797, |
|
"rewards/margins_std": 3.3215174674987793, |
|
"rewards/rejected": -5.163362979888916, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 4.71875, |
|
"learning_rate": 4.964709794397846e-06, |
|
"logits/chosen": 0.17624667286872864, |
|
"logits/rejected": 0.8073934316635132, |
|
"logps/chosen": -420.7196350097656, |
|
"logps/rejected": -765.087158203125, |
|
"loss": 0.2887, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.7227516174316406, |
|
"rewards/margins": 3.5676417350769043, |
|
"rewards/margins_max": 6.014761447906494, |
|
"rewards/margins_min": 1.1205217838287354, |
|
"rewards/margins_std": 3.460750102996826, |
|
"rewards/rejected": -5.290392875671387, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.546875, |
|
"learning_rate": 4.9584369643392076e-06, |
|
"logits/chosen": 0.145114004611969, |
|
"logits/rejected": 0.8146367073059082, |
|
"logps/chosen": -478.583984375, |
|
"logps/rejected": -902.0680541992188, |
|
"loss": 0.2173, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.5044260025024414, |
|
"rewards/margins": 4.49790096282959, |
|
"rewards/margins_max": 7.457921028137207, |
|
"rewards/margins_min": 1.5378811359405518, |
|
"rewards/margins_std": 4.186100006103516, |
|
"rewards/rejected": -7.002326965332031, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.046875, |
|
"learning_rate": 4.951655918134749e-06, |
|
"logits/chosen": 0.10492346435785294, |
|
"logits/rejected": 0.6990815997123718, |
|
"logps/chosen": -523.7138671875, |
|
"logps/rejected": -888.4390869140625, |
|
"loss": 0.2891, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.9183859825134277, |
|
"rewards/margins": 3.839510679244995, |
|
"rewards/margins_max": 6.660401344299316, |
|
"rewards/margins_min": 1.0186195373535156, |
|
"rewards/margins_std": 3.989342451095581, |
|
"rewards/rejected": -6.757896423339844, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.03125, |
|
"learning_rate": 4.944368057584568e-06, |
|
"logits/chosen": 0.10440587997436523, |
|
"logits/rejected": 0.8017401695251465, |
|
"logps/chosen": -489.6419982910156, |
|
"logps/rejected": -886.7931518554688, |
|
"loss": 0.2525, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.3617236614227295, |
|
"rewards/margins": 4.341439247131348, |
|
"rewards/margins_max": 6.716013431549072, |
|
"rewards/margins_min": 1.9668653011322021, |
|
"rewards/margins_std": 3.358154773712158, |
|
"rewards/rejected": -6.703163146972656, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 4.936574889259076e-06, |
|
"logits/chosen": 0.20124737918376923, |
|
"logits/rejected": 0.9363399744033813, |
|
"logps/chosen": -510.68682861328125, |
|
"logps/rejected": -808.8845825195312, |
|
"loss": 0.2781, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.1757853031158447, |
|
"rewards/margins": 3.780200958251953, |
|
"rewards/margins_max": 6.390686511993408, |
|
"rewards/margins_min": 1.1697145700454712, |
|
"rewards/margins_std": 3.691784620285034, |
|
"rewards/rejected": -5.9559855461120605, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.7421875, |
|
"learning_rate": 4.928278024187572e-06, |
|
"logits/chosen": 0.07302796840667725, |
|
"logits/rejected": 0.7525766491889954, |
|
"logps/chosen": -441.041015625, |
|
"logps/rejected": -718.113525390625, |
|
"loss": 0.2566, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.917728066444397, |
|
"rewards/margins": 3.044644832611084, |
|
"rewards/margins_max": 4.838767051696777, |
|
"rewards/margins_min": 1.2505226135253906, |
|
"rewards/margins_std": 2.537271738052368, |
|
"rewards/rejected": -4.962372779846191, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 4.91947917752519e-06, |
|
"logits/chosen": 0.2481038123369217, |
|
"logits/rejected": 0.8633731603622437, |
|
"logps/chosen": -495.15234375, |
|
"logps/rejected": -929.7926635742188, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.3359451293945312, |
|
"rewards/margins": 4.435263156890869, |
|
"rewards/margins_max": 6.896138668060303, |
|
"rewards/margins_min": 1.9743881225585938, |
|
"rewards/margins_std": 3.480203151702881, |
|
"rewards/rejected": -6.7712082862854, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 4.91018016819835e-06, |
|
"logits/chosen": 0.19703389704227448, |
|
"logits/rejected": 0.8145115971565247, |
|
"logps/chosen": -463.3583068847656, |
|
"logps/rejected": -735.3023681640625, |
|
"loss": 0.3076, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0541656017303467, |
|
"rewards/margins": 3.166626214981079, |
|
"rewards/margins_max": 5.232685565948486, |
|
"rewards/margins_min": 1.1005662679672241, |
|
"rewards/margins_std": 2.921849489212036, |
|
"rewards/rejected": -5.220791816711426, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 4.900382918528732e-06, |
|
"logits/chosen": 0.37838277220726013, |
|
"logits/rejected": 0.9560055732727051, |
|
"logps/chosen": -490.0171813964844, |
|
"logps/rejected": -867.9541015625, |
|
"loss": 0.2098, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.327396869659424, |
|
"rewards/margins": 4.201695919036865, |
|
"rewards/margins_max": 6.30963659286499, |
|
"rewards/margins_min": 2.0937557220458984, |
|
"rewards/margins_std": 2.9810779094696045, |
|
"rewards/rejected": -6.529093265533447, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 4.890089453835894e-06, |
|
"logits/chosen": 0.16315485537052155, |
|
"logits/rejected": 0.8696213960647583, |
|
"logps/chosen": -516.959716796875, |
|
"logps/rejected": -999.4393310546875, |
|
"loss": 0.1884, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.290456533432007, |
|
"rewards/margins": 5.147913932800293, |
|
"rewards/margins_max": 8.1465425491333, |
|
"rewards/margins_min": 2.1492867469787598, |
|
"rewards/margins_std": 4.240699768066406, |
|
"rewards/rejected": -7.438370704650879, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.828125, |
|
"learning_rate": 4.879301902018592e-06, |
|
"logits/chosen": 0.2864415943622589, |
|
"logits/rejected": 0.7803254127502441, |
|
"logps/chosen": -533.4550170898438, |
|
"logps/rejected": -1063.6312255859375, |
|
"loss": 0.2423, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.9031295776367188, |
|
"rewards/margins": 5.533447265625, |
|
"rewards/margins_max": 8.971426010131836, |
|
"rewards/margins_min": 2.0954694747924805, |
|
"rewards/margins_std": 4.862034797668457, |
|
"rewards/rejected": -8.436576843261719, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 4.868022493114887e-06, |
|
"logits/chosen": 0.33959221839904785, |
|
"logits/rejected": 1.040248155593872, |
|
"logps/chosen": -664.7828369140625, |
|
"logps/rejected": -1284.1910400390625, |
|
"loss": 0.1801, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.016963958740234, |
|
"rewards/margins": 6.7210187911987305, |
|
"rewards/margins_max": 10.781229019165039, |
|
"rewards/margins_min": 2.6608097553253174, |
|
"rewards/margins_std": 5.742003440856934, |
|
"rewards/rejected": -10.737983703613281, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.8828125, |
|
"learning_rate": 4.856253558841153e-06, |
|
"logits/chosen": 0.43446415662765503, |
|
"logits/rejected": 1.01176118850708, |
|
"logps/chosen": -664.5281982421875, |
|
"logps/rejected": -1319.181640625, |
|
"loss": 0.3663, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.380959987640381, |
|
"rewards/margins": 6.677268028259277, |
|
"rewards/margins_max": 10.735626220703125, |
|
"rewards/margins_min": 2.618910074234009, |
|
"rewards/margins_std": 5.739384651184082, |
|
"rewards/rejected": -11.0582275390625, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 4.843997532110051e-06, |
|
"logits/chosen": 0.4099550247192383, |
|
"logits/rejected": 0.9675588607788086, |
|
"logps/chosen": -634.2232666015625, |
|
"logps/rejected": -1535.5347900390625, |
|
"loss": 0.1502, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.844944477081299, |
|
"rewards/margins": 9.073770523071289, |
|
"rewards/margins_max": 14.664604187011719, |
|
"rewards/margins_min": 3.482936143875122, |
|
"rewards/margins_std": 7.906632423400879, |
|
"rewards/rejected": -12.91871452331543, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 4.831256946527591e-06, |
|
"logits/chosen": 0.41468414664268494, |
|
"logits/rejected": 1.1351321935653687, |
|
"logps/chosen": -591.6776123046875, |
|
"logps/rejected": -1291.275146484375, |
|
"loss": 0.2315, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.6611881256103516, |
|
"rewards/margins": 7.434420585632324, |
|
"rewards/margins_max": 11.837034225463867, |
|
"rewards/margins_min": 3.0318071842193604, |
|
"rewards/margins_std": 6.226236820220947, |
|
"rewards/rejected": -11.095609664916992, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 4.818034435869377e-06, |
|
"logits/chosen": 0.5877698063850403, |
|
"logits/rejected": 1.2467072010040283, |
|
"logps/chosen": -623.4757080078125, |
|
"logps/rejected": -1281.064697265625, |
|
"loss": 0.1391, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -3.8208084106445312, |
|
"rewards/margins": 6.917235374450684, |
|
"rewards/margins_max": 10.565729141235352, |
|
"rewards/margins_min": 3.268742322921753, |
|
"rewards/margins_std": 5.159748077392578, |
|
"rewards/rejected": -10.738044738769531, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.890625, |
|
"learning_rate": 4.804332733536141e-06, |
|
"logits/chosen": 0.45656394958496094, |
|
"logits/rejected": 1.1674026250839233, |
|
"logps/chosen": -701.67041015625, |
|
"logps/rejected": -1496.7086181640625, |
|
"loss": 0.2265, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.459706783294678, |
|
"rewards/margins": 8.300594329833984, |
|
"rewards/margins_max": 13.440702438354492, |
|
"rewards/margins_min": 3.1604866981506348, |
|
"rewards/margins_std": 7.2692108154296875, |
|
"rewards/rejected": -12.76030158996582, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 4.790154671988696e-06, |
|
"logits/chosen": 0.707282304763794, |
|
"logits/rejected": 1.2839213609695435, |
|
"logps/chosen": -713.0794067382812, |
|
"logps/rejected": -1470.809326171875, |
|
"loss": 0.1294, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.646553993225098, |
|
"rewards/margins": 7.7992706298828125, |
|
"rewards/margins_max": 12.847709655761719, |
|
"rewards/margins_min": 2.7508316040039062, |
|
"rewards/margins_std": 7.1395721435546875, |
|
"rewards/rejected": -12.44582462310791, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 22.625, |
|
"learning_rate": 4.775503182162386e-06, |
|
"logits/chosen": 0.6817615032196045, |
|
"logits/rejected": 1.3176844120025635, |
|
"logps/chosen": -850.7849731445312, |
|
"logps/rejected": -1690.505126953125, |
|
"loss": 0.253, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.02855110168457, |
|
"rewards/margins": 8.858359336853027, |
|
"rewards/margins_max": 13.857948303222656, |
|
"rewards/margins_min": 3.858771800994873, |
|
"rewards/margins_std": 7.0704851150512695, |
|
"rewards/rejected": -14.886911392211914, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.8671875, |
|
"learning_rate": 4.7603812928612e-06, |
|
"logits/chosen": 0.4829481542110443, |
|
"logits/rejected": 1.1649879217147827, |
|
"logps/chosen": -747.2049560546875, |
|
"logps/rejected": -1385.47216796875, |
|
"loss": 0.4339, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.999864101409912, |
|
"rewards/margins": 6.820773124694824, |
|
"rewards/margins_max": 10.468523025512695, |
|
"rewards/margins_min": 3.1730237007141113, |
|
"rewards/margins_std": 5.158697605133057, |
|
"rewards/rejected": -11.820637702941895, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.5078125, |
|
"learning_rate": 4.744792130131653e-06, |
|
"logits/chosen": 0.3002074360847473, |
|
"logits/rejected": 1.0043690204620361, |
|
"logps/chosen": -662.5621948242188, |
|
"logps/rejected": -1360.820068359375, |
|
"loss": 0.1538, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.9544594287872314, |
|
"rewards/margins": 7.2028093338012695, |
|
"rewards/margins_max": 11.087037086486816, |
|
"rewards/margins_min": 3.318582534790039, |
|
"rewards/margins_std": 5.493126392364502, |
|
"rewards/rejected": -11.157269477844238, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.63671875, |
|
"learning_rate": 4.728738916616552e-06, |
|
"logits/chosen": 0.5242341756820679, |
|
"logits/rejected": 1.1999857425689697, |
|
"logps/chosen": -646.2457885742188, |
|
"logps/rejected": -1409.1556396484375, |
|
"loss": 0.2874, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.057827949523926, |
|
"rewards/margins": 7.776298522949219, |
|
"rewards/margins_max": 11.975003242492676, |
|
"rewards/margins_min": 3.57759428024292, |
|
"rewards/margins_std": 5.937864780426025, |
|
"rewards/rejected": -11.834127426147461, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 4.71875, |
|
"learning_rate": 4.712224970888801e-06, |
|
"logits/chosen": 0.580299973487854, |
|
"logits/rejected": 1.3875830173492432, |
|
"logps/chosen": -721.7586059570312, |
|
"logps/rejected": -1617.5888671875, |
|
"loss": 0.2512, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.572092056274414, |
|
"rewards/margins": 9.203888893127441, |
|
"rewards/margins_max": 14.512364387512207, |
|
"rewards/margins_min": 3.8954148292541504, |
|
"rewards/margins_std": 7.507315635681152, |
|
"rewards/rejected": -13.775980949401855, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 4.375, |
|
"learning_rate": 4.69525370676538e-06, |
|
"logits/chosen": 0.5429633855819702, |
|
"logits/rejected": 1.3331568241119385, |
|
"logps/chosen": -695.3401489257812, |
|
"logps/rejected": -1387.588134765625, |
|
"loss": 0.2468, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.615943908691406, |
|
"rewards/margins": 7.4264984130859375, |
|
"rewards/margins_max": 11.769147872924805, |
|
"rewards/margins_min": 3.0838465690612793, |
|
"rewards/margins_std": 6.141435623168945, |
|
"rewards/rejected": -12.042441368103027, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.859375, |
|
"learning_rate": 4.677828632601625e-06, |
|
"logits/chosen": 0.49036288261413574, |
|
"logits/rejected": 1.2113770246505737, |
|
"logps/chosen": -631.5177001953125, |
|
"logps/rejected": -1210.844482421875, |
|
"loss": 0.128, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.760044813156128, |
|
"rewards/margins": 6.260881423950195, |
|
"rewards/margins_max": 9.719388961791992, |
|
"rewards/margins_min": 2.8023738861083984, |
|
"rewards/margins_std": 4.891068458557129, |
|
"rewards/rejected": -10.020925521850586, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 6.875, |
|
"learning_rate": 4.65995335056597e-06, |
|
"logits/chosen": 0.4661685824394226, |
|
"logits/rejected": 1.1997615098953247, |
|
"logps/chosen": -697.4072265625, |
|
"logps/rejected": -1316.599853515625, |
|
"loss": 0.2737, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.225058555603027, |
|
"rewards/margins": 6.9119462966918945, |
|
"rewards/margins_max": 10.23686408996582, |
|
"rewards/margins_min": 3.5870280265808105, |
|
"rewards/margins_std": 4.702144145965576, |
|
"rewards/rejected": -11.137005805969238, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.90234375, |
|
"learning_rate": 4.6416315558952985e-06, |
|
"logits/chosen": 0.5700492858886719, |
|
"logits/rejected": 1.2297275066375732, |
|
"logps/chosen": -648.19482421875, |
|
"logps/rejected": -1285.586181640625, |
|
"loss": 0.2359, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.9675934314727783, |
|
"rewards/margins": 6.879029273986816, |
|
"rewards/margins_max": 11.239900588989258, |
|
"rewards/margins_min": 2.518155336380005, |
|
"rewards/margins_std": 6.167205810546875, |
|
"rewards/rejected": -10.846620559692383, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.6640625, |
|
"learning_rate": 4.622867036131045e-06, |
|
"logits/chosen": 0.4446844160556793, |
|
"logits/rejected": 1.1179345846176147, |
|
"logps/chosen": -699.2020263671875, |
|
"logps/rejected": -1307.1138916015625, |
|
"loss": 0.1037, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.177753925323486, |
|
"rewards/margins": 6.502650260925293, |
|
"rewards/margins_max": 10.074499130249023, |
|
"rewards/margins_min": 2.9308011531829834, |
|
"rewards/margins_std": 5.051357746124268, |
|
"rewards/rejected": -10.680402755737305, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.984375, |
|
"learning_rate": 4.60366367033623e-06, |
|
"logits/chosen": 0.40803995728492737, |
|
"logits/rejected": 1.114776849746704, |
|
"logps/chosen": -723.8427734375, |
|
"logps/rejected": -1445.0594482421875, |
|
"loss": 0.1899, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.7852630615234375, |
|
"rewards/margins": 7.497411251068115, |
|
"rewards/margins_max": 11.162347793579102, |
|
"rewards/margins_min": 3.8324737548828125, |
|
"rewards/margins_std": 5.183003902435303, |
|
"rewards/rejected": -12.282673835754395, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 4.5840254282935604e-06, |
|
"logits/chosen": 0.5937483310699463, |
|
"logits/rejected": 1.2330735921859741, |
|
"logps/chosen": -796.9608154296875, |
|
"logps/rejected": -1525.527587890625, |
|
"loss": 0.2084, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.397136688232422, |
|
"rewards/margins": 7.485539436340332, |
|
"rewards/margins_max": 11.793124198913574, |
|
"rewards/margins_min": 3.177953004837036, |
|
"rewards/margins_std": 6.091846942901611, |
|
"rewards/rejected": -12.882675170898438, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 4.875, |
|
"learning_rate": 4.56395636968479e-06, |
|
"logits/chosen": 0.6977173089981079, |
|
"logits/rejected": 1.2386213541030884, |
|
"logps/chosen": -645.6939697265625, |
|
"logps/rejected": -1467.990234375, |
|
"loss": 0.1096, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.246734619140625, |
|
"rewards/margins": 8.112640380859375, |
|
"rewards/margins_max": 12.321252822875977, |
|
"rewards/margins_min": 3.904027223587036, |
|
"rewards/margins_std": 5.951877593994141, |
|
"rewards/rejected": -12.359376907348633, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 4.543460643251481e-06, |
|
"logits/chosen": 0.5177757740020752, |
|
"logits/rejected": 1.1193509101867676, |
|
"logps/chosen": -690.61572265625, |
|
"logps/rejected": -1552.57568359375, |
|
"loss": 0.1118, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.566229820251465, |
|
"rewards/margins": 8.843162536621094, |
|
"rewards/margins_max": 13.59516429901123, |
|
"rewards/margins_min": 4.091159820556641, |
|
"rewards/margins_std": 6.720346927642822, |
|
"rewards/rejected": -13.409391403198242, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 4.522542485937369e-06, |
|
"logits/chosen": 0.6444950103759766, |
|
"logits/rejected": 1.3874423503875732, |
|
"logps/chosen": -798.6126708984375, |
|
"logps/rejected": -1724.8717041015625, |
|
"loss": 0.1363, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.293717861175537, |
|
"rewards/margins": 9.743268966674805, |
|
"rewards/margins_max": 15.012044906616211, |
|
"rewards/margins_min": 4.474490165710449, |
|
"rewards/margins_std": 7.451178073883057, |
|
"rewards/rejected": -15.036985397338867, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.6953125, |
|
"learning_rate": 4.5012062220124845e-06, |
|
"logits/chosen": 0.5247820019721985, |
|
"logits/rejected": 1.264107346534729, |
|
"logps/chosen": -724.8851928710938, |
|
"logps/rejected": -1684.8916015625, |
|
"loss": 0.1534, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.79865026473999, |
|
"rewards/margins": 9.892607688903809, |
|
"rewards/margins_max": 15.237088203430176, |
|
"rewards/margins_min": 4.548129081726074, |
|
"rewards/margins_std": 7.558236122131348, |
|
"rewards/rejected": -14.691259384155273, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 4.479456262179228e-06, |
|
"logits/chosen": 0.5434385538101196, |
|
"logits/rejected": 1.2754974365234375, |
|
"logps/chosen": -826.7483520507812, |
|
"logps/rejected": -1494.5345458984375, |
|
"loss": 0.1545, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.704430103302002, |
|
"rewards/margins": 7.395480155944824, |
|
"rewards/margins_max": 11.325884819030762, |
|
"rewards/margins_min": 3.465075969696045, |
|
"rewards/margins_std": 5.558432102203369, |
|
"rewards/rejected": -13.099909782409668, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 4.4572971026605726e-06, |
|
"logits/chosen": 0.5515539646148682, |
|
"logits/rejected": 1.3576513528823853, |
|
"logps/chosen": -805.1383666992188, |
|
"logps/rejected": -1752.7252197265625, |
|
"loss": 0.1508, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.342719078063965, |
|
"rewards/margins": 9.904947280883789, |
|
"rewards/margins_max": 14.326292037963867, |
|
"rewards/margins_min": 5.4836015701293945, |
|
"rewards/margins_std": 6.252727031707764, |
|
"rewards/rejected": -15.24766731262207, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 4.434733324270592e-06, |
|
"logits/chosen": 0.5185344815254211, |
|
"logits/rejected": 1.1416656970977783, |
|
"logps/chosen": -690.1055908203125, |
|
"logps/rejected": -1510.14453125, |
|
"loss": 0.1959, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.4360551834106445, |
|
"rewards/margins": 8.723947525024414, |
|
"rewards/margins_max": 12.72030258178711, |
|
"rewards/margins_min": 4.727590084075928, |
|
"rewards/margins_std": 5.651700973510742, |
|
"rewards/rejected": -13.160001754760742, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 4.411769591467497e-06, |
|
"logits/chosen": 0.4622286856174469, |
|
"logits/rejected": 1.096407175064087, |
|
"logps/chosen": -706.9508056640625, |
|
"logps/rejected": -1390.883544921875, |
|
"loss": 0.1088, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.562924385070801, |
|
"rewards/margins": 7.009686470031738, |
|
"rewards/margins_max": 10.363374710083008, |
|
"rewards/margins_min": 3.6559970378875732, |
|
"rewards/margins_std": 4.742833137512207, |
|
"rewards/rejected": -11.572611808776855, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.46875, |
|
"learning_rate": 4.3884106513893895e-06, |
|
"logits/chosen": 0.5636991262435913, |
|
"logits/rejected": 1.2218422889709473, |
|
"logps/chosen": -723.6990966796875, |
|
"logps/rejected": -1594.478515625, |
|
"loss": 0.1631, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.5525712966918945, |
|
"rewards/margins": 9.058218002319336, |
|
"rewards/margins_max": 13.891519546508789, |
|
"rewards/margins_min": 4.224917411804199, |
|
"rewards/margins_std": 6.835320949554443, |
|
"rewards/rejected": -13.61078929901123, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 4.364661332872913e-06, |
|
"logits/chosen": 0.4284195005893707, |
|
"logits/rejected": 1.1675077676773071, |
|
"logps/chosen": -757.3233642578125, |
|
"logps/rejected": -1814.9117431640625, |
|
"loss": 0.1645, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.945916652679443, |
|
"rewards/margins": 11.039398193359375, |
|
"rewards/margins_max": 16.22256851196289, |
|
"rewards/margins_min": 5.856227874755859, |
|
"rewards/margins_std": 7.330111026763916, |
|
"rewards/rejected": -15.985315322875977, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 4.340526545455016e-06, |
|
"logits/chosen": 0.5042354464530945, |
|
"logits/rejected": 1.2818940877914429, |
|
"logps/chosen": -712.5407104492188, |
|
"logps/rejected": -1623.8824462890625, |
|
"loss": 0.1499, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.52707576751709, |
|
"rewards/margins": 9.589614868164062, |
|
"rewards/margins_max": 15.405502319335938, |
|
"rewards/margins_min": 3.77372670173645, |
|
"rewards/margins_std": 8.224907875061035, |
|
"rewards/rejected": -14.116689682006836, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 4.5, |
|
"learning_rate": 4.31601127835805e-06, |
|
"logits/chosen": 0.4573752284049988, |
|
"logits/rejected": 1.2255313396453857, |
|
"logps/chosen": -803.6546630859375, |
|
"logps/rejected": -1744.1165771484375, |
|
"loss": 0.1508, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.967880725860596, |
|
"rewards/margins": 10.099109649658203, |
|
"rewards/margins_max": 15.592402458190918, |
|
"rewards/margins_min": 4.605815887451172, |
|
"rewards/margins_std": 7.768690586090088, |
|
"rewards/rejected": -15.066988945007324, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 4.291120599458366e-06, |
|
"logits/chosen": 0.6284778118133545, |
|
"logits/rejected": 1.3736778497695923, |
|
"logps/chosen": -744.2955322265625, |
|
"logps/rejected": -1689.0992431640625, |
|
"loss": 0.108, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.914001941680908, |
|
"rewards/margins": 9.785839080810547, |
|
"rewards/margins_max": 14.224902153015137, |
|
"rewards/margins_min": 5.346776008605957, |
|
"rewards/margins_std": 6.2777838706970215, |
|
"rewards/rejected": -14.69983959197998, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 4.265859654238676e-06, |
|
"logits/chosen": 0.518182635307312, |
|
"logits/rejected": 1.266416311264038, |
|
"logps/chosen": -795.889404296875, |
|
"logps/rejected": -1599.6748046875, |
|
"loss": 0.1042, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.121318340301514, |
|
"rewards/margins": 8.590972900390625, |
|
"rewards/margins_max": 12.986808776855469, |
|
"rewards/margins_min": 4.195137023925781, |
|
"rewards/margins_std": 6.216650485992432, |
|
"rewards/rejected": -13.71229076385498, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.09375, |
|
"learning_rate": 4.240233664724358e-06, |
|
"logits/chosen": 0.5838888883590698, |
|
"logits/rejected": 1.3169996738433838, |
|
"logps/chosen": -767.9039916992188, |
|
"logps/rejected": -1709.415283203125, |
|
"loss": 0.1546, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.298363208770752, |
|
"rewards/margins": 9.643119812011719, |
|
"rewards/margins_max": 14.92499828338623, |
|
"rewards/margins_min": 4.361241340637207, |
|
"rewards/margins_std": 7.469703674316406, |
|
"rewards/rejected": -14.941482543945312, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.5859375, |
|
"learning_rate": 4.2142479284039445e-06, |
|
"logits/chosen": 0.5468761920928955, |
|
"logits/rejected": 1.227432131767273, |
|
"logps/chosen": -770.5872802734375, |
|
"logps/rejected": -1546.19384765625, |
|
"loss": 0.0909, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -5.143388748168945, |
|
"rewards/margins": 8.180723190307617, |
|
"rewards/margins_max": 13.141166687011719, |
|
"rewards/margins_min": 3.2202792167663574, |
|
"rewards/margins_std": 7.015126705169678, |
|
"rewards/rejected": -13.324111938476562, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.84375, |
|
"learning_rate": 4.187907817134005e-06, |
|
"logits/chosen": 0.5028406381607056, |
|
"logits/rejected": 1.2698485851287842, |
|
"logps/chosen": -769.3389282226562, |
|
"logps/rejected": -2071.339111328125, |
|
"loss": 0.0668, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.078272342681885, |
|
"rewards/margins": 13.281135559082031, |
|
"rewards/margins_max": 18.268909454345703, |
|
"rewards/margins_min": 8.293363571166992, |
|
"rewards/margins_std": 7.053775787353516, |
|
"rewards/rejected": -18.359407424926758, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 4.161218776028661e-06, |
|
"logits/chosen": 0.4837300181388855, |
|
"logits/rejected": 1.2130780220031738, |
|
"logps/chosen": -780.1266479492188, |
|
"logps/rejected": -2050.310302734375, |
|
"loss": 0.2191, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.4014573097229, |
|
"rewards/margins": 12.880502700805664, |
|
"rewards/margins_max": 20.169330596923828, |
|
"rewards/margins_min": 5.591673851013184, |
|
"rewards/margins_std": 10.30795955657959, |
|
"rewards/rejected": -18.281957626342773, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 4.134186322333951e-06, |
|
"logits/chosen": 0.5044664144515991, |
|
"logits/rejected": 1.2629055976867676, |
|
"logps/chosen": -710.2357788085938, |
|
"logps/rejected": -1879.140625, |
|
"loss": 0.1806, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.55966329574585, |
|
"rewards/margins": 11.992768287658691, |
|
"rewards/margins_max": 17.50288200378418, |
|
"rewards/margins_min": 6.482656002044678, |
|
"rewards/margins_std": 7.79247522354126, |
|
"rewards/rejected": -16.552433013916016, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.796875, |
|
"learning_rate": 4.106816044287292e-06, |
|
"logits/chosen": 0.5818988084793091, |
|
"logits/rejected": 1.2744948863983154, |
|
"logps/chosen": -702.9332885742188, |
|
"logps/rejected": -1656.512939453125, |
|
"loss": 0.1058, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.6068434715271, |
|
"rewards/margins": 9.725323677062988, |
|
"rewards/margins_max": 13.76390266418457, |
|
"rewards/margins_min": 5.68674373626709, |
|
"rewards/margins_std": 5.711414337158203, |
|
"rewards/rejected": -14.332165718078613, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 4.079113599962257e-06, |
|
"logits/chosen": 0.6045584082603455, |
|
"logits/rejected": 1.40791916847229, |
|
"logps/chosen": -795.3938598632812, |
|
"logps/rejected": -1809.0947265625, |
|
"loss": 0.0772, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.249255657196045, |
|
"rewards/margins": 10.42540168762207, |
|
"rewards/margins_max": 15.904159545898438, |
|
"rewards/margins_min": 4.9466447830200195, |
|
"rewards/margins_std": 7.748133182525635, |
|
"rewards/rejected": -15.674657821655273, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.92578125, |
|
"learning_rate": 4.051084716098921e-06, |
|
"logits/chosen": 0.5180607438087463, |
|
"logits/rejected": 1.220595121383667, |
|
"logps/chosen": -676.9715576171875, |
|
"logps/rejected": -1734.108642578125, |
|
"loss": 0.1499, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.324917793273926, |
|
"rewards/margins": 10.716940879821777, |
|
"rewards/margins_max": 15.53416633605957, |
|
"rewards/margins_min": 5.899716377258301, |
|
"rewards/margins_std": 6.812585353851318, |
|
"rewards/rejected": -15.04185962677002, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 4.022735186920008e-06, |
|
"logits/chosen": 0.487175315618515, |
|
"logits/rejected": 1.2153656482696533, |
|
"logps/chosen": -689.3336791992188, |
|
"logps/rejected": -1664.072998046875, |
|
"loss": 0.1004, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.373027801513672, |
|
"rewards/margins": 10.115726470947266, |
|
"rewards/margins_max": 15.508180618286133, |
|
"rewards/margins_min": 4.72327184677124, |
|
"rewards/margins_std": 7.626082420349121, |
|
"rewards/rejected": -14.488754272460938, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 7.4375, |
|
"learning_rate": 3.994070872933097e-06, |
|
"logits/chosen": 0.4529595375061035, |
|
"logits/rejected": 1.1865074634552002, |
|
"logps/chosen": -645.4569091796875, |
|
"logps/rejected": -1371.4666748046875, |
|
"loss": 0.1495, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.04251766204834, |
|
"rewards/margins": 7.7823076248168945, |
|
"rewards/margins_max": 11.296024322509766, |
|
"rewards/margins_min": 4.268589973449707, |
|
"rewards/margins_std": 4.969146251678467, |
|
"rewards/rejected": -11.824824333190918, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.46875, |
|
"learning_rate": 3.965097699719109e-06, |
|
"logits/chosen": 0.5944451093673706, |
|
"logits/rejected": 1.3090002536773682, |
|
"logps/chosen": -762.5585327148438, |
|
"logps/rejected": -1599.050537109375, |
|
"loss": 0.1855, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.843676567077637, |
|
"rewards/margins": 8.63348388671875, |
|
"rewards/margins_max": 13.529146194458008, |
|
"rewards/margins_min": 3.7378222942352295, |
|
"rewards/margins_std": 6.9235124588012695, |
|
"rewards/rejected": -13.477160453796387, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.5625, |
|
"learning_rate": 3.935821656707359e-06, |
|
"logits/chosen": 0.5119448304176331, |
|
"logits/rejected": 1.1734087467193604, |
|
"logps/chosen": -652.0521850585938, |
|
"logps/rejected": -1535.1744384765625, |
|
"loss": 0.1104, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.284106254577637, |
|
"rewards/margins": 8.888346672058105, |
|
"rewards/margins_max": 13.543110847473145, |
|
"rewards/margins_min": 4.233582496643066, |
|
"rewards/margins_std": 6.582831382751465, |
|
"rewards/rejected": -13.172452926635742, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.6640625, |
|
"learning_rate": 3.9062487959374e-06, |
|
"logits/chosen": 0.41363000869750977, |
|
"logits/rejected": 1.170240879058838, |
|
"logps/chosen": -667.77783203125, |
|
"logps/rejected": -1525.485595703125, |
|
"loss": 0.1262, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.287820816040039, |
|
"rewards/margins": 9.004143714904785, |
|
"rewards/margins_max": 13.4224214553833, |
|
"rewards/margins_min": 4.585866451263428, |
|
"rewards/margins_std": 6.248387336730957, |
|
"rewards/rejected": -13.291964530944824, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.75, |
|
"learning_rate": 3.8763852308079244e-06, |
|
"logits/chosen": 0.5807031393051147, |
|
"logits/rejected": 1.292966365814209, |
|
"logps/chosen": -698.1134643554688, |
|
"logps/rejected": -1579.521240234375, |
|
"loss": 0.1198, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.37972354888916, |
|
"rewards/margins": 9.12094497680664, |
|
"rewards/margins_max": 14.104803085327148, |
|
"rewards/margins_min": 4.137085914611816, |
|
"rewards/margins_std": 7.048240661621094, |
|
"rewards/rejected": -13.5006685256958, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.875, |
|
"learning_rate": 3.8462371348129805e-06, |
|
"logits/chosen": 0.539486289024353, |
|
"logits/rejected": 1.2316633462905884, |
|
"logps/chosen": -694.4327392578125, |
|
"logps/rejected": -1500.2738037109375, |
|
"loss": 0.1419, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.638047218322754, |
|
"rewards/margins": 8.356195449829102, |
|
"rewards/margins_max": 12.973353385925293, |
|
"rewards/margins_min": 3.7390365600585938, |
|
"rewards/margins_std": 6.5296478271484375, |
|
"rewards/rejected": -12.994241714477539, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.10693359375, |
|
"learning_rate": 3.815810740265769e-06, |
|
"logits/chosen": 0.5020047426223755, |
|
"logits/rejected": 1.345840573310852, |
|
"logps/chosen": -702.5892333984375, |
|
"logps/rejected": -1638.1595458984375, |
|
"loss": 0.1495, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.527231216430664, |
|
"rewards/margins": 9.787068367004395, |
|
"rewards/margins_max": 14.262059211730957, |
|
"rewards/margins_min": 5.312075614929199, |
|
"rewards/margins_std": 6.328594207763672, |
|
"rewards/rejected": -14.314300537109375, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 6.03125, |
|
"learning_rate": 3.785112337010284e-06, |
|
"logits/chosen": 0.6428021192550659, |
|
"logits/rejected": 1.342193365097046, |
|
"logps/chosen": -698.9358520507812, |
|
"logps/rejected": -1490.9117431640625, |
|
"loss": 0.1081, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.455615043640137, |
|
"rewards/margins": 8.250611305236816, |
|
"rewards/margins_max": 11.710147857666016, |
|
"rewards/margins_min": 4.791074275970459, |
|
"rewards/margins_std": 4.892523765563965, |
|
"rewards/rejected": -12.706225395202637, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.6328125, |
|
"learning_rate": 3.7541482711210474e-06, |
|
"logits/chosen": 0.49654191732406616, |
|
"logits/rejected": 1.2780824899673462, |
|
"logps/chosen": -770.838623046875, |
|
"logps/rejected": -1911.4193115234375, |
|
"loss": 0.1141, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.841742038726807, |
|
"rewards/margins": 11.952981948852539, |
|
"rewards/margins_max": 17.861114501953125, |
|
"rewards/margins_min": 6.044848442077637, |
|
"rewards/margins_std": 8.355361938476562, |
|
"rewards/rejected": -16.794721603393555, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9765625, |
|
"learning_rate": 3.722924943591232e-06, |
|
"logits/chosen": 0.5268442034721375, |
|
"logits/rejected": 1.2990949153900146, |
|
"logps/chosen": -794.0743408203125, |
|
"logps/rejected": -1849.2060546875, |
|
"loss": 0.0905, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.302125930786133, |
|
"rewards/margins": 11.0300874710083, |
|
"rewards/margins_max": 15.024978637695312, |
|
"rewards/margins_min": 7.035195827484131, |
|
"rewards/margins_std": 5.649630069732666, |
|
"rewards/rejected": -16.33221435546875, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 3.691448809009427e-06, |
|
"logits/chosen": 0.627538800239563, |
|
"logits/rejected": 1.3176391124725342, |
|
"logps/chosen": -826.3603515625, |
|
"logps/rejected": -1807.1185302734375, |
|
"loss": 0.165, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.355879783630371, |
|
"rewards/margins": 10.351202011108398, |
|
"rewards/margins_max": 15.11308479309082, |
|
"rewards/margins_min": 5.589318752288818, |
|
"rewards/margins_std": 6.73431921005249, |
|
"rewards/rejected": -15.70708179473877, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.84375, |
|
"learning_rate": 3.659726374225323e-06, |
|
"logits/chosen": 0.47057127952575684, |
|
"logits/rejected": 1.1657397747039795, |
|
"logps/chosen": -652.9990234375, |
|
"logps/rejected": -1534.808349609375, |
|
"loss": 0.0925, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.1840925216674805, |
|
"rewards/margins": 8.879453659057617, |
|
"rewards/margins_max": 13.41234302520752, |
|
"rewards/margins_min": 4.346565246582031, |
|
"rewards/margins_std": 6.410472869873047, |
|
"rewards/rejected": -13.063547134399414, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 3.6277641970045975e-06, |
|
"logits/chosen": 0.5770415663719177, |
|
"logits/rejected": 1.3713629245758057, |
|
"logps/chosen": -804.15576171875, |
|
"logps/rejected": -1777.1861572265625, |
|
"loss": 0.1385, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.3897552490234375, |
|
"rewards/margins": 10.179000854492188, |
|
"rewards/margins_max": 15.141960144042969, |
|
"rewards/margins_min": 5.216042995452881, |
|
"rewards/margins_std": 7.018682956695557, |
|
"rewards/rejected": -15.568756103515625, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 3.5955688846732677e-06, |
|
"logits/chosen": 0.5724108815193176, |
|
"logits/rejected": 1.2440688610076904, |
|
"logps/chosen": -775.36328125, |
|
"logps/rejected": -2097.964599609375, |
|
"loss": 0.1097, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.414142608642578, |
|
"rewards/margins": 13.599000930786133, |
|
"rewards/margins_max": 21.20298957824707, |
|
"rewards/margins_min": 5.995011329650879, |
|
"rewards/margins_std": 10.753664016723633, |
|
"rewards/rejected": -19.013145446777344, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.421875, |
|
"learning_rate": 3.563147092751807e-06, |
|
"logits/chosen": 0.5183674097061157, |
|
"logits/rejected": 1.306438684463501, |
|
"logps/chosen": -913.7030029296875, |
|
"logps/rejected": -1972.9140625, |
|
"loss": 0.087, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.088994026184082, |
|
"rewards/margins": 11.24118423461914, |
|
"rewards/margins_max": 17.46712875366211, |
|
"rewards/margins_min": 5.0152411460876465, |
|
"rewards/margins_std": 8.804813385009766, |
|
"rewards/rejected": -17.330181121826172, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.65625, |
|
"learning_rate": 3.5305055235792906e-06, |
|
"logits/chosen": 0.5217747688293457, |
|
"logits/rejected": 1.2815120220184326, |
|
"logps/chosen": -753.7869262695312, |
|
"logps/rejected": -1882.3675537109375, |
|
"loss": 0.1165, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.9851508140563965, |
|
"rewards/margins": 11.520073890686035, |
|
"rewards/margins_max": 17.029399871826172, |
|
"rewards/margins_min": 6.010746955871582, |
|
"rewards/margins_std": 7.791365623474121, |
|
"rewards/rejected": -16.505224227905273, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 9.625, |
|
"learning_rate": 3.4976509249278673e-06, |
|
"logits/chosen": 0.6170846819877625, |
|
"logits/rejected": 1.3059895038604736, |
|
"logps/chosen": -820.76708984375, |
|
"logps/rejected": -1968.9228515625, |
|
"loss": 0.1944, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.856749534606934, |
|
"rewards/margins": 11.451845169067383, |
|
"rewards/margins_max": 17.031482696533203, |
|
"rewards/margins_min": 5.8722076416015625, |
|
"rewards/margins_std": 7.890799045562744, |
|
"rewards/rejected": -17.30859375, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 10.0, |
|
"learning_rate": 3.4645900886078388e-06, |
|
"logits/chosen": 0.47162705659866333, |
|
"logits/rejected": 1.2156587839126587, |
|
"logps/chosen": -745.6646728515625, |
|
"logps/rejected": -1683.563232421875, |
|
"loss": 0.1421, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.883286476135254, |
|
"rewards/margins": 9.705187797546387, |
|
"rewards/margins_max": 14.16901683807373, |
|
"rewards/margins_min": 5.241359710693359, |
|
"rewards/margins_std": 6.312806129455566, |
|
"rewards/rejected": -14.588473320007324, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 3.4313298490636328e-06, |
|
"logits/chosen": 0.542891800403595, |
|
"logits/rejected": 1.327044129371643, |
|
"logps/chosen": -745.6140747070312, |
|
"logps/rejected": -1832.4476318359375, |
|
"loss": 0.1122, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.060235023498535, |
|
"rewards/margins": 11.030832290649414, |
|
"rewards/margins_max": 15.847732543945312, |
|
"rewards/margins_min": 6.21393346786499, |
|
"rewards/margins_std": 6.812124729156494, |
|
"rewards/rejected": -16.091068267822266, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 3.3978770819609647e-06, |
|
"logits/chosen": 0.5193914175033569, |
|
"logits/rejected": 1.2432626485824585, |
|
"logps/chosen": -718.4923095703125, |
|
"logps/rejected": -1824.8092041015625, |
|
"loss": 0.0604, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.907055377960205, |
|
"rewards/margins": 11.238981246948242, |
|
"rewards/margins_max": 16.069297790527344, |
|
"rewards/margins_min": 6.40866756439209, |
|
"rewards/margins_std": 6.831096649169922, |
|
"rewards/rejected": -16.146038055419922, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.0, |
|
"learning_rate": 3.364238702765477e-06, |
|
"logits/chosen": 0.6283344030380249, |
|
"logits/rejected": 1.1587202548980713, |
|
"logps/chosen": -784.2772216796875, |
|
"logps/rejected": -1649.103515625, |
|
"loss": 0.093, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.440009117126465, |
|
"rewards/margins": 8.746278762817383, |
|
"rewards/margins_max": 12.856730461120605, |
|
"rewards/margins_min": 4.63582706451416, |
|
"rewards/margins_std": 5.813055992126465, |
|
"rewards/rejected": -14.186288833618164, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.11669921875, |
|
"learning_rate": 3.3304216653131566e-06, |
|
"logits/chosen": 0.4906349778175354, |
|
"logits/rejected": 1.1233699321746826, |
|
"logps/chosen": -726.7098388671875, |
|
"logps/rejected": -1864.184326171875, |
|
"loss": 0.0985, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.014734268188477, |
|
"rewards/margins": 11.505403518676758, |
|
"rewards/margins_max": 17.862531661987305, |
|
"rewards/margins_min": 5.148275375366211, |
|
"rewards/margins_std": 8.990338325500488, |
|
"rewards/rejected": -16.520137786865234, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 3.2964329603728046e-06, |
|
"logits/chosen": 0.4619167447090149, |
|
"logits/rejected": 1.1618800163269043, |
|
"logps/chosen": -792.4830322265625, |
|
"logps/rejected": -1843.839111328125, |
|
"loss": 0.1262, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.372784614562988, |
|
"rewards/margins": 10.8291597366333, |
|
"rewards/margins_max": 15.854537963867188, |
|
"rewards/margins_min": 5.803778648376465, |
|
"rewards/margins_std": 7.106959342956543, |
|
"rewards/rejected": -16.201942443847656, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.59765625, |
|
"learning_rate": 3.262279614200892e-06, |
|
"logits/chosen": 0.5689177513122559, |
|
"logits/rejected": 1.27706778049469, |
|
"logps/chosen": -735.7247314453125, |
|
"logps/rejected": -1631.17578125, |
|
"loss": 0.1125, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.984173774719238, |
|
"rewards/margins": 9.399529457092285, |
|
"rewards/margins_max": 13.701881408691406, |
|
"rewards/margins_min": 5.097177505493164, |
|
"rewards/margins_std": 6.084444046020508, |
|
"rewards/rejected": -14.383702278137207, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 3.2279686870890637e-06, |
|
"logits/chosen": 0.4834915101528168, |
|
"logits/rejected": 1.2427217960357666, |
|
"logps/chosen": -703.0142822265625, |
|
"logps/rejected": -1653.974365234375, |
|
"loss": 0.0839, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.775191307067871, |
|
"rewards/margins": 9.554444313049316, |
|
"rewards/margins_max": 14.255029678344727, |
|
"rewards/margins_min": 4.853858470916748, |
|
"rewards/margins_std": 6.6476311683654785, |
|
"rewards/rejected": -14.329633712768555, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.57421875, |
|
"learning_rate": 3.193507271904612e-06, |
|
"logits/chosen": 0.44650688767433167, |
|
"logits/rejected": 1.2217845916748047, |
|
"logps/chosen": -858.9959106445312, |
|
"logps/rejected": -1833.328125, |
|
"loss": 0.0916, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.681185722351074, |
|
"rewards/margins": 10.469725608825684, |
|
"rewards/margins_max": 15.999313354492188, |
|
"rewards/margins_min": 4.940136909484863, |
|
"rewards/margins_std": 7.8200178146362305, |
|
"rewards/rejected": -16.150911331176758, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 3.158902492624218e-06, |
|
"logits/chosen": 0.4523468613624573, |
|
"logits/rejected": 1.2057933807373047, |
|
"logps/chosen": -841.3018798828125, |
|
"logps/rejected": -1831.9058837890625, |
|
"loss": 0.0927, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.5725226402282715, |
|
"rewards/margins": 10.594744682312012, |
|
"rewards/margins_max": 15.057345390319824, |
|
"rewards/margins_min": 6.132142543792725, |
|
"rewards/margins_std": 6.311070442199707, |
|
"rewards/rejected": -16.167264938354492, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 4.125, |
|
"learning_rate": 3.1241615028612563e-06, |
|
"logits/chosen": 0.5951441526412964, |
|
"logits/rejected": 1.2352155447006226, |
|
"logps/chosen": -768.4414672851562, |
|
"logps/rejected": -1707.2896728515625, |
|
"loss": 0.2036, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.0751752853393555, |
|
"rewards/margins": 9.864356994628906, |
|
"rewards/margins_max": 14.157461166381836, |
|
"rewards/margins_min": 5.57125186920166, |
|
"rewards/margins_std": 6.071366786956787, |
|
"rewards/rejected": -14.939532279968262, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 3.0892914843869838e-06, |
|
"logits/chosen": 0.5745668411254883, |
|
"logits/rejected": 1.3735682964324951, |
|
"logps/chosen": -716.9601440429688, |
|
"logps/rejected": -1639.5445556640625, |
|
"loss": 0.0789, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.607513904571533, |
|
"rewards/margins": 9.50660228729248, |
|
"rewards/margins_max": 14.321581840515137, |
|
"rewards/margins_min": 4.691622734069824, |
|
"rewards/margins_std": 6.809409141540527, |
|
"rewards/rejected": -14.114115715026855, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 3.054299645645889e-06, |
|
"logits/chosen": 0.574237048625946, |
|
"logits/rejected": 1.1578586101531982, |
|
"logps/chosen": -723.4119262695312, |
|
"logps/rejected": -1720.607666015625, |
|
"loss": 0.1266, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.802746772766113, |
|
"rewards/margins": 10.177275657653809, |
|
"rewards/margins_max": 15.720603942871094, |
|
"rewards/margins_min": 4.633947372436523, |
|
"rewards/margins_std": 7.839449882507324, |
|
"rewards/rejected": -14.980023384094238, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 3.01919322026555e-06, |
|
"logits/chosen": 0.57005774974823, |
|
"logits/rejected": 1.3801429271697998, |
|
"logps/chosen": -777.7997436523438, |
|
"logps/rejected": -1871.4964599609375, |
|
"loss": 0.12, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.197485446929932, |
|
"rewards/margins": 11.358160018920898, |
|
"rewards/margins_max": 16.549087524414062, |
|
"rewards/margins_min": 6.167231559753418, |
|
"rewards/margins_std": 7.341080665588379, |
|
"rewards/rejected": -16.555644989013672, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 2.9839794655612674e-06, |
|
"logits/chosen": 0.4680374562740326, |
|
"logits/rejected": 1.2621392011642456, |
|
"logps/chosen": -701.8974609375, |
|
"logps/rejected": -1736.7503662109375, |
|
"loss": 0.1476, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.581705570220947, |
|
"rewards/margins": 10.844728469848633, |
|
"rewards/margins_max": 15.584383964538574, |
|
"rewards/margins_min": 6.105072498321533, |
|
"rewards/margins_std": 6.702885627746582, |
|
"rewards/rejected": -15.426434516906738, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 2.9486656610358143e-06, |
|
"logits/chosen": 0.48323068022727966, |
|
"logits/rejected": 1.2080551385879517, |
|
"logps/chosen": -702.0436401367188, |
|
"logps/rejected": -1731.0406494140625, |
|
"loss": 0.0973, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.697200775146484, |
|
"rewards/margins": 10.60991096496582, |
|
"rewards/margins_max": 16.56106948852539, |
|
"rewards/margins_min": 4.658753871917725, |
|
"rewards/margins_std": 8.416207313537598, |
|
"rewards/rejected": -15.307113647460938, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 2.9132591068745884e-06, |
|
"logits/chosen": 0.5117800235748291, |
|
"logits/rejected": 1.158496618270874, |
|
"logps/chosen": -699.7086791992188, |
|
"logps/rejected": -1694.8929443359375, |
|
"loss": 0.118, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.589441776275635, |
|
"rewards/margins": 9.935541152954102, |
|
"rewards/margins_max": 14.52270793914795, |
|
"rewards/margins_min": 5.348374366760254, |
|
"rewards/margins_std": 6.487233638763428, |
|
"rewards/rejected": -14.524983406066895, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 2.8777671224364966e-06, |
|
"logits/chosen": 0.5292683243751526, |
|
"logits/rejected": 1.3735748529434204, |
|
"logps/chosen": -793.8040771484375, |
|
"logps/rejected": -2016.2437744140625, |
|
"loss": 0.1016, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.2961931228637695, |
|
"rewards/margins": 12.560154914855957, |
|
"rewards/margins_max": 19.488279342651367, |
|
"rewards/margins_min": 5.632030487060547, |
|
"rewards/margins_std": 9.797847747802734, |
|
"rewards/rejected": -17.856348037719727, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 2.842197044740873e-06, |
|
"logits/chosen": 0.5125163793563843, |
|
"logits/rejected": 1.1910914182662964, |
|
"logps/chosen": -716.28271484375, |
|
"logps/rejected": -1681.4847412109375, |
|
"loss": 0.1034, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.732181549072266, |
|
"rewards/margins": 9.891237258911133, |
|
"rewards/margins_max": 14.310302734375, |
|
"rewards/margins_min": 5.472168922424316, |
|
"rewards/margins_std": 6.249504566192627, |
|
"rewards/rejected": -14.623417854309082, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 2.8065562269507464e-06, |
|
"logits/chosen": 0.6009246110916138, |
|
"logits/rejected": 1.1898201704025269, |
|
"logps/chosen": -778.5814819335938, |
|
"logps/rejected": -2116.184814453125, |
|
"loss": 0.0969, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.313014030456543, |
|
"rewards/margins": 13.718805313110352, |
|
"rewards/margins_max": 19.344837188720703, |
|
"rewards/margins_min": 8.092771530151367, |
|
"rewards/margins_std": 7.956411838531494, |
|
"rewards/rejected": -19.031816482543945, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.75, |
|
"learning_rate": 2.7708520368527687e-06, |
|
"logits/chosen": 0.6829395294189453, |
|
"logits/rejected": 1.4511185884475708, |
|
"logps/chosen": -764.1370849609375, |
|
"logps/rejected": -1753.5296630859375, |
|
"loss": 0.0735, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.051453590393066, |
|
"rewards/margins": 10.619558334350586, |
|
"rewards/margins_max": 16.41791534423828, |
|
"rewards/margins_min": 4.821201324462891, |
|
"rewards/margins_std": 8.200114250183105, |
|
"rewards/rejected": -15.671010971069336, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 2.735091855334122e-06, |
|
"logits/chosen": 0.5935325622558594, |
|
"logits/rejected": 1.272655963897705, |
|
"logps/chosen": -780.0508422851562, |
|
"logps/rejected": -1765.977294921875, |
|
"loss": 0.1392, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.5274786949157715, |
|
"rewards/margins": 10.1564302444458, |
|
"rewards/margins_max": 15.466066360473633, |
|
"rewards/margins_min": 4.846795082092285, |
|
"rewards/margins_std": 7.508957862854004, |
|
"rewards/rejected": -15.68390941619873, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 2.6992830748567204e-06, |
|
"logits/chosen": 0.601089596748352, |
|
"logits/rejected": 1.3095543384552002, |
|
"logps/chosen": -735.0325317382812, |
|
"logps/rejected": -1644.493408203125, |
|
"loss": 0.1702, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.930995464324951, |
|
"rewards/margins": 9.481109619140625, |
|
"rewards/margins_max": 13.667490005493164, |
|
"rewards/margins_min": 5.294730186462402, |
|
"rewards/margins_std": 5.920435428619385, |
|
"rewards/rejected": -14.412104606628418, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 2.6634330979290133e-06, |
|
"logits/chosen": 0.5804930925369263, |
|
"logits/rejected": 1.1953046321868896, |
|
"logps/chosen": -664.8348388671875, |
|
"logps/rejected": -1494.647216796875, |
|
"loss": 0.0935, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.399333477020264, |
|
"rewards/margins": 8.430289268493652, |
|
"rewards/margins_max": 12.645450592041016, |
|
"rewards/margins_min": 4.215127944946289, |
|
"rewards/margins_std": 5.9611382484436035, |
|
"rewards/rejected": -12.829623222351074, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.5625, |
|
"learning_rate": 2.6275493355757166e-06, |
|
"logits/chosen": 0.5969884395599365, |
|
"logits/rejected": 1.2400842905044556, |
|
"logps/chosen": -675.366943359375, |
|
"logps/rejected": -1664.972412109375, |
|
"loss": 0.1129, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.368256568908691, |
|
"rewards/margins": 10.127969741821289, |
|
"rewards/margins_max": 15.879676818847656, |
|
"rewards/margins_min": 4.376260757446289, |
|
"rewards/margins_std": 8.134143829345703, |
|
"rewards/rejected": -14.496225357055664, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 2.5916392058057754e-06, |
|
"logits/chosen": 0.6539616584777832, |
|
"logits/rejected": 1.2341909408569336, |
|
"logps/chosen": -646.232421875, |
|
"logps/rejected": -1600.320556640625, |
|
"loss": 0.08, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.306728363037109, |
|
"rewards/margins": 9.608429908752441, |
|
"rewards/margins_max": 13.071403503417969, |
|
"rewards/margins_min": 6.145455837249756, |
|
"rewards/margins_std": 4.897385597229004, |
|
"rewards/rejected": -13.91515827178955, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 2.5557101320789005e-06, |
|
"logits/chosen": 0.43818527460098267, |
|
"logits/rejected": 1.1889019012451172, |
|
"logps/chosen": -740.1410522460938, |
|
"logps/rejected": -1722.2867431640625, |
|
"loss": 0.0504, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.829034805297852, |
|
"rewards/margins": 10.092082023620605, |
|
"rewards/margins_max": 14.363845825195312, |
|
"rewards/margins_min": 5.820317268371582, |
|
"rewards/margins_std": 6.041186332702637, |
|
"rewards/rejected": -14.921116828918457, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.8515625, |
|
"learning_rate": 2.519769541770954e-06, |
|
"logits/chosen": 0.6074897646903992, |
|
"logits/rejected": 1.3447411060333252, |
|
"logps/chosen": -747.0977783203125, |
|
"logps/rejected": -1610.9615478515625, |
|
"loss": 0.1078, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.645691871643066, |
|
"rewards/margins": 9.302739143371582, |
|
"rewards/margins_max": 13.083358764648438, |
|
"rewards/margins_min": 5.522116661071777, |
|
"rewards/margins_std": 5.34660530090332, |
|
"rewards/rejected": -13.948430061340332, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.4375, |
|
"learning_rate": 2.4838248646385458e-06, |
|
"logits/chosen": 0.4675142765045166, |
|
"logits/rejected": 1.2192738056182861, |
|
"logps/chosen": -713.5174560546875, |
|
"logps/rejected": -1698.417724609375, |
|
"loss": 0.1027, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.643341064453125, |
|
"rewards/margins": 10.201144218444824, |
|
"rewards/margins_max": 15.106475830078125, |
|
"rewards/margins_min": 5.29581356048584, |
|
"rewards/margins_std": 6.937185764312744, |
|
"rewards/rejected": -14.844487190246582, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 4.5, |
|
"learning_rate": 2.447883531283127e-06, |
|
"logits/chosen": 0.480851411819458, |
|
"logits/rejected": 1.3301368951797485, |
|
"logps/chosen": -769.1808471679688, |
|
"logps/rejected": -1694.986328125, |
|
"loss": 0.1297, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.698918342590332, |
|
"rewards/margins": 10.097999572753906, |
|
"rewards/margins_max": 13.938295364379883, |
|
"rewards/margins_min": 6.257704257965088, |
|
"rewards/margins_std": 5.4309983253479, |
|
"rewards/rejected": -14.796917915344238, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.7734375, |
|
"learning_rate": 2.4119529716149126e-06, |
|
"logits/chosen": 0.5563157796859741, |
|
"logits/rejected": 1.2523400783538818, |
|
"logps/chosen": -786.4019775390625, |
|
"logps/rejected": -1475.5535888671875, |
|
"loss": 0.1001, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.191001892089844, |
|
"rewards/margins": 7.456092834472656, |
|
"rewards/margins_max": 10.319292068481445, |
|
"rewards/margins_min": 4.592894077301025, |
|
"rewards/margins_std": 4.0491743087768555, |
|
"rewards/rejected": -12.647093772888184, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 2.376040613316944e-06, |
|
"logits/chosen": 0.46584218740463257, |
|
"logits/rejected": 1.1385295391082764, |
|
"logps/chosen": -699.6835327148438, |
|
"logps/rejected": -1963.931884765625, |
|
"loss": 0.0969, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.705713272094727, |
|
"rewards/margins": 12.683069229125977, |
|
"rewards/margins_max": 18.711505889892578, |
|
"rewards/margins_min": 6.654633522033691, |
|
"rewards/margins_std": 8.525496482849121, |
|
"rewards/rejected": -17.388782501220703, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.62109375, |
|
"learning_rate": 2.340153880309619e-06, |
|
"logits/chosen": 0.6857975721359253, |
|
"logits/rejected": 1.3219093084335327, |
|
"logps/chosen": -779.4803466796875, |
|
"logps/rejected": -1705.5458984375, |
|
"loss": 0.0992, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.236274242401123, |
|
"rewards/margins": 9.680828094482422, |
|
"rewards/margins_max": 14.501251220703125, |
|
"rewards/margins_min": 4.860402584075928, |
|
"rewards/margins_std": 6.81710958480835, |
|
"rewards/rejected": -14.917101860046387, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.75, |
|
"learning_rate": 2.3043001912159892e-06, |
|
"logits/chosen": 0.5691137313842773, |
|
"logits/rejected": 1.298168659210205, |
|
"logps/chosen": -779.42431640625, |
|
"logps/rejected": -1864.0394287109375, |
|
"loss": 0.0727, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.378870010375977, |
|
"rewards/margins": 11.22581672668457, |
|
"rewards/margins_max": 16.610761642456055, |
|
"rewards/margins_min": 5.840869903564453, |
|
"rewards/margins_std": 7.615464687347412, |
|
"rewards/rejected": -16.604686737060547, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 16.25, |
|
"learning_rate": 2.268486957828159e-06, |
|
"logits/chosen": 0.6387670636177063, |
|
"logits/rejected": 1.1569719314575195, |
|
"logps/chosen": -729.2189331054688, |
|
"logps/rejected": -1777.036376953125, |
|
"loss": 0.213, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.217606544494629, |
|
"rewards/margins": 10.492830276489258, |
|
"rewards/margins_max": 15.320582389831543, |
|
"rewards/margins_min": 5.665076732635498, |
|
"rewards/margins_std": 6.827474117279053, |
|
"rewards/rejected": -15.71043586730957, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.671875, |
|
"learning_rate": 2.232721583575099e-06, |
|
"logits/chosen": 0.4919258654117584, |
|
"logits/rejected": 1.2310242652893066, |
|
"logps/chosen": -778.7653198242188, |
|
"logps/rejected": -1702.4404296875, |
|
"loss": 0.1083, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.0904669761657715, |
|
"rewards/margins": 9.812942504882812, |
|
"rewards/margins_max": 14.663922309875488, |
|
"rewards/margins_min": 4.961963176727295, |
|
"rewards/margins_std": 6.860320091247559, |
|
"rewards/rejected": -14.903407096862793, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 3.640625, |
|
"learning_rate": 2.1970114619921804e-06, |
|
"logits/chosen": 0.5403339862823486, |
|
"logits/rejected": 1.279847264289856, |
|
"logps/chosen": -782.6492309570312, |
|
"logps/rejected": -1954.859130859375, |
|
"loss": 0.0877, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.303040504455566, |
|
"rewards/margins": 11.789950370788574, |
|
"rewards/margins_max": 17.79424476623535, |
|
"rewards/margins_min": 5.785655975341797, |
|
"rewards/margins_std": 8.491353988647461, |
|
"rewards/rejected": -17.09299087524414, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 2.1613639751927636e-06, |
|
"logits/chosen": 0.5678201913833618, |
|
"logits/rejected": 1.2467429637908936, |
|
"logps/chosen": -794.25341796875, |
|
"logps/rejected": -1776.8765869140625, |
|
"loss": 0.1397, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.573482513427734, |
|
"rewards/margins": 10.259510040283203, |
|
"rewards/margins_max": 15.224624633789062, |
|
"rewards/margins_min": 5.294394493103027, |
|
"rewards/margins_std": 7.021732330322266, |
|
"rewards/rejected": -15.832992553710938, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 2.1257864923421405e-06, |
|
"logits/chosen": 0.5252267122268677, |
|
"logits/rejected": 1.167436957359314, |
|
"logps/chosen": -733.9835205078125, |
|
"logps/rejected": -1947.4000244140625, |
|
"loss": 0.1001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.864560127258301, |
|
"rewards/margins": 12.287667274475098, |
|
"rewards/margins_max": 17.395977020263672, |
|
"rewards/margins_min": 7.179357051849365, |
|
"rewards/margins_std": 7.224241733551025, |
|
"rewards/rejected": -17.152225494384766, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 2.078125, |
|
"learning_rate": 2.0902863681341546e-06, |
|
"logits/chosen": 0.592448353767395, |
|
"logits/rejected": 1.254591703414917, |
|
"logps/chosen": -762.2680053710938, |
|
"logps/rejected": -1617.958740234375, |
|
"loss": 0.1161, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -5.002324104309082, |
|
"rewards/margins": 8.928361892700195, |
|
"rewards/margins_max": 13.639185905456543, |
|
"rewards/margins_min": 4.2175397872924805, |
|
"rewards/margins_std": 6.662109375, |
|
"rewards/rejected": -13.930686950683594, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 2.0548709412708235e-06, |
|
"logits/chosen": 0.46100831031799316, |
|
"logits/rejected": 1.1205612421035767, |
|
"logps/chosen": -758.50244140625, |
|
"logps/rejected": -1687.9713134765625, |
|
"loss": 0.1154, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.229290962219238, |
|
"rewards/margins": 9.612086296081543, |
|
"rewards/margins_max": 13.77092456817627, |
|
"rewards/margins_min": 5.453249931335449, |
|
"rewards/margins_std": 5.881483554840088, |
|
"rewards/rejected": -14.841377258300781, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.828125, |
|
"learning_rate": 2.019547532945246e-06, |
|
"logits/chosen": 0.5935944318771362, |
|
"logits/rejected": 1.189452886581421, |
|
"logps/chosen": -698.9295654296875, |
|
"logps/rejected": -1723.960205078125, |
|
"loss": 0.0487, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.868830680847168, |
|
"rewards/margins": 10.070269584655762, |
|
"rewards/margins_max": 15.034326553344727, |
|
"rewards/margins_min": 5.1062116622924805, |
|
"rewards/margins_std": 7.020236015319824, |
|
"rewards/rejected": -14.939099311828613, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 2.25, |
|
"learning_rate": 1.9843234453281503e-06, |
|
"logits/chosen": 0.5408506989479065, |
|
"logits/rejected": 1.2704055309295654, |
|
"logps/chosen": -782.6818237304688, |
|
"logps/rejected": -1806.959228515625, |
|
"loss": 0.0791, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.255041122436523, |
|
"rewards/margins": 10.520251274108887, |
|
"rewards/margins_max": 15.160499572753906, |
|
"rewards/margins_min": 5.880003929138184, |
|
"rewards/margins_std": 6.562302589416504, |
|
"rewards/rejected": -15.775293350219727, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 3.515625, |
|
"learning_rate": 1.949205960058361e-06, |
|
"logits/chosen": 0.4531838297843933, |
|
"logits/rejected": 1.268123745918274, |
|
"logps/chosen": -836.1715698242188, |
|
"logps/rejected": -1688.537353515625, |
|
"loss": 0.1847, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.429810523986816, |
|
"rewards/margins": 9.195915222167969, |
|
"rewards/margins_max": 13.804384231567383, |
|
"rewards/margins_min": 4.587449073791504, |
|
"rewards/margins_std": 6.517356872558594, |
|
"rewards/rejected": -14.625727653503418, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 1.914202336737517e-06, |
|
"logits/chosen": 0.4794815182685852, |
|
"logits/rejected": 1.1748192310333252, |
|
"logps/chosen": -741.9356079101562, |
|
"logps/rejected": -1867.9176025390625, |
|
"loss": 0.0846, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.017170429229736, |
|
"rewards/margins": 11.546915054321289, |
|
"rewards/margins_max": 17.515758514404297, |
|
"rewards/margins_min": 5.5780720710754395, |
|
"rewards/margins_std": 8.441219329833984, |
|
"rewards/rejected": -16.564085006713867, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.10595703125, |
|
"learning_rate": 1.8793198114293419e-06, |
|
"logits/chosen": 0.5758123993873596, |
|
"logits/rejected": 1.2776639461517334, |
|
"logps/chosen": -671.192138671875, |
|
"logps/rejected": -1982.5927734375, |
|
"loss": 0.0971, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.604527473449707, |
|
"rewards/margins": 13.000114440917969, |
|
"rewards/margins_max": 19.173341751098633, |
|
"rewards/margins_min": 6.826885223388672, |
|
"rewards/margins_std": 8.730262756347656, |
|
"rewards/rejected": -17.604642868041992, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 1.8445655951637797e-06, |
|
"logits/chosen": 0.5493451952934265, |
|
"logits/rejected": 1.379970908164978, |
|
"logps/chosen": -748.9559326171875, |
|
"logps/rejected": -1775.750244140625, |
|
"loss": 0.0733, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.9326372146606445, |
|
"rewards/margins": 10.712015151977539, |
|
"rewards/margins_max": 15.60670280456543, |
|
"rewards/margins_min": 5.817324638366699, |
|
"rewards/margins_std": 6.922135829925537, |
|
"rewards/rejected": -15.64465045928955, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 2.5625, |
|
"learning_rate": 1.809946872446312e-06, |
|
"logits/chosen": 0.5186041593551636, |
|
"logits/rejected": 1.1791023015975952, |
|
"logps/chosen": -725.4397583007812, |
|
"logps/rejected": -1589.250732421875, |
|
"loss": 0.0827, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.731976509094238, |
|
"rewards/margins": 9.246491432189941, |
|
"rewards/margins_max": 14.182947158813477, |
|
"rewards/margins_min": 4.310037136077881, |
|
"rewards/margins_std": 6.981202125549316, |
|
"rewards/rejected": -13.978469848632812, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.7754707997727471e-06, |
|
"logits/chosen": 0.6401320695877075, |
|
"logits/rejected": 1.1825447082519531, |
|
"logps/chosen": -791.119140625, |
|
"logps/rejected": -1879.62109375, |
|
"loss": 0.0851, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.561471939086914, |
|
"rewards/margins": 10.975897789001465, |
|
"rewards/margins_max": 15.100332260131836, |
|
"rewards/margins_min": 6.85146427154541, |
|
"rewards/margins_std": 5.832830429077148, |
|
"rewards/rejected": -16.537368774414062, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 6.0625, |
|
"learning_rate": 1.7411445041498099e-06, |
|
"logits/chosen": 0.5857383012771606, |
|
"logits/rejected": 1.3303660154342651, |
|
"logps/chosen": -796.5535888671875, |
|
"logps/rejected": -2160.908203125, |
|
"loss": 0.1357, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.3245439529418945, |
|
"rewards/margins": 13.754674911499023, |
|
"rewards/margins_max": 19.97846221923828, |
|
"rewards/margins_min": 7.530887603759766, |
|
"rewards/margins_std": 8.801763534545898, |
|
"rewards/rejected": -19.079219818115234, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.7069750816218218e-06, |
|
"logits/chosen": 0.5591040849685669, |
|
"logits/rejected": 1.376008152961731, |
|
"logps/chosen": -757.9560546875, |
|
"logps/rejected": -1931.6070556640625, |
|
"loss": 0.0526, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -5.156350135803223, |
|
"rewards/margins": 12.076948165893555, |
|
"rewards/margins_max": 17.336977005004883, |
|
"rewards/margins_min": 6.816922664642334, |
|
"rewards/margins_std": 7.438802242279053, |
|
"rewards/rejected": -17.233299255371094, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 3.390625, |
|
"learning_rate": 1.6729695958037856e-06, |
|
"logits/chosen": 0.5422581434249878, |
|
"logits/rejected": 1.107097864151001, |
|
"logps/chosen": -806.7074584960938, |
|
"logps/rejected": -1815.927734375, |
|
"loss": 0.0766, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.696779727935791, |
|
"rewards/margins": 10.256368637084961, |
|
"rewards/margins_max": 15.249975204467773, |
|
"rewards/margins_min": 5.262759685516357, |
|
"rewards/margins_std": 7.062028408050537, |
|
"rewards/rejected": -15.953149795532227, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 1.6391350764211675e-06, |
|
"logits/chosen": 0.47015446424484253, |
|
"logits/rejected": 1.3002904653549194, |
|
"logps/chosen": -784.7755737304688, |
|
"logps/rejected": -1845.008544921875, |
|
"loss": 0.0581, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.293187618255615, |
|
"rewards/margins": 11.025456428527832, |
|
"rewards/margins_max": 15.80299186706543, |
|
"rewards/margins_min": 6.247918605804443, |
|
"rewards/margins_std": 6.756457328796387, |
|
"rewards/rejected": -16.31864356994629, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 5.65625, |
|
"learning_rate": 1.6054785178566944e-06, |
|
"logits/chosen": 0.39869189262390137, |
|
"logits/rejected": 1.1358020305633545, |
|
"logps/chosen": -790.1834106445312, |
|
"logps/rejected": -1959.004638671875, |
|
"loss": 0.1276, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.488138198852539, |
|
"rewards/margins": 12.004860877990723, |
|
"rewards/margins_max": 17.329792022705078, |
|
"rewards/margins_min": 6.679928779602051, |
|
"rewards/margins_std": 7.530592441558838, |
|
"rewards/rejected": -17.493000030517578, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.6328125, |
|
"learning_rate": 1.5720068777044479e-06, |
|
"logits/chosen": 0.5967472195625305, |
|
"logits/rejected": 1.3974864482879639, |
|
"logps/chosen": -806.0808715820312, |
|
"logps/rejected": -1895.9869384765625, |
|
"loss": 0.092, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.477103233337402, |
|
"rewards/margins": 11.376334190368652, |
|
"rewards/margins_max": 16.01239776611328, |
|
"rewards/margins_min": 6.74027156829834, |
|
"rewards/margins_std": 6.5563836097717285, |
|
"rewards/rejected": -16.853437423706055, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 3.671875, |
|
"learning_rate": 1.5387270753315726e-06, |
|
"logits/chosen": 0.5518096089363098, |
|
"logits/rejected": 1.32808256149292, |
|
"logps/chosen": -816.740234375, |
|
"logps/rejected": -2068.84326171875, |
|
"loss": 0.1744, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.405552387237549, |
|
"rewards/margins": 12.889608383178711, |
|
"rewards/margins_max": 20.060169219970703, |
|
"rewards/margins_min": 5.719046592712402, |
|
"rewards/margins_std": 10.140707015991211, |
|
"rewards/rejected": -18.295162200927734, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.7421875, |
|
"learning_rate": 1.5056459904478738e-06, |
|
"logits/chosen": 0.5233970880508423, |
|
"logits/rejected": 1.1991077661514282, |
|
"logps/chosen": -799.1451416015625, |
|
"logps/rejected": -1898.0875244140625, |
|
"loss": 0.1284, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.3765363693237305, |
|
"rewards/margins": 11.210358619689941, |
|
"rewards/margins_max": 15.882084846496582, |
|
"rewards/margins_min": 6.538631439208984, |
|
"rewards/margins_std": 6.606819152832031, |
|
"rewards/rejected": -16.586894989013672, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 7.25, |
|
"learning_rate": 1.4727704616836297e-06, |
|
"logits/chosen": 0.4744800925254822, |
|
"logits/rejected": 1.247642993927002, |
|
"logps/chosen": -778.9432373046875, |
|
"logps/rejected": -1884.1217041015625, |
|
"loss": 0.0875, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.104981422424316, |
|
"rewards/margins": 11.539787292480469, |
|
"rewards/margins_max": 16.65086555480957, |
|
"rewards/margins_min": 6.428709506988525, |
|
"rewards/margins_std": 7.228156089782715, |
|
"rewards/rejected": -16.6447696685791, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.90625, |
|
"learning_rate": 1.4401072851758835e-06, |
|
"logits/chosen": 0.5687705278396606, |
|
"logits/rejected": 1.1934126615524292, |
|
"logps/chosen": -706.02294921875, |
|
"logps/rejected": -1641.071044921875, |
|
"loss": 0.0828, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.839352607727051, |
|
"rewards/margins": 9.687154769897461, |
|
"rewards/margins_max": 12.636642456054688, |
|
"rewards/margins_min": 6.737668514251709, |
|
"rewards/margins_std": 4.17120361328125, |
|
"rewards/rejected": -14.526507377624512, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 9.0625, |
|
"learning_rate": 1.4076632131635226e-06, |
|
"logits/chosen": 0.46886777877807617, |
|
"logits/rejected": 1.1962741613388062, |
|
"logps/chosen": -732.7435302734375, |
|
"logps/rejected": -1612.6419677734375, |
|
"loss": 0.1473, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.984394550323486, |
|
"rewards/margins": 9.234537124633789, |
|
"rewards/margins_max": 13.321691513061523, |
|
"rewards/margins_min": 5.147382736206055, |
|
"rewards/margins_std": 5.780109882354736, |
|
"rewards/rejected": -14.218931198120117, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.3754449525914359e-06, |
|
"logits/chosen": 0.5064732432365417, |
|
"logits/rejected": 1.1770398616790771, |
|
"logps/chosen": -800.9207153320312, |
|
"logps/rejected": -1703.955810546875, |
|
"loss": 0.0867, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.16342306137085, |
|
"rewards/margins": 9.53877067565918, |
|
"rewards/margins_max": 14.161503791809082, |
|
"rewards/margins_min": 4.916037559509277, |
|
"rewards/margins_std": 6.537531852722168, |
|
"rewards/rejected": -14.702194213867188, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 11.9375, |
|
"learning_rate": 1.343459163724032e-06, |
|
"logits/chosen": 0.6023787260055542, |
|
"logits/rejected": 1.207897424697876, |
|
"logps/chosen": -743.8614501953125, |
|
"logps/rejected": -1714.038330078125, |
|
"loss": 0.1015, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.851070404052734, |
|
"rewards/margins": 10.04643440246582, |
|
"rewards/margins_max": 14.574777603149414, |
|
"rewards/margins_min": 5.518091678619385, |
|
"rewards/margins_std": 6.404044151306152, |
|
"rewards/rejected": -14.897504806518555, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 3.3125, |
|
"learning_rate": 1.311712458768406e-06, |
|
"logits/chosen": 0.6761046648025513, |
|
"logits/rejected": 1.2278960943222046, |
|
"logps/chosen": -726.6144409179688, |
|
"logps/rejected": -1568.93359375, |
|
"loss": 0.1191, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.09859561920166, |
|
"rewards/margins": 8.505921363830566, |
|
"rewards/margins_max": 12.416373252868652, |
|
"rewards/margins_min": 4.595466613769531, |
|
"rewards/margins_std": 5.530216217041016, |
|
"rewards/rejected": -13.604515075683594, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 1.280211400507444e-06, |
|
"logits/chosen": 0.6303955316543579, |
|
"logits/rejected": 1.32115638256073, |
|
"logps/chosen": -698.5577392578125, |
|
"logps/rejected": -1850.8424072265625, |
|
"loss": 0.0572, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.735129356384277, |
|
"rewards/margins": 11.382537841796875, |
|
"rewards/margins_max": 16.18451499938965, |
|
"rewards/margins_min": 6.580558776855469, |
|
"rewards/margins_std": 6.791023254394531, |
|
"rewards/rejected": -16.117666244506836, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.65625, |
|
"learning_rate": 1.2489625009431409e-06, |
|
"logits/chosen": 0.5856636762619019, |
|
"logits/rejected": 1.2052780389785767, |
|
"logps/chosen": -733.9873046875, |
|
"logps/rejected": -1687.567138671875, |
|
"loss": 0.1764, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.016100883483887, |
|
"rewards/margins": 9.677934646606445, |
|
"rewards/margins_max": 15.035099983215332, |
|
"rewards/margins_min": 4.320771217346191, |
|
"rewards/margins_std": 7.576174259185791, |
|
"rewards/rejected": -14.694036483764648, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.6640625, |
|
"learning_rate": 1.2179722199504213e-06, |
|
"logits/chosen": 0.5713605284690857, |
|
"logits/rejected": 1.207334280014038, |
|
"logps/chosen": -728.2240600585938, |
|
"logps/rejected": -1619.398681640625, |
|
"loss": 0.1203, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.969111919403076, |
|
"rewards/margins": 9.243115425109863, |
|
"rewards/margins_max": 13.461108207702637, |
|
"rewards/margins_min": 5.025121212005615, |
|
"rewards/margins_std": 5.96514368057251, |
|
"rewards/rejected": -14.212226867675781, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 3.328125, |
|
"learning_rate": 1.187246963941731e-06, |
|
"logits/chosen": 0.5765690803527832, |
|
"logits/rejected": 1.1067253351211548, |
|
"logps/chosen": -698.3675537109375, |
|
"logps/rejected": -1611.295654296875, |
|
"loss": 0.1114, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.707912445068359, |
|
"rewards/margins": 9.343481063842773, |
|
"rewards/margins_max": 13.480981826782227, |
|
"rewards/margins_min": 5.205979347229004, |
|
"rewards/margins_std": 5.851310729980469, |
|
"rewards/rejected": -14.05139446258545, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.1567930845426802e-06, |
|
"logits/chosen": 0.41190090775489807, |
|
"logits/rejected": 1.0678179264068604, |
|
"logps/chosen": -716.1203002929688, |
|
"logps/rejected": -1831.5726318359375, |
|
"loss": 0.1423, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.89259147644043, |
|
"rewards/margins": 11.263982772827148, |
|
"rewards/margins_max": 16.847904205322266, |
|
"rewards/margins_min": 5.680062294006348, |
|
"rewards/margins_std": 7.89685583114624, |
|
"rewards/rejected": -16.156574249267578, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.640625, |
|
"learning_rate": 1.1266168772790195e-06, |
|
"logits/chosen": 0.3195948004722595, |
|
"logits/rejected": 1.1387958526611328, |
|
"logps/chosen": -776.84228515625, |
|
"logps/rejected": -1585.8587646484375, |
|
"loss": 0.1529, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.140332221984863, |
|
"rewards/margins": 8.726045608520508, |
|
"rewards/margins_max": 12.733012199401855, |
|
"rewards/margins_min": 4.719078063964844, |
|
"rewards/margins_std": 5.666707515716553, |
|
"rewards/rejected": -13.866376876831055, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 1.0967245802752044e-06, |
|
"logits/chosen": 0.5815094113349915, |
|
"logits/rejected": 1.331162691116333, |
|
"logps/chosen": -753.7833862304688, |
|
"logps/rejected": -1863.087646484375, |
|
"loss": 0.0754, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.917272090911865, |
|
"rewards/margins": 11.66606330871582, |
|
"rewards/margins_max": 16.2323055267334, |
|
"rewards/margins_min": 7.099822044372559, |
|
"rewards/margins_std": 6.457640171051025, |
|
"rewards/rejected": -16.583335876464844, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.9921875, |
|
"learning_rate": 1.0671223729648338e-06, |
|
"logits/chosen": 0.5788689851760864, |
|
"logits/rejected": 1.1679919958114624, |
|
"logps/chosen": -738.84423828125, |
|
"logps/rejected": -1693.3870849609375, |
|
"loss": 0.1364, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.073354244232178, |
|
"rewards/margins": 9.768596649169922, |
|
"rewards/margins_max": 14.380800247192383, |
|
"rewards/margins_min": 5.156393051147461, |
|
"rewards/margins_std": 6.522641658782959, |
|
"rewards/rejected": -14.841951370239258, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.7421875, |
|
"learning_rate": 1.0378163748132102e-06, |
|
"logits/chosen": 0.49502748250961304, |
|
"logits/rejected": 1.2685495615005493, |
|
"logps/chosen": -712.3984375, |
|
"logps/rejected": -1658.44921875, |
|
"loss": 0.0608, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.415982246398926, |
|
"rewards/margins": 9.93490219116211, |
|
"rewards/margins_max": 14.178258895874023, |
|
"rewards/margins_min": 5.691543102264404, |
|
"rewards/margins_std": 6.001015663146973, |
|
"rewards/rejected": -14.350883483886719, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 6.125, |
|
"learning_rate": 1.008812644052311e-06, |
|
"logits/chosen": 0.4484991431236267, |
|
"logits/rejected": 1.1256628036499023, |
|
"logps/chosen": -690.5672607421875, |
|
"logps/rejected": -1668.564697265625, |
|
"loss": 0.0796, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.382365703582764, |
|
"rewards/margins": 10.01590347290039, |
|
"rewards/margins_max": 13.478933334350586, |
|
"rewards/margins_min": 6.552873134613037, |
|
"rewards/margins_std": 4.897465229034424, |
|
"rewards/rejected": -14.398269653320312, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.796875, |
|
"learning_rate": 9.801171764284072e-07, |
|
"logits/chosen": 0.5813416838645935, |
|
"logits/rejected": 1.228780746459961, |
|
"logps/chosen": -712.302734375, |
|
"logps/rejected": -1828.1448974609375, |
|
"loss": 0.0642, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.676440238952637, |
|
"rewards/margins": 11.309109687805176, |
|
"rewards/margins_max": 16.14137840270996, |
|
"rewards/margins_min": 6.4768385887146, |
|
"rewards/margins_std": 6.833861351013184, |
|
"rewards/rejected": -15.985549926757812, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.4453125, |
|
"learning_rate": 9.517359039626043e-07, |
|
"logits/chosen": 0.5194617509841919, |
|
"logits/rejected": 1.1831514835357666, |
|
"logps/chosen": -732.8680419921875, |
|
"logps/rejected": -1726.790283203125, |
|
"loss": 0.0712, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.601668357849121, |
|
"rewards/margins": 10.169393539428711, |
|
"rewards/margins_max": 15.003515243530273, |
|
"rewards/margins_min": 5.335273742675781, |
|
"rewards/margins_std": 6.836478233337402, |
|
"rewards/rejected": -14.771062850952148, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 9.23674693724555e-07, |
|
"logits/chosen": 0.2990169823169708, |
|
"logits/rejected": 0.9671838879585266, |
|
"logps/chosen": -760.0450439453125, |
|
"logps/rejected": -1871.8310546875, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.976813316345215, |
|
"rewards/margins": 11.278745651245117, |
|
"rewards/margins_max": 14.921788215637207, |
|
"rewards/margins_min": 7.63570499420166, |
|
"rewards/margins_std": 5.152037620544434, |
|
"rewards/rejected": -16.255558013916016, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.6328125, |
|
"learning_rate": 8.959393466195973e-07, |
|
"logits/chosen": 0.41968780755996704, |
|
"logits/rejected": 1.290880799293518, |
|
"logps/chosen": -761.1870727539062, |
|
"logps/rejected": -1643.1441650390625, |
|
"loss": 0.0557, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.9296698570251465, |
|
"rewards/margins": 9.346491813659668, |
|
"rewards/margins_max": 12.647331237792969, |
|
"rewards/margins_min": 6.045652866363525, |
|
"rewards/margins_std": 4.6680908203125, |
|
"rewards/rejected": -14.276163101196289, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 8.685355961895783e-07, |
|
"logits/chosen": 0.687114417552948, |
|
"logits/rejected": 1.4132459163665771, |
|
"logps/chosen": -767.54248046875, |
|
"logps/rejected": -1862.729248046875, |
|
"loss": 0.0503, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.025767803192139, |
|
"rewards/margins": 11.429033279418945, |
|
"rewards/margins_max": 17.451461791992188, |
|
"rewards/margins_min": 5.406604290008545, |
|
"rewards/margins_std": 8.517000198364258, |
|
"rewards/rejected": -16.45479965209961, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.73046875, |
|
"learning_rate": 8.414691074275916e-07, |
|
"logits/chosen": 0.4633597433567047, |
|
"logits/rejected": 1.248290777206421, |
|
"logps/chosen": -777.6952514648438, |
|
"logps/rejected": -1863.720458984375, |
|
"loss": 0.0822, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.001014232635498, |
|
"rewards/margins": 11.318872451782227, |
|
"rewards/margins_max": 15.89136028289795, |
|
"rewards/margins_min": 6.7463860511779785, |
|
"rewards/margins_std": 6.466473579406738, |
|
"rewards/rejected": -16.319889068603516, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 8.147454756068937e-07, |
|
"logits/chosen": 0.5497418642044067, |
|
"logits/rejected": 1.2043471336364746, |
|
"logps/chosen": -709.6234130859375, |
|
"logps/rejected": -1719.3043212890625, |
|
"loss": 0.0768, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.652140140533447, |
|
"rewards/margins": 10.480083465576172, |
|
"rewards/margins_max": 15.241083145141602, |
|
"rewards/margins_min": 5.719081401824951, |
|
"rewards/margins_std": 6.733071804046631, |
|
"rewards/rejected": -15.132222175598145, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 7.883702251242298e-07, |
|
"logits/chosen": 0.45454102754592896, |
|
"logits/rejected": 1.1140748262405396, |
|
"logps/chosen": -678.3165283203125, |
|
"logps/rejected": -1609.6807861328125, |
|
"loss": 0.1038, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.47580623626709, |
|
"rewards/margins": 9.60850715637207, |
|
"rewards/margins_max": 13.529436111450195, |
|
"rewards/margins_min": 5.6875810623168945, |
|
"rewards/margins_std": 5.545028209686279, |
|
"rewards/rejected": -14.084314346313477, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 7.623488083578148e-07, |
|
"logits/chosen": 0.48715901374816895, |
|
"logits/rejected": 1.142924189567566, |
|
"logps/chosen": -676.9874267578125, |
|
"logps/rejected": -1663.1302490234375, |
|
"loss": 0.1045, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.443617820739746, |
|
"rewards/margins": 10.04162311553955, |
|
"rewards/margins_max": 15.552085876464844, |
|
"rewards/margins_min": 4.531158447265625, |
|
"rewards/margins_std": 7.792973518371582, |
|
"rewards/rejected": -14.485241889953613, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 7.366866045401968e-07, |
|
"logits/chosen": 0.5052765607833862, |
|
"logits/rejected": 1.288438081741333, |
|
"logps/chosen": -724.85302734375, |
|
"logps/rejected": -1638.660400390625, |
|
"loss": 0.085, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.777608394622803, |
|
"rewards/margins": 9.423945426940918, |
|
"rewards/margins_max": 13.84093952178955, |
|
"rewards/margins_min": 5.006953239440918, |
|
"rewards/margins_std": 6.246571063995361, |
|
"rewards/rejected": -14.201555252075195, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 3.140625, |
|
"learning_rate": 7.113889186462477e-07, |
|
"logits/chosen": 0.6119362115859985, |
|
"logits/rejected": 1.1571754217147827, |
|
"logps/chosen": -736.3836669921875, |
|
"logps/rejected": -1706.408447265625, |
|
"loss": 0.079, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.75775146484375, |
|
"rewards/margins": 10.034549713134766, |
|
"rewards/margins_max": 14.87476921081543, |
|
"rewards/margins_min": 5.194329738616943, |
|
"rewards/margins_std": 6.845104217529297, |
|
"rewards/rejected": -14.7923002243042, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.6328125, |
|
"learning_rate": 6.864609802964978e-07, |
|
"logits/chosen": 0.5309674143791199, |
|
"logits/rejected": 1.2003862857818604, |
|
"logps/chosen": -700.8447265625, |
|
"logps/rejected": -1731.847412109375, |
|
"loss": 0.058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.624502658843994, |
|
"rewards/margins": 10.476162910461426, |
|
"rewards/margins_max": 15.123468399047852, |
|
"rewards/margins_min": 5.828855991363525, |
|
"rewards/margins_std": 6.572283744812012, |
|
"rewards/rejected": -15.100665092468262, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 6.1875, |
|
"learning_rate": 6.619079426760545e-07, |
|
"logits/chosen": 0.49570074677467346, |
|
"logits/rejected": 1.1981004476547241, |
|
"logps/chosen": -769.2633056640625, |
|
"logps/rejected": -1941.0648193359375, |
|
"loss": 0.0931, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.233429908752441, |
|
"rewards/margins": 12.01481819152832, |
|
"rewards/margins_max": 17.587182998657227, |
|
"rewards/margins_min": 6.4424543380737305, |
|
"rewards/margins_std": 7.8805131912231445, |
|
"rewards/rejected": -17.248249053955078, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 6.377348814693174e-07, |
|
"logits/chosen": 0.5919948220252991, |
|
"logits/rejected": 1.398564338684082, |
|
"logps/chosen": -762.436279296875, |
|
"logps/rejected": -1769.9222412109375, |
|
"loss": 0.113, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.956363201141357, |
|
"rewards/margins": 10.631746292114258, |
|
"rewards/margins_max": 15.959482192993164, |
|
"rewards/margins_min": 5.304008483886719, |
|
"rewards/margins_std": 7.5345563888549805, |
|
"rewards/rejected": -15.588109016418457, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.2138671875, |
|
"learning_rate": 6.139467938107169e-07, |
|
"logits/chosen": 0.38951975107192993, |
|
"logits/rejected": 1.1649284362792969, |
|
"logps/chosen": -778.3822021484375, |
|
"logps/rejected": -2023.4833984375, |
|
"loss": 0.0779, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.072902679443359, |
|
"rewards/margins": 12.735228538513184, |
|
"rewards/margins_max": 17.881254196166992, |
|
"rewards/margins_min": 7.589202880859375, |
|
"rewards/margins_std": 7.277578830718994, |
|
"rewards/rejected": -17.80813217163086, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.625, |
|
"learning_rate": 5.905485972516903e-07, |
|
"logits/chosen": 0.5617870092391968, |
|
"logits/rejected": 1.2924219369888306, |
|
"logps/chosen": -818.1054077148438, |
|
"logps/rejected": -1890.950927734375, |
|
"loss": 0.13, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.535712718963623, |
|
"rewards/margins": 11.305818557739258, |
|
"rewards/margins_max": 17.704103469848633, |
|
"rewards/margins_min": 4.907529830932617, |
|
"rewards/margins_std": 9.048542976379395, |
|
"rewards/rejected": -16.841529846191406, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 5.675451287441072e-07, |
|
"logits/chosen": 0.7306760549545288, |
|
"logits/rejected": 1.395262360572815, |
|
"logps/chosen": -816.7340087890625, |
|
"logps/rejected": -1670.490234375, |
|
"loss": 0.1545, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.269486427307129, |
|
"rewards/margins": 9.290313720703125, |
|
"rewards/margins_max": 13.515324592590332, |
|
"rewards/margins_min": 5.065301895141602, |
|
"rewards/margins_std": 5.975068092346191, |
|
"rewards/rejected": -14.55980110168457, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 5.28125, |
|
"learning_rate": 5.449411436403632e-07, |
|
"logits/chosen": 0.7268288135528564, |
|
"logits/rejected": 1.3329485654830933, |
|
"logps/chosen": -696.4193115234375, |
|
"logps/rejected": -1806.069091796875, |
|
"loss": 0.0933, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.67076301574707, |
|
"rewards/margins": 11.251920700073242, |
|
"rewards/margins_max": 16.409025192260742, |
|
"rewards/margins_min": 6.094817638397217, |
|
"rewards/margins_std": 7.2932448387146, |
|
"rewards/rejected": -15.92268180847168, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 4.28125, |
|
"learning_rate": 5.227413147103336e-07, |
|
"logits/chosen": 0.5869401693344116, |
|
"logits/rejected": 1.2344766855239868, |
|
"logps/chosen": -729.2957153320312, |
|
"logps/rejected": -1567.1939697265625, |
|
"loss": 0.1098, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.183860778808594, |
|
"rewards/margins": 8.676237106323242, |
|
"rewards/margins_max": 12.739700317382812, |
|
"rewards/margins_min": 4.6127729415893555, |
|
"rewards/margins_std": 5.746604919433594, |
|
"rewards/rejected": -13.86009693145752, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 2.46875, |
|
"learning_rate": 5.009502311754081e-07, |
|
"logits/chosen": 0.5038915872573853, |
|
"logits/rejected": 1.1727396249771118, |
|
"logps/chosen": -724.4193725585938, |
|
"logps/rejected": -1736.091064453125, |
|
"loss": 0.146, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.8910675048828125, |
|
"rewards/margins": 10.278018951416016, |
|
"rewards/margins_max": 15.358955383300781, |
|
"rewards/margins_min": 5.197081565856934, |
|
"rewards/margins_std": 7.1855292320251465, |
|
"rewards/rejected": -15.169085502624512, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.703125, |
|
"learning_rate": 4.795723977597844e-07, |
|
"logits/chosen": 0.5357404947280884, |
|
"logits/rejected": 1.154956579208374, |
|
"logps/chosen": -719.9663696289062, |
|
"logps/rejected": -1709.3209228515625, |
|
"loss": 0.0843, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.783188819885254, |
|
"rewards/margins": 10.301424026489258, |
|
"rewards/margins_max": 14.93207836151123, |
|
"rewards/margins_min": 5.670768737792969, |
|
"rewards/margins_std": 6.54873514175415, |
|
"rewards/rejected": -15.084611892700195, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 4.586122337592444e-07, |
|
"logits/chosen": 0.48415178060531616, |
|
"logits/rejected": 1.2925946712493896, |
|
"logps/chosen": -734.2363891601562, |
|
"logps/rejected": -1889.037353515625, |
|
"loss": 0.0469, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.827243804931641, |
|
"rewards/margins": 11.82060432434082, |
|
"rewards/margins_max": 17.99706268310547, |
|
"rewards/margins_min": 5.644143104553223, |
|
"rewards/margins_std": 8.734832763671875, |
|
"rewards/rejected": -16.647846221923828, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 2.8125, |
|
"learning_rate": 4.380740721275786e-07, |
|
"logits/chosen": 0.6227355003356934, |
|
"logits/rejected": 1.2969437837600708, |
|
"logps/chosen": -780.354736328125, |
|
"logps/rejected": -1921.7880859375, |
|
"loss": 0.0832, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.117171287536621, |
|
"rewards/margins": 11.844526290893555, |
|
"rewards/margins_max": 17.806795120239258, |
|
"rewards/margins_min": 5.882256031036377, |
|
"rewards/margins_std": 8.43192195892334, |
|
"rewards/rejected": -16.96169662475586, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 4.1796215858086577e-07, |
|
"logits/chosen": 0.6349445581436157, |
|
"logits/rejected": 1.3867194652557373, |
|
"logps/chosen": -799.1439208984375, |
|
"logps/rejected": -1852.981689453125, |
|
"loss": 0.1059, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.414491176605225, |
|
"rewards/margins": 11.105630874633789, |
|
"rewards/margins_max": 17.207439422607422, |
|
"rewards/margins_min": 5.003822326660156, |
|
"rewards/margins_std": 8.629260063171387, |
|
"rewards/rejected": -16.520122528076172, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.2412109375, |
|
"learning_rate": 3.982806507197831e-07, |
|
"logits/chosen": 0.6008701324462891, |
|
"logits/rejected": 1.230450987815857, |
|
"logps/chosen": -744.5529174804688, |
|
"logps/rejected": -1744.4771728515625, |
|
"loss": 0.0502, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.9854936599731445, |
|
"rewards/margins": 9.673846244812012, |
|
"rewards/margins_max": 13.759689331054688, |
|
"rewards/margins_min": 5.5880022048950195, |
|
"rewards/margins_std": 5.778256416320801, |
|
"rewards/rejected": -14.659339904785156, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 3.765625, |
|
"learning_rate": 3.790336171701331e-07, |
|
"logits/chosen": 0.5796440839767456, |
|
"logits/rejected": 1.2289059162139893, |
|
"logps/chosen": -720.2902221679688, |
|
"logps/rejected": -1942.671142578125, |
|
"loss": 0.0773, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.671596050262451, |
|
"rewards/margins": 12.241273880004883, |
|
"rewards/margins_max": 16.92319107055664, |
|
"rewards/margins_min": 7.55935525894165, |
|
"rewards/margins_std": 6.621232509613037, |
|
"rewards/rejected": -16.912870407104492, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.515625, |
|
"learning_rate": 3.6022503674176537e-07, |
|
"logits/chosen": 0.5198200941085815, |
|
"logits/rejected": 1.3343006372451782, |
|
"logps/chosen": -796.9490966796875, |
|
"logps/rejected": -1900.691162109375, |
|
"loss": 0.09, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.069548606872559, |
|
"rewards/margins": 11.76386547088623, |
|
"rewards/margins_max": 17.033977508544922, |
|
"rewards/margins_min": 6.4937543869018555, |
|
"rewards/margins_std": 7.453061580657959, |
|
"rewards/rejected": -16.83341407775879, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 3.4185879760606525e-07, |
|
"logits/chosen": 0.5187299847602844, |
|
"logits/rejected": 1.1834654808044434, |
|
"logps/chosen": -736.1161499023438, |
|
"logps/rejected": -1869.806640625, |
|
"loss": 0.0581, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.959606647491455, |
|
"rewards/margins": 11.514524459838867, |
|
"rewards/margins_max": 16.145587921142578, |
|
"rewards/margins_min": 6.883460998535156, |
|
"rewards/margins_std": 6.549312591552734, |
|
"rewards/rejected": -16.474130630493164, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.5859375, |
|
"learning_rate": 3.2393869649217454e-07, |
|
"logits/chosen": 0.5701602697372437, |
|
"logits/rejected": 1.3300843238830566, |
|
"logps/chosen": -761.6326904296875, |
|
"logps/rejected": -1840.357177734375, |
|
"loss": 0.0662, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.093400478363037, |
|
"rewards/margins": 11.083673477172852, |
|
"rewards/margins_max": 15.876144409179688, |
|
"rewards/margins_min": 6.291202545166016, |
|
"rewards/margins_std": 6.777576446533203, |
|
"rewards/rejected": -16.177074432373047, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 2.5, |
|
"learning_rate": 3.064684379021207e-07, |
|
"logits/chosen": 0.43363428115844727, |
|
"logits/rejected": 1.0424432754516602, |
|
"logps/chosen": -684.9832763671875, |
|
"logps/rejected": -1844.010009765625, |
|
"loss": 0.054, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.640398979187012, |
|
"rewards/margins": 11.688333511352539, |
|
"rewards/margins_max": 16.679744720458984, |
|
"rewards/margins_min": 6.696922302246094, |
|
"rewards/margins_std": 7.058920860290527, |
|
"rewards/rejected": -16.328731536865234, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 2.894516333450115e-07, |
|
"logits/chosen": 0.5114481449127197, |
|
"logits/rejected": 1.14482843875885, |
|
"logps/chosen": -735.6387939453125, |
|
"logps/rejected": -1779.406005859375, |
|
"loss": 0.1105, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.890014171600342, |
|
"rewards/margins": 10.568880081176758, |
|
"rewards/margins_max": 14.828028678894043, |
|
"rewards/margins_min": 6.309730529785156, |
|
"rewards/margins_std": 6.023346900939941, |
|
"rewards/rejected": -15.458892822265625, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 2.728918005904513e-07, |
|
"logits/chosen": 0.3923017084598541, |
|
"logits/rejected": 1.0707480907440186, |
|
"logps/chosen": -806.395751953125, |
|
"logps/rejected": -1856.805908203125, |
|
"loss": 0.1777, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.439145565032959, |
|
"rewards/margins": 10.84535026550293, |
|
"rewards/margins_max": 16.374027252197266, |
|
"rewards/margins_min": 5.316674709320068, |
|
"rewards/margins_std": 7.818729400634766, |
|
"rewards/rejected": -16.284496307373047, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 2.5679236294133493e-07, |
|
"logits/chosen": 0.5716456174850464, |
|
"logits/rejected": 1.229247808456421, |
|
"logps/chosen": -732.1973876953125, |
|
"logps/rejected": -1738.814697265625, |
|
"loss": 0.0752, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.678011894226074, |
|
"rewards/margins": 10.548458099365234, |
|
"rewards/margins_max": 15.896784782409668, |
|
"rewards/margins_min": 5.200132369995117, |
|
"rewards/margins_std": 7.563673496246338, |
|
"rewards/rejected": -15.226470947265625, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 2.453125, |
|
"learning_rate": 2.4115664852617294e-07, |
|
"logits/chosen": 0.5404381155967712, |
|
"logits/rejected": 1.2678707838058472, |
|
"logps/chosen": -750.8515014648438, |
|
"logps/rejected": -1882.701416015625, |
|
"loss": 0.1361, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.122988700866699, |
|
"rewards/margins": 11.382095336914062, |
|
"rewards/margins_max": 17.519346237182617, |
|
"rewards/margins_min": 5.244842529296875, |
|
"rewards/margins_std": 8.679386138916016, |
|
"rewards/rejected": -16.505083084106445, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 2.2598788961108897e-07, |
|
"logits/chosen": 0.5512218475341797, |
|
"logits/rejected": 1.235686182975769, |
|
"logps/chosen": -703.8411865234375, |
|
"logps/rejected": -1646.0699462890625, |
|
"loss": 0.0847, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.887363910675049, |
|
"rewards/margins": 9.765475273132324, |
|
"rewards/margins_max": 14.917486190795898, |
|
"rewards/margins_min": 4.613465309143066, |
|
"rewards/margins_std": 7.286043643951416, |
|
"rewards/rejected": -14.652839660644531, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 2.1128922193163564e-07, |
|
"logits/chosen": 0.5618628263473511, |
|
"logits/rejected": 1.2874120473861694, |
|
"logps/chosen": -742.8013916015625, |
|
"logps/rejected": -1916.2711181640625, |
|
"loss": 0.1083, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -5.009154796600342, |
|
"rewards/margins": 12.049230575561523, |
|
"rewards/margins_max": 17.573997497558594, |
|
"rewards/margins_min": 6.5244646072387695, |
|
"rewards/margins_std": 7.813199043273926, |
|
"rewards/rejected": -17.058387756347656, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 3.203125, |
|
"learning_rate": 1.9706368404456472e-07, |
|
"logits/chosen": 0.4528091549873352, |
|
"logits/rejected": 1.111604928970337, |
|
"logps/chosen": -745.5892333984375, |
|
"logps/rejected": -1809.7484130859375, |
|
"loss": 0.0911, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.078321933746338, |
|
"rewards/margins": 10.962045669555664, |
|
"rewards/margins_max": 15.926958084106445, |
|
"rewards/margins_min": 5.997132778167725, |
|
"rewards/margins_std": 7.02144718170166, |
|
"rewards/rejected": -16.040367126464844, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 3.75, |
|
"learning_rate": 1.8331421669968708e-07, |
|
"logits/chosen": 0.6266171336174011, |
|
"logits/rejected": 1.3515210151672363, |
|
"logps/chosen": -768.0899047851562, |
|
"logps/rejected": -1722.0845947265625, |
|
"loss": 0.1013, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.038366794586182, |
|
"rewards/margins": 9.89087200164795, |
|
"rewards/margins_max": 14.385602951049805, |
|
"rewards/margins_min": 5.396140098571777, |
|
"rewards/margins_std": 6.356511116027832, |
|
"rewards/rejected": -14.929239273071289, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 5.0, |
|
"learning_rate": 1.7004366223194984e-07, |
|
"logits/chosen": 0.5014376044273376, |
|
"logits/rejected": 1.2441834211349487, |
|
"logps/chosen": -747.4188232421875, |
|
"logps/rejected": -1764.168212890625, |
|
"loss": 0.1175, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.955763339996338, |
|
"rewards/margins": 10.59716510772705, |
|
"rewards/margins_max": 15.370004653930664, |
|
"rewards/margins_min": 5.8243255615234375, |
|
"rewards/margins_std": 6.749813079833984, |
|
"rewards/rejected": -15.552927017211914, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.5725476397386197e-07, |
|
"logits/chosen": 0.3932679295539856, |
|
"logits/rejected": 1.2315890789031982, |
|
"logps/chosen": -707.3714599609375, |
|
"logps/rejected": -1953.8558349609375, |
|
"loss": 0.0576, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.406428337097168, |
|
"rewards/margins": 12.903097152709961, |
|
"rewards/margins_max": 18.890384674072266, |
|
"rewards/margins_min": 6.915809631347656, |
|
"rewards/margins_std": 8.467303276062012, |
|
"rewards/rejected": -17.309528350830078, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.4495016568838198e-07, |
|
"logits/chosen": 0.44051748514175415, |
|
"logits/rejected": 1.1644173860549927, |
|
"logps/chosen": -771.8656616210938, |
|
"logps/rejected": -1771.4254150390625, |
|
"loss": 0.079, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.945860385894775, |
|
"rewards/margins": 10.547651290893555, |
|
"rewards/margins_max": 15.509611129760742, |
|
"rewards/margins_min": 5.585693359375, |
|
"rewards/margins_std": 7.017270565032959, |
|
"rewards/rejected": -15.493513107299805, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.90625, |
|
"learning_rate": 1.3313241102239056e-07, |
|
"logits/chosen": 0.6278412342071533, |
|
"logits/rejected": 1.4244401454925537, |
|
"logps/chosen": -682.3499755859375, |
|
"logps/rejected": -1634.0439453125, |
|
"loss": 0.085, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.619953155517578, |
|
"rewards/margins": 9.823567390441895, |
|
"rewards/margins_max": 13.926409721374512, |
|
"rewards/margins_min": 5.720723628997803, |
|
"rewards/margins_std": 5.802296161651611, |
|
"rewards/rejected": -14.443519592285156, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.2180394298086095e-07, |
|
"logits/chosen": 0.5217547416687012, |
|
"logits/rejected": 1.2030553817749023, |
|
"logps/chosen": -736.8463745117188, |
|
"logps/rejected": -1752.949951171875, |
|
"loss": 0.0513, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.804549217224121, |
|
"rewards/margins": 10.467925071716309, |
|
"rewards/margins_max": 14.837709426879883, |
|
"rewards/margins_min": 6.098140716552734, |
|
"rewards/margins_std": 6.179808139801025, |
|
"rewards/rejected": -15.272473335266113, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 1.1096710342183042e-07, |
|
"logits/chosen": 0.4959636628627777, |
|
"logits/rejected": 1.1241891384124756, |
|
"logps/chosen": -719.9772338867188, |
|
"logps/rejected": -1887.967529296875, |
|
"loss": 0.0614, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.843437671661377, |
|
"rewards/margins": 12.002474784851074, |
|
"rewards/margins_max": 17.96475601196289, |
|
"rewards/margins_min": 6.040192604064941, |
|
"rewards/margins_std": 8.431940078735352, |
|
"rewards/rejected": -16.84591293334961, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 3.421875, |
|
"learning_rate": 1.0062413257228676e-07, |
|
"logits/chosen": 0.5790583491325378, |
|
"logits/rejected": 1.3127405643463135, |
|
"logps/chosen": -810.7774658203125, |
|
"logps/rejected": -1996.8209228515625, |
|
"loss": 0.0655, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -5.4459967613220215, |
|
"rewards/margins": 12.252093315124512, |
|
"rewards/margins_max": 17.894405364990234, |
|
"rewards/margins_min": 6.6097846031188965, |
|
"rewards/margins_std": 7.979430198669434, |
|
"rewards/rejected": -17.698089599609375, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 6.78125, |
|
"learning_rate": 9.077716856505825e-08, |
|
"logits/chosen": 0.5055748224258423, |
|
"logits/rejected": 1.3198411464691162, |
|
"logps/chosen": -762.1981201171875, |
|
"logps/rejected": -1757.9710693359375, |
|
"loss": 0.1311, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.086750030517578, |
|
"rewards/margins": 10.44882583618164, |
|
"rewards/margins_max": 14.819003105163574, |
|
"rewards/margins_min": 6.078649044036865, |
|
"rewards/margins_std": 6.180363178253174, |
|
"rewards/rejected": -15.535575866699219, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 2.671875, |
|
"learning_rate": 8.142824699681501e-08, |
|
"logits/chosen": 0.5170903205871582, |
|
"logits/rejected": 1.170555830001831, |
|
"logps/chosen": -716.73583984375, |
|
"logps/rejected": -1645.933349609375, |
|
"loss": 0.0802, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.820572376251221, |
|
"rewards/margins": 9.699037551879883, |
|
"rewards/margins_max": 15.123661994934082, |
|
"rewards/margins_min": 4.274412155151367, |
|
"rewards/margins_std": 7.671577453613281, |
|
"rewards/rejected": -14.519609451293945, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.5859375, |
|
"learning_rate": 7.257930050726003e-08, |
|
"logits/chosen": 0.5653474926948547, |
|
"logits/rejected": 1.3792940378189087, |
|
"logps/chosen": -771.275634765625, |
|
"logps/rejected": -1766.762939453125, |
|
"loss": 0.095, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.0928826332092285, |
|
"rewards/margins": 10.51708698272705, |
|
"rewards/margins_max": 15.350242614746094, |
|
"rewards/margins_min": 5.683931827545166, |
|
"rewards/margins_std": 6.835114479064941, |
|
"rewards/rejected": -15.609970092773438, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 6.423215837961045e-08, |
|
"logits/chosen": 0.5271497368812561, |
|
"logits/rejected": 1.3941797018051147, |
|
"logps/chosen": -722.5052490234375, |
|
"logps/rejected": -1865.8720703125, |
|
"loss": 0.0869, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.881400108337402, |
|
"rewards/margins": 11.674361228942871, |
|
"rewards/margins_max": 16.58696746826172, |
|
"rewards/margins_min": 6.76175594329834, |
|
"rewards/margins_std": 6.947473049163818, |
|
"rewards/rejected": -16.555761337280273, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 4.6875, |
|
"learning_rate": 5.6388546162442215e-08, |
|
"logits/chosen": 0.6665564775466919, |
|
"logits/rejected": 1.2474958896636963, |
|
"logps/chosen": -746.4205322265625, |
|
"logps/rejected": -1758.126708984375, |
|
"loss": 0.111, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.9972076416015625, |
|
"rewards/margins": 10.384644508361816, |
|
"rewards/margins_max": 15.061482429504395, |
|
"rewards/margins_min": 5.707806587219238, |
|
"rewards/margins_std": 6.614047050476074, |
|
"rewards/rejected": -15.381853103637695, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 2.453125, |
|
"learning_rate": 4.905008531297661e-08, |
|
"logits/chosen": 0.43994975090026855, |
|
"logits/rejected": 1.1360465288162231, |
|
"logps/chosen": -817.5567016601562, |
|
"logps/rejected": -1885.947998046875, |
|
"loss": 0.1015, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.406924247741699, |
|
"rewards/margins": 11.238929748535156, |
|
"rewards/margins_max": 16.946365356445312, |
|
"rewards/margins_min": 5.531497955322266, |
|
"rewards/margins_std": 8.071528434753418, |
|
"rewards/rejected": -16.645854949951172, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 2.9375, |
|
"learning_rate": 4.2218292861889444e-08, |
|
"logits/chosen": 0.5859326124191284, |
|
"logits/rejected": 1.2896353006362915, |
|
"logps/chosen": -773.9613037109375, |
|
"logps/rejected": -1735.0863037109375, |
|
"loss": 0.0918, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.150477409362793, |
|
"rewards/margins": 10.18777084350586, |
|
"rewards/margins_max": 15.025718688964844, |
|
"rewards/margins_min": 5.34982442855835, |
|
"rewards/margins_std": 6.84188985824585, |
|
"rewards/rejected": -15.338247299194336, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 3.589458109970467e-08, |
|
"logits/chosen": 0.5294678807258606, |
|
"logits/rejected": 1.2444860935211182, |
|
"logps/chosen": -743.5758666992188, |
|
"logps/rejected": -1683.407958984375, |
|
"loss": 0.1648, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.184317588806152, |
|
"rewards/margins": 9.598274230957031, |
|
"rewards/margins_max": 14.4242582321167, |
|
"rewards/margins_min": 4.7722883224487305, |
|
"rewards/margins_std": 6.824974060058594, |
|
"rewards/rejected": -14.78258991241455, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 2.265625, |
|
"learning_rate": 3.008025728484132e-08, |
|
"logits/chosen": 0.5059491991996765, |
|
"logits/rejected": 1.3568942546844482, |
|
"logps/chosen": -737.1229858398438, |
|
"logps/rejected": -1987.5445556640625, |
|
"loss": 0.091, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.759067535400391, |
|
"rewards/margins": 12.818672180175781, |
|
"rewards/margins_max": 18.131336212158203, |
|
"rewards/margins_min": 7.506007194519043, |
|
"rewards/margins_std": 7.513242244720459, |
|
"rewards/rejected": -17.577739715576172, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 2.4776523373372385e-08, |
|
"logits/chosen": 0.5932313799858093, |
|
"logits/rejected": 1.2811510562896729, |
|
"logps/chosen": -719.3793334960938, |
|
"logps/rejected": -1677.4947509765625, |
|
"loss": 0.075, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.827818870544434, |
|
"rewards/margins": 9.848733901977539, |
|
"rewards/margins_max": 14.251507759094238, |
|
"rewards/margins_min": 5.44596004486084, |
|
"rewards/margins_std": 6.226462364196777, |
|
"rewards/rejected": -14.676549911499023, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 2.953125, |
|
"learning_rate": 1.998447577055307e-08, |
|
"logits/chosen": 0.531481146812439, |
|
"logits/rejected": 1.223459005355835, |
|
"logps/chosen": -786.4287109375, |
|
"logps/rejected": -1908.134765625, |
|
"loss": 0.1179, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.9455766677856445, |
|
"rewards/margins": 11.500458717346191, |
|
"rewards/margins_max": 16.30417823791504, |
|
"rewards/margins_min": 6.696742057800293, |
|
"rewards/margins_std": 6.793482303619385, |
|
"rewards/rejected": -16.44603729248047, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.765625, |
|
"learning_rate": 1.5705105104167617e-08, |
|
"logits/chosen": 0.446284681558609, |
|
"logits/rejected": 1.1109905242919922, |
|
"logps/chosen": -792.7479858398438, |
|
"logps/rejected": -1819.526611328125, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -5.261641502380371, |
|
"rewards/margins": 10.497610092163086, |
|
"rewards/margins_max": 15.17370891571045, |
|
"rewards/margins_min": 5.821512699127197, |
|
"rewards/margins_std": 6.613001346588135, |
|
"rewards/rejected": -15.759251594543457, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 1.1939296019744529e-08, |
|
"logits/chosen": 0.5473194122314453, |
|
"logits/rejected": 1.104913353919983, |
|
"logps/chosen": -683.9682006835938, |
|
"logps/rejected": -1851.5924072265625, |
|
"loss": 0.076, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.581644058227539, |
|
"rewards/margins": 11.716299057006836, |
|
"rewards/margins_max": 16.266063690185547, |
|
"rewards/margins_min": 7.166537284851074, |
|
"rewards/margins_std": 6.434335231781006, |
|
"rewards/rejected": -16.29794692993164, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 8.687826997678116e-09, |
|
"logits/chosen": 0.5780460834503174, |
|
"logits/rejected": 1.2975406646728516, |
|
"logps/chosen": -729.7520751953125, |
|
"logps/rejected": -1835.2408447265625, |
|
"loss": 0.0673, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.86895751953125, |
|
"rewards/margins": 11.184858322143555, |
|
"rewards/margins_max": 15.93799114227295, |
|
"rewards/margins_min": 6.431723117828369, |
|
"rewards/margins_std": 6.721946716308594, |
|
"rewards/rejected": -16.053813934326172, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.6484375, |
|
"learning_rate": 5.951370192300576e-09, |
|
"logits/chosen": 0.5345112085342407, |
|
"logits/rejected": 1.213844656944275, |
|
"logps/chosen": -702.4107055664062, |
|
"logps/rejected": -1682.540283203125, |
|
"loss": 0.0844, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.7045135498046875, |
|
"rewards/margins": 10.013148307800293, |
|
"rewards/margins_max": 14.771675109863281, |
|
"rewards/margins_min": 5.254621982574463, |
|
"rewards/margins_std": 6.729572296142578, |
|
"rewards/rejected": -14.71766185760498, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.57421875, |
|
"learning_rate": 3.730491292930072e-09, |
|
"logits/chosen": 0.5803747773170471, |
|
"logits/rejected": 1.2554329633712769, |
|
"logps/chosen": -735.1619262695312, |
|
"logps/rejected": -1728.2470703125, |
|
"loss": 0.065, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.109474182128906, |
|
"rewards/margins": 10.042867660522461, |
|
"rewards/margins_max": 15.090121269226074, |
|
"rewards/margins_min": 4.995615482330322, |
|
"rewards/margins_std": 7.1378936767578125, |
|
"rewards/rejected": -15.152341842651367, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 2.0256494069306744e-09, |
|
"logits/chosen": 0.5878351926803589, |
|
"logits/rejected": 1.3185656070709229, |
|
"logps/chosen": -693.908203125, |
|
"logps/rejected": -1897.8695068359375, |
|
"loss": 0.1782, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.697625160217285, |
|
"rewards/margins": 12.107023239135742, |
|
"rewards/margins_max": 17.24478530883789, |
|
"rewards/margins_min": 6.969258785247803, |
|
"rewards/margins_std": 7.265894889831543, |
|
"rewards/rejected": -16.80464744567871, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.828125, |
|
"learning_rate": 8.371969648043876e-10, |
|
"logits/chosen": 0.6715101003646851, |
|
"logits/rejected": 1.3766255378723145, |
|
"logps/chosen": -733.6317138671875, |
|
"logps/rejected": -1728.0810546875, |
|
"loss": 0.1379, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.896046161651611, |
|
"rewards/margins": 10.241785049438477, |
|
"rewards/margins_max": 14.780932426452637, |
|
"rewards/margins_min": 5.702638626098633, |
|
"rewards/margins_std": 6.419322967529297, |
|
"rewards/rejected": -15.13783073425293, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.59765625, |
|
"learning_rate": 1.653796473341518e-10, |
|
"logits/chosen": 0.4578518271446228, |
|
"logits/rejected": 1.291621446609497, |
|
"logps/chosen": -713.6257934570312, |
|
"logps/rejected": -1659.1171875, |
|
"loss": 0.0983, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -4.576380729675293, |
|
"rewards/margins": 9.997756958007812, |
|
"rewards/margins_max": 14.97430419921875, |
|
"rewards/margins_min": 5.021212577819824, |
|
"rewards/margins_std": 7.037899017333984, |
|
"rewards/rejected": -14.574139595031738, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": 1.1567333936691284, |
|
"eval_logits/rejected": 1.3502662181854248, |
|
"eval_logps/chosen": -815.366455078125, |
|
"eval_logps/rejected": -885.2385864257812, |
|
"eval_loss": 0.8997361063957214, |
|
"eval_rewards/accuracies": 0.5877500176429749, |
|
"eval_rewards/chosen": -4.770576000213623, |
|
"eval_rewards/margins": 0.8909361362457275, |
|
"eval_rewards/margins_max": 6.031704425811768, |
|
"eval_rewards/margins_min": -2.9257826805114746, |
|
"eval_rewards/margins_std": 2.9022324085235596, |
|
"eval_rewards/rejected": -5.66151237487793, |
|
"eval_runtime": 1670.0359, |
|
"eval_samples_per_second": 4.79, |
|
"eval_steps_per_second": 0.299, |
|
"step": 2428 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2428, |
|
"total_flos": 0.0, |
|
"train_loss": 0.19075011757787017, |
|
"train_runtime": 22524.5017, |
|
"train_samples_per_second": 1.725, |
|
"train_steps_per_second": 0.108 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2428, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|